openalex-mcp-server 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ https://docs.openalex.org/~gitbook/mcp
package/package.json ADDED
@@ -0,0 +1,49 @@
1
+ {
2
+ "name": "openalex-mcp-server",
3
+ "version": "1.0.1",
4
+ "description": "轻量化的 OpenAlex MCP 服务器,为 AI Agent 提供快速学术文献检索、详细摘要信息和全文下载能力",
5
+ "main": "src/index.js",
6
+ "type": "module",
7
+ "scripts": {
8
+ "start": "node src/cli.js",
9
+ "release:patch": "npm version patch -m \"chore(release): v%s\"",
10
+ "release:minor": "npm version minor -m \"chore(release): v%s\"",
11
+ "release:major": "npm version major -m \"chore(release): v%s\"",
12
+ "typecheck": "tsc --noEmit",
13
+ "test": "node --test"
14
+ },
15
+ "bin": {
16
+ "openalex-mcp-server": "src/cli.js",
17
+ "openalex-mcp": "src/cli.js"
18
+ },
19
+ "engines": {
20
+ "node": ">=18"
21
+ },
22
+ "keywords": [
23
+ "mcp",
24
+ "openalex",
25
+ "academic",
26
+ "research",
27
+ "papers"
28
+ ],
29
+ "author": "https://github.com/SecretRichGarden/openAlex-mcp",
30
+ "license": "ISC",
31
+ "repository": {
32
+ "type": "git",
33
+ "url": "git+https://github.com/SecretRichGarden/openAlex-mcp.git"
34
+ },
35
+ "bugs": {
36
+ "url": "https://github.com/SecretRichGarden/openAlex-mcp/issues"
37
+ },
38
+ "homepage": "https://github.com/SecretRichGarden/openAlex-mcp#readme",
39
+ "dependencies": {
40
+ "@modelcontextprotocol/sdk": "^1.0.4",
41
+ "node-fetch": "^3.3.2",
42
+ "dotenv": "^16.4.5",
43
+ "pdf-parse": "^1.1.1"
44
+ },
45
+ "devDependencies": {
46
+ "@types/node": "^22.10.5",
47
+ "typescript": "^5.7.3"
48
+ }
49
+ }
@@ -0,0 +1,66 @@
1
+ # Ralph YOLO Progress Log
2
+ Started: Sun, Jan 25, 2026 3:09:23 PM
3
+ ---
4
+
5
+ ## US-001: 项目初始化和基础结构 ✅
6
+ - 完成时间: $(date)
7
+ - Agent: Full-stack-developer
8
+ - 状态: 已完成
9
+ - 实现内容:
10
+ - 创建 package.json (项目名: openalex-mcp-server, 版本: 1.0.0)
11
+ - 安装依赖: @modelcontextprotocol/sdk, node-fetch, dotenv
12
+ - 创建目录: src/, config/, cache/papers/, cache/fulltext/
13
+ - 创建配置文件: .env.example, .gitignore, tsconfig.json
14
+ - 文件修改: package.json, .env.example, .gitignore, tsconfig.json
15
+ - 学习点: TypeScript 配置已就绪,等待源代码文件
16
+
17
+ ---
18
+
19
+ ## US-002: 工具函数模块 ✅
20
+ - 完成时间: $(date)
21
+ - Agent: Full-stack-developer
22
+ - 提交哈希: 3b8ec91
23
+ - 状态: 已完成
24
+ - 实现内容:
25
+ - 创建 src/utils.js (111 行代码)
26
+ - 实现 rebuildAbstract() - 从倒排索引重建摘要
27
+ - 实现 simplifyOpenAlexId() - 简化 OpenAlex URL
28
+ - 实现 sleep() - 异步延迟
29
+ - 实现 validateWorkId() - 验证 Work ID 格式
30
+ - 更新 tsconfig.json 支持 JavaScript 类型检查
31
+ - 文件修改: src/utils.js (新建), tsconfig.json (更新)
32
+ - 学习点: 配置 TypeScript 检查 JavaScript 文件需要 allowJs 和 checkJs
33
+
34
+ ---
35
+
36
+ ## US-003: 缓存管理器模块 ✅
37
+ - 完成时间: $(date)
38
+ - Agent: Full-stack-developer
39
+ - 提交哈希: ad32123
40
+ - 状态: 已完成
41
+ - 实现内容:
42
+ - 创建 src/cache-manager.js (204 行代码)
43
+ - 实现 CacheManager 类,支持 get/set/has/delete/getStats 方法
44
+ - 缓存存储在 cache/papers/ 目录,JSON 格式
45
+ - 自动创建缓存目录,自动过期检查机制
46
+ - 提供详细的缓存统计功能
47
+ - 文件修改: src/cache-manager.js (新建)
48
+ - 学习点: 使用 import.meta.url 和 fileURLToPath 获取文件路径,实现智能过期检查
49
+
50
+ ---
51
+
52
+ ## US-004: OpenAlex API 客户端 - 基础请求 ✅
53
+ - 完成时间: $(date)
54
+ - Agent: Full-stack-developer
55
+ - 提交哈希: 6f5e5f8
56
+ - 状态: 已完成
57
+ - 实现内容:
58
+ - 创建 src/openalex-client.js (85 行代码)
59
+ - 实现 OpenAlexClient 类,支持可选 API key 认证
60
+ - 实现 _request 私有方法,处理 GET 请求和查询参数
61
+ - 基础 URL: https://api.openalex.org
62
+ - 完整的 HTTP 错误处理和网络错误处理
63
+ - 文件修改: src/openalex-client.js (新建)
64
+ - 学习点: TypeScript 严格类型检查要求 catch 块中使用 instanceof Error 进行类型守卫
65
+
66
+ ---
@@ -0,0 +1,204 @@
1
+ /**
2
+ * 缓存管理器模块
3
+ * 提供文件系统缓存管理功能,用于缓存论文元数据和全文
4
+ */
5
+
6
+ import { promises as fs } from 'fs';
7
+ import path from 'path';
8
+ import { fileURLToPath } from 'url';
9
+
10
// ES modules do not define __filename/__dirname, so both are derived from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
12
+
13
/**
 * File-system backed cache with per-entry expiry.
 *
 * Each entry is stored as `<sanitized key>.json` inside `cacheDir` and contains
 * `{ key, data, createdAt, expiresAt }`, with both timestamps as ISO-8601 strings.
 */
export class CacheManager {
  /**
   * Creates a cache manager and starts creating the cache directory.
   * @param {string} [cacheDir='cache/papers'] - Cache directory, resolved against
   *   the parent of this module's directory.
   */
  constructor(cacheDir = 'cache/papers') {
    this.cacheDir = path.resolve(__dirname, '..', cacheDir);
    // A constructor cannot await, so directory creation is only started here.
    // The rejection is deliberately absorbed to avoid an unhandled rejection:
    // set() and getStats() ensure the directory again, and set() surfaces the
    // real error to its caller.
    this._ensureCacheDirExists().catch(() => {});
  }

  /**
   * Creates the cache directory (and any missing parents) if it does not exist.
   * @private
   * @returns {Promise<void>}
   */
  async _ensureCacheDirExists() {
    // Recursive mkdir succeeds when the directory already exists, so no
    // separate existence check (and no check-then-create race) is needed.
    await fs.mkdir(this.cacheDir, { recursive: true });
  }

  /**
   * Maps a cache key to the path of its JSON file.
   * @private
   * @param {string} key - Cache key.
   * @returns {string} Absolute path of the cache file.
   */
  _getCacheFilePath(key) {
    // Anything outside [a-zA-Z0-9_-] becomes "_" so the key is a safe file name.
    const safeKey = key.replace(/[^a-zA-Z0-9_-]/g, '_');
    return path.join(this.cacheDir, `${safeKey}.json`);
  }

  /**
   * Loads the stored entry for a key, deleting it when it has expired.
   * @private
   * @param {string} key - Cache key.
   * @returns {Promise<Record<string, any>|null>} The parsed entry, or null when
   *   it is missing, unreadable, not a JSON object, or expired.
   */
  async _readEntry(key) {
    let entry;
    try {
      const content = await fs.readFile(this._getCacheFilePath(key), 'utf8');
      entry = JSON.parse(content);
    } catch (error) {
      // A missing or corrupt file is treated as a cache miss.
      return null;
    }

    if (entry === null || typeof entry !== 'object') {
      return null;
    }

    if (entry.expiresAt && new Date(entry.expiresAt).getTime() < Date.now()) {
      await this.delete(key);
      return null;
    }

    return entry;
  }

  /**
   * Checks whether a non-expired entry exists for the key.
   * @param {string} key - Cache key.
   * @returns {Promise<boolean>} True when a valid entry exists.
   */
  async has(key) {
    return (await this._readEntry(key)) !== null;
  }

  /**
   * Reads the cached data for a key.
   * @param {string} key - Cache key.
   * @returns {Promise<any|null>} The cached data, or null when the entry is
   *   missing, unreadable, or expired.
   */
  async get(key) {
    const entry = await this._readEntry(key);
    return entry === null ? null : entry.data;
  }

  /**
   * Stores data under a key.
   * @param {string} key - Cache key.
   * @param {any} data - JSON-serializable data to cache.
   * @param {number} [expiryDays=30] - Lifetime in days; fractional values and
   *   numeric strings are accepted.
   * @returns {Promise<void>}
   * @throws {RangeError} When expiryDays is not a finite number.
   */
  async set(key, data, expiryDays = 30) {
    // Coerce first: adding a numeric string to a number would concatenate.
    const days = Number(expiryDays);
    if (!Number.isFinite(days)) {
      throw new RangeError(`expiryDays must be a finite number, got ${expiryDays}`);
    }

    await this._ensureCacheDirExists();

    // Millisecond arithmetic keeps fractional days (Date#setDate truncates them).
    const now = Date.now();
    const millisecondsPerDay = 24 * 60 * 60 * 1000;
    const entry = {
      key,
      data,
      createdAt: new Date(now).toISOString(),
      expiresAt: new Date(now + days * millisecondsPerDay).toISOString()
    };

    await fs.writeFile(this._getCacheFilePath(key), JSON.stringify(entry, null, 2), 'utf8');
  }

  /**
   * Deletes the entry for a key.
   * @param {string} key - Cache key.
   * @returns {Promise<boolean>} True when a file was removed, false otherwise.
   */
  async delete(key) {
    try {
      await fs.unlink(this._getCacheFilePath(key));
      return true;
    } catch (error) {
      // Missing file or failed removal.
      return false;
    }
  }

  /**
   * Summarizes the cache directory.
   * @returns {Promise<{totalFiles: number, totalSize: number, oldestCache: string|null, newestCache: string|null}>}
   *   File count and total byte size of the `.json` files, plus the earliest and
   *   latest `createdAt` values found in parseable entries.
   */
  async getStats() {
    /** @type {{totalFiles: number, totalSize: number, oldestCache: string|null, newestCache: string|null}} */
    const summary = { totalFiles: 0, totalSize: 0, oldestCache: null, newestCache: null };

    let names;
    try {
      await this._ensureCacheDirExists();
      names = await fs.readdir(this.cacheDir);
    } catch (error) {
      return summary;
    }

    let oldestTime = Infinity;
    let newestTime = -Infinity;

    for (const name of names) {
      if (!name.endsWith('.json')) {
        continue;
      }
      const filePath = path.join(this.cacheDir, name);

      // Each file is handled on its own so one unreadable or concurrently
      // removed file cannot zero out the statistics for the whole directory.
      let createdAt;
      try {
        const stats = await fs.stat(filePath);
        if (!stats.isFile()) {
          continue;
        }
        summary.totalFiles += 1;
        summary.totalSize += stats.size;
        createdAt = JSON.parse(await fs.readFile(filePath, 'utf8'))?.createdAt;
      } catch (error) {
        // Unreadable or invalid JSON: counted above if stat succeeded, but it
        // contributes no timestamp.
        continue;
      }

      const createdTime = new Date(createdAt).getTime();
      if (Number.isNaN(createdTime)) {
        continue;
      }
      if (createdTime < oldestTime) {
        oldestTime = createdTime;
        summary.oldestCache = createdAt;
      }
      if (createdTime > newestTime) {
        newestTime = createdTime;
        summary.newestCache = createdAt;
      }
    }

    return summary;
  }
}
package/src/cli.js ADDED
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * CLI entry for OpenAlex MCP server (stdio).
4
+ */
5
+ import { createRequire } from 'module';
6
+ import path from 'path';
7
+ import { fileURLToPath } from 'url';
8
+
9
// createRequire gives this ES module a CommonJS-style require, used below to load package.json.
const require = createRequire(import.meta.url);
// ES modules do not define __filename/__dirname, so both are derived from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// package.json is resolved one directory above this file; its version field is what --version prints.
const pkg = require(path.resolve(__dirname, '..', 'package.json'));
13
+
14
/**
 * Prints the CLI usage text and the recognized environment variables to stdout.
 * The template literal is trimmed so the output has no leading or trailing blank line.
 */
function printHelp() {
  const helpText = `
OpenAlex MCP Server (stdio)

Usage:
openalex-mcp-server [--help] [--version]
openalex-mcp [--help] [--version]
npx -y openalex-mcp-server

Environment:
OPENALEX_API_KEY Optional. Higher rate limit when provided.
CACHE_ENABLED Optional. Defaults to true.
ABSTRACT_MODE Optional. Defaults to quick.
`;
  console.log(helpText.trim());
}
30
+
31
// Everything after the node binary and the script path.
const args = process.argv.slice(2);

/**
 * Reports whether any of the given spellings of a flag was passed on the command line.
 * @param {...string} flags - Accepted spellings, e.g. '-h' and '--help'.
 * @returns {boolean}
 */
const hasFlag = (...flags) => flags.some((flag) => args.includes(flag));

// Help is checked first, so it wins when both flags are given.
if (hasFlag('-h', '--help')) {
  printHelp();
  process.exit(0);
}

if (hasFlag('-v', '--version')) {
  console.log(pkg.version);
  process.exit(0);
}

// No informational flag: load the server module, which is imported only for its side effects.
try {
  await import('./index.js');
} catch (error) {
  console.error('Failed to start server:', error);
  process.exit(1);
}
@@ -0,0 +1,333 @@
1
+ /**
2
+ * 全文下载器模块
3
+ * 提供论文全文下载、文本提取和章节识别功能
4
+ */
5
+
6
+ import fetch from 'node-fetch';
7
+ import { promises as fs } from 'fs';
8
+ import path from 'path';
9
+ import { fileURLToPath } from 'url';
10
+ import pdfParse from 'pdf-parse';
11
+
12
// ES modules do not define __filename/__dirname, so both are derived from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
14
+
15
/**
 * Downloads open-access full texts and extracts their text and sections.
 *
 * All artifacts of a work live in `cacheDir` under its sanitized ID:
 * `<id>.pdf`, `<id>.txt` and `<id>.sections.json`.
 */
export class FulltextDownloader {
  /**
   * Creates a downloader and starts creating the cache directory.
   * @param {string} [cacheDir='cache/fulltext'] - Cache directory, resolved
   *   against the parent of this module's directory.
   */
  constructor(cacheDir = 'cache/fulltext') {
    this.cacheDir = path.resolve(__dirname, '..', cacheDir);
    // A constructor cannot await, so directory creation is only started here.
    // The rejection is absorbed to avoid an unhandled rejection;
    // downloadFulltext() ensures the directory again and reports the failure.
    this._ensureCacheDirExists().catch(() => {});
  }

  /**
   * Creates the cache directory (and any missing parents) if it does not exist.
   * @private
   * @returns {Promise<void>}
   */
  async _ensureCacheDirExists() {
    // Recursive mkdir succeeds when the directory already exists.
    await fs.mkdir(this.cacheDir, { recursive: true });
  }

  /**
   * Builds the cache path of one artifact of a work.
   * @private
   * @param {string} workId - Work ID.
   * @param {string} extension - File extension without the leading dot.
   * @returns {string} Path of the cache file.
   */
  _getCachePath(workId, extension) {
    // Anything outside [a-zA-Z0-9_-] becomes "_" so the ID is a safe file name.
    const safeId = workId.replace(/[^a-zA-Z0-9_-]/g, '_');
    return path.join(this.cacheDir, `${safeId}.${extension}`);
  }

  /**
   * Checks whether a downloaded body carries the PDF signature.
   * @private
   * @param {Buffer} buffer - Downloaded bytes.
   * @returns {boolean} True when "%PDF-" occurs within the first 1024 bytes.
   */
  _isPdf(buffer) {
    // Scan a small window rather than only offset 0: some producers emit a few
    // bytes before the header and common readers still accept such files.
    return buffer.subarray(0, 1024).includes('%PDF-');
  }

  /**
   * Reports whether a work exposes an open-access full-text URL.
   * @param {Object} workData - Work record; `id` is required, `open_access` and
   *   `best_pdf_url` are read when present.
   * @returns {Promise<Object>} `{ work_id, is_oa, oa_status, oa_url, fulltext_available }`
   * @throws {Error} When workData is missing or has no id.
   */
  async detectFulltext(workData) {
    if (!workData || !workData.id) {
      throw new Error('无效的论文数据');
    }

    const result = {
      work_id: workData.id,
      is_oa: false,
      oa_status: null,
      oa_url: null,
      fulltext_available: false
    };

    if (workData.open_access) {
      result.is_oa = workData.open_access.is_oa || false;
      result.oa_status = workData.open_access.oa_status || null;
      result.oa_url = workData.open_access.oa_url || null;
    }

    // A direct PDF link takes precedence over the generic OA URL.
    if (workData.best_pdf_url) {
      result.oa_url = workData.best_pdf_url;
    }
    result.fulltext_available = Boolean(result.oa_url);

    return result;
  }

  /**
   * Downloads the full-text PDF of a work into the cache.
   * @param {string} workId - Work ID.
   * @param {string} oaUrl - URL of the open-access full text.
   * @param {boolean} [forceDownload=false] - Download even when a cached PDF exists.
   * @returns {Promise<Object>} `{ work_id, status, cache_path, file_size }` with
   *   status 'cached', 'downloaded' or 'failed'; failures also carry `error`.
   * @throws {Error} When workId or oaUrl is missing.
   */
  async downloadFulltext(workId, oaUrl, forceDownload = false) {
    if (!workId || !oaUrl) {
      throw new Error('workId 和 oaUrl 是必需参数');
    }

    const pdfPath = this._getCachePath(workId, 'pdf');

    if (!forceDownload) {
      try {
        const stats = await fs.stat(pdfPath);
        // An empty file cannot be a valid PDF, so it is not treated as a hit.
        if (stats.isFile() && stats.size > 0) {
          return {
            work_id: workId,
            status: 'cached',
            cache_path: pdfPath,
            file_size: stats.size
          };
        }
      } catch {
        // Not cached yet: fall through to the download.
      }
    }

    try {
      const response = await fetch(oaUrl, {
        headers: {
          'User-Agent': 'openalex-mcp-server/1.0.0'
        }
      });

      if (!response.ok) {
        throw new Error(`下载失败: HTTP ${response.status} ${response.statusText}`);
      }

      // arrayBuffer() is the standard Response API; node-fetch v3 deprecates buffer().
      const buffer = Buffer.from(await response.arrayBuffer());

      // OA URLs frequently point at HTML landing pages. Writing such a body to
      // <id>.pdf would make every later call report it as a cached PDF, so the
      // content is validated before anything is stored.
      if (!this._isPdf(buffer)) {
        const contentType = response.headers.get('content-type');
        throw new Error(`下载的内容不是 PDF 文件: ${contentType}`);
      }

      await this._ensureCacheDirExists();
      await fs.writeFile(pdfPath, buffer);

      // Text and sections derived from a previous PDF are now stale; remove
      // them so extractAndSaveSections() re-extracts from the new file.
      await Promise.all(
        ['txt', 'sections.json'].map((extension) =>
          fs.rm(this._getCachePath(workId, extension), { force: true })
        )
      );

      return {
        work_id: workId,
        status: 'downloaded',
        cache_path: pdfPath,
        file_size: buffer.length
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      return {
        work_id: workId,
        status: 'failed',
        cache_path: null,
        file_size: 0,
        error: errorMessage
      };
    }
  }

  /**
   * Extracts the text content of a PDF file.
   * @param {string} pdfPath - Path of the PDF file.
   * @returns {Promise<string>} Extracted text.
   * @throws {Error} When the file cannot be read or parsed; the underlying
   *   error is attached as `cause`.
   */
  async extractText(pdfPath) {
    try {
      const dataBuffer = await fs.readFile(pdfPath);
      const data = await pdfParse(dataBuffer);
      return data.text;
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      throw new Error(`PDF 文本提取失败: ${errorMessage}`, { cause: error });
    }
  }

  /**
   * Splits full text into sections using standalone heading lines.
   *
   * Only the first occurrence of each heading is used. A section runs from the
   * end of its heading line to the start of the next recognized heading.
   * @param {string} text - Full text of the paper.
   * @returns {Object} Map of section name to trimmed body; sections that are
   *   not found or are empty are omitted. Non-string input yields `{}`.
   */
  extractSections(text) {
    if (typeof text !== 'string' || text.length === 0) {
      return {};
    }

    // Case-insensitive headings that occupy a line of their own.
    const patterns = {
      abstract: /(?:^|\n)\s*(?:abstract|摘要|resumen)\s*\n/i,
      introduction: /(?:^|\n)\s*(?:introduction|引言|introducción)\s*\n/i,
      methods: /(?:^|\n)\s*(?:materials?\s+and\s+methods?|methodology|methods|方法|material\s+y\s+métodos)\s*\n/i,
      results: /(?:^|\n)\s*(?:results|结果|resultados)\s*\n/i,
      discussion: /(?:^|\n)\s*(?:discussion|讨论|discusión)\s*\n/i,
      conclusion: /(?:^|\n)\s*(?:conclusion[s]?|结论|conclusión)\s*\n/i,
      references: /(?:^|\n)\s*(?:references|bibliography|参考文献|referencias)\s*\n/i
    };

    const headings = [];
    for (const [section, pattern] of Object.entries(patterns)) {
      const match = pattern.exec(text);
      if (match) {
        headings.push({
          section,
          // Where the heading itself begins: the previous section must stop
          // here, otherwise it would swallow this heading's text.
          start: match.index,
          contentStart: match.index + match[0].length
        });
      }
    }

    headings.sort((a, b) => a.start - b.start);

    const bodies = new Map();
    headings.forEach((heading, position) => {
      const following = headings[position + 1];
      const end = following ? following.start : text.length;
      // slice() yields '' when adjacent headings share a newline (end < start).
      const body = text.slice(heading.contentStart, end).trim();
      if (body.length > 0) {
        bodies.set(heading.section, body);
      }
    });

    // Emit in the fixed pattern order so the saved JSON has a stable key order.
    const sections = {};
    for (const section of Object.keys(patterns)) {
      if (bodies.has(section)) {
        sections[section] = bodies.get(section);
      }
    }
    return sections;
  }

  /**
   * Extracts the sections of a cached PDF and saves them next to it.
   * @param {string} workId - Work ID whose PDF is already cached.
   * @returns {Promise<Object>} On success
   *   `{ work_id, status: 'success', text_path, sections_path, sections }` where
   *   `sections` lists the section names found; otherwise
   *   `{ work_id, status: 'failed', error }`.
   */
  async extractAndSaveSections(workId) {
    const pdfPath = this._getCachePath(workId, 'pdf');
    const textPath = this._getCachePath(workId, 'txt');
    const sectionsPath = this._getCachePath(workId, 'sections.json');

    try {
      // Fails (and is reported below) when the PDF has not been downloaded.
      await fs.access(pdfPath);

      // Reuse previously extracted text when present; otherwise parse the PDF.
      let text;
      try {
        text = await fs.readFile(textPath, 'utf8');
      } catch {
        text = await this.extractText(pdfPath);
        await fs.writeFile(textPath, text, 'utf8');
      }

      const sections = this.extractSections(text);
      await fs.writeFile(sectionsPath, JSON.stringify(sections, null, 2), 'utf8');

      return {
        work_id: workId,
        status: 'success',
        text_path: textPath,
        sections_path: sectionsPath,
        sections: Object.keys(sections)
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      return {
        work_id: workId,
        status: 'failed',
        error: errorMessage
      };
    }
  }

  /**
   * Reads previously extracted sections of a work.
   * @param {string} workId - Work ID.
   * @param {string[]} [sections] - Section names to return; all when omitted.
   * @returns {Promise<Object>} Map of section name to content.
   * @throws {Error} When the sections file cannot be read or parsed; the
   *   underlying error is attached as `cause`.
   */
  async getSections(workId, sections = null) {
    const sectionsPath = this._getCachePath(workId, 'sections.json');

    try {
      const content = await fs.readFile(sectionsPath, 'utf8');
      const allSections = JSON.parse(content);

      if (sections && Array.isArray(sections)) {
        const result = {};
        for (const section of sections) {
          // Own-property check: a requested name such as "constructor" must
          // not resolve to an inherited Object.prototype member.
          if (Object.hasOwn(allSections, section) && allSections[section]) {
            result[section] = allSections[section];
          }
        }
        return result;
      }

      return allSections;
    } catch (error) {
      throw new Error(
        `无法读取章节文件: ${error instanceof Error ? error.message : String(error)}`,
        { cause: error }
      );
    }
  }
}