openalex-mcp-server 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/README.md +680 -0
- package/.claude/commands/prd.md +138 -0
- package/.claude/commands/ralph-yolo.md +346 -0
- package/.claude/commands/ralph.md +226 -0
- package/.claude/ralph-config.json +17 -0
- package/.claude/scripts/prompt.md +108 -0
- package/.claude/scripts/ralph.sh +127 -0
- package/.claude/skills/prd.md +270 -0
- package/.claude/skills/ralph-yolo.md +613 -0
- package/.claude/skills/ralph.md +315 -0
- package/.claude/templates/prd.json.example +64 -0
- package/.env.example +8 -0
- package/.github/workflows/npm-publish.yml +48 -0
- package/README.md +525 -0
- package/config/mcp-config.json +77 -0
- package/docs/PRD.md +897 -0
- package/docs/api-document.md +973 -0
- package/docs/document-mcp.txt +1 -0
- package/package.json +49 -0
- package/prd-progress.txt +66 -0
- package/src/cache-manager.js +204 -0
- package/src/cli.js +47 -0
- package/src/fulltext-downloader.js +333 -0
- package/src/index.js +603 -0
- package/src/json-optimizer.js +153 -0
- package/src/openalex-client.js +305 -0
- package/src/types/pdf-parse.d.ts +13 -0
- package/src/utils.js +90 -0
- package/tests/cli.test.js +31 -0
- package/tsconfig.json +22 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON 格式优化器模块
|
|
3
|
+
* 将 OpenAlex API 返回的原始数据转换为简洁格式,提升 LLM 上下文利用效率
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { rebuildAbstract, simplifyOpenAlexId } from './utils.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Convert a raw OpenAlex work record into a compact object that is cheaper
 * to place in an LLM context window.
 *
 * Keys that are always present: id, title, doi, pmid, publication_year,
 * type, cited_by_count and concepts (at most five). The keys venue, volume,
 * issue, pages, authors, open_access, abstract, topics,
 * referenced_works_count and best_pdf_url are only added when the raw record
 * carries the corresponding data.
 *
 * @param {Object} rawWork - Raw work object as returned by the OpenAlex API.
 * @returns {Object|null} The compact work, or null when rawWork is not an object.
 * @example
 * const optimized = optimizeWork(rawApiResponse);
 */
export function optimizeWork(rawWork) {
  if (!rawWork || typeof rawWork !== 'object') {
    return null;
  }

  const concepts = Array.isArray(rawWork.concepts) ? rawWork.concepts : [];

  // Core scalar fields plus the top five concepts.
  const optimized = {
    id: simplifyOpenAlexId(rawWork.id),
    title: rawWork.title || '',
    doi: rawWork.doi || null,
    // OpenAlex nests the PubMed id under `ids`; a root-level `pmid` is still
    // honoured for callers that flatten the record themselves.
    pmid: rawWork.pmid || rawWork.ids?.pmid || null,
    publication_year: rawWork.publication_year || null,
    type: rawWork.type || null,
    cited_by_count: rawWork.cited_by_count || 0,
    concepts: concepts.slice(0, 5).map((concept) => ({
      id: simplifyOpenAlexId(concept?.id),
      display_name: concept?.display_name,
      score: concept?.score,
    })),
  };

  // Venue (journal, conference, repository, ...) of the primary location.
  const primaryLocation = rawWork.primary_location;
  const source = primaryLocation?.source;
  if (source) {
    optimized.venue = {
      id: simplifyOpenAlexId(source.id),
      display_name: source.display_name,
      type: source.type,
      issn: source.issn || null,
      is_oa: source.is_oa || false,
    };
  }

  // Volume / issue / pages. Values on primary_location win (the historical
  // lookup); otherwise fall back to the `biblio` object.
  const biblio = rawWork.biblio;
  const firstPage = biblio?.first_page;
  const lastPage = biblio?.last_page;
  const biblioPages =
    firstPage && lastPage && firstPage !== lastPage
      ? `${firstPage}-${lastPage}`
      : firstPage || lastPage;

  const volume = primaryLocation?.volume || biblio?.volume;
  const issue = primaryLocation?.issue || biblio?.issue;
  const pages = primaryLocation?.pages || biblioPages;
  if (volume) {
    optimized.volume = volume;
  }
  if (issue) {
    optimized.issue = issue;
  }
  if (pages) {
    optimized.pages = pages;
  }

  // Flatten authorships to one entry per author, keeping only the first
  // listed institution.
  if (Array.isArray(rawWork.authorships) && rawWork.authorships.length > 0) {
    optimized.authors = rawWork.authorships.map((authorship) => {
      const firstInstitution = authorship?.institutions?.[0];
      return {
        id: authorship?.author ? simplifyOpenAlexId(authorship.author.id) : null,
        display_name: authorship?.author?.display_name || authorship?.raw_author_name || '',
        institution: firstInstitution?.display_name || null,
        country: firstInstitution?.country_code || null,
        // `??` keeps an explicit `false` distinct from "unknown" (null).
        is_corresponding: authorship?.is_corresponding ?? null,
      };
    });
  }

  // Open-access summary.
  if (rawWork.open_access) {
    optimized.open_access = {
      is_oa: rawWork.open_access.is_oa || false,
      oa_status: rawWork.open_access.oa_status || null,
      oa_url: rawWork.open_access.oa_url || null,
    };
  }

  // Rebuild the plain-text abstract from the inverted index.
  if (rawWork.abstract_inverted_index) {
    optimized.abstract = rebuildAbstract(rawWork.abstract_inverted_index);
  }

  // Top three topics with their subfield / field / domain names.
  if (Array.isArray(rawWork.topics) && rawWork.topics.length > 0) {
    optimized.topics = rawWork.topics.slice(0, 3).map((topic) => ({
      id: simplifyOpenAlexId(topic?.id),
      display_name: topic?.display_name,
      subfield: topic?.subfield?.display_name || null,
      field: topic?.field?.display_name || null,
      domain: topic?.domain?.display_name || null,
    }));
  }

  // Only the number of references is kept, not the list itself.
  if (rawWork.referenced_works && rawWork.referenced_works.length > 0) {
    optimized.referenced_works_count = rawWork.referenced_works.length;
  }

  // Direct PDF link of the best open-access location, when there is one.
  if (rawWork.best_oa_location) {
    optimized.best_pdf_url = rawWork.best_oa_location.pdf_url || null;
  }

  return optimized;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Convert a raw OpenAlex list response into `{ meta, papers }`.
 *
 * `meta` carries total_count, page and per_page (falling back to 0, 1 and 20
 * when the response does not provide them). `papers` holds every entry of
 * `results` run through optimizeWork, with entries that optimize to null
 * dropped. A missing, non-object or malformed response yields an empty
 * `papers` list rather than an exception.
 *
 * @param {Object} rawResults - Raw list response returned by the OpenAlex API.
 * @returns {{meta: {total_count: number, page: number, per_page: number}, papers: Object[]}}
 *   The compact result set.
 * @example
 * const optimized = optimizeSearchResults(rawSearchResponse);
 */
export function optimizeSearchResults(rawResults) {
  const isObject = rawResults !== null && typeof rawResults === 'object';
  const rawMeta = isObject ? rawResults.meta : undefined;
  // Anything other than a real array is treated as "no results" so that a
  // malformed payload cannot crash the `.map` below.
  const works = isObject && Array.isArray(rawResults.results) ? rawResults.results : [];

  return {
    meta: {
      total_count: rawMeta?.count || 0,
      page: rawMeta?.page || 1,
      per_page: rawMeta?.per_page || 20,
    },
    papers: works
      .map((work) => optimizeWork(work))
      .filter((paper) => paper !== null),
  };
}
|
|
145
|
+
|
|
146
|
+
/**
 * Optimize the response of a batch lookup.
 *
 * The raw response is handed unchanged to optimizeSearchResults, so the
 * return value is whatever that function produces.
 *
 * @param {Object} rawResults - Raw batch response returned by the OpenAlex API.
 * @returns {Object} The optimized batch result.
 */
export function optimizeBatchResults(rawResults) {
  const optimizedBatch = optimizeSearchResults(rawResults);
  return optimizedBatch;
}
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAlex API 客户端模块
|
|
3
|
+
* 提供与 OpenAlex API 交互的基础功能
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import fetch from 'node-fetch';
|
|
7
|
+
import { sleep } from './utils.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Client for the OpenAlex REST API.
 *
 * Wraps GET requests against https://api.openalex.org with request pacing
 * (at most one request start per `rateLimitDelay` milliseconds), retry with
 * exponential back-off for HTTP 429 / 5xx responses, and optional API-key
 * authentication.
 */
export class OpenAlexClient {
  /**
   * Create a client instance.
   * @param {string} [apiKey] - Optional OpenAlex API key. When set it is sent
   *   as the `api_key` query parameter on every request.
   */
  constructor(apiKey) {
    this.baseUrl = 'https://api.openalex.org';
    this.apiKey = apiKey;
    // NOTE(review): the version and mailto address below look like
    // placeholders - confirm the intended contact address before release.
    this.userAgent = 'openalex-mcp-server/1.0.0 (mailto:your-email@example.com)';

    // Minimum spacing between request starts: 100 ms => at most 10 requests/second.
    this.rateLimitDelay = 100;
    // Timestamp (ms since epoch) of the most recently reserved request slot.
    this.lastRequestTime = 0;
  }

  /**
   * Send a rate-limited GET request, retrying transient failures.
   *
   * Up to five attempts are made. After a failed attempt with HTTP status
   * 429 or >= 500 the call waits 1 s, 2 s, 4 s, then 8 s before the next
   * attempt. Any other error, or the error of the final attempt, is
   * rethrown unchanged.
   *
   * @private
   * @param {string} endpoint - Endpoint path, resolved against baseUrl.
   * @param {Object.<string, any>} [params={}] - Query parameters.
   * @returns {Promise<any>} Parsed JSON body of the response.
   * @throws {Error} When the request fails and is not (or no longer) retried.
   * @example
   * const data = await client._request('/works/W123456789');
   * const results = await client._request('/works', { filter: 'title.search:quantum' });
   */
  async _request(endpoint, params = {}) {
    // Reserve the next free slot synchronously, before the first await.
    // Concurrent callers therefore queue one `rateLimitDelay` apart instead
    // of all reading the same timestamp and firing together.
    const now = Date.now();
    const slot = Math.max(now, this.lastRequestTime + this.rateLimitDelay);
    this.lastRequestTime = slot;
    if (slot > now) {
      await sleep(slot - now);
    }

    // Exponential back-off schedule; its length is the maximum attempt count.
    const retryDelays = [1000, 2000, 4000, 8000, 16000];
    const maxRetries = retryDelays.length;

    for (let attempt = 0; ; attempt++) {
      try {
        return await this._executeRequest(endpoint, params);
      } catch (error) {
        const isLastAttempt = attempt >= maxRetries - 1;
        if (isLastAttempt || !this._shouldRetry(error)) {
          throw error;
        }

        const delay = retryDelays[attempt];
        console.error(`OpenAlex API 请求失败 (尝试 ${attempt + 1}/${maxRetries}): ${error.message}. ${delay}ms 后重试...`);
        await sleep(delay);
      }
    }
  }

  /**
   * Decide whether a failed request is worth retrying.
   *
   * The HTTP status is taken from the error's numeric `status` property when
   * present, otherwise parsed from the error message. Only 429 and 5xx are
   * retried.
   *
   * @private
   * @param {unknown} error - The error thrown by _executeRequest.
   * @returns {boolean} True when the request should be attempted again.
   */
  _shouldRetry(error) {
    if (!(error instanceof Error)) {
      return false;
    }
    const statusCode = typeof error.status === 'number'
      ? error.status
      : this._extractStatusCode(error.message);
    return Boolean(statusCode) && (statusCode === 429 || statusCode >= 500);
  }

  /**
   * Perform a single HTTP GET without pacing or retries.
   *
   * Parameters whose value is null or undefined are omitted from the query
   * string. A non-2xx response raises an Error carrying the numeric HTTP
   * status in its `status` property. Any other failure (network error,
   * unreadable body) is wrapped in a new Error whose `cause` is the
   * original error.
   *
   * @private
   * @param {string} endpoint - Endpoint path, resolved against baseUrl.
   * @param {Object.<string, any>} params - Query parameters.
   * @returns {Promise<any>} Parsed JSON body of the response.
   * @throws {Error} On a non-2xx status or on a network / parsing failure.
   */
  async _executeRequest(endpoint, params = {}) {
    const url = new URL(endpoint, this.baseUrl);

    for (const [key, value] of Object.entries(params)) {
      if (value !== undefined && value !== null) {
        url.searchParams.append(key, String(value));
      }
    }

    if (this.apiKey) {
      url.searchParams.append('api_key', this.apiKey);
    }

    const headers = {
      'User-Agent': this.userAgent,
      'Accept': 'application/json'
    };

    try {
      const response = await fetch(url.toString(), {
        method: 'GET',
        headers
      });

      if (!response.ok) {
        const errorText = await response.text();
        const httpError = new Error(
          `OpenAlex API 请求失败: HTTP ${response.status} ${response.statusText}. ` +
          `详情: ${errorText}`
        );
        // Expose the status as data so the retry logic does not have to
        // parse it back out of the message text.
        httpError.status = response.status;
        throw httpError;
      }

      return await response.json();
    } catch (error) {
      // HTTP errors created above are already formatted; pass them through.
      if (error instanceof Error && typeof error.status === 'number') {
        throw error;
      }
      const errorMessage = error instanceof Error ? error.message : String(error);
      throw new Error(`OpenAlex API 网络请求错误: ${errorMessage}`, { cause: error });
    }
  }

  /**
   * Extract the HTTP status code from an error message of the form
   * "... HTTP 503 ...".
   *
   * @private
   * @param {string} errorMessage - Error message to inspect.
   * @returns {number|null} The three-digit status code, or null when the
   *   message is not a string or contains no status code.
   */
  _extractStatusCode(errorMessage) {
    if (typeof errorMessage !== 'string') {
      return null;
    }
    const match = errorMessage.match(/HTTP (\d{3})/);
    return match ? parseInt(match[1], 10) : null;
  }

  /**
   * Search works by title.
   *
   * @param {string} query - Search keywords (required, non-blank).
   * @param {Object} [options={}] - Optional settings.
   * @param {number} [options.max_results=20] - Results per page; only sent
   *   to the API when it is between 1 and 200.
   * @param {number} [options.page=1] - Page number; only sent when > 1.
   * @param {string} [options.sort_by] - Sort expression, e.g. 'cited_by_count:desc'.
   * @param {Object} [options.filters] - Additional filters.
   * @param {number} [options.filters.publication_year] - Publication year.
   * @param {boolean} [options.filters.is_oa] - Restrict by open-access status.
   * @param {string} [options.filters.type] - Work type.
   * @returns {Promise<Object>} Raw search response.
   * @throws {Error} When query is not a non-blank string or the request fails.
   * @example
   * const results = await client.search('machine learning', { max_results: 10, sort_by: 'cited_by_count:desc' });
   */
  async search(query, options = {}) {
    if (typeof query !== 'string' || query.trim() === '') {
      throw new Error('query 必须是非空字符串');
    }

    const {
      max_results = 20,
      page = 1,
      sort_by,
      filters = {}
    } = options;

    const params = {};

    if (max_results > 0 && max_results <= 200) {
      params['per-page'] = max_results;
    }
    if (page > 1) {
      params.page = page;
    }

    if (sort_by) {
      params.sort = sort_by;
    }

    // A comma separates individual filters in the `filter` parameter, so a
    // comma inside the search text would split the expression. Replace it
    // with a space.
    const searchTerm = query.replace(/\s*,\s*/g, ' ').trim();

    const filterParts = [`title.search:${searchTerm}`];
    if (filters.publication_year) {
      filterParts.push(`publication_year:${filters.publication_year}`);
    }
    if (filters.is_oa !== undefined) {
      filterParts.push(`is_oa:${filters.is_oa}`);
    }
    if (filters.type) {
      filterParts.push(`type:${filters.type}`);
    }

    params.filter = filterParts.join(',');

    // Request only the fields the optimizer consumes.
    params.select = 'id,title,authorships,publication_year,primary_location,cited_by_count,open_access,doi,concepts';

    return await this._request('/works', params);
  }

  /**
   * Fetch a single work.
   *
   * The identifier kind is detected from its shape: a value starting with
   * "10." is treated as a DOI, a purely numeric value of at most eight
   * digits as a PMID, and anything else is passed through as an OpenAlex ID
   * (short form or full URL).
   *
   * @param {string} workId - Work identifier (OpenAlex ID, DOI or PMID).
   * @param {Object} [options={}] - Optional settings.
   * @param {boolean} [options.include_abstract=true] - Request the abstract.
   * @param {boolean} [options.include_authors=true] - Request authorships.
   * @param {boolean} [options.include_topics=true] - Request topics.
   * @returns {Promise<Object>} Raw work record.
   * @throws {Error} When workId is not a non-blank string or the request fails.
   * @example
   * const work = await client.getWork('W3128609807');
   * const work2 = await client.getWork('10.1038/nature12373', { include_abstract: true });
   */
  async getWork(workId, options = {}) {
    if (typeof workId !== 'string' || workId.trim() === '') {
      throw new Error('workId 必须是非空字符串');
    }
    const id = workId.trim();

    const {
      include_abstract = true,
      include_authors = true,
      include_topics = true
    } = options;

    // `select` accepts root-level fields only. The PubMed id lives under
    // `ids`, and volume / issue / pages under `biblio`.
    const selectParts = ['id', 'title', 'publication_year', 'type', 'cited_by_count', 'doi', 'ids', 'biblio', 'primary_location', 'open_access', 'best_oa_location', 'referenced_works', 'concepts'];

    if (include_abstract) {
      selectParts.push('abstract_inverted_index');
    }
    if (include_authors) {
      selectParts.push('authorships');
    }
    if (include_topics) {
      selectParts.push('topics');
    }

    const params = {
      select: selectParts.join(',')
    };

    let endpoint;
    if (id.startsWith('10.')) {
      // DOI
      endpoint = `/works/https://doi.org/${id}`;
    } else if (/^\d+$/.test(id) && id.length <= 8) {
      // PMID: digits only, at most eight of them
      endpoint = `/works/https://pubmed.ncbi.nlm.nih.gov/${id}`;
    } else {
      // OpenAlex ID (W123 or full URL)
      endpoint = `/works/${id}`;
    }

    return await this._request(endpoint, params);
  }

  /**
   * Fetch several works in one request.
   *
   * @param {string[]} workIds - Work IDs (between 1 and 50 entries).
   * @param {Object} [options={}] - Optional settings.
   * @param {boolean} [options.include_abstract=false] - Request abstracts.
   * @param {boolean} [options.include_authors=true] - Request authorships.
   * @returns {Promise<Object>} Raw list response.
   * @throws {Error} When workIds is not a non-empty array, holds more than
   *   50 entries, or the request fails.
   * @example
   * const results = await client.batchGetWorks(['W3128609807', 'W2741809807']);
   */
  async batchGetWorks(workIds, options = {}) {
    if (!Array.isArray(workIds) || workIds.length === 0) {
      throw new Error('workIds 必须是非空数组');
    }

    if (workIds.length > 50) {
      throw new Error('批量查询最多支持 50 个论文 ID');
    }

    const {
      include_abstract = false,
      include_authors = true
    } = options;

    // `select` accepts root-level fields only; the PubMed id lives under `ids`.
    const selectParts = ['id', 'title', 'publication_year', 'type', 'cited_by_count', 'doi', 'ids', 'primary_location', 'open_access', 'concepts'];

    if (include_abstract) {
      selectParts.push('abstract_inverted_index');
    }
    if (include_authors) {
      selectParts.push('authorships');
    }

    const params = {
      select: selectParts.join(','),
      // One page large enough for every requested ID (length is <= 50 here).
      'per-page': workIds.length,
      filter: `openalex:${workIds.join('|')}`
    };

    return await this._request('/works', params);
  }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
// Minimal ambient typings for the untyped `pdf-parse` package.
declare module 'pdf-parse' {
  /** Value the promise returned by pdfParse resolves to. */
  interface PDFParseData {
    /** Text extracted from the document. */
    text: string;
    /** Page count reported by the parser. */
    numpages: number;
    /** Rendered-page count reported by the parser. */
    numrender: number;
    /** Document info; shape is not modelled here. */
    info: any;
    /** Document metadata; shape is not modelled here. */
    metadata: any;
    /** Version string reported by the parser. */
    version: string;
  }

  /**
   * Optional settings accepted as the second argument of pdfParse.
   * NOTE(review): field list follows the pdf-parse README - confirm against
   * the installed version.
   */
  interface PDFParseOptions {
    /** Custom per-page text renderer. */
    pagerender?: (pageData: any) => string | Promise<string>;
    /** Maximum number of pages to parse. */
    max?: number;
    /** PDF.js build to use. */
    version?: string;
  }

  /** Parse a PDF held in memory. */
  function pdfParse(buffer: Buffer, options?: PDFParseOptions): Promise<PDFParseData>;
  export = pdfParse;
}
|
package/src/utils.js
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 工具函数模块
|
|
3
|
+
* 提供 OpenAlex MCP 服务器所需的通用工具函数
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Rebuild plain abstract text from an OpenAlex `abstract_inverted_index`.
 *
 * Every word is written to each of the positions listed for it; the filled
 * positions are then joined in ascending order with single spaces. Positions
 * that no word claims are skipped.
 *
 * @param {Object.<string, number[]>} invertedIndex - Map from word to the
 *   positions at which it occurs.
 * @returns {string} The reconstructed text, or '' when invertedIndex is
 *   missing or not an object.
 * @example
 * rebuildAbstract({ This: [0], is: [1], a: [2], test: [3] }); // "This is a test"
 */
export function rebuildAbstract(invertedIndex) {
  const isUsable = Boolean(invertedIndex) && typeof invertedIndex === 'object';
  if (!isUsable) {
    return '';
  }

  // Sparse array: slot i holds the word found at position i.
  const slots = [];
  Object.keys(invertedIndex).forEach((token) => {
    for (const index of invertedIndex[token]) {
      slots[index] = token;
    }
  });

  // Walk the slots in order, keeping only the ones that were filled.
  const ordered = [];
  for (const token of slots) {
    if (token !== undefined) {
      ordered.push(token);
    }
  }
  return ordered.join(' ');
}
|
|
37
|
+
|
|
38
|
+
/**
 * Reduce an OpenAlex URL to its bare identifier.
 *
 * Returns everything after the last '/' of the input; an input without any
 * '/' is returned unchanged.
 *
 * @param {string} url - OpenAlex URL such as "https://openalex.org/W123456",
 *   or an already shortened ID.
 * @returns {string} The short ID such as "W123456", or '' when url is empty
 *   or not a string.
 * @example
 * simplifyOpenAlexId("https://openalex.org/W123456"); // "W123456"
 * simplifyOpenAlexId("W123456"); // "W123456"
 */
export function simplifyOpenAlexId(url) {
  if (typeof url !== 'string' || url === '') {
    return '';
  }

  // lastIndexOf yields -1 when there is no slash, so the slice then starts
  // at 0 and returns the whole input.
  const lastSlash = url.lastIndexOf('/');
  return url.slice(lastSlash + 1);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Pause asynchronously.
 *
 * @param {number} ms - Number of milliseconds to wait.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 * @example
 * await sleep(1000); // wait one second
 */
export function sleep(ms) {
  const timerElapsed = new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
  return timerElapsed;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Check whether a value is a short-form OpenAlex work ID: an upper-case 'W'
 * followed by one or more digits and nothing else.
 *
 * @param {string} id - Candidate work ID.
 * @returns {boolean} True when id is a string of that form.
 * @example
 * validateWorkId("W123456789"); // true
 * validateWorkId("W123"); // true
 * validateWorkId("A123"); // false
 * validateWorkId("123"); // false
 */
export function validateWorkId(id) {
  return typeof id === 'string' && /^W\d+$/.test(id);
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { test } from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
import { spawnSync } from 'node:child_process';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { fileURLToPath } from 'node:url';
|
|
6
|
+
import { createRequire } from 'node:module';
|
|
7
|
+
|
|
8
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
9
|
+
const __dirname = path.dirname(__filename);
|
|
10
|
+
const rootDir = path.resolve(__dirname, '..');
|
|
11
|
+
const require = createRequire(import.meta.url);
|
|
12
|
+
const pkg = require(path.join(rootDir, 'package.json'));
|
|
13
|
+
|
|
14
|
+
/**
 * Run src/cli.js synchronously in a child process, using the same Node
 * executable that runs this test file.
 *
 * @param {string[]} args - Command-line arguments passed to the CLI.
 * @returns {Object} The spawnSync result, with output decoded as UTF-8.
 */
function runCli(args) {
  const entryPoint = path.join(rootDir, 'src', 'cli.js');
  const spawnOptions = { encoding: 'utf-8' };
  const argv = [entryPoint, ...args];
  return spawnSync(process.execPath, argv, spawnOptions);
}
|
|
20
|
+
|
|
21
|
+
// Smoke test: --help must exit cleanly and print the server name.
test('cli --help exits 0', () => {
  const { status, stdout } = runCli(['--help']);
  assert.equal(status, 0);
  assert.match(stdout, /OpenAlex MCP Server/i);
});

// --version must print exactly the version declared in package.json.
test('cli --version prints package version', () => {
  const { status, stdout } = runCli(['--version']);
  assert.equal(status, 0);
  assert.equal(stdout.trim(), pkg.version);
});
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "ES2022",
|
|
5
|
+
"moduleResolution": "node",
|
|
6
|
+
"lib": ["ES2022"],
|
|
7
|
+
"outDir": "./dist",
|
|
8
|
+
"rootDir": "./src",
|
|
9
|
+
"strict": false,
|
|
10
|
+
"esModuleInterop": true,
|
|
11
|
+
"skipLibCheck": true,
|
|
12
|
+
"forceConsistentCasingInFileNames": false,
|
|
13
|
+
"resolveJsonModule": true,
|
|
14
|
+
"allowSyntheticDefaultImports": true,
|
|
15
|
+
"noEmit": true,
|
|
16
|
+
"checkJs": false,
|
|
17
|
+
"allowJs": true,
|
|
18
|
+
"noImplicitAny": false
|
|
19
|
+
},
|
|
20
|
+
"include": ["src/**/*"],
|
|
21
|
+
"exclude": ["node_modules", "dist"]
|
|
22
|
+
}
|