paper-search-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +165 -0
- package/LICENSE +21 -0
- package/README-sc.md +642 -0
- package/README.md +642 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +637 -0
- package/dist/cli.js.map +1 -0
- package/dist/config/ConfigService.d.ts +26 -0
- package/dist/config/ConfigService.d.ts.map +1 -0
- package/dist/config/ConfigService.js +145 -0
- package/dist/config/ConfigService.js.map +1 -0
- package/dist/config/constants.d.ts +140 -0
- package/dist/config/constants.d.ts.map +1 -0
- package/dist/config/constants.js +93 -0
- package/dist/config/constants.js.map +1 -0
- package/dist/core/diagnostics.d.ts +43 -0
- package/dist/core/diagnostics.d.ts.map +1 -0
- package/dist/core/diagnostics.js +544 -0
- package/dist/core/diagnostics.js.map +1 -0
- package/dist/core/handleToolCall.d.ts +8 -0
- package/dist/core/handleToolCall.d.ts.map +1 -0
- package/dist/core/handleToolCall.js +440 -0
- package/dist/core/handleToolCall.js.map +1 -0
- package/dist/core/schemas.d.ts +454 -0
- package/dist/core/schemas.d.ts.map +1 -0
- package/dist/core/schemas.js +322 -0
- package/dist/core/schemas.js.map +1 -0
- package/dist/core/searchers.d.ts +45 -0
- package/dist/core/searchers.d.ts.map +1 -0
- package/dist/core/searchers.js +73 -0
- package/dist/core/searchers.js.map +1 -0
- package/dist/core/tools.d.ts +7 -0
- package/dist/core/tools.d.ts.map +1 -0
- package/dist/core/tools.js +640 -0
- package/dist/core/tools.js.map +1 -0
- package/dist/models/Paper.d.ts +64 -0
- package/dist/models/Paper.d.ts.map +1 -0
- package/dist/models/Paper.js +70 -0
- package/dist/models/Paper.js.map +1 -0
- package/dist/platforms/ArxivSearcher.d.ts +64 -0
- package/dist/platforms/ArxivSearcher.d.ts.map +1 -0
- package/dist/platforms/ArxivSearcher.js +531 -0
- package/dist/platforms/ArxivSearcher.js.map +1 -0
- package/dist/platforms/BioRxivSearcher.d.ts +47 -0
- package/dist/platforms/BioRxivSearcher.d.ts.map +1 -0
- package/dist/platforms/BioRxivSearcher.js +196 -0
- package/dist/platforms/BioRxivSearcher.js.map +1 -0
- package/dist/platforms/CORESearcher.d.ts +16 -0
- package/dist/platforms/CORESearcher.d.ts.map +1 -0
- package/dist/platforms/CORESearcher.js +148 -0
- package/dist/platforms/CORESearcher.js.map +1 -0
- package/dist/platforms/CrossrefSearcher.d.ts +34 -0
- package/dist/platforms/CrossrefSearcher.d.ts.map +1 -0
- package/dist/platforms/CrossrefSearcher.js +339 -0
- package/dist/platforms/CrossrefSearcher.js.map +1 -0
- package/dist/platforms/EuropePMCSearcher.d.ts +20 -0
- package/dist/platforms/EuropePMCSearcher.d.ts.map +1 -0
- package/dist/platforms/EuropePMCSearcher.js +173 -0
- package/dist/platforms/EuropePMCSearcher.js.map +1 -0
- package/dist/platforms/GoogleScholarSearcher.d.ts +77 -0
- package/dist/platforms/GoogleScholarSearcher.d.ts.map +1 -0
- package/dist/platforms/GoogleScholarSearcher.js +262 -0
- package/dist/platforms/GoogleScholarSearcher.js.map +1 -0
- package/dist/platforms/IACRSearcher.d.ts +51 -0
- package/dist/platforms/IACRSearcher.d.ts.map +1 -0
- package/dist/platforms/IACRSearcher.js +339 -0
- package/dist/platforms/IACRSearcher.js.map +1 -0
- package/dist/platforms/OpenAIRESearcher.d.ts +22 -0
- package/dist/platforms/OpenAIRESearcher.d.ts.map +1 -0
- package/dist/platforms/OpenAIRESearcher.js +223 -0
- package/dist/platforms/OpenAIRESearcher.js.map +1 -0
- package/dist/platforms/OpenAlexSearcher.d.ts +14 -0
- package/dist/platforms/OpenAlexSearcher.d.ts.map +1 -0
- package/dist/platforms/OpenAlexSearcher.js +114 -0
- package/dist/platforms/OpenAlexSearcher.js.map +1 -0
- package/dist/platforms/PMCSearcher.d.ts +20 -0
- package/dist/platforms/PMCSearcher.d.ts.map +1 -0
- package/dist/platforms/PMCSearcher.js +177 -0
- package/dist/platforms/PMCSearcher.js.map +1 -0
- package/dist/platforms/PaperSource.d.ts +143 -0
- package/dist/platforms/PaperSource.d.ts.map +1 -0
- package/dist/platforms/PaperSource.js +125 -0
- package/dist/platforms/PaperSource.js.map +1 -0
- package/dist/platforms/PubMedSearcher.d.ts +104 -0
- package/dist/platforms/PubMedSearcher.d.ts.map +1 -0
- package/dist/platforms/PubMedSearcher.js +422 -0
- package/dist/platforms/PubMedSearcher.js.map +1 -0
- package/dist/platforms/SciHubSearcher.d.ts +66 -0
- package/dist/platforms/SciHubSearcher.d.ts.map +1 -0
- package/dist/platforms/SciHubSearcher.js +398 -0
- package/dist/platforms/SciHubSearcher.js.map +1 -0
- package/dist/platforms/ScienceDirectSearcher.d.ts +42 -0
- package/dist/platforms/ScienceDirectSearcher.d.ts.map +1 -0
- package/dist/platforms/ScienceDirectSearcher.js +326 -0
- package/dist/platforms/ScienceDirectSearcher.js.map +1 -0
- package/dist/platforms/ScopusSearcher.d.ts +43 -0
- package/dist/platforms/ScopusSearcher.d.ts.map +1 -0
- package/dist/platforms/ScopusSearcher.js +364 -0
- package/dist/platforms/ScopusSearcher.js.map +1 -0
- package/dist/platforms/SemanticScholarSearcher.d.ts +96 -0
- package/dist/platforms/SemanticScholarSearcher.d.ts.map +1 -0
- package/dist/platforms/SemanticScholarSearcher.js +419 -0
- package/dist/platforms/SemanticScholarSearcher.js.map +1 -0
- package/dist/platforms/SpringerSearcher.d.ts +54 -0
- package/dist/platforms/SpringerSearcher.d.ts.map +1 -0
- package/dist/platforms/SpringerSearcher.js +407 -0
- package/dist/platforms/SpringerSearcher.js.map +1 -0
- package/dist/platforms/UnpaywallSearcher.d.ts +18 -0
- package/dist/platforms/UnpaywallSearcher.d.ts.map +1 -0
- package/dist/platforms/UnpaywallSearcher.js +115 -0
- package/dist/platforms/UnpaywallSearcher.js.map +1 -0
- package/dist/platforms/WebOfScienceSearcher.d.ts +111 -0
- package/dist/platforms/WebOfScienceSearcher.d.ts.map +1 -0
- package/dist/platforms/WebOfScienceSearcher.js +500 -0
- package/dist/platforms/WebOfScienceSearcher.js.map +1 -0
- package/dist/platforms/WileySearcher.d.ts +44 -0
- package/dist/platforms/WileySearcher.d.ts.map +1 -0
- package/dist/platforms/WileySearcher.js +148 -0
- package/dist/platforms/WileySearcher.js.map +1 -0
- package/dist/services/CitationService.d.ts +66 -0
- package/dist/services/CitationService.d.ts.map +1 -0
- package/dist/services/CitationService.js +237 -0
- package/dist/services/CitationService.js.map +1 -0
- package/dist/services/MultiSourceSearchService.d.ts +19 -0
- package/dist/services/MultiSourceSearchService.d.ts.map +1 -0
- package/dist/services/MultiSourceSearchService.js +96 -0
- package/dist/services/MultiSourceSearchService.js.map +1 -0
- package/dist/services/OpenAccessFallbackService.d.ts +20 -0
- package/dist/services/OpenAccessFallbackService.d.ts.map +1 -0
- package/dist/services/OpenAccessFallbackService.js +124 -0
- package/dist/services/OpenAccessFallbackService.js.map +1 -0
- package/dist/utils/ErrorHandler.d.ts +99 -0
- package/dist/utils/ErrorHandler.d.ts.map +1 -0
- package/dist/utils/ErrorHandler.js +266 -0
- package/dist/utils/ErrorHandler.js.map +1 -0
- package/dist/utils/Logger.d.ts +6 -0
- package/dist/utils/Logger.d.ts.map +1 -0
- package/dist/utils/Logger.js +26 -0
- package/dist/utils/Logger.js.map +1 -0
- package/dist/utils/PDFExtractor.d.ts +34 -0
- package/dist/utils/PDFExtractor.d.ts.map +1 -0
- package/dist/utils/PDFExtractor.js +130 -0
- package/dist/utils/PDFExtractor.js.map +1 -0
- package/dist/utils/PdfDownload.d.ts +7 -0
- package/dist/utils/PdfDownload.d.ts.map +1 -0
- package/dist/utils/PdfDownload.js +52 -0
- package/dist/utils/PdfDownload.js.map +1 -0
- package/dist/utils/QuotaManager.d.ts +32 -0
- package/dist/utils/QuotaManager.d.ts.map +1 -0
- package/dist/utils/QuotaManager.js +95 -0
- package/dist/utils/QuotaManager.js.map +1 -0
- package/dist/utils/RateLimiter.d.ts +50 -0
- package/dist/utils/RateLimiter.d.ts.map +1 -0
- package/dist/utils/RateLimiter.js +121 -0
- package/dist/utils/RateLimiter.js.map +1 -0
- package/dist/utils/RequestCache.d.ts +26 -0
- package/dist/utils/RequestCache.d.ts.map +1 -0
- package/dist/utils/RequestCache.js +66 -0
- package/dist/utils/RequestCache.js.map +1 -0
- package/dist/utils/SecurityUtils.d.ts +80 -0
- package/dist/utils/SecurityUtils.d.ts.map +1 -0
- package/dist/utils/SecurityUtils.js +357 -0
- package/dist/utils/SecurityUtils.js.map +1 -0
- package/package.json +111 -0
- package/skills/paper-search/SKILL.md +192 -0
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* bioRxiv API集成模块
|
|
3
|
+
* 支持bioRxiv和medRxiv预印本论文搜索
|
|
4
|
+
*/
|
|
5
|
+
import axios from 'axios';
|
|
6
|
+
import * as fs from 'fs';
|
|
7
|
+
import * as path from 'path';
|
|
8
|
+
import { PaperFactory } from '../models/Paper.js';
|
|
9
|
+
import { PaperSource } from './PaperSource.js';
|
|
10
|
+
import { TIMEOUTS, USER_AGENT } from '../config/constants.js';
|
|
11
|
+
import { logDebug } from '../utils/Logger.js';
|
|
12
|
+
import { RateLimiter } from '../utils/RateLimiter.js';
|
|
13
|
+
import { ErrorHandler } from '../utils/ErrorHandler.js';
|
|
14
|
+
import { downloadPdfFromUrl, safeFilename } from '../utils/PdfDownload.js';
|
|
15
|
+
/**
 * Searcher for the bioRxiv preprint server (and, through the `serverType`
 * argument, medRxiv).
 *
 * Listings are fetched from `https://api.biorxiv.org/details/<server>/<from>/<to>`
 * and then filtered locally, because the request itself only carries a date
 * window and an optional `category` parameter.
 */
export class BioRxivSearcher extends PaperSource {
    serverType;
    rateLimiter;
    /**
     * @param serverType Server name used both in the API path and in the
     *   `www.<server>.org` content URLs. Defaults to 'biorxiv'.
     */
    constructor(serverType = 'biorxiv') {
        super(serverType, `https://api.biorxiv.org/details/${serverType}`);
        this.serverType = serverType;
        // bioRxiv rate limit: 1 req/s, burst=2
        this.rateLimiter = new RateLimiter({
            requestsPerSecond: 1,
            burstCapacity: 2
        });
    }
    /**
     * Describes what this source supports.
     * @returns Capability flags plus the list of option names.
     */
    getCapabilities() {
        return {
            search: true,
            download: true,
            fullText: true,
            citations: false,
            requiresApiKey: false,
            supportedOptions: ['maxResults', 'days', 'category']
        };
    }
    /**
     * Searches bioRxiv/medRxiv papers published in the last `options.days`
     * days (30 when unset).
     *
     * The query does double duty: it is sent as the `category` request
     * parameter (lower-cased, whitespace runs replaced by `_`) and it is also
     * used as a case-insensitive substring filter on the returned records.
     * A query of '*' (or an empty one) disables both.
     *
     * @param query Free-text query, or '*' for "everything in the window".
     * @param options `days` (look-back window) and `maxResults` (default 10).
     * @returns Up to `maxResults` parsed papers.
     */
    async search(query, options = {}) {
        try {
            // Date window: [today - days, today], formatted as YYYY-MM-DD.
            const days = options.days || 30;
            const endDate = new Date().toISOString().split('T')[0];
            const startDate = new Date(Date.now() - days * 24 * 60 * 60 * 1000).toISOString().split('T')[0];
            const searchUrl = `${this.baseUrl}/${startDate}/${endDate}`;
            const params = {
                cursor: 0
            };
            // Trim first so surrounding whitespace does not turn into stray
            // underscores ("_cell_biology_") or a bare "_" category.
            const trimmedQuery = typeof query === 'string' ? query.trim() : '';
            if (trimmedQuery && trimmedQuery !== '*') {
                params.category = trimmedQuery.toLowerCase().replace(/\s+/g, '_');
            }
            logDebug(`${this.serverType} API Request: GET ${searchUrl}`);
            logDebug(`${this.serverType} Request params:`, params);
            await this.rateLimiter.waitForPermission();
            const response = await ErrorHandler.retryWithBackoff(() => axios.get(searchUrl, {
                params,
                timeout: TIMEOUTS.DEFAULT,
                headers: { 'User-Agent': USER_AGENT }
            }), { context: `${this.serverType} search` });
            logDebug(`${this.serverType} API Response: ${response.status} ${response.statusText}`);
            const papers = this.parseSearchResponse(response.data, query, options);
            logDebug(`${this.serverType} Parsed ${papers.length} papers`);
            return papers.slice(0, options.maxResults || 10);
        }
        catch (error) {
            logDebug(`${this.serverType} Search Error:`, error?.message);
            // NOTE(review): handleHttpError is inherited; presumably it always
            // throws, otherwise this method resolves to undefined - confirm.
            this.handleHttpError(error, 'search');
        }
    }
    /**
     * Downloads the PDF of a paper, trying each candidate URL in order.
     *
     * @param paperId Paper identifier, optionally ending in a `vN` version suffix.
     * @param options `savePath` target directory (default './downloads').
     * @returns Whatever `downloadPdfFromUrl` resolves with for the first
     *   candidate that succeeds.
     * @throws Error when every candidate fails; the message carries the last failure.
     */
    async downloadPdf(paperId, options = {}) {
        const savePath = options.savePath || './downloads';
        const candidates = this.pdfUrlCandidates(paperId);
        let lastError;
        await this.rateLimiter.waitForPermission();
        for (const pdfUrl of candidates) {
            try {
                return await downloadPdfFromUrl(pdfUrl, savePath, `${this.serverType}_${safeFilename(paperId)}`, {
                    headers: {
                        Referer: `https://www.${this.serverType}.org/`,
                        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
                    }
                });
            }
            catch (error) {
                // Remember the failure and fall through to the next candidate.
                lastError = error;
                logDebug(`${this.serverType} PDF candidate failed: ${pdfUrl}`, error?.message || error);
            }
        }
        throw new Error(`${this.serverType} PDF download failed for ${paperId}. ${lastError?.message || String(lastError)}`);
    }
    /**
     * Makes sure the paper's PDF is available locally and reports where it is.
     * No text extraction is performed here.
     *
     * @param paperId Paper identifier.
     * @param options `savePath` target directory (default './downloads').
     * @returns A message containing the PDF location.
     */
    async readPaper(paperId, options = {}) {
        try {
            const savePath = options.savePath || './downloads';
            // Legacy location: <savePath>/<paperId with "/" replaced by "_">.pdf
            let filePath = path.join(savePath, `${paperId.replace(/\//g, '_')}.pdf`);
            if (!fs.existsSync(filePath)) {
                // downloadPdf hands downloadPdfFromUrl a "<server>_<id>" base
                // name, so the saved file presumably does not live at the
                // legacy path. Report the path the download actually returned.
                const downloadedPath = await this.downloadPdf(paperId, options);
                if (typeof downloadedPath === 'string' && downloadedPath) {
                    filePath = downloadedPath;
                }
            }
            return `PDF file downloaded at: ${filePath}. Full text extraction requires additional PDF parsing implementation.`;
        }
        catch (error) {
            this.handleHttpError(error, 'read paper');
        }
    }
    /**
     * Converts an API payload into papers, applying the local text filter.
     *
     * @param data Response body; only `data.collection` (an array) is read.
     * @param query Query string; '*' or blank means "no filtering".
     * @param options Unused here; kept for signature compatibility.
     * @returns Parsed papers; records that fail to parse are dropped.
     */
    parseSearchResponse(data, query, options) {
        if (!data || !data.collection || !Array.isArray(data.collection)) {
            return [];
        }
        let filteredCollection = data.collection;
        const needle = typeof query === 'string' ? query.trim().toLowerCase() : '';
        if (needle && needle !== '*') {
            // Records are not guaranteed to carry every field; treat a missing
            // field as empty text instead of failing the whole search.
            const contains = value => String(value ?? '').toLowerCase().includes(needle);
            filteredCollection = data.collection.filter(item => Boolean(item) && (contains(item.title) ||
                contains(item.abstract) ||
                contains(item.authors) ||
                contains(item.category)));
        }
        return filteredCollection.map(item => this.parseBioRxivPaper(item))
            .filter(paper => paper !== null);
    }
    /**
     * Builds the list of PDF URLs worth trying for a paper id.
     *
     * @param paperId Paper identifier.
     * @returns One URL when the id already ends in `vN`; otherwise the
     *   unversioned URL followed by the `v1` URL.
     */
    pdfUrlCandidates(paperId) {
        const clean = paperId.trim();
        if (/v\d+$/i.test(clean)) {
            return [`https://www.${this.serverType}.org/content/${clean}.full.pdf`];
        }
        return [
            `https://www.${this.serverType}.org/content/${clean}.full.pdf`,
            `https://www.${this.serverType}.org/content/${clean}v1.full.pdf`
        ];
    }
    /**
     * Converts a single API record into a paper object.
     *
     * @param item One entry of the response's `collection` array.
     * @returns The created paper, or null when building it throws.
     */
    parseBioRxivPaper(item) {
        try {
            // Authors arrive as one ';'-separated string; drop the empty
            // entries left behind by trailing or doubled separators.
            const authors = String(item.authors ?? '')
                .split(';')
                .map(author => author.trim())
                .filter(Boolean);
            const publishedDate = this.parseDate(item.date);
            const year = publishedDate?.getFullYear();
            // Landing page and PDF URLs are both keyed by DOI plus version.
            const paperUrl = `https://www.${this.serverType}.org/content/${item.doi}v${item.version}`;
            const pdfUrl = `https://www.${this.serverType}.org/content/${item.doi}v${item.version}.full.pdf`;
            return PaperFactory.create({
                paperId: item.doi,
                title: this.cleanText(item.title),
                authors: authors,
                abstract: this.cleanText(item.abstract),
                doi: item.doi,
                publishedDate: publishedDate,
                pdfUrl: pdfUrl,
                url: paperUrl,
                source: this.serverType,
                categories: [item.category],
                keywords: [],
                citationCount: 0,
                year: year,
                extra: {
                    version: item.version,
                    type: item.type,
                    license: item.license,
                    server: item.server,
                    corresponding_author: item.author_corresponding,
                    corresponding_institution: item.author_corresponding_institution
                }
            });
        }
        catch (error) {
            logDebug(`Error parsing ${this.serverType} paper:`, error);
            return null;
        }
    }
}
|
|
188
|
+
/**
 * medRxiv searcher: a BioRxivSearcher whose server name is fixed to 'medrxiv'.
 */
export class MedRxivSearcher extends BioRxivSearcher {
    constructor() {
        // The parent derives the API path and content URLs from this name.
        const server = 'medrxiv';
        super(server);
    }
}
|
|
196
|
+
//# sourceMappingURL=BioRxivSearcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BioRxivSearcher.js","sourceRoot":"","sources":["../../src/platforms/BioRxivSearcher.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAS,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAwD,MAAM,kBAAkB,CAAC;AACrG,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAkC3E,MAAM,OAAO,eAAgB,SAAQ,WAAW;IAC7B,UAAU,CAAwB;IAClC,WAAW,CAAc;IAE1C,YAAY,aAAoC,SAAS;QACvD,KAAK,CAAC,UAAU,EAAE,mCAAmC,UAAU,EAAE,CAAC,CAAC;QACnE,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,uCAAuC;QACvC,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC;YACjC,iBAAiB,EAAE,CAAC;YACpB,aAAa,EAAE,CAAC;SACjB,CAAC,CAAC;IACL,CAAC;IAED,eAAe;QACb,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,IAAI;YACd,QAAQ,EAAE,IAAI;YACd,SAAS,EAAE,KAAK;YAChB,cAAc,EAAE,KAAK;YACrB,gBAAgB,EAAE,CAAC,YAAY,EAAE,MAAM,EAAE,UAAU,CAAC;SACrD,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAgC,EAAE;QAC5D,IAAI,CAAC;YACH,SAAS;YACT,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,EAAE,CAAC;YAChC,MAAM,OAAO,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YACvD,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAEhG,UAAU;YACV,MAAM,SAAS,GAAG,GAAG,IAAI,CAAC,OAAO,IAAI,SAAS,IAAI,OAAO,EAAE,CAAC;YAE5D,MAAM,MAAM,GAAwB;gBAClC,MAAM,EAAE,CAAC;aACV,CAAC;YAEF,SAAS;YACT,IAAI,KAAK,IAAI,KAAK,KAAK,GAAG,EAAE,CAAC;gBAC3B,aAAa;gBACb,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;gBAC1D,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC;YAC7B,CAAC;YAED,QAAQ,CAAC,GAAG,IAAI,CAAC,UAAU,qBAAqB,SAAS,EAAE,CAAC,CAAC;YAC7D,QAAQ,CAAC,GAAG,IAAI,CAAC,UAAU,kBAAkB,EAAE,MAAM,CAAC,CAAC;YAEvD,MAAM,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,CAAC;YAE3C,MAAM,QAAQ,GAAG,MAAM,YAA
Y,CAAC,gBAAgB,CAClD,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,EAAE;gBACzB,MAAM;gBACN,OAAO,EAAE,QAAQ,CAAC,OAAO;gBACzB,OAAO,EAAE,EAAE,YAAY,EAAE,UAAU,EAAE;aACtC,CAAC,EACF,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC,UAAU,SAAS,EAAE,CACzC,CAAC;YAEF,QAAQ,CAAC,GAAG,IAAI,CAAC,UAAU,kBAAkB,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAEvF,MAAM,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;YACvE,QAAQ,CAAC,GAAG,IAAI,CAAC,UAAU,WAAW,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;YAE9D,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC;QACnD,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,QAAQ,CAAC,GAAG,IAAI,CAAC,UAAU,gBAAgB,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;YAC5D,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CAAC,OAAe,EAAE,UAA2B,EAAE;QAC9D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,aAAa,CAAC;QACnD,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;QAClD,IAAI,SAAc,CAAC;QAEnB,MAAM,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,CAAC;QAE3C,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;YAChC,IAAI,CAAC;gBACH,OAAO,MAAM,kBAAkB,CAC7B,MAAM,EACN,QAAQ,EACR,GAAG,IAAI,CAAC,UAAU,IAAI,YAAY,CAAC,OAAO,CAAC,EAAE,EAC7C;oBACE,OAAO,EAAE;wBACP,OAAO,EAAE,eAAe,IAAI,CAAC,UAAU,OAAO;wBAC9C,YAAY,EACV,oEAAoE;qBACvE;iBACF,CACF,CAAC;YACJ,CAAC;YAAC,OAAO,KAAU,EAAE,CAAC;gBACpB,SAAS,GAAG,KAAK,CAAC;gBAClB,QAAQ,CAAC,GAAG,IAAI,CAAC,UAAU,0BAA0B,MAAM,EAAE,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,CAAC;YAC1F,CAAC;QACH,CAAC;QAED,MAAM,IAAI,KAAK,CACb,GAAG,IAAI,CAAC,UAAU,4BAA4B,OAAO,KAAK,SAAS,EAAE,OAAO,IAAI,MAAM,CAAC,SAAS,CAAC,EAAE,CACpG,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,OAAe,EAAE,UAA2B,EAAE;QAC5D,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,aAAa,CAAC;YACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;YAE3E,eAAe;YACf,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC7B,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAC3C,CAAC;YAED,OAAO,2BAA2B,QAAQ,wEAAwE,CAAC;QACrH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,K
AAK,EAAE,YAAY,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,IAAqB,EAAE,KAAa,EAAE,OAA6B;QAC7F,IAAI,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YACxD,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,kBAAkB;QAClB,IAAI,kBAAkB,GAAG,IAAI,CAAC,UAAU,CAAC;QACzC,IAAI,KAAK,IAAI,KAAK,KAAK,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;YAC3C,MAAM,UAAU,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;YACvC,kBAAkB,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CACjD,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC;gBAC7C,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC;gBAChD,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC;gBAC/C,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,CACjD,CAAC;QACJ,CAAC;QAED,OAAO,kBAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;aAChE,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,KAAK,IAAI,CAAY,CAAC;IAChD,CAAC;IAEO,gBAAgB,CAAC,OAAe;QACtC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO,CAAC,eAAe,IAAI,CAAC,UAAU,gBAAgB,KAAK,WAAW,CAAC,CAAC;QAC1E,CAAC;QACD,OAAO;YACL,eAAe,IAAI,CAAC,UAAU,gBAAgB,KAAK,WAAW;YAC9D,eAAe,IAAI,CAAC,UAAU,gBAAgB,KAAK,aAAa;SACjE,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,IAAkB;QAC1C,IAAI,CAAC;YACH,OAAO;YACP,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;YAErE,OAAO;YACP,MAAM,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAChD,MAAM,IAAI,GAAG,aAAa,EAAE,WAAW,EAAE,CAAC;YAE1C,QAAQ;YACR,MAAM,QAAQ,GAAG,eAAe,IAAI,CAAC,UAAU,gBAAgB,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAC1F,MAAM,MAAM,GAAG,eAAe,IAAI,CAAC,UAAU,gBAAgB,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,OAAO,WAAW,CAAC;YAEjG,OAAO,YAAY,CAAC,MAAM,CAAC;gBACzB,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC;gBACjC,OAAO,EAAE,OAAO;gBAChB,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC;gBACvC,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,aAAa,EAAE,aAAa;gBAC5B,MAAM,EAAE,MAAM;gBACd,GAAG,EAAE,QAAQ;gBACb,M
AAM,EAAE,IAAI,CAAC,UAAU;gBACvB,UAAU,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC;gBAC3B,QAAQ,EAAE,EAAE;gBACZ,aAAa,EAAE,CAAC;gBAChB,IAAI,EAAE,IAAI;gBACV,KAAK,EAAE;oBACL,OAAO,EAAE,IAAI,CAAC,OAAO;oBACrB,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,OAAO,EAAE,IAAI,CAAC,OAAO;oBACrB,MAAM,EAAE,IAAI,CAAC,MAAM;oBACnB,oBAAoB,EAAE,IAAI,CAAC,oBAAoB;oBAC/C,yBAAyB,EAAE,IAAI,CAAC,gCAAgC;iBACjE;aACF,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,QAAQ,CAAC,iBAAiB,IAAI,CAAC,UAAU,SAAS,EAAE,KAAK,CAAC,CAAC;YAC3D,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;CACF;AAED;;GAEG;AACH,MAAM,OAAO,eAAgB,SAAQ,eAAe;IAClD;QACE,KAAK,CAAC,SAAS,CAAC,CAAC;IACnB,CAAC;CACF"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { Paper } from '../models/Paper.js';
|
|
2
|
+
import { PaperSource, SearchOptions, DownloadOptions, PlatformCapabilities } from './PaperSource.js';
|
|
3
|
+
/**
 * Paper source backed by the CORE v3 API (`https://api.core.ac.uk/v3` in the
 * compiled implementation).
 */
export declare class CORESearcher extends PaperSource {
|
|
4
|
+
/** HTTP client created with `axios.create` in the constructor. */
private readonly client;
|
|
5
|
+
/**
 * @param apiKey Optional CORE API key. When omitted the implementation falls
 *   back to the `PAPER_SEARCH_CORE_API_KEY` and then `CORE_API_KEY`
 *   environment variables; with no key at all, requests are sent
 *   without an `Authorization` header.
 */
constructor(apiKey?: string);
|
|
6
|
+
/** Returns the capability flags and supported option names of this source. */
getCapabilities(): PlatformCapabilities;
|
|
7
|
+
/**
 * Queries `/search/works`.
 * @param query Search expression, sent as the `q` parameter.
 * @param options `maxResults` (default 10, capped at 100) and optional `year`.
 */
search(query: string, options?: SearchOptions): Promise<Paper[]>;
|
|
8
|
+
/**
 * Downloads the PDF of a work.
 * @returns The value produced by the shared PDF download helper
 *   (presumably the saved file path - confirm against PdfDownload).
 * @throws Error when the work exposes no URL containing "pdf".
 */
downloadPdf(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
9
|
+
/**
 * Returns the work's `fullText` when it is longer than 500 characters;
 * otherwise downloads the PDF and returns the text extracted from it.
 */
readPaper(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
10
|
+
/** Fetches `/works/<id>`; yields null on any HTTP status >= 400 or thrown error. */
private getDetails;
|
|
11
|
+
/** GET helper that retries up to three times with exponential backoff. */
private requestWithRetry;
|
|
12
|
+
/** Maps one API work record to a Paper; null when `id` or `title` is missing. */
private parseWork;
|
|
13
|
+
/** Picks the first URL containing "pdf" from `downloadUrl` / `fullTextUrls`. */
private findPdfUrl;
|
|
14
|
+
/** Normalises an array of strings or `{ name }` objects to a list of names. */
private nameList;
|
|
15
|
+
}
|
|
16
|
+
//# sourceMappingURL=CORESearcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CORESearcher.d.ts","sourceRoot":"","sources":["../../src/platforms/CORESearcher.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAgB,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAyBrG,qBAAa,YAAa,SAAQ,WAAW;IAC3C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,MAAM,CAAC,EAAE,MAAM;IAe3B,eAAe,IAAI,oBAAoB;IAWjC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAepE,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAS5E,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;YAWlE,UAAU;YAUV,gBAAgB;IAgC9B,OAAO,CAAC,SAAS;IA4BjB,OAAO,CAAC,UAAU;IAKlB,OAAO,CAAC,QAAQ;CAMjB"}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { PaperFactory } from '../models/Paper.js';
|
|
3
|
+
import { PaperSource } from './PaperSource.js';
|
|
4
|
+
import { TIMEOUTS, USER_AGENT } from '../config/constants.js';
|
|
5
|
+
import { downloadPdfFromUrl, safeFilename } from '../utils/PdfDownload.js';
|
|
6
|
+
import { PDFExtractor } from '../utils/PDFExtractor.js';
|
|
7
|
+
/**
 * Paper source backed by the CORE v3 API (https://api.core.ac.uk/v3).
 *
 * Works with or without an API key: keyless requests that are rejected with
 * 401/403/429 yield an empty result set instead of an error.
 */
export class CORESearcher extends PaperSource {
    client;
    /**
     * @param apiKey Optional CORE API key. Falls back to the
     *   PAPER_SEARCH_CORE_API_KEY and then CORE_API_KEY environment variables.
     */
    constructor(apiKey) {
        const configuredKey = apiKey || process.env.PAPER_SEARCH_CORE_API_KEY || process.env.CORE_API_KEY || '';
        super('core', 'https://api.core.ac.uk/v3', configuredKey);
        this.client = axios.create({
            baseURL: this.baseUrl,
            timeout: TIMEOUTS.DEFAULT,
            headers: {
                Accept: 'application/json',
                'User-Agent': USER_AGENT,
                ...(configuredKey ? { Authorization: `Bearer ${configuredKey}` } : {})
            },
            // Let 4xx and the retryable 5xx statuses resolve normally so that
            // requestWithRetry can inspect response.status itself.
            validateStatus: status => status < 500 || [500, 502, 503, 504].includes(status)
        });
    }
    /**
     * Describes what this source supports.
     * @returns Capability flags plus the list of option names.
     */
    getCapabilities() {
        return {
            search: true,
            download: true,
            fullText: true,
            citations: true,
            requiresApiKey: false,
            supportedOptions: ['maxResults', 'year']
        };
    }
    /**
     * Searches CORE works.
     *
     * @param query Search expression, sent as the `q` parameter.
     * @param options `maxResults` (default 10, capped at 100) and optional `year`.
     * @returns Parsed papers; records without an id or title are dropped.
     */
    async search(query, options = {}) {
        try {
            const response = await this.requestWithRetry('/search/works', {
                q: query,
                limit: Math.min(options.maxResults || 10, 100),
                offset: 0,
                ...(options.year ? { year: options.year } : {})
            });
            const results = Array.isArray(response?.results) ? response.results : [];
            return results.map((item) => this.parseWork(item)).filter(Boolean);
        }
        catch (error) {
            // NOTE(review): handleHttpError is inherited; presumably it always
            // throws, otherwise this method resolves to undefined - confirm.
            this.handleHttpError(error, 'search');
        }
    }
    /**
     * Downloads the PDF of a work.
     *
     * @param paperId CORE work id.
     * @param options `savePath` target directory (default './downloads').
     * @returns Whatever `downloadPdfFromUrl` resolves with.
     * @throws Error when the work cannot be fetched or exposes no PDF URL.
     */
    async downloadPdf(paperId, options = {}) {
        const details = await this.getDetails(paperId);
        const pdfUrl = details ? this.findPdfUrl(details) : '';
        if (!pdfUrl) {
            throw new Error(`CORE paper ${paperId} does not expose an accessible PDF URL`);
        }
        return downloadPdfFromUrl(pdfUrl, options.savePath || './downloads', `core_${safeFilename(paperId)}`);
    }
    /**
     * Returns the full text of a work.
     *
     * Uses the `fullText` field of the work details when it is longer than
     * 500 characters; otherwise downloads the PDF and extracts its text.
     *
     * @param paperId CORE work id.
     * @param options Forwarded to downloadPdf.
     * @returns The text, or a message naming the PDF when nothing was extracted.
     */
    async readPaper(paperId, options = {}) {
        const details = await this.getDetails(paperId);
        if (details?.fullText && details.fullText.length > 500) {
            return details.fullText;
        }
        const pdfPath = await this.downloadPdf(paperId, options);
        const result = await new PDFExtractor().extractFromFile(pdfPath);
        return result.text || `PDF downloaded to ${pdfPath}, but no text could be extracted.`;
    }
    /**
     * Fetches the details of a single work. Deliberately best-effort.
     *
     * @param paperId CORE work id.
     * @returns The response body, or null on any HTTP status >= 400 or thrown error.
     */
    async getDetails(paperId) {
        try {
            const response = await this.client.get(`/works/${encodeURIComponent(paperId)}`);
            if (response.status >= 400)
                return null;
            return response.data;
        }
        catch {
            return null;
        }
    }
    /**
     * GETs `path` with up to three attempts and exponential backoff
     * (1s, 2s, capped at 8s) between attempts.
     *
     * - 401/403/429 without an API key: resolves to `{ results: [] }`.
     * - 429/500/502/503/504 and thrown (network) errors: retried.
     * - 401/403 with an API key: retried once per attempt without credentials.
     * - Any other status >= 400: thrown immediately, since repeating a
     *   deterministic client error only adds latency.
     *
     * @param path API path relative to the base URL.
     * @param params Query-string parameters.
     * @returns The response body.
     * @throws Error describing the last failure once the attempts are used up.
     */
    async requestWithRetry(path, params) {
        const maxAttempts = 3;
        const retryableStatuses = [429, 500, 502, 503, 504];
        let lastError;
        // Sleeps before the next attempt; skipped after the final one because
        // nothing follows it.
        const pauseBeforeRetry = async (attempt) => {
            if (attempt < maxAttempts - 1) {
                await new Promise(resolve => setTimeout(resolve, Math.min(8000, 1000 * 2 ** attempt)));
            }
        };
        for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
            let response;
            try {
                response = await this.client.get(path, { params });
            }
            catch (error) {
                lastError = error;
                await pauseBeforeRetry(attempt);
                continue;
            }
            const status = response.status;
            if ([401, 403, 429].includes(status) && !this.apiKey) {
                return { results: [] };
            }
            if (retryableStatuses.includes(status)) {
                // Keep the status so the final error is informative.
                lastError = new Error(`CORE request failed with HTTP ${status}`);
                await pauseBeforeRetry(attempt);
                continue;
            }
            if ([401, 403].includes(status)) {
                // Only reachable with an API key (the keyless case returned
                // above): the key was rejected, so retry anonymously.
                try {
                    const fallback = await axios.get(`${this.baseUrl}${path}`, {
                        params,
                        timeout: TIMEOUTS.DEFAULT,
                        headers: { Accept: 'application/json', 'User-Agent': USER_AGENT }
                    });
                    return fallback.data;
                }
                catch (error) {
                    lastError = error;
                    await pauseBeforeRetry(attempt);
                    continue;
                }
            }
            if (status >= 400) {
                throw new Error(`CORE request failed with HTTP ${status}`);
            }
            return response.data;
        }
        throw lastError instanceof Error ? lastError : new Error('CORE request failed');
    }
    /**
     * Maps one API work record to a paper object.
     *
     * @param item Work record from the API.
     * @returns The created paper, or null when `id` or `title` is missing.
     */
    parseWork(item) {
        if (!item || !item.id || !item.title)
            return null;
        const categories = this.nameList(item.subjects);
        const keywords = this.nameList(item.tags);
        const pdfUrl = this.findPdfUrl(item);
        return PaperFactory.create({
            paperId: String(item.id),
            title: this.cleanText(item.title),
            authors: this.nameList(item.authors),
            abstract: item.abstract || '',
            doi: item.doi || '',
            publishedDate: item.publishedDate ? this.parseDate(item.publishedDate) : null,
            pdfUrl,
            url: item.url || (item.doi ? `https://doi.org/${item.doi}` : ''),
            source: 'core',
            // Cap list sizes so a heavily tagged record stays compact.
            categories: categories.slice(0, 10),
            keywords: keywords.slice(0, 10),
            citationCount: Number(item.citationCount || 0),
            year: item.yearPublished,
            extra: {
                repository: item.repository?.name || '',
                language: item.language || '',
                downloadCount: item.downloadCount || 0
            }
        });
    }
    /**
     * Picks a PDF link for a work.
     *
     * @param item Work record; `downloadUrl` and `fullTextUrls` are inspected.
     * @returns `downloadUrl` when it contains "pdf", otherwise the first
     *   matching entry of `fullTextUrls`, otherwise ''.
     */
    findPdfUrl(item) {
        // Non-string entries are skipped rather than crashing on toLowerCase.
        const looksLikePdf = url => typeof url === 'string' && url.toLowerCase().includes('pdf');
        if (looksLikePdf(item.downloadUrl))
            return item.downloadUrl;
        const urls = Array.isArray(item.fullTextUrls) ? item.fullTextUrls : [];
        return urls.find(looksLikePdf) || '';
    }
    /**
     * Normalises a list of strings or `{ name }` objects to plain names.
     *
     * @param values Candidate list; anything that is not an array yields [].
     * @returns Non-empty names in their original order.
     */
    nameList(values) {
        if (!Array.isArray(values))
            return [];
        return values
            .map(value => (typeof value === 'string' ? value : value?.name || ''))
            .filter(Boolean);
    }
}
|
|
148
|
+
//# sourceMappingURL=CORESearcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CORESearcher.js","sourceRoot":"","sources":["../../src/platforms/CORESearcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAwB,MAAM,OAAO,CAAC;AAC7C,OAAO,EAAS,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAwD,MAAM,kBAAkB,CAAC;AACrG,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAC3E,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAsBxD,MAAM,OAAO,YAAa,SAAQ,WAAW;IAC1B,MAAM,CAAgB;IAEvC,YAAY,MAAe;QACzB,MAAM,aAAa,GAAG,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,yBAAyB,IAAI,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,EAAE,CAAC;QACxG,KAAK,CAAC,MAAM,EAAE,2BAA2B,EAAE,aAAa,CAAC,CAAC;QAC1D,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,OAAO,EAAE;gBACP,MAAM,EAAE,kBAAkB;gBAC1B,YAAY,EAAE,UAAU;gBACxB,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,UAAU,aAAa,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACvE;YACD,cAAc,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC;SAChF,CAAC,CAAC;IACL,CAAC;IAED,eAAe;QACb,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,IAAI;YACd,QAAQ,EAAE,IAAI;YACd,SAAS,EAAE,IAAI;YACf,cAAc,EAAE,KAAK;YACrB,gBAAgB,EAAE,CAAC,YAAY,EAAE,MAAM,CAAC;SACzC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QACrD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,eAAe,EAAE;gBAC5D,CAAC,EAAE,KAAK;gBACR,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,EAAE,GAAG,CAAC;gBAC9C,MAAM,EAAE,CAAC;gBACT,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAChD,CAAC,CAAC;YACH,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;YACzE,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAc,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAY,CAAC;QAC1F,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,OAAe,EAAE,UAA2B,EAAE;QAC9D,MAAM,OAAO,GAAG,MAAM,
IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACvD,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,cAAc,OAAO,wCAAwC,CAAC,CAAC;QACjF,CAAC;QACD,OAAO,kBAAkB,CAAC,MAAM,EAAE,OAAO,CAAC,QAAQ,IAAI,aAAa,EAAE,QAAQ,YAAY,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACxG,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,OAAe,EAAE,UAA2B,EAAE;QAC5D,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QAC/C,IAAI,OAAO,EAAE,QAAQ,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACvD,OAAO,OAAO,CAAC,QAAQ,CAAC;QAC1B,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACzD,MAAM,MAAM,GAAG,MAAM,IAAI,YAAY,EAAE,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;QACjE,OAAO,MAAM,CAAC,IAAI,IAAI,qBAAqB,OAAO,mCAAmC,CAAC;IACxF,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,OAAe;QACtC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,UAAU,kBAAkB,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YAChF,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG;gBAAE,OAAO,IAAI,CAAC;YACxC,OAAO,QAAQ,CAAC,IAAgB,CAAC;QACnC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAAC,IAAY,EAAE,MAA+B;QAC1E,IAAI,SAAkB,CAAC;QACvB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;YAChD,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;gBACzD,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;oBAC9D,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;gBACzB,CAAC;gBACD,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBACxD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,GAAG,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC;oBACvF,SAAS;gBACX,CAAC;gBACD,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;oBACxD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,EAAE;wBACzD,MAAM;wBACN,OAAO,EAAE,QAAQ,CAAC,OAAO;w
BACzB,OAAO,EAAE,EAAE,MAAM,EAAE,kBAAkB,EAAE,YAAY,EAAE,UAAU,EAAE;qBAClE,CAAC,CAAC;oBACH,OAAO,QAAQ,CAAC,IAAI,CAAC;gBACvB,CAAC;gBACD,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;oBAC3B,MAAM,IAAI,KAAK,CAAC,iCAAiC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;gBACtE,CAAC;gBACD,OAAO,QAAQ,CAAC,IAAI,CAAC;YACvB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,SAAS,GAAG,KAAK,CAAC;gBAClB,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,GAAG,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC;YACzF,CAAC;QACH,CAAC;QACD,MAAM,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IAClF,CAAC;IAEO,SAAS,CAAC,IAAc;QAC9B,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QACzC,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAErC,OAAO,YAAY,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;YACxB,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC;YACjC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC;YACpC,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,EAAE;YAC7B,GAAG,EAAE,IAAI,CAAC,GAAG,IAAI,EAAE;YACnB,aAAa,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI;YAC7E,MAAM;YACN,GAAG,EAAE,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,mBAAmB,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAChE,MAAM,EAAE,MAAM;YACd,UAAU,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;YACnC,QAAQ,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;YAC/B,aAAa,EAAE,MAAM,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,CAAC;YAC9C,IAAI,EAAE,IAAI,CAAC,aAAa;YACxB,KAAK,EAAE;gBACL,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,IAAI,IAAI,EAAE;gBACvC,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,EAAE;gBAC7B,aAAa,EAAE,IAAI,CAAC,aAAa,IAAI,CAAC;aACvC;SACF,CAAC,CAAC;IACL,CAAC;IAEO,UAAU,CAAC,IAAc;QAC/B,IAAI,IAAI,CAAC,WAAW,EAAE,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO,IAAI,CAAC,WAAW,CAAC;QAC7E,OAAO,CAAC,IAAI,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG
,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;IACxF,CAAC;IAEO,QAAQ,CAAC,MAA4C;QAC3D,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;YAAE,OAAO,EAAE,CAAC;QACtC,OAAO,MAAM;aACV,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;aACpE,MAAM,CAAC,OAAO,CAAC,CAAC;IACrB,CAAC;CACF"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Crossref API Integration
|
|
3
|
+
*
|
|
4
|
+
* Crossref is a DOI registration agency providing free access to scholarly metadata.
|
|
5
|
+
* No API key is required, but providing an email address (the mailto parameter) is recommended for polite pool access.
|
|
6
|
+
*
|
|
7
|
+
* Documentation: https://api.crossref.org/
|
|
8
|
+
*/
|
|
9
|
+
import { Paper } from '../models/Paper.js';
|
|
10
|
+
import { PaperSource, SearchOptions, DownloadOptions, PlatformCapabilities } from './PaperSource.js';
|
|
11
|
+
/**
* Ambient type declaration for the Crossref searcher, a subclass of PaperSource.
*
* Only signatures are visible here (this is a `declare class`); no method
* bodies are shown, so notes about runtime behavior below are hedged and
* should be confirmed against the implementation.
* Private members appear without types because declaration output omits them.
*/
export declare class CrossrefSearcher extends PaperSource {
|
|
12
|
+
/** Private member; type not visible here. NOTE(review): presumably the HTTP client used for Crossref requests - confirm. */
private client;
|
|
13
|
+
/** Private member; type not visible here. NOTE(review): presumably holds the contact email given to the constructor - confirm. */
private mailto;
|
|
14
|
+
/** Private, readonly. NOTE(review): the name suggests request throttling; the policy is not visible here - confirm. */
private readonly rateLimiter;
|
|
15
|
+
/** Private, readonly. NOTE(review): the name suggests response caching; keys and expiry are not visible here - confirm. */
private readonly cache;
|
|
16
|
+
/**
* @param mailto Optional string. NOTE(review): presumably the contact email the
* file header recommends for polite pool access - confirm how it is sent.
*/
constructor(mailto?: string);
|
|
17
|
+
/** @returns A PlatformCapabilities value; the individual flags are not visible in this declaration. */
getCapabilities(): PlatformCapabilities;
|
|
18
|
+
/**
|
|
19
|
+
* Clean and validate DOI format
|
|
20
|
+
* @param doi Raw DOI string (may include URL prefixes)
|
|
21
|
+
* @returns Cleaned DOI or null if invalid
|
|
22
|
+
*/
|
|
23
|
+
private cleanAndValidateDoi;
|
|
24
|
+
/**
* @param query Search string.
* @param options Optional SearchOptions; which fields are honored is not visible here.
* @returns Promise resolving to an array of Paper.
*/
search(query: string, options?: SearchOptions): Promise<Paper[]>;
|
|
25
|
+
/**
* @param doi DOI string. NOTE(review): presumably normalized via cleanAndValidateDoi - confirm.
* @returns Promise resolving to a Paper or null. NOTE(review): the conditions that
* produce null (invalid DOI, not found, request failure) are not visible here - confirm.
*/
getPaperByDoi(doi: string): Promise<Paper | null>;
|
|
26
|
+
/**
* @param doi DOI string.
* @returns Promise resolving to an array of Paper. NOTE(review): presumably the
* works citing the given DOI - confirm.
*/
getCitations(doi: string): Promise<Paper[]>;
|
|
27
|
+
/**
* @param doi DOI string.
* @returns Promise resolving to an array of Paper. NOTE(review): presumably the
* works referenced by the given DOI - confirm.
*/
getReferences(doi: string): Promise<Paper[]>;
|
|
28
|
+
/**
* @param paperId Paper identifier string.
* @param options Optional DownloadOptions.
* @returns Promise resolving to a string. NOTE(review): whether this is a file path,
* or whether the call rejects as unsupported, is not visible here - confirm.
*/
downloadPdf(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
29
|
+
/**
* @param paperId Paper identifier string.
* @param options Optional DownloadOptions.
* @returns Promise resolving to a string. NOTE(review): whether this is extracted text,
* or whether the call rejects as unsupported, is not visible here - confirm.
*/
readPaper(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
30
|
+
/** Private member; signature not visible here. NOTE(review): presumably converts a search response into Paper objects - confirm. */
private parseSearchResponse;
|
|
31
|
+
/** Private member; signature not visible here. NOTE(review): presumably maps one Crossref record to a Paper - confirm. */
private parsePaper;
|
|
32
|
+
/** Private member; signature not visible here. NOTE(review): presumably collects DOIs from a record's reference list - confirm. */
private extractReferenceDois;
|
|
33
|
+
}
|
|
34
|
+
//# sourceMappingURL=CrossrefSearcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CrossrefSearcher.d.ts","sourceRoot":"","sources":["../../src/platforms/CrossrefSearcher.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAAE,KAAK,EAAgB,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAQrG,qBAAa,gBAAiB,SAAQ,WAAW;IAC/C,OAAO,CAAC,MAAM,CAAgB;IAC9B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAwB;gBAElC,MAAM,CAAC,EAAE,MAAM;IAyB3B,eAAe,IAAI,oBAAoB;IAWvC;;;;OAIG;IACH,OAAO,CAAC,mBAAmB;IAKrB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IA6EpE,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC;IAuCjD,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAyD3C,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IA0B5C,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC;IAIxE,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC;IAI5E,OAAO,CAAC,mBAAmB;IAc3B,OAAO,CAAC,UAAU;IAoGlB,OAAO,CAAC,oBAAoB;CAa7B"}
|