paper-search-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +165 -0
- package/LICENSE +21 -0
- package/README-sc.md +642 -0
- package/README.md +642 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +637 -0
- package/dist/cli.js.map +1 -0
- package/dist/config/ConfigService.d.ts +26 -0
- package/dist/config/ConfigService.d.ts.map +1 -0
- package/dist/config/ConfigService.js +145 -0
- package/dist/config/ConfigService.js.map +1 -0
- package/dist/config/constants.d.ts +140 -0
- package/dist/config/constants.d.ts.map +1 -0
- package/dist/config/constants.js +93 -0
- package/dist/config/constants.js.map +1 -0
- package/dist/core/diagnostics.d.ts +43 -0
- package/dist/core/diagnostics.d.ts.map +1 -0
- package/dist/core/diagnostics.js +544 -0
- package/dist/core/diagnostics.js.map +1 -0
- package/dist/core/handleToolCall.d.ts +8 -0
- package/dist/core/handleToolCall.d.ts.map +1 -0
- package/dist/core/handleToolCall.js +440 -0
- package/dist/core/handleToolCall.js.map +1 -0
- package/dist/core/schemas.d.ts +454 -0
- package/dist/core/schemas.d.ts.map +1 -0
- package/dist/core/schemas.js +322 -0
- package/dist/core/schemas.js.map +1 -0
- package/dist/core/searchers.d.ts +45 -0
- package/dist/core/searchers.d.ts.map +1 -0
- package/dist/core/searchers.js +73 -0
- package/dist/core/searchers.js.map +1 -0
- package/dist/core/tools.d.ts +7 -0
- package/dist/core/tools.d.ts.map +1 -0
- package/dist/core/tools.js +640 -0
- package/dist/core/tools.js.map +1 -0
- package/dist/models/Paper.d.ts +64 -0
- package/dist/models/Paper.d.ts.map +1 -0
- package/dist/models/Paper.js +70 -0
- package/dist/models/Paper.js.map +1 -0
- package/dist/platforms/ArxivSearcher.d.ts +64 -0
- package/dist/platforms/ArxivSearcher.d.ts.map +1 -0
- package/dist/platforms/ArxivSearcher.js +531 -0
- package/dist/platforms/ArxivSearcher.js.map +1 -0
- package/dist/platforms/BioRxivSearcher.d.ts +47 -0
- package/dist/platforms/BioRxivSearcher.d.ts.map +1 -0
- package/dist/platforms/BioRxivSearcher.js +196 -0
- package/dist/platforms/BioRxivSearcher.js.map +1 -0
- package/dist/platforms/CORESearcher.d.ts +16 -0
- package/dist/platforms/CORESearcher.d.ts.map +1 -0
- package/dist/platforms/CORESearcher.js +148 -0
- package/dist/platforms/CORESearcher.js.map +1 -0
- package/dist/platforms/CrossrefSearcher.d.ts +34 -0
- package/dist/platforms/CrossrefSearcher.d.ts.map +1 -0
- package/dist/platforms/CrossrefSearcher.js +339 -0
- package/dist/platforms/CrossrefSearcher.js.map +1 -0
- package/dist/platforms/EuropePMCSearcher.d.ts +20 -0
- package/dist/platforms/EuropePMCSearcher.d.ts.map +1 -0
- package/dist/platforms/EuropePMCSearcher.js +173 -0
- package/dist/platforms/EuropePMCSearcher.js.map +1 -0
- package/dist/platforms/GoogleScholarSearcher.d.ts +77 -0
- package/dist/platforms/GoogleScholarSearcher.d.ts.map +1 -0
- package/dist/platforms/GoogleScholarSearcher.js +262 -0
- package/dist/platforms/GoogleScholarSearcher.js.map +1 -0
- package/dist/platforms/IACRSearcher.d.ts +51 -0
- package/dist/platforms/IACRSearcher.d.ts.map +1 -0
- package/dist/platforms/IACRSearcher.js +339 -0
- package/dist/platforms/IACRSearcher.js.map +1 -0
- package/dist/platforms/OpenAIRESearcher.d.ts +22 -0
- package/dist/platforms/OpenAIRESearcher.d.ts.map +1 -0
- package/dist/platforms/OpenAIRESearcher.js +223 -0
- package/dist/platforms/OpenAIRESearcher.js.map +1 -0
- package/dist/platforms/OpenAlexSearcher.d.ts +14 -0
- package/dist/platforms/OpenAlexSearcher.d.ts.map +1 -0
- package/dist/platforms/OpenAlexSearcher.js +114 -0
- package/dist/platforms/OpenAlexSearcher.js.map +1 -0
- package/dist/platforms/PMCSearcher.d.ts +20 -0
- package/dist/platforms/PMCSearcher.d.ts.map +1 -0
- package/dist/platforms/PMCSearcher.js +177 -0
- package/dist/platforms/PMCSearcher.js.map +1 -0
- package/dist/platforms/PaperSource.d.ts +143 -0
- package/dist/platforms/PaperSource.d.ts.map +1 -0
- package/dist/platforms/PaperSource.js +125 -0
- package/dist/platforms/PaperSource.js.map +1 -0
- package/dist/platforms/PubMedSearcher.d.ts +104 -0
- package/dist/platforms/PubMedSearcher.d.ts.map +1 -0
- package/dist/platforms/PubMedSearcher.js +422 -0
- package/dist/platforms/PubMedSearcher.js.map +1 -0
- package/dist/platforms/SciHubSearcher.d.ts +66 -0
- package/dist/platforms/SciHubSearcher.d.ts.map +1 -0
- package/dist/platforms/SciHubSearcher.js +398 -0
- package/dist/platforms/SciHubSearcher.js.map +1 -0
- package/dist/platforms/ScienceDirectSearcher.d.ts +42 -0
- package/dist/platforms/ScienceDirectSearcher.d.ts.map +1 -0
- package/dist/platforms/ScienceDirectSearcher.js +326 -0
- package/dist/platforms/ScienceDirectSearcher.js.map +1 -0
- package/dist/platforms/ScopusSearcher.d.ts +43 -0
- package/dist/platforms/ScopusSearcher.d.ts.map +1 -0
- package/dist/platforms/ScopusSearcher.js +364 -0
- package/dist/platforms/ScopusSearcher.js.map +1 -0
- package/dist/platforms/SemanticScholarSearcher.d.ts +96 -0
- package/dist/platforms/SemanticScholarSearcher.d.ts.map +1 -0
- package/dist/platforms/SemanticScholarSearcher.js +419 -0
- package/dist/platforms/SemanticScholarSearcher.js.map +1 -0
- package/dist/platforms/SpringerSearcher.d.ts +54 -0
- package/dist/platforms/SpringerSearcher.d.ts.map +1 -0
- package/dist/platforms/SpringerSearcher.js +407 -0
- package/dist/platforms/SpringerSearcher.js.map +1 -0
- package/dist/platforms/UnpaywallSearcher.d.ts +18 -0
- package/dist/platforms/UnpaywallSearcher.d.ts.map +1 -0
- package/dist/platforms/UnpaywallSearcher.js +115 -0
- package/dist/platforms/UnpaywallSearcher.js.map +1 -0
- package/dist/platforms/WebOfScienceSearcher.d.ts +111 -0
- package/dist/platforms/WebOfScienceSearcher.d.ts.map +1 -0
- package/dist/platforms/WebOfScienceSearcher.js +500 -0
- package/dist/platforms/WebOfScienceSearcher.js.map +1 -0
- package/dist/platforms/WileySearcher.d.ts +44 -0
- package/dist/platforms/WileySearcher.d.ts.map +1 -0
- package/dist/platforms/WileySearcher.js +148 -0
- package/dist/platforms/WileySearcher.js.map +1 -0
- package/dist/services/CitationService.d.ts +66 -0
- package/dist/services/CitationService.d.ts.map +1 -0
- package/dist/services/CitationService.js +237 -0
- package/dist/services/CitationService.js.map +1 -0
- package/dist/services/MultiSourceSearchService.d.ts +19 -0
- package/dist/services/MultiSourceSearchService.d.ts.map +1 -0
- package/dist/services/MultiSourceSearchService.js +96 -0
- package/dist/services/MultiSourceSearchService.js.map +1 -0
- package/dist/services/OpenAccessFallbackService.d.ts +20 -0
- package/dist/services/OpenAccessFallbackService.d.ts.map +1 -0
- package/dist/services/OpenAccessFallbackService.js +124 -0
- package/dist/services/OpenAccessFallbackService.js.map +1 -0
- package/dist/utils/ErrorHandler.d.ts +99 -0
- package/dist/utils/ErrorHandler.d.ts.map +1 -0
- package/dist/utils/ErrorHandler.js +266 -0
- package/dist/utils/ErrorHandler.js.map +1 -0
- package/dist/utils/Logger.d.ts +6 -0
- package/dist/utils/Logger.d.ts.map +1 -0
- package/dist/utils/Logger.js +26 -0
- package/dist/utils/Logger.js.map +1 -0
- package/dist/utils/PDFExtractor.d.ts +34 -0
- package/dist/utils/PDFExtractor.d.ts.map +1 -0
- package/dist/utils/PDFExtractor.js +130 -0
- package/dist/utils/PDFExtractor.js.map +1 -0
- package/dist/utils/PdfDownload.d.ts +7 -0
- package/dist/utils/PdfDownload.d.ts.map +1 -0
- package/dist/utils/PdfDownload.js +52 -0
- package/dist/utils/PdfDownload.js.map +1 -0
- package/dist/utils/QuotaManager.d.ts +32 -0
- package/dist/utils/QuotaManager.d.ts.map +1 -0
- package/dist/utils/QuotaManager.js +95 -0
- package/dist/utils/QuotaManager.js.map +1 -0
- package/dist/utils/RateLimiter.d.ts +50 -0
- package/dist/utils/RateLimiter.d.ts.map +1 -0
- package/dist/utils/RateLimiter.js +121 -0
- package/dist/utils/RateLimiter.js.map +1 -0
- package/dist/utils/RequestCache.d.ts +26 -0
- package/dist/utils/RequestCache.d.ts.map +1 -0
- package/dist/utils/RequestCache.js +66 -0
- package/dist/utils/RequestCache.js.map +1 -0
- package/dist/utils/SecurityUtils.d.ts +80 -0
- package/dist/utils/SecurityUtils.d.ts.map +1 -0
- package/dist/utils/SecurityUtils.js +357 -0
- package/dist/utils/SecurityUtils.js.map +1 -0
- package/package.json +111 -0
- package/skills/paper-search/SKILL.md +192 -0
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Crossref API Integration
|
|
3
|
+
*
|
|
4
|
+
* Crossref is a DOI registration agency providing free access to scholarly metadata.
|
|
5
|
+
* No API key required, but providing email (mailto parameter) is recommended for polite pool access.
|
|
6
|
+
*
|
|
7
|
+
* Documentation: https://api.crossref.org/
|
|
8
|
+
*/
|
|
9
|
+
import axios from 'axios';
|
|
10
|
+
import { PaperFactory } from '../models/Paper.js';
|
|
11
|
+
import { PaperSource } from './PaperSource.js';
|
|
12
|
+
import { sanitizeDoi, withTimeout } from '../utils/SecurityUtils.js';
|
|
13
|
+
import { API_ENDPOINTS, DEFAULT_MAILTO, TIMEOUTS, USER_AGENT } from '../config/constants.js';
|
|
14
|
+
import { logDebug } from '../utils/Logger.js';
|
|
15
|
+
import { RateLimiter } from '../utils/RateLimiter.js';
|
|
16
|
+
import { ErrorHandler } from '../utils/ErrorHandler.js';
|
|
17
|
+
import { RequestCache } from '../utils/RequestCache.js';
|
|
18
|
+
/**
 * Paper source backed by the Crossref REST API (`/works`).
 *
 * Supports metadata search, DOI lookup, and citation/reference expansion.
 * Citing works are discovered through the OpenCitations endpoint configured in
 * `API_ENDPOINTS.OPENCITATIONS`, then resolved one by one through Crossref.
 * PDF download and full-text extraction are not supported.
 *
 * NOTE(review): `handleHttpError` is inherited from `PaperSource` and is not
 * visible here. Every catch block returns a fallback value after calling it,
 * so the methods keep their declared return shape if it returns instead of
 * throwing.
 */
export class CrossrefSearcher extends PaperSource {
    /** Axios instance whose base URL is the Crossref `/works` endpoint. */
    client;
    /** Contact e-mail sent with each Crossref request (query parameter and User-Agent). */
    mailto;
    /** Rate limiter awaited before every Crossref request. */
    rateLimiter;
    /** Cache of search results, keyed by query and options. */
    cache;

    /**
     * @param {string} [mailto] Contact e-mail; falls back to the
     *   `CROSSREF_MAILTO` environment variable, then to `DEFAULT_MAILTO`.
     */
    constructor(mailto) {
        super('crossref', 'https://api.crossref.org/works', undefined);
        this.mailto = mailto || process.env.CROSSREF_MAILTO || DEFAULT_MAILTO;
        this.client = axios.create({
            baseURL: this.baseUrl,
            timeout: TIMEOUTS.DEFAULT,
            headers: {
                'Accept': 'application/json',
                'User-Agent': `${USER_AGENT} paper-search-cli/0.1.0 (mailto:${this.mailto})`
            }
        });
        // Deliberately conservative client-side limit: 3 req/s with a burst of 5.
        this.rateLimiter = new RateLimiter({
            requestsPerSecond: 3,
            burstCapacity: 5
        });
        this.cache = new RequestCache({
            maxSize: 100,
            ttlMs: 3600000 // 1 hour
        });
    }

    /**
     * Describe what this source can do.
     * @returns {object} Capability flags plus the search options honoured by `search`.
     */
    getCapabilities() {
        return {
            search: true,
            download: false,
            fullText: false,
            citations: true,
            requiresApiKey: false,
            supportedOptions: ['maxResults', 'year', 'author', 'sortBy', 'sortOrder']
        };
    }

    /**
     * Clean and validate DOI format.
     * @param {string} doi Raw DOI string (may include URL prefixes)
     * @returns {string|null} Cleaned DOI, or null if `sanitizeDoi` reports it invalid
     */
    cleanAndValidateDoi(doi) {
        const result = sanitizeDoi(doi);
        return result.valid ? result.sanitized : null;
    }

    /**
     * Search Crossref works.
     *
     * @param {string} query Free-text query.
     * @param {object} [options]
     * @param {number} [options.maxResults=10] Number of rows, clamped to 1..1000.
     * @param {string|number} [options.year] `"2020"` (that year only),
     *   `"2020-2023"` (inclusive range) or `"2020-"` (2020 onwards).
     * @param {string} [options.author] Sent as Crossref's `query.author` field query.
     * @param {string} [options.sortBy] `relevance` | `date` | `citations`; anything else means relevance.
     * @param {string} [options.sortOrder] `asc`, otherwise descending.
     * @param {boolean} [options.forceRefresh] Skip the cache read (the result is still cached).
     * @returns {Promise<Array>} Parsed papers; an empty array when the response has no items.
     */
    async search(query, options = {}) {
        // `forceRefresh` only controls whether the cache is read, so it is kept
        // out of the key: a refreshed result must overwrite the entry that
        // ordinary lookups for the same query will read.
        const { forceRefresh, ...cacheableOptions } = options;
        const cacheKey = this.cache.generateKey('crossref', query, cacheableOptions);
        if (forceRefresh !== true) {
            const cached = this.cache.get(cacheKey);
            if (cached) {
                return cached;
            }
        }

        // Missing, zero or non-numeric values fall back to 10; negatives clamp to 1.
        const requestedRows = Math.trunc(Number(options.maxResults)) || 10;
        const params = {
            query: query,
            rows: Math.min(Math.max(requestedRows, 1), 1000),
            mailto: this.mailto
        };
        if (options.author) {
            params['query.author'] = options.author;
        }

        // Year filter. String() guards against a numeric year, which has no .match().
        const filters = [];
        if (options.year) {
            const yearMatch = String(options.year).trim().match(/^(\d{4})(-(\d{4})?)?$/);
            if (yearMatch) {
                const [, startYear, rangeSuffix, endYear] = yearMatch;
                filters.push(`from-pub-date:${startYear}`);
                if (endYear) {
                    filters.push(`until-pub-date:${endYear}`);
                }
                else if (!rangeSuffix) {
                    // A bare year means that year only; "YYYY-" stays open-ended.
                    filters.push(`until-pub-date:${startYear}`);
                }
            }
        }
        if (filters.length > 0) {
            params.filter = filters.join(',');
        }

        // A Map lookup cannot hit inherited keys such as "constructor".
        const sortFields = new Map([
            ['relevance', 'relevance'],
            ['date', 'published'],
            ['citations', 'is-referenced-by-count']
        ]);
        params.sort = sortFields.get(options.sortBy) ?? 'relevance';
        params.order = options.sortOrder === 'asc' ? 'asc' : 'desc';

        try {
            await this.rateLimiter.waitForPermission();
            const response = await ErrorHandler.retryWithBackoff(() => this.client.get('', { params }), { context: 'Crossref search' });
            if (response.status !== 200 || !response.data?.message?.items) {
                return [];
            }
            const papers = this.parseSearchResponse(response.data);
            this.cache.set(cacheKey, papers);
            return papers;
        }
        catch (error) {
            this.handleHttpError(error, 'search');
            return [];
        }
    }

    /**
     * Fetch one work by DOI and attach the DOIs of its references.
     * @param {string} doi Raw DOI (URL prefixes are accepted by the sanitizer).
     * @returns {Promise<object|null>} The parsed paper, or null when the DOI is
     *   invalid, the work is not found (HTTP 404), or the record cannot be parsed.
     */
    async getPaperByDoi(doi) {
        const cleanDoi = this.cleanAndValidateDoi(doi);
        if (!cleanDoi) {
            return null;
        }
        try {
            // Encode DOI for URL path (DOIs can contain special characters like /)
            const encodedDoi = encodeURIComponent(cleanDoi);
            await this.rateLimiter.waitForPermission();
            const response = await ErrorHandler.retryWithBackoff(() => this.client.get(`/${encodedDoi}`, { params: { mailto: this.mailto } }), { context: 'Crossref getPaperByDoi' });
            if (response.status !== 200 || !response.data?.message) {
                return null;
            }
            const paper = this.parsePaper(response.data.message);
            if (paper) {
                paper.references = this.extractReferenceDois(response.data.message);
            }
            return paper;
        }
        catch (error) {
            // 404 means not found
            if (error?.response?.status === 404) {
                return null;
            }
            this.handleHttpError(error, 'getPaperByDoi');
            return null;
        }
    }

    /**
     * List works citing the given DOI.
     *
     * Crossref does not expose citing works directly, so the citing DOIs come
     * from the OpenCitations endpoint and up to 50 of them are resolved
     * through Crossref.
     *
     * @param {string} doi DOI of the cited work.
     * @returns {Promise<Array>} Citing papers that could be resolved (possibly empty).
     */
    async getCitations(doi) {
        const cleanDoi = this.cleanAndValidateDoi(doi);
        if (!cleanDoi) {
            return [];
        }
        try {
            const encodedDoi = encodeURIComponent(cleanDoi);
            // Outer timeout is a second guard on top of the axios timeout.
            const response = await withTimeout(axios.get(`${API_ENDPOINTS.OPENCITATIONS}/citations/${encodedDoi}`, { timeout: TIMEOUTS.DEFAULT }), TIMEOUTS.DEFAULT + TIMEOUTS.BUFFER, 'OpenCitations API request timed out');
            // Anything but a JSON array (e.g. an HTML error page) carries no citations.
            if (response.status !== 200 || !Array.isArray(response.data)) {
                return [];
            }
            const citingDois = response.data.map((item) => item?.citing).filter(Boolean);
            return await this.#fetchPapersByDois(citingDois);
        }
        catch (error) {
            this.handleHttpError(error, 'getCitations');
            return [];
        }
    }

    /**
     * List works referenced by the given DOI (at most 50 are resolved).
     * @param {string} doi DOI of the citing work.
     * @returns {Promise<Array>} Referenced papers that could be resolved (possibly empty).
     */
    async getReferences(doi) {
        try {
            const paper = await this.getPaperByDoi(doi);
            const referenceDois = paper?.references || [];
            if (referenceDois.length === 0) {
                return [];
            }
            return await this.#fetchPapersByDois(referenceDois);
        }
        catch (error) {
            this.handleHttpError(error, 'getReferences');
            return [];
        }
    }

    /**
     * Not supported by Crossref.
     * @throws {Error} Always.
     */
    async downloadPdf(paperId, options) {
        throw new Error('Crossref does not support direct PDF download');
    }

    /**
     * Not supported by Crossref.
     * @throws {Error} Always.
     */
    async readPaper(paperId, options) {
        throw new Error('Crossref does not support full text extraction');
    }

    /**
     * Convert a Crossref search payload into papers, dropping unparsable items.
     * @param {object} data Response body with the works under `message.items`.
     * @returns {Array} Parsed papers.
     */
    parseSearchResponse(data) {
        const items = data?.message?.items;
        if (!Array.isArray(items)) {
            return [];
        }
        return items.map((item) => this.parsePaper(item)).filter(Boolean);
    }

    /**
     * Convert one Crossref work record into a paper via `PaperFactory.create`.
     * @param {object} data Crossref work record.
     * @returns {object|null} The paper, or null when building it throws.
     */
    parsePaper(data) {
        try {
            const doi = data.DOI || '';
            const title = (data.title || [])[0] || 'No title';

            // People carry given/family; entries with only `name` (e.g.
            // organisations) are kept under that name instead of being dropped.
            const authors = [];
            for (const author of data.author || []) {
                const fullName = `${author.given || ''} ${author.family || ''}`.trim() || author.name || '';
                if (fullName) {
                    authors.push(fullName);
                }
            }

            // The abstract may contain markup tags; strip them. A non-string
            // value is treated as "no abstract" rather than failing the paper.
            const abstract = typeof data.abstract === 'string'
                ? data.abstract.replace(/<[^>]+>/g, '')
                : '';

            // Publication date: first available of print, online, published, created.
            const dateData = data['published-print'] ||
                data['published-online'] ||
                data['published'] ||
                data['created'];
            const dateParts = dateData?.['date-parts']?.[0];
            let publishedDate = null;
            let year;
            if (Array.isArray(dateParts) && typeof dateParts[0] === 'number') {
                year = dateParts[0];
                // Missing month/day default to 1.
                // NOTE(review): the Date is built in local time; if it is later
                // serialised with toISOString() the day can shift in timezones
                // ahead of UTC. Confirm against PaperFactory before changing.
                publishedDate = new Date(year, (dateParts[1] || 1) - 1, dateParts[2] || 1);
            }

            const journal = (data['container-title'] || [])[0] || undefined;

            return PaperFactory.create({
                paperId: doi,
                title: title,
                authors: authors,
                abstract: abstract,
                source: 'crossref',
                publishedDate: publishedDate,
                year: year,
                journal: journal,
                doi: doi,
                // Prefer the URL Crossref supplies; otherwise build the doi.org link.
                url: data.URL || (doi ? `https://doi.org/${doi}` : ''),
                pdfUrl: '',
                volume: data.volume || undefined,
                issue: data.issue || undefined,
                pages: data.page || undefined,
                citationCount: data['is-referenced-by-count'] || 0,
                extra: {
                    publisher: data.publisher || '',
                    type: data.type || '',
                    issn: data.ISSN || [],
                    isbn: data.ISBN || [],
                    subjects: data.subject || []
                }
            });
        }
        catch (error) {
            logDebug('Error parsing Crossref paper:', error.message);
            return null;
        }
    }

    /**
     * Collect the DOIs listed in a work's `reference` array.
     * @param {object} data Crossref work record.
     * @returns {string[]} DOIs of references that have one (entries without a DOI are skipped).
     */
    extractReferenceDois(data) {
        const referenceData = Array.isArray(data?.reference) ? data.reference : [];
        return referenceData.map((ref) => ref?.DOI).filter(Boolean);
    }

    /**
     * Resolve DOIs to papers one at a time (each lookup goes through the rate
     * limiter in `getPaperByDoi`), skipping DOIs that fail or are not found.
     * @param {string[]} dois Candidate DOIs.
     * @param {number} [limit=50] Maximum number of DOIs to look up.
     * @returns {Promise<Array>} Papers that were resolved.
     */
    async #fetchPapersByDois(dois, limit = 50) {
        const papers = [];
        for (const candidateDoi of dois.slice(0, limit)) {
            try {
                const paper = await this.getPaperByDoi(candidateDoi);
                if (paper) {
                    papers.push(paper);
                }
            }
            catch (error) {
                // Best effort: one bad DOI must not abort the whole batch.
                logDebug(`Crossref: skipping DOI ${candidateDoi}:`, error?.message);
            }
        }
        return papers;
    }
}
|
|
339
|
+
//# sourceMappingURL=CrossrefSearcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CrossrefSearcher.js","sourceRoot":"","sources":["../../src/platforms/CrossrefSearcher.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAwB,MAAM,OAAO,CAAC;AAC7C,OAAO,EAAS,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAwD,MAAM,kBAAkB,CAAC;AACrG,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AACrE,OAAO,EAAE,aAAa,EAAE,cAAc,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAC7F,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,MAAM,OAAO,gBAAiB,SAAQ,WAAW;IACvC,MAAM,CAAgB;IACtB,MAAM,CAAS;IACN,WAAW,CAAc;IACzB,KAAK,CAAwB;IAE9C,YAAY,MAAe;QACzB,KAAK,CAAC,UAAU,EAAE,gCAAgC,EAAE,SAAS,CAAC,CAAC;QAC/D,IAAI,CAAC,MAAM,GAAG,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,cAAc,CAAC;QAEtE,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE,kBAAkB;gBAC5B,YAAY,EAAE,GAAG,UAAU,mCAAmC,IAAI,CAAC,MAAM,GAAG;aAC7E;SACF,CAAC,CAAC;QAEH,4EAA4E;QAC5E,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC;YACjC,iBAAiB,EAAE,CAAC;YACpB,aAAa,EAAE,CAAC;SACjB,CAAC,CAAC;QAEH,IAAI,CAAC,KAAK,GAAG,IAAI,YAAY,CAAU;YACrC,OAAO,EAAE,GAAG;YACZ,KAAK,EAAE,OAAO,CAAC,SAAS;SACzB,CAAC,CAAC;IACL,CAAC;IAED,eAAe;QACb,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,KAAK;YACf,QAAQ,EAAE,KAAK;YACf,SAAS,EAAE,IAAI;YACf,cAAc,EAAE,KAAK;YACrB,gBAAgB,EAAE,CAAC,YAAY,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,WAAW,CAAC;SAC1E,CAAC;IACJ,CAAC;IAED;;;;OAIG;IACK,mBAAmB,CAAC,GAAW;QACrC,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;QAChC,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC;IAChD,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QACrD,MAAM,aAAa,GAAG,OAAc,CAAC;QACrC,MAAM,YAAY,GAAG,aAAa,CAAC,YAAY,KAAK,IAAI,CAAC;QAEzD,oBAAoB;QACpB,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,UAAU,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;YACpE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACxC,IAAI,MAAM,EAAE,CAAC;g
BACX,OAAO,MAAM,CAAC;YAChB,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,EAAE,IAAI,CAAC,CAAC;QAE5D,MAAM,MAAM,GAAwB;YAClC,KAAK,EAAE,KAAK;YACZ,IAAI,EAAE,UAAU;YAChB,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC;QAEF,gBAAgB;QAChB,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,cAAc;QACd,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;YACjB,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;YAChE,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC;gBAC1C,IAAI,SAAS,EAAE,CAAC;oBACd,OAAO,CAAC,IAAI,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;gBAC7C,CAAC;gBACD,IAAI,OAAO,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;oBACrC,OAAO,CAAC,IAAI,CAAC,kBAAkB,OAAO,EAAE,CAAC,CAAC;gBAC5C,CAAC;YACH,CAAC;QACH,CAAC;QAED,cAAc;QACd,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,CAAC,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACpC,CAAC;QAED,UAAU;QACV,MAAM,WAAW,GAA2B;YAC1C,WAAW,EAAE,WAAW;YACxB,MAAM,EAAE,WAAW;YACnB,WAAW,EAAE,wBAAwB;SACtC,CAAC;QACF,MAAM,CAAC,IAAI,GAAG,WAAW,CAAC,OAAO,CAAC,MAAM,IAAI,WAAW,CAAC,IAAI,WAAW,CAAC;QACxE,MAAM,CAAC,KAAK,GAAG,OAAO,CAAC,SAAS,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC;QAE5D,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,CAAC;YAE3C,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,gBAAgB,CAClD,GAAG,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,EACrC,EAAE,OAAO,EAAE,iBAAiB,EAAE,CAC/B,CAAC;YAEF,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;gBAC7D,MAAM,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBAEvD,gBAAgB;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,UAAU,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;gBACpE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;gBAEjC,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,OAAO,EAAE,CAAC;QACZ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,GAAW;QAC7B,MAAM,QAAQ,GAAG,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC;QAC/C,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,OAAO,IAAI,CAAC;QA
Cd,CAAC;QAED,IAAI,CAAC;YACH,uEAAuE;YACvE,MAAM,UAAU,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAChD,MAAM,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,CAAC;YAE3C,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,gBAAgB,CAClD,GAAG,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,UAAU,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,EAC5E,EAAE,OAAO,EAAE,wBAAwB,EAAE,CACtC,CAAC;YAEF,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,CAAC;gBACtD,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAErD,qBAAqB;gBACrB,IAAI,KAAK,EAAE,CAAC;oBACV,MAAM,UAAU,GAAG,IAAI,CAAC,oBAAoB,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACpE,KAAK,CAAC,UAAU,GAAG,UAAU,CAAC;gBAChC,CAAC;gBAED,OAAO,KAAK,CAAC;YACf,CAAC;YAED,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,sBAAsB;YACtB,IAAI,KAAK,EAAE,QAAQ,EAAE,MAAM,KAAK,GAAG,EAAE,CAAC;gBACpC,OAAO,IAAI,CAAC;YACd,CAAC;YACD,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;YAC7C,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,GAAW;QAC5B,kDAAkD;QAClD,2CAA2C;QAE3C,MAAM,QAAQ,GAAG,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC;QAC/C,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAI,CAAC;YACH,0BAA0B;YAC1B,MAAM,UAAU,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAEhD,8CAA8C;YAC9C,MAAM,QAAQ,GAAG,MAAM,WAAW,CAChC,KAAK,CAAC,GAAG,CACP,GAAG,aAAa,CAAC,aAAa,cAAc,UAAU,EAAE,EACxD,EAAE,OAAO,EAAE,QAAQ,CAAC,OAAO,EAAE,CAC9B,EACD,QAAQ,CAAC,OAAO,GAAG,QAAQ,CAAC,MAAM,EAClC,qCAAqC,CACtC,CAAC;YAEF,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;gBAC5B,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,IAAI,IAAI,EAAE,EAAE,CAAC;gBACvC,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;oBAChB,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC/B,CAAC;YACH,CAAC;YAED,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC5B,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,oCAAoC;YACpC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,KAAK,MAAM,SAAS,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;gBAChD,IAAI,CAAC;oBACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;oBAClD,IAAI,KAAK,EAAE,CAAC;wBACV,M
AAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACrB,CAAC;gBACH,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,mBAAmB;gBACrB,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,GAAW;QAC7B,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YAC5C,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,UAAU,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACjE,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,uCAAuC;YACvC,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,KAAK,MAAM,MAAM,IAAI,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;gBACnD,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;oBAClD,IAAI,QAAQ,EAAE,CAAC;wBACb,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBACxB,CAAC;gBACH,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,mBAAmB;gBACrB,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,OAAe,EAAE,OAAyB;QAC1D,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,OAAe,EAAE,OAAyB;QACxD,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;IACpE,CAAC;IAEO,mBAAmB,CAAC,IAAS;QACnC,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC;QAExC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;YACpC,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,UAAU,CAAC,IAAS;QAC1B,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC;YAE3B,gBAAgB;YAChB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC;YAEzC,kBAAkB;YAClB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;gBACvC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;gBACjC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC;gBACnC,MAAM,QAAQ,GAAG,GAAG,KAAK,IAAI,MAAM,EAAE,CAAC,IAAI,EAAE,CAAC;gBAC7C,IAAI,QAAQ,EAAE,CAAC;oBAC
b,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACzB,CAAC;YACH,CAAC;YAED,2CAA2C;YAC3C,IAAI,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;YACnC,IAAI,QAAQ,EAAE,CAAC;gBACb,mBAAmB;gBACnB,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;YAC9C,CAAC;YAED,2BAA2B;YAC3B,IAAI,aAAa,GAAgB,IAAI,CAAC;YACtC,IAAI,IAAwB,CAAC;YAE7B,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,CAAC;gBACvB,IAAI,CAAC,kBAAkB,CAAC;gBACxB,IAAI,CAAC,WAAW,CAAC;gBACjB,IAAI,CAAC,SAAS,CAAC,CAAC;YAEjC,IAAI,QAAQ,IAAI,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC5C,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC5C,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,SAAS,CAAC,CAAC,CAAC,KAAK,QAAQ,EAAE,CAAC;oBAC7D,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;oBACpB,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;oBAChC,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;oBAC9B,IAAI,CAAC;wBACH,aAAa,GAAG,IAAI,IAAI,CAAC,IAAc,EAAE,KAAK,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;oBAC3D,CAAC;oBAAC,MAAM,CAAC;wBACP,6BAA6B;oBAC/B,CAAC;gBACH,CAAC;YACH,CAAC;YAED,uBAAuB;YACvB,MAAM,kBAAkB,GAAG,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,CAAC;YACzD,MAAM,OAAO,GAAG,kBAAkB,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC;YAEnD,oBAAoB;YACpB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC;YAEvC,yBAAyB;YACzB,MAAM,aAAa,GAAG,IAAI,CAAC,wBAAwB,CAAC,IAAI,CAAC,CAAC;YAE1D,cAAc;YACd,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAE9D,+BAA+B;YAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,IAAI,SAAS,CAAC;YACrC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,SAAS,CAAC;YACxC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,SAAS,CAAC;YAEtC,gBAAgB;YAChB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;YAEhC,OAAO,YAAY,CAAC,MAAM,CAAC;gBACzB,OAAO,EAAE,GAAG;gBACZ,KAAK,EAAE,KAAK;gBACZ,OAAO,EAAE,OAAO;gBAChB,QAAQ,EAAE,QAAQ;gBAClB,MAAM,EAAE,UAAU;gBAClB,aAAa,EAAE,aAAa;gBAC5B,IAAI,EAAE,IAAI;gBACV,OAAO,EAAE,OAAO;gBAChB,GAAG,EAAE,GAAG;gBACR,GAAG,EAAE,GAAG;gBACR,MAAM,EAAE,EAAE;gBACV,MAAM,EAAE,MAAM;gBACd,KAAK,EAAE,KAAK;gBACZ,KAAK,EAAE,KAAK;gBACZ,aAAa,EAAE,aAAa;gBAC5B,KAAK,EAAE;oBACL,SAAS,EAAE,SAAS;oBACpB,IAAI,EAAE,O
AAO;oBACb,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE;oBACrB,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE;oBACrB,QAAQ,EAAE,IAAI,CAAC,OAAO,IAAI,EAAE;iBAC7B;aACF,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,QAAQ,CAAC,+BAA+B,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;YACzD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,oBAAoB,CAAC,IAAS;QACpC,MAAM,UAAU,GAAa,EAAE,CAAC;QAChC,MAAM,aAAa,GAAG,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC;QAE3C,KAAK,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;YAChC,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACpB,IAAI,GAAG,EAAE,CAAC;gBACR,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;CACF"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { Paper } from '../models/Paper.js';
|
|
2
|
+
import { PaperSource, SearchOptions, DownloadOptions, PlatformCapabilities } from './PaperSource.js';
|
|
3
|
+
/** Type declarations for the Europe PMC searcher, a PaperSource subclass. */
export declare class EuropePMCSearcher extends PaperSource {
|
|
4
|
+
/** HTTP client used for the platform requests (its type is erased in this declaration). */
private readonly client;
|
|
5
|
+
constructor();
|
|
6
|
+
/** Reports which operations and search options this platform supports. */
getCapabilities(): PlatformCapabilities;
|
|
7
|
+
/** Searches the platform for `query` and resolves to the parsed papers. */
search(query: string, options?: SearchOptions): Promise<Paper[]>;
|
|
8
|
+
/** Downloads the PDF of `paperId`; resolves to a string (presumably the saved file path - confirm against the download helper). */
downloadPdf(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
9
|
+
/** Downloads the PDF of `paperId` and resolves to text for it. */
readPaper(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
10
|
+
/* Private helpers below: only their names are visible in a declaration file. */
private parseItem;
|
|
11
|
+
private getDetails;
|
|
12
|
+
private normalizeId;
|
|
13
|
+
private parseAuthors;
|
|
14
|
+
private parsePublicationDate;
|
|
15
|
+
private findPdfUrl;
|
|
16
|
+
private findPdfUrls;
|
|
17
|
+
private isEuropePmcRenderUrl;
|
|
18
|
+
private findLandingUrl;
|
|
19
|
+
}
|
|
20
|
+
//# sourceMappingURL=EuropePMCSearcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EuropePMCSearcher.d.ts","sourceRoot":"","sources":["../../src/platforms/EuropePMCSearcher.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAgB,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAgCrG,qBAAa,iBAAkB,SAAQ,WAAW;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;;IAcvC,eAAe,IAAI,oBAAoB;IAWjC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAmBpE,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAmB5E,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAMhF,OAAO,CAAC,SAAS;YAiCH,UAAU;IAexB,OAAO,CAAC,WAAW;IAMnB,OAAO,CAAC,YAAY;IAQpB,OAAO,CAAC,oBAAoB;IAO5B,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,WAAW;IAiBnB,OAAO,CAAC,oBAAoB;IAI5B,OAAO,CAAC,cAAc;CAUvB"}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { PaperFactory } from '../models/Paper.js';
|
|
3
|
+
import { PaperSource } from './PaperSource.js';
|
|
4
|
+
import { TIMEOUTS, USER_AGENT } from '../config/constants.js';
|
|
5
|
+
import { downloadPdfFromUrl, safeFilename } from '../utils/PdfDownload.js';
|
|
6
|
+
import { PDFExtractor } from '../utils/PDFExtractor.js';
|
|
7
|
+
/**
 * Searcher for the Europe PMC REST web service.
 *
 * Provides keyword search, PDF download (several candidate URLs are tried in
 * order) and text extraction from the downloaded PDF.
 */
export class EuropePMCSearcher extends PaperSource {
    /** Axios instance configured with this source's base URL and default headers. */
    client;

    constructor() {
        super('europepmc', 'https://www.ebi.ac.uk/europepmc/webservices/rest');
        this.client = axios.create({
            baseURL: this.baseUrl,
            timeout: TIMEOUTS.DEFAULT,
            headers: {
                Accept: 'application/json',
                'User-Agent': USER_AGENT
            }
        });
    }

    /**
     * Describe what this platform supports.
     * @returns {object} Capability flags and the list of supported search options.
     */
    getCapabilities() {
        return {
            search: true,
            download: true,
            fullText: true,
            citations: true,
            requiresApiKey: false,
            supportedOptions: ['maxResults', 'year']
        };
    }

    /**
     * Search Europe PMC.
     * @param {string} query - Search expression sent to the `/search` endpoint.
     * @param {object} [options] - `maxResults` (clamped to 1..100, default 10) and `year`.
     * @returns {Promise<Array>} Parsed papers; items without id or title are dropped.
     */
    async search(query, options = {}) {
        try {
            const requested = Math.trunc(Number(options.maxResults)) || 10;
            // The year filter is expressed inside the query: `year` is not one of
            // the documented search request parameters, so sending it separately
            // would leave the results unfiltered.
            const yearFilter = this.buildYearFilter(options.year);
            const response = await this.client.get('/search', {
                params: {
                    query: yearFilter ? `(${query}) AND ${yearFilter}` : query,
                    pageSize: Math.max(1, Math.min(requested, 100)),
                    format: 'json',
                    resultType: 'core'
                }
            });
            const results = response.data?.resultList?.result || [];
            return results.map((item) => this.parseItem(item)).filter(Boolean);
        }
        catch (error) {
            this.handleHttpError(error, 'search');
        }
    }

    /**
     * Download the PDF of a paper, trying every candidate URL until one succeeds.
     * @param {string} paperId - `PMID:<id>`, `PMC<id>`, a DOI, or another external id.
     * @param {object} [options] - `savePath` selects the target directory (default `./downloads`).
     * @returns {Promise<string>} Whatever the download helper returns for the first successful URL.
     * @throws {Error} When no candidate URL exists or every candidate fails.
     */
    async downloadPdf(paperId, options = {}) {
        const details = await this.getDetails(paperId);
        const pdfUrls = details ? this.findPdfUrls(details) : [];
        if (pdfUrls.length === 0) {
            throw new Error(`Europe PMC paper ${paperId} does not expose an accessible PDF URL`);
        }
        const errors = [];
        for (const pdfUrl of pdfUrls) {
            try {
                return await downloadPdfFromUrl(pdfUrl, options.savePath || './downloads', `europepmc_${safeFilename(paperId)}`);
            }
            catch (error) {
                // Remember why this candidate failed and fall through to the next one.
                errors.push(`${pdfUrl}: ${error?.message || String(error)}`);
            }
        }
        throw new Error(`Europe PMC paper ${paperId} PDF candidates failed. ${errors.join(' | ')}`);
    }

    /**
     * Download a paper's PDF and extract its text.
     * @param {string} paperId - Identifier accepted by `downloadPdf`.
     * @param {object} [options] - Forwarded to `downloadPdf`.
     * @returns {Promise<string>} Extracted text, or a notice when nothing could be extracted.
     */
    async readPaper(paperId, options = {}) {
        const pdfPath = await this.downloadPdf(paperId, options);
        const result = await new PDFExtractor().extractFromFile(pdfPath);
        return result.text || `PDF downloaded to ${pdfPath}, but no text could be extracted.`;
    }

    /**
     * Convert one search result into a paper object.
     * @param {object} item - Raw result entry.
     * @returns {object|null} Paper built by `PaperFactory.create`, or null without id/title.
     */
    parseItem(item) {
        if (!item?.id || !item.title)
            return null;
        const paperId = this.normalizeId(item);
        const doi = item.doi || item.doiId || '';
        // Flat journalTitle/journalISSN are kept first for compatibility; the
        // nested journalInfo.journal object is the fallback (assumed shape of
        // "core" results - see the Europe PMC response documentation).
        const journal = item.journalInfo?.journal;
        return PaperFactory.create({
            paperId,
            title: this.cleanText(item.title),
            authors: this.parseAuthors(item),
            abstract: item.abstractText || '',
            doi,
            publishedDate: this.parsePublicationDate(item),
            pdfUrl: this.findPdfUrl(item),
            url: this.findLandingUrl(item, paperId, doi),
            source: 'europepmc',
            journal: item.journalTitle || journal?.title || '',
            keywords: this.asArray(item.keywordList?.keyword),
            citationCount: Number(item.citedByCount || 0),
            year: item.pubYear ? Number(item.pubYear) || undefined : undefined,
            extra: {
                issn: item.journalISSN || item.journalIssn || journal?.issn || '',
                isOpenAccess: item.isOpenAccess === 'Y',
                openAccessLicence: item.openAccessLicence || item.license || '',
                pmid: item.pmid || '',
                pmcid: item.pmcid || ''
            }
        });
    }

    /**
     * Fetch the first search result matching a paper id.
     * @param {string} paperId - `PMID:<id>`, `PMC<id>`, a DOI (`10.…`), or another external id.
     * @returns {Promise<object|null>} Raw result entry, or null when the id is empty or nothing matches.
     */
    async getDetails(paperId) {
        const id = String(paperId ?? '').trim();
        if (!id)
            return null;
        let query;
        if (id.startsWith('PMID:')) {
            query = `ext_id:${id.replace('PMID:', '')} src:med`;
        }
        else if (id.startsWith('PMC')) {
            // Fielded lookup so a free-text hit on another record cannot win.
            query = `PMCID:${id}`;
        }
        else if (id.startsWith('10.')) {
            // Quote the DOI: it may contain characters that are query syntax.
            query = `doi:"${id.replace(/"/g, '')}"`;
        }
        else {
            query = `ext_id:${id}`;
        }
        const response = await this.client.get('/search', {
            params: { query, format: 'json', resultType: 'core', pageSize: 1 }
        });
        return response.data?.resultList?.result?.[0] || null;
    }

    /**
     * Build the paper id used by this source from a raw result entry.
     * @param {object} item - Raw result entry.
     * @returns {string} `PMID:<id>` for MED records, `PMC<id>` for PMC records, else the id or DOI.
     */
    normalizeId(item) {
        const id = item.id == null ? '' : String(item.id);
        if (item.source === 'MED')
            return `PMID:${id}`;
        if (item.source === 'PMC')
            return id.startsWith('PMC') ? id : `PMC${id}`;
        return id || item.doi || '';
    }

    /**
     * Collect author names from a raw result entry.
     * @param {object} item - Raw result entry.
     * @returns {string[]} Non-empty author names (strings as-is, objects via `fullName`).
     */
    parseAuthors(item) {
        return this.asArray(item.authorList?.author)
            .map(author => (typeof author === 'string' ? author : author?.fullName || ''))
            .filter(Boolean);
    }

    /**
     * Derive the publication date of a raw result entry.
     * @param {object} item - Raw result entry.
     * @returns {*} Result of `parseDate` for a `YYYY-MM-DD` string, or null without `pubYear`.
     */
    parsePublicationDate(item) {
        if (!item.pubYear)
            return null;
        const first = typeof item.firstPublicationDate === 'string' ? item.firstPublicationDate : '';
        // Without an explicit month/day, prefer the full first-publication date
        // when it falls in the same year instead of defaulting to January 1st.
        if (!item.pubMonth && !item.pubDay && /^\d{4}-\d{2}-\d{2}$/.test(first) && first.startsWith(`${item.pubYear}-`)) {
            return this.parseDate(first);
        }
        const month = String(item.pubMonth || '1').padStart(2, '0');
        const day = String(item.pubDay || '1').padStart(2, '0');
        return this.parseDate(`${item.pubYear}-${month}-${day}`);
    }

    /**
     * @param {object} item - Raw result entry.
     * @returns {string} First PDF candidate URL, or '' when there is none.
     */
    findPdfUrl(item) {
        return this.findPdfUrls(item)[0] || '';
    }

    /**
     * List PDF candidate URLs in preference order: direct links (no ftp://),
     * then Europe PMC render links, then the NCBI PMC PDF URL built from the PMCID.
     * @param {object} item - Raw result entry.
     * @returns {string[]} De-duplicated URLs, first occurrence kept.
     */
    findPdfUrls(item) {
        const pdfLinks = this.asArray(item.fullTextUrlList?.fullTextUrl)
            .filter(entry => String(entry?.documentStyle || '').toLowerCase() === 'pdf' && entry.url)
            .map(entry => String(entry.url));
        const direct = pdfLinks.filter(url => !this.isEuropePmcRenderUrl(url) && !url.startsWith('ftp://'));
        const render = pdfLinks.filter(url => this.isEuropePmcRenderUrl(url));
        // Coerce to string: the id may arrive as a number.
        const pmcid = String(item.pmcid || (item.source === 'PMC' ? item.id : '') || '');
        const ncbiRender = pmcid
            ? [`https://www.ncbi.nlm.nih.gov/pmc/articles/${pmcid.startsWith('PMC') ? pmcid : `PMC${pmcid}`}/pdf/`]
            : [];
        return [...new Set([...direct, ...render, ...ncbiRender])];
    }

    /**
     * @param {string} url - Candidate URL.
     * @returns {boolean} True for `europepmc.org/articles/<id>?pdf=render` links.
     */
    isEuropePmcRenderUrl(url) {
        return /europepmc\.org\/articles\/[^?]+\?pdf=render/i.test(url);
    }

    /**
     * Pick the landing page of a paper: an HTML full-text link, else the DOI
     * resolver, else the PubMed / PMC page derived from the paper id.
     * @param {object} item - Raw result entry.
     * @param {string} paperId - Normalized paper id.
     * @param {string} doi - DOI, or '' when unknown.
     * @returns {string} Landing URL, or '' when none can be derived.
     */
    findLandingUrl(item, paperId, doi) {
        // Case-insensitive match, consistent with findPdfUrls.
        const html = this.asArray(item.fullTextUrlList?.fullTextUrl)
            .find(entry => String(entry?.documentStyle || '').toLowerCase() === 'html' && entry.url)?.url || '';
        if (html)
            return html;
        if (doi)
            return `https://doi.org/${doi}`;
        if (paperId.startsWith('PMID:'))
            return `https://pubmed.ncbi.nlm.nih.gov/${paperId.replace('PMID:', '')}/`;
        if (paperId.startsWith('PMC'))
            return `https://www.ncbi.nlm.nih.gov/pmc/articles/${paperId}/`;
        return '';
    }

    /**
     * Translate a year option into a `PUB_YEAR` query clause.
     * Accepts `2020`, `2019-2021`, `2020-` and `-2015`; anything else yields ''.
     * @param {string|number} [year] - Year option from the caller.
     * @returns {string} Query clause, or '' when no usable filter was given.
     */
    buildYearFilter(year) {
        if (year === undefined || year === null || year === '')
            return '';
        const text = String(year).trim();
        if (/^\d{4}$/.test(text))
            return `PUB_YEAR:${text}`;
        const range = /^(\d{4})?\s*-\s*(\d{4})?$/.exec(text);
        if (range && (range[1] || range[2])) {
            // Open-ended ranges get wide sentinel bounds.
            return `PUB_YEAR:[${range[1] || '1000'} TO ${range[2] || '3000'}]`;
        }
        return '';
    }

    /**
     * Normalize a value that may be an array, a single value, or absent.
     * @param {*} value - Value to normalize.
     * @returns {Array} The array itself, a one-element array, or [].
     */
    asArray(value) {
        if (Array.isArray(value))
            return value;
        return value ? [value] : [];
    }
}
|
|
173
|
+
//# sourceMappingURL=EuropePMCSearcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EuropePMCSearcher.js","sourceRoot":"","sources":["../../src/platforms/EuropePMCSearcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAwB,MAAM,OAAO,CAAC;AAC7C,OAAO,EAAS,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAwD,MAAM,kBAAkB,CAAC;AACrG,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAC3E,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AA6BxD,MAAM,OAAO,iBAAkB,SAAQ,WAAW;IAC/B,MAAM,CAAgB;IAEvC;QACE,KAAK,CAAC,WAAW,EAAE,kDAAkD,CAAC,CAAC;QACvE,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,OAAO,EAAE;gBACP,MAAM,EAAE,kBAAkB;gBAC1B,YAAY,EAAE,UAAU;aACzB;SACF,CAAC,CAAC;IACL,CAAC;IAED,eAAe;QACb,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,IAAI;YACd,QAAQ,EAAE,IAAI;YACd,SAAS,EAAE,IAAI;YACf,cAAc,EAAE,KAAK;YACrB,gBAAgB,EAAE,CAAC,YAAY,EAAE,MAAM,CAAC;SACzC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QACrD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE;gBAChD,MAAM,EAAE;oBACN,KAAK;oBACL,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,EAAE,GAAG,CAAC;oBACjD,MAAM,EAAE,MAAM;oBACd,UAAU,EAAE,MAAM;oBAClB,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBAChD;aACF,CAAC,CAAC;YAEH,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE,MAAM,IAAI,EAAE,CAAC;YACxD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAmB,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAY,CAAC;QAC/F,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,OAAe,EAAE,UAA2B,EAAE;QAC9D,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACzD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,oBAAoB,OAAO,wCAAwC,CAAC,CAAC;QACvF,CAAC;QAED,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,O
AAO,MAAM,kBAAkB,CAAC,MAAM,EAAE,OAAO,CAAC,QAAQ,IAAI,aAAa,EAAE,aAAa,YAAY,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YACnH,CAAC;YAAC,OAAO,KAAU,EAAE,CAAC;gBACpB,MAAM,CAAC,IAAI,CAAC,GAAG,MAAM,KAAK,KAAK,EAAE,OAAO,IAAI,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,oBAAoB,OAAO,2BAA2B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC9F,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,OAAe,EAAE,UAA2B,EAAE;QAC5D,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACzD,MAAM,MAAM,GAAG,MAAM,IAAI,YAAY,EAAE,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;QACjE,OAAO,MAAM,CAAC,IAAI,IAAI,qBAAqB,OAAO,mCAAmC,CAAC;IACxF,CAAC;IAEO,SAAS,CAAC,IAAmB;QACnC,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAEzC,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC;QAE3C,OAAO,YAAY,CAAC,MAAM,CAAC;YACzB,OAAO;YACP,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC;YACjC,OAAO,EAAE,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC;YAChC,QAAQ,EAAE,IAAI,CAAC,YAAY,IAAI,EAAE;YACjC,GAAG;YACH,aAAa,EAAE,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC;YAC9C,MAAM;YACN,GAAG,EAAE,UAAU;YACf,MAAM,EAAE,WAAW;YACnB,OAAO,EAAE,IAAI,CAAC,YAAY,IAAI,EAAE;YAChC,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE;YACzE,aAAa,EAAE,MAAM,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,CAAC;YAC7C,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,SAAS;YAClE,KAAK,EAAE;gBACL,IAAI,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE;gBAC5B,YAAY,EAAE,IAAI,CAAC,YAAY,KAAK,GAAG;gBACvC,iBAAiB,EAAE,IAAI,CAAC,iBAAiB,IAAI,EAAE;gBAC/C,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE;gBACrB,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,EAAE;aACxB;SACF,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,UAAU,CAAC,OAAe;QACtC,MAAM,KAAK,GAAG,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;YACvC,CAAC,
CAAC,UAAU,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU;YAClD,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;gBACzB,CAAC,CAAC,OAAO;gBACT,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;oBACzB,CAAC,CAAC,OAAO,OAAO,EAAE;oBAClB,CAAC,CAAC,UAAU,OAAO,EAAE,CAAC;QAE5B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE;YAChD,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,EAAE;SACnE,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IACxD,CAAC;IAEO,WAAW,CAAC,IAAmB;QACrC,IAAI,IAAI,CAAC,MAAM,KAAK,KAAK;YAAE,OAAO,QAAQ,IAAI,CAAC,EAAE,EAAE,CAAC;QACpD,IAAI,IAAI,CAAC,MAAM,KAAK,KAAK;YAAE,OAAO,IAAI,CAAC,EAAE,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,EAAE,EAAE,CAAC;QACzF,OAAO,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC;IACnC,CAAC;IAEO,YAAY,CAAC,IAAmB;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,MAAM,IAAI,EAAE,CAAC;QAC9C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC;YAAE,OAAO,EAAE,CAAC;QACvC,OAAO,OAAO;aACX,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC;aAC5E,MAAM,CAAC,OAAO,CAAC,CAAC;IACrB,CAAC;IAEO,oBAAoB,CAAC,IAAmB;QAC9C,IAAI,CAAC,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC5D,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACxD,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,OAAO,IAAI,KAAK,IAAI,GAAG,EAAE,CAAC,CAAC;IAC3D,CAAC;IAEO,UAAU,CAAC,IAAmB;QACpC,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACzC,CAAC;IAEO,WAAW,CAAC,IAAmB;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,eAAe,EAAE,WAAW,CAAC;QAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,aAAa,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,KAAK,KAAK,IAAI,KAAK,CA
AC,GAAG,CAAC,CAAC;QAC1G,MAAM,MAAM,GAAG,IAAI;aAChB,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,IAAI,EAAE,CAAC;aAC7B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC;QACtF,MAAM,MAAM,GAAG,IAAI;aAChB,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,IAAI,EAAE,CAAC;aAC7B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,CAAC,CAAC;QACjD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,KAAK;YACtB,CAAC,CAAC,CAAC,6CAA6C,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,KAAK,EAAE,OAAO,CAAC;YACvG,CAAC,CAAC,EAAE,CAAC;QACP,OAAO,CAAC,GAAG,MAAM,EAAE,GAAG,MAAM,EAAE,GAAG,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,GAAG,IAAI,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,CAAC;IAChH,CAAC;IAEO,oBAAoB,CAAC,GAAW;QACtC,OAAO,8CAA8C,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAClE,CAAC;IAEO,cAAc,CAAC,IAAmB,EAAE,OAAe,EAAE,GAAW;QACtE,MAAM,IAAI,GAAG,IAAI,CAAC,eAAe,EAAE,WAAW,CAAC;QAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,aAAa,KAAK,MAAM,IAAI,KAAK,CAAC,GAAG,CAAC,EAAE,GAAG,IAAI,EAAE,CAAC;QACxF,IAAI,IAAI;YAAE,OAAO,IAAI,CAAC;QACtB,IAAI,GAAG;YAAE,OAAO,mBAAmB,GAAG,EAAE,CAAC;QACzC,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC;YAAE,OAAO,mCAAmC,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,GAAG,CAAC;QAC3G,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;YAAE,OAAO,6CAA6C,OAAO,GAAG,CAAC;QAC9F,OAAO,EAAE,CAAC;IACZ,CAAC;CACF"}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Google Scholar searcher - web-scraping implementation
|
|
3
|
+
* Based on HTML parsing; includes anti-detection mechanisms
|
|
4
|
+
*/
|
|
5
|
+
import { Paper } from '../models/Paper.js';
|
|
6
|
+
import { PaperSource, SearchOptions, DownloadOptions, PlatformCapabilities } from './PaperSource.js';
|
|
7
|
+
/** Search options specific to Google Scholar, extending the shared SearchOptions. */
interface GoogleScholarOptions extends SearchOptions {
|
|
8
|
+
/** Language setting */
|
|
9
|
+
language?: string;
|
|
10
|
+
/** Time range (years) */
|
|
11
|
+
yearLow?: number;
|
|
12
|
+
yearHigh?: number;
|
|
13
|
+
}
|
|
14
|
+
/** Type declarations for the Google Scholar searcher, a PaperSource subclass. */
export declare class GoogleScholarSearcher extends PaperSource {
|
|
15
|
+
private readonly scholarUrl;
|
|
16
|
+
private readonly userAgents;
|
|
17
|
+
constructor();
|
|
18
|
+
getCapabilities(): PlatformCapabilities;
|
|
19
|
+
/**
|
|
20
|
+
* Search Google Scholar for papers
|
|
21
|
+
*/
|
|
22
|
+
search(query: string, options?: GoogleScholarOptions): Promise<Paper[]>;
|
|
23
|
+
/**
|
|
24
|
+
* Google Scholar does not support direct PDF download
|
|
25
|
+
*/
|
|
26
|
+
downloadPdf(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
27
|
+
/**
|
|
28
|
+
* Google Scholar does not provide full-text content
|
|
29
|
+
*/
|
|
30
|
+
readPaper(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
31
|
+
/**
|
|
32
|
+
* Build the search parameters
|
|
33
|
+
*/
|
|
34
|
+
private buildSearchParams;
|
|
35
|
+
/**
|
|
36
|
+
* Issue the Scholar request
|
|
37
|
+
*/
|
|
38
|
+
private makeScholarRequest;
|
|
39
|
+
/**
|
|
40
|
+
* Parse a single Scholar search result
|
|
41
|
+
*/
|
|
42
|
+
private parseScholarResult;
|
|
43
|
+
/**
|
|
44
|
+
* Extract author information
|
|
45
|
+
*/
|
|
46
|
+
private extractAuthors;
|
|
47
|
+
/**
|
|
48
|
+
* Extract the year
|
|
49
|
+
*/
|
|
50
|
+
private extractYear;
|
|
51
|
+
/**
|
|
52
|
+
* Extract journal information
|
|
53
|
+
*/
|
|
54
|
+
private extractJournal;
|
|
55
|
+
/**
|
|
56
|
+
* Extract the citation count
|
|
57
|
+
*/
|
|
58
|
+
private extractCitationCount;
|
|
59
|
+
/**
|
|
60
|
+
* Generate the paper ID
|
|
61
|
+
*/
|
|
62
|
+
private generatePaperId;
|
|
63
|
+
/**
|
|
64
|
+
* Simple hash function
|
|
65
|
+
*/
|
|
66
|
+
private simpleHash;
|
|
67
|
+
/**
|
|
68
|
+
* Get a random User-Agent
|
|
69
|
+
*/
|
|
70
|
+
private getRandomUserAgent;
|
|
71
|
+
/**
|
|
72
|
+
* Random delay
|
|
73
|
+
*/
|
|
74
|
+
private randomDelay;
|
|
75
|
+
}
|
|
76
|
+
export {};
|
|
77
|
+
//# sourceMappingURL=GoogleScholarSearcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"GoogleScholarSearcher.d.ts","sourceRoot":"","sources":["../../src/platforms/GoogleScholarSearcher.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,EAAE,KAAK,EAAgB,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAKrG,UAAU,oBAAqB,SAAQ,aAAa;IAClD,WAAW;IACX,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,eAAe;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,qBAAsB,SAAQ,WAAW;IACpD,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAwC;IACnE,OAAO,CAAC,QAAQ,CAAC,UAAU,CAIzB;;IAMF,eAAe,IAAI,oBAAoB;IAWvC;;OAEG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,oBAAyB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAoDjF;;OAEG;IACG,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC;IAI9E;;OAEG;IACG,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC;IAI5E;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAuBzB;;OAEG;YACW,kBAAkB;IAyBhC;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAiE1B;;OAEG;IACH,OAAO,CAAC,cAAc;IAStB;;OAEG;IACH,OAAO,CAAC,WAAW;IAKnB;;OAEG;IACH,OAAO,CAAC,cAAc;IAStB;;OAEG;IACH,OAAO,CAAC,oBAAoB;IAK5B;;OAEG;IACH,OAAO,CAAC,eAAe;IAMvB;;OAEG;IACH,OAAO,CAAC,UAAU;IAUlB;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAI1B;;OAEG;YACW,WAAW;CAI1B"}
|