paper-search-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. package/.env.example +165 -0
  2. package/LICENSE +21 -0
  3. package/README-sc.md +642 -0
  4. package/README.md +642 -0
  5. package/dist/cli.d.ts +3 -0
  6. package/dist/cli.d.ts.map +1 -0
  7. package/dist/cli.js +637 -0
  8. package/dist/cli.js.map +1 -0
  9. package/dist/config/ConfigService.d.ts +26 -0
  10. package/dist/config/ConfigService.d.ts.map +1 -0
  11. package/dist/config/ConfigService.js +145 -0
  12. package/dist/config/ConfigService.js.map +1 -0
  13. package/dist/config/constants.d.ts +140 -0
  14. package/dist/config/constants.d.ts.map +1 -0
  15. package/dist/config/constants.js +93 -0
  16. package/dist/config/constants.js.map +1 -0
  17. package/dist/core/diagnostics.d.ts +43 -0
  18. package/dist/core/diagnostics.d.ts.map +1 -0
  19. package/dist/core/diagnostics.js +544 -0
  20. package/dist/core/diagnostics.js.map +1 -0
  21. package/dist/core/handleToolCall.d.ts +8 -0
  22. package/dist/core/handleToolCall.d.ts.map +1 -0
  23. package/dist/core/handleToolCall.js +440 -0
  24. package/dist/core/handleToolCall.js.map +1 -0
  25. package/dist/core/schemas.d.ts +454 -0
  26. package/dist/core/schemas.d.ts.map +1 -0
  27. package/dist/core/schemas.js +322 -0
  28. package/dist/core/schemas.js.map +1 -0
  29. package/dist/core/searchers.d.ts +45 -0
  30. package/dist/core/searchers.d.ts.map +1 -0
  31. package/dist/core/searchers.js +73 -0
  32. package/dist/core/searchers.js.map +1 -0
  33. package/dist/core/tools.d.ts +7 -0
  34. package/dist/core/tools.d.ts.map +1 -0
  35. package/dist/core/tools.js +640 -0
  36. package/dist/core/tools.js.map +1 -0
  37. package/dist/models/Paper.d.ts +64 -0
  38. package/dist/models/Paper.d.ts.map +1 -0
  39. package/dist/models/Paper.js +70 -0
  40. package/dist/models/Paper.js.map +1 -0
  41. package/dist/platforms/ArxivSearcher.d.ts +64 -0
  42. package/dist/platforms/ArxivSearcher.d.ts.map +1 -0
  43. package/dist/platforms/ArxivSearcher.js +531 -0
  44. package/dist/platforms/ArxivSearcher.js.map +1 -0
  45. package/dist/platforms/BioRxivSearcher.d.ts +47 -0
  46. package/dist/platforms/BioRxivSearcher.d.ts.map +1 -0
  47. package/dist/platforms/BioRxivSearcher.js +196 -0
  48. package/dist/platforms/BioRxivSearcher.js.map +1 -0
  49. package/dist/platforms/CORESearcher.d.ts +16 -0
  50. package/dist/platforms/CORESearcher.d.ts.map +1 -0
  51. package/dist/platforms/CORESearcher.js +148 -0
  52. package/dist/platforms/CORESearcher.js.map +1 -0
  53. package/dist/platforms/CrossrefSearcher.d.ts +34 -0
  54. package/dist/platforms/CrossrefSearcher.d.ts.map +1 -0
  55. package/dist/platforms/CrossrefSearcher.js +339 -0
  56. package/dist/platforms/CrossrefSearcher.js.map +1 -0
  57. package/dist/platforms/EuropePMCSearcher.d.ts +20 -0
  58. package/dist/platforms/EuropePMCSearcher.d.ts.map +1 -0
  59. package/dist/platforms/EuropePMCSearcher.js +173 -0
  60. package/dist/platforms/EuropePMCSearcher.js.map +1 -0
  61. package/dist/platforms/GoogleScholarSearcher.d.ts +77 -0
  62. package/dist/platforms/GoogleScholarSearcher.d.ts.map +1 -0
  63. package/dist/platforms/GoogleScholarSearcher.js +262 -0
  64. package/dist/platforms/GoogleScholarSearcher.js.map +1 -0
  65. package/dist/platforms/IACRSearcher.d.ts +51 -0
  66. package/dist/platforms/IACRSearcher.d.ts.map +1 -0
  67. package/dist/platforms/IACRSearcher.js +339 -0
  68. package/dist/platforms/IACRSearcher.js.map +1 -0
  69. package/dist/platforms/OpenAIRESearcher.d.ts +22 -0
  70. package/dist/platforms/OpenAIRESearcher.d.ts.map +1 -0
  71. package/dist/platforms/OpenAIRESearcher.js +223 -0
  72. package/dist/platforms/OpenAIRESearcher.js.map +1 -0
  73. package/dist/platforms/OpenAlexSearcher.d.ts +14 -0
  74. package/dist/platforms/OpenAlexSearcher.d.ts.map +1 -0
  75. package/dist/platforms/OpenAlexSearcher.js +114 -0
  76. package/dist/platforms/OpenAlexSearcher.js.map +1 -0
  77. package/dist/platforms/PMCSearcher.d.ts +20 -0
  78. package/dist/platforms/PMCSearcher.d.ts.map +1 -0
  79. package/dist/platforms/PMCSearcher.js +177 -0
  80. package/dist/platforms/PMCSearcher.js.map +1 -0
  81. package/dist/platforms/PaperSource.d.ts +143 -0
  82. package/dist/platforms/PaperSource.d.ts.map +1 -0
  83. package/dist/platforms/PaperSource.js +125 -0
  84. package/dist/platforms/PaperSource.js.map +1 -0
  85. package/dist/platforms/PubMedSearcher.d.ts +104 -0
  86. package/dist/platforms/PubMedSearcher.d.ts.map +1 -0
  87. package/dist/platforms/PubMedSearcher.js +422 -0
  88. package/dist/platforms/PubMedSearcher.js.map +1 -0
  89. package/dist/platforms/SciHubSearcher.d.ts +66 -0
  90. package/dist/platforms/SciHubSearcher.d.ts.map +1 -0
  91. package/dist/platforms/SciHubSearcher.js +398 -0
  92. package/dist/platforms/SciHubSearcher.js.map +1 -0
  93. package/dist/platforms/ScienceDirectSearcher.d.ts +42 -0
  94. package/dist/platforms/ScienceDirectSearcher.d.ts.map +1 -0
  95. package/dist/platforms/ScienceDirectSearcher.js +326 -0
  96. package/dist/platforms/ScienceDirectSearcher.js.map +1 -0
  97. package/dist/platforms/ScopusSearcher.d.ts +43 -0
  98. package/dist/platforms/ScopusSearcher.d.ts.map +1 -0
  99. package/dist/platforms/ScopusSearcher.js +364 -0
  100. package/dist/platforms/ScopusSearcher.js.map +1 -0
  101. package/dist/platforms/SemanticScholarSearcher.d.ts +96 -0
  102. package/dist/platforms/SemanticScholarSearcher.d.ts.map +1 -0
  103. package/dist/platforms/SemanticScholarSearcher.js +419 -0
  104. package/dist/platforms/SemanticScholarSearcher.js.map +1 -0
  105. package/dist/platforms/SpringerSearcher.d.ts +54 -0
  106. package/dist/platforms/SpringerSearcher.d.ts.map +1 -0
  107. package/dist/platforms/SpringerSearcher.js +407 -0
  108. package/dist/platforms/SpringerSearcher.js.map +1 -0
  109. package/dist/platforms/UnpaywallSearcher.d.ts +18 -0
  110. package/dist/platforms/UnpaywallSearcher.d.ts.map +1 -0
  111. package/dist/platforms/UnpaywallSearcher.js +115 -0
  112. package/dist/platforms/UnpaywallSearcher.js.map +1 -0
  113. package/dist/platforms/WebOfScienceSearcher.d.ts +111 -0
  114. package/dist/platforms/WebOfScienceSearcher.d.ts.map +1 -0
  115. package/dist/platforms/WebOfScienceSearcher.js +500 -0
  116. package/dist/platforms/WebOfScienceSearcher.js.map +1 -0
  117. package/dist/platforms/WileySearcher.d.ts +44 -0
  118. package/dist/platforms/WileySearcher.d.ts.map +1 -0
  119. package/dist/platforms/WileySearcher.js +148 -0
  120. package/dist/platforms/WileySearcher.js.map +1 -0
  121. package/dist/services/CitationService.d.ts +66 -0
  122. package/dist/services/CitationService.d.ts.map +1 -0
  123. package/dist/services/CitationService.js +237 -0
  124. package/dist/services/CitationService.js.map +1 -0
  125. package/dist/services/MultiSourceSearchService.d.ts +19 -0
  126. package/dist/services/MultiSourceSearchService.d.ts.map +1 -0
  127. package/dist/services/MultiSourceSearchService.js +96 -0
  128. package/dist/services/MultiSourceSearchService.js.map +1 -0
  129. package/dist/services/OpenAccessFallbackService.d.ts +20 -0
  130. package/dist/services/OpenAccessFallbackService.d.ts.map +1 -0
  131. package/dist/services/OpenAccessFallbackService.js +124 -0
  132. package/dist/services/OpenAccessFallbackService.js.map +1 -0
  133. package/dist/utils/ErrorHandler.d.ts +99 -0
  134. package/dist/utils/ErrorHandler.d.ts.map +1 -0
  135. package/dist/utils/ErrorHandler.js +266 -0
  136. package/dist/utils/ErrorHandler.js.map +1 -0
  137. package/dist/utils/Logger.d.ts +6 -0
  138. package/dist/utils/Logger.d.ts.map +1 -0
  139. package/dist/utils/Logger.js +26 -0
  140. package/dist/utils/Logger.js.map +1 -0
  141. package/dist/utils/PDFExtractor.d.ts +34 -0
  142. package/dist/utils/PDFExtractor.d.ts.map +1 -0
  143. package/dist/utils/PDFExtractor.js +130 -0
  144. package/dist/utils/PDFExtractor.js.map +1 -0
  145. package/dist/utils/PdfDownload.d.ts +7 -0
  146. package/dist/utils/PdfDownload.d.ts.map +1 -0
  147. package/dist/utils/PdfDownload.js +52 -0
  148. package/dist/utils/PdfDownload.js.map +1 -0
  149. package/dist/utils/QuotaManager.d.ts +32 -0
  150. package/dist/utils/QuotaManager.d.ts.map +1 -0
  151. package/dist/utils/QuotaManager.js +95 -0
  152. package/dist/utils/QuotaManager.js.map +1 -0
  153. package/dist/utils/RateLimiter.d.ts +50 -0
  154. package/dist/utils/RateLimiter.d.ts.map +1 -0
  155. package/dist/utils/RateLimiter.js +121 -0
  156. package/dist/utils/RateLimiter.js.map +1 -0
  157. package/dist/utils/RequestCache.d.ts +26 -0
  158. package/dist/utils/RequestCache.d.ts.map +1 -0
  159. package/dist/utils/RequestCache.js +66 -0
  160. package/dist/utils/RequestCache.js.map +1 -0
  161. package/dist/utils/SecurityUtils.d.ts +80 -0
  162. package/dist/utils/SecurityUtils.d.ts.map +1 -0
  163. package/dist/utils/SecurityUtils.js +357 -0
  164. package/dist/utils/SecurityUtils.js.map +1 -0
  165. package/package.json +111 -0
  166. package/skills/paper-search/SKILL.md +192 -0
@@ -0,0 +1,262 @@
1
+ /**
2
+ * Google Scholar searcher - web-scraping implementation
3
+ * Based on HTML parsing; includes anti-detection measures
4
+ */
5
+ import axios from 'axios';
6
+ import * as cheerio from 'cheerio';
7
+ import { PaperFactory } from '../models/Paper.js';
8
+ import { PaperSource } from './PaperSource.js';
9
+ import { TIMEOUTS } from '../config/constants.js';
10
+ import { logDebug } from '../utils/Logger.js';
11
+ import { ErrorHandler } from '../utils/ErrorHandler.js';
12
+ export class GoogleScholarSearcher extends PaperSource {
13
+ scholarUrl = 'https://scholar.google.com/scholar'; // endpoint requested by makeScholarRequest
14
+ userAgents = [ // browser User-Agent strings; getRandomUserAgent picks one of them at random
15
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
16
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
17
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
18
+ ];
19
+ constructor() {
20
+ super('google_scholar', 'https://scholar.google.com'); // presumably (platform name, base URL) for PaperSource - confirm against PaperSource.js
21
+ }
22
+ getCapabilities() { // Returns a fixed description of what this source supports
23
+ return {
24
+ search: true,
25
+ download: false, // Google Scholar offers no direct download
26
+ fullText: false, // metadata and abstract snippets only
27
+ citations: true, // citation counts can be extracted
28
+ requiresApiKey: false, // no API key required, but requests may be throttled
29
+ supportedOptions: ['maxResults', 'year', 'author'] // NOTE(review): buildSearchParams reads yearLow/yearHigh/language, not 'year' - confirm the intended option names
30
+ };
31
+ }
32
+ /**
33
+ * Search Google Scholar page by page (start offset advances by 10 per request) until options.maxResults papers (default 10) are collected, a page has no `.gs_ri` entries, or a response status other than 200 is seen; resolves with the collected papers. Any thrown error is handed to this.handleHttpError (defined outside this file; presumably it rethrows - confirm).
34
+ */
35
+ async search(query, options = {}) {
36
+ logDebug(`Google Scholar Search: query="${query}"`);
37
+ try {
38
+ const papers = [];
39
+ let start = 0;
40
+ const resultsPerPage = 10;
41
+ const maxResults = options.maxResults || 10;
42
+ while (papers.length < maxResults) {
43
+ // Wait a random interval before every request to reduce the chance of bot detection
44
+ await this.randomDelay();
45
+ const params = this.buildSearchParams(query, start, options);
46
+ const response = await this.makeScholarRequest(params);
47
+ if (response.status !== 200) {
48
+ logDebug(`Google Scholar HTTP Error: ${response.status}`);
49
+ break;
50
+ }
51
+ const $ = cheerio.load(response.data);
52
+ const results = $('.gs_ri'); // search-result containers
53
+ if (results.length === 0) {
54
+ logDebug('Google Scholar: No more results found');
55
+ break;
56
+ }
57
+ logDebug(`Google Scholar: Found ${results.length} results on page`);
58
+ // Parse each result; entries that parse to null (e.g. books) are skipped
59
+ results.each((index, element) => {
60
+ if (papers.length >= maxResults)
61
+ return false; // returning false stops the each() traversal
62
+ const paper = this.parseScholarResult($, $(element));
63
+ if (paper) {
64
+ papers.push(paper);
65
+ }
66
+ });
67
+ start += resultsPerPage;
68
+ }
69
+ logDebug(`Google Scholar Results: Found ${papers.length} papers`);
70
+ return papers;
71
+ }
72
+ catch (error) {
73
+ this.handleHttpError(error, 'search');
74
+ }
75
+ }
76
+ /**
77
+ * Google Scholar does not support direct PDF download; this method always rejects with an Error and ignores both arguments.
78
+ */
79
+ async downloadPdf(paperId, options) {
80
+ throw new Error('Google Scholar does not support direct PDF download. Please use the paper URL to access the publisher.');
81
+ }
82
+ /**
83
+ * Google Scholar does not provide full-text content; this method always rejects with an Error and ignores both arguments.
84
+ */
85
+ async readPaper(paperId, options) {
86
+ throw new Error('Google Scholar does not provide full-text content. Please use the paper URL to access the full text.');
87
+ }
88
+ /**
89
+ * 构建搜索参数
90
+ */
91
+ buildSearchParams(query, start, options) {
92
+ const params = {
93
+ q: query,
94
+ start: start,
95
+ hl: options.language || 'en',
96
+ as_sdt: '0,5', // 包括文章和引用
97
+ as_vis: '1' // 排除引用,只显示学术论文
98
+ };
99
+ // 添加年份过滤
100
+ if (options.yearLow || options.yearHigh) {
101
+ params.as_ylo = options.yearLow || '';
102
+ params.as_yhi = options.yearHigh || '';
103
+ }
104
+ // 添加作者过滤
105
+ if (options.author) {
106
+ params.as_sauthors = options.author;
107
+ }
108
+ return params;
109
+ }
110
+ /**
111
+ * 发起Scholar请求
112
+ */
113
+ async makeScholarRequest(params) {
114
+ const userAgent = this.getRandomUserAgent();
115
+ const config = {
116
+ params,
117
+ headers: {
118
+ 'User-Agent': userAgent,
119
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
120
+ 'Accept-Language': 'en-US,en;q=0.5',
121
+ 'Accept-Encoding': 'gzip, deflate',
122
+ 'Connection': 'keep-alive',
123
+ 'Upgrade-Insecure-Requests': '1'
124
+ },
125
+ timeout: TIMEOUTS.DEFAULT
126
+ };
127
+ logDebug(`Google Scholar Request: GET ${this.scholarUrl}`);
128
+ logDebug('Scholar params:', params);
129
+ return await ErrorHandler.retryWithBackoff(() => axios.get(this.scholarUrl, config), { context: 'Google Scholar search' });
130
+ }
131
+ /**
132
+ * 解析单个Scholar搜索结果
133
+ */
134
+ parseScholarResult($, element) {
135
+ try {
136
+ // 提取标题和链接
137
+ const titleElement = element.find('h3.gs_rt');
138
+ const titleLink = titleElement.find('a');
139
+ const title = titleElement.text().replace(/^\[PDF\]|\[HTML\]|\[BOOK\]|\[B\]/, '').trim();
140
+ const url = titleLink.attr('href') || '';
141
+ if (!title) {
142
+ return null;
143
+ }
144
+ // 过滤掉书籍结果,优先学术论文
145
+ const titleText = titleElement.text();
146
+ if (titleText.includes('[BOOK]') || titleText.includes('[B]') ||
147
+ url.includes('books.google.com')) {
148
+ return null; // 跳过书籍结果
149
+ }
150
+ // 提取作者和出版信息
151
+ const infoElement = element.find('div.gs_a');
152
+ const infoText = infoElement.text();
153
+ const authors = this.extractAuthors(infoText);
154
+ const year = this.extractYear(infoText);
155
+ // 提取摘要
156
+ const abstractElement = element.find('div.gs_rs');
157
+ const abstract = abstractElement.text() || '';
158
+ // 提取引用次数
159
+ const citationElement = element.find('div.gs_fl a').filter((i, el) => {
160
+ return $(el).text().includes('Cited by');
161
+ });
162
+ const citationText = citationElement.text();
163
+ const citationCount = this.extractCitationCount(citationText);
164
+ // 生成论文ID
165
+ const paperId = this.generatePaperId(title, authors);
166
+ return PaperFactory.create({
167
+ paperId,
168
+ title: this.cleanText(title),
169
+ authors,
170
+ abstract: this.cleanText(abstract),
171
+ doi: '', // Google Scholar通常不直接提供DOI
172
+ publishedDate: year ? new Date(year, 0, 1) : null,
173
+ pdfUrl: '', // 需要额外处理PDF链接
174
+ url,
175
+ source: 'googlescholar',
176
+ categories: [],
177
+ keywords: [],
178
+ citationCount,
179
+ journal: this.extractJournal(infoText),
180
+ year,
181
+ extra: {
182
+ scholarId: paperId,
183
+ infoText
184
+ }
185
+ });
186
+ }
187
+ catch (error) {
188
+ logDebug('Error parsing Google Scholar result:', error);
189
+ return null;
190
+ }
191
+ }
192
+ /**
193
+ * 提取作者信息
194
+ */
195
+ extractAuthors(infoText) {
196
+ const parts = infoText.split(' - ');
197
+ if (parts.length > 0) {
198
+ const authorPart = parts[0];
199
+ return authorPart.split(',').map(author => author.trim()).filter(a => a.length > 0);
200
+ }
201
+ return [];
202
+ }
203
+ /**
204
+ * 提取年份
205
+ */
206
+ extractYear(text) {
207
+ const yearMatch = text.match(/\b(19|20)\d{2}\b/);
208
+ return yearMatch ? parseInt(yearMatch[0], 10) : undefined;
209
+ }
210
+ /**
211
+ * 提取期刊信息
212
+ */
213
+ extractJournal(infoText) {
214
+ const parts = infoText.split(' - ');
215
+ if (parts.length > 1) {
216
+ // 通常期刊在第二部分
217
+ return parts[1].split(',')[0].trim();
218
+ }
219
+ return '';
220
+ }
221
+ /**
222
+ * 提取引用次数
223
+ */
224
+ extractCitationCount(citationText) {
225
+ const match = citationText.match(/Cited by (\d+)/);
226
+ return match ? parseInt(match[1], 10) : 0;
227
+ }
228
+ /**
229
+ * 生成论文ID
230
+ */
231
+ generatePaperId(title, authors) {
232
+ const titleHash = this.simpleHash(title);
233
+ const authorHash = this.simpleHash(authors.join(''));
234
+ return `gs_${titleHash}_${authorHash}`;
235
+ }
236
+ /**
237
+ * 简单哈希函数
238
+ */
239
+ simpleHash(str) {
240
+ let hash = 0;
241
+ for (let i = 0; i < str.length; i++) {
242
+ const char = str.charCodeAt(i);
243
+ hash = ((hash << 5) - hash) + char;
244
+ hash = hash & hash; // 转换为32位整数
245
+ }
246
+ return Math.abs(hash).toString(36);
247
+ }
248
+ /**
249
+ * 获取随机User-Agent
250
+ */
251
+ getRandomUserAgent() {
252
+ return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
253
+ }
254
+ /**
255
+ * 随机延迟
256
+ */
257
+ async randomDelay() {
258
+ const delay = Math.random() * 2000 + 1000; // 1-3秒随机延迟
259
+ await new Promise(resolve => setTimeout(resolve, delay));
260
+ }
261
+ }
262
+ //# sourceMappingURL=GoogleScholarSearcher.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GoogleScholarSearcher.js","sourceRoot":"","sources":["../../src/platforms/GoogleScholarSearcher.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAS,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAwD,MAAM,kBAAkB,CAAC;AACrG,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAClD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAUxD,MAAM,OAAO,qBAAsB,SAAQ,WAAW;IACnC,UAAU,GAAG,oCAAoC,CAAC;IAClD,UAAU,GAAG;QAC5B,qHAAqH;QACrH,yHAAyH;QACzH,2GAA2G;KAC5G,CAAC;IAEF;QACE,KAAK,CAAC,gBAAgB,EAAE,4BAA4B,CAAC,CAAC;IACxD,CAAC;IAED,eAAe;QACb,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,KAAK,EAAE,wBAAwB;YACzC,QAAQ,EAAE,KAAK,EAAE,WAAW;YAC5B,SAAS,EAAE,IAAI,EAAE,WAAW;YAC5B,cAAc,EAAE,KAAK,EAAE,kBAAkB;YACzC,gBAAgB,EAAE,CAAC,YAAY,EAAE,MAAM,EAAE,QAAQ,CAAC;SACnD,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAgC,EAAE;QAC5D,QAAQ,CAAC,iCAAiC,KAAK,GAAG,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,MAAM,MAAM,GAAY,EAAE,CAAC;YAC3B,IAAI,KAAK,GAAG,CAAC,CAAC;YACd,MAAM,cAAc,GAAG,EAAE,CAAC;YAC1B,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC;YAE5C,OAAO,MAAM,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC;gBAClC,aAAa;gBACb,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;gBAEzB,MAAM,MAAM,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;gBAC7D,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,CAAC;gBAEvD,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;oBAC5B,QAAQ,CAAC,8BAA8B,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;oBAC1D,MAAM;gBACR,CAAC;gBAED,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACtC,MAAM,OAAO,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;gBAEtC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACzB,QAAQ,CAAC,uCAAuC,CAAC,CAAC;oBAClD,MAAM;gBACR,CAAC;gBAED,QAAQ,CAAC,yBAAyB,OAAO,CAAC,MAAM,kBAAkB,CAAC,CAAC;gBAEpE,SAAS;gBACT,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;oBAC9B,IAAI,MAAM,CAAC,MAAM,IAAI,UAAU;wBAAE,OAAO,KAAK,CAAC,CAAC,OAAO;oBAEtD,MAAM,KAAK,GAAG,IAAI,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;oBACrD,IAA
I,KAAK,EAAE,CAAC;wBACV,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;oBACrB,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,KAAK,IAAI,cAAc,CAAC;YAC1B,CAAC;YAED,QAAQ,CAAC,iCAAiC,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;YAClE,OAAO,MAAM,CAAC;QAEhB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CAAC,OAAe,EAAE,OAAyB;QAC1D,MAAM,IAAI,KAAK,CAAC,wGAAwG,CAAC,CAAC;IAC5H,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,OAAe,EAAE,OAAyB;QACxD,MAAM,IAAI,KAAK,CAAC,sGAAsG,CAAC,CAAC;IAC1H,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,KAAa,EAAE,KAAa,EAAE,OAA6B;QACnF,MAAM,MAAM,GAAwB;YAClC,CAAC,EAAE,KAAK;YACR,KAAK,EAAE,KAAK;YACZ,EAAE,EAAE,OAAO,CAAC,QAAQ,IAAI,IAAI;YAC5B,MAAM,EAAE,KAAK,EAAE,UAAU;YACzB,MAAM,EAAE,GAAG,CAAC,eAAe;SAC5B,CAAC;QAEF,SAAS;QACT,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACxC,MAAM,CAAC,MAAM,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;YACtC,MAAM,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;QACzC,CAAC;QAED,SAAS;QACT,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;YACnB,MAAM,CAAC,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;QACtC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,kBAAkB,CAAC,MAA2B;QAC1D,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAE5C,MAAM,MAAM,GAAG;YACb,MAAM;YACN,OAAO,EAAE;gBACP,YAAY,EAAE,SAAS;gBACvB,QAAQ,EAAE,4EAA4E;gBACtF,iBAAiB,EAAE,gBAAgB;gBACnC,iBAAiB,EAAE,eAAe;gBAClC,YAAY,EAAE,YAAY;gBAC1B,2BAA2B,EAAE,GAAG;aACjC;YACD,OAAO,EAAE,QAAQ,CAAC,OAAO;SAC1B,CAAC;QAEF,QAAQ,CAAC,+BAA+B,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAC3D,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC;QAEpC,OAAO,MAAM,YAAY,CAAC,gBAAgB,CACxC,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,EACxC,EAAE,OAAO,EAAE,uBAAuB,EAAE,CACrC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,kBAAkB,CAAC,CAAqB,EAAE,OAA6B;QAC7E,IAAI,CAAC;YACH,UAAU;YACV,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC9C,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACzC,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,kCAAkC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACzF,MAAM,GAAG,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;YAEz
C,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,OAAO,IAAI,CAAC;YACd,CAAC;YAED,iBAAiB;YACjB,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC;YACtC,IAAI,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC;gBACzD,GAAG,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;gBACrC,OAAO,IAAI,CAAC,CAAC,SAAS;YACxB,CAAC;YAED,YAAY;YACZ,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC7C,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC;YACpC,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;YAExC,OAAO;YACP,MAAM,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAClD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;YAE9C,SAAS;YACT,MAAM,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;gBACnE,OAAO,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;YAC3C,CAAC,CAAC,CAAC;YACH,MAAM,YAAY,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC;YAC5C,MAAM,aAAa,GAAG,IAAI,CAAC,oBAAoB,CAAC,YAAY,CAAC,CAAC;YAE9D,SAAS;YACT,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;YAErD,OAAO,YAAY,CAAC,MAAM,CAAC;gBACzB,OAAO;gBACP,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC;gBAC5B,OAAO;gBACP,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;gBAClC,GAAG,EAAE,EAAE,EAAE,2BAA2B;gBACpC,aAAa,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;gBACjD,MAAM,EAAE,EAAE,EAAE,cAAc;gBAC1B,GAAG;gBACH,MAAM,EAAE,eAAe;gBACvB,UAAU,EAAE,EAAE;gBACd,QAAQ,EAAE,EAAE;gBACZ,aAAa;gBACb,OAAO,EAAE,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC;gBACtC,IAAI;gBACJ,KAAK,EAAE;oBACL,SAAS,EAAE,OAAO;oBAClB,QAAQ;iBACT;aACF,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,QAAQ,CAAC,sCAAsC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAgB;QACrC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACpC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,MAAM,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAC5B,OAAO,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC
,CAAC,CAAC;QACtF,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,IAAY;QAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,OAAO,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5D,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAgB;QACrC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACpC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,YAAY;YACZ,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvC,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC;IAED;;OAEG;IACK,oBAAoB,CAAC,YAAoB;QAC/C,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;QACnD,OAAO,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,KAAa,EAAE,OAAiB;QACtD,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;QACrD,OAAO,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;IACzC,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,GAAW;QAC5B,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAC/B,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;YACnC,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,WAAW;QACjC,CAAC;QACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACrC,CAAC;IAED;;OAEG;IACK,kBAAkB;QACxB,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;IAC7E,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,WAAW;QACvB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,WAAW;QACtD,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;IAC3D,CAAC;CACF"}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * IACR ePrint Archive integration module
3
+ * Academic paper search for cryptography and related fields
4
+ */
5
+ import { Paper } from '../models/Paper.js';
6
+ import { PaperSource, SearchOptions, DownloadOptions, PlatformCapabilities } from './PaperSource.js';
7
+ interface IACRSearchOptions extends SearchOptions {
8
+ /** Whether to fetch detailed information */
9
+ fetchDetails?: boolean;
10
+ }
11
+ export declare class IACRSearcher extends PaperSource {
12
+ private readonly searchUrl;
13
+ private readonly userAgents;
14
+ private readonly rateLimiter;
15
+ constructor();
16
+ getCapabilities(): PlatformCapabilities;
17
+ /**
18
+ * Search the IACR ePrint Archive for papers
19
+ */
20
+ search(query: string, options?: IACRSearchOptions): Promise<Paper[]>;
21
+ /**
22
+ * Fetch detailed information for a paper
23
+ */
24
+ getPaperDetails(paperId: string): Promise<Paper | null>;
25
+ /**
26
+ * Download the PDF file
27
+ */
28
+ downloadPdf(paperId: string, options?: DownloadOptions): Promise<string>;
29
+ /**
30
+ * Read the full text of a paper
31
+ */
32
+ readPaper(paperId: string, options?: DownloadOptions): Promise<string>;
33
+ /**
34
+ * Parse the search response
35
+ */
36
+ private parseSearchResponse;
37
+ /**
38
+ * Parse an IACR paper detail page
39
+ */
40
+ private parseIACRPaperDetails;
41
+ /**
42
+ * Pick a random User-Agent
43
+ */
44
+ private getRandomUserAgent;
45
+ /**
46
+ * Delay helper
47
+ */
48
+ private delay;
49
+ }
50
+ export {};
51
+ //# sourceMappingURL=IACRSearcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"IACRSearcher.d.ts","sourceRoot":"","sources":["../../src/platforms/IACRSearcher.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,OAAO,EAAE,KAAK,EAAgB,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAMrG,UAAU,iBAAkB,SAAQ,aAAa;IAC/C,eAAe;IACf,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB;AAED,qBAAa,YAAa,SAAQ,WAAW;IAC3C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAW;IACtC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;;IAiB1C,eAAe,IAAI,oBAAoB;IAWvC;;OAEG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAoC9E;;OAEG;IACG,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC;IA8B7D;;OAEG;IACG,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAyClF;;OAEG;IACG,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAiBhF;;OAEG;YACW,mBAAmB;IAsGjC;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA0F7B;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAI1B;;OAEG;IACH,OAAO,CAAC,KAAK;CAGd"}
@@ -0,0 +1,339 @@
1
+ /**
2
+ * IACR ePrint Archive integration module
3
+ * Academic paper search for cryptography and related fields
4
+ */
5
+ import axios from 'axios';
6
+ import * as cheerio from 'cheerio';
7
+ import * as fs from 'fs';
8
+ import * as path from 'path';
9
+ import { PaperFactory } from '../models/Paper.js';
10
+ import { PaperSource } from './PaperSource.js';
11
+ import { TIMEOUTS } from '../config/constants.js';
12
+ import { logDebug } from '../utils/Logger.js';
13
+ import { RateLimiter } from '../utils/RateLimiter.js';
14
+ import { ErrorHandler } from '../utils/ErrorHandler.js';
15
+ export class IACRSearcher extends PaperSource {
16
+ searchUrl; // search endpoint, set in the constructor to `${baseUrl}/search`
17
+ userAgents; // User-Agent strings that getRandomUserAgent chooses from
18
+ rateLimiter; // awaited via waitForPermission() before every outgoing request
19
+ constructor() {
20
+ super('iacr', 'https://eprint.iacr.org'); // presumably (platform name, base URL); this.baseUrl is read right below
21
+ this.searchUrl = `${this.baseUrl}/search`;
22
+ this.userAgents = [
23
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
24
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)',
25
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36'
26
+ ];
27
+ // IACR rate limit: 1 req/s, burst=2
28
+ this.rateLimiter = new RateLimiter({
29
+ requestsPerSecond: 1,
30
+ burstCapacity: 2
31
+ });
32
+ }
33
+ getCapabilities() { // Returns a fixed description of what this source supports
34
+ return {
35
+ search: true,
36
+ download: true,
37
+ fullText: true, // NOTE(review): readPaper currently returns only a status message, not extracted text
38
+ citations: false,
39
+ requiresApiKey: false,
40
+ supportedOptions: ['maxResults', 'fetchDetails']
41
+ };
42
+ }
43
+ /**
44
+ * Search the IACR ePrint Archive: sends one rate-limited GET to the search URL with the query as `q`, parses the HTML via parseSearchResponse, and resolves with at most options.maxResults papers (default 10). Errors are logged and handed to this.handleHttpError (defined outside this file; presumably it rethrows - confirm).
45
+ */
46
+ async search(query, options = {}) {
47
+ try {
48
+ const params = {
49
+ q: query
50
+ };
51
+ logDebug(`IACR API Request: GET ${this.searchUrl}`);
52
+ logDebug('IACR Request params:', params);
53
+ await this.rateLimiter.waitForPermission();
54
+ const response = await ErrorHandler.retryWithBackoff(() => axios.get(this.searchUrl, {
55
+ params,
56
+ timeout: TIMEOUTS.DEFAULT,
57
+ headers: {
58
+ 'User-Agent': this.getRandomUserAgent(),
59
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
60
+ 'Accept-Language': 'en-US,en;q=0.9'
61
+ }
62
+ }), { context: 'IACR search' });
63
+ logDebug(`IACR API Response: ${response.status} ${response.statusText}`);
64
+ const papers = await this.parseSearchResponse(response.data, options);
65
+ logDebug(`IACR Parsed ${papers.length} papers`);
66
+ return papers.slice(0, options.maxResults || 10); // the result cap is applied only after the whole page is parsed
67
+ }
68
+ catch (error) {
69
+ logDebug('IACR Search Error:', error.message);
70
+ this.handleHttpError(error, 'search');
71
+ }
72
+ }
73
+ /**
74
+ * Fetch and parse the detail page of one paper. paperId may be a full URL (anything starting with 'http') or an ID that is appended to the base URL. Resolves with the parsed paper, or null on a non-200 status or any thrown error (which is logged, not rethrown).
75
+ */
76
+ async getPaperDetails(paperId) {
77
+ try {
78
+ const paperUrl = paperId.startsWith('http') ? paperId : `${this.baseUrl}/${paperId}`;
79
+ await this.rateLimiter.waitForPermission();
80
+ const response = await ErrorHandler.retryWithBackoff(() => axios.get(paperUrl, {
81
+ timeout: TIMEOUTS.DEFAULT,
82
+ headers: {
83
+ 'User-Agent': this.getRandomUserAgent(),
84
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
85
+ 'Accept-Language': 'en-US,en;q=0.9'
86
+ }
87
+ }), { context: 'IACR paper details' });
88
+ if (response.status !== 200) {
89
+ logDebug(`Failed to fetch paper details: HTTP ${response.status}`);
90
+ return null;
91
+ }
92
+ return this.parseIACRPaperDetails(response.data, paperId);
93
+ }
94
+ catch (error) {
95
+ logDebug(`Error fetching paper details for ${paperId}:`, error.message);
96
+ return null;
97
+ }
98
+ }
99
+ /**
100
+ * 下载PDF文件
101
+ */
102
+ async downloadPdf(paperId, options = {}) {
103
+ try {
104
+ const pdfUrl = `${this.baseUrl}/${paperId}.pdf`;
105
+ const savePath = options.savePath || './downloads';
106
+ // 确保保存目录存在
107
+ if (!fs.existsSync(savePath)) {
108
+ fs.mkdirSync(savePath, { recursive: true });
109
+ }
110
+ const filename = `iacr_${paperId.replace(/\//g, '_')}.pdf`;
111
+ const filePath = path.join(savePath, filename);
112
+ // 检查文件是否已存在
113
+ if (fs.existsSync(filePath) && !options.overwrite) {
114
+ return filePath;
115
+ }
116
+ await this.rateLimiter.waitForPermission();
117
+ const response = await ErrorHandler.retryWithBackoff(() => axios.get(pdfUrl, {
118
+ responseType: 'stream',
119
+ timeout: TIMEOUTS.EXTENDED,
120
+ headers: { 'User-Agent': this.getRandomUserAgent() }
121
+ }), { context: 'IACR download' });
122
+ const writer = fs.createWriteStream(filePath);
123
+ response.data.pipe(writer);
124
+ return new Promise((resolve, reject) => {
125
+ writer.on('finish', () => resolve(filePath));
126
+ writer.on('error', reject);
127
+ });
128
+ }
129
+ catch (error) {
130
+ this.handleHttpError(error, 'download PDF');
131
+ }
132
+ }
133
+ /**
134
+ * 读取论文全文内容
135
+ */
136
+ async readPaper(paperId, options = {}) {
137
+ try {
138
+ const savePath = options.savePath || './downloads';
139
+ const filename = `iacr_${paperId.replace(/\//g, '_')}.pdf`;
140
+ const filePath = path.join(savePath, filename);
141
+ // 如果PDF不存在,先下载
142
+ if (!fs.existsSync(filePath)) {
143
+ await this.downloadPdf(paperId, options);
144
+ }
145
+ return `PDF file downloaded at: ${filePath}. Full text extraction requires additional PDF parsing implementation.`;
146
+ }
147
+ catch (error) {
148
+ this.handleHttpError(error, 'read paper');
149
+ }
150
+ }
151
+ /**
152
+ * 解析搜索响应
153
+ */
154
+ async parseSearchResponse(html, options) {
155
+ const $ = cheerio.load(html);
156
+ const papers = [];
157
+ // 查找所有搜索结果条目
158
+ $('.mb-4').each((index, element) => {
159
+ try {
160
+ const $element = $(element);
161
+ // 提取论文ID和链接
162
+ const paperLink = $element.find('.d-flex .paperlink').first();
163
+ if (!paperLink.length)
164
+ return;
165
+ const paperId = paperLink.text().trim();
166
+ const paperUrl = this.baseUrl + paperLink.attr('href');
167
+ // 提取PDF链接
168
+ const pdfLink = $element.find('a[href$=".pdf"]').first();
169
+ const pdfUrl = pdfLink.length ? this.baseUrl + pdfLink.attr('href') : '';
170
+ // 提取更新日期
171
+ const lastUpdatedElem = $element.find('small.ms-auto');
172
+ let updatedDate = null;
173
+ if (lastUpdatedElem.length) {
174
+ const dateText = lastUpdatedElem.text().replace('Last updated:', '').trim();
175
+ updatedDate = this.parseDate(dateText);
176
+ }
177
+ // 从内容区域提取信息
178
+ const contentDiv = $element.find('.ms-md-4');
179
+ if (!contentDiv.length)
180
+ return;
181
+ // 提取标题
182
+ const titleElem = contentDiv.find('strong').first();
183
+ const title = titleElem.text().trim();
184
+ // 提取作者
185
+ const authorsElem = contentDiv.find('span.fst-italic').first();
186
+ const authors = authorsElem.length ?
187
+ authorsElem.text().split(',').map(author => author.trim()) : [];
188
+ // 提取分类
189
+ const categoryElem = contentDiv.find('small.badge').first();
190
+ const categories = categoryElem.length ? [categoryElem.text().trim()] : [];
191
+ // 提取摘要
192
+ const abstractElem = contentDiv.find('p.search-abstract').first();
193
+ const abstract = abstractElem.text().trim();
194
+ const paper = PaperFactory.create({
195
+ paperId: paperId,
196
+ title: this.cleanText(title),
197
+ authors: authors,
198
+ abstract: this.cleanText(abstract),
199
+ doi: '',
200
+ publishedDate: updatedDate || new Date(),
201
+ pdfUrl: pdfUrl,
202
+ url: paperUrl,
203
+ source: 'iacr',
204
+ updatedDate: updatedDate || undefined,
205
+ categories: categories,
206
+ keywords: [],
207
+ citationCount: 0,
208
+ year: updatedDate?.getFullYear(),
209
+ extra: {
210
+ iacrId: paperId
211
+ }
212
+ });
213
+ papers.push(paper);
214
+ }
215
+ catch (error) {
216
+ logDebug('Error parsing IACR search result:', error);
217
+ }
218
+ });
219
+ // 如果需要详细信息,获取每篇论文的详细信息
220
+ if (options.fetchDetails && papers.length > 0) {
221
+ logDebug('Fetching detailed information for IACR papers...');
222
+ const detailedPapers = [];
223
+ const concurrency = 3;
224
+ for (let i = 0; i < papers.length; i += concurrency) {
225
+ const chunk = papers.slice(i, i + concurrency);
226
+ const detailPromises = chunk.map(async (paper) => {
227
+ try {
228
+ const detailedPaper = await this.getPaperDetails(paper.paperId);
229
+ return detailedPaper || paper; // 退回到搜索结果数据
230
+ }
231
+ catch (error) {
232
+ logDebug(`Error fetching details for ${paper.paperId}:`, error);
233
+ return paper;
234
+ }
235
+ });
236
+ detailedPapers.push(...await Promise.all(detailPromises));
237
+ }
238
+ return detailedPapers;
239
+ }
240
+ return papers;
241
+ }
242
+ /**
243
+ * 解析IACR论文详细页面
244
+ */
245
+ parseIACRPaperDetails(html, paperId) {
246
+ try {
247
+ const $ = cheerio.load(html);
248
+ // 提取标题
249
+ const title = $('h3.mb-3').text().trim();
250
+ // 提取作者
251
+ const authorText = $('p.fst-italic').text().trim();
252
+ const authors = authorText ?
253
+ authorText.replace(/ and /g, ',').split(',').map(author => author.trim()) : [];
254
+ // 提取摘要
255
+ const abstract = $('p[style*="white-space: pre-wrap"]').text().trim();
256
+ // 提取关键词
257
+ const keywords = [];
258
+ $('a.badge.bg-secondary.keyword').each((index, element) => {
259
+ keywords.push($(element).text().trim());
260
+ });
261
+ // 提取发表信息和历史记录
262
+ const pageText = $.text();
263
+ const lines = pageText.split('\n').map(line => line.trim()).filter(line => line);
264
+ let publicationInfo = '';
265
+ let historyEntries = [];
266
+ let lastUpdated = null;
267
+ // 查找发表信息
268
+ for (let i = 0; i < lines.length; i++) {
269
+ if (lines[i].includes('Publication info') && i + 1 < lines.length) {
270
+ publicationInfo = lines[i + 1];
271
+ break;
272
+ }
273
+ }
274
+ // 查找历史记录
275
+ let historyFound = false;
276
+ for (let i = 0; i < lines.length; i++) {
277
+ const line = lines[i];
278
+ if (line === 'History' && !line.includes(':')) {
279
+ historyFound = true;
280
+ continue;
281
+ }
282
+ else if (historyFound && line.includes(':') && !line.startsWith('Short URL')) {
283
+ historyEntries.push(line);
284
+ // 尝试从第一个历史记录中提取最后更新日期
285
+ if (!lastUpdated) {
286
+ const dateStr = line.split(':')[0].trim();
287
+ lastUpdated = this.parseDate(dateStr);
288
+ }
289
+ }
290
+ else if (historyFound && (line.startsWith('Short URL') || line.startsWith('License'))) {
291
+ break;
292
+ }
293
+ }
294
+ // 构建PDF URL
295
+ const pdfUrl = `${this.baseUrl}/${paperId}.pdf`;
296
+ const paperUrl = `${this.baseUrl}/${paperId}`;
297
+ // 使用最后更新日期或当前日期作为发表日期
298
+ const publishedDate = lastUpdated || new Date();
299
+ return PaperFactory.create({
300
+ paperId: paperId,
301
+ title: this.cleanText(title),
302
+ authors: authors,
303
+ abstract: this.cleanText(abstract),
304
+ doi: '',
305
+ publishedDate: publishedDate,
306
+ pdfUrl: pdfUrl,
307
+ url: paperUrl,
308
+ source: 'iacr',
309
+ updatedDate: lastUpdated || undefined,
310
+ categories: [],
311
+ keywords: keywords,
312
+ citationCount: 0,
313
+ year: publishedDate.getFullYear(),
314
+ extra: {
315
+ iacrId: paperId,
316
+ publicationInfo: publicationInfo,
317
+ history: historyEntries.join('; ')
318
+ }
319
+ });
320
+ }
321
+ catch (error) {
322
+ logDebug('Error parsing IACR paper details:', error);
323
+ return null;
324
+ }
325
+ }
326
+ /**
327
+ * 获取随机User-Agent
328
+ */
329
+ getRandomUserAgent() {
330
+ return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
331
+ }
332
+ /**
333
+ * 延迟函数
334
+ */
335
+ delay(ms) {
336
+ return new Promise(resolve => setTimeout(resolve, ms));
337
+ }
338
+ }
339
+ //# sourceMappingURL=IACRSearcher.js.map