smart-image-scraper-mcp 2.10.0 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +14 -1
- package/src/index.js +40 -22
- package/src/infrastructure/cache.js +3 -2
- package/src/infrastructure/gracefulShutdown.js +7 -7
- package/src/infrastructure/httpClient.js +6 -6
- package/src/infrastructure/logger.js +4 -6
- package/src/infrastructure/metrics.js +3 -2
- package/src/infrastructure/rateLimiter.js +1 -0
- package/src/infrastructure/requestQueue.js +2 -1
- package/src/providers/bingScraper.js +14 -9
- package/src/providers/googleScraper.js +22 -28
- package/src/services/fileManager.js +1 -1
- package/src/services/imageProcessor.js +6 -5
- package/src/services/linkValidator.js +11 -4
- package/src/services/orchestrator.js +54 -14
- package/src/index.backup.js +0 -340
- package/src/index.new.js +0 -213
- package/src/index.simple.js +0 -213
- package/src/services/orchestrator.simple.js +0 -259
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "smart-image-scraper-mcp",
|
|
3
|
-
"version": "2.10.0",
|
|
3
|
+
"version": "2.11.0",
|
|
4
4
|
"description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 图片搜索、验证和下载",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -27,6 +27,19 @@
|
|
|
27
27
|
"engines": {
|
|
28
28
|
"node": ">=18.0.0"
|
|
29
29
|
},
|
|
30
|
+
"files": [
|
|
31
|
+
"src/index.js",
|
|
32
|
+
"src/config/",
|
|
33
|
+
"src/infrastructure/",
|
|
34
|
+
"src/providers/",
|
|
35
|
+
"src/services/orchestrator.js",
|
|
36
|
+
"src/services/linkValidator.js",
|
|
37
|
+
"src/services/fileManager.js",
|
|
38
|
+
"src/services/imageProcessor.js",
|
|
39
|
+
"src/services/index.js",
|
|
40
|
+
"README.md",
|
|
41
|
+
"LICENSE"
|
|
42
|
+
],
|
|
30
43
|
"repository": {
|
|
31
44
|
"type": "git",
|
|
32
45
|
"url": ""
|
package/src/index.js
CHANGED
|
@@ -175,10 +175,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
175
175
|
};
|
|
176
176
|
}
|
|
177
177
|
|
|
178
|
+
// MCP 层最外层超时保护(55秒硬限制)
|
|
179
|
+
const MCP_TIMEOUT = 55000;
|
|
180
|
+
|
|
181
|
+
// 主流做法:每个请求创建新的 Orchestrator 实例,确保无状态
|
|
182
|
+
const orchestrator = new Orchestrator();
|
|
183
|
+
let mcpTimeoutId;
|
|
184
|
+
|
|
178
185
|
try {
|
|
179
|
-
// 主流做法:每个请求创建新的 Orchestrator 实例,确保无状态
|
|
180
|
-
const orchestrator = new Orchestrator();
|
|
181
|
-
|
|
182
186
|
// 规范化参数
|
|
183
187
|
const params = {
|
|
184
188
|
query: args.query.trim(),
|
|
@@ -194,30 +198,44 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
194
198
|
minFileSize: ['any', '50kb', '100kb', '200kb', '500kb', '1mb'].includes(args.minFileSize) ? args.minFileSize : 'any',
|
|
195
199
|
};
|
|
196
200
|
|
|
197
|
-
//
|
|
198
|
-
const result = await
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
201
|
+
// 使用 Promise.race 确保一定会在超时内返回
|
|
202
|
+
const result = await Promise.race([
|
|
203
|
+
(async () => {
|
|
204
|
+
const result = await orchestrator.execute(params);
|
|
205
|
+
const formattedResult = orchestrator.formatResult(result);
|
|
206
|
+
if (!result.success) {
|
|
207
|
+
return {
|
|
208
|
+
content: [{ type: 'text', text: formattedResult }],
|
|
209
|
+
isError: true,
|
|
210
|
+
};
|
|
211
|
+
}
|
|
212
|
+
return {
|
|
213
|
+
content: [{ type: 'text', text: formattedResult }],
|
|
214
|
+
};
|
|
215
|
+
})(),
|
|
216
|
+
new Promise((_, reject) => {
|
|
217
|
+
mcpTimeoutId = setTimeout(() => {
|
|
218
|
+
// 超时时中止 orchestrator 的所有操作
|
|
219
|
+
if (orchestrator.abortController) {
|
|
220
|
+
orchestrator.abortController.abort();
|
|
221
|
+
}
|
|
222
|
+
reject(new Error('MCP_TIMEOUT: 请求超时(55秒),请减少关键词数量或稍后重试'));
|
|
223
|
+
}, MCP_TIMEOUT);
|
|
224
|
+
})
|
|
225
|
+
]);
|
|
226
|
+
clearTimeout(mcpTimeoutId);
|
|
227
|
+
return result;
|
|
214
228
|
} catch (error) {
|
|
215
|
-
|
|
229
|
+
clearTimeout(mcpTimeoutId);
|
|
230
|
+
// 确保中止所有操作
|
|
231
|
+
if (orchestrator.abortController && !orchestrator.abortController.signal.aborted) {
|
|
232
|
+
orchestrator.abortController.abort();
|
|
233
|
+
}
|
|
216
234
|
console.error(`[MCP Error] ${error.message}`);
|
|
217
235
|
return {
|
|
218
236
|
content: [{
|
|
219
237
|
type: 'text',
|
|
220
|
-
text: `## ❌ 执行错误\n\n**错误信息**: ${error.message}\n\n
|
|
238
|
+
text: `## ❌ 执行错误\n\n**错误信息**: ${error.message}\n\n请减少关键词数量或稍后重试。`
|
|
221
239
|
}],
|
|
222
240
|
isError: true,
|
|
223
241
|
};
|
|
@@ -253,10 +253,11 @@ export class ValidationCache extends LRUCache {
|
|
|
253
253
|
export const searchCache = new SearchCache();
|
|
254
254
|
export const validationCache = new ValidationCache();
|
|
255
255
|
|
|
256
|
-
//
|
|
257
|
-
setInterval(() => {
|
|
256
|
+
// 定期清理过期缓存(unref 避免阻止进程退出)
|
|
257
|
+
const cacheCleanupInterval = setInterval(() => {
|
|
258
258
|
searchCache.cleanup();
|
|
259
259
|
validationCache.cleanup();
|
|
260
260
|
}, 60000); // 每分钟清理一次
|
|
261
|
+
cacheCleanupInterval.unref();
|
|
261
262
|
|
|
262
263
|
export default { LRUCache, SearchCache, ValidationCache, searchCache, validationCache };
|
|
@@ -26,7 +26,8 @@ export class GracefulShutdown {
|
|
|
26
26
|
* 注册信号处理器
|
|
27
27
|
*/
|
|
28
28
|
_registerSignalHandlers() {
|
|
29
|
-
|
|
29
|
+
// 仅注册 SIGINT 和 SIGTERM(SIGQUIT 在 Windows 上不存在)
|
|
30
|
+
const signals = ['SIGINT', 'SIGTERM'];
|
|
30
31
|
|
|
31
32
|
signals.forEach(signal => {
|
|
32
33
|
process.on(signal, async () => {
|
|
@@ -35,16 +36,15 @@ export class GracefulShutdown {
|
|
|
35
36
|
});
|
|
36
37
|
});
|
|
37
38
|
|
|
38
|
-
// 处理未捕获的异常
|
|
39
|
-
process.on('uncaughtException',
|
|
39
|
+
// 处理未捕获的异常 - 仅记录日志,不退出进程(避免中断 MCP 通信)
|
|
40
|
+
process.on('uncaughtException', (error) => {
|
|
40
41
|
logger.error('Uncaught exception', { error: error.message, stack: error.stack });
|
|
41
|
-
|
|
42
|
+
// 不调用 process.exit,让 MCP 连接保持活跃
|
|
42
43
|
});
|
|
43
44
|
|
|
44
|
-
// 处理未处理的 Promise 拒绝
|
|
45
|
-
process.on('unhandledRejection',
|
|
45
|
+
// 处理未处理的 Promise 拒绝 - 仅记录日志
|
|
46
|
+
process.on('unhandledRejection', (reason, promise) => {
|
|
46
47
|
logger.error('Unhandled rejection', { reason: String(reason) });
|
|
47
|
-
// 不立即退出,只记录日志
|
|
48
48
|
});
|
|
49
49
|
}
|
|
50
50
|
|
|
@@ -14,19 +14,19 @@ import logger from './logger.js';
|
|
|
14
14
|
const httpAgent = new http.Agent({
|
|
15
15
|
keepAlive: true, // 启用 Keep-Alive
|
|
16
16
|
keepAliveMsecs: 1000, // Keep-Alive 探测间隔
|
|
17
|
-
maxSockets:
|
|
18
|
-
maxFreeSockets:
|
|
17
|
+
maxSockets: 20, // 降低最大并发连接数,避免资源耗尽
|
|
18
|
+
maxFreeSockets: 5, // 降低最大空闲连接数
|
|
19
19
|
scheduling: 'lifo', // 后进先出,优先使用最近的连接
|
|
20
|
-
timeout:
|
|
20
|
+
timeout: 10000, // 空闲连接10秒后关闭
|
|
21
21
|
});
|
|
22
22
|
|
|
23
23
|
const httpsAgent = new https.Agent({
|
|
24
24
|
keepAlive: true,
|
|
25
25
|
keepAliveMsecs: 1000,
|
|
26
|
-
maxSockets:
|
|
27
|
-
maxFreeSockets:
|
|
26
|
+
maxSockets: 20, // 降低最大并发连接数
|
|
27
|
+
maxFreeSockets: 5, // 降低最大空闲连接数
|
|
28
28
|
scheduling: 'lifo',
|
|
29
|
-
timeout:
|
|
29
|
+
timeout: 10000, // 空闲连接10秒后关闭
|
|
30
30
|
rejectUnauthorized: false, // 允许自签名证书
|
|
31
31
|
});
|
|
32
32
|
|
|
@@ -137,14 +137,12 @@ class Logger {
|
|
|
137
137
|
// 输出到 stderr
|
|
138
138
|
console.error(formatted);
|
|
139
139
|
|
|
140
|
-
//
|
|
140
|
+
// 输出到文件(异步写入,避免阻塞事件循环)
|
|
141
141
|
if (this.logFile) {
|
|
142
|
-
|
|
143
|
-
fs.appendFileSync(this.logFile, formatted + '\n');
|
|
144
|
-
this._rotateLogIfNeeded();
|
|
145
|
-
} catch (error) {
|
|
142
|
+
fs.appendFile(this.logFile, formatted + '\n', (err) => {
|
|
146
143
|
// 忽略文件写入错误
|
|
147
|
-
|
|
144
|
+
if (!err) this._rotateLogIfNeeded();
|
|
145
|
+
});
|
|
148
146
|
}
|
|
149
147
|
}
|
|
150
148
|
|
|
@@ -282,11 +282,12 @@ export class MetricsCollector {
|
|
|
282
282
|
// 全局指标收集器
|
|
283
283
|
export const metrics = new MetricsCollector();
|
|
284
284
|
|
|
285
|
-
// 定期输出指标日志(每5分钟)
|
|
286
|
-
setInterval(() => {
|
|
285
|
+
// 定期输出指标日志(每5分钟,unref 避免阻止进程退出)
|
|
286
|
+
const metricsInterval = setInterval(() => {
|
|
287
287
|
if (metrics.metrics.requests.total > 0) {
|
|
288
288
|
metrics.logSummary();
|
|
289
289
|
}
|
|
290
290
|
}, 5 * 60 * 1000);
|
|
291
|
+
metricsInterval.unref();
|
|
291
292
|
|
|
292
293
|
export default metrics;
|
|
@@ -23,7 +23,6 @@ export class BingScraper extends BaseScraper {
|
|
|
23
23
|
* @returns {Promise<string[]>} - 图片URL列表
|
|
24
24
|
*/
|
|
25
25
|
async search(keyword, limit = 10, options = {}) {
|
|
26
|
-
this.options = options;
|
|
27
26
|
const pageSize = 35;
|
|
28
27
|
|
|
29
28
|
// 计算需要获取的页数(最多3页,避免触发速率限制)
|
|
@@ -31,13 +30,19 @@ export class BingScraper extends BaseScraper {
|
|
|
31
30
|
logger.info(`[Bing] Searching "${keyword}" - ${pagesNeeded} page(s) for ${limit} images`);
|
|
32
31
|
|
|
33
32
|
try {
|
|
33
|
+
const seen = new Set(); // 去重
|
|
34
34
|
let allUrls = [];
|
|
35
35
|
|
|
36
36
|
// 顺序获取多页(避免并发触发限制)
|
|
37
37
|
for (let page = 0; page < pagesNeeded; page++) {
|
|
38
38
|
const offset = page * pageSize;
|
|
39
|
-
const urls = await this._fetchPage(keyword, offset);
|
|
40
|
-
|
|
39
|
+
const urls = await this._fetchPage(keyword, offset, options);
|
|
40
|
+
for (const url of urls) {
|
|
41
|
+
if (!seen.has(url)) {
|
|
42
|
+
seen.add(url);
|
|
43
|
+
allUrls.push(url);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
41
46
|
|
|
42
47
|
// 如果已经够了就停止
|
|
43
48
|
if (allUrls.length >= limit) {
|
|
@@ -62,8 +67,8 @@ export class BingScraper extends BaseScraper {
|
|
|
62
67
|
/**
|
|
63
68
|
* 获取单页结果
|
|
64
69
|
*/
|
|
65
|
-
async _fetchPage(keyword, offset) {
|
|
66
|
-
const searchUrl = this._buildSearchUrl(keyword, offset);
|
|
70
|
+
async _fetchPage(keyword, offset, options = {}) {
|
|
71
|
+
const searchUrl = this._buildSearchUrl(keyword, offset, options);
|
|
67
72
|
|
|
68
73
|
try {
|
|
69
74
|
const response = await withRetry(
|
|
@@ -89,7 +94,7 @@ export class BingScraper extends BaseScraper {
|
|
|
89
94
|
/**
|
|
90
95
|
* 构建搜索 URL
|
|
91
96
|
*/
|
|
92
|
-
_buildSearchUrl(keyword, offset = 0) {
|
|
97
|
+
_buildSearchUrl(keyword, offset = 0, options = {}) {
|
|
93
98
|
// 尺寸过滤映射
|
|
94
99
|
const sizeMap = {
|
|
95
100
|
'small': '+filterui:imagesize-small',
|
|
@@ -114,9 +119,9 @@ export class BingScraper extends BaseScraper {
|
|
|
114
119
|
'strict': 'strict',
|
|
115
120
|
};
|
|
116
121
|
|
|
117
|
-
const size =
|
|
118
|
-
const aspect =
|
|
119
|
-
const safeSearch =
|
|
122
|
+
const size = options.size || 'all';
|
|
123
|
+
const aspect = options.aspect || 'all';
|
|
124
|
+
const safeSearch = options.safeSearch || 'moderate';
|
|
120
125
|
|
|
121
126
|
let qft = '+filterui:photo-photo';
|
|
122
127
|
if (sizeMap[size]) {
|
|
@@ -23,7 +23,6 @@ export class GoogleScraper extends BaseScraper {
|
|
|
23
23
|
* @returns {Promise<string[]>} - 图片URL列表
|
|
24
24
|
*/
|
|
25
25
|
async search(keyword, limit = 10, options = {}) {
|
|
26
|
-
this.options = options;
|
|
27
26
|
const pageSize = 20; // Google 每页约20张
|
|
28
27
|
|
|
29
28
|
// 计算需要获取的页数(最多3页,避免触发速率限制)
|
|
@@ -31,13 +30,19 @@ export class GoogleScraper extends BaseScraper {
|
|
|
31
30
|
logger.info(`[Google] Searching "${keyword}" - ${pagesNeeded} page(s) for ${limit} images`);
|
|
32
31
|
|
|
33
32
|
try {
|
|
33
|
+
const seen = new Set(); // 去重
|
|
34
34
|
let allUrls = [];
|
|
35
35
|
|
|
36
36
|
// 顺序获取多页
|
|
37
37
|
for (let page = 0; page < pagesNeeded; page++) {
|
|
38
38
|
const start = page * pageSize;
|
|
39
|
-
const urls = await this._fetchPage(keyword, start);
|
|
40
|
-
|
|
39
|
+
const urls = await this._fetchPage(keyword, start, options);
|
|
40
|
+
for (const url of urls) {
|
|
41
|
+
if (!seen.has(url)) {
|
|
42
|
+
seen.add(url);
|
|
43
|
+
allUrls.push(url);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
41
46
|
|
|
42
47
|
if (allUrls.length >= limit) {
|
|
43
48
|
break;
|
|
@@ -57,16 +62,12 @@ export class GoogleScraper extends BaseScraper {
|
|
|
57
62
|
return [];
|
|
58
63
|
}
|
|
59
64
|
}
|
|
60
|
-
|
|
61
|
-
_delay(ms) {
|
|
62
|
-
return new Promise(resolve => setTimeout(resolve, ms));
|
|
63
|
-
}
|
|
64
65
|
|
|
65
66
|
/**
|
|
66
67
|
* 获取单页结果
|
|
67
68
|
*/
|
|
68
|
-
async _fetchPage(keyword, start) {
|
|
69
|
-
const searchUrl = this._buildSearchUrl(keyword, start);
|
|
69
|
+
async _fetchPage(keyword, start, options = {}) {
|
|
70
|
+
const searchUrl = this._buildSearchUrl(keyword, start, options);
|
|
70
71
|
|
|
71
72
|
try {
|
|
72
73
|
const response = await withRetry(
|
|
@@ -98,7 +99,7 @@ export class GoogleScraper extends BaseScraper {
|
|
|
98
99
|
/**
|
|
99
100
|
* 构建搜索 URL
|
|
100
101
|
*/
|
|
101
|
-
_buildSearchUrl(keyword, start = 0) {
|
|
102
|
+
_buildSearchUrl(keyword, start = 0, options = {}) {
|
|
102
103
|
// 尺寸过滤映射 (Google 使用 tbs 参数)
|
|
103
104
|
const sizeMap = {
|
|
104
105
|
'small': 'isz:s',
|
|
@@ -123,9 +124,9 @@ export class GoogleScraper extends BaseScraper {
|
|
|
123
124
|
'strict': 'active',
|
|
124
125
|
};
|
|
125
126
|
|
|
126
|
-
const size =
|
|
127
|
-
const aspect =
|
|
128
|
-
const safeSearch =
|
|
127
|
+
const size = options.size || 'all';
|
|
128
|
+
const aspect = options.aspect || 'all';
|
|
129
|
+
const safeSearch = options.safeSearch || 'moderate';
|
|
129
130
|
|
|
130
131
|
const params = new URLSearchParams({
|
|
131
132
|
q: keyword,
|
|
@@ -158,14 +159,16 @@ export class GoogleScraper extends BaseScraper {
|
|
|
158
159
|
|
|
159
160
|
try {
|
|
160
161
|
// 方法1: 使用正则提取图片URL
|
|
161
|
-
//
|
|
162
|
-
const
|
|
162
|
+
// 每次创建新的 RegExp 实例避免全局标志 lastIndex 状态污染
|
|
163
|
+
const patternDefs = [
|
|
163
164
|
/\["(https?:\/\/[^"]+\.(?:jpg|jpeg|png|gif|webp)[^"]*)"/gi,
|
|
164
165
|
/"ou":"(https?:\/\/[^"]+)"/gi,
|
|
165
166
|
/\["(https?:\/\/[^"]+)",\d+,\d+\]/gi,
|
|
166
167
|
];
|
|
167
168
|
|
|
168
|
-
for (const pattern of
|
|
169
|
+
for (const pattern of patternDefs) {
|
|
170
|
+
// 重置 lastIndex 确保每次从头开始匹配
|
|
171
|
+
pattern.lastIndex = 0;
|
|
169
172
|
let match;
|
|
170
173
|
while ((match = pattern.exec(html)) !== null) {
|
|
171
174
|
const url = this._decodeUrl(match[1]);
|
|
@@ -240,14 +243,11 @@ export class GoogleScraper extends BaseScraper {
|
|
|
240
243
|
// 排除 Google 自身的缩略图和无效链接
|
|
241
244
|
const invalidPatterns = [
|
|
242
245
|
'gstatic.com',
|
|
243
|
-
'google.com',
|
|
244
|
-
'
|
|
245
|
-
'
|
|
246
|
+
'google.com/images',
|
|
247
|
+
'google.com/logos',
|
|
248
|
+
'googleapis.com/proxy',
|
|
246
249
|
'data:image',
|
|
247
250
|
'base64',
|
|
248
|
-
'favicon',
|
|
249
|
-
'logo',
|
|
250
|
-
'icon',
|
|
251
251
|
];
|
|
252
252
|
|
|
253
253
|
for (const pattern of invalidPatterns) {
|
|
@@ -263,12 +263,6 @@ export class GoogleScraper extends BaseScraper {
|
|
|
263
263
|
return hasImageExt || looksLikeImage || url.length > 50;
|
|
264
264
|
}
|
|
265
265
|
|
|
266
|
-
/**
|
|
267
|
-
* 延迟函数
|
|
268
|
-
*/
|
|
269
|
-
_delay(ms) {
|
|
270
|
-
return new Promise(resolve => setTimeout(resolve, ms));
|
|
271
|
-
}
|
|
272
266
|
}
|
|
273
267
|
|
|
274
268
|
export default GoogleScraper;
|
|
@@ -6,8 +6,12 @@
|
|
|
6
6
|
import sharp from 'sharp';
|
|
7
7
|
import fs from 'fs-extra';
|
|
8
8
|
import path from 'path';
|
|
9
|
+
import pLimit from 'p-limit';
|
|
9
10
|
import logger from '../infrastructure/logger.js';
|
|
10
11
|
|
|
12
|
+
// sharp 是 CPU 密集型操作,限制并发避免卡死
|
|
13
|
+
const imageProcessLimit = pLimit(2);
|
|
14
|
+
|
|
11
15
|
export class ImageProcessor {
|
|
12
16
|
/**
|
|
13
17
|
* 预设尺寸配置
|
|
@@ -80,9 +84,6 @@ export class ImageProcessor {
|
|
|
80
84
|
// 替换原文件
|
|
81
85
|
await fs.move(tempPath, finalOutputPath, { overwrite: true });
|
|
82
86
|
|
|
83
|
-
// 获取处理后的信息
|
|
84
|
-
const newMetadata = await sharp(finalOutputPath).metadata();
|
|
85
|
-
|
|
86
87
|
logger.debug(`Processed image: ${inputPath} -> ${width}x${height}`);
|
|
87
88
|
|
|
88
89
|
return {
|
|
@@ -90,7 +91,7 @@ export class ImageProcessor {
|
|
|
90
91
|
path: finalOutputPath,
|
|
91
92
|
metadata: {
|
|
92
93
|
original: { width: metadata.width, height: metadata.height },
|
|
93
|
-
processed: { width
|
|
94
|
+
processed: { width, height },
|
|
94
95
|
},
|
|
95
96
|
};
|
|
96
97
|
} catch (error) {
|
|
@@ -130,7 +131,7 @@ export class ImageProcessor {
|
|
|
130
131
|
*/
|
|
131
132
|
async processMany(files, options = {}) {
|
|
132
133
|
const results = await Promise.all(
|
|
133
|
-
files.map(file => this.processOne(file.path, options))
|
|
134
|
+
files.map(file => imageProcessLimit(() => this.processOne(file.path, options)))
|
|
134
135
|
);
|
|
135
136
|
|
|
136
137
|
const success = [];
|
|
@@ -8,9 +8,8 @@ import httpClient from '../infrastructure/httpClient.js';
|
|
|
8
8
|
import logger from '../infrastructure/logger.js';
|
|
9
9
|
import config from '../config/index.js';
|
|
10
10
|
|
|
11
|
-
//
|
|
12
|
-
const
|
|
13
|
-
const globalValidateLimit = pLimit(MAX_VALIDATE_CONCURRENCY);
|
|
11
|
+
// 使用配置中的并发数,避免硬编码与配置不一致
|
|
12
|
+
const globalValidateLimit = pLimit(config.MAX_VALIDATE_CONCURRENCY);
|
|
14
13
|
|
|
15
14
|
export class LinkValidator {
|
|
16
15
|
constructor() {
|
|
@@ -65,13 +64,21 @@ export class LinkValidator {
|
|
|
65
64
|
return { url, valid: true, quality };
|
|
66
65
|
}
|
|
67
66
|
|
|
67
|
+
// 某些服务器不支持 HEAD,返回 405/403 时尝试 GET 降级
|
|
68
|
+
if (response.status === 405 || response.status === 403) {
|
|
69
|
+
return await this._validateWithGet(url, fetchQuality);
|
|
70
|
+
}
|
|
71
|
+
|
|
68
72
|
return { url, valid: false, error: `status=${response.status}` };
|
|
69
73
|
} catch (error) {
|
|
70
74
|
clearTimeout(timeoutId);
|
|
71
|
-
// 确保 abort controller 被清理
|
|
72
75
|
if (!controller.signal.aborted) {
|
|
73
76
|
controller.abort();
|
|
74
77
|
}
|
|
78
|
+
// 网络错误时也尝试 GET 降级(某些 CDN 完全拒绝 HEAD)
|
|
79
|
+
if (error.response && (error.response.status === 405 || error.response.status === 403)) {
|
|
80
|
+
return await this._validateWithGet(url, fetchQuality);
|
|
81
|
+
}
|
|
75
82
|
return { url, valid: false, error: 'timeout' };
|
|
76
83
|
}
|
|
77
84
|
}
|