smart-image-scraper-mcp 2.11.0 → 2.11.2
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -44,11 +44,11 @@ export class LinkValidator {
|
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
const controller = new AbortController();
|
|
47
|
-
const timeoutId = setTimeout(() => controller.abort(),
|
|
47
|
+
const timeoutId = setTimeout(() => controller.abort(), 2000); // 超时 2 秒
|
|
48
48
|
|
|
49
49
|
try {
|
|
50
50
|
const response = await httpClient.head(url, {
|
|
51
|
-
timeout:
|
|
51
|
+
timeout: 1800, // 超时 1.8 秒
|
|
52
52
|
maxRedirects: 1, // 最多 1 次重定向
|
|
53
53
|
signal: controller.signal,
|
|
54
54
|
});
|
|
@@ -64,21 +64,12 @@ export class LinkValidator {
|
|
|
64
64
|
return { url, valid: true, quality };
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
-
// 某些服务器不支持 HEAD,返回 405/403 时尝试 GET 降级
|
|
68
|
-
if (response.status === 405 || response.status === 403) {
|
|
69
|
-
return await this._validateWithGet(url, fetchQuality);
|
|
70
|
-
}
|
|
71
|
-
|
|
72
67
|
return { url, valid: false, error: `status=${response.status}` };
|
|
73
68
|
} catch (error) {
|
|
74
69
|
clearTimeout(timeoutId);
|
|
75
70
|
if (!controller.signal.aborted) {
|
|
76
71
|
controller.abort();
|
|
77
72
|
}
|
|
78
|
-
// 网络错误时也尝试 GET 降级(某些 CDN 完全拒绝 HEAD)
|
|
79
|
-
if (error.response && (error.response.status === 405 || error.response.status === 403)) {
|
|
80
|
-
return await this._validateWithGet(url, fetchQuality);
|
|
81
|
-
}
|
|
82
73
|
return { url, valid: false, error: 'timeout' };
|
|
83
74
|
}
|
|
84
75
|
}
|
|
@@ -236,12 +227,18 @@ export class LinkValidator {
|
|
|
236
227
|
* @returns {Promise<{valid: Array, invalid: Array}>}
|
|
237
228
|
*/
|
|
238
229
|
async validateMany(urls, options = {}) {
|
|
239
|
-
const { fetchQuality = false, sortByQuality = false } = options;
|
|
230
|
+
const { fetchQuality = false, sortByQuality = false, signal = null } = options;
|
|
240
231
|
|
|
241
232
|
logger.info(`Validating ${urls.length} URLs with concurrency ${config.MAX_VALIDATE_CONCURRENCY}${fetchQuality ? ' (with quality check)' : ''}`);
|
|
242
233
|
|
|
243
234
|
const results = await Promise.all(
|
|
244
|
-
urls.map(url => this.limit(() =>
|
|
235
|
+
urls.map(url => this.limit(() => {
|
|
236
|
+
// 如果已中止,跳过验证直接返回失败
|
|
237
|
+
if (signal?.aborted) {
|
|
238
|
+
return { url, valid: false, error: 'aborted' };
|
|
239
|
+
}
|
|
240
|
+
return this.validateOne(url, fetchQuality);
|
|
241
|
+
}))
|
|
245
242
|
);
|
|
246
243
|
|
|
247
244
|
let valid = [];
|
|
@@ -117,6 +117,7 @@ export class Orchestrator {
|
|
|
117
117
|
const fastMode = qualityMode === 'fast';
|
|
118
118
|
const prioritizeQuality = qualityMode === 'high';
|
|
119
119
|
const minFileSize = this._parseMinFileSize(options.minFileSize);
|
|
120
|
+
const signal = this.abortController?.signal;
|
|
120
121
|
|
|
121
122
|
try {
|
|
122
123
|
const scraper = getScraper(source);
|
|
@@ -132,6 +133,8 @@ export class Orchestrator {
|
|
|
132
133
|
rawUrls = cachedUrls;
|
|
133
134
|
metrics.recordCacheHit();
|
|
134
135
|
} else {
|
|
136
|
+
// 检查是否已中止
|
|
137
|
+
if (signal?.aborted) throw new Error('操作已取消');
|
|
135
138
|
logger.info(`[SEARCH] "${keyword}" (target: ${searchCount})...`);
|
|
136
139
|
rawUrls = await scraper.search(keyword, searchCount, options);
|
|
137
140
|
if (rawUrls.length > 0) {
|
|
@@ -153,17 +156,23 @@ export class Orchestrator {
|
|
|
153
156
|
let resultUrls;
|
|
154
157
|
let qualityModeLabel;
|
|
155
158
|
|
|
159
|
+
// 检查是否已中止
|
|
160
|
+
if (signal?.aborted) throw new Error('操作已取消');
|
|
161
|
+
|
|
156
162
|
if (fastMode) {
|
|
157
163
|
// fast 模式:直接使用搜索结果,不验证
|
|
158
164
|
resultUrls = rawUrls.slice(0, count);
|
|
159
165
|
qualityModeLabel = '快速模式(跳过验证)';
|
|
160
166
|
logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs`);
|
|
161
167
|
} else {
|
|
162
|
-
// balanced 或 high
|
|
163
|
-
const
|
|
168
|
+
// balanced 或 high 模式:验证链接(限制验证数量避免超时)
|
|
169
|
+
const maxValidate = Math.min(rawUrls.length, count * 2 + 5);
|
|
170
|
+
const urlsToValidate = rawUrls.slice(0, maxValidate);
|
|
171
|
+
const { valid } = await this.linkValidator.validateMany(urlsToValidate, {
|
|
164
172
|
fetchQuality: prioritizeQuality,
|
|
165
173
|
sortByQuality: prioritizeQuality,
|
|
166
174
|
minFileSize: minFileSize,
|
|
175
|
+
signal,
|
|
167
176
|
});
|
|
168
177
|
|
|
169
178
|
// 过滤最小文件大小
|
|
@@ -246,6 +255,7 @@ export class Orchestrator {
|
|
|
246
255
|
const qualityMode = options.quality || 'balanced';
|
|
247
256
|
const prioritizeQuality = qualityMode !== 'fast';
|
|
248
257
|
const minFileSize = this._parseMinFileSize(options.minFileSize);
|
|
258
|
+
const signal = this.abortController?.signal;
|
|
249
259
|
|
|
250
260
|
try {
|
|
251
261
|
const scraper = getScraper(source);
|
|
@@ -261,6 +271,7 @@ export class Orchestrator {
|
|
|
261
271
|
rawUrls = cachedUrls;
|
|
262
272
|
metrics.recordCacheHit();
|
|
263
273
|
} else {
|
|
274
|
+
if (signal?.aborted) throw new Error('操作已取消');
|
|
264
275
|
logger.info(`[SEARCH] "${keyword}" (target: ${searchCount})...`);
|
|
265
276
|
rawUrls = await scraper.search(keyword, searchCount, options);
|
|
266
277
|
if (rawUrls.length > 0) {
|
|
@@ -278,8 +289,12 @@ export class Orchestrator {
|
|
|
278
289
|
};
|
|
279
290
|
}
|
|
280
291
|
|
|
281
|
-
//
|
|
282
|
-
|
|
292
|
+
// 检查是否已中止
|
|
293
|
+
if (signal?.aborted) throw new Error('操作已取消');
|
|
294
|
+
|
|
295
|
+
// 根据 quality 模式处理(限制验证数量避免超时)
|
|
296
|
+
const maxValidate = Math.min(rawUrls.length, count * 2 + 5);
|
|
297
|
+
let urlsToDownload = rawUrls.slice(0, maxValidate);
|
|
283
298
|
if (prioritizeQuality) {
|
|
284
299
|
const sortByQuality = qualityMode === 'high';
|
|
285
300
|
logger.info(`Validating ${urlsToDownload.length} URLs (quality=${qualityMode})...`);
|
|
@@ -287,6 +302,7 @@ export class Orchestrator {
|
|
|
287
302
|
fetchQuality: sortByQuality,
|
|
288
303
|
sortByQuality: sortByQuality,
|
|
289
304
|
minFileSize: minFileSize,
|
|
305
|
+
signal,
|
|
290
306
|
});
|
|
291
307
|
|
|
292
308
|
// 过滤最小文件大小
|
|
@@ -364,10 +380,10 @@ export class Orchestrator {
|
|
|
364
380
|
metrics.recordRequest();
|
|
365
381
|
const requestId = `req_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
|
366
382
|
|
|
367
|
-
// 根据关键词数量动态计算超时时间(每个关键词
|
|
383
|
+
// 根据关键词数量动态计算超时时间(每个关键词 15 秒,最少 25 秒,最多 50 秒)
|
|
368
384
|
const keywords = this.parseKeywords(params.query);
|
|
369
385
|
const keywordCount = keywords.length;
|
|
370
|
-
const GLOBAL_TIMEOUT = Math.min(Math.max(keywordCount *
|
|
386
|
+
const GLOBAL_TIMEOUT = Math.min(Math.max(keywordCount * 15000, 25000), 50000);
|
|
371
387
|
|
|
372
388
|
logger.info(`[Orchestrator] Starting request: ${requestId}, keywords: ${keywordCount}, timeout: ${GLOBAL_TIMEOUT/1000}s`);
|
|
373
389
|
|