smart-image-scraper-mcp 2.11.0 → 2.11.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-image-scraper-mcp",
3
- "version": "2.11.0",
3
+ "version": "2.11.2",
4
4
  "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 图片搜索、验证和下载",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -44,11 +44,11 @@ export class LinkValidator {
44
44
  }
45
45
 
46
46
  const controller = new AbortController();
47
- const timeoutId = setTimeout(() => controller.abort(), 3000); // 超时 3
47
+ const timeoutId = setTimeout(() => controller.abort(), 2000); // 超时 2
48
48
 
49
49
  try {
50
50
  const response = await httpClient.head(url, {
51
- timeout: 2500, // 超时 2.5
51
+ timeout: 1800, // 超时 1.8
52
52
  maxRedirects: 1, // 最多 1 次重定向
53
53
  signal: controller.signal,
54
54
  });
@@ -64,21 +64,12 @@ export class LinkValidator {
64
64
  return { url, valid: true, quality };
65
65
  }
66
66
 
67
- // 某些服务器不支持 HEAD,返回 405/403 时尝试 GET 降级
68
- if (response.status === 405 || response.status === 403) {
69
- return await this._validateWithGet(url, fetchQuality);
70
- }
71
-
72
67
  return { url, valid: false, error: `status=${response.status}` };
73
68
  } catch (error) {
74
69
  clearTimeout(timeoutId);
75
70
  if (!controller.signal.aborted) {
76
71
  controller.abort();
77
72
  }
78
- // 网络错误时也尝试 GET 降级(某些 CDN 完全拒绝 HEAD)
79
- if (error.response && (error.response.status === 405 || error.response.status === 403)) {
80
- return await this._validateWithGet(url, fetchQuality);
81
- }
82
73
  return { url, valid: false, error: 'timeout' };
83
74
  }
84
75
  }
@@ -236,12 +227,18 @@ export class LinkValidator {
236
227
  * @returns {Promise<{valid: Array, invalid: Array}>}
237
228
  */
238
229
  async validateMany(urls, options = {}) {
239
- const { fetchQuality = false, sortByQuality = false } = options;
230
+ const { fetchQuality = false, sortByQuality = false, signal = null } = options;
240
231
 
241
232
  logger.info(`Validating ${urls.length} URLs with concurrency ${config.MAX_VALIDATE_CONCURRENCY}${fetchQuality ? ' (with quality check)' : ''}`);
242
233
 
243
234
  const results = await Promise.all(
244
- urls.map(url => this.limit(() => this.validateOne(url, fetchQuality)))
235
+ urls.map(url => this.limit(() => {
236
+ // 如果已中止,跳过验证直接返回失败
237
+ if (signal?.aborted) {
238
+ return { url, valid: false, error: 'aborted' };
239
+ }
240
+ return this.validateOne(url, fetchQuality);
241
+ }))
245
242
  );
246
243
 
247
244
  let valid = [];
@@ -117,6 +117,7 @@ export class Orchestrator {
117
117
  const fastMode = qualityMode === 'fast';
118
118
  const prioritizeQuality = qualityMode === 'high';
119
119
  const minFileSize = this._parseMinFileSize(options.minFileSize);
120
+ const signal = this.abortController?.signal;
120
121
 
121
122
  try {
122
123
  const scraper = getScraper(source);
@@ -132,6 +133,8 @@ export class Orchestrator {
132
133
  rawUrls = cachedUrls;
133
134
  metrics.recordCacheHit();
134
135
  } else {
136
+ // 检查是否已中止
137
+ if (signal?.aborted) throw new Error('操作已取消');
135
138
  logger.info(`[SEARCH] "${keyword}" (target: ${searchCount})...`);
136
139
  rawUrls = await scraper.search(keyword, searchCount, options);
137
140
  if (rawUrls.length > 0) {
@@ -153,17 +156,23 @@ export class Orchestrator {
153
156
  let resultUrls;
154
157
  let qualityModeLabel;
155
158
 
159
+ // 检查是否已中止
160
+ if (signal?.aborted) throw new Error('操作已取消');
161
+
156
162
  if (fastMode) {
157
163
  // fast 模式:直接使用搜索结果,不验证
158
164
  resultUrls = rawUrls.slice(0, count);
159
165
  qualityModeLabel = '快速模式(跳过验证)';
160
166
  logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs`);
161
167
  } else {
162
- // balanced 或 high 模式:验证链接
163
- const { valid } = await this.linkValidator.validateMany(rawUrls, {
168
+ // balanced 或 high 模式:验证链接(限制验证数量避免超时)
169
+ const maxValidate = Math.min(rawUrls.length, count * 2 + 5);
170
+ const urlsToValidate = rawUrls.slice(0, maxValidate);
171
+ const { valid } = await this.linkValidator.validateMany(urlsToValidate, {
164
172
  fetchQuality: prioritizeQuality,
165
173
  sortByQuality: prioritizeQuality,
166
174
  minFileSize: minFileSize,
175
+ signal,
167
176
  });
168
177
 
169
178
  // 过滤最小文件大小
@@ -246,6 +255,7 @@ export class Orchestrator {
246
255
  const qualityMode = options.quality || 'balanced';
247
256
  const prioritizeQuality = qualityMode !== 'fast';
248
257
  const minFileSize = this._parseMinFileSize(options.minFileSize);
258
+ const signal = this.abortController?.signal;
249
259
 
250
260
  try {
251
261
  const scraper = getScraper(source);
@@ -261,6 +271,7 @@ export class Orchestrator {
261
271
  rawUrls = cachedUrls;
262
272
  metrics.recordCacheHit();
263
273
  } else {
274
+ if (signal?.aborted) throw new Error('操作已取消');
264
275
  logger.info(`[SEARCH] "${keyword}" (target: ${searchCount})...`);
265
276
  rawUrls = await scraper.search(keyword, searchCount, options);
266
277
  if (rawUrls.length > 0) {
@@ -278,8 +289,12 @@ export class Orchestrator {
278
289
  };
279
290
  }
280
291
 
281
- // 根据 quality 模式处理
282
- let urlsToDownload = rawUrls.slice(0, searchCount);
292
+ // 检查是否已中止
293
+ if (signal?.aborted) throw new Error('操作已取消');
294
+
295
+ // 根据 quality 模式处理(限制验证数量避免超时)
296
+ const maxValidate = Math.min(rawUrls.length, count * 2 + 5);
297
+ let urlsToDownload = rawUrls.slice(0, maxValidate);
283
298
  if (prioritizeQuality) {
284
299
  const sortByQuality = qualityMode === 'high';
285
300
  logger.info(`Validating ${urlsToDownload.length} URLs (quality=${qualityMode})...`);
@@ -287,6 +302,7 @@ export class Orchestrator {
287
302
  fetchQuality: sortByQuality,
288
303
  sortByQuality: sortByQuality,
289
304
  minFileSize: minFileSize,
305
+ signal,
290
306
  });
291
307
 
292
308
  // 过滤最小文件大小
@@ -364,10 +380,10 @@ export class Orchestrator {
364
380
  metrics.recordRequest();
365
381
  const requestId = `req_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
366
382
 
367
- // 根据关键词数量动态计算超时时间(每个关键词 8 秒,最少 20 秒,最多 50 秒)
383
+ // 根据关键词数量动态计算超时时间(每个关键词 15 秒,最少 25 秒,最多 50 秒)
368
384
  const keywords = this.parseKeywords(params.query);
369
385
  const keywordCount = keywords.length;
370
- const GLOBAL_TIMEOUT = Math.min(Math.max(keywordCount * 8000, 20000), 50000);
386
+ const GLOBAL_TIMEOUT = Math.min(Math.max(keywordCount * 15000, 25000), 50000);
371
387
 
372
388
  logger.info(`[Orchestrator] Starting request: ${requestId}, keywords: ${keywordCount}, timeout: ${GLOBAL_TIMEOUT/1000}s`);
373
389