smart-image-scraper-mcp 2.13.3 → 2.14.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-image-scraper-mcp",
3
- "version": "2.13.3",
3
+ "version": "2.14.0",
4
4
  "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 搜索、三种模式(link/download/both)、自定义保存路径、防盗链处理、质量控制、尺寸统一",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -74,9 +74,8 @@ export class BingScraper extends BaseScraper {
74
74
  const response = await withRetry(
75
75
  () => httpClient.get(searchUrl, {
76
76
  timeout: 8000, // 8秒超时,快速失败
77
- signal: AbortSignal.timeout(8000),
78
77
  }),
79
- { maxRetries: 1, retryCondition: isRetryableError } // 只重试1次
78
+ { maxRetries: 1, retryCondition: (e) => e.code === 'ECONNRESET' || (e.response?.status >= 500) } // 只重试真正的网络重置或服务器错误,超时不重试
80
79
  );
81
80
 
82
81
  if (response.status !== 200) {
@@ -73,7 +73,6 @@ export class GoogleScraper extends BaseScraper {
73
73
  const response = await withRetry(
74
74
  () => httpClient.get(searchUrl, {
75
75
  timeout: 8000, // 8秒超时,快速失败
76
- signal: AbortSignal.timeout(8000),
77
76
  headers: {
78
77
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
79
78
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -81,7 +80,7 @@ export class GoogleScraper extends BaseScraper {
81
80
  'Referer': 'https://www.google.com/',
82
81
  },
83
82
  }),
84
- { maxRetries: 1, retryCondition: isRetryableError } // 只重试1次
83
+ { maxRetries: 1, retryCondition: (e) => e.code === 'ECONNRESET' || (e.response?.status >= 500) } // 只重试真正的网络重置或服务器错误,超时不重试
85
84
  );
86
85
 
87
86
  if (response.status !== 200) {
@@ -11,13 +11,10 @@ import httpClient from '../infrastructure/httpClient.js';
11
11
  import logger from '../infrastructure/logger.js';
12
12
  import config from '../config/index.js';
13
13
 
14
- // 全局共享的下载并发限制器(避免每个实例独立限制)
15
- const globalDownloadLimit = pLimit(config.MAX_DOWNLOAD_CONCURRENCY || 10);
16
-
17
14
  export class FileManager {
18
15
  constructor(options = {}) {
19
16
  this.saveRoot = options.saveRoot || config.SAVE_ROOT;
20
- this.limit = globalDownloadLimit; // 使用全局共享限制器
17
+ this.limit = pLimit(config.MAX_DOWNLOAD_CONCURRENCY || 3);
21
18
  }
22
19
 
23
20
  /**
@@ -183,7 +180,6 @@ export class FileManager {
183
180
  maxContentLength: 50 * 1024 * 1024, // 最大50MB
184
181
  maxBodyLength: 50 * 1024 * 1024,
185
182
  headers: downloadHeaders,
186
- signal: AbortSignal.timeout(12000), // 12秒硬超时,防止连接挂起
187
183
  });
188
184
 
189
185
  if (response.status !== 200) {
@@ -305,13 +301,22 @@ export class FileManager {
305
301
  * 批量下载图片
306
302
  * @param {string[]} urls - URL列表
307
303
  * @param {string} keyword - 搜索关键词
304
+ * @param {number} maxSuccess - 达到此成功数后跳过剩余下载(提前退出)
308
305
  * @returns {Promise<{success: Array, failed: Array}>}
309
306
  */
310
- async downloadMany(urls, keyword) {
307
+ async downloadMany(urls, keyword, maxSuccess = Infinity) {
311
308
  logger.info(`Downloading ${urls.length} images for "${keyword}" with concurrency ${config.MAX_DOWNLOAD_CONCURRENCY}`);
312
309
 
310
+ let successCount = 0;
313
311
  const results = await Promise.all(
314
- urls.map(url => this.limit(() => this.downloadOne(url, keyword)))
312
+ urls.map(url => this.limit(async () => {
313
+ if (successCount >= maxSuccess) {
314
+ return { success: false, url, error: 'skipped' };
315
+ }
316
+ const result = await this.downloadOne(url, keyword);
317
+ if (result.success) successCount++;
318
+ return result;
319
+ }))
315
320
  );
316
321
 
317
322
  const success = [];
@@ -9,10 +9,12 @@ import path from 'path';
9
9
  import pLimit from 'p-limit';
10
10
  import logger from '../infrastructure/logger.js';
11
11
 
12
- // sharp 是 CPU 密集型操作,限制并发避免卡死
13
- const imageProcessLimit = pLimit(2);
14
12
 
15
13
  export class ImageProcessor {
14
+ constructor() {
15
+ // sharp 是 CPU 密集型操作,限制并发避免卡死;每个实例独立防止跨请求堵塞
16
+ this._limit = pLimit(2);
17
+ }
16
18
  /**
17
19
  * 预设尺寸配置
18
20
  */
@@ -64,15 +66,16 @@ export class ImageProcessor {
64
66
  return { success: false, path: inputPath, error: '文件不存在' };
65
67
  }
66
68
 
67
- // 读取原始图片信息
68
- const metadata = await sharp(inputPath).metadata();
69
+ // 用 Buffer 读取,避免 Windows 文件句柄锁导致后续 move 失败
70
+ const inputBuffer = await fs.readFile(inputPath);
71
+ const metadata = await sharp(inputBuffer).metadata();
69
72
 
70
73
  // 确定输出路径
71
74
  const finalOutputPath = outputPath || inputPath;
72
75
  const tempPath = inputPath + '.tmp';
73
76
 
74
77
  // 处理图片
75
- await sharp(inputPath)
78
+ await sharp(inputBuffer)
76
79
  .resize(width, height, {
77
80
  fit: fit,
78
81
  position: position,
@@ -131,7 +134,7 @@ export class ImageProcessor {
131
134
  */
132
135
  async processMany(files, options = {}) {
133
136
  const results = await Promise.all(
134
- files.map(file => imageProcessLimit(() => this.processOne(file.path, options)))
137
+ files.map(file => this._limit(() => this.processOne(file.path, options)))
135
138
  );
136
139
 
137
140
  const success = [];
@@ -8,9 +8,6 @@ import httpClient from '../infrastructure/httpClient.js';
8
8
  import logger from '../infrastructure/logger.js';
9
9
  import config from '../config/index.js';
10
10
 
11
- // 使用配置中的并发数,避免硬编码与配置不一致
12
- const globalValidateLimit = pLimit(config.MAX_VALIDATE_CONCURRENCY);
13
-
14
11
  // 已知有严格防盗链保护的域名列表(仅包含确实无法在浏览器直接打开的)
15
12
  // 这些域名的图片在浏览器地址栏直接打开会返回 403、替换图或空白
16
13
  const HOTLINK_PROTECTED_DOMAINS = [
@@ -27,7 +24,7 @@ const HOTLINK_PROTECTED_DOMAINS = [
27
24
 
28
25
  export class LinkValidator {
29
26
  constructor() {
30
- this.limit = globalValidateLimit;
27
+ this.limit = pLimit(config.MAX_VALIDATE_CONCURRENCY);
31
28
  }
32
29
 
33
30
  /**
@@ -320,8 +320,8 @@ export class Orchestrator {
320
320
  // 检查是否已中止
321
321
  if (signal?.aborted) throw new Error('操作已取消');
322
322
 
323
- // 根据 quality 模式处理(限制验证数量避免超时)
324
- const maxValidate = Math.min(rawUrls.length, count * 2 + 5);
323
+ // 根据 quality 模式处理(只多取少量备用,避免下载过多)
324
+ const maxValidate = Math.min(rawUrls.length, count + 3);
325
325
  let urlsToDownload = rawUrls.slice(0, maxValidate);
326
326
  if (prioritizeQuality) {
327
327
  const sortByQuality = qualityMode === 'high';
@@ -347,8 +347,8 @@ export class Orchestrator {
347
347
  logger.info(`Quality filtered: ${urlsToDownload.length} valid URLs`);
348
348
  }
349
349
 
350
- // 下载图片(已按质量排序,高质量优先)
351
- const { success, failed } = await this.fileManager.downloadMany(urlsToDownload, keyword);
350
+ // 下载图片(已按质量排序,高质量优先),达到目标数量后跳过剩余
351
+ const { success, failed } = await this.fileManager.downloadMany(urlsToDownload, keyword, count);
352
352
 
353
353
  // 截取需要的数量
354
354
  let resultDownloads = success.slice(0, count);
@@ -490,11 +490,11 @@ export class Orchestrator {
490
490
 
491
491
  if (fastMode) {
492
492
  // fast 模式:不验证
493
- allValidUrls = rawUrls.slice(0, count * 2 + 5);
493
+ allValidUrls = rawUrls.slice(0, count + 3);
494
494
  qualityModeLabel = '快速模式(跳过验证)';
495
495
  } else {
496
496
  // balanced/high 模式:统一验证所有原始 URL
497
- const maxValidate = Math.min(rawUrls.length, count * 2 + 5);
497
+ const maxValidate = Math.min(rawUrls.length, count + 3);
498
498
  const urlsToValidate = rawUrls.slice(0, maxValidate);
499
499
  const { valid } = await this.linkValidator.validateMany(urlsToValidate, {
500
500
  fetchQuality: prioritizeQuality,
@@ -528,11 +528,11 @@ export class Orchestrator {
528
528
  }
529
529
 
530
530
  // 下载列表:使用所有验证通过的 URL(不过滤防盗链,下载时加 Referer 绕过)
531
- downloadUrls = allValidUrls.slice(0, count * 2);
531
+ downloadUrls = allValidUrls.slice(0, count + 3);
532
532
 
533
533
  if (signal?.aborted) throw new Error('操作已取消');
534
534
 
535
- const { success, failed } = await this.fileManager.downloadMany(downloadUrls, keyword);
535
+ const { success, failed } = await this.fileManager.downloadMany(downloadUrls, keyword, count);
536
536
 
537
537
  let resultDownloads = success.slice(0, count);
538
538