npm - smart-image-scraper-mcp - Version diff - 2.13.3 → 2.14.0 - Mend

smart-image-scraper-mcp 2.13.3 → 2.14.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (7)

package/package.json +1 -1
package/src/providers/bingScraper.js +1 -2
package/src/providers/googleScraper.js +1 -2
package/src/services/fileManager.js +12 -7
package/src/services/imageProcessor.js +9 -6
package/src/services/linkValidator.js +1 -4
package/src/services/orchestrator.js +8 -8

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "smart-image-scraper-mcp",
-  "version": "2.13.3",
+  "version": "2.14.0",
   "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 搜索、三种模式(link/download/both)、自定义保存路径、防盗链处理、质量控制、尺寸统一",
   "main": "src/index.js",
   "type": "module",

package/src/providers/bingScraper.js CHANGED Viewed

@@ -74,9 +74,8 @@ export class BingScraper extends BaseScraper {
       const response = await withRetry(
         () => httpClient.get(searchUrl, {
           timeout: 8000,  // 8秒超时，快速失败
-          signal: AbortSignal.timeout(8000),
         }),
-        { maxRetries: 1, retryCondition: isRetryableError } // 只重试1次
+        { maxRetries: 1, retryCondition: (e) => e.code === 'ECONNRESET' || (e.response?.status >= 500) } // 只重试真正的网络重置或服务器错误，超时不重试
       );
       if (response.status !== 200) {

package/src/providers/googleScraper.js CHANGED Viewed

@@ -73,7 +73,6 @@ export class GoogleScraper extends BaseScraper {
       const response = await withRetry(
         () => httpClient.get(searchUrl, {
           timeout: 8000,  // 8秒超时，快速失败
-          signal: AbortSignal.timeout(8000),
           headers: {
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -81,7 +80,7 @@ export class GoogleScraper extends BaseScraper {
             'Referer': 'https://www.google.com/',
           },
         }),
-        { maxRetries: 1, retryCondition: isRetryableError } // 只重试1次
+        { maxRetries: 1, retryCondition: (e) => e.code === 'ECONNRESET' || (e.response?.status >= 500) } // 只重试真正的网络重置或服务器错误，超时不重试
       );
       if (response.status !== 200) {

package/src/services/fileManager.js CHANGED Viewed

@@ -11,13 +11,10 @@ import httpClient from '../infrastructure/httpClient.js';
 import logger from '../infrastructure/logger.js';
 import config from '../config/index.js';
-// 全局共享的下载并发限制器（避免每个实例独立限制）
-const globalDownloadLimit = pLimit(config.MAX_DOWNLOAD_CONCURRENCY || 10);
 export class FileManager {
   constructor(options = {}) {
     this.saveRoot = options.saveRoot || config.SAVE_ROOT;
-    this.limit = globalDownloadLimit; // 使用全局共享限制器
+    this.limit = pLimit(config.MAX_DOWNLOAD_CONCURRENCY || 3);
   }
   /**
@@ -183,7 +180,6 @@ export class FileManager {
         maxContentLength: 50 * 1024 * 1024, // 最大50MB
         maxBodyLength: 50 * 1024 * 1024,
         headers: downloadHeaders,
-        signal: AbortSignal.timeout(12000), // 12秒硬超时，防止连接挂起
       });
       if (response.status !== 200) {
@@ -305,13 +301,22 @@ export class FileManager {
    * 批量下载图片
    * @param {string[]} urls - URL列表
    * @param {string} keyword - 搜索关键词
+   * @param {number} maxSuccess - 达到此成功数后跳过剩余下载（提前退出）
    * @returns {Promise<{success: Array, failed: Array}>}
    */
-  async downloadMany(urls, keyword) {
+  async downloadMany(urls, keyword, maxSuccess = Infinity) {
     logger.info(`Downloading ${urls.length} images for "${keyword}" with concurrency ${config.MAX_DOWNLOAD_CONCURRENCY}`);
+    let successCount = 0;
     const results = await Promise.all(
-      urls.map(url => this.limit(() => this.downloadOne(url, keyword)))
+      urls.map(url => this.limit(async () => {
+        if (successCount >= maxSuccess) {
+          return { success: false, url, error: 'skipped' };
+        }
+        const result = await this.downloadOne(url, keyword);
+        if (result.success) successCount++;
+        return result;
+      }))
     );
     const success = [];

package/src/services/imageProcessor.js CHANGED Viewed

@@ -9,10 +9,12 @@ import path from 'path';
 import pLimit from 'p-limit';
 import logger from '../infrastructure/logger.js';
-// sharp 是 CPU 密集型操作，限制并发避免卡死
-const imageProcessLimit = pLimit(2);
 export class ImageProcessor {
+  constructor() {
+    // sharp 是 CPU 密集型操作，限制并发避免卡死；每个实例独立防止跨请求堵塞
+    this._limit = pLimit(2);
+  }
   /**
    * 预设尺寸配置
    */
@@ -64,15 +66,16 @@ export class ImageProcessor {
         return { success: false, path: inputPath, error: '文件不存在' };
       }
-      // 读取原始图片信息
-      const metadata = await sharp(inputPath).metadata();
+      // 用 Buffer 读取，避免 Windows 文件句柄锁导致后续 move 失败
+      const inputBuffer = await fs.readFile(inputPath);
+      const metadata = await sharp(inputBuffer).metadata();
       // 确定输出路径
       const finalOutputPath = outputPath || inputPath;
       const tempPath = inputPath + '.tmp';
       // 处理图片
-      await sharp(inputPath)
+      await sharp(inputBuffer)
         .resize(width, height, {
           fit: fit,
           position: position,
@@ -131,7 +134,7 @@ export class ImageProcessor {
    */
   async processMany(files, options = {}) {
     const results = await Promise.all(
-      files.map(file => imageProcessLimit(() => this.processOne(file.path, options)))
+      files.map(file => this._limit(() => this.processOne(file.path, options)))
     );
     const success = [];

package/src/services/linkValidator.js CHANGED Viewed

@@ -8,9 +8,6 @@ import httpClient from '../infrastructure/httpClient.js';
 import logger from '../infrastructure/logger.js';
 import config from '../config/index.js';
-// 使用配置中的并发数，避免硬编码与配置不一致
-const globalValidateLimit = pLimit(config.MAX_VALIDATE_CONCURRENCY);
 // 已知有严格防盗链保护的域名列表（仅包含确实无法在浏览器直接打开的）
 // 这些域名的图片在浏览器地址栏直接打开会返回 403、替换图或空白
 const HOTLINK_PROTECTED_DOMAINS = [
@@ -27,7 +24,7 @@ const HOTLINK_PROTECTED_DOMAINS = [
 export class LinkValidator {
   constructor() {
-    this.limit = globalValidateLimit;
+    this.limit = pLimit(config.MAX_VALIDATE_CONCURRENCY);
   }
   /**

package/src/services/orchestrator.js CHANGED Viewed

@@ -320,8 +320,8 @@ export class Orchestrator {
       // 检查是否已中止
       if (signal?.aborted) throw new Error('操作已取消');
-      // 根据 quality 模式处理（限制验证数量避免超时）
-      const maxValidate = Math.min(rawUrls.length, count * 2 + 5);
+      // 根据 quality 模式处理（只多取少量备用，避免下载过多）
+      const maxValidate = Math.min(rawUrls.length, count + 3);
       let urlsToDownload = rawUrls.slice(0, maxValidate);
       if (prioritizeQuality) {
         const sortByQuality = qualityMode === 'high';
@@ -347,8 +347,8 @@ export class Orchestrator {
         logger.info(`Quality filtered: ${urlsToDownload.length} valid URLs`);
       }
-      // 下载图片（已按质量排序，高质量优先）
-      const { success, failed } = await this.fileManager.downloadMany(urlsToDownload, keyword);
+      // 下载图片（已按质量排序，高质量优先），达到目标数量后跳过剩余
+      const { success, failed } = await this.fileManager.downloadMany(urlsToDownload, keyword, count);
       // 截取需要的数量
       let resultDownloads = success.slice(0, count);
@@ -490,11 +490,11 @@ export class Orchestrator {
       if (fastMode) {
         // fast 模式：不验证
-        allValidUrls = rawUrls.slice(0, count * 2 + 5);
+        allValidUrls = rawUrls.slice(0, count + 3);
         qualityModeLabel = '快速模式（跳过验证）';
       } else {
         // balanced/high 模式：统一验证所有原始 URL
-        const maxValidate = Math.min(rawUrls.length, count * 2 + 5);
+        const maxValidate = Math.min(rawUrls.length, count + 3);
         const urlsToValidate = rawUrls.slice(0, maxValidate);
         const { valid } = await this.linkValidator.validateMany(urlsToValidate, {
           fetchQuality: prioritizeQuality,
@@ -528,11 +528,11 @@ export class Orchestrator {
       }
       // 下载列表：使用所有验证通过的 URL（不过滤防盗链，下载时加 Referer 绕过）
-      downloadUrls = allValidUrls.slice(0, count * 2);
+      downloadUrls = allValidUrls.slice(0, count + 3);
       if (signal?.aborted) throw new Error('操作已取消');
-      const { success, failed } = await this.fileManager.downloadMany(downloadUrls, keyword);
+      const { success, failed } = await this.fileManager.downloadMany(downloadUrls, keyword, count);
       let resultDownloads = success.slice(0, count);