npm - smart-image-scraper-mcp - Versions diffs - 1.0.0 → 1.1.0 - Mend

smart-image-scraper-mcp 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/package.json +2 -1
package/src/index.js +46 -12
package/src/providers/bingScraper.js +22 -4
package/src/providers/googleScraper.js +28 -5
package/src/services/imageProcessor.js +242 -0
package/src/services/index.js +1 -0
package/src/services/linkValidator.js +133 -25
package/src/services/orchestrator.js +60 -8

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "smart-image-scraper-mcp",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 图片搜索、验证和下载",
   "main": "src/index.js",
   "type": "module",
@@ -38,6 +38,7 @@
     "fs-extra": "^11.2.0",
     "https-proxy-agent": "^7.0.0",
     "p-limit": "^5.0.0",
+    "sharp": "^0.34.5",
     "socks-proxy-agent": "^8.0.0",
     "uuid": "^9.0.0"
   }

package/src/index.js CHANGED Viewed

@@ -46,20 +46,35 @@ const orchestrator = new Orchestrator();
 // 定义 Tool Schema
 const SMART_SCRAPER_TOOL = {
   name: 'smart_scraper',
-  description: `全网智能图片抓取工具。
+  description: `全网智能图片抓取工具 - 从 Bing/Google 搜索、验证、下载高质量图片。
-【功能】从 Bing/Google 搜索图片，支持验证链接和下载到本地。
+【核心功能】
+1. 搜索图片链接 (mode=link) - 返回验证过的图片URL列表
+2. 下载图片 (mode=download) - 下载到本地，自动按质量排序优先高清
+3. 尺寸统一 (targetSize) - 下载后自动裁剪/缩放到指定尺寸
+4. 宽高比过滤 (aspect) - 横向/竖向/正方形
-【使用场景】
-- 用户说"找图片"、"搜索图片" → 使用 mode="link"
-- 用户说"下载图片"、"保存图片" → 使用 mode="download"
-- 用户说"高清"、"壁纸" → 使用 size="large" 或 "wallpaper"
-- 用户说"猫和狗的图片" → 使用 query="猫,狗"（逗号分隔）
+【参数选择指南】
+- 用户要"找/搜索/查找图片" → mode="link"
+- 用户要"下载/保存/获取图片" → mode="download"
+- 用户要"高清/大图/壁纸" → size="large" 或 "wallpaper"
+- 用户要"电脑壁纸/横屏/横向" → aspect="wide"
+- 用户要"手机壁纸/竖屏/竖向" → aspect="tall"
+- 用户要"统一尺寸/固定大小" → targetSize="1920x1080" 或预设名
+- 用户要"多种类型图片" → query="猫,狗,鸟"（英文逗号分隔）
-【示例调用】
-1. 搜索10张猫咪图片链接: {"query":"猫咪","mode":"link","count":10}
-2. 下载20张风景壁纸: {"query":"风景","mode":"download","count":20,"size":"wallpaper"}
-3. 批量下载多类图片: {"query":"猫,狗,鸟","mode":"download","count":5}`,
+【预设尺寸名称】
+- 电脑壁纸: desktop_1080p(1920x1080), desktop_2k(2560x1440), desktop_4k(3840x2160)
+- 手机壁纸: mobile_hd(1080x1920), mobile_2k(1440x2560)
+- 正方形: square_1080(1080x1080), square_512(512x512)
+- 社交媒体: instagram(1080x1080), twitter(1200x675), facebook(1200x630)
+【调用示例】
+1. 搜索5张猫的图片: {"query":"可爱的猫","mode":"link","count":5}
+2. 下载10张高清风景图: {"query":"风景","mode":"download","count":10,"size":"large"}
+3. 下载电脑壁纸并统一为1080p: {"query":"风景","mode":"download","count":10,"aspect":"wide","targetSize":"desktop_1080p"}
+4. 下载手机壁纸: {"query":"动漫","mode":"download","count":10,"aspect":"tall","targetSize":"mobile_hd"}
+5. 批量下载多类图片: {"query":"猫,狗,兔子","mode":"download","count":5}`,
   inputSchema: {
     type: 'object',
     properties: {
@@ -74,7 +89,7 @@ const SMART_SCRAPER_TOOL = {
       },
       count: {
         type: 'number',
-        description: '每个关键词获取的图片数量。范围1-100，推荐10-20。用户说"几张"用5-10，说"很多"用20-30',
+        description: '每个关键词获取的图片数量。范围1-100，推荐1-20。用户说"几张"用5-10，说"很多"用20-30',
         default: 10,
       },
       source: {
@@ -89,6 +104,22 @@ const SMART_SCRAPER_TOOL = {
         description: '图片尺寸。all=不限；small=小图/图标；medium=中图；large=大图/高清；wallpaper=壁纸级别(1080p+)',
         default: 'all',
       },
+      aspect: {
+        type: 'string',
+        enum: ['all', 'wide', 'tall', 'square'],
+        description: '图片宽高比。all=不限；wide=横向/宽屏(电脑壁纸)；tall=纵向/竖屏(手机壁纸)；square=正方形',
+        default: 'all',
+      },
+      targetSize: {
+        type: 'string',
+        description: '目标尺寸，下载后统一裁剪/缩放到此尺寸。格式: "宽x高"(如"1920x1080")或预设名(desktop_1080p/desktop_2k/desktop_4k/mobile_hd/mobile_2k/square_1080/instagram/twitter/facebook)',
+      },
+      fit: {
+        type: 'string',
+        enum: ['cover', 'contain', 'fill'],
+        description: '尺寸处理时的适应模式。cover=裁剪填充(默认,不留白)；contain=包含留白；fill=拉伸填充',
+        default: 'cover',
+      },
       safeSearch: {
         type: 'string',
         enum: ['off', 'moderate', 'strict'],
@@ -221,6 +252,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
           count: count,
           source: source,
           size: args.size || 'all',
+          aspect: args.aspect || 'all',
+          targetSize: args.targetSize || null,
+          fit: args.fit || 'cover',
           safeSearch: args.safeSearch || 'moderate',
         }),
         timeoutPromise,

package/src/providers/bingScraper.js CHANGED Viewed

@@ -34,9 +34,9 @@ export class BingScraper extends BaseScraper {
         logger.info(`Searching Bing Images: ${keyword}, offset: ${offset}`);
         const response = await withRetry(
-          () => httpClient.get(searchUrl),
+          () => httpClient.get(searchUrl, { timeout: 15000 }), // 添加超时
           {
-            maxRetries: 3,
+            maxRetries: 2, // 减少重试次数
             retryCondition: isRetryableError,
           }
         );
@@ -47,7 +47,7 @@ export class BingScraper extends BaseScraper {
         }
         // 添加请求间隔，防止被封
-        await this._delay(300 + Math.random() * 200);
+        await this._delay(200 + Math.random() * 100);
         const newUrls = this._parseResponse(response.data);
@@ -62,10 +62,15 @@ export class BingScraper extends BaseScraper {
           }
         });
+        // 如果已经获取足够数量，直接跳出
+        if (urls.size >= limit) {
+          break;
+        }
         offset += pageSize;
         // 防止无限循环
-        if (offset > 500) {
+        if (offset > 200) { // 减少最大偏移量
           logger.warn('Reached maximum offset limit');
           break;
         }
@@ -74,6 +79,7 @@ export class BingScraper extends BaseScraper {
       logger.error(`Bing search error for "${keyword}"`, { message: error.message });
     }
+    logger.info(`Bing search complete: found ${urls.size} URLs for "${keyword}"`);
     return Array.from(urls);
   }
@@ -90,6 +96,14 @@ export class BingScraper extends BaseScraper {
       'all': '',
     };
+    // 宽高比过滤映射
+    const aspectMap = {
+      'wide': '+filterui:aspect-wide',       // 横向/宽屏 (16:9, 4:3 等)
+      'tall': '+filterui:aspect-tall',       // 纵向/竖屏 (9:16, 3:4 等)
+      'square': '+filterui:aspect-square',   // 正方形 (1:1)
+      'all': '',
+    };
     // 安全搜索映射
     const safeSearchMap = {
       'off': 'off',
@@ -98,12 +112,16 @@ export class BingScraper extends BaseScraper {
     };
     const size = this.options?.size || 'all';
+    const aspect = this.options?.aspect || 'all';
     const safeSearch = this.options?.safeSearch || 'moderate';
     let qft = '+filterui:photo-photo';
     if (sizeMap[size]) {
       qft += sizeMap[size];
     }
+    if (aspectMap[aspect]) {
+      qft += aspectMap[aspect];
+    }
     const params = new URLSearchParams({
       q: keyword,

package/src/providers/googleScraper.js CHANGED Viewed

@@ -29,12 +29,13 @@ export class GoogleScraper extends BaseScraper {
     const pageSize = 20;
     try {
-      while (urls.size < limit && start < 200) {
+      while (urls.size < limit && start < 100) { // 减少最大偏移
         const searchUrl = this._buildSearchUrl(keyword, start);
         logger.info(`Searching Google Images: ${keyword}, start: ${start}`);
         const response = await withRetry(
           () => httpClient.get(searchUrl, {
+            timeout: 15000, // 添加超时
             headers: {
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -43,7 +44,7 @@ export class GoogleScraper extends BaseScraper {
             },
           }),
           {
-            maxRetries: 3,
+            maxRetries: 2, // 减少重试次数
             retryCondition: isRetryableError,
           }
         );
@@ -66,15 +67,21 @@ export class GoogleScraper extends BaseScraper {
           }
         });
+        // 如果已经获取足够数量，直接跳出
+        if (urls.size >= limit) {
+          break;
+        }
         start += pageSize;
         // 添加延迟防止被封
-        await this._delay(500 + Math.random() * 500);
+        await this._delay(300 + Math.random() * 200);
       }
     } catch (error) {
       logger.error(`Google search error for "${keyword}"`, { message: error.message });
     }
+    logger.info(`Google search complete: found ${urls.size} URLs for "${keyword}"`);
     return Array.from(urls);
   }
@@ -91,6 +98,14 @@ export class GoogleScraper extends BaseScraper {
       'all': '',
     };
+    // 宽高比过滤映射 (Google 使用 iar 参数)
+    const aspectMap = {
+      'wide': 'iar:w',      // 横向/宽屏
+      'tall': 'iar:t',      // 纵向/竖屏
+      'square': 'iar:s',    // 正方形
+      'all': '',
+    };
     // 安全搜索映射
     const safeSearchMap = {
       'off': 'off',
@@ -99,6 +114,7 @@ export class GoogleScraper extends BaseScraper {
     };
     const size = this.options?.size || 'all';
+    const aspect = this.options?.aspect || 'all';
     const safeSearch = this.options?.safeSearch || 'moderate';
     const params = new URLSearchParams({
@@ -109,9 +125,16 @@ export class GoogleScraper extends BaseScraper {
       safe: safeSearchMap[safeSearch] || 'medium',
     });
-    // 添加尺寸过滤
+    // 构建 tbs 参数（尺寸 + 宽高比）
+    const tbsParts = [];
     if (sizeMap[size]) {
-      params.set('tbs', sizeMap[size]);
+      tbsParts.push(sizeMap[size]);
+    }
+    if (aspectMap[aspect]) {
+      tbsParts.push(aspectMap[aspect]);
+    }
+    if (tbsParts.length > 0) {
+      params.set('tbs', tbsParts.join(','));
     }
     return `${this.baseUrl}?${params.toString()}`;

package/src/services/imageProcessor.js ADDED Viewed

@@ -0,0 +1,242 @@
+/**
+ * 图片处理器
+ * 负责图片的裁剪、缩放、尺寸统一处理
+ */
+import sharp from 'sharp';
+import fs from 'fs-extra';
+import path from 'path';
+import logger from '../infrastructure/logger.js';
+export class ImageProcessor {
+  /**
+   * 预设尺寸配置
+   */
+  static PRESETS = {
+    // 电脑壁纸
+    'desktop_1080p': { width: 1920, height: 1080 },
+    'desktop_2k': { width: 2560, height: 1440 },
+    'desktop_4k': { width: 3840, height: 2160 },
+    // 手机壁纸
+    'mobile_hd': { width: 1080, height: 1920 },
+    'mobile_2k': { width: 1440, height: 2560 },
+    // 正方形
+    'square_1080': { width: 1080, height: 1080 },
+    'square_512': { width: 512, height: 512 },
+    // 社交媒体
+    'instagram': { width: 1080, height: 1080 },
+    'twitter': { width: 1200, height: 675 },
+    'facebook': { width: 1200, height: 630 },
+  };
+  /**
+   * 处理单张图片
+   * @param {string} inputPath - 输入图片路径
+   * @param {Object} options - 处理选项
+   * @param {number} options.width - 目标宽度
+   * @param {number} options.height - 目标高度
+   * @param {string} options.fit - 适应模式: cover(裁剪填充), contain(包含留白), fill(拉伸)
+   * @param {string} options.position - 裁剪位置: center, top, bottom, left, right
+   * @param {string} options.outputPath - 输出路径（可选，默认覆盖原文件）
+   * @returns {Promise<{success: boolean, path?: string, error?: string, metadata?: Object}>}
+   */
+  async processOne(inputPath, options = {}) {
+    const {
+      width,
+      height,
+      fit = 'cover',
+      position = 'center',
+      outputPath = null,
+      quality = 90,
+    } = options;
+    if (!width || !height) {
+      return { success: false, path: inputPath, error: '必须指定目标宽度和高度' };
+    }
+    try {
+      // 检查文件是否存在
+      if (!await fs.pathExists(inputPath)) {
+        return { success: false, path: inputPath, error: '文件不存在' };
+      }
+      // 读取原始图片信息
+      const metadata = await sharp(inputPath).metadata();
+      // 确定输出路径
+      const finalOutputPath = outputPath || inputPath;
+      const tempPath = inputPath + '.tmp';
+      // 处理图片
+      await sharp(inputPath)
+        .resize(width, height, {
+          fit: fit,
+          position: position,
+          withoutEnlargement: false, // 允许放大小图
+        })
+        .jpeg({ quality: quality })
+        .toFile(tempPath);
+      // 替换原文件
+      await fs.move(tempPath, finalOutputPath, { overwrite: true });
+      // 获取处理后的信息
+      const newMetadata = await sharp(finalOutputPath).metadata();
+      logger.debug(`Processed image: ${inputPath} -> ${width}x${height}`);
+      return {
+        success: true,
+        path: finalOutputPath,
+        metadata: {
+          original: { width: metadata.width, height: metadata.height },
+          processed: { width: newMetadata.width, height: newMetadata.height },
+        },
+      };
+    } catch (error) {
+      logger.error(`Image processing error: ${inputPath}`, { error: error.message });
+      // 清理临时文件
+      try {
+        await fs.remove(inputPath + '.tmp');
+      } catch {}
+      return { success: false, path: inputPath, error: error.message };
+    }
+  }
+  /**
+   * 使用预设处理图片
+   * @param {string} inputPath - 输入图片路径
+   * @param {string} preset - 预设名称
+   * @param {Object} extraOptions - 额外选项
+   * @returns {Promise<Object>}
+   */
+  async processWithPreset(inputPath, preset, extraOptions = {}) {
+    const presetConfig = ImageProcessor.PRESETS[preset];
+    if (!presetConfig) {
+      return { success: false, path: inputPath, error: `未知预设: ${preset}` };
+    }
+    return this.processOne(inputPath, {
+      ...presetConfig,
+      ...extraOptions,
+    });
+  }
+  /**
+   * 批量处理图片
+   * @param {Array<{path: string}>} files - 文件列表
+   * @param {Object} options - 处理选项
+   * @returns {Promise<{success: Array, failed: Array}>}
+   */
+  async processMany(files, options = {}) {
+    const results = await Promise.all(
+      files.map(file => this.processOne(file.path, options))
+    );
+    const success = [];
+    const failed = [];
+    for (let i = 0; i < results.length; i++) {
+      const result = results[i];
+      if (result.success) {
+        success.push({
+          url: files[i].url,
+          path: result.path,
+          metadata: result.metadata,
+        });
+      } else {
+        failed.push({
+          url: files[i].url,
+          path: files[i].path,
+          error: result.error,
+        });
+      }
+    }
+    logger.info(`Image processing complete: ${success.length} success, ${failed.length} failed`);
+    return { success, failed };
+  }
+  /**
+   * 获取图片尺寸信息
+   * @param {string} imagePath - 图片路径
+   * @returns {Promise<{width: number, height: number, format: string} | null>}
+   */
+  async getImageInfo(imagePath) {
+    try {
+      const metadata = await sharp(imagePath).metadata();
+      return {
+        width: metadata.width,
+        height: metadata.height,
+        format: metadata.format,
+        aspectRatio: metadata.width / metadata.height,
+      };
+    } catch (error) {
+      logger.warn(`Failed to get image info: ${imagePath}`, { error: error.message });
+      return null;
+    }
+  }
+  /**
+   * 检查图片是否符合目标尺寸
+   * @param {string} imagePath - 图片路径
+   * @param {number} targetWidth - 目标宽度
+   * @param {number} targetHeight - 目标高度
+   * @param {number} tolerance - 容差百分比 (0-1)
+   * @returns {Promise<boolean>}
+   */
+  async matchesSize(imagePath, targetWidth, targetHeight, tolerance = 0.1) {
+    const info = await this.getImageInfo(imagePath);
+    if (!info) return false;
+    const targetRatio = targetWidth / targetHeight;
+    const ratioDiff = Math.abs(info.aspectRatio - targetRatio) / targetRatio;
+    return ratioDiff <= tolerance;
+  }
+  /**
+   * 解析目标尺寸参数
+   * @param {string|Object} target - 目标尺寸（预设名称或 {width, height} 对象或 "1920x1080" 字符串）
+   * @returns {{width: number, height: number} | null}
+   */
+  parseTargetSize(target) {
+    if (!target) return null;
+    // 如果是预设名称
+    if (typeof target === 'string' && ImageProcessor.PRESETS[target]) {
+      return ImageProcessor.PRESETS[target];
+    }
+    // 如果是 "宽x高" 格式的字符串
+    if (typeof target === 'string') {
+      const match = target.match(/^(\d+)[xX×](\d+)$/);
+      if (match) {
+        return {
+          width: parseInt(match[1], 10),
+          height: parseInt(match[2], 10),
+        };
+      }
+    }
+    // 如果是对象
+    if (typeof target === 'object' && target.width && target.height) {
+      return {
+        width: parseInt(target.width, 10),
+        height: parseInt(target.height, 10),
+      };
+    }
+    return null;
+  }
+  /**
+   * 获取所有可用预设
+   * @returns {Object}
+   */
+  static getPresets() {
+    return { ...ImageProcessor.PRESETS };
+  }
+}
+export default ImageProcessor;

package/src/services/index.js CHANGED Viewed

@@ -4,4 +4,5 @@
 export { LinkValidator } from './linkValidator.js';
 export { FileManager } from './fileManager.js';
+export { ImageProcessor } from './imageProcessor.js';
 export { Orchestrator } from './orchestrator.js';

package/src/services/linkValidator.js CHANGED Viewed

@@ -31,21 +31,24 @@ export class LinkValidator {
   /**
    * 验证单个链接
    * @param {string} url - 图片URL
-   * @returns {Promise<{url: string, valid: boolean, error?: string}>}
+   * @param {boolean} fetchQuality - 是否获取质量信息
+   * @returns {Promise<{url: string, valid: boolean, error?: string, quality?: Object}>}
    */
-  async validateOne(url) {
+  async validateOne(url, fetchQuality = false) {
     // 先验证 URL 格式
     if (!this._isValidUrlFormat(url)) {
       return { url, valid: false, error: 'Invalid URL format' };
     }
     const controller = new AbortController();
-    const timeoutId = setTimeout(() => controller.abort(), 8000);
+    const timeoutId = setTimeout(() => {
+      controller.abort();
+    }, 5000); // 缩短超时时间到5秒
     try {
       const response = await httpClient.head(url, {
-        timeout: 5000,
-        maxRedirects: 3,
+        timeout: 4000, // 缩短超时
+        maxRedirects: 2, // 减少重定向次数
         signal: controller.signal,
       });
@@ -56,32 +59,118 @@ export class LinkValidator {
       const isValidStatus = response.status === 200;
       if (isValidStatus && isImage) {
-        return { url, valid: true };
-      }
-      // 如果 HEAD 请求失败，尝试 GET 请求（某些服务器不支持 HEAD）
-      if (!isValidStatus || !isImage) {
-        return await this._validateWithGet(url);
+        // 获取质量信息
+        const quality = fetchQuality ? this._extractQualityInfo(response.headers, url) : null;
+        return { url, valid: true, quality };
       }
-      return { url, valid: false, error: `Invalid response: status=${response.status}, contentType=${contentType}` };
+      // HEAD 失败不再尝试 GET，直接返回失败（加快速度）
+      return { url, valid: false, error: `Invalid: status=${response.status}` };
     } catch (error) {
       clearTimeout(timeoutId);
-      // 如果是取消错误，直接返回失败
-      if (error.name === 'AbortError' || error.code === 'ERR_CANCELED') {
-        return { url, valid: false, error: 'Request timeout' };
+      // 超时或取消，直接返回失败
+      if (error.name === 'AbortError' || error.code === 'ERR_CANCELED' || error.code === 'ECONNABORTED') {
+        return { url, valid: false, error: 'Timeout' };
       }
-      // HEAD 请求失败，尝试 GET
-      return await this._validateWithGet(url);
+      // 其他错误也直接返回失败（不再尝试 GET）
+      return { url, valid: false, error: error.message || 'Request failed' };
     }
   }
+  /**
+   * 从响应头和URL提取质量信息
+   */
+  _extractQualityInfo(headers, url) {
+    const contentLength = parseInt(headers['content-length'] || '0', 10);
+    const contentType = headers['content-type'] || '';
+    // 从 URL 提取可能的尺寸信息
+    const sizeHints = this._extractSizeFromUrl(url);
+    // 计算质量分数
+    let score = 0;
+    // 文件大小评分（越大通常质量越高，但有上限）
+    if (contentLength > 0) {
+      if (contentLength > 1024 * 1024) score += 50;       // >1MB
+      else if (contentLength > 500 * 1024) score += 40;   // >500KB
+      else if (contentLength > 200 * 1024) score += 30;   // >200KB
+      else if (contentLength > 100 * 1024) score += 20;   // >100KB
+      else if (contentLength > 50 * 1024) score += 10;    // >50KB
+      else score += 5;                                     // <50KB
+    }
+    // 格式评分
+    if (contentType.includes('png')) score += 10;         // PNG 无损
+    else if (contentType.includes('webp')) score += 8;    // WebP 高效
+    else if (contentType.includes('jpeg') || contentType.includes('jpg')) score += 5;
+    // URL 中的尺寸提示评分
+    if (sizeHints.width && sizeHints.height) {
+      const pixels = sizeHints.width * sizeHints.height;
+      if (pixels >= 3840 * 2160) score += 30;             // 4K+
+      else if (pixels >= 1920 * 1080) score += 25;        // 1080p+
+      else if (pixels >= 1280 * 720) score += 15;         // 720p+
+      else if (pixels >= 640 * 480) score += 5;           // VGA+
+    }
+    // URL 质量关键词评分
+    const urlLower = url.toLowerCase();
+    if (urlLower.includes('original') || urlLower.includes('full')) score += 15;
+    if (urlLower.includes('hd') || urlLower.includes('hq')) score += 10;
+    if (urlLower.includes('large') || urlLower.includes('big')) score += 8;
+    if (urlLower.includes('thumb') || urlLower.includes('small')) score -= 20;
+    if (urlLower.includes('preview') || urlLower.includes('mini')) score -= 15;
+    return {
+      contentLength,
+      contentType,
+      estimatedWidth: sizeHints.width,
+      estimatedHeight: sizeHints.height,
+      score,
+    };
+  }
+  /**
+   * 从 URL 提取可能的尺寸信息
+   */
+  _extractSizeFromUrl(url) {
+    const result = { width: null, height: null };
+    // 常见模式: 1920x1080, 1920_1080, w=1920&h=1080, width=1920, etc.
+    const patterns = [
+      /(\d{3,4})[x×X](\d{3,4})/,                    // 1920x1080
+      /(\d{3,4})_(\d{3,4})/,                          // 1920_1080
+      /[wW](?:idth)?[=:](\d{3,4}).*[hH](?:eight)?[=:](\d{3,4})/, // w=1920&h=1080
+      /[hH](?:eight)?[=:](\d{3,4}).*[wW](?:idth)?[=:](\d{3,4})/, // h=1080&w=1920
+    ];
+    for (const pattern of patterns) {
+      const match = url.match(pattern);
+      if (match) {
+        const num1 = parseInt(match[1], 10);
+        const num2 = parseInt(match[2], 10);
+        // 确定哪个是宽度哪个是高度
+        if (num1 > num2) {
+          result.width = num1;
+          result.height = num2;
+        } else {
+          result.width = num2;
+          result.height = num1;
+        }
+        break;
+      }
+    }
+    return result;
+  }
   /**
    * 使用 GET 请求验证（某些服务器不支持 HEAD）
    */
-  async _validateWithGet(url) {
+  async _validateWithGet(url, fetchQuality = false) {
     const controller = new AbortController();
     const timeoutId = setTimeout(() => controller.abort(), 8000);
@@ -103,7 +192,8 @@ export class LinkValidator {
       const isValidStatus = response.status === 200 || response.status === 206;
       if (isValidStatus && isImage) {
-        return { url, valid: true };
+        const quality = fetchQuality ? this._extractQualityInfo(response.headers, url) : null;
+        return { url, valid: true, quality };
       }
       return { url, valid: false, error: `GET validation failed: status=${response.status}` };
@@ -120,27 +210,45 @@ export class LinkValidator {
   /**
    * 批量验证链接
    * @param {string[]} urls - URL列表
-   * @returns {Promise<{valid: string[], invalid: {url: string, error: string}[]}>}
+   * @param {Object} options - 选项
+   * @param {boolean} options.fetchQuality - 是否获取质量信息
+   * @param {boolean} options.sortByQuality - 是否按质量排序
+   * @returns {Promise<{valid: Array, invalid: Array}>}
    */
-  async validateMany(urls) {
-    logger.info(`Validating ${urls.length} URLs with concurrency ${config.MAX_VALIDATE_CONCURRENCY}`);
+  async validateMany(urls, options = {}) {
+    const { fetchQuality = false, sortByQuality = false } = options;
+    logger.info(`Validating ${urls.length} URLs with concurrency ${config.MAX_VALIDATE_CONCURRENCY}${fetchQuality ? ' (with quality check)' : ''}`);
     const results = await Promise.all(
-      urls.map(url => this.limit(() => this.validateOne(url)))
+      urls.map(url => this.limit(() => this.validateOne(url, fetchQuality)))
     );
-    const valid = [];
+    let valid = [];
     const invalid = [];
     for (const result of results) {
       if (result.valid) {
-        valid.push(result.url);
+        valid.push({
+          url: result.url,
+          quality: result.quality,
+        });
       } else {
         invalid.push({ url: result.url, error: result.error });
         logger.debug(`Invalid URL: ${result.url}`, { error: result.error });
       }
     }
+    // 按质量分数排序（高分优先）
+    if (sortByQuality && fetchQuality) {
+      valid.sort((a, b) => {
+        const scoreA = a.quality?.score || 0;
+        const scoreB = b.quality?.score || 0;
+        return scoreB - scoreA;
+      });
+      logger.info(`Sorted ${valid.length} URLs by quality score`);
+    }
     logger.info(`Validation complete: ${valid.length} valid, ${invalid.length} invalid`);
     return { valid, invalid };
   }

package/src/services/orchestrator.js CHANGED Viewed

@@ -7,6 +7,7 @@ import pLimit from 'p-limit';
 import { getScraper } from '../providers/index.js';
 import { LinkValidator } from './linkValidator.js';
 import { FileManager } from './fileManager.js';
+import { ImageProcessor } from './imageProcessor.js';
 import logger from '../infrastructure/logger.js';
 import config from '../config/index.js';
@@ -14,6 +15,7 @@ export class Orchestrator {
   constructor() {
     this.linkValidator = new LinkValidator();
     this.fileManager = new FileManager();
+    this.imageProcessor = new ImageProcessor();
     this.keywordLimit = pLimit(config.MAX_KEYWORD_CONCURRENCY);
   }
@@ -43,15 +45,21 @@ export class Orchestrator {
    */
   async processKeywordLink(keyword, count, source, options = {}) {
     const startTime = Date.now();
+    // Link 模式默认不进行质量评估（加快速度），除非明确要求
+    const prioritizeQuality = options.prioritizeQuality === true;
     try {
       // 获取搜索源
       const scraper = getScraper(source);
+      logger.info(`Searching for "${keyword}"...`);
       // 搜索图片（多获取一些以弥补验证失败的损失）
       const searchCount = Math.ceil(count * 1.5);
       const rawUrls = await scraper.search(keyword, searchCount, options);
+      logger.info(`Found ${rawUrls.length} URLs for "${keyword}"`);
       if (rawUrls.length === 0) {
         return {
           keyword,
@@ -62,10 +70,13 @@ export class Orchestrator {
       }
       // 验证链接
-      const { valid, invalid } = await this.linkValidator.validateMany(rawUrls);
+      const { valid, invalid } = await this.linkValidator.validateMany(rawUrls, {
+        fetchQuality: prioritizeQuality,
+        sortByQuality: prioritizeQuality,
+      });
       // 截取需要的数量
-      const resultUrls = valid.slice(0, count);
+      const resultUrls = valid.slice(0, count).map(v => v.url);
       return {
         keyword,
@@ -76,6 +87,7 @@ export class Orchestrator {
         totalInvalid: invalid.length,
         urls: resultUrls,
         count: resultUrls.length,
+        qualitySorted: prioritizeQuality,
         duration: Date.now() - startTime,
       };
     } catch (error) {
@@ -99,6 +111,7 @@ export class Orchestrator {
    */
   async processKeywordDownload(keyword, count, source, options = {}) {
     const startTime = Date.now();
+    const prioritizeQuality = options.prioritizeQuality !== false; // 默认优先高质量
     try {
       // 获取搜索源
@@ -117,11 +130,43 @@ export class Orchestrator {
         };
       }
-      // 下载图片
-      const { success, failed } = await this.fileManager.downloadMany(rawUrls.slice(0, searchCount), keyword);
+      // 先验证链接并按质量排序
+      let urlsToDownload = rawUrls.slice(0, searchCount);
+      if (prioritizeQuality) {
+        logger.info(`Validating and sorting ${urlsToDownload.length} URLs by quality...`);
+        const { valid } = await this.linkValidator.validateMany(urlsToDownload, {
+          fetchQuality: true,
+          sortByQuality: true,
+        });
+        // 使用排序后的URL列表
+        urlsToDownload = valid.map(v => v.url);
+        logger.info(`Quality sorted: ${urlsToDownload.length} valid URLs`);
+      }
+      // 下载图片（已按质量排序，高质量优先）
+      const { success, failed } = await this.fileManager.downloadMany(urlsToDownload, keyword);
       // 截取需要的数量
-      const resultDownloads = success.slice(0, count);
+      let resultDownloads = success.slice(0, count);
+      // 如果指定了目标尺寸，进行后处理
+      let processedCount = 0;
+      let processFailedCount = 0;
+      if (options.targetSize && resultDownloads.length > 0) {
+        const targetSize = this.imageProcessor.parseTargetSize(options.targetSize);
+        if (targetSize) {
+          logger.info(`Processing images to ${targetSize.width}x${targetSize.height}`);
+          const processResult = await this.imageProcessor.processMany(resultDownloads, {
+            width: targetSize.width,
+            height: targetSize.height,
+            fit: options.fit || 'cover',
+            position: options.position || 'center',
+          });
+          resultDownloads = processResult.success;
+          processedCount = processResult.success.length;
+          processFailedCount = processResult.failed.length;
+        }
+      }
       // 保存元数据
       let metadataPath = null;
@@ -136,10 +181,13 @@ export class Orchestrator {
         totalSearched: rawUrls.length,
         totalDownloaded: success.length,
         totalFailed: failed.length,
+        totalProcessed: processedCount,
+        totalProcessFailed: processFailedCount,
         files: resultDownloads,
         count: resultDownloads.length,
         saveDir: this.fileManager.getKeywordDir(keyword),
         metadataPath,
+        targetSize: options.targetSize || null,
         duration: Date.now() - startTime,
       };
     } catch (error) {
@@ -159,8 +207,8 @@ export class Orchestrator {
    * @returns {Promise<Object>} - 执行结果
    */
   async execute(params) {
-    const { query, mode, count = config.DEFAULT_COUNT, source = config.DEFAULT_SOURCE, size = 'all', safeSearch = 'moderate' } = params;
-    const options = { size, safeSearch };
+    const { query, mode, count = config.DEFAULT_COUNT, source = config.DEFAULT_SOURCE, size = 'all', safeSearch = 'moderate', aspect = 'all', targetSize = null, fit = 'cover', position = 'center' } = params;
+    const options = { size, safeSearch, aspect, targetSize, fit, position };
     const startTime = Date.now();
     const keywords = this.parseKeywords(query);
@@ -247,7 +295,11 @@ export class Orchestrator {
         lines.push(`- 搜索到: ${r.totalSearched} 张`);
         lines.push(`- 下载成功: ${r.totalDownloaded} 张`);
         lines.push(`- 下载失败: ${r.totalFailed} 张`);
-        lines.push(`- 保存: ${r.count} 张`);
+        if (r.targetSize) {
+          lines.push(`- 尺寸处理: ${r.totalProcessed} 成功, ${r.totalProcessFailed} 失败`);
+          lines.push(`- 目标尺寸: ${r.targetSize}`);
+        }
+        lines.push(`- 最终保存: ${r.count} 张`);
         lines.push(`- 存储目录: \`${r.saveDir}\``);
         lines.push(`- 耗时: ${(r.duration / 1000).toFixed(2)}秒`);
         lines.push('');