npm - smart-image-scraper-mcp - Versions diffs - 2.5.2 → 2.7.0 - Mend

smart-image-scraper-mcp 2.5.2 → 2.7.0

This diff compares publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (14)

package/README.md +22 -4
package/package.json +1 -1
package/src/config/index.js +4 -4
package/src/index.backup.js +340 -0
package/src/index.js +77 -200
package/src/index.new.js +213 -0
package/src/index.simple.js +213 -0
package/src/infrastructure/cache.js +1 -0
package/src/infrastructure/gracefulShutdown.js +4 -0
package/src/infrastructure/httpClient.js +15 -5
package/src/services/fileManager.js +61 -18
package/src/services/linkValidator.js +15 -8
package/src/services/orchestrator.js +79 -32
package/src/services/orchestrator.simple.js +259 -0

package/src/index.js CHANGED

@@ -4,13 +4,10 @@
  * 全网智能图片抓取 MCP 服务器
  * 基于 Model Context Protocol 的图片搜索、验证、下载工具
  *
- * 生产级功能：
- * - 优雅关闭和资源清理
- * - 健康检查和状态监控
- * - 性能指标收集
- * - 速率限制
- * - 缓存机制
- * - 错误分类
+ * 设计原则（模仿主流 MCP 实现）：
+ * - 简洁：最小化基础设施代码
+ * - 无状态：每个请求独立处理
+ * - 可靠：简单的错误处理，避免资源泄漏
  */
 import { Server } from '@modelcontextprotocol/sdk/server/index.js';
@@ -22,16 +19,8 @@ import {
 import { createRequire } from 'module';
 import { Orchestrator } from './services/orchestrator.js';
-import logger from './infrastructure/logger.js';
-import { gracefulShutdown } from './infrastructure/gracefulShutdown.js';
-import { healthChecker } from './infrastructure/healthCheck.js';
-import { metrics } from './infrastructure/metrics.js';
-import { searchCache, validationCache } from './infrastructure/cache.js';
 import config from './config/index.js';
-// 请求计数器，用于定期清理
-let requestCount = 0;
 // 从 package.json 读取版本号
 const require = createRequire(import.meta.url);
 const packageJson = require('../package.json');
@@ -40,7 +29,7 @@ const packageJson = require('../package.json');
 const server = new Server(
   {
     name: 'smart-image-scraper',
-    version: packageJson.version, // 动态读取版本号
+    version: packageJson.version,
   },
   {
     capabilities: {
@@ -49,9 +38,6 @@ const server = new Server(
   }
 );
-// 创建编排器实例
-const orchestrator = new Orchestrator();
 // 定义 Tool Schema
 const SMART_SCRAPER_TOOL = {
   name: 'smart_scraper',
@@ -66,7 +52,8 @@ const SMART_SCRAPER_TOOL = {
 【参数选择指南】
 - 用户要"找/搜索/查找图片" → mode="link"
 - 用户要"下载/保存/获取图片" → mode="download"
-- 用户要"高清/大图/壁纸" → size="large" 或 "wallpaper"
+- 用户要"高清/大图/壁纸" → size="large" 或 "wallpaper"，quality="high"
+- 用户要"高质量/精选/优质" → quality="high"
 - 用户要"电脑壁纸/横屏/横向" → aspect="wide"
 - 用户要"手机壁纸/竖屏/竖向" → aspect="tall"
 - 用户要"统一尺寸/固定大小" → targetSize="1920x1080" 或预设名
@@ -83,7 +70,8 @@ const SMART_SCRAPER_TOOL = {
 2. 下载10张高清风景图: {"query":"风景","mode":"download","count":10,"size":"large"}
 3. 下载电脑壁纸并统一为1080p: {"query":"风景","mode":"download","count":10,"aspect":"wide","targetSize":"desktop_1080p"}
 4. 下载手机壁纸: {"query":"动漫","mode":"download","count":10,"aspect":"tall","targetSize":"mobile_hd"}
-5. 批量下载多类图片: {"query":"猫,狗,兔子","mode":"download","count":5}`,
+5. 批量下载多类图片: {"query":"猫,狗,兔子","mode":"download","count":5}
+6. 获取高质量图片: {"query":"风景","mode":"link","count":5,"size":"large","quality":"high"}`,
   inputSchema: {
     type: 'object',
     properties: {
@@ -135,218 +123,107 @@ const SMART_SCRAPER_TOOL = {
         description: '安全搜索。off=关闭；moderate=中等过滤(默认)；strict=严格过滤(儿童/家庭内容)',
         default: 'moderate',
       },
+      quality: {
+        type: 'string',
+        enum: ['fast', 'balanced', 'high'],
+        description: '质量模式。fast=快速返回(不验证,速度最快)；balanced=平衡模式(验证有效性,默认)；high=高质量优先(验证+按质量排序,速度较慢但质量最好)',
+        default: 'balanced',
+      },
+      minFileSize: {
+        type: 'string',
+        enum: ['any', '50kb', '100kb', '200kb', '500kb', '1mb'],
+        description: '最小文件大小过滤。文件越大通常质量越高。any=不限制；建议高清图片用100kb以上',
+        default: 'any',
+      },
     },
     required: ['query', 'mode'],
   },
 };
-// 注册工具列表处理器
-server.setRequestHandler(ListToolsRequestSchema, async () => {
-  return {
-    tools: [SMART_SCRAPER_TOOL],
-  };
-});
+// 注册工具列表（主流做法：简单返回）
+server.setRequestHandler(ListToolsRequestSchema, async () => ({
+  tools: [SMART_SCRAPER_TOOL],
+}));
-// 注册工具调用处理器
+// 注册工具调用（主流做法：每个请求创建新实例，避免状态污染）
 server.setRequestHandler(CallToolRequestSchema, async (request) => {
-  const requestStartTime = Date.now();
   const { name, arguments: args } = request.params;
-  logger.info(`[MCP] 收到请求: ${name}, query="${args?.query?.substring(0, 30)}..."`);
   if (name !== 'smart_scraper') {
     return {
-      content: [
-        {
-          type: 'text',
-          text: `未知工具: ${name}`,
-        },
-      ],
+      content: [{ type: 'text', text: `未知工具: ${name}` }],
       isError: true,
     };
   }
-  try {
-    // 参数校验
-    if (!args.query || typeof args.query !== 'string') {
-      return {
-        content: [
-          {
-            type: 'text',
-            text: '错误: 请提供有效的搜索关键词 (query)',
-          },
-        ],
-        isError: true,
-      };
-    }
-    if (!args.mode || !['link', 'download'].includes(args.mode)) {
-      return {
-        content: [
-          {
-            type: 'text',
-            text: "错误: 请指定有效的运行模式 (mode): 'link' 或 'download'",
-          },
-        ],
-        isError: true,
-      };
-    }
-    // 验证 count 参数
-    let count = parseInt(args.count, 10) || 10;
-    if (count < 1) count = 1;
-    if (count > 100) count = 100; // 限制最大数量
-    // 验证 query 长度
-    const query = args.query.trim();
-    if (query.length === 0) {
-      return {
-        content: [
-          {
-            type: 'text',
-            text: '错误: 搜索关键词不能为空',
-          },
-        ],
-        isError: true,
-      };
-    }
-    if (query.length > 500) {
-      return {
-        content: [
-          {
-            type: 'text',
-            text: '错误: 搜索关键词过长（最大500字符）',
-          },
-        ],
-        isError: true,
-      };
-    }
-    // 验证 source 参数
-    const source = args.source || 'bing';
-    if (!['bing', 'google'].includes(source)) {
-      return {
-        content: [
-          {
-            type: 'text',
-            text: "错误: 无效的搜索源，请使用 'bing' 或 'google'",
-          },
-        ],
-        isError: true,
-      };
-    }
-    // 检查是否正在关闭
-    if (gracefulShutdown.isShuttingDownNow()) {
-      return {
-        content: [{ type: 'text', text: '服务器正在关闭，无法处理新请求' }],
-        isError: true,
-      };
-    }
-    // 开始操作追踪
-    const operation = gracefulShutdown.startOperation(`scraper:${query.substring(0, 20)}`);
-    const startTime = Date.now();
-    try {
-      // 执行任务（requestQueue 已有超时机制，这里不再重复设置）
-      logger.info('Executing smart_scraper', { args });
-      const result = await orchestrator.execute({
-        query: query,
-        mode: args.mode,
-        count: count,
-        source: source,
-        size: args.size || 'all',
-        aspect: args.aspect || 'all',
-        targetSize: args.targetSize || null,
-        fit: args.fit || 'cover',
-        safeSearch: args.safeSearch || 'moderate',
-      });
-      // 记录成功指标
-      metrics.recordSearch(source, true, Date.now() - startTime, result.results?.length || 0);
-      // 格式化输出
-      const formattedResult = orchestrator.formatResult(result);
-      const totalTime = Date.now() - requestStartTime;
-      logger.info(`[MCP] 请求完成: ${totalTime}ms, requestId=${result.requestId}`);
-      // 每3个请求清理一次缓存，避免内存累积
-      requestCount++;
-      if (requestCount >= 3) {
-        searchCache.clear();
-        validationCache.clear();
-        requestCount = 0;
-        logger.info('[MCP] 缓存已清理');
-      }
+  // 参数验证（主流做法：快速失败）
+  if (!args?.query || typeof args.query !== 'string' || !args.query.trim()) {
+    return {
+      content: [{ type: 'text', text: '错误: 请提供有效的搜索关键词 (query)' }],
+      isError: true,
+    };
+  }
-      const response = {
-        content: [
-          {
-            type: 'text',
-            text: formattedResult,
-          },
-        ],
-      };
-      return response;
-    } catch (innerError) {
-      // 记录失败指标
-      metrics.recordSearch(source, false, Date.now() - startTime);
-      metrics.recordError(innerError);
-      logger.error(`[MCP] 内部错误: ${innerError.message}`);
-      throw innerError;
-    } finally {
-      // 结束操作追踪
-      operation.end();
-    }
-  } catch (error) {
-    const totalTime = Date.now() - requestStartTime;
-    logger.error(`[MCP] 请求失败: ${totalTime}ms, error=${error.message}`);
+  if (!args.mode || !['link', 'download'].includes(args.mode)) {
     return {
-      content: [
-        {
-          type: 'text',
-          text: `## ❌ 执行错误\n\n**错误信息**: ${error.message}\n\n请检查网络连接或稍后重试。`,
-        },
-      ],
+      content: [{ type: 'text', text: "错误: 请指定有效的运行模式 (mode): 'link' 或 'download'" }],
       isError: true,
     };
   }
-});
-// 注册关闭回调
-gracefulShutdown.onShutdown(async () => {
-  logger.info('Closing MCP server connection...');
   try {
-    await server.close();
+    // 主流做法：每个请求创建新的 Orchestrator 实例，确保无状态
+    const orchestrator = new Orchestrator();
+    // 规范化参数
+    const params = {
+      query: args.query.trim(),
+      mode: args.mode,
+      count: Math.min(Math.max(parseInt(args.count, 10) || 10, 1), 100),
+      source: ['bing', 'google'].includes(args.source) ? args.source : 'bing',
+      size: args.size || 'all',
+      aspect: args.aspect || 'all',
+      targetSize: args.targetSize || null,
+      fit: args.fit || 'cover',
+      safeSearch: args.safeSearch || 'moderate',
+      quality: ['fast', 'balanced', 'high'].includes(args.quality) ? args.quality : 'balanced',
+      minFileSize: ['any', '50kb', '100kb', '200kb', '500kb', '1mb'].includes(args.minFileSize) ? args.minFileSize : 'any',
+    };
+    // 执行任务
+    const result = await orchestrator.execute(params);
+    // 格式化输出
+    const formattedResult = orchestrator.formatResult(result);
+    return {
+      content: [{ type: 'text', text: formattedResult }],
+    };
   } catch (error) {
-    logger.warn('Error closing server', { error: error.message });
+    // 主流做法：简洁的错误处理，使用 stderr 输出日志
+    console.error(`[MCP Error] ${error.message}`);
+    return {
+      content: [{
+        type: 'text',
+        text: `## ❌ 执行错误\n\n**错误信息**: ${error.message}\n\n请检查网络连接或稍后重试。`
+      }],
+      isError: true,
+    };
   }
 });
-// 启动服务器
+// 启动服务器（主流做法：最简启动，使用 stderr 输出日志避免干扰 stdio 通信）
 async function main() {
-  logger.info('Starting Smart Image Scraper MCP Server...');
-  logger.info('Configuration', {
-    saveRoot: config.SAVE_ROOT,
-    maxKeywordConcurrency: config.MAX_KEYWORD_CONCURRENCY,
-    maxDownloadConcurrency: config.MAX_DOWNLOAD_CONCURRENCY,
-  });
-  // 执行初始健康检查
-  const healthResult = await healthChecker.runAllChecks();
-  logger.info('Initial health check', { status: healthResult.status });
+  console.error(`[MCP] Starting Smart Image Scraper v${packageJson.version}`);
+  console.error(`[MCP] Save root: ${config.SAVE_ROOT}`);
   const transport = new StdioServerTransport();
   await server.connect(transport);
-  logger.info('MCP Server is running and ready to accept requests');
+  console.error('[MCP] Server is running');
 }
 main().catch((error) => {
-  logger.error('Server startup error', { error: error.message });
+  console.error(`[MCP] Startup error: ${error.message}`);
   process.exit(1);
 });

package/src/index.new.js ADDED

@@ -0,0 +1,213 @@
+#!/usr/bin/env node
+/**
+ * 全网智能图片抓取 MCP 服务器
+ * 基于 Model Context Protocol 的图片搜索、验证、下载工具
+ *
+ * 设计原则（模仿主流 MCP 实现）：
+ * - 简洁：最小化基础设施代码
+ * - 无状态：每个请求独立处理
+ * - 可靠：简单的错误处理，避免资源泄漏
+ */
+import { Server } from '@modelcontextprotocol/sdk/server/index.js';
+import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import {
+  CallToolRequestSchema,
+  ListToolsRequestSchema,
+} from '@modelcontextprotocol/sdk/types.js';
+import { createRequire } from 'module';
+import { Orchestrator } from './services/orchestrator.js';
+import config from './config/index.js';
+// 从 package.json 读取版本号
+const require = createRequire(import.meta.url);
+const packageJson = require('../package.json');
+// 创建 MCP 服务器
+const server = new Server(
+  {
+    name: 'smart-image-scraper',
+    version: packageJson.version,
+  },
+  {
+    capabilities: {
+      tools: {},
+    },
+  }
+);
+// 定义 Tool Schema
+const SMART_SCRAPER_TOOL = {
+  name: 'smart_scraper',
+  description: `全网智能图片抓取工具 - 从 Bing/Google 搜索、验证、下载高质量图片。
+【核心功能】
+1. 搜索图片链接 (mode=link) - 返回验证过的图片URL列表
+2. 下载图片 (mode=download) - 下载到本地，自动按质量排序优先高清
+3. 尺寸统一 (targetSize) - 下载后自动裁剪/缩放到指定尺寸
+4. 宽高比过滤 (aspect) - 横向/竖向/正方形
+【参数选择指南】
+- 用户要"找/搜索/查找图片" → mode="link"
+- 用户要"下载/保存/获取图片" → mode="download"
+- 用户要"高清/大图/壁纸" → size="large" 或 "wallpaper"
+- 用户要"电脑壁纸/横屏/横向" → aspect="wide"
+- 用户要"手机壁纸/竖屏/竖向" → aspect="tall"
+- 用户要"统一尺寸/固定大小" → targetSize="1920x1080" 或预设名
+- 用户要"多种类型图片" → query="猫,狗,鸟"（英文逗号分隔）
+【预设尺寸名称】
+- 电脑壁纸: desktop_1080p(1920x1080), desktop_2k(2560x1440), desktop_4k(3840x2160)
+- 手机壁纸: mobile_hd(1080x1920), mobile_2k(1440x2560)
+- 正方形: square_1080(1080x1080), square_512(512x512)
+- 社交媒体: instagram(1080x1080), twitter(1200x675), facebook(1200x630)
+【调用示例】
+1. 搜索5张猫的图片: {"query":"可爱的猫","mode":"link","count":5}
+2. 下载10张高清风景图: {"query":"风景","mode":"download","count":10,"size":"large"}
+3. 下载电脑壁纸并统一为1080p: {"query":"风景","mode":"download","count":10,"aspect":"wide","targetSize":"desktop_1080p"}
+4. 下载手机壁纸: {"query":"动漫","mode":"download","count":10,"aspect":"tall","targetSize":"mobile_hd"}
+5. 批量下载多类图片: {"query":"猫,狗,兔子","mode":"download","count":5}`,
+  inputSchema: {
+    type: 'object',
+    properties: {
+      query: {
+        type: 'string',
+        description: '搜索关键词。批量搜索用英文逗号分隔，如 "猫,狗,鸟"。建议使用具体描述性词语如"可爱的橘猫"而非"猫"',
+      },
+      mode: {
+        type: 'string',
+        enum: ['link', 'download'],
+        description: "运行模式。link=仅返回验证过的图片URL列表（用户只需要链接时使用）；download=下载图片到本地文件系统（用户说下载/保存时使用）",
+      },
+      count: {
+        type: 'number',
+        description: '每个关键词获取的图片数量。范围1-100，推荐1-20。用户说"几张"用5-10，说"很多"用20-30',
+        default: 10,
+      },
+      source: {
+        type: 'string',
+        enum: ['bing', 'google'],
+        description: '搜索引擎。bing更稳定推荐优先使用，google结果可能更丰富但可能被限制',
+        default: 'bing',
+      },
+      size: {
+        type: 'string',
+        enum: ['all', 'small', 'medium', 'large', 'wallpaper'],
+        description: '图片尺寸。all=不限；small=小图/图标；medium=中图；large=大图/高清；wallpaper=壁纸级别(1080p+)',
+        default: 'all',
+      },
+      aspect: {
+        type: 'string',
+        enum: ['all', 'wide', 'tall', 'square'],
+        description: '图片宽高比。all=不限；wide=横向/宽屏(电脑壁纸)；tall=纵向/竖屏(手机壁纸)；square=正方形',
+        default: 'all',
+      },
+      targetSize: {
+        type: 'string',
+        description: '目标尺寸，下载后统一裁剪/缩放到此尺寸。格式: "宽x高"(如"1920x1080")或预设名(desktop_1080p/desktop_2k/desktop_4k/mobile_hd/mobile_2k/square_1080/instagram/twitter/facebook)',
+      },
+      fit: {
+        type: 'string',
+        enum: ['cover', 'contain', 'fill'],
+        description: '尺寸处理时的适应模式。cover=裁剪填充(默认,不留白)；contain=包含留白；fill=拉伸填充',
+        default: 'cover',
+      },
+      safeSearch: {
+        type: 'string',
+        enum: ['off', 'moderate', 'strict'],
+        description: '安全搜索。off=关闭；moderate=中等过滤(默认)；strict=严格过滤(儿童/家庭内容)',
+        default: 'moderate',
+      },
+    },
+    required: ['query', 'mode'],
+  },
+};
+// 注册工具列表（主流做法：简单返回）
+server.setRequestHandler(ListToolsRequestSchema, async () => ({
+  tools: [SMART_SCRAPER_TOOL],
+}));
+// 注册工具调用（主流做法：每个请求创建新实例，避免状态污染）
+server.setRequestHandler(CallToolRequestSchema, async (request) => {
+  const { name, arguments: args } = request.params;
+  if (name !== 'smart_scraper') {
+    return {
+      content: [{ type: 'text', text: `未知工具: ${name}` }],
+      isError: true,
+    };
+  }
+  // 参数验证（主流做法：快速失败）
+  if (!args?.query || typeof args.query !== 'string' || !args.query.trim()) {
+    return {
+      content: [{ type: 'text', text: '错误: 请提供有效的搜索关键词 (query)' }],
+      isError: true,
+    };
+  }
+  if (!args.mode || !['link', 'download'].includes(args.mode)) {
+    return {
+      content: [{ type: 'text', text: "错误: 请指定有效的运行模式 (mode): 'link' 或 'download'" }],
+      isError: true,
+    };
+  }
+  try {
+    // 主流做法：每个请求创建新的 Orchestrator 实例，确保无状态
+    const orchestrator = new Orchestrator();
+    // 规范化参数
+    const params = {
+      query: args.query.trim(),
+      mode: args.mode,
+      count: Math.min(Math.max(parseInt(args.count, 10) || 10, 1), 100),
+      source: ['bing', 'google'].includes(args.source) ? args.source : 'bing',
+      size: args.size || 'all',
+      aspect: args.aspect || 'all',
+      targetSize: args.targetSize || null,
+      fit: args.fit || 'cover',
+      safeSearch: args.safeSearch || 'moderate',
+    };
+    // 执行任务
+    const result = await orchestrator.execute(params);
+    // 格式化输出
+    const formattedResult = orchestrator.formatResult(result);
+    return {
+      content: [{ type: 'text', text: formattedResult }],
+    };
+  } catch (error) {
+    // 主流做法：简洁的错误处理，使用 stderr 输出日志
+    console.error(`[MCP Error] ${error.message}`);
+    return {
+      content: [{
+        type: 'text',
+        text: `## ❌ 执行错误\n\n**错误信息**: ${error.message}\n\n请检查网络连接或稍后重试。`
+      }],
+      isError: true,
+    };
+  }
+});
+// 启动服务器（主流做法：最简启动，使用 stderr 输出日志避免干扰 stdio 通信）
+async function main() {
+  console.error(`[MCP] Starting Smart Image Scraper v${packageJson.version}`);
+  console.error(`[MCP] Save root: ${config.SAVE_ROOT}`);
+  const transport = new StdioServerTransport();
+  await server.connect(transport);
+  console.error('[MCP] Server is running');
+}
+main().catch((error) => {
+  console.error(`[MCP] Startup error: ${error.message}`);
+  process.exit(1);
+});