smart-image-scraper-mcp 2.12.4 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-image-scraper-mcp",
3
- "version": "2.12.4",
3
+ "version": "2.13.0",
4
4
  "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 图片搜索、验证和下载",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
package/src/index.js CHANGED
@@ -46,12 +46,16 @@ const SMART_SCRAPER_TOOL = {
46
46
  【核心功能】
47
47
  1. 搜索图片链接 (mode=link) - 返回验证过的图片URL列表
48
48
  2. 下载图片 (mode=download) - 下载到本地,自动按质量排序优先高清
49
- 3. 尺寸统一 (targetSize) - 下载后自动裁剪/缩放到指定尺寸
50
- 4. 宽高比过滤 (aspect) - 横向/竖向/正方形
49
+ 3. 搜索+下载 (mode=both) - 同时返回链接列表和下载文件,适合需要链接备份的场景
50
+ 4. 尺寸统一 (targetSize) - 下载后自动裁剪/缩放到指定尺寸
51
+ 5. 宽高比过滤 (aspect) - 横向/竖向/正方形
52
+ 6. 自定义下载路径 (savePath) - 指定图片保存位置,不填则保存到MCP服务器项目下images目录
51
53
 
52
54
  【参数选择指南】
53
55
  - 用户要"找/搜索/查找图片" → mode="link"
54
56
  - 用户要"下载/保存/获取图片" → mode="download"
57
+ - 用户要"搜索并下载/链接和下载都要" → mode="both"
58
+ - 用户要"保存到指定目录" → savePath="D:/my/path"
55
59
  - 用户要"高清/大图/壁纸" → size="large" 或 "wallpaper",quality="high"
56
60
  - 用户要"高质量/精选/优质" → quality="high"
57
61
  - 用户要"电脑壁纸/横屏/横向" → aspect="wide"
@@ -75,7 +79,9 @@ const SMART_SCRAPER_TOOL = {
75
79
  3. 下载电脑壁纸并统一为1080p: {"query":"风景","mode":"download","count":10,"aspect":"wide","targetSize":"desktop_1080p"}
76
80
  4. 下载手机壁纸: {"query":"动漫","mode":"download","count":10,"aspect":"tall","targetSize":"mobile_hd"}
77
81
  5. 批量下载多类图片: {"query":"猫,狗,兔子","mode":"download","count":5}
78
- 6. 获取高质量图片: {"query":"风景","mode":"link","count":5,"size":"large","quality":"high"}`,
82
+ 6. 获取高质量图片: {"query":"风景","mode":"link","count":5,"size":"large","quality":"high"}
83
+ 7. 搜索并下载到指定目录: {"query":"风景","mode":"both","count":5,"savePath":"D:/photos"}
84
+ 8. 下载到自定义目录: {"query":"猫","mode":"download","count":5,"savePath":"D:/my/images"}`,
79
85
  inputSchema: {
80
86
  type: 'object',
81
87
  properties: {
@@ -85,8 +91,8 @@ const SMART_SCRAPER_TOOL = {
85
91
  },
86
92
  mode: {
87
93
  type: 'string',
88
- enum: ['link', 'download'],
89
- description: "运行模式。link=仅返回验证过的图片URL列表(用户只需要链接时使用);download=下载图片到本地文件系统(用户说下载/保存时使用)",
94
+ enum: ['link', 'download', 'both'],
95
+ description: "运行模式。link=仅返回验证过的图片URL列表;download=下载图片到本地;both=同时返回链接和下载文件(用户需要链接备份时使用)",
90
96
  },
91
97
  count: {
92
98
  type: 'number',
@@ -139,6 +145,15 @@ const SMART_SCRAPER_TOOL = {
139
145
  description: '最小文件大小过滤。文件越大通常质量越高。any=不限制;建议高清图片用100kb以上',
140
146
  default: 'any',
141
147
  },
148
+ savePath: {
149
+ type: 'string',
150
+ description: '自定义图片保存路径(绝对路径)。仅对download和both模式有效。不填则默认保存到MCP服务器项目下images目录。示例: "D:/my/photos" 或 "C:/Users/xxx/Pictures"',
151
+ },
152
+ filterHotlink: {
153
+ type: 'boolean',
154
+ description: '是否过滤防盗链图片。默认true(开启过滤)。设为false可获取更多结果但部分链接可能无法直接在浏览器打开',
155
+ default: true,
156
+ },
142
157
  },
143
158
  required: ['query', 'mode'],
144
159
  },
@@ -168,9 +183,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
168
183
  };
169
184
  }
170
185
 
171
- if (!args.mode || !['link', 'download'].includes(args.mode)) {
186
+ if (!args.mode || !['link', 'download', 'both'].includes(args.mode)) {
172
187
  return {
173
- content: [{ type: 'text', text: "错误: 请指定有效的运行模式 (mode): 'link' 或 'download'" }],
188
+ content: [{ type: 'text', text: "错误: 请指定有效的运行模式 (mode): 'link', 'download' 或 'both'" }],
174
189
  isError: true,
175
190
  };
176
191
  }
@@ -196,6 +211,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
196
211
  safeSearch: args.safeSearch || 'moderate',
197
212
  quality: ['fast', 'balanced', 'high'].includes(args.quality) ? args.quality : 'balanced',
198
213
  minFileSize: ['any', '50kb', '100kb', '200kb', '500kb', '1mb'].includes(args.minFileSize) ? args.minFileSize : 'any',
214
+ savePath: args.savePath && typeof args.savePath === 'string' ? args.savePath.trim() : null,
215
+ filterHotlink: args.filterHotlink !== false, // 默认 true
199
216
  };
200
217
 
201
218
  // 使用 Promise.race 确保一定会在超时内返回
@@ -15,11 +15,21 @@ import config from '../config/index.js';
15
15
  const globalDownloadLimit = pLimit(config.MAX_DOWNLOAD_CONCURRENCY || 10);
16
16
 
17
17
  export class FileManager {
18
- constructor() {
19
- this.saveRoot = config.SAVE_ROOT;
18
+ constructor(options = {}) {
19
+ this.saveRoot = options.saveRoot || config.SAVE_ROOT;
20
20
  this.limit = globalDownloadLimit; // 使用全局共享限制器
21
21
  }
22
22
 
23
+ /**
24
+ * 设置自定义保存根目录
25
+ * @param {string} savePath - 绝对路径
26
+ */
27
+ setSaveRoot(savePath) {
28
+ if (savePath && typeof savePath === 'string') {
29
+ this.saveRoot = savePath;
30
+ }
31
+ }
32
+
23
33
  /**
24
34
  * 清理文件名,移除非法字符
25
35
  * @param {string} filename - 原始文件名
@@ -174,11 +174,16 @@ export class Orchestrator {
174
174
  // 检查是否已中止
175
175
  if (signal?.aborted) throw new Error('操作已取消');
176
176
 
177
- // link 模式:过滤掉有防盗链保护的 URL(用户无法直接在浏览器中打开)
178
- const filteredRawUrls = rawUrls.filter(url => !this.linkValidator.isHotlinkProtected(url));
179
- const hotlinkCount = rawUrls.length - filteredRawUrls.length;
180
- if (hotlinkCount > 0) {
181
- logger.warn(`[HOTLINK] "${keyword}" - filtered ${hotlinkCount} hotlink-protected URLs`);
177
+ // 根据 filterHotlink 参数决定是否过滤防盗链 URL
178
+ const shouldFilterHotlink = options.filterHotlink !== false;
179
+ let filteredRawUrls = rawUrls;
180
+ let hotlinkCount = 0;
181
+ if (shouldFilterHotlink) {
182
+ filteredRawUrls = rawUrls.filter(url => !this.linkValidator.isHotlinkProtected(url));
183
+ hotlinkCount = rawUrls.length - filteredRawUrls.length;
184
+ if (hotlinkCount > 0) {
185
+ logger.warn(`[HOTLINK] "${keyword}" - filtered ${hotlinkCount} hotlink-protected URLs`);
186
+ }
182
187
  }
183
188
 
184
189
  if (fastMode) {
@@ -394,6 +399,189 @@ export class Orchestrator {
394
399
  }
395
400
  }
396
401
 
402
+ /**
403
+ * 处理单个关键词 - Both 模式(同时返回链接和下载文件)
404
+ * @param {string} keyword - 关键词
405
+ * @param {number} count - 需要的图片数量
406
+ * @param {string} source - 搜索源
407
+ * @param {Object} options - 搜索选项
408
+ * @returns {Promise<Object>} - 处理结果
409
+ */
410
+ async processKeywordBoth(keyword, count, source, options = {}) {
411
+ const startTime = Date.now();
412
+ const KEYWORD_TIMEOUT = 45000; // both 模式45秒超时(包含下载)
413
+
414
+ // 检查是否已被中止
415
+ if (this.abortController?.signal?.aborted) {
416
+ return { keyword, success: false, error: '操作已取消', duration: 0 };
417
+ }
418
+
419
+ let timeoutId;
420
+ const timeoutPromise = new Promise((_, reject) => {
421
+ timeoutId = setTimeout(() => reject(new Error(`关键词 "${keyword}" 处理超时(45秒)`)), KEYWORD_TIMEOUT);
422
+ });
423
+
424
+ try {
425
+ const result = await Promise.race([
426
+ this._processKeywordBothInternal(keyword, count, source, options, startTime),
427
+ timeoutPromise
428
+ ]);
429
+ clearTimeout(timeoutId);
430
+ return result;
431
+ } catch (error) {
432
+ clearTimeout(timeoutId);
433
+ logger.error(`Process keyword both error: ${keyword}`, { error: error.message });
434
+ return {
435
+ keyword,
436
+ success: false,
437
+ error: error.message,
438
+ duration: Date.now() - startTime,
439
+ };
440
+ }
441
+ }
442
+
443
+ async _processKeywordBothInternal(keyword, count, source, options, startTime) {
444
+ const qualityMode = options.quality || 'balanced';
445
+ const fastMode = qualityMode === 'fast';
446
+ const prioritizeQuality = qualityMode === 'high';
447
+ const minFileSize = this._parseMinFileSize(options.minFileSize);
448
+ const signal = this.abortController?.signal;
449
+ const shouldFilterHotlink = options.filterHotlink !== false;
450
+
451
+ try {
452
+ const scraper = getScraper(source);
453
+ const searchCount = Math.max(count * 3, 10);
454
+
455
+ // 搜索
456
+ const cachedUrls = searchCache.getSearchResult(keyword, source, options);
457
+ let rawUrls;
458
+
459
+ if (cachedUrls && cachedUrls.length >= count) {
460
+ logger.info(`[CACHE] "${keyword}" - ${cachedUrls.length} URLs`);
461
+ rawUrls = cachedUrls;
462
+ metrics.recordCacheHit();
463
+ } else {
464
+ if (signal?.aborted) throw new Error('操作已取消');
465
+ logger.info(`[SEARCH] "${keyword}" (target: ${searchCount})...`);
466
+ rawUrls = await scraper.search(keyword, searchCount, options);
467
+ if (rawUrls.length > 0) {
468
+ searchCache.setSearchResult(keyword, source, options, rawUrls);
469
+ }
470
+ metrics.recordCacheMiss();
471
+ }
472
+
473
+ if (rawUrls.length === 0) {
474
+ return {
475
+ keyword,
476
+ success: false,
477
+ error: '未找到任何图片',
478
+ duration: Date.now() - startTime,
479
+ };
480
+ }
481
+
482
+ if (signal?.aborted) throw new Error('操作已取消');
483
+
484
+ // 防盗链过滤(用于链接列表展示)
485
+ let linkUrls = rawUrls;
486
+ let hotlinkCount = 0;
487
+ if (shouldFilterHotlink) {
488
+ linkUrls = rawUrls.filter(url => !this.linkValidator.isHotlinkProtected(url));
489
+ hotlinkCount = rawUrls.length - linkUrls.length;
490
+ if (hotlinkCount > 0) {
491
+ logger.warn(`[HOTLINK] "${keyword}" - filtered ${hotlinkCount} hotlink-protected URLs`);
492
+ }
493
+ }
494
+
495
+ // 验证链接
496
+ let validatedUrls;
497
+ let qualityModeLabel;
498
+
499
+ if (fastMode) {
500
+ validatedUrls = linkUrls.slice(0, count);
501
+ qualityModeLabel = '快速模式(跳过验证)';
502
+ } else {
503
+ const maxValidate = Math.min(linkUrls.length, count * 2 + 5);
504
+ const urlsToValidate = linkUrls.slice(0, maxValidate);
505
+ const { valid } = await this.linkValidator.validateMany(urlsToValidate, {
506
+ fetchQuality: prioritizeQuality,
507
+ sortByQuality: prioritizeQuality,
508
+ minFileSize: minFileSize,
509
+ signal,
510
+ });
511
+
512
+ let filteredValid = valid;
513
+ if (minFileSize > 0) {
514
+ filteredValid = valid.filter(v => {
515
+ const size = v.quality?.contentLength || 0;
516
+ return size >= minFileSize || size === 0;
517
+ });
518
+ }
519
+
520
+ validatedUrls = filteredValid.slice(0, count).map(v => v.url);
521
+ qualityModeLabel = prioritizeQuality ? '高质量模式(验证+排序)' : '平衡模式(验证)';
522
+ }
523
+
524
+ if (signal?.aborted) throw new Error('操作已取消');
525
+
526
+ // 下载图片(使用所有原始 URL,不受防盗链过滤影响,因为下载时会加 Referer)
527
+ const downloadUrls = fastMode ? rawUrls.slice(0, count * 2 + 5) : validatedUrls;
528
+ const { success, failed } = await this.fileManager.downloadMany(downloadUrls, keyword);
529
+
530
+ let resultDownloads = success.slice(0, count);
531
+
532
+ // 如果指定了目标尺寸,进行后处理
533
+ let processedCount = 0;
534
+ let processFailedCount = 0;
535
+ if (options.targetSize && resultDownloads.length > 0) {
536
+ const targetSize = this.imageProcessor.parseTargetSize(options.targetSize);
537
+ if (targetSize) {
538
+ logger.info(`Processing images to ${targetSize.width}x${targetSize.height}`);
539
+ const processResult = await this.imageProcessor.processMany(resultDownloads, {
540
+ width: targetSize.width,
541
+ height: targetSize.height,
542
+ fit: options.fit || 'cover',
543
+ position: options.position || 'center',
544
+ });
545
+ resultDownloads = processResult.success;
546
+ processedCount = processResult.success.length;
547
+ processFailedCount = processResult.failed.length;
548
+ }
549
+ }
550
+
551
+ // 保存元数据
552
+ let metadataPath = null;
553
+ if (resultDownloads.length > 0) {
554
+ metadataPath = await this.fileManager.saveMetadata(keyword, resultDownloads);
555
+ }
556
+
557
+ return {
558
+ keyword,
559
+ success: true,
560
+ mode: 'both',
561
+ totalSearched: rawUrls.length,
562
+ hotlinkFiltered: hotlinkCount,
563
+ // 链接部分
564
+ urls: validatedUrls,
565
+ urlCount: validatedUrls.length,
566
+ qualityMode,
567
+ qualityModeLabel,
568
+ // 下载部分
569
+ totalDownloaded: success.length,
570
+ totalFailed: failed.length,
571
+ totalProcessed: processedCount,
572
+ totalProcessFailed: processFailedCount,
573
+ files: resultDownloads,
574
+ count: resultDownloads.length,
575
+ saveDir: this.fileManager.getKeywordDir(keyword),
576
+ metadataPath,
577
+ targetSize: options.targetSize || null,
578
+ duration: Date.now() - startTime,
579
+ };
580
+ } catch (error) {
581
+ throw error;
582
+ }
583
+ }
584
+
397
585
  /**
398
586
  * 执行任务 - 直接执行(不使用队列,避免 MCP 兼容性问题)
399
587
  * @param {Object} params - 任务参数
@@ -468,8 +656,14 @@ export class Orchestrator {
468
656
  * 内部执行逻辑
469
657
  */
470
658
  async _executeInternal(params) {
471
- const { query, mode, count = config.DEFAULT_COUNT, source = config.DEFAULT_SOURCE, size = 'all', safeSearch = 'moderate', aspect = 'all', targetSize = null, fit = 'cover', position = 'center', quality = 'balanced', minFileSize = 'any' } = params;
472
- const options = { size, safeSearch, aspect, targetSize, fit, position, quality, minFileSize };
659
+ const { query, mode, count = config.DEFAULT_COUNT, source = config.DEFAULT_SOURCE, size = 'all', safeSearch = 'moderate', aspect = 'all', targetSize = null, fit = 'cover', position = 'center', quality = 'balanced', minFileSize = 'any', savePath = null, filterHotlink = true } = params;
660
+ const options = { size, safeSearch, aspect, targetSize, fit, position, quality, minFileSize, filterHotlink };
661
+
662
+ // 如果指定了自定义保存路径,更新 fileManager
663
+ if (savePath && (mode === 'download' || mode === 'both')) {
664
+ this.fileManager.setSaveRoot(savePath);
665
+ logger.info(`[Orchestrator] Custom save path: ${savePath}`);
666
+ }
473
667
 
474
668
  const startTime = Date.now();
475
669
  let keywords = this.parseKeywords(query);
@@ -491,9 +685,14 @@ export class Orchestrator {
491
685
  logger.info(`Starting task: mode=${mode}, keywords=${keywords.join(', ')}, count=${count}, source=${source}`);
492
686
 
493
687
  // 根据模式选择处理函数
494
- const processFunc = mode === 'link'
495
- ? this.processKeywordLink.bind(this)
496
- : this.processKeywordDownload.bind(this);
688
+ let processFunc;
689
+ if (mode === 'link') {
690
+ processFunc = this.processKeywordLink.bind(this);
691
+ } else if (mode === 'both') {
692
+ processFunc = this.processKeywordBoth.bind(this);
693
+ } else {
694
+ processFunc = this.processKeywordDownload.bind(this);
695
+ }
497
696
 
498
697
  // 串行处理关键词,避免阻塞事件循环
499
698
  const results = [];
@@ -552,7 +751,8 @@ export class Orchestrator {
552
751
  const lines = [];
553
752
  lines.push(`# 📷 图片抓取报告`);
554
753
  lines.push('');
555
- lines.push(`- **模式**: ${result.mode === 'link' ? '链接提取' : '本地下载'}`);
754
+ const modeLabels = { link: '链接提取', download: '本地下载', both: '链接提取+本地下载' };
755
+ lines.push(`- **模式**: ${modeLabels[result.mode] || result.mode}`);
556
756
  lines.push(`- **搜索源**: ${result.source}`);
557
757
  lines.push(`- **关键词数量**: ${result.totalKeywords}`);
558
758
  lines.push(`- **成功**: ${result.successCount} | **失败**: ${result.failedCount}`);
@@ -583,7 +783,37 @@ export class Orchestrator {
583
783
  (r.urls || []).forEach((url, i) => {
584
784
  lines.push(`${i + 1}. ${url}`);
585
785
  });
786
+ } else if (r.mode === 'both') {
787
+ // both 模式:同时显示链接和下载文件
788
+ lines.push(`- 搜索到: ${r.totalSearched || 0} 张`);
789
+ if (r.hotlinkFiltered > 0) {
790
+ lines.push(`- 防盗链过滤: ${r.hotlinkFiltered} 张`);
791
+ }
792
+ lines.push(`- 质量模式: ${r.qualityModeLabel || '快速模式'}`);
793
+ lines.push(`- 有效链接: ${r.urlCount || 0} 张`);
794
+ lines.push(`- 下载成功: ${r.totalDownloaded} 张`);
795
+ lines.push(`- 下载失败: ${r.totalFailed} 张`);
796
+ if (r.targetSize) {
797
+ lines.push(`- 尺寸处理: ${r.totalProcessed} 成功, ${r.totalProcessFailed} 失败`);
798
+ lines.push(`- 目标尺寸: ${r.targetSize}`);
799
+ }
800
+ lines.push(`- 最终保存: ${r.count} 张`);
801
+ lines.push(`- 存储目录: \`${r.saveDir}\``);
802
+ lines.push(`- 耗时: ${(r.duration / 1000).toFixed(2)}秒`);
803
+ lines.push('');
804
+ lines.push('### 有效链接');
805
+ lines.push('');
806
+ (r.urls || []).forEach((url, i) => {
807
+ lines.push(`${i + 1}. ${url}`);
808
+ });
809
+ lines.push('');
810
+ lines.push('### 已下载文件');
811
+ lines.push('');
812
+ (r.files || []).forEach((file, i) => {
813
+ lines.push(`${i + 1}. \`${file.path}\``);
814
+ });
586
815
  } else {
816
+ // download 模式
587
817
  lines.push(`- 搜索到: ${r.totalSearched} 张`);
588
818
  lines.push(`- 下载成功: ${r.totalDownloaded} 张`);
589
819
  lines.push(`- 下载失败: ${r.totalFailed} 张`);