smart-image-scraper-mcp 2.6.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
 
8
8
  ## ✨ 核心特性
9
9
 
10
- ### 🚀 高性能架构 (v2.4.0)
10
+ ### 🚀 高性能架构 (v2.7.0)
11
11
  - **多请求并行**:同时处理 5 个 MCP 请求
12
12
  - **并行翻页搜索**:同时获取多页结果,速度提升 5x
13
13
  - **HTTP 连接池**:Keep-Alive 复用 TCP 连接
@@ -23,7 +23,8 @@
23
23
  - **尺寸过滤**:small/medium/large/wallpaper
24
24
  - **宽高比过滤**:wide(横屏)/tall(竖屏)/square(正方形)
25
25
  - **尺寸统一**:下载后自动裁剪/缩放到指定尺寸
26
- - **质量优先**:自动按图片质量排序,高清优先
26
+ - **质量控制**:三种质量模式(fast/balanced/high)
27
+ - **文件大小过滤**:按最小文件大小筛选高质量图片
27
28
 
28
29
  ## 安装
29
30
 
@@ -100,6 +101,9 @@ npm install
100
101
  | `count` | number | ❌ | 每个关键词获取的图片数量,默认 10 |
101
102
  | `source` | string | ❌ | 搜索源,支持 `bing` 和 `google`,默认 `bing` |
102
103
  | `size` | string | ❌ | 图片尺寸:`all`, `small`, `medium`, `large`, `wallpaper`,默认 `all` |
104
+ | `aspect` | string | ❌ | 宽高比:`all`, `wide`, `tall`, `square`,默认 `all` |
105
+ | `quality` | string | ❌ | 质量模式:`fast`, `balanced`, `high`,默认 `balanced` |
106
+ | `minFileSize` | string | ❌ | 最小文件大小:`any`, `50kb`, `100kb`, `200kb`, `500kb`, `1mb`,默认 `any` |
103
107
  | `safeSearch` | string | ❌ | 安全搜索:`off`, `moderate`, `strict`,默认 `moderate` |
104
108
 
105
109
  **使用示例**:
@@ -128,6 +132,16 @@ npm install
128
132
  "size": "wallpaper",
129
133
  "safeSearch": "strict"
130
134
  }
135
+
136
+ // 高质量模式 - 获取验证过的高清图片
137
+ {
138
+ "query": "风景",
139
+ "mode": "link",
140
+ "count": 10,
141
+ "size": "large",
142
+ "quality": "high",
143
+ "minFileSize": "100kb"
144
+ }
131
145
  ```
132
146
 
133
147
  ## 架构设计
@@ -267,6 +281,7 @@ npm test
267
281
 
268
282
  | 版本 | 日期 | 主要更新 |
269
283
  |------|------|----------|
284
+ | 2.7.0 | 2026-02 | 新增质量控制:quality 参数(fast/balanced/high)、minFileSize 文件大小过滤 |
270
285
  | 2.4.0 | 2026-02 | 修复所有分析问题:超时策略统一、缓存键一致、版本号动态读取 |
271
286
  | 2.3.0 | 2026-02 | 性能优化:HTTP 连接池、并行翻页搜索、内存泄漏修复 |
272
287
  | 2.2.0 | 2026-02 | 请求队列管理系统、自动资源释放 |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-image-scraper-mcp",
3
- "version": "2.6.0",
3
+ "version": "2.7.0",
4
4
  "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 图片搜索、验证和下载",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
package/src/index.js CHANGED
@@ -52,7 +52,8 @@ const SMART_SCRAPER_TOOL = {
52
52
  【参数选择指南】
53
53
  - 用户要"找/搜索/查找图片" → mode="link"
54
54
  - 用户要"下载/保存/获取图片" → mode="download"
55
- - 用户要"高清/大图/壁纸" → size="large" 或 "wallpaper"
55
+ - 用户要"高清/大图/壁纸" → size="large" 或 "wallpaper",quality="high"
56
+ - 用户要"高质量/精选/优质" → quality="high"
56
57
  - 用户要"电脑壁纸/横屏/横向" → aspect="wide"
57
58
  - 用户要"手机壁纸/竖屏/竖向" → aspect="tall"
58
59
  - 用户要"统一尺寸/固定大小" → targetSize="1920x1080" 或预设名
@@ -69,7 +70,8 @@ const SMART_SCRAPER_TOOL = {
69
70
  2. 下载10张高清风景图: {"query":"风景","mode":"download","count":10,"size":"large"}
70
71
  3. 下载电脑壁纸并统一为1080p: {"query":"风景","mode":"download","count":10,"aspect":"wide","targetSize":"desktop_1080p"}
71
72
  4. 下载手机壁纸: {"query":"动漫","mode":"download","count":10,"aspect":"tall","targetSize":"mobile_hd"}
72
- 5. 批量下载多类图片: {"query":"猫,狗,兔子","mode":"download","count":5}`,
73
+ 5. 批量下载多类图片: {"query":"猫,狗,兔子","mode":"download","count":5}
74
+ 6. 获取高质量图片: {"query":"风景","mode":"link","count":5,"size":"large","quality":"high"}`,
73
75
  inputSchema: {
74
76
  type: 'object',
75
77
  properties: {
@@ -121,6 +123,18 @@ const SMART_SCRAPER_TOOL = {
121
123
  description: '安全搜索。off=关闭;moderate=中等过滤(默认);strict=严格过滤(儿童/家庭内容)',
122
124
  default: 'moderate',
123
125
  },
126
+ quality: {
127
+ type: 'string',
128
+ enum: ['fast', 'balanced', 'high'],
129
+ description: '质量模式。fast=快速返回(不验证,速度最快);balanced=平衡模式(验证有效性,默认);high=高质量优先(验证+按质量排序,速度较慢但质量最好)',
130
+ default: 'balanced',
131
+ },
132
+ minFileSize: {
133
+ type: 'string',
134
+ enum: ['any', '50kb', '100kb', '200kb', '500kb', '1mb'],
135
+ description: '最小文件大小过滤。文件越大通常质量越高。any=不限制;建议高清图片用100kb以上',
136
+ default: 'any',
137
+ },
124
138
  },
125
139
  required: ['query', 'mode'],
126
140
  },
@@ -172,6 +186,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
172
186
  targetSize: args.targetSize || null,
173
187
  fit: args.fit || 'cover',
174
188
  safeSearch: args.safeSearch || 'moderate',
189
+ quality: ['fast', 'balanced', 'high'].includes(args.quality) ? args.quality : 'balanced',
190
+ minFileSize: ['any', '50kb', '100kb', '200kb', '500kb', '1mb'].includes(args.minFileSize) ? args.minFileSize : 'any',
175
191
  };
176
192
 
177
193
  // 执行任务
@@ -50,6 +50,25 @@ export class Orchestrator {
50
50
  .filter(k => k.length > 0);
51
51
  }
52
52
 
53
+ /**
54
+ * Convert the minFileSize option into a byte threshold.
55
+ * @param {string} minFileSize - Size label such as 'any', '50kb' or '1mb' (lower-cased before lookup)
56
+ * @returns {number} - Threshold in bytes; 0 for empty input, 'any', or a label with no table entry
57
+ */
58
+ _parseMinFileSize(minFileSize) {
59
+ if (!minFileSize || minFileSize === 'any') return 0; // 0 means "no size limit"
60
+
61
+ const sizeMap = {
62
+ '50kb': 50 * 1024,
63
+ '100kb': 100 * 1024,
64
+ '200kb': 200 * 1024,
65
+ '500kb': 500 * 1024,
66
+ '1mb': 1024 * 1024,
67
+ };
68
+
69
+ return sizeMap[minFileSize.toLowerCase()] || 0; // NOTE(review): plain-object lookup also resolves inherited keys (e.g. 'constructor'), and non-string input throws here - confirm callers only pass the enum labels
70
+ }
71
+
53
72
  /**
54
73
  * 处理单个关键词 - Link 模式
55
74
  * @param {string} keyword - 关键词
@@ -60,8 +79,11 @@ export class Orchestrator {
60
79
  */
61
80
  async processKeywordLink(keyword, count, source, options = {}) {
62
81
  const startTime = Date.now();
63
- const fastMode = options.fastMode !== false;
64
- const prioritizeQuality = options.prioritizeQuality === true;
82
+ // 根据 quality 参数决定模式
83
+ const qualityMode = options.quality || 'balanced';
84
+ const fastMode = qualityMode === 'fast';
85
+ const prioritizeQuality = qualityMode === 'high';
86
+ const minFileSize = this._parseMinFileSize(options.minFileSize);
65
87
 
66
88
  try {
67
89
  const scraper = getScraper(source);
@@ -94,21 +116,36 @@ export class Orchestrator {
94
116
  };
95
117
  }
96
118
 
97
- // 快速模式:直接返回搜索结果(不验证)
119
+ // 根据 quality 模式处理
98
120
  let resultUrls;
99
- if (fastMode && !prioritizeQuality) {
100
- // 快速模式:直接使用搜索结果
121
+ let qualityModeLabel;
122
+
123
+ if (fastMode) {
124
+ // fast 模式:直接使用搜索结果,不验证
101
125
  resultUrls = rawUrls.slice(0, count);
126
+ qualityModeLabel = '快速模式(跳过验证)';
102
127
  logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs`);
103
128
  } else {
104
- // 完整验证模式:验证不通过的继续搜索更多
129
+ // balanced 或 high 模式:验证链接
105
130
  const { valid } = await this.linkValidator.validateMany(rawUrls, {
106
131
  fetchQuality: prioritizeQuality,
107
132
  sortByQuality: prioritizeQuality,
133
+ minFileSize: minFileSize,
108
134
  });
109
- resultUrls = valid.slice(0, count).map(v => v.url);
110
135
 
111
- // 如果验证通过的不够,记录警告
136
+ // 过滤最小文件大小
137
+ let filteredValid = valid;
138
+ if (minFileSize > 0) {
139
+ filteredValid = valid.filter(v => {
140
+ const size = v.quality?.contentLength || 0;
141
+ return size >= minFileSize || size === 0; // size=0 表示未知,保留
142
+ });
143
+ logger.info(`[FILTER] minFileSize=${options.minFileSize}: ${valid.length} -> ${filteredValid.length}`);
144
+ }
145
+
146
+ resultUrls = filteredValid.slice(0, count).map(v => v.url);
147
+ qualityModeLabel = prioritizeQuality ? '高质量模式(验证+排序)' : '平衡模式(验证)';
148
+
112
149
  if (resultUrls.length < count) {
113
150
  logger.warn(`[VALIDATE] "${keyword}" - only ${resultUrls.length}/${count} valid`);
114
151
  }
@@ -121,7 +158,8 @@ export class Orchestrator {
121
158
  totalSearched: rawUrls.length,
122
159
  urls: resultUrls,
123
160
  count: resultUrls.length,
124
- fastMode,
161
+ qualityMode,
162
+ qualityModeLabel,
125
163
  duration: Date.now() - startTime,
126
164
  };
127
165
  } catch (error) {
@@ -145,7 +183,10 @@ export class Orchestrator {
145
183
  */
146
184
  async processKeywordDownload(keyword, count, source, options = {}) {
147
185
  const startTime = Date.now();
148
- const prioritizeQuality = options.prioritizeQuality !== false;
186
+ // Derive the mode from the quality option (defaults to 'balanced'; links are validated unless quality is 'fast')
187
+ const qualityMode = options.quality || 'balanced';
188
+ const prioritizeQuality = qualityMode !== 'fast';
189
+ const minFileSize = this._parseMinFileSize(options.minFileSize);
149
190
 
150
191
  try {
151
192
  const scraper = getScraper(source);
@@ -178,17 +219,29 @@ export class Orchestrator {
178
219
  };
179
220
  }
180
221
 
181
- // 先验证链接并按质量排序
222
+ // 根据 quality 模式处理
182
223
  let urlsToDownload = rawUrls.slice(0, searchCount);
183
224
  if (prioritizeQuality) {
184
- logger.info(`Validating and sorting ${urlsToDownload.length} URLs by quality...`);
225
+ const sortByQuality = qualityMode === 'high';
226
+ logger.info(`Validating ${urlsToDownload.length} URLs (quality=${qualityMode})...`);
185
227
  const { valid } = await this.linkValidator.validateMany(urlsToDownload, {
186
- fetchQuality: true,
187
- sortByQuality: true,
228
+ fetchQuality: sortByQuality,
229
+ sortByQuality: sortByQuality,
230
+ minFileSize: minFileSize,
188
231
  });
189
- // 使用排序后的URL列表
190
- urlsToDownload = valid.map(v => v.url);
191
- logger.info(`Quality sorted: ${urlsToDownload.length} valid URLs`);
232
+
233
+ // 过滤最小文件大小
234
+ let filteredValid = valid;
235
+ if (minFileSize > 0) {
236
+ filteredValid = valid.filter(v => {
237
+ const size = v.quality?.contentLength || 0;
238
+ return size >= minFileSize || size === 0;
239
+ });
240
+ logger.info(`[FILTER] minFileSize: ${valid.length} -> ${filteredValid.length}`);
241
+ }
242
+
243
+ urlsToDownload = filteredValid.map(v => v.url);
244
+ logger.info(`Quality filtered: ${urlsToDownload.length} valid URLs`);
192
245
  }
193
246
 
194
247
  // Download the images (quality sorting is only requested when quality is 'high')
@@ -360,11 +413,7 @@ export class Orchestrator {
360
413
 
361
414
  if (r.mode === 'link') {
362
415
  lines.push(`- 搜索到: ${r.totalSearched || 0} 张`);
363
- if (r.fastMode) {
364
- lines.push(`- 模式: 快速模式(跳过验证)`);
365
- } else {
366
- lines.push(`- 验证通过: ${r.totalValidated || r.count || 0} 张`);
367
- }
416
+ lines.push(`- 质量模式: ${r.qualityModeLabel || '快速模式'}`);
368
417
  lines.push(`- 返回: ${r.count || 0} 张`);
369
418
  lines.push(`- 耗时: ${(r.duration / 1000).toFixed(2)}秒`);
370
419
  lines.push('');