smart-image-scraper-mcp 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/package.json +1 -1
- package/src/index.js +18 -2
- package/src/services/orchestrator.js +71 -22
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
## ✨ 核心特性
|
|
9
9
|
|
|
10
|
-
### 🚀 高性能架构 (v2.
|
|
10
|
+
### 🚀 高性能架构 (v2.7.0)
|
|
11
11
|
- **多请求并行**:同时处理 5 个 MCP 请求
|
|
12
12
|
- **并行翻页搜索**:同时获取多页结果,速度提升 5x
|
|
13
13
|
- **HTTP 连接池**:Keep-Alive 复用 TCP 连接
|
|
@@ -23,7 +23,8 @@
|
|
|
23
23
|
- **尺寸过滤**:small/medium/large/wallpaper
|
|
24
24
|
- **宽高比过滤**:wide(横屏)/tall(竖屏)/square(正方形)
|
|
25
25
|
- **尺寸统一**:下载后自动裁剪/缩放到指定尺寸
|
|
26
|
-
-
|
|
26
|
+
- **质量控制**:三种质量模式(fast/balanced/high)
|
|
27
|
+
- **文件大小过滤**:按最小文件大小筛选高质量图片
|
|
27
28
|
|
|
28
29
|
## 安装
|
|
29
30
|
|
|
@@ -100,6 +101,9 @@ npm install
|
|
|
100
101
|
| `count` | number | ❌ | 每个关键词获取的图片数量,默认 10 |
|
|
101
102
|
| `source` | string | ❌ | 搜索源,支持 `bing` 和 `google`,默认 `bing` |
|
|
102
103
|
| `size` | string | ❌ | 图片尺寸:`all`, `small`, `medium`, `large`, `wallpaper`,默认 `all` |
|
|
104
|
+
| `aspect` | string | ❌ | 宽高比:`all`, `wide`, `tall`, `square`,默认 `all` |
|
|
105
|
+
| `quality` | string | ❌ | 质量模式:`fast`, `balanced`, `high`,默认 `balanced` |
|
|
106
|
+
| `minFileSize` | string | ❌ | 最小文件大小:`any`, `50kb`, `100kb`, `200kb`, `500kb`, `1mb`,默认 `any` |
|
|
103
107
|
| `safeSearch` | string | ❌ | 安全搜索:`off`, `moderate`, `strict`,默认 `moderate` |
|
|
104
108
|
|
|
105
109
|
**使用示例**:
|
|
@@ -128,6 +132,16 @@ npm install
|
|
|
128
132
|
"size": "wallpaper",
|
|
129
133
|
"safeSearch": "strict"
|
|
130
134
|
}
|
|
135
|
+
|
|
136
|
+
// 高质量模式 - 获取验证过的高清图片
|
|
137
|
+
{
|
|
138
|
+
"query": "风景",
|
|
139
|
+
"mode": "link",
|
|
140
|
+
"count": 10,
|
|
141
|
+
"size": "large",
|
|
142
|
+
"quality": "high",
|
|
143
|
+
"minFileSize": "100kb"
|
|
144
|
+
}
|
|
131
145
|
```
|
|
132
146
|
|
|
133
147
|
## 架构设计
|
|
@@ -267,6 +281,7 @@ npm test
|
|
|
267
281
|
|
|
268
282
|
| 版本 | 日期 | 主要更新 |
|
|
269
283
|
|------|------|----------|
|
|
284
|
+
| 2.7.0 | 2026-02 | 新增质量控制:quality 参数(fast/balanced/high)、minFileSize 文件大小过滤 |
|
|
270
285
|
| 2.4.0 | 2026-02 | 修复所有分析问题:超时策略统一、缓存键一致、版本号动态读取 |
|
|
271
286
|
| 2.3.0 | 2026-02 | 性能优化:HTTP 连接池、并行翻页搜索、内存泄漏修复 |
|
|
272
287
|
| 2.2.0 | 2026-02 | 请求队列管理系统、自动资源释放 |
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -52,7 +52,8 @@ const SMART_SCRAPER_TOOL = {
|
|
|
52
52
|
【参数选择指南】
|
|
53
53
|
- 用户要"找/搜索/查找图片" → mode="link"
|
|
54
54
|
- 用户要"下载/保存/获取图片" → mode="download"
|
|
55
|
-
- 用户要"高清/大图/壁纸" → size="large" 或 "wallpaper"
|
|
55
|
+
- 用户要"高清/大图/壁纸" → size="large" 或 "wallpaper",quality="high"
|
|
56
|
+
- 用户要"高质量/精选/优质" → quality="high"
|
|
56
57
|
- 用户要"电脑壁纸/横屏/横向" → aspect="wide"
|
|
57
58
|
- 用户要"手机壁纸/竖屏/竖向" → aspect="tall"
|
|
58
59
|
- 用户要"统一尺寸/固定大小" → targetSize="1920x1080" 或预设名
|
|
@@ -69,7 +70,8 @@ const SMART_SCRAPER_TOOL = {
|
|
|
69
70
|
2. 下载10张高清风景图: {"query":"风景","mode":"download","count":10,"size":"large"}
|
|
70
71
|
3. 下载电脑壁纸并统一为1080p: {"query":"风景","mode":"download","count":10,"aspect":"wide","targetSize":"desktop_1080p"}
|
|
71
72
|
4. 下载手机壁纸: {"query":"动漫","mode":"download","count":10,"aspect":"tall","targetSize":"mobile_hd"}
|
|
72
|
-
5. 批量下载多类图片: {"query":"猫,狗,兔子","mode":"download","count":5}
|
|
73
|
+
5. 批量下载多类图片: {"query":"猫,狗,兔子","mode":"download","count":5}
|
|
74
|
+
6. 获取高质量图片: {"query":"风景","mode":"link","count":5,"size":"large","quality":"high"}`,
|
|
73
75
|
inputSchema: {
|
|
74
76
|
type: 'object',
|
|
75
77
|
properties: {
|
|
@@ -121,6 +123,18 @@ const SMART_SCRAPER_TOOL = {
|
|
|
121
123
|
description: '安全搜索。off=关闭;moderate=中等过滤(默认);strict=严格过滤(儿童/家庭内容)',
|
|
122
124
|
default: 'moderate',
|
|
123
125
|
},
|
|
126
|
+
quality: {
|
|
127
|
+
type: 'string',
|
|
128
|
+
enum: ['fast', 'balanced', 'high'],
|
|
129
|
+
description: '质量模式。fast=快速返回(不验证,速度最快);balanced=平衡模式(验证有效性,默认);high=高质量优先(验证+按质量排序,速度较慢但质量最好)',
|
|
130
|
+
default: 'balanced',
|
|
131
|
+
},
|
|
132
|
+
minFileSize: {
|
|
133
|
+
type: 'string',
|
|
134
|
+
enum: ['any', '50kb', '100kb', '200kb', '500kb', '1mb'],
|
|
135
|
+
description: '最小文件大小过滤。文件越大通常质量越高。any=不限制;建议高清图片用100kb以上',
|
|
136
|
+
default: 'any',
|
|
137
|
+
},
|
|
124
138
|
},
|
|
125
139
|
required: ['query', 'mode'],
|
|
126
140
|
},
|
|
@@ -172,6 +186,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
172
186
|
targetSize: args.targetSize || null,
|
|
173
187
|
fit: args.fit || 'cover',
|
|
174
188
|
safeSearch: args.safeSearch || 'moderate',
|
|
189
|
+
quality: ['fast', 'balanced', 'high'].includes(args.quality) ? args.quality : 'balanced',
|
|
190
|
+
minFileSize: ['any', '50kb', '100kb', '200kb', '500kb', '1mb'].includes(args.minFileSize) ? args.minFileSize : 'any',
|
|
175
191
|
};
|
|
176
192
|
|
|
177
193
|
// 执行任务
|
|
@@ -50,6 +50,25 @@ export class Orchestrator {
|
|
|
50
50
|
.filter(k => k.length > 0);
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
+
/**
|
|
54
|
+
* 解析最小文件大小参数
|
|
55
|
+
* @param {string} minFileSize - 最小文件大小字符串
|
|
56
|
+
* @returns {number} - 字节数
|
|
57
|
+
*/
|
|
58
|
+
_parseMinFileSize(minFileSize) {
|
|
59
|
+
if (!minFileSize || minFileSize === 'any') return 0;
|
|
60
|
+
|
|
61
|
+
const sizeMap = {
|
|
62
|
+
'50kb': 50 * 1024,
|
|
63
|
+
'100kb': 100 * 1024,
|
|
64
|
+
'200kb': 200 * 1024,
|
|
65
|
+
'500kb': 500 * 1024,
|
|
66
|
+
'1mb': 1024 * 1024,
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
return sizeMap[minFileSize.toLowerCase()] || 0;
|
|
70
|
+
}
|
|
71
|
+
|
|
53
72
|
/**
|
|
54
73
|
* 处理单个关键词 - Link 模式
|
|
55
74
|
* @param {string} keyword - 关键词
|
|
@@ -60,8 +79,11 @@ export class Orchestrator {
|
|
|
60
79
|
*/
|
|
61
80
|
async processKeywordLink(keyword, count, source, options = {}) {
|
|
62
81
|
const startTime = Date.now();
|
|
63
|
-
|
|
64
|
-
const
|
|
82
|
+
// 根据 quality 参数决定模式
|
|
83
|
+
const qualityMode = options.quality || 'balanced';
|
|
84
|
+
const fastMode = qualityMode === 'fast';
|
|
85
|
+
const prioritizeQuality = qualityMode === 'high';
|
|
86
|
+
const minFileSize = this._parseMinFileSize(options.minFileSize);
|
|
65
87
|
|
|
66
88
|
try {
|
|
67
89
|
const scraper = getScraper(source);
|
|
@@ -94,21 +116,36 @@ export class Orchestrator {
|
|
|
94
116
|
};
|
|
95
117
|
}
|
|
96
118
|
|
|
97
|
-
//
|
|
119
|
+
// 根据 quality 模式处理
|
|
98
120
|
let resultUrls;
|
|
99
|
-
|
|
100
|
-
|
|
121
|
+
let qualityModeLabel;
|
|
122
|
+
|
|
123
|
+
if (fastMode) {
|
|
124
|
+
// fast 模式:直接使用搜索结果,不验证
|
|
101
125
|
resultUrls = rawUrls.slice(0, count);
|
|
126
|
+
qualityModeLabel = '快速模式(跳过验证)';
|
|
102
127
|
logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs`);
|
|
103
128
|
} else {
|
|
104
|
-
//
|
|
129
|
+
// balanced 或 high 模式:验证链接
|
|
105
130
|
const { valid } = await this.linkValidator.validateMany(rawUrls, {
|
|
106
131
|
fetchQuality: prioritizeQuality,
|
|
107
132
|
sortByQuality: prioritizeQuality,
|
|
133
|
+
minFileSize: minFileSize,
|
|
108
134
|
});
|
|
109
|
-
resultUrls = valid.slice(0, count).map(v => v.url);
|
|
110
135
|
|
|
111
|
-
//
|
|
136
|
+
// 过滤最小文件大小
|
|
137
|
+
let filteredValid = valid;
|
|
138
|
+
if (minFileSize > 0) {
|
|
139
|
+
filteredValid = valid.filter(v => {
|
|
140
|
+
const size = v.quality?.contentLength || 0;
|
|
141
|
+
return size >= minFileSize || size === 0; // size=0 表示未知,保留
|
|
142
|
+
});
|
|
143
|
+
logger.info(`[FILTER] minFileSize=${options.minFileSize}: ${valid.length} -> ${filteredValid.length}`);
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
resultUrls = filteredValid.slice(0, count).map(v => v.url);
|
|
147
|
+
qualityModeLabel = prioritizeQuality ? '高质量模式(验证+排序)' : '平衡模式(验证)';
|
|
148
|
+
|
|
112
149
|
if (resultUrls.length < count) {
|
|
113
150
|
logger.warn(`[VALIDATE] "${keyword}" - only ${resultUrls.length}/${count} valid`);
|
|
114
151
|
}
|
|
@@ -121,7 +158,8 @@ export class Orchestrator {
|
|
|
121
158
|
totalSearched: rawUrls.length,
|
|
122
159
|
urls: resultUrls,
|
|
123
160
|
count: resultUrls.length,
|
|
124
|
-
|
|
161
|
+
qualityMode,
|
|
162
|
+
qualityModeLabel,
|
|
125
163
|
duration: Date.now() - startTime,
|
|
126
164
|
};
|
|
127
165
|
} catch (error) {
|
|
@@ -145,7 +183,10 @@ export class Orchestrator {
|
|
|
145
183
|
*/
|
|
146
184
|
async processKeywordDownload(keyword, count, source, options = {}) {
|
|
147
185
|
const startTime = Date.now();
|
|
148
|
-
|
|
186
|
+
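// Pick the processing mode from the quality option (defaults to 'balanced'; in download mode every mode except 'fast' validates links before downloading)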
|
|
187
|
+
const qualityMode = options.quality || 'balanced';
|
|
188
|
+
const prioritizeQuality = qualityMode !== 'fast';
|
|
189
|
+
const minFileSize = this._parseMinFileSize(options.minFileSize);
|
|
149
190
|
|
|
150
191
|
try {
|
|
151
192
|
const scraper = getScraper(source);
|
|
@@ -178,17 +219,29 @@ export class Orchestrator {
|
|
|
178
219
|
};
|
|
179
220
|
}
|
|
180
221
|
|
|
181
|
-
//
|
|
222
|
+
// 根据 quality 模式处理
|
|
182
223
|
let urlsToDownload = rawUrls.slice(0, searchCount);
|
|
183
224
|
if (prioritizeQuality) {
|
|
184
|
-
|
|
225
|
+
const sortByQuality = qualityMode === 'high';
|
|
226
|
+
logger.info(`Validating ${urlsToDownload.length} URLs (quality=${qualityMode})...`);
|
|
185
227
|
const { valid } = await this.linkValidator.validateMany(urlsToDownload, {
|
|
186
|
-
fetchQuality:
|
|
187
|
-
sortByQuality:
|
|
228
|
+
fetchQuality: sortByQuality,
|
|
229
|
+
sortByQuality: sortByQuality,
|
|
230
|
+
minFileSize: minFileSize,
|
|
188
231
|
});
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
232
|
+
|
|
233
|
+
// 过滤最小文件大小
|
|
234
|
+
let filteredValid = valid;
|
|
235
|
+
if (minFileSize > 0) {
|
|
236
|
+
filteredValid = valid.filter(v => {
|
|
237
|
+
const size = v.quality?.contentLength || 0;
|
|
238
|
+
return size >= minFileSize || size === 0;
|
|
239
|
+
});
|
|
240
|
+
logger.info(`[FILTER] minFileSize: ${valid.length} -> ${filteredValid.length}`);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
urlsToDownload = filteredValid.map(v => v.url);
|
|
244
|
+
logger.info(`Quality filtered: ${urlsToDownload.length} valid URLs`);
|
|
192
245
|
}
|
|
193
246
|
|
|
194
247
|
// Download the images (quality sorting is only requested when quality='high'; otherwise the search order is kept)
|
|
@@ -360,11 +413,7 @@ export class Orchestrator {
|
|
|
360
413
|
|
|
361
414
|
if (r.mode === 'link') {
|
|
362
415
|
lines.push(`- 搜索到: ${r.totalSearched || 0} 张`);
|
|
363
|
-
|
|
364
|
-
lines.push(`- 模式: 快速模式(跳过验证)`);
|
|
365
|
-
} else {
|
|
366
|
-
lines.push(`- 验证通过: ${r.totalValidated || r.count || 0} 张`);
|
|
367
|
-
}
|
|
416
|
+
lines.push(`- 质量模式: ${r.qualityModeLabel || '快速模式'}`);
|
|
368
417
|
lines.push(`- 返回: ${r.count || 0} 张`);
|
|
369
418
|
lines.push(`- 耗时: ${(r.duration / 1000).toFixed(2)}秒`);
|
|
370
419
|
lines.push('');
|