smart-image-scraper-mcp 2.11.3 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "smart-image-scraper-mcp",
- "version": "2.11.3",
+ "version": "2.12.0",
  "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 图片搜索、验证和下载",
  "main": "src/index.js",
  "type": "module",
@@ -134,8 +134,11 @@ class Logger {
 
  const formatted = this._format(level, message, data);
 
- // 输出到 stderr
- console.error(formatted);
+ // 输出到 stderr(使用 process.stderr.write 避免 console.error 阻塞 MCP stdio)
+ // 仅输出 WARN 及以上级别到 stderr,减少 IO 压力
+ if (level >= LogLevel.WARN) {
+ process.stderr.write(formatted + '\n');
+ }
 
  // 输出到文件(异步写入,避免阻塞事件循环)
  if (this.logFile) {
@@ -204,7 +207,7 @@ class Logger {
  }
  }
 
- // 根据环境变量确定日志级别
- const defaultLevel = process.env.NODE_ENV === 'production' ? LogLevel.INFO : LogLevel.DEBUG;
+ // MCP 模式下默认 WARN 级别,避免 stderr 输出阻塞 stdio 通信
+ const defaultLevel = LogLevel[process.env.LOG_LEVEL?.toUpperCase()] ?? LogLevel.WARN;
  export const logger = new Logger({ level: defaultLevel });
  export default logger;
@@ -160,11 +160,19 @@ export class FileManager {
  return { success: false, url, error: 'Malformed URL' };
  }
 
+ // 为防盗链域名添加 Referer 头,绕过防盗链检测
+ const downloadHeaders = {};
+ try {
+ const parsed = new URL(url);
+ downloadHeaders['Referer'] = `${parsed.protocol}//${parsed.hostname}/`;
+ } catch { /* ignore */ }
+
  response = await httpClient.get(url, {
  responseType: 'stream',
  timeout: 20000, // 连接超时20秒
  maxContentLength: 50 * 1024 * 1024, // 最大50MB
  maxBodyLength: 50 * 1024 * 1024,
+ headers: downloadHeaders,
  });
 
  if (response.status !== 200) {
@@ -11,11 +11,90 @@ import config from '../config/index.js';
  // 使用配置中的并发数,避免硬编码与配置不一致
  const globalValidateLimit = pLimit(config.MAX_VALIDATE_CONCURRENCY);
 
+ // 已知有防盗链保护的域名列表
+ // 这些域名的图片在浏览器直接打开会返回 403 或替换图
+ const HOTLINK_PROTECTED_DOMAINS = [
+ 'pic.huitu.com',
+ 'img.shetu66.com',
+ 'pic.nximg.cn',
+ 'gd-hbimg.huaban.com',
+ 'hbimg.huaban.com',
+ 'img.zcool.cn',
+ 'img.zcool.com',
+ 'pic1.zhimg.com',
+ 'pic2.zhimg.com',
+ 'pic3.zhimg.com',
+ 'pic4.zhimg.com',
+ 'picx.zhimg.com',
+ 'img.alicdn.com',
+ 'img.taobao.com',
+ 'gw.alicdn.com',
+ 'cbu01.alicdn.com',
+ 'img.pconline.com.cn',
+ 'img.zol-img.com.cn',
+ 'p0.meituan.net',
+ 'p1.meituan.net',
+ 'img.doubanio.com',
+ 'img1.doubanio.com',
+ 'img2.doubanio.com',
+ 'img3.doubanio.com',
+ 'img9.doubanio.com',
+ 'ww1.sinaimg.cn',
+ 'ww2.sinaimg.cn',
+ 'ww3.sinaimg.cn',
+ 'ww4.sinaimg.cn',
+ 'wx1.sinaimg.cn',
+ 'wx2.sinaimg.cn',
+ 'wx3.sinaimg.cn',
+ 'wx4.sinaimg.cn',
+ 'tvax1.sinaimg.cn',
+ 'tvax2.sinaimg.cn',
+ 'tvax3.sinaimg.cn',
+ 'tvax4.sinaimg.cn',
+ 'tva1.sinaimg.cn',
+ 'tva2.sinaimg.cn',
+ 'tva3.sinaimg.cn',
+ 'tva4.sinaimg.cn',
+ 'cdn.pixabay.com',
+ 'images.unsplash.com',
+ 'img.freepik.com',
+ ];
+
  export class LinkValidator {
  constructor() {
  this.limit = globalValidateLimit;
  }
 
+ /**
+ * 检测 URL 是否有防盗链保护
+ * @param {string} url - 图片URL
+ * @returns {boolean} true 表示有防盗链
+ */
+ isHotlinkProtected(url) {
+ try {
+ const hostname = new URL(url).hostname.toLowerCase();
+ return HOTLINK_PROTECTED_DOMAINS.some(domain =>
+ hostname === domain || hostname.endsWith('.' + domain)
+ );
+ } catch {
+ return false;
+ }
+ }
+
+ /**
+ * 获取防盗链 URL 对应的 Referer
+ * @param {string} url - 图片URL
+ * @returns {string|null} Referer URL
+ */
+ getRefererForUrl(url) {
+ try {
+ const parsed = new URL(url);
+ return `${parsed.protocol}//${parsed.hostname}/`;
+ } catch {
+ return null;
+ }
+ }
+
  /**
  * 验证 URL 格式
  * @param {string} url - URL字符串
@@ -159,15 +159,22 @@ export class Orchestrator {
  // 检查是否已中止
  if (signal?.aborted) throw new Error('操作已取消');
 
+ // link 模式:过滤掉有防盗链保护的 URL(用户无法直接在浏览器中打开)
+ const filteredRawUrls = rawUrls.filter(url => !this.linkValidator.isHotlinkProtected(url));
+ const hotlinkCount = rawUrls.length - filteredRawUrls.length;
+ if (hotlinkCount > 0) {
+ logger.warn(`[HOTLINK] "${keyword}" - filtered ${hotlinkCount} hotlink-protected URLs`);
+ }
+
  if (fastMode) {
  // fast 模式:直接使用搜索结果,不验证
- resultUrls = rawUrls.slice(0, count);
+ resultUrls = filteredRawUrls.slice(0, count);
  qualityModeLabel = '快速模式(跳过验证)';
  logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs`);
  } else {
  // balanced 或 high 模式:验证链接(限制验证数量避免超时)
- const maxValidate = Math.min(rawUrls.length, count * 2 + 5);
- const urlsToValidate = rawUrls.slice(0, maxValidate);
+ const maxValidate = Math.min(filteredRawUrls.length, count * 2 + 5);
+ const urlsToValidate = filteredRawUrls.slice(0, maxValidate);
  const { valid } = await this.linkValidator.validateMany(urlsToValidate, {
  fetchQuality: prioritizeQuality,
  sortByQuality: prioritizeQuality,
@@ -198,6 +205,7 @@ export class Orchestrator {
  success: true,
  mode: 'link',
  totalSearched: rawUrls.length,
+ hotlinkFiltered: hotlinkCount,
  urls: resultUrls,
  count: resultUrls.length,
  qualityMode,
@@ -548,6 +556,9 @@ export class Orchestrator {
 
  if (r.mode === 'link') {
  lines.push(`- 搜索到: ${r.totalSearched || 0} 张`);
+ if (r.hotlinkFiltered > 0) {
+ lines.push(`- 防盗链过滤: ${r.hotlinkFiltered} 张`);
+ }
  lines.push(`- 质量模式: ${r.qualityModeLabel || '快速模式'}`);
  lines.push(`- 返回: ${r.count || 0} 张`);
  lines.push(`- 耗时: ${(r.duration / 1000).toFixed(2)}秒`);