smart-image-scraper-mcp 2.11.3 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -134,8 +134,11 @@ class Logger {
|
|
|
134
134
|
|
|
135
135
|
const formatted = this._format(level, message, data);
|
|
136
136
|
|
|
137
|
-
// 输出到 stderr
|
|
138
|
-
|
|
137
|
+
// 输出到 stderr(使用 process.stderr.write 避免 console.error 阻塞 MCP stdio)
|
|
138
|
+
// 仅输出 WARN 及以上级别到 stderr,减少 IO 压力
|
|
139
|
+
if (level >= LogLevel.WARN) {
|
|
140
|
+
process.stderr.write(formatted + '\n');
|
|
141
|
+
}
|
|
139
142
|
|
|
140
143
|
// 输出到文件(异步写入,避免阻塞事件循环)
|
|
141
144
|
if (this.logFile) {
|
|
@@ -204,7 +207,7 @@ class Logger {
|
|
|
204
207
|
}
|
|
205
208
|
}
|
|
206
209
|
|
|
207
|
-
//
|
|
208
|
-
const defaultLevel = process.env.
|
|
210
|
+
// MCP 模式下默认 WARN 级别,避免 stderr 输出阻塞 stdio 通信
|
|
211
|
+
const defaultLevel = LogLevel[process.env.LOG_LEVEL?.toUpperCase()] ?? LogLevel.WARN;
|
|
209
212
|
export const logger = new Logger({ level: defaultLevel });
|
|
210
213
|
export default logger;
|
|
@@ -160,11 +160,19 @@ export class FileManager {
|
|
|
160
160
|
return { success: false, url, error: 'Malformed URL' };
|
|
161
161
|
}
|
|
162
162
|
|
|
163
|
+
// 为防盗链域名添加 Referer 头,绕过防盗链检测
|
|
164
|
+
const downloadHeaders = {};
|
|
165
|
+
try {
|
|
166
|
+
const parsed = new URL(url);
|
|
167
|
+
downloadHeaders['Referer'] = `${parsed.protocol}//${parsed.hostname}/`;
|
|
168
|
+
} catch { /* ignore */ }
|
|
169
|
+
|
|
163
170
|
response = await httpClient.get(url, {
|
|
164
171
|
responseType: 'stream',
|
|
165
172
|
timeout: 20000, // 连接超时20秒
|
|
166
173
|
maxContentLength: 50 * 1024 * 1024, // 最大50MB
|
|
167
174
|
maxBodyLength: 50 * 1024 * 1024,
|
|
175
|
+
headers: downloadHeaders,
|
|
168
176
|
});
|
|
169
177
|
|
|
170
178
|
if (response.status !== 200) {
|
|
@@ -11,11 +11,90 @@ import config from '../config/index.js';
|
|
|
11
11
|
// 使用配置中的并发数,避免硬编码与配置不一致
|
|
12
12
|
const globalValidateLimit = pLimit(config.MAX_VALIDATE_CONCURRENCY);
|
|
13
13
|
|
|
14
|
+
// 已知有防盗链保护的域名列表
|
|
15
|
+
// 这些域名的图片在浏览器直接打开会返回 403 或替换图
|
|
16
|
+
const HOTLINK_PROTECTED_DOMAINS = [
|
|
17
|
+
'pic.huitu.com',
|
|
18
|
+
'img.shetu66.com',
|
|
19
|
+
'pic.nximg.cn',
|
|
20
|
+
'gd-hbimg.huaban.com',
|
|
21
|
+
'hbimg.huaban.com',
|
|
22
|
+
'img.zcool.cn',
|
|
23
|
+
'img.zcool.com',
|
|
24
|
+
'pic1.zhimg.com',
|
|
25
|
+
'pic2.zhimg.com',
|
|
26
|
+
'pic3.zhimg.com',
|
|
27
|
+
'pic4.zhimg.com',
|
|
28
|
+
'picx.zhimg.com',
|
|
29
|
+
'img.alicdn.com',
|
|
30
|
+
'img.taobao.com',
|
|
31
|
+
'gw.alicdn.com',
|
|
32
|
+
'cbu01.alicdn.com',
|
|
33
|
+
'img.pconline.com.cn',
|
|
34
|
+
'img.zol-img.com.cn',
|
|
35
|
+
'p0.meituan.net',
|
|
36
|
+
'p1.meituan.net',
|
|
37
|
+
'img.doubanio.com',
|
|
38
|
+
'img1.doubanio.com',
|
|
39
|
+
'img2.doubanio.com',
|
|
40
|
+
'img3.doubanio.com',
|
|
41
|
+
'img9.doubanio.com',
|
|
42
|
+
'ww1.sinaimg.cn',
|
|
43
|
+
'ww2.sinaimg.cn',
|
|
44
|
+
'ww3.sinaimg.cn',
|
|
45
|
+
'ww4.sinaimg.cn',
|
|
46
|
+
'wx1.sinaimg.cn',
|
|
47
|
+
'wx2.sinaimg.cn',
|
|
48
|
+
'wx3.sinaimg.cn',
|
|
49
|
+
'wx4.sinaimg.cn',
|
|
50
|
+
'tvax1.sinaimg.cn',
|
|
51
|
+
'tvax2.sinaimg.cn',
|
|
52
|
+
'tvax3.sinaimg.cn',
|
|
53
|
+
'tvax4.sinaimg.cn',
|
|
54
|
+
'tva1.sinaimg.cn',
|
|
55
|
+
'tva2.sinaimg.cn',
|
|
56
|
+
'tva3.sinaimg.cn',
|
|
57
|
+
'tva4.sinaimg.cn',
|
|
58
|
+
'cdn.pixabay.com',
|
|
59
|
+
'images.unsplash.com',
|
|
60
|
+
'img.freepik.com',
|
|
61
|
+
];
|
|
62
|
+
|
|
14
63
|
export class LinkValidator {
|
|
15
64
|
constructor() {
|
|
16
65
|
this.limit = globalValidateLimit;
|
|
17
66
|
}
|
|
18
67
|
|
|
68
|
+
/**
|
|
69
|
+
* 检测 URL 是否有防盗链保护
|
|
70
|
+
* @param {string} url - 图片URL
|
|
71
|
+
* @returns {boolean} true 表示有防盗链
|
|
72
|
+
*/
|
|
73
|
+
isHotlinkProtected(url) {
|
|
74
|
+
try {
|
|
75
|
+
const hostname = new URL(url).hostname.toLowerCase();
|
|
76
|
+
return HOTLINK_PROTECTED_DOMAINS.some(domain =>
|
|
77
|
+
hostname === domain || hostname.endsWith('.' + domain)
|
|
78
|
+
);
|
|
79
|
+
} catch {
|
|
80
|
+
return false;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* 获取防盗链 URL 对应的 Referer
|
|
86
|
+
* @param {string} url - 图片URL
|
|
87
|
+
* @returns {string|null} Referer URL
|
|
88
|
+
*/
|
|
89
|
+
getRefererForUrl(url) {
|
|
90
|
+
try {
|
|
91
|
+
const parsed = new URL(url);
|
|
92
|
+
return `${parsed.protocol}//${parsed.hostname}/`;
|
|
93
|
+
} catch {
|
|
94
|
+
return null;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
19
98
|
/**
|
|
20
99
|
* 验证 URL 格式
|
|
21
100
|
* @param {string} url - URL字符串
|
|
@@ -159,15 +159,22 @@ export class Orchestrator {
|
|
|
159
159
|
// 检查是否已中止
|
|
160
160
|
if (signal?.aborted) throw new Error('操作已取消');
|
|
161
161
|
|
|
162
|
+
// link 模式:过滤掉有防盗链保护的 URL(用户无法直接在浏览器中打开)
|
|
163
|
+
const filteredRawUrls = rawUrls.filter(url => !this.linkValidator.isHotlinkProtected(url));
|
|
164
|
+
const hotlinkCount = rawUrls.length - filteredRawUrls.length;
|
|
165
|
+
if (hotlinkCount > 0) {
|
|
166
|
+
logger.warn(`[HOTLINK] "${keyword}" - filtered ${hotlinkCount} hotlink-protected URLs`);
|
|
167
|
+
}
|
|
168
|
+
|
|
162
169
|
if (fastMode) {
|
|
163
170
|
// fast 模式:直接使用搜索结果,不验证
|
|
164
|
-
resultUrls =
|
|
171
|
+
resultUrls = filteredRawUrls.slice(0, count);
|
|
165
172
|
qualityModeLabel = '快速模式(跳过验证)';
|
|
166
173
|
logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs`);
|
|
167
174
|
} else {
|
|
168
175
|
// balanced 或 high 模式:验证链接(限制验证数量避免超时)
|
|
169
|
-
const maxValidate = Math.min(
|
|
170
|
-
const urlsToValidate =
|
|
176
|
+
const maxValidate = Math.min(filteredRawUrls.length, count * 2 + 5);
|
|
177
|
+
const urlsToValidate = filteredRawUrls.slice(0, maxValidate);
|
|
171
178
|
const { valid } = await this.linkValidator.validateMany(urlsToValidate, {
|
|
172
179
|
fetchQuality: prioritizeQuality,
|
|
173
180
|
sortByQuality: prioritizeQuality,
|
|
@@ -198,6 +205,7 @@ export class Orchestrator {
|
|
|
198
205
|
success: true,
|
|
199
206
|
mode: 'link',
|
|
200
207
|
totalSearched: rawUrls.length,
|
|
208
|
+
hotlinkFiltered: hotlinkCount,
|
|
201
209
|
urls: resultUrls,
|
|
202
210
|
count: resultUrls.length,
|
|
203
211
|
qualityMode,
|
|
@@ -548,6 +556,9 @@ export class Orchestrator {
|
|
|
548
556
|
|
|
549
557
|
if (r.mode === 'link') {
|
|
550
558
|
lines.push(`- 搜索到: ${r.totalSearched || 0} 张`);
|
|
559
|
+
if (r.hotlinkFiltered > 0) {
|
|
560
|
+
lines.push(`- 防盗链过滤: ${r.hotlinkFiltered} 张`);
|
|
561
|
+
}
|
|
551
562
|
lines.push(`- 质量模式: ${r.qualityModeLabel || '快速模式'}`);
|
|
552
563
|
lines.push(`- 返回: ${r.count || 0} 张`);
|
|
553
564
|
lines.push(`- 耗时: ${(r.duration / 1000).toFixed(2)}秒`);
|