smart-image-scraper-mcp 2.1.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -74,16 +74,15 @@ export class LinkValidator {
|
|
|
74
74
|
|
|
75
75
|
/**
|
|
76
76
|
* 快速验证 - 仅检查 URL 格式,不发送 HTTP 请求
|
|
77
|
+
* 宽松模式:只要 URL 格式正确就通过
|
|
77
78
|
*/
|
|
78
79
|
quickValidate(url) {
|
|
79
80
|
if (!this._isValidUrlFormat(url)) {
|
|
80
81
|
return { url, valid: false, error: 'Invalid URL' };
|
|
81
82
|
}
|
|
82
|
-
//
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
const hasImageExt = imageExtensions.some(ext => urlLower.includes(ext));
|
|
86
|
-
return { url, valid: hasImageExt, quality: null };
|
|
83
|
+
// 宽松验证:只要 URL 格式正确就认为有效
|
|
84
|
+
// 图片 URL 可能不包含扩展名(动态生成的 URL)
|
|
85
|
+
return { url, valid: true, quality: null };
|
|
87
86
|
}
|
|
88
87
|
|
|
89
88
|
/**
|
|
@@ -67,23 +67,24 @@ export class Orchestrator {
|
|
|
67
67
|
*/
|
|
68
68
|
async processKeywordLink(keyword, count, source, options = {}) {
|
|
69
69
|
const startTime = Date.now();
|
|
70
|
-
const fastMode = options.fastMode !== false;
|
|
70
|
+
const fastMode = options.fastMode !== false;
|
|
71
71
|
const prioritizeQuality = options.prioritizeQuality === true;
|
|
72
72
|
|
|
73
73
|
try {
|
|
74
74
|
const scraper = getScraper(source);
|
|
75
|
-
|
|
75
|
+
// 多搜索一些以确保有足够的结果
|
|
76
|
+
const searchCount = Math.max(count * 3, 10);
|
|
76
77
|
|
|
77
78
|
// 尝试从缓存获取
|
|
78
79
|
const cachedUrls = searchCache.getSearchResult(keyword, source, options);
|
|
79
80
|
let rawUrls;
|
|
80
81
|
|
|
81
|
-
if (cachedUrls && cachedUrls.length >=
|
|
82
|
+
if (cachedUrls && cachedUrls.length >= count) {
|
|
82
83
|
logger.info(`[CACHE] "${keyword}" - ${cachedUrls.length} URLs`);
|
|
83
84
|
rawUrls = cachedUrls;
|
|
84
85
|
metrics.recordCacheHit();
|
|
85
86
|
} else {
|
|
86
|
-
logger.info(`[SEARCH] "${keyword}"...`);
|
|
87
|
+
logger.info(`[SEARCH] "${keyword}" (target: ${searchCount})...`);
|
|
87
88
|
rawUrls = await scraper.search(keyword, searchCount, options);
|
|
88
89
|
if (rawUrls.length > 0) {
|
|
89
90
|
searchCache.setSearchResult(keyword, source, options, rawUrls);
|
|
@@ -100,20 +101,24 @@ export class Orchestrator {
|
|
|
100
101
|
};
|
|
101
102
|
}
|
|
102
103
|
|
|
103
|
-
//
|
|
104
|
+
// 快速模式:直接返回搜索结果(不验证)
|
|
104
105
|
let resultUrls;
|
|
105
106
|
if (fastMode && !prioritizeQuality) {
|
|
106
|
-
//
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs (no HTTP validation)`);
|
|
107
|
+
// 快速模式:直接使用搜索结果
|
|
108
|
+
resultUrls = rawUrls.slice(0, count);
|
|
109
|
+
logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs`);
|
|
110
110
|
} else {
|
|
111
|
-
//
|
|
111
|
+
// 完整验证模式:验证不通过的继续搜索更多
|
|
112
112
|
const { valid } = await this.linkValidator.validateMany(rawUrls, {
|
|
113
113
|
fetchQuality: prioritizeQuality,
|
|
114
114
|
sortByQuality: prioritizeQuality,
|
|
115
115
|
});
|
|
116
116
|
resultUrls = valid.slice(0, count).map(v => v.url);
|
|
117
|
+
|
|
118
|
+
// 如果验证通过的不够,记录警告
|
|
119
|
+
if (resultUrls.length < count) {
|
|
120
|
+
logger.warn(`[VALIDATE] "${keyword}" - only ${resultUrls.length}/${count} valid`);
|
|
121
|
+
}
|
|
117
122
|
}
|
|
118
123
|
|
|
119
124
|
return {
|