smart-image-scraper-mcp 2.1.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-image-scraper-mcp",
3
- "version": "2.1.0",
3
+ "version": "2.1.1",
4
4
  "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 图片搜索、验证和下载",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -74,16 +74,15 @@ export class LinkValidator {
74
74
 
75
75
  /**
76
76
  * 快速验证 - 仅检查 URL 格式,不发送 HTTP 请求
77
+ * 宽松模式:只要 URL 格式正确就通过
77
78
  */
78
79
  quickValidate(url) {
79
80
  if (!this._isValidUrlFormat(url)) {
80
81
  return { url, valid: false, error: 'Invalid URL' };
81
82
  }
82
- // 检查常见图片扩展名
83
- const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.svg'];
84
- const urlLower = url.toLowerCase();
85
- const hasImageExt = imageExtensions.some(ext => urlLower.includes(ext));
86
- return { url, valid: hasImageExt, quality: null };
83
+ // 宽松验证:只要 URL 格式正确就认为有效
84
+ // 图片 URL 可能不包含扩展名(动态生成的 URL)
85
+ return { url, valid: true, quality: null };
87
86
  }
88
87
 
89
88
  /**
@@ -67,23 +67,24 @@ export class Orchestrator {
67
67
  */
68
68
  async processKeywordLink(keyword, count, source, options = {}) {
69
69
  const startTime = Date.now();
70
- const fastMode = options.fastMode !== false; // 默认开启快速模式
70
+ const fastMode = options.fastMode !== false;
71
71
  const prioritizeQuality = options.prioritizeQuality === true;
72
72
 
73
73
  try {
74
74
  const scraper = getScraper(source);
75
- const searchCount = fastMode ? count : Math.ceil(count * 1.5);
75
+ // 多搜索一些以确保有足够的结果
76
+ const searchCount = Math.max(count * 3, 10);
76
77
 
77
78
  // 尝试从缓存获取
78
79
  const cachedUrls = searchCache.getSearchResult(keyword, source, options);
79
80
  let rawUrls;
80
81
 
81
- if (cachedUrls && cachedUrls.length >= searchCount) {
82
+ if (cachedUrls && cachedUrls.length >= count) {
82
83
  logger.info(`[CACHE] "${keyword}" - ${cachedUrls.length} URLs`);
83
84
  rawUrls = cachedUrls;
84
85
  metrics.recordCacheHit();
85
86
  } else {
86
- logger.info(`[SEARCH] "${keyword}"...`);
87
+ logger.info(`[SEARCH] "${keyword}" (target: ${searchCount})...`);
87
88
  rawUrls = await scraper.search(keyword, searchCount, options);
88
89
  if (rawUrls.length > 0) {
89
90
  searchCache.setSearchResult(keyword, source, options, rawUrls);
@@ -100,20 +101,24 @@ export class Orchestrator {
100
101
  };
101
102
  }
102
103
 
103
- // 快速模式:跳过 HTTP 验证,仅检查 URL 格式
104
+ // 快速模式:直接返回搜索结果(不验证)
104
105
  let resultUrls;
105
106
  if (fastMode && !prioritizeQuality) {
106
- // 快速验证:仅检查 URL 格式和扩展名
107
- const quickResults = rawUrls.map(url => this.linkValidator.quickValidate(url));
108
- resultUrls = quickResults.filter(r => r.valid).slice(0, count).map(r => r.url);
109
- logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs (no HTTP validation)`);
107
+ // 快速模式:直接使用搜索结果
108
+ resultUrls = rawUrls.slice(0, count);
109
+ logger.info(`[FAST] "${keyword}" - ${resultUrls.length} URLs`);
110
110
  } else {
111
- // 完整验证
111
+ // 完整验证模式:验证不通过的继续搜索更多
112
112
  const { valid } = await this.linkValidator.validateMany(rawUrls, {
113
113
  fetchQuality: prioritizeQuality,
114
114
  sortByQuality: prioritizeQuality,
115
115
  });
116
116
  resultUrls = valid.slice(0, count).map(v => v.url);
117
+
118
+ // 如果验证通过的不够,记录警告
119
+ if (resultUrls.length < count) {
120
+ logger.warn(`[VALIDATE] "${keyword}" - only ${resultUrls.length}/${count} valid`);
121
+ }
117
122
  }
118
123
 
119
124
  return {