smart-image-scraper-mcp 2.8.1 → 2.9.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-image-scraper-mcp",
3
- "version": "2.8.1",
3
+ "version": "2.9.0",
4
4
  "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 图片搜索、验证和下载",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -26,12 +26,31 @@ export class BingScraper extends BaseScraper {
26
26
  this.options = options;
27
27
  const pageSize = 35;
28
28
 
29
- // 只获取第一页,避免触发速率限制
30
- logger.info(`[Bing] Searching "${keyword}" - single page mode`);
29
+ // 计算需要获取的页数(最多3页,避免触发速率限制)
30
+ const pagesNeeded = Math.min(Math.ceil(limit / pageSize), 3);
31
+ logger.info(`[Bing] Searching "${keyword}" - ${pagesNeeded} page(s) for ${limit} images`);
31
32
 
32
33
  try {
33
- const urls = await this._fetchPage(keyword, 0);
34
- const result = urls.slice(0, limit);
34
+ let allUrls = [];
35
+
36
+ // 顺序获取多页(避免并发触发限制)
37
+ for (let page = 0; page < pagesNeeded; page++) {
38
+ const offset = page * pageSize;
39
+ const urls = await this._fetchPage(keyword, offset);
40
+ allUrls = allUrls.concat(urls);
41
+
42
+ // 如果已经够了就停止
43
+ if (allUrls.length >= limit) {
44
+ break;
45
+ }
46
+
47
+ // 页间延迟,避免触发速率限制
48
+ if (page < pagesNeeded - 1) {
49
+ await this._delay(200);
50
+ }
51
+ }
52
+
53
+ const result = allUrls.slice(0, limit);
35
54
  logger.info(`[Bing] Complete: ${result.length} URLs for "${keyword}"`);
36
55
  return result;
37
56
  } catch (error) {
@@ -24,13 +24,32 @@ export class GoogleScraper extends BaseScraper {
24
24
  */
25
25
  async search(keyword, limit = 10, options = {}) {
26
26
  this.options = options;
27
+ const pageSize = 20; // Google 每页约20张
27
28
 
28
- // 只获取第一页,避免触发速率限制
29
- logger.info(`[Google] Searching "${keyword}" - single page mode`);
29
+ // 计算需要获取的页数(最多3页,避免触发速率限制)
30
+ const pagesNeeded = Math.min(Math.ceil(limit / pageSize), 3);
31
+ logger.info(`[Google] Searching "${keyword}" - ${pagesNeeded} page(s) for ${limit} images`);
30
32
 
31
33
  try {
32
- const urls = await this._fetchPage(keyword, 0);
33
- const result = urls.slice(0, limit);
34
+ let allUrls = [];
35
+
36
+ // 顺序获取多页
37
+ for (let page = 0; page < pagesNeeded; page++) {
38
+ const start = page * pageSize;
39
+ const urls = await this._fetchPage(keyword, start);
40
+ allUrls = allUrls.concat(urls);
41
+
42
+ if (allUrls.length >= limit) {
43
+ break;
44
+ }
45
+
46
+ // 页间延迟
47
+ if (page < pagesNeeded - 1) {
48
+ await this._delay(300);
49
+ }
50
+ }
51
+
52
+ const result = allUrls.slice(0, limit);
34
53
  logger.info(`[Google] Complete: ${result.length} URLs for "${keyword}"`);
35
54
  return result;
36
55
  } catch (error) {
@@ -38,6 +57,10 @@ export class GoogleScraper extends BaseScraper {
38
57
  return [];
39
58
  }
40
59
  }
60
+
61
+ _delay(ms) {
62
+ return new Promise(resolve => setTimeout(resolve, ms));
63
+ }
41
64
 
42
65
  /**
43
66
  * 获取单页结果