smart-image-scraper-mcp 2.8.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/package.json +1 -1
- package/src/index.js +4 -0
- package/src/providers/bingScraper.js +23 -4
- package/src/providers/googleScraper.js +27 -4
package/README.md
CHANGED
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -65,6 +65,10 @@ const SMART_SCRAPER_TOOL = {
|
|
|
65
65
|
- 正方形: square_1080(1080x1080), square_512(512x512)
|
|
66
66
|
- 社交媒体: instagram(1080x1080), twitter(1200x675), facebook(1200x630)
|
|
67
67
|
|
|
68
|
+
【⚠️ 重要限制】
|
|
69
|
+
- 同一会话中连续调用此工具不要超过5次!超过后可能无响应
|
|
70
|
+
- 当用户需要多种类型图片时,必须使用批量查询(逗号分隔),而不是多次调用
|
|
71
|
+
|
|
68
72
|
【调用示例】
|
|
69
73
|
1. 搜索5张猫的图片: {"query":"可爱的猫","mode":"link","count":5}
|
|
70
74
|
2. 下载10张高清风景图: {"query":"风景","mode":"download","count":10,"size":"large"}
|
|
@@ -26,12 +26,31 @@ export class BingScraper extends BaseScraper {
|
|
|
26
26
|
this.options = options;
|
|
27
27
|
const pageSize = 35;
|
|
28
28
|
|
|
29
|
-
//
|
|
30
|
-
|
|
29
|
+
// 计算需要获取的页数(最多3页,避免触发速率限制)
|
|
30
|
+
const pagesNeeded = Math.min(Math.ceil(limit / pageSize), 3);
|
|
31
|
+
logger.info(`[Bing] Searching "${keyword}" - ${pagesNeeded} page(s) for ${limit} images`);
|
|
31
32
|
|
|
32
33
|
try {
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
let allUrls = [];
|
|
35
|
+
|
|
36
|
+
// 顺序获取多页(避免并发触发限制)
|
|
37
|
+
for (let page = 0; page < pagesNeeded; page++) {
|
|
38
|
+
const offset = page * pageSize;
|
|
39
|
+
const urls = await this._fetchPage(keyword, offset);
|
|
40
|
+
allUrls = allUrls.concat(urls);
|
|
41
|
+
|
|
42
|
+
// 如果已经够了就停止
|
|
43
|
+
if (allUrls.length >= limit) {
|
|
44
|
+
break;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// 页间延迟,避免触发速率限制
|
|
48
|
+
if (page < pagesNeeded - 1) {
|
|
49
|
+
await this._delay(200);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const result = allUrls.slice(0, limit);
|
|
35
54
|
logger.info(`[Bing] Complete: ${result.length} URLs for "${keyword}"`);
|
|
36
55
|
return result;
|
|
37
56
|
} catch (error) {
|
|
@@ -24,13 +24,32 @@ export class GoogleScraper extends BaseScraper {
|
|
|
24
24
|
*/
|
|
25
25
|
async search(keyword, limit = 10, options = {}) {
|
|
26
26
|
this.options = options;
|
|
27
|
+
const pageSize = 20; // Google 每页约20张
|
|
27
28
|
|
|
28
|
-
//
|
|
29
|
-
|
|
29
|
+
// 计算需要获取的页数(最多3页,避免触发速率限制)
|
|
30
|
+
const pagesNeeded = Math.min(Math.ceil(limit / pageSize), 3);
|
|
31
|
+
logger.info(`[Google] Searching "${keyword}" - ${pagesNeeded} page(s) for ${limit} images`);
|
|
30
32
|
|
|
31
33
|
try {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
+
let allUrls = [];
|
|
35
|
+
|
|
36
|
+
// 顺序获取多页
|
|
37
|
+
for (let page = 0; page < pagesNeeded; page++) {
|
|
38
|
+
const start = page * pageSize;
|
|
39
|
+
const urls = await this._fetchPage(keyword, start);
|
|
40
|
+
allUrls = allUrls.concat(urls);
|
|
41
|
+
|
|
42
|
+
if (allUrls.length >= limit) {
|
|
43
|
+
break;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// 页间延迟
|
|
47
|
+
if (page < pagesNeeded - 1) {
|
|
48
|
+
await this._delay(300);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const result = allUrls.slice(0, limit);
|
|
34
53
|
logger.info(`[Google] Complete: ${result.length} URLs for "${keyword}"`);
|
|
35
54
|
return result;
|
|
36
55
|
} catch (error) {
|
|
@@ -38,6 +57,10 @@ export class GoogleScraper extends BaseScraper {
|
|
|
38
57
|
return [];
|
|
39
58
|
}
|
|
40
59
|
}
|
|
60
|
+
|
|
61
|
+
_delay(ms) {
|
|
62
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
63
|
+
}
|
|
41
64
|
|
|
42
65
|
/**
|
|
43
66
|
* 获取单页结果
|