smart-image-scraper-mcp 2.8.1 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -26,12 +26,31 @@ export class BingScraper extends BaseScraper {
|
|
|
26
26
|
this.options = options;
|
|
27
27
|
const pageSize = 35;
|
|
28
28
|
|
|
29
|
-
//
|
|
30
|
-
|
|
29
|
+
// 计算需要获取的页数(最多3页,避免触发速率限制)
|
|
30
|
+
const pagesNeeded = Math.min(Math.ceil(limit / pageSize), 3);
|
|
31
|
+
logger.info(`[Bing] Searching "${keyword}" - ${pagesNeeded} page(s) for ${limit} images`);
|
|
31
32
|
|
|
32
33
|
try {
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
let allUrls = [];
|
|
35
|
+
|
|
36
|
+
// 顺序获取多页(避免并发触发限制)
|
|
37
|
+
for (let page = 0; page < pagesNeeded; page++) {
|
|
38
|
+
const offset = page * pageSize;
|
|
39
|
+
const urls = await this._fetchPage(keyword, offset);
|
|
40
|
+
allUrls = allUrls.concat(urls);
|
|
41
|
+
|
|
42
|
+
// 如果已经够了就停止
|
|
43
|
+
if (allUrls.length >= limit) {
|
|
44
|
+
break;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// 页间延迟,避免触发速率限制
|
|
48
|
+
if (page < pagesNeeded - 1) {
|
|
49
|
+
await this._delay(200);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const result = allUrls.slice(0, limit);
|
|
35
54
|
logger.info(`[Bing] Complete: ${result.length} URLs for "${keyword}"`);
|
|
36
55
|
return result;
|
|
37
56
|
} catch (error) {
|
|
@@ -24,13 +24,32 @@ export class GoogleScraper extends BaseScraper {
|
|
|
24
24
|
*/
|
|
25
25
|
async search(keyword, limit = 10, options = {}) {
|
|
26
26
|
this.options = options;
|
|
27
|
+
const pageSize = 20; // Google 每页约20张
|
|
27
28
|
|
|
28
|
-
//
|
|
29
|
-
|
|
29
|
+
// 计算需要获取的页数(最多3页,避免触发速率限制)
|
|
30
|
+
const pagesNeeded = Math.min(Math.ceil(limit / pageSize), 3);
|
|
31
|
+
logger.info(`[Google] Searching "${keyword}" - ${pagesNeeded} page(s) for ${limit} images`);
|
|
30
32
|
|
|
31
33
|
try {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
+
let allUrls = [];
|
|
35
|
+
|
|
36
|
+
// 顺序获取多页
|
|
37
|
+
for (let page = 0; page < pagesNeeded; page++) {
|
|
38
|
+
const start = page * pageSize;
|
|
39
|
+
const urls = await this._fetchPage(keyword, start);
|
|
40
|
+
allUrls = allUrls.concat(urls);
|
|
41
|
+
|
|
42
|
+
if (allUrls.length >= limit) {
|
|
43
|
+
break;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// 页间延迟
|
|
47
|
+
if (page < pagesNeeded - 1) {
|
|
48
|
+
await this._delay(300);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const result = allUrls.slice(0, limit);
|
|
34
53
|
logger.info(`[Google] Complete: ${result.length} URLs for "${keyword}"`);
|
|
35
54
|
return result;
|
|
36
55
|
} catch (error) {
|
|
@@ -38,6 +57,10 @@ export class GoogleScraper extends BaseScraper {
|
|
|
38
57
|
return [];
|
|
39
58
|
}
|
|
40
59
|
}
|
|
60
|
+
|
|
61
|
+
/**
 * Pause helper: returns a promise that settles after a timer fires.
 *
 * The search loop in this class awaits it between consecutive page
 * fetches (`await this._delay(300)`), so requests are spaced out.
 *
 * @param {number} ms - Delay handed directly to `setTimeout`, in milliseconds.
 * @returns {Promise<undefined>} Resolves with no value once the timer fires;
 *   nothing in this method rejects it.
 */
_delay(ms) {
|
|
62
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
63
|
+
}
|
|
41
64
|
|
|
42
65
|
/**
|
|
43
66
|
* 获取单页结果
|