smart-image-scraper-mcp 2.4.2 → 2.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -10,20 +10,21 @@ import config from '../config/index.js';
|
|
|
10
10
|
import logger from './logger.js';
|
|
11
11
|
|
|
12
12
|
// HTTP 连接池配置 - 复用 TCP 连接,大幅提升性能
|
|
13
|
+
// 注意:不设置 timeout,让 axios 控制超时,避免连接被过早关闭
|
|
13
14
|
const httpAgent = new http.Agent({
|
|
14
15
|
keepAlive: true, // 启用 Keep-Alive
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
scheduling: '
|
|
16
|
+
keepAliveMsecs: 1000, // Keep-Alive 探测间隔
|
|
17
|
+
maxSockets: 100, // 增加最大并发连接数
|
|
18
|
+
maxFreeSockets: 20, // 增加最大空闲连接数
|
|
19
|
+
scheduling: 'lifo', // 后进先出,优先使用最近的连接
|
|
19
20
|
});
|
|
20
21
|
|
|
21
22
|
const httpsAgent = new https.Agent({
|
|
22
23
|
keepAlive: true,
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
scheduling: '
|
|
24
|
+
keepAliveMsecs: 1000,
|
|
25
|
+
maxSockets: 100,
|
|
26
|
+
maxFreeSockets: 20,
|
|
27
|
+
scheduling: 'lifo',
|
|
27
28
|
rejectUnauthorized: false, // 允许自签名证书
|
|
28
29
|
});
|
|
29
30
|
|
|
@@ -63,16 +63,26 @@ export class BingScraper extends BaseScraper {
|
|
|
63
63
|
async _fetchPage(keyword, offset) {
|
|
64
64
|
const searchUrl = this._buildSearchUrl(keyword, offset);
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
66
|
+
try {
|
|
67
|
+
const response = await withRetry(
|
|
68
|
+
() => httpClient.get(searchUrl, {
|
|
69
|
+
timeout: 15000, // 增加超时
|
|
70
|
+
// 每个请求使用独立的 AbortController
|
|
71
|
+
signal: AbortSignal.timeout(15000),
|
|
72
|
+
}),
|
|
73
|
+
{ maxRetries: 2, retryCondition: isRetryableError }
|
|
74
|
+
);
|
|
75
|
+
|
|
76
|
+
if (response.status !== 200) {
|
|
77
|
+
logger.warn(`[Bing] Page ${offset} failed: status ${response.status}`);
|
|
78
|
+
return [];
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
return this._parseResponse(response.data);
|
|
82
|
+
} catch (error) {
|
|
83
|
+
logger.warn(`[Bing] Page ${offset} error: ${error.message}`);
|
|
72
84
|
return [];
|
|
73
85
|
}
|
|
74
|
-
|
|
75
|
-
return this._parseResponse(response.data);
|
|
76
86
|
}
|
|
77
87
|
|
|
78
88
|
/**
|
|
@@ -63,24 +63,31 @@ export class GoogleScraper extends BaseScraper {
|
|
|
63
63
|
async _fetchPage(keyword, start) {
|
|
64
64
|
const searchUrl = this._buildSearchUrl(keyword, start);
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
66
|
+
try {
|
|
67
|
+
const response = await withRetry(
|
|
68
|
+
() => httpClient.get(searchUrl, {
|
|
69
|
+
timeout: 15000,
|
|
70
|
+
signal: AbortSignal.timeout(15000),
|
|
71
|
+
headers: {
|
|
72
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
73
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
74
|
+
'Accept-Language': 'en-US,en;q=0.5',
|
|
75
|
+
'Referer': 'https://www.google.com/',
|
|
76
|
+
},
|
|
77
|
+
}),
|
|
78
|
+
{ maxRetries: 2, retryCondition: isRetryableError }
|
|
79
|
+
);
|
|
80
|
+
|
|
81
|
+
if (response.status !== 200) {
|
|
82
|
+
logger.warn(`[Google] Page ${start} failed: status ${response.status}`);
|
|
83
|
+
return [];
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
return this._parseResponse(response.data);
|
|
87
|
+
} catch (error) {
|
|
88
|
+
logger.warn(`[Google] Page ${start} error: ${error.message}`);
|
|
80
89
|
return [];
|
|
81
90
|
}
|
|
82
|
-
|
|
83
|
-
return this._parseResponse(response.data);
|
|
84
91
|
}
|
|
85
92
|
|
|
86
93
|
/**
|