smart-image-scraper-mcp 2.4.2 → 2.4.3

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smart-image-scraper-mcp",
3
- "version": "2.4.2",
3
+ "version": "2.4.3",
4
4
  "description": "全网智能图片抓取 MCP 服务器 - 支持 Bing/Google 图片搜索、验证和下载",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -10,20 +10,21 @@ import config from '../config/index.js';
10
10
  import logger from './logger.js';
11
11
 
12
12
  // HTTP 连接池配置 - 复用 TCP 连接,大幅提升性能
13
+ // 注意:不设置 timeout,让 axios 控制超时,避免连接被过早关闭
13
14
  const httpAgent = new http.Agent({
14
15
  keepAlive: true, // 启用 Keep-Alive
15
- maxSockets: 50, // 最大并发连接数
16
- maxFreeSockets: 10, // 最大空闲连接数
17
- timeout: 30000, // 连接超时 30 秒
18
- scheduling: 'fifo', // 先进先出调度
16
+ keepAliveMsecs: 1000, // Keep-Alive 探测间隔
17
+ maxSockets: 100, // 增加最大并发连接数
18
+ maxFreeSockets: 20, // 增加最大空闲连接数
19
+ scheduling: 'lifo', // 后进先出,优先使用最近的连接
19
20
  });
20
21
 
21
22
  const httpsAgent = new https.Agent({
22
23
  keepAlive: true,
23
- maxSockets: 50,
24
- maxFreeSockets: 10,
25
- timeout: 30000,
26
- scheduling: 'fifo',
24
+ keepAliveMsecs: 1000,
25
+ maxSockets: 100,
26
+ maxFreeSockets: 20,
27
+ scheduling: 'lifo',
27
28
  rejectUnauthorized: false, // 允许自签名证书
28
29
  });
29
30
 
@@ -63,16 +63,26 @@ export class BingScraper extends BaseScraper {
63
63
  async _fetchPage(keyword, offset) {
64
64
  const searchUrl = this._buildSearchUrl(keyword, offset);
65
65
 
66
- const response = await withRetry(
67
- () => httpClient.get(searchUrl, { timeout: 10000 }),
68
- { maxRetries: 1, retryCondition: isRetryableError }
69
- );
70
-
71
- if (response.status !== 200) {
66
+ try {
67
+ const response = await withRetry(
68
+ () => httpClient.get(searchUrl, {
69
+ timeout: 15000, // 增加超时
70
+ // 每个请求使用独立的 AbortController
71
+ signal: AbortSignal.timeout(15000),
72
+ }),
73
+ { maxRetries: 2, retryCondition: isRetryableError }
74
+ );
75
+
76
+ if (response.status !== 200) {
77
+ logger.warn(`[Bing] Page ${offset} failed: status ${response.status}`);
78
+ return [];
79
+ }
80
+
81
+ return this._parseResponse(response.data);
82
+ } catch (error) {
83
+ logger.warn(`[Bing] Page ${offset} error: ${error.message}`);
72
84
  return [];
73
85
  }
74
-
75
- return this._parseResponse(response.data);
76
86
  }
77
87
 
78
88
  /**
@@ -63,24 +63,31 @@ export class GoogleScraper extends BaseScraper {
63
63
  async _fetchPage(keyword, start) {
64
64
  const searchUrl = this._buildSearchUrl(keyword, start);
65
65
 
66
- const response = await withRetry(
67
- () => httpClient.get(searchUrl, {
68
- timeout: 10000,
69
- headers: {
70
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
71
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
72
- 'Accept-Language': 'en-US,en;q=0.5',
73
- 'Referer': 'https://www.google.com/',
74
- },
75
- }),
76
- { maxRetries: 1, retryCondition: isRetryableError }
77
- );
78
-
79
- if (response.status !== 200) {
66
+ try {
67
+ const response = await withRetry(
68
+ () => httpClient.get(searchUrl, {
69
+ timeout: 15000,
70
+ signal: AbortSignal.timeout(15000),
71
+ headers: {
72
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
73
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
74
+ 'Accept-Language': 'en-US,en;q=0.5',
75
+ 'Referer': 'https://www.google.com/',
76
+ },
77
+ }),
78
+ { maxRetries: 2, retryCondition: isRetryableError }
79
+ );
80
+
81
+ if (response.status !== 200) {
82
+ logger.warn(`[Google] Page ${start} failed: status ${response.status}`);
83
+ return [];
84
+ }
85
+
86
+ return this._parseResponse(response.data);
87
+ } catch (error) {
88
+ logger.warn(`[Google] Page ${start} error: ${error.message}`);
80
89
  return [];
81
90
  }
82
-
83
- return this._parseResponse(response.data);
84
91
  }
85
92
 
86
93
  /**