smart-image-scraper-mcp 2.5.0 → 2.5.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +16 -1
- package/src/providers/bingScraper.js +6 -23
- package/src/providers/googleScraper.js +6 -24
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -26,8 +26,12 @@ import logger from './infrastructure/logger.js';
|
|
|
26
26
|
import { gracefulShutdown } from './infrastructure/gracefulShutdown.js';
|
|
27
27
|
import { healthChecker } from './infrastructure/healthCheck.js';
|
|
28
28
|
import { metrics } from './infrastructure/metrics.js';
|
|
29
|
+
import { searchCache, validationCache } from './infrastructure/cache.js';
|
|
29
30
|
import config from './config/index.js';
|
|
30
31
|
|
|
32
|
+
// 请求计数器,用于定期清理
|
|
33
|
+
let requestCount = 0;
|
|
34
|
+
|
|
31
35
|
// 从 package.json 读取版本号
|
|
32
36
|
const require = createRequire(import.meta.url);
|
|
33
37
|
const packageJson = require('../package.json');
|
|
@@ -269,7 +273,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
269
273
|
const totalTime = Date.now() - requestStartTime;
|
|
270
274
|
logger.info(`[MCP] 请求完成: ${totalTime}ms, requestId=${result.requestId}`);
|
|
271
275
|
|
|
272
|
-
|
|
276
|
+
// 每3个请求清理一次缓存,避免内存累积
|
|
277
|
+
requestCount++;
|
|
278
|
+
if (requestCount >= 3) {
|
|
279
|
+
searchCache.clear();
|
|
280
|
+
validationCache.clear();
|
|
281
|
+
requestCount = 0;
|
|
282
|
+
logger.info('[MCP] 缓存已清理');
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
const response = {
|
|
273
286
|
content: [
|
|
274
287
|
{
|
|
275
288
|
type: 'text',
|
|
@@ -277,6 +290,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
277
290
|
},
|
|
278
291
|
],
|
|
279
292
|
};
|
|
293
|
+
|
|
294
|
+
return response;
|
|
280
295
|
} catch (innerError) {
|
|
281
296
|
// 记录失败指标
|
|
282
297
|
metrics.recordSearch(source, false, Date.now() - startTime);
|
|
@@ -26,31 +26,14 @@ export class BingScraper extends BaseScraper {
|
|
|
26
26
|
this.options = options;
|
|
27
27
|
const pageSize = 35;
|
|
28
28
|
|
|
29
|
-
//
|
|
30
|
-
|
|
31
|
-
const offsets = Array.from({ length: pagesNeeded }, (_, i) => i * pageSize);
|
|
32
|
-
|
|
33
|
-
logger.info(`[Bing] Searching "${keyword}" - ${pagesNeeded} pages in parallel`);
|
|
29
|
+
// 只获取第一页,避免触发速率限制
|
|
30
|
+
logger.info(`[Bing] Searching "${keyword}" - single page mode`);
|
|
34
31
|
|
|
35
32
|
try {
|
|
36
|
-
|
|
37
|
-
const
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
// 合并结果
|
|
41
|
-
const urls = new Set();
|
|
42
|
-
for (const result of results) {
|
|
43
|
-
if (result.status === 'fulfilled') {
|
|
44
|
-
result.value.forEach(url => {
|
|
45
|
-
if (urls.size < limit) {
|
|
46
|
-
urls.add(url);
|
|
47
|
-
}
|
|
48
|
-
});
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
logger.info(`[Bing] Complete: ${urls.size} URLs for "${keyword}"`);
|
|
53
|
-
return Array.from(urls);
|
|
33
|
+
const urls = await this._fetchPage(keyword, 0);
|
|
34
|
+
const result = urls.slice(0, limit);
|
|
35
|
+
logger.info(`[Bing] Complete: ${result.length} URLs for "${keyword}"`);
|
|
36
|
+
return result;
|
|
54
37
|
} catch (error) {
|
|
55
38
|
logger.error(`Bing search error for "${keyword}"`, { message: error.message });
|
|
56
39
|
return [];
|
|
@@ -24,33 +24,15 @@ export class GoogleScraper extends BaseScraper {
|
|
|
24
24
|
*/
|
|
25
25
|
async search(keyword, limit = 10, options = {}) {
|
|
26
26
|
this.options = options;
|
|
27
|
-
const pageSize = 20;
|
|
28
27
|
|
|
29
|
-
//
|
|
30
|
-
|
|
31
|
-
const starts = Array.from({ length: pagesNeeded }, (_, i) => i * pageSize);
|
|
32
|
-
|
|
33
|
-
logger.info(`[Google] Searching "${keyword}" - ${pagesNeeded} pages in parallel`);
|
|
28
|
+
// 只获取第一页,避免触发速率限制
|
|
29
|
+
logger.info(`[Google] Searching "${keyword}" - single page mode`);
|
|
34
30
|
|
|
35
31
|
try {
|
|
36
|
-
|
|
37
|
-
const
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
// 合并结果
|
|
41
|
-
const urls = new Set();
|
|
42
|
-
for (const result of results) {
|
|
43
|
-
if (result.status === 'fulfilled') {
|
|
44
|
-
result.value.forEach(url => {
|
|
45
|
-
if (urls.size < limit) {
|
|
46
|
-
urls.add(url);
|
|
47
|
-
}
|
|
48
|
-
});
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
logger.info(`[Google] Complete: ${urls.size} URLs for "${keyword}"`);
|
|
53
|
-
return Array.from(urls);
|
|
32
|
+
const urls = await this._fetchPage(keyword, 0);
|
|
33
|
+
const result = urls.slice(0, limit);
|
|
34
|
+
logger.info(`[Google] Complete: ${result.length} URLs for "${keyword}"`);
|
|
35
|
+
return result;
|
|
54
36
|
} catch (error) {
|
|
55
37
|
logger.error(`Google search error for "${keyword}"`, { message: error.message });
|
|
56
38
|
return [];
|