crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio';
|
|
2
|
+
|
|
3
|
+
/**
 * Search adapter that scrapes DuckDuckGo's HTML endpoint and returns results
 * in a Google-Custom-Search-like shape (`kind`, `searchInformation`, `items`).
 * No API key is involved; every request is a form POST to `this.baseUrl`.
 */
export class DuckDuckGoSearchAdapter {
  /**
   * @param {Object} [options]
   * @param {number} [options.timeout=30000] - Per-request timeout in milliseconds.
   * @param {number} [options.maxRetries=3] - Maximum attempts per search.
   * @param {string} [options.userAgent] - User-Agent header sent with requests.
   * @param {number} [options.retryDelay=1000] - Base delay (ms) for exponential backoff.
   */
  constructor(options = {}) {
    // `||` is kept on purpose: falsy values (including 0) fall back to the defaults.
    this.timeout = options.timeout || 30000;
    this.maxRetries = options.maxRetries || 3;
    this.userAgent = options.userAgent || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';
    this.retryDelay = options.retryDelay || 1000;
    this.baseUrl = 'https://html.duckduckgo.com/html/';
  }

  /**
   * Run a search, retrying with exponential backoff on any failure
   * (request errors and parse errors alike).
   *
   * @param {Object} params
   * @param {string} params.query - Search terms.
   * @param {number} [params.num=10] - Maximum number of items to return.
   * @param {number} [params.start=1] - Start value used to derive the `b` offset.
   * @param {string} [params.lr] - Language restriction in `lang_xx` form.
   * @param {string} [params.safe='moderate'] - `'active'`, `'off'`, or anything else for moderate.
   * @param {string} [params.dateRestrict] - `d1`, `w1`, `m1` or `y1`.
   * @returns {Promise<Object>} Parsed result object from `parseHtmlResponse`.
   * @throws {Error} When every attempt fails; the last failure is attached as `cause`.
   */
  async search(params) {
    const {
      query,
      num = 10,
      start = 1,
      lr,
      safe = 'moderate',
      dateRestrict
    } = params;

    // NOTE(review): this treats `start` as a 1-based page number, while the
    // Google adapter forwards `start` as a 1-based result index. Confirm with
    // the caller which meaning is intended before changing the formula.
    const offset = (start - 1) * num;

    let safeSearch = 'moderate';
    if (safe === 'active') {
      safeSearch = 'strict';
    } else if (safe === 'off') {
      safeSearch = 'off';
    }

    // Form fields for the POST request to the HTML endpoint.
    const formData = new URLSearchParams({
      q: query,
      b: offset.toString(), // pagination offset
      kl: 'us-en',          // default language/region
      df: '',               // date filter
      safe: safeSearch      // safe search setting
    });

    if (lr && lr.startsWith('lang_')) {
      formData.set('kl', this.mapLanguageCode(lr.replace('lang_', '')));
    }

    if (dateRestrict) {
      const timeFilter = this.mapDateRestrict(dateRestrict);
      if (timeFilter) {
        formData.set('df', timeFilter);
      }
    }

    let lastError;
    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      try {
        const htmlResponse = await this.makeRequest(formData);
        return this.parseHtmlResponse(htmlResponse, query, num, start);
      } catch (error) {
        lastError = error;
        if (attempt < this.maxRetries) {
          // Exponential backoff: retryDelay, 2x, 4x, ...
          const delay = this.retryDelay * Math.pow(2, attempt - 1);
          await new Promise((resolve) => setTimeout(resolve, delay));
        }
      }
    }

    throw new Error(
      `DuckDuckGo search failed after ${this.maxRetries} attempts: ${lastError.message}`,
      { cause: lastError }
    );
  }

  /**
   * POST the form to the HTML endpoint and return the response body.
   *
   * @param {URLSearchParams} formData - Encoded form fields.
   * @returns {Promise<string>} Raw HTML of the results page.
   * @throws {Error} On a non-2xx status, on timeout, or on any fetch failure.
   */
  async makeRequest(formData) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);

    try {
      const response = await fetch(this.baseUrl, {
        method: 'POST',
        headers: {
          'User-Agent': this.userAgent,
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
          'Accept-Language': 'en-US,en;q=0.5',
          'Accept-Encoding': 'gzip, deflate, br',
          'Content-Type': 'application/x-www-form-urlencoded',
          'Origin': 'https://duckduckgo.com',
          'Referer': 'https://duckduckgo.com/',
          'Upgrade-Insecure-Requests': '1',
          'Sec-Fetch-Dest': 'document',
          'Sec-Fetch-Mode': 'navigate',
          'Sec-Fetch-Site': 'same-site'
        },
        body: formData.toString(),
        signal: controller.signal
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      // The timer stays armed while the body is read so a stalled body
      // is aborted too; it is cleared in `finally`.
      return await response.text();
    } catch (error) {
      if (error.name === 'AbortError') {
        throw new Error(`Request timeout after ${this.timeout}ms`);
      }
      throw error;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Extract result items from a results page.
   *
   * @param {string} html - Raw HTML returned by `makeRequest`.
   * @param {string} query - Original query (used only in error messages).
   * @param {number} num - Maximum number of items to collect.
   * @param {number} start - Unused; kept for signature compatibility.
   * @returns {Object} `{ kind, searchInformation, items }`.
   * @throws {Error} When no item could be extracted or parsing fails.
   */
  parseHtmlResponse(html, query, num, start) {
    try {
      const $ = cheerio.load(html);
      const items = [];

      // Candidate container selectors, tried in order; first non-empty match wins.
      const containerSelectors = [
        '.result',
        '.results_links',
        '.web-result',
        '.result__body'
      ];
      // One list serves both title and URL: the same anchor provides both.
      const anchorSelectors = [
        'a.result__a',
        '.result__title a',
        'h2 a',
        '.result-title a',
        'a[href^="http"]'
      ];
      const snippetSelectors = [
        'a.result__snippet',
        '.result__snippet',
        '.result-snippet',
        '.snippet',
        '.result__body',
        'span.result__snippet'
      ];

      // First element matched by any selector (in order) inside `$scope`, or null.
      const findFirst = ($scope, selectors) => {
        for (const selector of selectors) {
          const match = $scope.find(selector).first();
          if (match.length > 0) return match;
        }
        return null;
      };

      let results = $();
      for (const selector of containerSelectors) {
        results = $(selector);
        if (results.length > 0) break;
      }

      // Generic fallback when none of the known classes is present.
      if (results.length === 0) {
        results = $('div[data-domain]');
      }

      results.each((index, element) => {
        if (items.length >= num) return false; // returning false stops .each()

        const $result = $(element);

        const anchor = findFirst($result, anchorSelectors);
        const title = anchor ? anchor.text().trim() : '';
        // Unwrap redirect / protocol-relative hrefs so they pass isValidUrl.
        const url = anchor ? this.resolveResultUrl(anchor.attr('href') || '') : '';

        const snippetElement = findFirst($result, snippetSelectors);
        let snippet = snippetElement ? snippetElement.text().trim() : '';

        // Fall back to the container's text with the title removed.
        if (!snippet) {
          const allText = $result.text().trim();
          snippet = allText.replace(title, '').trim().substring(0, 300);
        }

        if (title && url && this.isValidUrl(url)) {
          const cleanTitle = this.cleanText(title);
          const cleanSnippet = this.cleanText(snippet);
          items.push({
            title: cleanTitle,
            link: url,
            snippet: cleanSnippet,
            displayLink: this.extractDomain(url),
            formattedUrl: url,
            htmlSnippet: cleanSnippet,
            pagemap: {
              metatags: {
                title: cleanTitle,
                description: cleanSnippet
              }
            },
            metadata: {
              source: 'duckduckgo_html',
              type: 'web_result'
            }
          });
        }
      });

      if (items.length === 0) {
        // Distinguish "no hits" from "page layout not recognised".
        const noResultsIndicators = [
          'No results found',
          'no web results',
          'Try searching for'
        ];
        const lowerHtml = html.toLowerCase();
        const hasNoResults = noResultsIndicators.some(
          (indicator) => lowerHtml.includes(indicator.toLowerCase())
        );

        if (hasNoResults) {
          throw new Error(`No search results found for query: "${query}"`);
        }
        throw new Error('Could not parse search results from DuckDuckGo response');
      }

      return {
        kind: 'duckduckgo#search',
        searchInformation: {
          searchTime: 0.1, // placeholder; the request is not timed
          formattedSearchTime: '0.10',
          totalResults: items.length.toString(),
          formattedTotalResults: items.length.toLocaleString()
        },
        items: items
      };
    } catch (error) {
      // The two errors raised above pass through unchanged.
      if (error.message.includes('No search results found') || error.message.includes('Could not parse')) {
        throw error;
      }
      throw new Error(`Failed to parse DuckDuckGo HTML response: ${error.message}`, { cause: error });
    }
  }

  /**
   * Turn a result href into an absolute target URL.
   * Redirect links of the form `//duckduckgo.com/l/?uddg=<encoded target>` are
   * unwrapped to the target; other protocol-relative links (`//host/path`)
   * become `https:`. Anything else is returned unchanged.
   *
   * @param {string} href - Raw `href` attribute value.
   * @returns {string} Resolved URL, or `''` for an empty input.
   */
  resolveResultUrl(href) {
    if (!href) return '';

    let parsed;
    try {
      parsed = new URL(href, 'https://duckduckgo.com');
    } catch {
      return href; // leave it to isValidUrl to reject
    }

    const host = parsed.hostname;
    const isDuckDuckGoHost = host === 'duckduckgo.com' || host.endsWith('.duckduckgo.com');
    if (isDuckDuckGoHost && parsed.pathname.startsWith('/l/')) {
      const target = parsed.searchParams.get('uddg'); // already percent-decoded
      if (target) return target;
    }

    // Only protocol-relative hrefs are absolutised; path-relative ones stay
    // as-is (and are still rejected later).
    return href.startsWith('//') ? parsed.href : href;
  }

  /**
   * @param {string} url
   * @returns {boolean} True only for parseable absolute http/https URLs.
   */
  isValidUrl(url) {
    if (!url) return false;
    try {
      const { protocol } = new URL(url);
      return protocol === 'http:' || protocol === 'https:';
    } catch {
      return false;
    }
  }

  /**
   * Strip tags, decode a small set of HTML entities and collapse whitespace.
   *
   * @param {string} text
   * @returns {string} Cleaned text, or `''` for falsy input.
   */
  cleanText(text) {
    if (!text) return '';

    const entities = { nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', '#39': "'" };

    return text
      .replace(/<[^>]*>/g, '')
      // Single pass, so an escaped ampersand followed by "lt;" decodes one
      // level only and is not decoded a second time.
      .replace(/&(nbsp|amp|lt|gt|quot|#39);/g, (match, name) => entities[name])
      // Whitespace is collapsed after decoding so decoded spaces are merged too.
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * @param {string} url
   * @returns {string} Hostname of `url`, or `''` when it cannot be parsed.
   */
  extractDomain(url) {
    if (!url) return '';
    try {
      return new URL(url).hostname;
    } catch {
      return '';
    }
  }

  /**
   * Map a two-letter language code to the value sent in the `kl` form field.
   *
   * @param {string} code - e.g. `'en'`, `'de'`.
   * @returns {string} Mapped value, or `'us-en'` for unknown codes.
   */
  mapLanguageCode(code) {
    const languageMap = {
      'en': 'us-en',
      'es': 'es-es',
      'fr': 'fr-fr',
      'de': 'de-de',
      'it': 'it-it',
      'pt': 'pt-br',
      'ru': 'ru-ru',
      'ja': 'jp-jp',
      'ko': 'kr-kr',
      'zh': 'cn-zh'
    };
    // Own-property check so inherited keys like "constructor" are not treated as hits.
    return Object.prototype.hasOwnProperty.call(languageMap, code) ? languageMap[code] : 'us-en';
  }

  /**
   * Map a Google-style `dateRestrict` value to the value sent in the `df` field.
   *
   * @param {string} dateRestrict - `d1`, `w1`, `m1` or `y1`.
   * @returns {string|null} `d`, `w`, `m`, `y`, or `null` when unsupported.
   */
  mapDateRestrict(dateRestrict) {
    const dateMap = {
      'd1': 'd', // past day
      'w1': 'w', // past week
      'm1': 'm', // past month
      'y1': 'y'  // past year
    };
    return Object.prototype.hasOwnProperty.call(dateMap, dateRestrict) ? dateMap[dateRestrict] : null;
  }

  /**
   * Fetch autocomplete suggestions. Best-effort: any failure yields `[]`.
   *
   * @param {string} query
   * @returns {Promise<Array>} Second element of the JSON array response, or `[]`.
   */
  async getSuggestions(query) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 5000); // shorter timeout for suggestions

    try {
      const url = `https://duckduckgo.com/ac/?q=${encodeURIComponent(query)}&type=list`;

      const response = await fetch(url, {
        headers: {
          'User-Agent': this.userAgent,
          'Accept': 'application/json',
          'Referer': 'https://duckduckgo.com/'
        },
        signal: controller.signal
      });

      if (!response.ok) {
        return [];
      }

      const data = await response.json();
      return Array.isArray(data) && data.length > 1 ? data[1] : [];
    } catch (error) {
      // Deliberately silent: suggestions are optional.
      return [];
    } finally {
      // Always disarm the timer, including when fetch itself rejects.
      clearTimeout(timeoutId);
    }
  }

  /**
   * Produce generic query variations locally; no request is made.
   *
   * @param {string} query
   * @returns {Promise<string[]>} Up to five variations, or `[]` when the query
   *   has no word longer than two characters.
   */
  async getRelatedSearches(query) {
    const hasMeaningfulWord = query.split(' ').some((word) => word.length > 2);
    if (!hasMeaningfulWord) {
      return [];
    }

    return [
      `${query} tutorial`,
      `${query} guide`,
      `${query} examples`,
      `how to ${query}`,
      `${query} best practices`
    ];
  }

  /**
   * There is no API key to validate; this performs a real search as a smoke test.
   *
   * @returns {Promise<boolean>} True when the search returned an `items` array.
   */
  async validateApiKey() {
    try {
      const result = await this.search({ query: 'test search', num: 1 });
      return Boolean(result && Array.isArray(result.items));
    } catch (error) {
      console.warn('DuckDuckGo validation failed:', error.message);
      return false;
    }
  }
}
|
|
397
|
+
|
|
398
|
+
export default DuckDuckGoSearchAdapter;
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import { customsearch } from '@googleapis/customsearch';
|
|
2
|
+
import { RetryManager } from '../../../utils/RetryManager.js';
|
|
3
|
+
import { createCircuitBreaker } from '../../../utils/CircuitBreaker.js';
|
|
4
|
+
import { logger } from '../../../utils/Logger.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Search adapter backed by the Google Custom Search API client
 * (`customsearch('v1')`), with retry handling and structured logging.
 */
export class GoogleSearchAdapter {
  /**
   * @param {string} apiKey - Google API key.
   * @param {string} searchEngineId - Custom Search Engine ID (`cx`).
   * @param {Object} [options]
   * @param {Object} [options.retryManager] - Defaults to `RetryManager.createPreset('api')`.
   * @param {Object} [options.circuitBreaker] - Defaults to `createCircuitBreaker('api')`.
   * @throws {Error} When either credential is missing.
   */
  constructor(apiKey, searchEngineId, options = {}) {
    if (!apiKey || !searchEngineId) {
      throw new Error('Google API key and Search Engine ID are required');
    }

    this.apiKey = apiKey;
    this.searchEngineId = searchEngineId;
    this.customsearch = customsearch('v1');

    // Error-handling collaborators.
    this.retryManager = options.retryManager || RetryManager.createPreset('api');
    this.circuitBreaker = options.circuitBreaker || createCircuitBreaker('api');
    this.logger = logger.child({ component: 'GoogleSearchAdapter' });

    // Key under which this service is looked up in the circuit breaker.
    this.serviceId = 'google-search-api';
  }

  /**
   * Execute a Custom Search request.
   *
   * @param {Object} params - Search parameters; `query` is required, the rest
   *   (`num`, `start`, `lr`, `safe`, `dateRestrict`, `siteSearch`,
   *   `siteSearchFilter`, `fileType`, `rights`, `imgSize`, `imgType`,
   *   `imgColorType`, `imgDominantColor`) are forwarded to the API as given.
   * @returns {Promise<Object>} The `data` payload of the API response.
   * @throws {Error} A normalised error; the underlying error is attached as `cause`.
   */
  async search(params) {
    const requestId = this.logger.startRequest({
      operation: 'search',
      query: params.query,
      parameters: { ...params, query: '[REDACTED]' } // query is masked inside `parameters`
    });

    try {
      // The circuit breaker is intentionally not in the call path for now.
      const executeSearch = async () => {
        this.logger.debug('Executing Google search API call', { params }, requestId);

        const response = await this.customsearch.cse.list({
          auth: this.apiKey,
          cx: this.searchEngineId,
          q: params.query,
          num: params.num || 10,
          start: params.start || 1,
          lr: params.lr,
          safe: params.safe,
          dateRestrict: params.dateRestrict,
          siteSearch: params.siteSearch,
          siteSearchFilter: params.siteSearchFilter,
          fileType: params.fileType,
          rights: params.rights,
          imgSize: params.imgSize,
          imgType: params.imgType,
          imgColorType: params.imgColorType,
          imgDominantColor: params.imgDominantColor
        });

        this.logger.info('Google search API call successful', {
          resultsCount: response.data?.items?.length || 0,
          searchTime: response.data?.searchInformation?.searchTime
        }, requestId);

        return response.data;
      };

      let result;
      try {
        result = await this.retryManager.execute(executeSearch, { operation: 'search', query: params.query });
      } catch (retryError) {
        // One final direct attempt when the retry manager gives up or fails itself.
        this.logger.warn('Retry manager failed, executing directly', { error: retryError.message }, requestId);
        result = await executeSearch();
      }

      this.logger.endRequest(requestId, {
        success: true,
        resultsCount: result?.items?.length || 0
      });

      return result;
    } catch (error) {
      this.logger.requestError(requestId, error, {
        operation: 'search',
        query: params.query
      });

      // Keep the original error (and its `response`) reachable for callers.
      const errorOptions = { cause: error };

      if (error.response) {
        const status = error.response.status;
        const message = error.response.data?.error?.message || error.message;

        this.logger.warn('Google Search API error response', {
          status,
          message,
          query: params.query
        }, requestId);

        if (status === 429) {
          throw new Error('API rate limit exceeded. Please try again later.', errorOptions);
        }
        if (status === 403) {
          throw new Error('API access forbidden. Check your API key and permissions.', errorOptions);
        }
        if (status === 400) {
          throw new Error(`Invalid search parameters: ${message}`, errorOptions);
        }
        if (status >= 500) {
          throw new Error(`Google Search API server error (${status}): ${message}`, errorOptions);
        }
      }

      throw new Error(`Google Search API error: ${error.message}`, errorOptions);
    }
  }

  /**
   * Suggestions are not implemented for this adapter.
   *
   * @param {string} query - Unused.
   * @returns {Promise<Array>} Always `[]`.
   */
  async getSuggestions(query) {
    return [];
  }

  /**
   * Run a one-result search and read `queries.related` from the response.
   * Best-effort: any failure yields `[]`.
   *
   * @param {string} query
   * @returns {Promise<string[]>} `searchTerms` of each related query, or `[]`.
   */
  async getRelatedSearches(query) {
    try {
      const response = await this.search({
        query,
        num: 1
      });

      if (response.queries && response.queries.related) {
        return response.queries.related.map((related) => related.searchTerms);
      }

      return [];
    } catch {
      return [];
    }
  }

  /**
   * Check the credentials by issuing a one-result test search.
   *
   * @returns {Promise<boolean>} True when the search succeeded.
   */
  async validateApiKey() {
    const requestId = this.logger.startRequest({ operation: 'validateApiKey' });

    try {
      await this.search({
        query: 'test',
        num: 1
      });

      this.logger.endRequest(requestId, { success: true, valid: true });
      return true;
    } catch (error) {
      this.logger.requestError(requestId, error, { operation: 'validateApiKey' });
      return false;
    }
  }

  /**
   * Get error handling statistics
   * @returns {Object} Statistics from retry manager, circuit breaker and logger
   */
  getStats() {
    return {
      retryStats: this.retryManager.getStats(),
      circuitBreakerStats: this.circuitBreaker.getStats(),
      loggerStats: this.logger.getStats()
    };
  }

  /**
   * Reset error handling statistics
   */
  resetStats() {
    this.retryManager.resetStats();
    this.circuitBreaker.reset(this.serviceId);
  }

  /**
   * Get health status of the service
   * @returns {Object} Health status information. `status` is `'healthy'` when
   *   the circuit is CLOSED, `'degraded'` for any other state, and `'unknown'`
   *   when the circuit breaker has no metrics for this service.
   */
  getHealthStatus() {
    // NOTE(review): search() does not route through the circuit breaker, so
    // the breaker may have no entry for this service — guard instead of throwing.
    const circuitStats = this.circuitBreaker.getServiceMetrics(this.serviceId);
    const retryStats = this.retryManager.getStats();

    let status = 'unknown';
    if (circuitStats) {
      status = circuitStats.state === 'CLOSED' ? 'healthy' : 'degraded';
    }

    return {
      status,
      circuitState: circuitStats?.state,
      errorRate: circuitStats?.errorRate,
      successRate: retryStats?.successRate,
      lastFailure: circuitStats?.lastFailure,
      nextAttempt: circuitStats?.nextAttempt
    };
  }
}
|
|
191
|
+
|
|
192
|
+
/**
 * Mock adapter for testing without API keys. Returns one canned result shaped
 * like a Custom Search response; no network access is performed.
 */
export class MockSearchAdapter {
  /**
   * @param {Object} params
   * @param {string} params.query - Query echoed into the canned result.
   * @returns {Promise<Object>} A fixed response containing a single item.
   */
  async search(params) {
    // A missing query is treated as an empty string rather than throwing.
    const query = String(params?.query ?? '');

    // The query is caller-supplied: percent-encode it for the URL path and
    // escape it (as numeric character references) before embedding it in HTML.
    const slug = encodeURIComponent(query.replace(/\s+/g, '-'));
    const htmlQuery = query.replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

    return {
      kind: 'customsearch#search',
      searchInformation: {
        searchTime: 0.123,
        formattedSearchTime: '0.12',
        totalResults: '1000',
        formattedTotalResults: '1,000'
      },
      items: [
        {
          title: `Mock result for: ${query}`,
          link: `https://example.com/mock/${slug}`,
          displayLink: 'example.com',
          snippet: `This is a mock search result for the query "${query}". It demonstrates the search functionality without requiring API credentials.`,
          htmlSnippet: `This is a mock search result for the query "<b>${htmlQuery}</b>". It demonstrates the search functionality without requiring API credentials.`,
          formattedUrl: 'https://example.com/mock',
          pagemap: {
            metatags: [{
              'og:title': `Mock: ${query}`,
              'og:description': 'Mock search result description',
              'og:image': 'https://example.com/image.jpg'
            }]
          }
        }
      ]
    };
  }

  /**
   * @param {string} query
   * @returns {Promise<string[]>} Three canned suggestions derived from `query`.
   */
  async getSuggestions(query) {
    return [`${query} tutorial`, `${query} examples`, `${query} documentation`];
  }

  /**
   * @param {string} query
   * @returns {Promise<string[]>} Three canned related searches derived from `query`.
   */
  async getRelatedSearches(query) {
    return [`${query} best practices`, `${query} alternatives`, `how to ${query}`];
  }

  /**
   * @returns {Promise<boolean>} Always true; the mock needs no credentials.
   */
  async validateApiKey() {
    return true;
  }
}
|
|
235
|
+
|
|
236
|
+
export default GoogleSearchAdapter;
|