crawlforge-mcp-server 3.0.6 → 3.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +18 -18
- package/README.md +1 -1
- package/package.json +1 -1
- package/server.js +58 -76
- package/src/constants/config.js +9 -93
- package/src/tools/search/adapters/crawlforgeSearch.js +107 -0
- package/src/tools/search/adapters/googleSearch.js +106 -211
- package/src/tools/search/adapters/searchProviderFactory.js +150 -60
- package/src/tools/search/searchWeb.js +42 -42
- package/src/tools/search/adapters/duckduckgoSearch.js +0 -500
|
@@ -63,9 +63,8 @@ const SearchWebSchema = z.object({
|
|
|
63
63
|
export class SearchWebTool {
|
|
64
64
|
constructor(options = {}) {
|
|
65
65
|
const {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
duckduckgo = {},
|
|
66
|
+
apiKey,
|
|
67
|
+
apiBaseUrl,
|
|
69
68
|
cacheEnabled = true,
|
|
70
69
|
cacheTTL = 3600000, // 1 hour
|
|
71
70
|
expanderOptions = {},
|
|
@@ -73,17 +72,22 @@ export class SearchWebTool {
|
|
|
73
72
|
deduplicationOptions = {}
|
|
74
73
|
} = options;
|
|
75
74
|
|
|
76
|
-
//
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
75
|
+
// Check for Creator Mode - allows search without API key for development/testing
|
|
76
|
+
const isCreatorMode = process.env.CRAWLFORGE_CREATOR_MODE === 'true';
|
|
77
|
+
|
|
78
|
+
if (!apiKey && !isCreatorMode) {
|
|
79
|
+
throw new Error('CrawlForge API key is required for search functionality');
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Create the search adapter (CrawlForge API, or direct Google Custom Search API fallback in Creator Mode)
|
|
80
83
|
try {
|
|
81
|
-
this.searchAdapter = SearchProviderFactory.createAdapter(
|
|
82
|
-
|
|
83
|
-
|
|
84
|
+
this.searchAdapter = SearchProviderFactory.createAdapter(apiKey, {
|
|
85
|
+
apiBaseUrl,
|
|
86
|
+
creatorMode: isCreatorMode
|
|
84
87
|
});
|
|
88
|
+
this.isCreatorModeFallback = !apiKey && isCreatorMode;
|
|
85
89
|
} catch (error) {
|
|
86
|
-
throw new Error(`Failed to initialize search
|
|
90
|
+
throw new Error(`Failed to initialize search adapter: ${error.message}`);
|
|
87
91
|
}
|
|
88
92
|
|
|
89
93
|
this.cache = cacheEnabled ? new CacheManager({ ttl: cacheTTL }) : null;
|
|
@@ -102,27 +106,6 @@ export class SearchWebTool {
|
|
|
102
106
|
});
|
|
103
107
|
}
|
|
104
108
|
|
|
105
|
-
determineProvider(configuredProvider, providerOptions) {
|
|
106
|
-
switch (configuredProvider.toLowerCase()) {
|
|
107
|
-
case 'google':
|
|
108
|
-
if (!providerOptions.google?.apiKey || !providerOptions.google?.searchEngineId) {
|
|
109
|
-
throw new Error('Google provider requires apiKey and searchEngineId');
|
|
110
|
-
}
|
|
111
|
-
return 'google';
|
|
112
|
-
|
|
113
|
-
case 'duckduckgo':
|
|
114
|
-
return 'duckduckgo';
|
|
115
|
-
|
|
116
|
-
case 'auto':
|
|
117
|
-
default:
|
|
118
|
-
// Auto mode: prefer Google if credentials available, otherwise use DuckDuckGo
|
|
119
|
-
if (providerOptions.google?.apiKey && providerOptions.google?.searchEngineId) {
|
|
120
|
-
return 'google';
|
|
121
|
-
}
|
|
122
|
-
return 'duckduckgo';
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
|
|
126
109
|
async execute(params) {
|
|
127
110
|
try {
|
|
128
111
|
const validated = SearchWebSchema.parse(params);
|
|
@@ -308,9 +291,15 @@ export class SearchWebTool {
|
|
|
308
291
|
cached: false,
|
|
309
292
|
|
|
310
293
|
// Add provider information
|
|
311
|
-
provider: {
|
|
312
|
-
name:
|
|
313
|
-
|
|
294
|
+
provider: this.isCreatorModeFallback ? {
|
|
295
|
+
name: 'google',
|
|
296
|
+
backend: 'Google Custom Search API (Creator Mode)',
|
|
297
|
+
note: 'Using Google Search API directly. Production users use CrawlForge API.',
|
|
298
|
+
capabilities: SearchProviderFactory.getProviderCapabilities('google')
|
|
299
|
+
} : {
|
|
300
|
+
name: 'crawlforge',
|
|
301
|
+
backend: 'Google Search',
|
|
302
|
+
capabilities: SearchProviderFactory.getProviderCapabilities('crawlforge')
|
|
314
303
|
},
|
|
315
304
|
|
|
316
305
|
// Add localization information
|
|
@@ -458,10 +447,16 @@ export class SearchWebTool {
|
|
|
458
447
|
|
|
459
448
|
getStats() {
|
|
460
449
|
return {
|
|
461
|
-
provider: {
|
|
462
|
-
name:
|
|
463
|
-
|
|
450
|
+
provider: this.isCreatorModeFallback ? {
|
|
451
|
+
name: 'google',
|
|
452
|
+
backend: 'Google Custom Search API (Creator Mode)',
|
|
453
|
+
note: 'Using Google Search API directly'
|
|
454
|
+
} : {
|
|
455
|
+
name: 'crawlforge',
|
|
456
|
+
backend: 'Google Search',
|
|
457
|
+
capabilities: SearchProviderFactory.getProviderCapabilities('crawlforge')
|
|
464
458
|
},
|
|
459
|
+
creatorMode: this.isCreatorModeFallback || false,
|
|
465
460
|
cacheStats: this.cache ? this.cache.getStats() : null,
|
|
466
461
|
queryExpanderStats: this.queryExpander ? this.queryExpander.getStats() : null,
|
|
467
462
|
rankingStats: this.resultRanker ? this.resultRanker.getStats() : null,
|
|
@@ -471,12 +466,17 @@ export class SearchWebTool {
|
|
|
471
466
|
|
|
472
467
|
getProviderInfo() {
|
|
473
468
|
return {
|
|
474
|
-
activeProvider: this.
|
|
475
|
-
|
|
469
|
+
activeProvider: this.isCreatorModeFallback ? 'google' : 'crawlforge',
|
|
470
|
+
backend: this.isCreatorModeFallback
|
|
471
|
+
? 'Google Custom Search API (Creator Mode)'
|
|
472
|
+
: 'Google Search via CrawlForge API',
|
|
473
|
+
capabilities: SearchProviderFactory.getProviderCapabilities(
|
|
474
|
+
this.isCreatorModeFallback ? 'google' : 'crawlforge'
|
|
475
|
+
),
|
|
476
476
|
supportedProviders: SearchProviderFactory.getSupportedProviders(),
|
|
477
|
-
|
|
477
|
+
isCreatorMode: this.isCreatorModeFallback || false
|
|
478
478
|
};
|
|
479
479
|
}
|
|
480
480
|
}
|
|
481
481
|
|
|
482
|
-
export default SearchWebTool;
|
|
482
|
+
export default SearchWebTool;
|
|
@@ -1,500 +0,0 @@
|
|
|
1
|
-
import * as cheerio from 'cheerio';
|
|
2
|
-
import { search as ddgSearch, SafeSearchType, SearchTimeType } from 'duck-duck-scrape';
|
|
3
|
-
|
|
4
|
-
export class DuckDuckGoSearchAdapter {
|
|
5
|
-
constructor(options = {}) {
|
|
6
|
-
this.timeout = options.timeout || 30000;
|
|
7
|
-
this.maxRetries = options.maxRetries || 3;
|
|
8
|
-
this.userAgent = options.userAgent || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
|
9
|
-
this.retryDelay = options.retryDelay || 2000; // Increased base delay
|
|
10
|
-
this.baseUrl = 'https://html.duckduckgo.com/html/';
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
async search(params) {
|
|
14
|
-
const {
|
|
15
|
-
query,
|
|
16
|
-
num = 10,
|
|
17
|
-
start = 1,
|
|
18
|
-
lr,
|
|
19
|
-
safe = 'moderate',
|
|
20
|
-
dateRestrict
|
|
21
|
-
} = params;
|
|
22
|
-
|
|
23
|
-
// Try duck-duck-scrape library first (more reliable API access)
|
|
24
|
-
try {
|
|
25
|
-
const results = await this.searchWithLibrary(query, num, safe, dateRestrict);
|
|
26
|
-
if (results.items && results.items.length > 0) {
|
|
27
|
-
return results;
|
|
28
|
-
}
|
|
29
|
-
} catch (libraryError) {
|
|
30
|
-
console.warn('DuckDuckGo library search failed:', libraryError.message);
|
|
31
|
-
// Check if it's a CAPTCHA/anomaly error
|
|
32
|
-
if (libraryError.message.includes('anomaly') || libraryError.message.includes('too quickly')) {
|
|
33
|
-
throw new Error(
|
|
34
|
-
'DuckDuckGo is blocking automated requests. ' +
|
|
35
|
-
'To use web search reliably, please configure Google Custom Search API by setting ' +
|
|
36
|
-
'GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID environment variables. ' +
|
|
37
|
-
'See: https://developers.google.com/custom-search/v1/introduction'
|
|
38
|
-
);
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
// Fallback to HTML scraping (legacy method)
|
|
43
|
-
const offset = (start - 1) * num;
|
|
44
|
-
|
|
45
|
-
const formData = new URLSearchParams({
|
|
46
|
-
q: query,
|
|
47
|
-
b: offset.toString(),
|
|
48
|
-
kl: 'us-en',
|
|
49
|
-
df: '',
|
|
50
|
-
safe: 'moderate'
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
if (safe === 'active') {
|
|
54
|
-
formData.set('safe', 'strict');
|
|
55
|
-
} else if (safe === 'off') {
|
|
56
|
-
formData.set('safe', 'off');
|
|
57
|
-
} else {
|
|
58
|
-
formData.set('safe', 'moderate');
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
if (lr && lr.startsWith('lang_')) {
|
|
62
|
-
const lang = lr.replace('lang_', '');
|
|
63
|
-
formData.set('kl', this.mapLanguageCode(lang));
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
if (dateRestrict) {
|
|
67
|
-
const timeFilter = this.mapDateRestrict(dateRestrict);
|
|
68
|
-
if (timeFilter) {
|
|
69
|
-
formData.set('df', timeFilter);
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
let lastError;
|
|
74
|
-
for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
|
|
75
|
-
try {
|
|
76
|
-
// Add delay between attempts to avoid rate limiting
|
|
77
|
-
if (attempt > 1) {
|
|
78
|
-
await new Promise(resolve =>
|
|
79
|
-
setTimeout(resolve, this.retryDelay * Math.pow(2, attempt - 1))
|
|
80
|
-
);
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
const htmlResponse = await this.makeRequest(formData);
|
|
84
|
-
return this.parseHtmlResponse(htmlResponse, query, num, start);
|
|
85
|
-
} catch (error) {
|
|
86
|
-
lastError = error;
|
|
87
|
-
// If it's a CAPTCHA error, don't retry - it won't help
|
|
88
|
-
if (error.message.includes('CAPTCHA') || error.message.includes('automated requests')) {
|
|
89
|
-
throw error;
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
throw new Error(`DuckDuckGo search failed after ${this.maxRetries} attempts: ${lastError.message}`);
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
async searchWithLibrary(query, num, safe, dateRestrict) {
|
|
98
|
-
// Map safe search settings
|
|
99
|
-
let safeSearch = SafeSearchType.MODERATE;
|
|
100
|
-
if (safe === 'active' || safe === 'strict') {
|
|
101
|
-
safeSearch = SafeSearchType.STRICT;
|
|
102
|
-
} else if (safe === 'off') {
|
|
103
|
-
safeSearch = SafeSearchType.OFF;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
// Map time filter
|
|
107
|
-
let time = undefined;
|
|
108
|
-
if (dateRestrict) {
|
|
109
|
-
const timeMap = {
|
|
110
|
-
'd1': SearchTimeType.DAY,
|
|
111
|
-
'w1': SearchTimeType.WEEK,
|
|
112
|
-
'm1': SearchTimeType.MONTH,
|
|
113
|
-
'y1': SearchTimeType.YEAR
|
|
114
|
-
};
|
|
115
|
-
time = timeMap[dateRestrict];
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
const searchResults = await ddgSearch(query, {
|
|
119
|
-
safeSearch,
|
|
120
|
-
time,
|
|
121
|
-
locale: 'en-us'
|
|
122
|
-
});
|
|
123
|
-
|
|
124
|
-
// Transform results to match expected format
|
|
125
|
-
const items = (searchResults.results || []).slice(0, num).map(result => ({
|
|
126
|
-
title: result.title || '',
|
|
127
|
-
link: result.url || '',
|
|
128
|
-
snippet: result.description || '',
|
|
129
|
-
displayLink: this.extractDomain(result.url),
|
|
130
|
-
formattedUrl: result.url || '',
|
|
131
|
-
htmlSnippet: result.description || '',
|
|
132
|
-
pagemap: {
|
|
133
|
-
metatags: {
|
|
134
|
-
title: result.title || '',
|
|
135
|
-
description: result.description || ''
|
|
136
|
-
}
|
|
137
|
-
},
|
|
138
|
-
metadata: {
|
|
139
|
-
source: 'duckduckgo_api',
|
|
140
|
-
type: 'web_result',
|
|
141
|
-
hostname: result.hostname || '',
|
|
142
|
-
icon: result.icon || ''
|
|
143
|
-
}
|
|
144
|
-
}));
|
|
145
|
-
|
|
146
|
-
return {
|
|
147
|
-
kind: 'duckduckgo#search',
|
|
148
|
-
searchInformation: {
|
|
149
|
-
searchTime: 0.1,
|
|
150
|
-
formattedSearchTime: '0.10',
|
|
151
|
-
totalResults: items.length.toString(),
|
|
152
|
-
formattedTotalResults: items.length.toLocaleString()
|
|
153
|
-
},
|
|
154
|
-
items: items
|
|
155
|
-
};
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
async makeRequest(formData) {
|
|
159
|
-
const controller = new AbortController();
|
|
160
|
-
const timeoutId = setTimeout(() => controller.abort(), this.timeout);
|
|
161
|
-
|
|
162
|
-
try {
|
|
163
|
-
const response = await fetch(this.baseUrl, {
|
|
164
|
-
method: 'POST',
|
|
165
|
-
headers: {
|
|
166
|
-
'User-Agent': this.userAgent,
|
|
167
|
-
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
168
|
-
'Accept-Language': 'en-US,en;q=0.5',
|
|
169
|
-
'Accept-Encoding': 'gzip, deflate, br',
|
|
170
|
-
'Content-Type': 'application/x-www-form-urlencoded',
|
|
171
|
-
'Origin': 'https://duckduckgo.com',
|
|
172
|
-
'Referer': 'https://duckduckgo.com/',
|
|
173
|
-
'Upgrade-Insecure-Requests': '1',
|
|
174
|
-
'Sec-Fetch-Dest': 'document',
|
|
175
|
-
'Sec-Fetch-Mode': 'navigate',
|
|
176
|
-
'Sec-Fetch-Site': 'same-site'
|
|
177
|
-
},
|
|
178
|
-
body: formData.toString(),
|
|
179
|
-
signal: controller.signal
|
|
180
|
-
});
|
|
181
|
-
|
|
182
|
-
clearTimeout(timeoutId);
|
|
183
|
-
|
|
184
|
-
if (!response.ok) {
|
|
185
|
-
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
const html = await response.text();
|
|
189
|
-
return html;
|
|
190
|
-
} catch (error) {
|
|
191
|
-
clearTimeout(timeoutId);
|
|
192
|
-
|
|
193
|
-
if (error.name === 'AbortError') {
|
|
194
|
-
throw new Error(`Request timeout after ${this.timeout}ms`);
|
|
195
|
-
}
|
|
196
|
-
|
|
197
|
-
throw error;
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
parseHtmlResponse(html, query, num, start) {
|
|
202
|
-
try {
|
|
203
|
-
const $ = cheerio.load(html);
|
|
204
|
-
const items = [];
|
|
205
|
-
|
|
206
|
-
// Check for CAPTCHA challenge (DuckDuckGo bot protection)
|
|
207
|
-
const captchaIndicators = [
|
|
208
|
-
'anomaly-modal',
|
|
209
|
-
'Unfortunately, bots use DuckDuckGo too',
|
|
210
|
-
'Select all squares containing a duck',
|
|
211
|
-
'confirm this search was made by a human',
|
|
212
|
-
'challenge-form'
|
|
213
|
-
];
|
|
214
|
-
|
|
215
|
-
for (const indicator of captchaIndicators) {
|
|
216
|
-
if (html.includes(indicator)) {
|
|
217
|
-
throw new Error(
|
|
218
|
-
'DuckDuckGo CAPTCHA detected - automated requests are being blocked. ' +
|
|
219
|
-
'To use web search reliably, please configure Google Custom Search API by setting ' +
|
|
220
|
-
'GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID environment variables. ' +
|
|
221
|
-
'See: https://developers.google.com/custom-search/v1/introduction'
|
|
222
|
-
);
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
// Look for search result containers - DuckDuckGo uses various selectors
|
|
227
|
-
const resultSelectors = [
|
|
228
|
-
'.result', // Primary result class
|
|
229
|
-
'.results_links', // Alternative result class
|
|
230
|
-
'.web-result', // Another possible class
|
|
231
|
-
'.result__body' // Result body container
|
|
232
|
-
];
|
|
233
|
-
|
|
234
|
-
let results = $();
|
|
235
|
-
for (const selector of resultSelectors) {
|
|
236
|
-
results = $(selector);
|
|
237
|
-
if (results.length > 0) break;
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
// If no results found with standard selectors, try more generic approach
|
|
241
|
-
if (results.length === 0) {
|
|
242
|
-
results = $('div[data-domain]'); // DuckDuckGo sometimes uses data-domain attribute
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
results.each((index, element) => {
|
|
246
|
-
if (items.length >= num) return false; // Stop if we have enough results
|
|
247
|
-
|
|
248
|
-
const $result = $(element);
|
|
249
|
-
|
|
250
|
-
// Extract title - try multiple selectors
|
|
251
|
-
let title = '';
|
|
252
|
-
const titleSelectors = [
|
|
253
|
-
'a.result__a',
|
|
254
|
-
'.result__title a',
|
|
255
|
-
'h2 a',
|
|
256
|
-
'.result-title a',
|
|
257
|
-
'a[href^="http"]'
|
|
258
|
-
];
|
|
259
|
-
|
|
260
|
-
for (const selector of titleSelectors) {
|
|
261
|
-
const titleElement = $result.find(selector).first();
|
|
262
|
-
if (titleElement.length > 0) {
|
|
263
|
-
title = titleElement.text().trim();
|
|
264
|
-
break;
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
// Extract URL - try multiple selectors
|
|
269
|
-
let url = '';
|
|
270
|
-
const urlSelectors = [
|
|
271
|
-
'a.result__a',
|
|
272
|
-
'.result__title a',
|
|
273
|
-
'h2 a',
|
|
274
|
-
'.result-title a',
|
|
275
|
-
'a[href^="http"]'
|
|
276
|
-
];
|
|
277
|
-
|
|
278
|
-
for (const selector of urlSelectors) {
|
|
279
|
-
const urlElement = $result.find(selector).first();
|
|
280
|
-
if (urlElement.length > 0) {
|
|
281
|
-
url = urlElement.attr('href') || '';
|
|
282
|
-
break;
|
|
283
|
-
}
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
// Extract snippet - try multiple selectors
|
|
287
|
-
let snippet = '';
|
|
288
|
-
const snippetSelectors = [
|
|
289
|
-
'a.result__snippet',
|
|
290
|
-
'.result__snippet',
|
|
291
|
-
'.result-snippet',
|
|
292
|
-
'.snippet',
|
|
293
|
-
'.result__body',
|
|
294
|
-
'span.result__snippet'
|
|
295
|
-
];
|
|
296
|
-
|
|
297
|
-
for (const selector of snippetSelectors) {
|
|
298
|
-
const snippetElement = $result.find(selector).first();
|
|
299
|
-
if (snippetElement.length > 0) {
|
|
300
|
-
snippet = snippetElement.text().trim();
|
|
301
|
-
break;
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
// If no snippet found, try to get any text content
|
|
306
|
-
if (!snippet) {
|
|
307
|
-
const allText = $result.text().trim();
|
|
308
|
-
// Remove title from text to get snippet
|
|
309
|
-
snippet = allText.replace(title, '').trim().substring(0, 300);
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
// Clean and validate the extracted data
|
|
313
|
-
if (title && url && this.isValidUrl(url)) {
|
|
314
|
-
items.push({
|
|
315
|
-
title: this.cleanText(title),
|
|
316
|
-
link: url,
|
|
317
|
-
snippet: this.cleanText(snippet),
|
|
318
|
-
displayLink: this.extractDomain(url),
|
|
319
|
-
formattedUrl: url,
|
|
320
|
-
htmlSnippet: this.cleanText(snippet),
|
|
321
|
-
pagemap: {
|
|
322
|
-
metatags: {
|
|
323
|
-
title: this.cleanText(title),
|
|
324
|
-
description: this.cleanText(snippet)
|
|
325
|
-
}
|
|
326
|
-
},
|
|
327
|
-
metadata: {
|
|
328
|
-
source: 'duckduckgo_html',
|
|
329
|
-
type: 'web_result'
|
|
330
|
-
}
|
|
331
|
-
});
|
|
332
|
-
}
|
|
333
|
-
});
|
|
334
|
-
|
|
335
|
-
// If no results found, provide helpful feedback
|
|
336
|
-
if (items.length === 0) {
|
|
337
|
-
// Check if there's a "no results" message
|
|
338
|
-
const noResultsIndicators = [
|
|
339
|
-
'No results found',
|
|
340
|
-
'no web results',
|
|
341
|
-
'Try searching for'
|
|
342
|
-
];
|
|
343
|
-
|
|
344
|
-
let hasNoResults = false;
|
|
345
|
-
for (const indicator of noResultsIndicators) {
|
|
346
|
-
if (html.toLowerCase().includes(indicator.toLowerCase())) {
|
|
347
|
-
hasNoResults = true;
|
|
348
|
-
break;
|
|
349
|
-
}
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
if (hasNoResults) {
|
|
353
|
-
throw new Error(`No search results found for query: "${query}"`);
|
|
354
|
-
} else {
|
|
355
|
-
throw new Error('Could not parse search results from DuckDuckGo response');
|
|
356
|
-
}
|
|
357
|
-
}
|
|
358
|
-
|
|
359
|
-
return {
|
|
360
|
-
kind: 'duckduckgo#search',
|
|
361
|
-
searchInformation: {
|
|
362
|
-
searchTime: 0.1,
|
|
363
|
-
formattedSearchTime: '0.10',
|
|
364
|
-
totalResults: items.length.toString(),
|
|
365
|
-
formattedTotalResults: items.length.toLocaleString()
|
|
366
|
-
},
|
|
367
|
-
items: items
|
|
368
|
-
};
|
|
369
|
-
|
|
370
|
-
} catch (error) {
|
|
371
|
-
if (error.message.includes('No search results found') || error.message.includes('Could not parse')) {
|
|
372
|
-
throw error;
|
|
373
|
-
}
|
|
374
|
-
throw new Error(`Failed to parse DuckDuckGo HTML response: ${error.message}`);
|
|
375
|
-
}
|
|
376
|
-
}
|
|
377
|
-
|
|
378
|
-
isValidUrl(url) {
|
|
379
|
-
if (!url) return false;
|
|
380
|
-
try {
|
|
381
|
-
const urlObj = new URL(url);
|
|
382
|
-
return urlObj.protocol === 'http:' || urlObj.protocol === 'https:';
|
|
383
|
-
} catch {
|
|
384
|
-
return false;
|
|
385
|
-
}
|
|
386
|
-
}
|
|
387
|
-
|
|
388
|
-
cleanText(text) {
|
|
389
|
-
if (!text) return '';
|
|
390
|
-
// Remove HTML tags, normalize whitespace, and trim
|
|
391
|
-
return text
|
|
392
|
-
.replace(/<[^>]*>/g, '')
|
|
393
|
-
.replace(/\s+/g, ' ')
|
|
394
|
-
.replace(/&nbsp;/g, ' ')
|
|
395
|
-
.replace(/&amp;/g, '&')
|
|
396
|
-
.replace(/&lt;/g, '<')
|
|
397
|
-
.replace(/&gt;/g, '>')
|
|
398
|
-
.replace(/&quot;/g, '"')
|
|
399
|
-
.replace(/&#39;/g, "'")
|
|
400
|
-
.trim();
|
|
401
|
-
}
|
|
402
|
-
|
|
403
|
-
extractDomain(url) {
|
|
404
|
-
if (!url) return '';
|
|
405
|
-
try {
|
|
406
|
-
return new URL(url).hostname;
|
|
407
|
-
} catch {
|
|
408
|
-
return '';
|
|
409
|
-
}
|
|
410
|
-
}
|
|
411
|
-
|
|
412
|
-
mapLanguageCode(code) {
|
|
413
|
-
// Map common language codes to DuckDuckGo's format
|
|
414
|
-
const languageMap = {
|
|
415
|
-
'en': 'us-en',
|
|
416
|
-
'es': 'es-es',
|
|
417
|
-
'fr': 'fr-fr',
|
|
418
|
-
'de': 'de-de',
|
|
419
|
-
'it': 'it-it',
|
|
420
|
-
'pt': 'pt-br',
|
|
421
|
-
'ru': 'ru-ru',
|
|
422
|
-
'ja': 'jp-jp',
|
|
423
|
-
'ko': 'kr-kr',
|
|
424
|
-
'zh': 'cn-zh'
|
|
425
|
-
};
|
|
426
|
-
return languageMap[code] || 'us-en';
|
|
427
|
-
}
|
|
428
|
-
|
|
429
|
-
mapDateRestrict(dateRestrict) {
|
|
430
|
-
// Map Google's dateRestrict format to DuckDuckGo's time filters
|
|
431
|
-
const dateMap = {
|
|
432
|
-
'd1': 'd', // past day
|
|
433
|
-
'w1': 'w', // past week
|
|
434
|
-
'm1': 'm', // past month
|
|
435
|
-
'y1': 'y' // past year
|
|
436
|
-
};
|
|
437
|
-
return dateMap[dateRestrict] || null;
|
|
438
|
-
}
|
|
439
|
-
|
|
440
|
-
async getSuggestions(query) {
|
|
441
|
-
try {
|
|
442
|
-
// DuckDuckGo's autocomplete endpoint
|
|
443
|
-
const url = `https://duckduckgo.com/ac/?q=${encodeURIComponent(query)}&type=list`;
|
|
444
|
-
|
|
445
|
-
const controller = new AbortController();
|
|
446
|
-
const timeoutId = setTimeout(() => controller.abort(), 5000); // Shorter timeout for suggestions
|
|
447
|
-
|
|
448
|
-
const response = await fetch(url, {
|
|
449
|
-
headers: {
|
|
450
|
-
'User-Agent': this.userAgent,
|
|
451
|
-
'Accept': 'application/json',
|
|
452
|
-
'Referer': 'https://duckduckgo.com/'
|
|
453
|
-
},
|
|
454
|
-
signal: controller.signal
|
|
455
|
-
});
|
|
456
|
-
|
|
457
|
-
clearTimeout(timeoutId);
|
|
458
|
-
|
|
459
|
-
if (!response.ok) {
|
|
460
|
-
return [];
|
|
461
|
-
}
|
|
462
|
-
|
|
463
|
-
const data = await response.json();
|
|
464
|
-
return Array.isArray(data) && data.length > 1 ? data[1] : [];
|
|
465
|
-
} catch (error) {
|
|
466
|
-
// Fail silently for suggestions
|
|
467
|
-
return [];
|
|
468
|
-
}
|
|
469
|
-
}
|
|
470
|
-
|
|
471
|
-
async getRelatedSearches(query) {
|
|
472
|
-
// DuckDuckGo doesn't provide a direct related searches API
|
|
473
|
-
// Return some common query variations
|
|
474
|
-
const words = query.split(' ').filter(w => w.length > 2);
|
|
475
|
-
const related = [];
|
|
476
|
-
|
|
477
|
-
if (words.length > 0) {
|
|
478
|
-
related.push(`${query} tutorial`);
|
|
479
|
-
related.push(`${query} guide`);
|
|
480
|
-
related.push(`${query} examples`);
|
|
481
|
-
related.push(`how to ${query}`);
|
|
482
|
-
related.push(`${query} best practices`);
|
|
483
|
-
}
|
|
484
|
-
|
|
485
|
-
return related.slice(0, 5);
|
|
486
|
-
}
|
|
487
|
-
|
|
488
|
-
async validateApiKey() {
|
|
489
|
-
// DuckDuckGo doesn't require API keys, test HTML scraping functionality
|
|
490
|
-
try {
|
|
491
|
-
const result = await this.search({ query: 'test search', num: 1 });
|
|
492
|
-
return result && result.items && result.items.length >= 0; // Even 0 results is valid
|
|
493
|
-
} catch (error) {
|
|
494
|
-
console.warn('DuckDuckGo validation failed:', error.message);
|
|
495
|
-
return false;
|
|
496
|
-
}
|
|
497
|
-
}
|
|
498
|
-
}
|
|
499
|
-
|
|
500
|
-
export default DuckDuckGoSearchAdapter;
|