@oevortex/ddg_search 1.1.9 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,16 +5,17 @@ import { getRandomUserAgent } from './user_agents.js';
5
5
 
6
6
  // Constants
7
7
  const MAX_CACHE_PAGES = 5;
8
+ const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
9
+ const REQUEST_TIMEOUT = 10000; // 10 seconds
8
10
 
9
11
  // Cache results to avoid repeated requests
10
12
  const resultsCache = new Map();
11
- const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
12
13
 
13
14
  // HTTPS agent configuration to handle certificate chain issues
14
15
  const httpsAgent = new https.Agent({
15
16
  rejectUnauthorized: true, // Keep security enabled
16
17
  keepAlive: true,
17
- timeout: 10000,
18
+ timeout: REQUEST_TIMEOUT,
18
19
  // Provide fallback for certificate issues while maintaining security
19
20
  secureProtocol: 'TLSv1_2_method'
20
21
  });
@@ -107,15 +108,42 @@ function getFaviconUrl(url) {
107
108
  }
108
109
  }
109
110
 
111
+ /**
112
+ * Generate a Jina AI URL for a given website URL
113
+ * @param {string} url - The website URL
114
+ * @returns {string} The Jina AI URL
115
+ */
116
+ function getJinaAiUrl(url) {
117
+ try {
118
+ const urlObj = new URL(url);
119
+ return `https://r.jina.ai/${urlObj.href}`;
120
+ } catch {
121
+ return '';
122
+ }
123
+ }
110
124
 
111
125
  /**
112
126
  * Scrapes search results from DuckDuckGo HTML
113
127
  * @param {string} query - The search query
114
128
  * @param {number} numResults - Number of results to return (default: 10)
129
+ * @param {string} mode - 'short' or 'detailed' mode (default: 'short')
115
130
  * @returns {Promise<Array>} - Array of search results
116
131
  */
117
132
  async function searchDuckDuckGo(query, numResults = 10, mode = 'short') {
118
133
  try {
134
+ // Input validation
135
+ if (!query || typeof query !== 'string') {
136
+ throw new Error('Invalid query: query must be a non-empty string');
137
+ }
138
+
139
+ if (!Number.isInteger(numResults) || numResults < 1 || numResults > 20) {
140
+ throw new Error('Invalid numResults: must be an integer between 1 and 20');
141
+ }
142
+
143
+ if (!['short', 'detailed'].includes(mode)) {
144
+ throw new Error('Invalid mode: must be "short" or "detailed"');
145
+ }
146
+
119
147
  // Clear old cache entries
120
148
  clearOldCache();
121
149
 
@@ -124,145 +152,172 @@ async function searchDuckDuckGo(query, numResults = 10, mode = 'short') {
124
152
  const cachedResults = resultsCache.get(cacheKey);
125
153
 
126
154
  if (cachedResults && Date.now() - cachedResults.timestamp < CACHE_DURATION) {
155
+ console.log(`Cache hit for query: "${query}"`);
127
156
  return cachedResults.results.slice(0, numResults);
128
157
  }
129
158
 
130
159
  // Get a random user agent
131
160
  const userAgent = getRandomUserAgent();
132
161
 
133
- // Fetch results
134
- const response = await axios.get(
135
- `https://duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
136
- {
137
- headers: {
138
- 'User-Agent': userAgent
139
- },
140
- httpsAgent: httpsAgent
141
- }
142
- );
162
+ console.log(`Searching DuckDuckGo for: "${query}" (${numResults} results, mode: ${mode})`);
163
+
164
+ // Fetch results with timeout
165
+ const controller = new AbortController();
166
+ const timeoutId = setTimeout(() => controller.abort(), REQUEST_TIMEOUT);
167
+
168
+ try {
169
+ const response = await axios.get(
170
+ `https://duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
171
+ {
172
+ signal: controller.signal,
173
+ headers: {
174
+ 'User-Agent': userAgent
175
+ },
176
+ httpsAgent: httpsAgent,
177
+ timeout: REQUEST_TIMEOUT
178
+ }
179
+ );
143
180
 
144
- if (response.status !== 200) {
145
- throw new Error('Failed to fetch search results');
146
- }
181
+ clearTimeout(timeoutId);
147
182
 
148
- const html = response.data;
149
-
150
- // Parse results using cheerio
151
- const $ = cheerio.load(html);
152
-
153
- const results = [];
154
- const jinaFetchPromises = [];
155
- $('.result').each((i, result) => {
156
- const $result = $(result);
157
- const titleEl = $result.find('.result__title a');
158
- const linkEl = $result.find('.result__url');
159
- const snippetEl = $result.find('.result__snippet');
160
-
161
- const title = titleEl.text()?.trim();
162
- const rawLink = titleEl.attr('href');
163
- const description = snippetEl.text()?.trim();
164
- const displayUrl = linkEl.text()?.trim();
165
-
166
- const directLink = extractDirectUrl(rawLink || '');
167
- const favicon = getFaviconUrl(directLink);
168
- const jinaUrl = getJinaAiUrl(directLink);
169
-
170
- if (title && directLink) {
171
- if (mode === 'detailed') {
172
- jinaFetchPromises.push(
173
- axios.get(jinaUrl, {
174
- headers: {
175
- 'User-Agent': getRandomUserAgent()
176
- },
177
- httpsAgent: httpsAgent,
178
- timeout: 10000
179
- })
180
- .then(jinaRes => {
181
- let jinaContent = '';
182
- if (jinaRes.status === 200 && typeof jinaRes.data === 'string') {
183
- const $jina = cheerio.load(jinaRes.data);
184
- jinaContent = $jina('body').text()
185
- }
186
- return {
187
- title,
188
- url: directLink,
189
- snippet: description || '',
190
- favicon: favicon,
191
- displayUrl: displayUrl || '',
192
- Description: jinaContent
193
- };
194
- })
195
- .catch(() => {
196
- return {
183
+ if (response.status !== 200) {
184
+ throw new Error(`HTTP ${response.status}: Failed to fetch search results`);
185
+ }
186
+
187
+ const html = response.data;
188
+
189
+ // Parse results using cheerio
190
+ const $ = cheerio.load(html);
191
+
192
+ const results = [];
193
+ const jinaFetchPromises = [];
194
+
195
+ $('.result').each((i, result) => {
196
+ const $result = $(result);
197
+ const titleEl = $result.find('.result__title a');
198
+ const linkEl = $result.find('.result__url');
199
+ const snippetEl = $result.find('.result__snippet');
200
+
201
+ const title = titleEl.text()?.trim();
202
+ const rawLink = titleEl.attr('href');
203
+ const description = snippetEl.text()?.trim();
204
+ const displayUrl = linkEl.text()?.trim();
205
+
206
+ const directLink = extractDirectUrl(rawLink || '');
207
+ const favicon = getFaviconUrl(directLink);
208
+ const jinaUrl = getJinaAiUrl(directLink);
209
+
210
+ if (title && directLink) {
211
+ if (mode === 'detailed') {
212
+ jinaFetchPromises.push(
213
+ axios.get(jinaUrl, {
214
+ headers: {
215
+ 'User-Agent': getRandomUserAgent()
216
+ },
217
+ httpsAgent: httpsAgent,
218
+ timeout: 8000
219
+ })
220
+ .then(jinaRes => {
221
+ let jinaContent = '';
222
+ if (jinaRes.status === 200 && typeof jinaRes.data === 'string') {
223
+ const $jina = cheerio.load(jinaRes.data);
224
+ jinaContent = $jina('body').text();
225
+ }
226
+ return {
227
+ title,
228
+ url: directLink,
229
+ snippet: description || '',
230
+ favicon: favicon,
231
+ displayUrl: displayUrl || '',
232
+ description: jinaContent
233
+ };
234
+ })
235
+ .catch(() => {
236
+ // Return fallback without content
237
+ return {
238
+ title,
239
+ url: directLink,
240
+ snippet: description || '',
241
+ favicon: favicon,
242
+ displayUrl: displayUrl || '',
243
+ description: ''
244
+ };
245
+ })
246
+ );
247
+ } else {
248
+ // short mode: omit description
249
+ jinaFetchPromises.push(
250
+ Promise.resolve({
197
251
  title,
198
252
  url: directLink,
199
253
  snippet: description || '',
200
254
  favicon: favicon,
201
- displayUrl: displayUrl || '',
202
- Description: ''
203
- };
204
- })
205
- );
206
- } else {
207
- // short mode: omit Description
208
- jinaFetchPromises.push(
209
- Promise.resolve({
210
- title,
211
- url: directLink,
212
- snippet: description || '',
213
- favicon: favicon,
214
- displayUrl: displayUrl || ''
215
- })
216
- );
255
+ displayUrl: displayUrl || ''
256
+ })
257
+ );
258
+ }
217
259
  }
260
+ });
261
+
262
+ // Wait for all Jina AI fetches to complete with timeout
263
+ const jinaResults = await Promise.race([
264
+ Promise.all(jinaFetchPromises),
265
+ new Promise((_, reject) =>
266
+ setTimeout(() => reject(new Error('Content fetch timeout')), 15000)
267
+ )
268
+ ]);
269
+
270
+ results.push(...jinaResults);
271
+
272
+ // Get limited results
273
+ const limitedResults = results.slice(0, numResults);
274
+
275
+ // Cache the results
276
+ resultsCache.set(cacheKey, {
277
+ results: limitedResults,
278
+ timestamp: Date.now()
279
+ });
280
+
281
+ // If cache is too big, remove oldest entries
282
+ if (resultsCache.size > MAX_CACHE_PAGES) {
283
+ const oldestKey = Array.from(resultsCache.keys())[0];
284
+ resultsCache.delete(oldestKey);
218
285
  }
219
- });
220
-
221
- // Wait for all Jina AI fetches to complete
222
- const jinaResults = await Promise.all(jinaFetchPromises);
223
- results.push(...jinaResults);
224
-
225
- // Get limited results
226
- const limitedResults = results.slice(0, numResults);
227
286
 
228
- // Cache the results
229
- resultsCache.set(cacheKey, {
230
- results: limitedResults,
231
- timestamp: Date.now()
232
- });
233
-
234
- // If cache is too big, remove oldest entries
235
- if (resultsCache.size > MAX_CACHE_PAGES) {
236
- const oldestKey = Array.from(resultsCache.keys())[0];
237
- resultsCache.delete(oldestKey);
287
+ console.log(`Found ${limitedResults.length} results for query: "${query}"`);
288
+ return limitedResults;
289
+ } catch (fetchError) {
290
+ clearTimeout(timeoutId);
291
+
292
+ if (fetchError.name === 'AbortError') {
293
+ throw new Error('Search request timeout: took longer than 10 seconds');
294
+ }
295
+
296
+ if (fetchError.code === 'ENOTFOUND') {
297
+ throw new Error('Network error: unable to resolve host');
298
+ }
299
+
300
+ if (fetchError.code === 'ECONNREFUSED') {
301
+ throw new Error('Network error: connection refused');
302
+ }
303
+
304
+ throw fetchError;
238
305
  }
239
-
240
- return limitedResults;
241
306
  } catch (error) {
242
307
  console.error('Error searching DuckDuckGo:', error.message);
243
- throw error;
308
+
309
+ // Enhanced error reporting
310
+ if (error.message.includes('Invalid')) {
311
+ throw error; // Re-throw validation errors as-is
312
+ }
313
+
314
+ throw new Error(`Search failed for "${query}": ${error.message}`);
244
315
  }
245
316
  }
246
317
 
247
-
248
318
  export {
249
319
  searchDuckDuckGo,
250
320
  extractDirectUrl,
251
- getFaviconUrl
252
- };
253
-
254
- /**
255
- * Generate a Jina AI URL for a given website URL
256
- * @param {string} url - The website URL
257
- * @returns {string} The Jina AI URL
258
- */
259
- function getJinaAiUrl(url) {
260
- try {
261
- const urlObj = new URL(url);
262
- return `https://r.jina.ai/${urlObj.href}`;
263
- } catch {
264
- return '';
265
- }
266
- }
267
-
268
- export { getJinaAiUrl };
321
+ getFaviconUrl,
322
+ getJinaAiUrl
323
+ };
@@ -0,0 +1,167 @@
1
+ import axios from 'axios';
2
+ import { randomBytes } from 'crypto';
3
+ import { getRandomUserAgent } from './user_agents.js';
4
+
5
+ const BASE_URL = 'https://search.brave.com/api/tap/v1';
6
+ const DEFAULT_TIMEOUT = 30000;
7
+
8
+ function generateKeyB64() {
9
+ const key = randomBytes(32);
10
+ const k = key.toString('base64url');
11
+ const jwk = {
12
+ alg: 'A256GCM',
13
+ ext: true,
14
+ k,
15
+ key_ops: ['encrypt', 'decrypt'],
16
+ kty: 'oct'
17
+ };
18
+ return Buffer.from(JSON.stringify(jwk)).toString('base64');
19
+ }
20
+
21
+ function buildHeaders() {
22
+ return {
23
+ accept: 'application/json',
24
+ 'accept-language': 'en-US,en;q=0.9',
25
+ 'user-agent': getRandomUserAgent(),
26
+ 'sec-ch-ua': '"Chromium";v="127", "Not)A;Brand";v="99"',
27
+ 'sec-ch-ua-mobile': '?0',
28
+ 'sec-ch-ua-platform': '"Windows"',
29
+ 'sec-fetch-dest': 'empty',
30
+ 'sec-fetch-mode': 'cors',
31
+ 'sec-fetch-site': 'same-origin',
32
+ referer: 'https://search.brave.com/ask'
33
+ };
34
+ }
35
+
36
+ function parseStream(stream) {
37
+ return new Promise((resolve, reject) => {
38
+ let buffer = '';
39
+ let text = '';
40
+
41
+ stream.on('data', (chunk) => {
42
+ buffer += chunk.toString();
43
+ const lines = buffer.split('\n');
44
+ buffer = lines.pop() ?? '';
45
+
46
+ for (const line of lines) {
47
+ const trimmed = line.trim();
48
+ if (!trimmed) {
49
+ continue;
50
+ }
51
+
52
+ try {
53
+ const payload = JSON.parse(trimmed);
54
+ if (payload?.type === 'text_delta') {
55
+ text += payload.delta ?? '';
56
+ }
57
+ } catch (error) {
58
+ // Ignore malformed lines
59
+ }
60
+ }
61
+ });
62
+
63
+ stream.on('end', () => resolve(text));
64
+ stream.on('error', (error) => reject(error));
65
+ });
66
+ }
67
+
68
+ /**
69
+ * Search using Brave AI Search.
70
+ * @param {string} prompt - The search query.
71
+ * @param {object} [options] - Search options.
72
+ * @param {boolean} [options.enableResearch=false] - Enable deep research mode.
73
+ * @param {number} [options.timeout=30000] - Request timeout in ms.
74
+ * @param {string} [options.language='en'] - Language code.
75
+ * @param {string} [options.country='US'] - Country code.
76
+ * @param {string} [options.uiLang='en-us'] - UI language.
77
+ * @param {string|null} [options.geoloc=null] - Geolocation coordinates.
78
+ * @returns {Promise<string>} AI-generated response text.
79
+ */
80
+ export async function searchBraveAI(
81
+ prompt,
82
+ {
83
+ enableResearch = false,
84
+ timeout = DEFAULT_TIMEOUT,
85
+ language = 'en',
86
+ country = 'US',
87
+ uiLang = 'en-us',
88
+ geoloc = null
89
+ } = {}
90
+ ) {
91
+ if (!prompt || typeof prompt !== 'string') {
92
+ throw new Error('Invalid prompt: must be a non-empty string');
93
+ }
94
+
95
+ if (prompt.length > 5000) {
96
+ throw new Error('Invalid prompt: too long (maximum 5000 characters)');
97
+ }
98
+
99
+ const symmetricKey = generateKeyB64();
100
+ const client = axios.create({
101
+ timeout,
102
+ headers: buildHeaders(),
103
+ validateStatus: (status) => status >= 200 && status < 500
104
+ });
105
+
106
+ const newParams = {
107
+ language,
108
+ country,
109
+ ui_lang: uiLang,
110
+ symmetric_key: symmetricKey,
111
+ source: enableResearch ? 'home' : 'llmSuggest',
112
+ query: prompt,
113
+ enable_research: enableResearch ? 'true' : 'false'
114
+ };
115
+
116
+ if (geoloc) {
117
+ newParams.geoloc = geoloc;
118
+ }
119
+
120
+ try {
121
+ const newResponse = await client.get(`${BASE_URL}/new`, { params: newParams });
122
+ if (newResponse.status !== 200) {
123
+ throw new Error(`Brave AI failed to initialize chat: HTTP ${newResponse.status}`);
124
+ }
125
+
126
+ const chatId = newResponse.data?.id;
127
+ if (!chatId) {
128
+ throw new Error('Brave AI failed to initialize chat: missing conversation id');
129
+ }
130
+
131
+ const streamParams = {
132
+ id: chatId,
133
+ query: prompt,
134
+ symmetric_key: symmetricKey,
135
+ language,
136
+ country,
137
+ ui_lang: uiLang,
138
+ enable_research: enableResearch ? 'true' : 'false',
139
+ enable_followups: enableResearch ? 'true' : 'false'
140
+ };
141
+
142
+ const referer = `https://search.brave.com/ask?q=${encodeURIComponent(prompt)}&conversation=${chatId}`;
143
+ const streamResponse = await client.get(`${BASE_URL}/stream`, {
144
+ params: streamParams,
145
+ responseType: 'stream',
146
+ headers: {
147
+ referer
148
+ }
149
+ });
150
+
151
+ if (streamResponse.status !== 200) {
152
+ throw new Error(`Brave AI stream failed: HTTP ${streamResponse.status}`);
153
+ }
154
+
155
+ return await parseStream(streamResponse.data);
156
+ } catch (error) {
157
+ if (error.response?.status === 429) {
158
+ throw new Error('Brave AI rate limit: too many requests');
159
+ }
160
+
161
+ if (error.code === 'ECONNABORTED') {
162
+ throw new Error('Brave AI request timeout: took too long');
163
+ }
164
+
165
+ throw new Error(`Brave AI search failed for "${prompt}": ${error.message}`);
166
+ }
167
+ }