@oevortex/ddg_search 1.1.5 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -1
- package/package.json +58 -1
- package/src/index.js +4 -4
- package/src/utils/search.js +0 -174
- package/src/utils/search_iask.js +157 -237
package/CHANGELOG.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
-
## [1.1.
|
|
5
|
+
## [1.1.5] - 2025-11-30
|
|
6
6
|
### Changed
|
|
7
7
|
- Replaced Felo AI tool with IAsk AI tool for advanced AI-powered search
|
|
8
8
|
- Added `src/utils/search_iask.js` implementing IAsk API client
|
package/package.json
CHANGED
|
@@ -1 +1,58 @@
|
|
|
1
|
-
{
|
|
1
|
+
{
|
|
2
|
+
"name": "@oevortex/ddg_search",
|
|
3
|
+
"version": "1.1.6",
|
|
4
|
+
"description": "A Model Context Protocol server for web search using DuckDuckGo and IAsk AI",
|
|
5
|
+
"main": "src/index.js",
|
|
6
|
+
"module": "src/index.ts",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": {
|
|
9
|
+
"import": "./src/index.js",
|
|
10
|
+
"default": "./src/index.js"
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
"bin": {
|
|
14
|
+
"ddg-search-mcp": "bin/cli.js",
|
|
15
|
+
"oevortex-ddg-search": "bin/cli.js"
|
|
16
|
+
},
|
|
17
|
+
"scripts": {
|
|
18
|
+
"test": "echo \"Error: no test specified\" && exit 1",
|
|
19
|
+
"start": "node bin/cli.js",
|
|
20
|
+
"prepublishOnly": "npm run lint",
|
|
21
|
+
"lint": "echo \"No linting configured\"",
|
|
22
|
+
"build": "npx @smithery/cli build",
|
|
23
|
+
"dev": "npx @smithery/cli dev"
|
|
24
|
+
},
|
|
25
|
+
"publishConfig": {
|
|
26
|
+
"access": "public"
|
|
27
|
+
},
|
|
28
|
+
"keywords": [
|
|
29
|
+
"mcp",
|
|
30
|
+
"model-context-protocol",
|
|
31
|
+
"duckduckgo",
|
|
32
|
+
"iask",
|
|
33
|
+
"search",
|
|
34
|
+
"web-search",
|
|
35
|
+
"ai-search",
|
|
36
|
+
"claude",
|
|
37
|
+
"ai",
|
|
38
|
+
"llm"
|
|
39
|
+
],
|
|
40
|
+
"author": "OEvortex",
|
|
41
|
+
"license": "Apache-2.0",
|
|
42
|
+
"type": "module",
|
|
43
|
+
"dependencies": {
|
|
44
|
+
"@modelcontextprotocol/sdk": "^1.17.4",
|
|
45
|
+
"axios": "^1.8.4",
|
|
46
|
+
"axios-cookiejar-support": "^6.0.5",
|
|
47
|
+
"cheerio": "^1.0.0",
|
|
48
|
+
"smithery": "^0.5.2",
|
|
49
|
+
"tough-cookie": "^6.0.0",
|
|
50
|
+
"turndown": "^7.2.2",
|
|
51
|
+
"ws": "^8.18.3"
|
|
52
|
+
},
|
|
53
|
+
"devDependencies": {
|
|
54
|
+
"@types/node": "^24.3.0",
|
|
55
|
+
"tsx": "^4.20.4",
|
|
56
|
+
"typescript": "^5.9.2"
|
|
57
|
+
}
|
|
58
|
+
}
|
package/src/index.js
CHANGED
|
@@ -3,7 +3,7 @@ import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprot
|
|
|
3
3
|
|
|
4
4
|
// Import tool definitions and handlers
|
|
5
5
|
import { searchToolDefinition, searchToolHandler } from './tools/searchTool.js';
|
|
6
|
-
import {
|
|
6
|
+
import { iaskToolDefinition, iaskToolHandler } from './tools/iaskTool.js';
|
|
7
7
|
|
|
8
8
|
// Required: Export default createServer function for Smithery
|
|
9
9
|
export default function createServer({ config } = {}) {
|
|
@@ -12,7 +12,7 @@ export default function createServer({ config } = {}) {
|
|
|
12
12
|
// Global variable to track available tools
|
|
13
13
|
const availableTools = [
|
|
14
14
|
searchToolDefinition,
|
|
15
|
-
|
|
15
|
+
iaskToolDefinition
|
|
16
16
|
];
|
|
17
17
|
|
|
18
18
|
console.log('Available tools:', availableTools.map(t => t.name));
|
|
@@ -48,8 +48,8 @@ export default function createServer({ config } = {}) {
|
|
|
48
48
|
case 'web-search':
|
|
49
49
|
return await searchToolHandler(args);
|
|
50
50
|
|
|
51
|
-
case '
|
|
52
|
-
return await
|
|
51
|
+
case 'iask-search':
|
|
52
|
+
return await iaskToolHandler(args);
|
|
53
53
|
|
|
54
54
|
default:
|
|
55
55
|
throw new Error(`Tool not found: ${name}`);
|
package/src/utils/search.js
CHANGED
|
@@ -219,183 +219,9 @@ async function searchDuckDuckGo(query, page = 1, numResults = 10) {
|
|
|
219
219
|
}
|
|
220
220
|
}
|
|
221
221
|
|
|
222
|
-
/**
|
|
223
|
-
* Fetches the content of a URL and returns it as text
|
|
224
|
-
* @param {string} url - The URL to fetch
|
|
225
|
-
* @param {Object} options - Options for content extraction
|
|
226
|
-
* @param {boolean} options.extractMainContent - Whether to attempt to extract main content (default: true)
|
|
227
|
-
* @param {boolean} options.includeLinks - Whether to include link text (default: true)
|
|
228
|
-
* @param {boolean} options.includeImages - Whether to include image alt text (default: true)
|
|
229
|
-
* @param {string[]} options.excludeTags - Tags to exclude from extraction
|
|
230
|
-
* @returns {Promise<string>} - The content of the URL
|
|
231
|
-
*/
|
|
232
|
-
async function fetchUrlContent(url, options = {}) {
|
|
233
|
-
try {
|
|
234
|
-
// Default options
|
|
235
|
-
const {
|
|
236
|
-
extractMainContent = true,
|
|
237
|
-
includeLinks = true,
|
|
238
|
-
includeImages = true,
|
|
239
|
-
excludeTags = ['script', 'style', 'noscript', 'iframe', 'svg', 'nav', 'footer', 'header', 'aside']
|
|
240
|
-
} = options;
|
|
241
|
-
|
|
242
|
-
// Get a random user agent
|
|
243
|
-
const userAgent = getRandomUserAgent();
|
|
244
|
-
|
|
245
|
-
const response = await axios.get(url, {
|
|
246
|
-
headers: {
|
|
247
|
-
'User-Agent': userAgent
|
|
248
|
-
},
|
|
249
|
-
timeout: 10000, // 10 second timeout
|
|
250
|
-
httpsAgent: httpsAgent
|
|
251
|
-
});
|
|
252
|
-
|
|
253
|
-
if (response.status !== 200) {
|
|
254
|
-
throw new Error(`Failed to fetch URL: ${url}`);
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
// If the content is HTML, extract the text content
|
|
258
|
-
const contentType = response.headers['content-type'] || '';
|
|
259
|
-
if (contentType.includes('text/html')) {
|
|
260
|
-
const $ = cheerio.load(response.data);
|
|
261
|
-
|
|
262
|
-
// Remove unwanted elements
|
|
263
|
-
excludeTags.forEach(tag => {
|
|
264
|
-
$(tag).remove();
|
|
265
|
-
});
|
|
266
|
-
|
|
267
|
-
// Remove ads and other common unwanted elements
|
|
268
|
-
const unwantedSelectors = [
|
|
269
|
-
'[id*="ad"]', '[class*="ad"]', '[id*="banner"]', '[class*="banner"]',
|
|
270
|
-
'[id*="popup"]', '[class*="popup"]', '[class*="cookie"]',
|
|
271
|
-
'[id*="cookie"]', '[class*="newsletter"]', '[id*="newsletter"]',
|
|
272
|
-
'[class*="social"]', '[id*="social"]', '[class*="share"]', '[id*="share"]'
|
|
273
|
-
];
|
|
274
|
-
|
|
275
|
-
unwantedSelectors.forEach(selector => {
|
|
276
|
-
try {
|
|
277
|
-
$(selector).remove();
|
|
278
|
-
} catch (e) {
|
|
279
|
-
// Ignore invalid selectors
|
|
280
|
-
}
|
|
281
|
-
});
|
|
282
|
-
|
|
283
|
-
// Handle links and images
|
|
284
|
-
if (!includeLinks) {
|
|
285
|
-
$('a').each((i, link) => {
|
|
286
|
-
$(link).replaceWith($(link).text());
|
|
287
|
-
});
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
if (!includeImages) {
|
|
291
|
-
$('img').remove();
|
|
292
|
-
} else {
|
|
293
|
-
// Replace images with their alt text
|
|
294
|
-
$('img').each((i, img) => {
|
|
295
|
-
const alt = $(img).attr('alt');
|
|
296
|
-
if (alt) {
|
|
297
|
-
$(img).replaceWith(`[Image: ${alt}]`);
|
|
298
|
-
} else {
|
|
299
|
-
$(img).remove();
|
|
300
|
-
}
|
|
301
|
-
});
|
|
302
|
-
}
|
|
303
|
-
|
|
304
|
-
// Try to extract main content if requested
|
|
305
|
-
if (extractMainContent) {
|
|
306
|
-
// Common content selectors in order of priority
|
|
307
|
-
const contentSelectors = [
|
|
308
|
-
'article', 'main', '[role="main"]', '.post-content', '.article-content',
|
|
309
|
-
'.content', '#content', '.post', '.article', '.entry-content',
|
|
310
|
-
'.page-content', '.post-body', '.post-text', '.story-body'
|
|
311
|
-
];
|
|
312
|
-
|
|
313
|
-
for (const selector of contentSelectors) {
|
|
314
|
-
const mainContent = $(selector).first();
|
|
315
|
-
if (mainContent.length > 0) {
|
|
316
|
-
// Clean up the content
|
|
317
|
-
return cleanText(mainContent.text());
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
// If no main content found or not requested, use the body
|
|
323
|
-
return cleanText($('body').text());
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
// For non-HTML content, return as is
|
|
327
|
-
return response.data.toString();
|
|
328
|
-
} catch (error) {
|
|
329
|
-
console.error('Error fetching URL content:', error.message);
|
|
330
|
-
throw error;
|
|
331
|
-
}
|
|
332
|
-
}
|
|
333
|
-
|
|
334
|
-
/**
|
|
335
|
-
* Cleans up text by removing excessive whitespace and normalizing line breaks
|
|
336
|
-
* @param {string} text - The text to clean
|
|
337
|
-
* @returns {string} - The cleaned text
|
|
338
|
-
*/
|
|
339
|
-
function cleanText(text) {
|
|
340
|
-
return text
|
|
341
|
-
.replace(/\s+/g, ' ') // Replace multiple whitespace with single space
|
|
342
|
-
.replace(/\n\s*\n/g, '\n\n') // Normalize multiple line breaks
|
|
343
|
-
.replace(/^\s+|\s+$/g, '') // Trim start and end
|
|
344
|
-
.trim();
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
/**
|
|
348
|
-
* Extracts metadata from a URL (title, description, etc.)
|
|
349
|
-
* @param {string} url - The URL to extract metadata from
|
|
350
|
-
* @returns {Promise<Object>} - The metadata
|
|
351
|
-
*/
|
|
352
|
-
async function extractUrlMetadata(url) {
|
|
353
|
-
try {
|
|
354
|
-
// Get a random user agent
|
|
355
|
-
const userAgent = getRandomUserAgent();
|
|
356
|
-
|
|
357
|
-
const response = await axios.get(url, {
|
|
358
|
-
headers: {
|
|
359
|
-
'User-Agent': userAgent
|
|
360
|
-
},
|
|
361
|
-
httpsAgent: httpsAgent
|
|
362
|
-
});
|
|
363
|
-
|
|
364
|
-
if (response.status !== 200) {
|
|
365
|
-
throw new Error(`Failed to fetch URL: ${url}`);
|
|
366
|
-
}
|
|
367
|
-
|
|
368
|
-
const $ = cheerio.load(response.data);
|
|
369
|
-
|
|
370
|
-
// Extract metadata
|
|
371
|
-
const title = $('title').text() || '';
|
|
372
|
-
const description = $('meta[name="description"]').attr('content') ||
|
|
373
|
-
$('meta[property="og:description"]').attr('content') || '';
|
|
374
|
-
const ogImage = $('meta[property="og:image"]').attr('content') || '';
|
|
375
|
-
const favicon = $('link[rel="icon"]').attr('href') ||
|
|
376
|
-
$('link[rel="shortcut icon"]').attr('href') || '';
|
|
377
|
-
|
|
378
|
-
// Resolve relative URLs
|
|
379
|
-
const resolvedFavicon = favicon ? new URL(favicon, url).href : getFaviconUrl(url);
|
|
380
|
-
const resolvedOgImage = ogImage ? new URL(ogImage, url).href : '';
|
|
381
|
-
|
|
382
|
-
return {
|
|
383
|
-
title,
|
|
384
|
-
description,
|
|
385
|
-
ogImage: resolvedOgImage,
|
|
386
|
-
favicon: resolvedFavicon,
|
|
387
|
-
url
|
|
388
|
-
};
|
|
389
|
-
} catch (error) {
|
|
390
|
-
console.error('Error extracting URL metadata:', error.message);
|
|
391
|
-
throw error;
|
|
392
|
-
}
|
|
393
|
-
}
|
|
394
222
|
|
|
395
223
|
export {
|
|
396
224
|
searchDuckDuckGo,
|
|
397
|
-
fetchUrlContent,
|
|
398
|
-
extractUrlMetadata,
|
|
399
225
|
extractDirectUrl,
|
|
400
226
|
getFaviconUrl
|
|
401
227
|
};
|
package/src/utils/search_iask.js
CHANGED
|
@@ -1,52 +1,22 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
|
+
import WebSocket from 'ws';
|
|
2
3
|
import * as cheerio from 'cheerio';
|
|
3
4
|
import TurndownService from 'turndown';
|
|
5
|
+
import * as tough from 'tough-cookie';
|
|
6
|
+
import { wrapper } from 'axios-cookiejar-support';
|
|
4
7
|
|
|
5
|
-
|
|
6
|
-
const USER_AGENTS = [
|
|
7
|
-
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
8
|
-
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Edge/120.0.0.0',
|
|
9
|
-
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15',
|
|
10
|
-
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0',
|
|
11
|
-
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
|
12
|
-
];
|
|
13
|
-
|
|
14
|
-
// Cache results to avoid repeated requests
|
|
15
|
-
const resultsCache = new Map();
|
|
16
|
-
const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
|
|
8
|
+
const { CookieJar } = tough;
|
|
17
9
|
|
|
18
10
|
// Valid modes and detail levels
|
|
19
11
|
const VALID_MODES = ['question', 'academic', 'forums', 'wiki', 'thinking'];
|
|
20
12
|
const VALID_DETAIL_LEVELS = ['concise', 'detailed', 'comprehensive'];
|
|
21
13
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class Response {
|
|
26
|
-
/**
|
|
27
|
-
* Create a new Response
|
|
28
|
-
* @param {string} text - The text content of the response
|
|
29
|
-
*/
|
|
30
|
-
constructor(text) {
|
|
31
|
-
this.text = text;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* String representation of the response
|
|
36
|
-
* @returns {string} The text content
|
|
37
|
-
*/
|
|
38
|
-
toString() {
|
|
39
|
-
return this.text;
|
|
40
|
-
}
|
|
41
|
-
}
|
|
14
|
+
// Cache results to avoid repeated requests
|
|
15
|
+
const resultsCache = new Map();
|
|
16
|
+
const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
|
|
42
17
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
* @returns {string} A random user agent string
|
|
46
|
-
*/
|
|
47
|
-
function getRandomUserAgent() {
|
|
48
|
-
return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
|
|
49
|
-
}
|
|
18
|
+
const DEFAULT_TIMEOUT = 30000;
|
|
19
|
+
const API_ENDPOINT = 'https://iask.ai/';
|
|
50
20
|
|
|
51
21
|
/**
|
|
52
22
|
* Generate a cache key for a search query
|
|
@@ -72,23 +42,25 @@ function clearOldCache() {
|
|
|
72
42
|
}
|
|
73
43
|
|
|
74
44
|
/**
|
|
75
|
-
*
|
|
76
|
-
* @param {
|
|
77
|
-
* @returns {string|null} The found
|
|
45
|
+
* Recursively search for cached HTML content in diff object
|
|
46
|
+
* @param {any} diff - The diff object to search
|
|
47
|
+
* @returns {string|null} The found content or null
|
|
78
48
|
*/
|
|
79
49
|
function cacheFind(diff) {
|
|
80
50
|
const values = Array.isArray(diff) ? diff : Object.values(diff);
|
|
81
|
-
const turndown = new TurndownService();
|
|
82
51
|
|
|
83
52
|
for (const value of values) {
|
|
84
|
-
if (typeof value === 'object' && value !== null) {
|
|
53
|
+
if (Array.isArray(value) || (typeof value === 'object' && value !== null)) {
|
|
85
54
|
const cache = cacheFind(value);
|
|
86
55
|
if (cache) return cache;
|
|
87
56
|
}
|
|
57
|
+
|
|
88
58
|
if (typeof value === 'string' && /<p>.+?<\/p>/.test(value)) {
|
|
89
|
-
|
|
59
|
+
const turndownService = new TurndownService();
|
|
60
|
+
return turndownService.turndown(value).trim();
|
|
90
61
|
}
|
|
91
62
|
}
|
|
63
|
+
|
|
92
64
|
return null;
|
|
93
65
|
}
|
|
94
66
|
|
|
@@ -98,6 +70,8 @@ function cacheFind(diff) {
|
|
|
98
70
|
* @returns {string} Formatted text
|
|
99
71
|
*/
|
|
100
72
|
function formatHtml(htmlContent) {
|
|
73
|
+
if (!htmlContent) return '';
|
|
74
|
+
|
|
101
75
|
const $ = cheerio.load(htmlContent);
|
|
102
76
|
const outputLines = [];
|
|
103
77
|
|
|
@@ -133,37 +107,10 @@ function formatHtml(htmlContent) {
|
|
|
133
107
|
}
|
|
134
108
|
|
|
135
109
|
/**
|
|
136
|
-
*
|
|
137
|
-
* @returns {Object} Axios instance
|
|
138
|
-
*/
|
|
139
|
-
function createSession() {
|
|
140
|
-
return axios.create({
|
|
141
|
-
timeout: 30000,
|
|
142
|
-
headers: {
|
|
143
|
-
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
|
144
|
-
'accept-encoding': 'gzip, deflate, br',
|
|
145
|
-
'accept-language': 'en-US,en;q=0.9',
|
|
146
|
-
'cache-control': 'no-cache',
|
|
147
|
-
'dnt': '1',
|
|
148
|
-
'pragma': 'no-cache',
|
|
149
|
-
'sec-ch-ua': '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
|
|
150
|
-
'sec-ch-ua-mobile': '?0',
|
|
151
|
-
'sec-ch-ua-platform': '"Windows"',
|
|
152
|
-
'sec-fetch-dest': 'document',
|
|
153
|
-
'sec-fetch-mode': 'navigate',
|
|
154
|
-
'sec-fetch-site': 'none',
|
|
155
|
-
'sec-fetch-user': '?1',
|
|
156
|
-
'upgrade-insecure-requests': '1',
|
|
157
|
-
'user-agent': getRandomUserAgent()
|
|
158
|
-
}
|
|
159
|
-
});
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
/**
|
|
163
|
-
* Search using the IAsk AI API
|
|
110
|
+
* Search using IAsk AI via WebSocket (Phoenix LiveView)
|
|
164
111
|
* @param {string} prompt - The search query or prompt
|
|
165
|
-
* @param {boolean} stream - If true,
|
|
166
|
-
* @param {boolean} raw - If true, returns raw response
|
|
112
|
+
* @param {boolean} stream - If true, returns async generator for streaming
|
|
113
|
+
* @param {boolean} raw - If true, returns raw response (not used currently)
|
|
167
114
|
* @param {string} mode - Search mode: 'question', 'academic', 'forums', 'wiki', 'thinking'
|
|
168
115
|
* @param {string|null} detailLevel - Detail level: 'concise', 'detailed', 'comprehensive'
|
|
169
116
|
* @returns {Promise<string|AsyncGenerator<string>>} The search results
|
|
@@ -182,200 +129,173 @@ async function searchIAsk(prompt, stream = false, raw = false, mode = 'question'
|
|
|
182
129
|
// Clear old cache entries
|
|
183
130
|
clearOldCache();
|
|
184
131
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
const cacheKey = getCacheKey(prompt, mode, detailLevel);
|
|
188
|
-
const cachedResults = resultsCache.get(cacheKey);
|
|
132
|
+
const cacheKey = getCacheKey(prompt, mode, detailLevel);
|
|
133
|
+
const cachedResults = resultsCache.get(cacheKey);
|
|
189
134
|
|
|
190
|
-
|
|
191
|
-
|
|
135
|
+
if (cachedResults && Date.now() - cachedResults.timestamp < CACHE_DURATION) {
|
|
136
|
+
const result = cachedResults.results;
|
|
137
|
+
if (stream) {
|
|
138
|
+
return (async function*() { yield result; })();
|
|
192
139
|
}
|
|
140
|
+
return result;
|
|
193
141
|
}
|
|
194
142
|
|
|
195
|
-
|
|
196
|
-
const
|
|
197
|
-
|
|
198
|
-
// Build URL with parameters
|
|
199
|
-
const params = new URLSearchParams({
|
|
200
|
-
mode: mode,
|
|
201
|
-
q: prompt
|
|
202
|
-
});
|
|
143
|
+
// Build URL parameters
|
|
144
|
+
const params = new URLSearchParams({ mode, q: prompt });
|
|
203
145
|
if (detailLevel) {
|
|
204
146
|
params.append('options[detail_level]', detailLevel);
|
|
205
147
|
}
|
|
206
148
|
|
|
207
|
-
//
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
// First, get the initial page to extract tokens
|
|
211
|
-
const initialUrl = `${apiEndpoint}?${params.toString()}`;
|
|
212
|
-
const initialResponse = await session.get(initialUrl);
|
|
149
|
+
// Create a cookie jar for session management
|
|
150
|
+
const jar = new CookieJar();
|
|
151
|
+
const client = wrapper(axios.create({ jar }));
|
|
213
152
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
153
|
+
// Get initial page and extract tokens
|
|
154
|
+
const response = await client.get(API_ENDPOINT, {
|
|
155
|
+
params: Object.fromEntries(params),
|
|
156
|
+
timeout: DEFAULT_TIMEOUT,
|
|
157
|
+
headers: {
|
|
158
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
|
159
|
+
}
|
|
160
|
+
});
|
|
217
161
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
const csrfToken = $('meta[name="csrf-token"]').attr('content');
|
|
225
|
-
|
|
226
|
-
if (!phxId || !csrfToken) {
|
|
227
|
-
throw new Error('Failed to extract required tokens from IAsk page');
|
|
228
|
-
}
|
|
162
|
+
const $ = cheerio.load(response.data);
|
|
163
|
+
|
|
164
|
+
const phxNode = $('[id^="phx-"]').first();
|
|
165
|
+
const csrfToken = $('[name="csrf-token"]').attr('content');
|
|
166
|
+
const phxId = phxNode.attr('id');
|
|
167
|
+
const phxSession = phxNode.attr('data-phx-session');
|
|
229
168
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
169
|
+
if (!phxId || !csrfToken) {
|
|
170
|
+
throw new Error('Failed to extract required tokens from page');
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Get the actual response URL (after any redirects)
|
|
174
|
+
const responseUrl = response.request.res?.responseUrl || response.config.url;
|
|
175
|
+
|
|
176
|
+
// Get cookies from the jar for WebSocket connection
|
|
177
|
+
const cookies = await jar.getCookies(API_ENDPOINT);
|
|
178
|
+
const cookieString = cookies.map(c => `${c.key}=${c.value}`).join('; ');
|
|
179
|
+
|
|
180
|
+
// Build WebSocket URL
|
|
181
|
+
const wsParams = new URLSearchParams({
|
|
182
|
+
'_csrf_token': csrfToken,
|
|
183
|
+
'vsn': '2.0.0'
|
|
184
|
+
});
|
|
185
|
+
const wsUrl = `wss://iask.ai/live/websocket?${wsParams.toString()}`;
|
|
186
|
+
|
|
187
|
+
return new Promise((resolve, reject) => {
|
|
188
|
+
const ws = new WebSocket(wsUrl, {
|
|
189
|
+
headers: {
|
|
190
|
+
'Cookie': cookieString,
|
|
191
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
192
|
+
'Origin': 'https://iask.ai'
|
|
193
|
+
}
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
let buffer = '';
|
|
197
|
+
const chunks = [];
|
|
198
|
+
let timeoutId;
|
|
199
|
+
|
|
200
|
+
ws.on('open', () => {
|
|
201
|
+
// Send phx_join message
|
|
202
|
+
ws.send(JSON.stringify([
|
|
203
|
+
null,
|
|
204
|
+
null,
|
|
205
|
+
`lv:${phxId}`,
|
|
206
|
+
'phx_join',
|
|
207
|
+
{
|
|
208
|
+
params: { _csrf_token: csrfToken },
|
|
209
|
+
url: responseUrl,
|
|
210
|
+
session: phxSession
|
|
238
211
|
}
|
|
239
|
-
|
|
212
|
+
]));
|
|
213
|
+
});
|
|
240
214
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
resolveWs = resolve;
|
|
246
|
-
rejectWs = reject;
|
|
247
|
-
});
|
|
215
|
+
ws.on('message', (data) => {
|
|
216
|
+
try {
|
|
217
|
+
const msg = JSON.parse(data.toString());
|
|
218
|
+
if (!msg) return;
|
|
248
219
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
// Send join message
|
|
254
|
-
const joinMessage = [
|
|
255
|
-
null,
|
|
256
|
-
null,
|
|
257
|
-
`lv:${phxId}`,
|
|
258
|
-
'phx_join',
|
|
259
|
-
{
|
|
260
|
-
params: { _csrf_token: csrfToken },
|
|
261
|
-
url: initialUrl,
|
|
262
|
-
session: phxSession
|
|
263
|
-
}
|
|
264
|
-
];
|
|
265
|
-
ws.send(JSON.stringify(joinMessage));
|
|
266
|
-
});
|
|
220
|
+
const diff = msg[4];
|
|
221
|
+
if (!diff) return;
|
|
222
|
+
|
|
223
|
+
let chunk = null;
|
|
267
224
|
|
|
268
|
-
ws.on('message', (data) => {
|
|
269
225
|
try {
|
|
270
|
-
|
|
271
|
-
if
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
if (diff.e && diff.e[0] && diff.e[0][1] && diff.e[0][1].data) {
|
|
278
|
-
let chunk = diff.e[0][1].data;
|
|
279
|
-
// Check if chunk contains HTML
|
|
226
|
+
// Try to get chunk from diff.e[0][1].data
|
|
227
|
+
// Use non-optional chaining to trigger exception if path doesn't exist
|
|
228
|
+
if (diff.e) {
|
|
229
|
+
chunk = diff.e[0][1].data;
|
|
230
|
+
|
|
231
|
+
if (chunk) {
|
|
232
|
+
let formatted;
|
|
280
233
|
if (/<[^>]+>/.test(chunk)) {
|
|
281
|
-
|
|
234
|
+
formatted = formatHtml(chunk);
|
|
282
235
|
} else {
|
|
283
|
-
|
|
236
|
+
formatted = chunk.replace(/<br\/>/g, '\n');
|
|
284
237
|
}
|
|
285
|
-
|
|
238
|
+
|
|
239
|
+
buffer += formatted;
|
|
240
|
+
chunks.push(formatted);
|
|
286
241
|
}
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
ws.close();
|
|
300
|
-
}
|
|
242
|
+
} else {
|
|
243
|
+
throw new Error('No diff.e');
|
|
244
|
+
}
|
|
245
|
+
} catch {
|
|
246
|
+
// Fallback to cacheFind
|
|
247
|
+
const cache = cacheFind(diff);
|
|
248
|
+
if (cache) {
|
|
249
|
+
let formatted;
|
|
250
|
+
if (/<[^>]+>/.test(cache)) {
|
|
251
|
+
formatted = formatHtml(cache);
|
|
252
|
+
} else {
|
|
253
|
+
formatted = cache;
|
|
301
254
|
}
|
|
255
|
+
buffer += formatted;
|
|
256
|
+
chunks.push(formatted);
|
|
257
|
+
// Close after cache find
|
|
258
|
+
ws.close();
|
|
259
|
+
return;
|
|
302
260
|
}
|
|
303
|
-
} catch (error) {
|
|
304
|
-
console.debug('WebSocket message parse error:', error.message);
|
|
305
261
|
}
|
|
306
|
-
})
|
|
307
|
-
|
|
308
|
-
ws.on('close', () => {
|
|
309
|
-
resolveWs();
|
|
310
|
-
});
|
|
311
|
-
|
|
312
|
-
ws.on('error', (error) => {
|
|
313
|
-
rejectWs(error);
|
|
314
|
-
});
|
|
315
|
-
|
|
316
|
-
// Set timeout
|
|
317
|
-
const timeout = setTimeout(() => {
|
|
262
|
+
} catch (err) {
|
|
263
|
+
reject(new Error(`IAsk API error: ${err.message}`));
|
|
318
264
|
ws.close();
|
|
319
|
-
rejectWs(new Error('WebSocket connection timed out'));
|
|
320
|
-
}, 30000);
|
|
321
|
-
|
|
322
|
-
// Wait for WebSocket to complete
|
|
323
|
-
await wsPromise;
|
|
324
|
-
clearTimeout(timeout);
|
|
325
|
-
|
|
326
|
-
// Yield all collected chunks
|
|
327
|
-
for (const chunk of chunks) {
|
|
328
|
-
streamingText += chunk;
|
|
329
|
-
if (raw) {
|
|
330
|
-
yield { text: chunk };
|
|
331
|
-
} else {
|
|
332
|
-
yield new Response(chunk).toString();
|
|
333
|
-
}
|
|
334
265
|
}
|
|
266
|
+
});
|
|
335
267
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
268
|
+
ws.on('close', () => {
|
|
269
|
+
clearTimeout(timeoutId);
|
|
270
|
+
|
|
271
|
+
// Cache the result
|
|
272
|
+
if (buffer) {
|
|
273
|
+
resultsCache.set(cacheKey, {
|
|
274
|
+
results: buffer,
|
|
340
275
|
timestamp: Date.now()
|
|
341
276
|
});
|
|
342
277
|
}
|
|
343
|
-
|
|
344
|
-
} catch (error) {
|
|
345
|
-
console.error('Error searching IAsk:', error.message);
|
|
346
|
-
|
|
347
|
-
if (error.response) {
|
|
348
|
-
const status = error.response.status;
|
|
349
|
-
const statusText = error.response.statusText;
|
|
350
|
-
throw new Error(`IAsk API error: ${status} ${statusText}`);
|
|
351
|
-
}
|
|
352
278
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
return streamFunction();
|
|
360
|
-
}
|
|
361
|
-
|
|
362
|
-
// For non-streaming, collect all chunks and return as a single string
|
|
363
|
-
let fullResponse = '';
|
|
364
|
-
|
|
365
|
-
try {
|
|
366
|
-
for await (const chunk of streamFunction()) {
|
|
367
|
-
if (raw) {
|
|
368
|
-
fullResponse += chunk.text;
|
|
279
|
+
if (stream) {
|
|
280
|
+
resolve((async function*() {
|
|
281
|
+
for (const chunk of chunks) {
|
|
282
|
+
yield chunk;
|
|
283
|
+
}
|
|
284
|
+
})());
|
|
369
285
|
} else {
|
|
370
|
-
|
|
286
|
+
resolve(buffer || 'No results found.');
|
|
371
287
|
}
|
|
372
|
-
}
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
ws.on('error', (err) => {
|
|
291
|
+
clearTimeout(timeoutId);
|
|
292
|
+
reject(new Error(`WebSocket error: ${err.message}`));
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
timeoutId = setTimeout(() => {
|
|
296
|
+
ws.close();
|
|
297
|
+
}, DEFAULT_TIMEOUT);
|
|
298
|
+
});
|
|
379
299
|
}
|
|
380
300
|
|
|
381
301
|
export { searchIAsk, VALID_MODES, VALID_DETAIL_LEVELS };
|