agent-search-mcp 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,200 @@
1
+ import { z } from 'zod';
2
+ import { logger } from '../infrastructure/index.js';
3
/**
 * Fetch the README of a GitHub repository as text.
 *
 * For every common README filename the `main` branch is tried first and the
 * `master` branch second; the first successful response wins. A non-OK
 * response (e.g. 404) and a network error are both treated as "try the next
 * candidate".
 *
 * @param {string} url - GitHub repository URL (e.g. https://github.com/owner/repo).
 * @returns {Promise<string>} README text prefixed with a `# owner/repo` heading.
 * @throws {Error} 'Invalid GitHub URL' when owner/repo cannot be parsed, or
 *   'README not found' when no candidate could be fetched.
 */
export async function fetchGithubReadme(url) {
    try {
        // Capture owner and repo; stop at path separators, query strings and fragments
        // so that ".../repo?tab=readme" or ".../repo#readme" still resolve to "repo".
        const githubMatch = url.match(/github\.com\/([^\/?#]+)\/([^\/?#]+)/);
        if (!githubMatch) {
            throw new Error('Invalid GitHub URL');
        }
        const [, owner, repo] = githubMatch;
        const cleanRepo = repo.replace(/\.git$/, '');
        const branches = ['main', 'master'];
        const readmeFiles = ['README.md', 'readme.md', 'Readme.md', 'README.MD', 'README'];
        for (const filename of readmeFiles) {
            for (const branch of branches) {
                try {
                    const rawUrl = `https://raw.githubusercontent.com/${owner}/${cleanRepo}/${branch}/${filename}`;
                    const response = await fetch(rawUrl, {
                        signal: AbortSignal.timeout(10000),
                    });
                    if (response.ok) {
                        const content = await response.text();
                        return `# ${owner}/${cleanRepo}\n\n${content}`;
                    }
                }
                catch {
                    // Best effort: a failed request only means this candidate is
                    // unavailable, so fall through to the next branch/filename.
                }
            }
        }
        throw new Error('README not found');
    }
    catch (error) {
        logger.error({ err: error instanceof Error ? error.message : String(error) }, 'Failed to fetch GitHub README');
        throw error;
    }
}
52
/**
 * Fetch a CSDN blog article and reduce its HTML to plain text.
 *
 * The URL comes from the tool caller, so only http(s) URLs on `csdn.net`
 * (or one of its subdomains) are fetched; anything else is rejected before a
 * request is made, which keeps the tool from being used to reach arbitrary
 * or internal hosts.
 *
 * @param {string} url - CSDN article URL.
 * @returns {Promise<string>} Text of the first `<article>` element, or of the
 *   first `<div>` whose class contains "article", with tags replaced by newlines.
 * @throws {Error} 'Invalid CSDN URL', `HTTP <status>` or 'Article content not found'.
 */
export async function fetchCsdnArticle(url) {
    try {
        let target;
        try {
            target = new URL(url);
        }
        catch {
            throw new Error('Invalid CSDN URL');
        }
        const host = target.hostname.toLowerCase();
        const isHttp = target.protocol === 'http:' || target.protocol === 'https:';
        const isCsdnHost = host === 'csdn.net' || host.endsWith('.csdn.net');
        if (!isHttp || !isCsdnHost) {
            throw new Error('Invalid CSDN URL');
        }
        const response = await fetch(url, {
            headers: {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            },
            signal: AbortSignal.timeout(10000),
        });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const html = await response.text();
        // Simple extraction: find article content between common markers.
        // NOTE: the <div> fallback is non-greedy and stops at the first </div>,
        // so nested markup may be truncated.
        const articleMatch = html.match(/<article[^>]*>([\s\S]*?)<\/article>/i) ||
            html.match(/<div[^>]*class="[^"]*article[^"]*"[^>]*>([\s\S]*?)<\/div>/i);
        if (articleMatch) {
            // Basic HTML to text conversion: drop scripts/styles, turn every
            // remaining tag into a line break, then collapse runs of blank lines.
            const content = articleMatch[1]
                .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
                .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
                .replace(/<[^>]+>/g, '\n')
                .replace(/\n{3,}/g, '\n\n')
                .trim();
            return content;
        }
        throw new Error('Article content not found');
    }
    catch (error) {
        logger.error({ err: error instanceof Error ? error.message : String(error) }, 'Failed to fetch CSDN article');
        throw error;
    }
}
87
/**
 * Fetch a Juejin article through the Juejin content API.
 *
 * @param {string} url - Juejin article URL containing `post/<numeric id>`.
 * @returns {Promise<string>} `# <title>` heading followed by the article body
 *   (markdown when the API provides it, otherwise the `content` field).
 * @throws {Error} 'Invalid Juejin URL', `HTTP <status>`, or the API error message.
 */
export async function fetchJuejinArticle(url) {
    try {
        // The numeric article id is the only part of the URL the API needs
        const articleId = url.match(/post\/(\d+)/)?.[1];
        if (!articleId) {
            throw new Error('Invalid Juejin URL');
        }
        const response = await fetch(`https://api.juejin.cn/content_api/v1/article/detail?article_id=${articleId}`, { signal: AbortSignal.timeout(10000) });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const payload = await response.json();
        const succeeded = payload.err_no === 0 && Boolean(payload.data);
        if (!succeeded) {
            throw new Error(payload.err_msg || 'Failed to fetch article');
        }
        const info = payload.data.article_info;
        const heading = info?.title || 'Juejin Article';
        const body = info?.markdown_content || info?.content || '';
        return `# ${heading}\n\n${body}`;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        logger.error({ err: reason }, 'Failed to fetch Juejin article');
        throw error;
    }
}
118
/**
 * Register one URL-based fetch tool on the server.
 *
 * The handler returns the fetched text as a single text content item, or an
 * `isError` result whose message is prefixed with "Failed to fetch <label>: ".
 *
 * @param {object} server - Object exposing `tool(name, description, schema, handler)`.
 * @param {object} spec
 * @param {string} spec.name - Tool name.
 * @param {string} spec.description - Tool description.
 * @param {string} spec.urlDescription - Description of the `url` argument.
 * @param {string} spec.label - Human-readable name used in error messages.
 * @param {(url: string) => Promise<string>} spec.fetcher - Function that fetches the content.
 */
function registerUrlFetchTool(server, { name, description, urlDescription, label, fetcher }) {
    server.tool(name, description, {
        url: z.string().url('Must be a valid URL').describe(urlDescription),
    }, async ({ url }) => {
        try {
            const content = await fetcher(url);
            return {
                content: [
                    {
                        type: 'text',
                        text: content,
                    },
                ],
            };
        }
        catch (error) {
            return {
                content: [
                    {
                        type: 'text',
                        text: `Failed to fetch ${label}: ${error instanceof Error ? error.message : 'Unknown error'}`,
                    },
                ],
                isError: true,
            };
        }
    });
}
/**
 * Register the GitHub README, CSDN article and Juejin article fetch tools.
 *
 * @param {object} server - Object exposing `tool(name, description, schema, handler)`.
 */
export function setupFetchTools(server) {
    // GitHub README tool
    registerUrlFetchTool(server, {
        name: 'fetch_github_readme',
        description: 'Fetch README content from a GitHub repository',
        urlDescription: 'GitHub repository URL (e.g., https://github.com/owner/repo)',
        label: 'GitHub README',
        fetcher: fetchGithubReadme,
    });
    // CSDN article tool
    registerUrlFetchTool(server, {
        name: 'fetch_csdn_article',
        description: 'Fetch content from a CSDN blog article',
        urlDescription: 'CSDN article URL',
        label: 'CSDN article',
        fetcher: fetchCsdnArticle,
    });
    // Juejin article tool
    registerUrlFetchTool(server, {
        name: 'fetch_juejin_article',
        description: 'Fetch content from a Juejin article',
        urlDescription: 'Juejin article URL',
        label: 'Juejin article',
        fetcher: fetchJuejinArticle,
    });
}
@@ -0,0 +1,43 @@
1
+ import { z } from 'zod';
2
+ import { validateUrl } from '../infrastructure/url-validator.js';
3
/**
 * Register the `free_extract` tool, which returns the content of a page as
 * markdown by proxying the URL through Jina Reader (https://r.jina.ai/).
 *
 * The URL is checked with `validateUrl` before any request is made. The
 * response text is truncated to `max_length` characters; `max_length` must be
 * a positive integer and is additionally clamped in the handler so that a
 * negative or fractional value can never make `slice` cut from the end.
 *
 * @param {object} server - Object exposing `tool(name, description, schema, handler)`.
 */
export function registerFreeExtract(server) {
    const description = [
        'Extract full content from a URL. Returns clean markdown.',
        '',
        'Best for: Reading a specific page found in search results.',
        'Not recommended for: Bulk extraction — use search first.',
    ].join('\n');
    server.tool('free_extract', description, {
        url: z.string().describe('URL to extract'),
        max_length: z.number().int().min(1).optional().default(5000).describe('Max characters to return'),
    }, async ({ url, max_length = 5000 }) => {
        // SSRF protection
        const validation = validateUrl(url);
        if (!validation.valid) {
            return {
                content: [{ type: 'text', text: `Error: ${validation.error}` }],
                isError: true,
            };
        }
        try {
            // Use Jina Reader
            const res = await fetch(`https://r.jina.ai/${url}`, {
                headers: { 'Accept': 'text/markdown' },
                signal: AbortSignal.timeout(10000),
            });
            if (!res.ok) {
                return {
                    content: [{ type: 'text', text: `Error: HTTP ${res.status}` }],
                    isError: true,
                };
            }
            const content = await res.text();
            // A negative end index would make slice() drop characters from the tail
            const limit = Math.max(0, Math.trunc(max_length));
            return {
                content: [{ type: 'text', text: content.slice(0, limit) }],
            };
        }
        catch (error) {
            return {
                content: [{ type: 'text', text: `Error: ${error instanceof Error ? error.message : String(error)}` }],
                isError: true,
            };
        }
    });
}
@@ -0,0 +1,40 @@
1
+ import { z } from 'zod';
2
+ import { searchWithFallback } from './free-search.js';
3
/**
 * Register the `free_search_advanced` tool: a search over all supported
 * engines with confidence, language and domain filters.
 *
 * `count` is validated as an integer in the documented 1-20 range, and
 * `time_range` is forwarded to `searchWithFallback` as `timeRange` instead of
 * being dropped by the handler.
 * NOTE(review): whether `timeRange` is actually honoured depends on
 * `searchWithFallback`; confirm that it consumes the option.
 *
 * @param {object} server - Object exposing `tool(name, description, schema, handler)`.
 */
export function registerFreeSearchAdvanced(server) {
    const description = [
        'Advanced search with filters and quality control.',
        '',
        'Best for: Date ranges, domain filtering, high-confidence only, Chinese content.',
        'Not recommended for: Simple queries — use free_search instead.',
    ].join('\n');
    server.tool('free_search_advanced', description, {
        query: z.string().describe('Search query'),
        count: z.number().int().min(1).max(20).optional().default(5).describe('Number of results (1-20)'),
        min_confidence: z.number().min(1).max(3).optional().default(1)
            .describe('Only return results verified by N+ sources'),
        time_range: z.enum(['day', 'week', 'month', 'year']).optional()
            .describe('Filter by recency'),
        language: z.enum(['auto', 'en', 'zh']).optional().default('auto')
            .describe('Language preference'),
        include_domains: z.array(z.string()).optional()
            .describe('Only search these domains'),
        exclude_domains: z.array(z.string()).optional()
            .describe('Exclude these domains'),
    }, async (input) => {
        try {
            const results = await searchWithFallback({
                query: input.query,
                count: input.count,
                engines: ['duckduckgo', 'sogou', 'bing', 'baidu', 'brave', 'tavily'],
                minConfidence: input.min_confidence,
                timeRange: input.time_range,
                language: input.language,
                includeDomains: input.include_domains,
                excludeDomains: input.exclude_domains,
            });
            return { content: [{ type: 'text', text: JSON.stringify(results, null, 2) }] };
        }
        catch (error) {
            return {
                content: [{ type: 'text', text: `Search failed: ${error instanceof Error ? error.message : 'Unknown error'}` }],
                isError: true,
            };
        }
    });
}
@@ -0,0 +1,380 @@
1
+ import { z } from 'zod';
2
+ import { searchDuckDuckGo } from '../engines/duckduckgo.js';
3
+ import { searchSogou } from '../engines/sogou.js';
4
+ import { searchBing } from '../engines/bing.js';
5
+ import { searchBaidu } from '../engines/baidu.js';
6
+ import { BraveProvider } from '../engines/brave.js';
7
+ import { TavilyProvider } from '../engines/tavily.js';
8
+ import { searchExa } from '../engines/exa.js';
9
+ import { dedupByUrl, dedupByTitle, filterLowQuality, scoreAndRank, formatResults } from '../aggregation/index.js';
10
+ import { SearchCache, logger, HealthTracker, RateLimiter } from '../infrastructure/index.js';
11
// Engines that are searched in phase 1 of every search.
const FREE_ENGINES = ['duckduckgo', 'sogou', 'bing', 'baidu'];
// Engines that are used as a phase 2 fallback.
const PAID_ENGINES = ['brave', 'tavily', 'exa'];
// Every engine name this module knows about: free engines first, then paid ones.
const SUPPORTED_ENGINES = [...FREE_ENGINES, ...PAID_ENGINES];
// Per-engine trust weight handed to the ranking step (higher = more trusted).
const ENGINE_WEIGHTS = {
    duckduckgo: 0.85,
    sogou: 0.8,
    bing: 0.9,
    baidu: 0.75,
    brave: 0.95,
    tavily: 0.9,
    exa: 0.92,
};
// Infrastructure singletons shared by every search in this module.
const cache = new SearchCache();
const healthTracker = new HealthTracker();
const rateLimiter = new RateLimiter();
// ─── Engine provider mapping (from ddgs pattern) ──────────────────────────
// Maps each engine to the backend that serves its results. DDG uses Bing as
// its backend, so both map to 'bing' and duplicate queries can be avoided.
const PROVIDER_MAP = {
    duckduckgo: 'bing',
    sogou: 'sogou',
    bing: 'bing',
    baidu: 'baidu',
    brave: 'brave',
    tavily: 'tavily',
    exa: 'exa',
};
39
/**
 * Keep only the first engine for every backing provider.
 * From ddgs: engines that share a provider are searched once.
 *
 * @param {Iterable<string>} engines - Engine names in priority order.
 * @returns {string[]} Engines whose provider had not been seen earlier in the list.
 */
function getUniqueProviders(engines) {
    const claimedProviders = new Set();
    return [...engines].filter((engine) => {
        // Engines without a mapping act as their own provider
        const provider = PROVIDER_MAP[engine] || engine;
        if (claimedProviders.has(provider)) {
            return false;
        }
        claimedProviders.add(provider);
        return true;
    });
}
55
/**
 * Query one engine, guarded by the health tracker and the rate limiter, and
 * retry retryable failures with exponential backoff.
 *
 * Never rejects on a search failure: failures are logged, recorded on the
 * health tracker and reported as an empty result list.
 *
 * @param {string} engine - Engine name; unknown names yield `[]`.
 * @param {string} query - Search query.
 * @param {number} limit - Number of results to request.
 * @param {number} [maxRetries=2] - Retries allowed after the first attempt.
 * @returns {Promise<Array>} Results from the engine, or `[]`.
 */
async function searchEngine(engine, query, limit, maxRetries = 2) {
    // Providers currently marked unhealthy are not contacted at all
    if (!healthTracker.isHealthy(engine)) {
        logger.warn({ engine }, 'Skipping unhealthy provider');
        return [];
    }
    // A rate-limit slot is awaited once, before the first attempt
    await rateLimiter.waitForSlot(engine);
    // Dispatch table: one thunk per supported engine
    const runners = new Map([
        ['duckduckgo', () => searchDuckDuckGo(query, limit)],
        ['sogou', () => searchSogou(query, limit)],
        ['bing', () => searchBing(query, limit)],
        ['baidu', () => searchBaidu(query, limit)],
        ['brave', () => new BraveProvider().search(query, limit)],
        ['tavily', () => new TavilyProvider().search(query, limit)],
        ['exa', () => searchExa({ query, count: limit, apiKey: process.env.EXA_API_KEY })],
    ]);
    let attempt = 0;
    while (attempt <= maxRetries) {
        const startedAt = Date.now();
        try {
            const run = runners.get(engine);
            if (!run) {
                return [];
            }
            const results = await run();
            const latency = Date.now() - startedAt;
            healthTracker.recordSuccess(engine, latency);
            logger.info({ engine, latency, count: results.length, attempt }, 'Search completed');
            return results;
        }
        catch (err) {
            const failure = err instanceof Error ? err : new Error(String(err));
            const latency = Date.now() - startedAt;
            // Only network, timeout and 5xx style errors are worth another attempt
            const shouldRetry = attempt < maxRetries && isRetryableError(failure);
            if (!shouldRetry) {
                // Non-retryable or max retries exceeded
                healthTracker.recordFailure(engine);
                logger.error({ engine, latency, attempt, err: failure.message }, 'Search failed');
                return [];
            }
            // Exponential backoff: 500ms, 1000ms, 2000ms... capped at 5000ms
            const delay = Math.min(500 * Math.pow(2, attempt), 5000);
            logger.warn({ engine, attempt, delay, err: failure.message }, 'Retryable error, retrying...');
            await new Promise((resolve) => setTimeout(resolve, delay));
            attempt += 1;
        }
    }
    // Only reached when maxRetries is negative and the loop never ran
    return [];
}
122
/**
 * Decide from the error message whether a failed search is worth retrying.
 *
 * Retryable: network errors, timeouts/aborts, and HTTP 5xx responses except
 * 501 (Not Implemented).
 *
 * @param {Error} err - The error raised by an engine.
 * @returns {boolean} True when the failure looks transient.
 */
function isRetryableError(err) {
    const text = err.message.toLowerCase();
    // Substrings that mark network failures and timeouts
    const transientMarkers = ['econnreset', 'econnrefused', 'etimedout', 'network', 'timeout', 'abort'];
    if (transientMarkers.some((marker) => text.includes(marker))) {
        return true;
    }
    // HTTP 5xx errors (but not 501 Not Implemented)
    return text.includes('http 5') && !text.includes('http 501');
}
142
/**
 * Tell whether an engine can be used with the current environment.
 *
 * @param {string} engine - Engine name.
 * @returns {boolean} For brave/tavily/exa, whether the matching `*_API_KEY`
 *   environment variable is set to a non-empty value; `true` for any other engine.
 */
function hasApiKey(engine) {
    const envVarByEngine = new Map([
        ['brave', 'BRAVE_API_KEY'],
        ['tavily', 'TAVILY_API_KEY'],
        ['exa', 'EXA_API_KEY'],
    ]);
    const envVar = envVarByEngine.get(engine);
    if (envVar === undefined) {
        // Engines without a key requirement are always available
        return true;
    }
    return Boolean(process.env[envVar]);
}
157
// ─── Request collapsing ───────────────────────────────────────────────
// Track in-flight requests to avoid duplicate concurrent calls
const pendingRequests = new Map();
/**
 * Build the key under which concurrent, identical searches are collapsed.
 *
 * The key covers every option that can change the response (query, count,
 * engines, minimum confidence, language and both domain filters), so two
 * requests that differ only in their filters are never merged. Engine and
 * domain lists are compared order-insensitively, and JSON encoding keeps
 * field boundaries unambiguous even when the query contains separators.
 *
 * @param {object} options - Search options as passed to `searchWithFallback`.
 * @returns {string} Deterministic key for the request.
 */
function makeCollapseKey(options) {
    const { query, count = 10, engines, minConfidence = 1, language, includeDomains, excludeDomains, } = options;
    // Copy before sorting so the caller's arrays are left untouched
    const normalizeList = (list) => [...(list ?? [])].sort();
    return JSON.stringify([
        query,
        count,
        normalizeList(engines),
        minConfidence,
        language ?? null,
        normalizeList(includeDomains),
        normalizeList(excludeDomains),
    ]);
}
168
// ─── Core search logic (fused patterns from ddgs) ──────────────────────
/**
 * Public search entry point with request collapsing.
 *
 * When an identical request (same collapse key) is already in flight, its
 * promise is reused; otherwise a new search is started and tracked until it
 * settles.
 *
 * @param {object} options - Search options (query, count, engines, filters).
 * @returns {Promise<object>} The search response; rejects when the search rejects.
 */
export async function searchWithFallback(options) {
    const collapseKey = makeCollapseKey(options);
    // Check if same request is already in-flight
    const pending = pendingRequests.get(collapseKey);
    if (pending) {
        logger.info({ query: options.query }, 'Request collapsing: reusing pending request');
        return pending;
    }
    // Start new request and track it
    const searchPromise = executeSearch(options);
    pendingRequests.set(collapseKey, searchPromise);
    // Clean up when done. Both handlers are supplied so the derived promise
    // always fulfils: a bare `.finally()` would re-reject on failure and, with
    // nobody awaiting it, surface as an unhandled rejection.
    const release = () => {
        pendingRequests.delete(collapseKey);
    };
    searchPromise.then(release, release);
    return searchPromise;
}
195
/**
 * Run the given engines concurrently and split the outcome into results and
 * failures, keeping the engine name for every failure.
 */
async function runEngines(engines, query, limit) {
    const settled = await Promise.allSettled(engines.map(async (engine) => searchEngine(engine, query, limit)));
    const results = [];
    const failures = [];
    settled.forEach((outcome, index) => {
        if (outcome.status === 'fulfilled') {
            results.push(...outcome.value);
        }
        else {
            // allSettled preserves input order, so the index identifies the engine
            failures.push({
                engine: engines[index],
                message: outcome.reason?.message || 'Unknown error',
            });
        }
    });
    return { results, failures };
}
/**
 * Tell whether a hostname is one of the given domains or a subdomain of one.
 * Matching is done on label boundaries, so `x.com` matches `x.com` and
 * `api.x.com` but not `netflix.com`.
 */
function hostMatchesDomain(hostname, domains) {
    const host = hostname.toLowerCase();
    return domains.some((domain) => {
        const wanted = String(domain).trim().toLowerCase().replace(/^\.+/, '');
        return wanted !== '' && (host === wanted || host.endsWith(`.${wanted}`));
    });
}
/**
 * Execute the actual search logic (internal).
 *
 * Steps: cache lookup, provider dedup, batched free-engine search with early
 * exit, paid-engine fallback, aggregation (quality filter, URL/title dedup,
 * scoring), post-search filters, formatting, and a background cache write.
 *
 * The cache key is built from query, count and engines only, so the cache is
 * bypassed (neither read nor written) whenever a post-search filter is
 * active; otherwise a filtered response could be served for a request with
 * different filters.
 *
 * @param {object} options
 * @param {string} options.query - Search query.
 * @param {number} [options.count=10] - Number of results wanted.
 * @param {string[]} [options.engines] - Requested engines (default duckduckgo + sogou).
 * @param {number} [options.minConfidence=1] - Minimum `confidence` a result must have.
 * @param {string[]} [options.includeDomains] - Keep only results on these domains.
 * @param {string[]} [options.excludeDomains] - Drop results on these domains.
 * @returns {Promise<object>} `{ query, engines, ...formatted, partialFailures? }`.
 */
async function executeSearch(options) {
    const { query, count = 10, engines: userEngines = ['duckduckgo', 'sogou'], minConfidence = 1, includeDomains, excludeDomains, } = options;
    const hasIncludeFilter = Boolean(includeDomains && includeDomains.length > 0);
    const hasExcludeFilter = Boolean(excludeDomains && excludeDomains.length > 0);
    const cacheable = !(minConfidence > 1) && !hasIncludeFilter && !hasExcludeFilter;
    // Check cache first (unfiltered requests only)
    const cacheKey = cacheable ? cache.makeKey(query, count, userEngines) : null;
    if (cacheable) {
        const cached = cache.get(cacheKey);
        if (cached) {
            logger.info({ query, count, engines: userEngines }, 'Cache hit');
            return cached;
        }
    }
    logger.info({ query, count, engines: userEngines }, 'Starting search');
    // ── Step 1: Provider dedup (from ddgs) ──────────────────────────────
    // Only search each provider once (e.g., DDG and Bing both use Bing backend)
    const uniqueEngines = getUniqueProviders(userEngines);
    logger.info({ engines: uniqueEngines }, 'After provider dedup');
    // ── Step 2: Determine which engines to search ───────────────────────
    // Phase 1: requested free engines first, then the remaining free engines
    // as additional batches that only run if earlier batches fell short.
    const freeToSearch = uniqueEngines.filter(e => FREE_ENGINES.includes(e));
    const allFree = FREE_ENGINES.filter(e => !uniqueEngines.includes(e));
    const phase1Engines = [...freeToSearch, ...allFree];
    // ── Step 3: Batch concurrency + early exit (from ddgs) ──────────────
    // Adaptive batch size based on count and engine count
    const BATCH_SIZE = Math.max(2, Math.min(phase1Engines.length, Math.ceil(count / 10) + 1));
    const allResults = [];
    const failures = [];
    logger.info({ engines: phase1Engines }, 'Phase 1: free engines (batch)');
    for (let i = 0; i < phase1Engines.length; i += BATCH_SIZE) {
        const batch = phase1Engines.slice(i, i + BATCH_SIZE);
        const outcome = await runEngines(batch, query, count);
        allResults.push(...outcome.results);
        failures.push(...outcome.failures);
        // Early exit: stop if we have enough results
        if (allResults.length >= count * 1.5) {
            logger.info({ count: allResults.length }, 'Early exit: enough results');
            break;
        }
    }
    logger.info({ count: allResults.length }, 'Phase 1 results');
    // ── Step 4: Fallback to paid engines if not enough ───────────────────
    if (allResults.length < count) {
        const paidToSearch = uniqueEngines.filter(e => PAID_ENGINES.includes(e) && hasApiKey(e));
        if (paidToSearch.length > 0) {
            const remaining = Math.max(count - allResults.length, 1);
            logger.info({ engines: paidToSearch, remaining }, 'Phase 2: paid engines');
            const outcome = await runEngines(paidToSearch, query, remaining);
            allResults.push(...outcome.results);
            failures.push(...outcome.failures);
            logger.info({ got: allResults.length }, 'Phase 2 results');
        }
        else {
            logger.info('Phase 2: no paid engines available');
        }
    }
    // ── Step 5: Aggregation layer (fused from ddgs + our patterns) ──────
    // 5a. Filter low-quality results (from ddgs)
    const filtered = filterLowQuality(allResults);
    // 5b. URL dedup with frequency counting
    const { results: urlDeduped, frequencies } = dedupByUrl(filtered);
    // 5c. Title dedup
    const titleDeduped = dedupByTitle(urlDeduped);
    // 5d. Score and rank with frequency bonus
    let scored = scoreAndRank(titleDeduped, query, ENGINE_WEIGHTS, frequencies);
    // ── Step 6: Post-search filters ─────────────────────────────────────
    if (minConfidence > 1) {
        scored = scored.filter(r => r.confidence >= minConfidence);
    }
    if (hasIncludeFilter) {
        scored = scored.filter(r => {
            try {
                return hostMatchesDomain(new URL(r.url).hostname, includeDomains);
            }
            catch {
                // Unparseable URLs cannot be proven to be on an allowed domain
                return false;
            }
        });
    }
    if (hasExcludeFilter) {
        scored = scored.filter(r => {
            try {
                return !hostMatchesDomain(new URL(r.url).hostname, excludeDomains);
            }
            catch {
                // Unparseable URLs cannot be matched against the block list; keep them
                return true;
            }
        });
    }
    // ── Step 7: Format output with security processing ──────────────────
    const formatted = formatResults(scored);
    const response = {
        query,
        engines: userEngines,
        ...formatted,
        ...(failures.length > 0
            ? { partialFailures: failures }
            : {}),
    };
    // ── Step 8: Async cache write (from ddgs) ───────────────────────────
    // Don't block the response - write cache in background
    setImmediate(() => {
        try {
            if (cacheable) {
                cache.set(cacheKey, response);
            }
            logger.info({ total: response.meta.total }, 'Search complete');
        }
        catch (err) {
            logger.error({ err }, 'Cache write failed');
        }
    });
    return response;
}
336
+ // ─── Tool registration ──────────────────────────────────────────────────
337
+ // Export the health tracker instance so index.ts can use the same singleton
338
+ export { healthTracker };
339
/**
 * Register the `free_search` tool on the server.
 *
 * The handler forwards query, limit and engines to `searchWithFallback` and
 * returns the response as pretty-printed JSON, or an `isError` result with a
 * "Search failed: ..." message when the search throws.
 *
 * @param {object} server - Object exposing `tool(name, description, schema, handler)`.
 */
export function setupFreeSearchTool(server) {
    const description = [
        'Search the web with automatic fallback between free and paid engines. ',
        'Phase 1: DuckDuckGo + Sogou + Bing + Baidu (free, no key required). ',
        'Phase 2: Brave + Tavily + Exa (paid, requires BRAVE_API_KEY / TAVILY_API_KEY / EXA_API_KEY env vars). ',
        'All results are deduplicated, scored, and ranked. ',
        'Results include security metadata to protect against prompt injection.',
    ].join('');
    const inputSchema = {
        query: z.string().min(1, 'Search query must not be empty'),
        limit: z.number().int().min(1).max(50).default(10).describe('Number of results to return (1-50)'),
        engines: z.array(z.enum(['duckduckgo', 'sogou', 'bing', 'baidu', 'brave', 'tavily', 'exa']))
            .min(1)
            .default(['duckduckgo', 'sogou'])
            .describe('Search engines to use (default: all free engines)'),
    };
    // Wrap a message in the single-text-item result shape used by this tool
    const asText = (text) => [{ type: 'text', text }];
    const runSearch = async ({ query, limit = 10, engines: userEngines }) => {
        try {
            const results = await searchWithFallback({ query, count: limit, engines: userEngines });
            return { content: asText(JSON.stringify(results, null, 2)) };
        }
        catch (error) {
            const isError = error instanceof Error;
            logger.error({ err: isError ? error.message : String(error) }, 'Search tool execution failed');
            return {
                content: asText(`Search failed: ${isError ? error.message : 'Unknown error'}`),
                isError: true,
            };
        }
    };
    server.tool('free_search', description, inputSchema, runSearch);
}
@@ -0,0 +1,9 @@
1
/**
 * Register the `health` resource at `search://health`.
 *
 * Each read serialises the current value of `health.getHealth()` as
 * pretty-printed JSON.
 *
 * @param {object} server - Object exposing `resource(name, uri, reader)`.
 * @param {object} health - Object exposing `getHealth()`.
 */
export function registerHealth(server, health) {
    const uri = 'search://health';
    const readHealth = async () => {
        const snapshot = JSON.stringify(health.getHealth(), null, 2);
        return {
            contents: [{ uri, mimeType: 'application/json', text: snapshot }],
        };
    };
    server.resource('health', uri, readHealth);
}
package/dist/types.js ADDED
@@ -0,0 +1 @@
1
+ export {};