web-search-plus-plugin 1.3.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -33
- package/index.ts +870 -187
- package/openclaw.plugin.json +1 -2
- package/package.json +1 -5
- package/scripts/search.py +0 -2940
- package/scripts/setup.py +0 -463
package/index.ts
CHANGED
|
@@ -1,243 +1,926 @@
|
|
|
1
|
-
import
|
|
2
|
-
import { spawn } from "child_process";
|
|
1
|
+
import crypto from "crypto";
|
|
3
2
|
import fs from "fs";
|
|
4
3
|
import path from "path";
|
|
5
4
|
import { fileURLToPath } from "url";
|
|
5
|
+
import dns from "dns/promises";
|
|
6
|
+
import net from "net";
|
|
6
7
|
|
|
7
8
|
/**
 * Resolves the directory the plugin is installed in.
 *
 * Candidates are tried in order: the well-known OpenClaw extension path
 * (accepted only when it contains a package.json), the CommonJS `__dirname`,
 * the directory of `import.meta.url`, and finally the current working directory.
 */
function getPluginDir(): string {
  // When OpenClaw transpiles plugins, import.meta.url may point to a temp dir,
  // so the known install location is probed before anything else.
  const homeDir = process.env.HOME || "/root";
  const installedAt = path.join(homeDir, ".openclaw", "extensions", "web-search-plus-plugin");
  const manifest = path.join(installedAt, "package.json");
  if (fs.existsSync(manifest)) {
    return installedAt;
  }

  try {
    if (typeof __dirname !== "undefined") {
      return __dirname;
    }
  } catch {}

  try {
    const moduleFile = fileURLToPath(import.meta.url);
    return path.dirname(moduleFile);
  } catch {}

  return process.cwd();
}
|
|
16
21
|
|
|
17
|
-
const
|
|
22
|
+
/** Directory the plugin runs from, as resolved by getPluginDir(). */
const PLUGIN_DIR = getPluginDir();

/** Folder under the plugin directory that holds cached responses. */
const CACHE_DIR = path.join(PLUGIN_DIR, ".cache");

/** JSON file in the cache folder that tracks per-provider failure/cooldown state. */
const PROVIDER_HEALTH_FILE = path.join(CACHE_DIR, "provider_health.json");

/** Default cache lifetime (one hour; presumably seconds, matching cacheGet's ttl — confirm at call site). */
const DEFAULT_CACHE_TTL = 60 * 60;

/** Retry delays; the name indicates milliseconds (usage is outside this view). */
const RETRY_BACKOFF_MS = [1000, 3000, 9000];

/** Cooldown length in seconds, indexed by consecutive failure count (capped at the last step). */
const COOLDOWN_STEPS_SECONDS = [60, 5 * 60, 25 * 60, 60 * 60];

/** HTTP statuses that httpJson marks as transient on the thrown error. */
const TRANSIENT_HTTP_CODES = new Set<number>([408, 425, 429, 500, 502, 503, 504]);
|
|
29
|
+
|
|
30
|
+
/**
 * JSON-schema style description of the search tool's arguments.
 * Only `query` is required; every other property is optional.
 */
const PARAMETERS_SCHEMA = {
  type: "object",
  required: ["query"],
  properties: {
    query: {
      type: "string",
      description: "Search query",
    },
    provider: {
      type: "string",
      enum: [
        "serper",
        "tavily",
        "querit",
        "exa",
        "perplexity",
        "you",
        "searxng",
        "auto",
      ],
      description: "Force a provider, or use auto routing (default: auto)",
    },
    count: {
      type: "number",
      description: "Number of results (default: 5)",
    },
    depth: {
      type: "string",
      enum: [
        "normal",
        "deep",
        "deep-reasoning",
      ],
      description: "Exa depth when using Exa or when auto-routing chooses Exa.",
    },
    time_range: {
      type: "string",
      enum: [
        "day",
        "week",
        "month",
        "year",
      ],
      description: "Recency filter where supported.",
    },
    include_domains: {
      type: "array",
      items: { type: "string" },
      description: "Only include results from these domains (Tavily, Exa, Querit where supported).",
    },
    exclude_domains: {
      type: "array",
      items: { type: "string" },
      description: "Exclude results from these domains (Tavily, Exa, Querit where supported).",
    },
  },
};
|
|
63
|
+
|
|
64
|
+
/** Loosely typed object with arbitrary string keys. */
type Json = Record<string, any>;

/** Identifiers of the concrete search backends (excludes the "auto" pseudo-provider). */
type ProviderName =
  | "serper"
  | "tavily"
  | "querit"
  | "exa"
  | "perplexity"
  | "you"
  | "searxng";

/** Arguments accepted by the search tool; field-for-field counterpart of PARAMETERS_SCHEMA. */
type ToolParams = {
  /** Search query text (the only required field). */
  query: string;
  /** Specific backend to use, or "auto". */
  provider?: ProviderName | "auto";
  /** Requested number of results. */
  count?: number;
  depth?: "normal" | "deep" | "deep-reasoning";
  time_range?: "day" | "week" | "month" | "year";
  include_domains?: string[];
  exclude_domains?: string[];
};

/** A single search hit; extra provider-specific fields are permitted. */
type SearchResult = {
  title: string;
  url: string;
  snippet: string;
  score?: number;
  [key: string]: any;
};

/** Result envelope for one provider call; extra fields are permitted. */
type SearchResponse = {
  provider: string;
  query: string;
  results: SearchResult[];
  images?: string[];
  answer?: string;
  metadata?: Json;
  [key: string]: any;
};
|
|
93
|
+
|
|
94
|
+
/** Error raised when a provider cannot be used because of configuration (missing API key, missing or rejected instance URL). */
class ProviderConfigError extends Error {}
|
|
95
|
+
/**
 * Error raised when a request to a provider fails.
 *
 * `statusCode` carries the HTTP status when one was received; `transient`
 * tells callers whether the failure is of a kind worth retrying.
 */
class ProviderRequestError extends Error {
  constructor(
    message: string,
    public statusCode?: number,
    public transient: boolean = false,
  ) {
    super(message);
    this.name = "ProviderRequestError";
  }
}
|
|
105
|
+
|
|
106
|
+
/**
 * Patterns for secret-looking substrings that must never reach tool output.
 * Only the URL query-parameter pattern has a capture group (the `?name=` prefix,
 * which is preserved); all other patterns are replaced wholesale.
 */
const SENSITIVE_PATTERNS: RegExp[] = [
  // Prefixed API tokens such as sk_..., pk_..., api_...
  /\b(?:sk|pk|rk|api|tok)_[A-Za-z0-9\-_]{10,}\b/g,
  // HTTP bearer credentials
  /\bBearer\s+[A-Za-z0-9\-._~+/]+=*\b/gi,
  // key=value / key: value assignments
  /\b(?:key|token|secret|password|api[_-]?key)\s*[:=]\s*[^\s,"'}]+/gi,
  // Credentials passed as URL query parameters (group 1 = parameter prefix to keep)
  /([?&](?:api[_-]?key|key|token|access[_-]?token|auth|authorization)=)([^&#\s]+)/gi,
  // Three dot-separated base64url segments (JWT-shaped)
  /\b[A-Za-z0-9_-]{24,}\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}\b/g,
];

/**
 * Recursively redacts secrets from a value before it is logged or returned.
 *
 * - Strings: every SENSITIVE_PATTERNS match is replaced with "[REDACTED]"
 *   (keeping the captured `?name=` prefix for URL parameters).
 * - Arrays: each element is sanitized.
 * - Objects: values under keys that look like credentials are replaced with
 *   "[REDACTED]"; all other values are sanitized recursively.
 * - Anything else is returned unchanged.
 *
 * @param input Arbitrary value to sanitize; it is not mutated.
 * @returns A sanitized copy (or the input itself for non-container primitives).
 */
function sanitizeOutput(input: any): any {
  if (typeof input === "string") {
    let out = input;
    for (const pattern of SENSITIVE_PATTERNS) {
      // For patterns without a capture group the second replacer argument is the
      // numeric match offset, not a group. Testing its truthiness would emit
      // "<offset>[REDACTED]" for every match not at index 0, so check the type.
      out = out.replace(pattern, (_match: string, prefix: unknown) =>
        typeof prefix === "string" ? `${prefix}[REDACTED]` : "[REDACTED]",
      );
    }
    return out;
  }
  if (Array.isArray(input)) return input.map((v) => sanitizeOutput(v));
  if (input && typeof input === "object") {
    const result: any = {};
    for (const [k, v] of Object.entries(input)) {
      if (/(?:api[_-]?key|token|secret|password|authorization)/i.test(k)) {
        result[k] = "[REDACTED]";
      } else {
        result[k] = sanitizeOutput(v);
      }
    }
    return result;
  }
  return input;
}
|
|
136
|
+
|
|
137
|
+
/** Creates `dir` together with any missing parent directories; a no-op when it already exists. */
function ensureDir(dir: string): void {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
|
|
140
|
+
|
|
141
|
+
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
144
|
+
|
|
145
|
+
/**
 * Reads and parses a UTF-8 JSON file.
 *
 * @param file Path of the file to read.
 * @param fallback Value returned when the file cannot be read or is not valid JSON.
 * @returns The parsed content, or `fallback` on any failure.
 */
function readJsonFile(file: string, fallback: any): any {
  let parsed: any;
  try {
    const raw = fs.readFileSync(file, "utf8");
    parsed = JSON.parse(raw);
  } catch {
    return fallback;
  }
  return parsed;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Serializes `value` as pretty-printed (2-space) JSON and writes it to `file`,
 * creating the parent directory first when it does not exist.
 */
function writeJsonFile(file: string, value: any): void {
  const parent = path.dirname(file);
  fs.mkdirSync(parent, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  fs.writeFileSync(file, serialized, "utf8");
}
|
|
157
|
+
|
|
158
|
+
/** Returns the lowercase hex SHA-256 digest of `input`. */
function sha256(input: string): string {
  return crypto.createHash("sha256").update(input).digest("hex");
}

/**
 * Serializes a value as JSON with object keys sorted at every nesting level.
 * Mirrors JSON.stringify semantics: `undefined`/function properties are skipped
 * in objects and become `null` inside arrays.
 */
function stableStringify(value: unknown): string | undefined {
  if (value === null || typeof value !== "object") {
    return JSON.stringify(value);
  }
  const withToJson = value as { toJSON?: () => unknown };
  if (typeof withToJson.toJSON === "function") {
    return stableStringify(withToJson.toJSON());
  }
  if (Array.isArray(value)) {
    return `[${value.map((item) => stableStringify(item) ?? "null").join(",")}]`;
  }
  const record = value as Record<string, unknown>;
  const parts: string[] = [];
  for (const key of Object.keys(record).sort()) {
    const serialized = stableStringify(record[key]);
    if (serialized !== undefined) parts.push(`${JSON.stringify(key)}:${serialized}`);
  }
  return `{${parts.join(",")}}`;
}

/**
 * Derives the cache key for a search request: the first 32 hex characters of the
 * SHA-256 of the request's canonical JSON form.
 *
 * The previous implementation passed the sorted top-level key list as the
 * JSON.stringify replacer. An array replacer is an allowlist applied at every
 * nesting level, so nested objects inside `params` were serialized as `{}` and
 * requests differing only in nested values shared one key. Keys are now sorted
 * recursively instead; for flat params the serialized form (and therefore the
 * key) is identical to before.
 *
 * @param query Search query text.
 * @param provider Provider identifier.
 * @param maxResults Requested result count.
 * @param params Additional request options; merged into the top level, so its
 *   entries override `query`/`provider`/`maxResults` on a name clash (unchanged).
 * @returns A 32-character hex string.
 */
function buildCacheKey(query: string, provider: string, maxResults: number, params?: Json): string {
  const canonical = stableStringify({ query, provider, maxResults, ...(params || {}) }) as string;
  return sha256(canonical).slice(0, 32);
}
|
|
165
|
+
|
|
166
|
+
/** Maps a cache key to its JSON file inside CACHE_DIR. */
function getCachePath(cacheKey: string): string {
  const fileName = `${cacheKey}.json`;
  return path.join(CACHE_DIR, fileName);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Looks up a cached response for the given request.
 *
 * @param ttl Maximum accepted age of the entry, in seconds.
 * @returns The stored payload when present and not older than `ttl`; otherwise
 *   `null`. Missing, unreadable, timestamp-less and expired entries are deleted
 *   on a best-effort basis.
 */
function cacheGet(query: string, provider: string, maxResults: number, ttl: number, params?: Json): any | null {
  const file = getCachePath(buildCacheKey(query, provider, maxResults, params));

  // Best-effort removal of an unusable entry; always yields the "miss" value.
  const discard = (): null => {
    try { fs.unlinkSync(file); } catch {}
    return null;
  };

  try {
    const cached = JSON.parse(fs.readFileSync(file, "utf8"));
    const storedAt = Number(cached._cache_timestamp || 0);
    const expired = !storedAt || Date.now() / 1000 - storedAt > ttl;
    return expired ? discard() : cached;
  } catch {
    return discard();
  }
}
|
|
18
186
|
|
|
19
|
-
function
|
|
20
|
-
|
|
187
|
+
/**
 * Stores `result` in the on-disk cache under the key derived from the request.
 * The payload is the result's own fields plus `_cache_*` bookkeeping fields
 * (timestamp in epoch seconds, key, and the request that produced it).
 */
function cachePut(query: string, provider: string, maxResults: number, result: any, params?: Json): void {
  ensureDir(CACHE_DIR);
  const key = buildCacheKey(query, provider, maxResults, params);
  const bookkeeping = {
    _cache_timestamp: Math.floor(Date.now() / 1000),
    _cache_key: key,
    _cache_query: query,
    _cache_provider: provider,
    _cache_max_results: maxResults,
    _cache_params: params || {},
  };
  writeJsonFile(getCachePath(key), { ...result, ...bookkeeping });
}
|
|
202
|
+
|
|
203
|
+
/** Reads the persisted provider health state; yields an empty object when the file is missing or invalid. */
function loadProviderHealth(): Json {
  const emptyState = {};
  return readJsonFile(PROVIDER_HEALTH_FILE, emptyState);
}
|
|
206
|
+
|
|
207
|
+
/** Persists the full provider health state to PROVIDER_HEALTH_FILE, replacing its previous content. */
function saveProviderHealth(state: Json): void {
  const target = PROVIDER_HEALTH_FILE;
  writeJsonFile(target, state);
}
|
|
210
|
+
|
|
211
|
+
/**
 * Reports whether `provider` is currently cooling down after failures.
 *
 * @returns `inCooldown` plus the whole seconds left (`remaining`, never negative).
 */
function providerInCooldown(provider: string): { inCooldown: boolean; remaining: number } {
  const entry = loadProviderHealth()?.[provider];
  const cooldownUntil = Number(entry?.cooldown_until || 0);
  const nowSeconds = Math.floor(Date.now() / 1000);
  const secondsLeft = cooldownUntil - nowSeconds;
  return {
    inCooldown: secondsLeft > 0,
    remaining: Math.max(0, secondsLeft),
  };
}
|
|
217
|
+
|
|
218
|
+
/**
 * Records a failure for `provider` and starts (or extends) its cooldown.
 *
 * The cooldown length is taken from COOLDOWN_STEPS_SECONDS by consecutive
 * failure count, staying at the last step once the table is exhausted.
 * The error message is sanitized before being persisted.
 *
 * @returns The health entry that was written for the provider.
 */
function markProviderFailure(provider: string, message: string): Json {
  const state = loadProviderHealth();
  const previousFailures = Number(state?.[provider]?.failure_count || 0);
  const failCount = previousFailures + 1;

  const lastStep = COOLDOWN_STEPS_SECONDS.length - 1;
  const cooldownSeconds = COOLDOWN_STEPS_SECONDS[Math.min(failCount - 1, lastStep)];
  const now = Math.floor(Date.now() / 1000);

  const entry = {
    failure_count: failCount,
    cooldown_until: now + cooldownSeconds,
    cooldown_seconds: cooldownSeconds,
    last_error: sanitizeOutput(message),
    last_failure_at: now,
  };
  state[provider] = entry;
  saveProviderHealth(state);
  return entry;
}
|
|
233
|
+
|
|
234
|
+
/** Clears any recorded failures for `provider`; the state file is only rewritten when an entry existed. */
function resetProviderHealth(provider: string): void {
  const state = loadProviderHealth();
  if (!state[provider]) {
    return;
  }
  delete state[provider];
  saveProviderHealth(state);
}
|
|
241
|
+
|
|
242
|
+
/**
 * Produces a comparison key for a result URL, used to detect duplicates.
 *
 * The key ignores scheme, a leading "www.", host case, one trailing slash and
 * the fragment. The query string is kept (sorted by parameter name) because it
 * frequently identifies the page itself, e.g. `watch?v=...` or `item?id=...`;
 * dropping it made distinct pages on the same path collapse into one result.
 * Well-known tracking parameters are removed so they do not defeat deduplication.
 * URLs without a query string yield the same key as before.
 *
 * @param url Raw URL as returned by a provider.
 * @returns The normalized key, or the trimmed lower-cased input when it is not a parseable URL.
 */
function normalizeResultUrl(url: string): string {
  const trimmed = url.trim();
  try {
    const u = new URL(trimmed);
    const host = u.hostname.replace(/^www\./i, "").toLowerCase();
    const pathname = u.pathname.replace(/\/$/, "");

    const trackingParam = /^(?:utm_[a-z0-9_]+|fbclid|gclid|msclkid|mc_cid|mc_eid|igshid)$/i;
    const kept: Array<[string, string]> = [];
    u.searchParams.forEach((value, name) => {
      if (!trackingParam.test(name)) kept.push([name, value]);
    });
    // Array.prototype.sort is stable, so repeated parameters keep their relative order.
    kept.sort((a, b) => (a[0] === b[0] ? 0 : a[0] < b[0] ? -1 : 1));
    const query = kept.length ? `?${new URLSearchParams(kept).toString()}` : "";

    return `${host}${pathname}${query}`;
  } catch {
    return trimmed.toLowerCase();
  }
}
|
|
252
|
+
|
|
253
|
+
/**
 * Merges results from several providers, dropping repeats of the same page.
 *
 * Providers are walked in the given order and the first occurrence of each
 * normalized URL wins. Results whose URL normalizes to an empty string are
 * always kept. Each kept result is tagged with its provider unless it already
 * carries one. Collection stops as soon as `maxResults` results were gathered.
 *
 * @returns The merged list and the number of duplicates skipped up to that point.
 */
function deduplicateResultsAcrossProviders(resultsByProvider: Array<[string, SearchResponse]>, maxResults: number): { results: SearchResult[]; dedupCount: number } {
  const unique: SearchResult[] = [];
  const seenKeys = new Set<string>();
  let skipped = 0;

  collect: for (const [providerName, response] of resultsByProvider) {
    for (const entry of response.results || []) {
      const key = normalizeResultUrl(entry.url || "");
      if (key) {
        if (seenKeys.has(key)) {
          skipped += 1;
          continue;
        }
        seenKeys.add(key);
      }
      unique.push({ ...entry, provider: entry.provider || providerName });
      if (unique.length >= maxResults) break collect;
    }
  }

  return { results: unique, dedupCount: skipped };
}
|
|
22
271
|
|
|
23
272
|
/**
 * Parses a dotenv-style file into a key/value map.
 *
 * Blank lines and lines starting with `#` are skipped, a leading `export ` is
 * ignored, and everything after the first `=` is the value. A value wrapped in
 * one matching pair of single or double quotes is unquoted; a lone or
 * mismatched quote is part of the value and is kept (previously any leading or
 * trailing quote was stripped, corrupting values such as `abc'`).
 *
 * @param envPath Path of the env file.
 * @returns The parsed variables; an empty object when the file is missing or
 *   cannot be read (env files are optional, so read errors are not fatal).
 */
function loadEnvFile(envPath: string): Record<string, string> {
  let contents: string;
  try {
    // Read directly instead of existsSync + read: avoids the check-then-use gap
    // and a crash when the path is a directory or is not readable.
    contents = fs.readFileSync(envPath, "utf8");
  } catch {
    return {};
  }

  const env: Record<string, string> = {};
  for (const line of contents.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;
    const stripped = trimmed.startsWith("export ") ? trimmed.slice(7) : trimmed;
    const idx = stripped.indexOf("=");
    if (idx < 0) continue;
    const key = stripped.slice(0, idx).trim();
    if (!key) continue;

    let value = stripped.slice(idx + 1).trim();
    const quote = value[0];
    if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
      value = value.slice(1, -1);
    }
    env[key] = value;
  }
  return env;
}
|
|
39
287
|
|
|
40
|
-
function
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
288
|
+
/**
 * Builds the effective environment for provider calls.
 *
 * Sources, from lowest to highest precedence: the plugin's own `.env`, the
 * `.env` of the sibling `web-search-plus` directory, `process.env`, and finally
 * the recognized `pluginConfig` entries (translated to their env-variable names).
 *
 * @param pluginConfig Plugin configuration; only non-empty string values of known keys are used.
 */
function getRuntimeEnv(pluginConfig: Record<string, string>): Record<string, string> {
  const fileEnv: Record<string, string> = {};
  const envFiles = [
    path.join(PLUGIN_DIR, ".env"),
    path.join(PLUGIN_DIR, "..", "web-search-plus", ".env"),
  ];
  for (const envFile of envFiles) {
    Object.assign(fileEnv, loadEnvFile(envFile));
  }

  // pluginConfig key -> environment variable it stands for
  const configKeyMap: Record<string, string> = {
    serperApiKey: "SERPER_API_KEY",
    tavilyApiKey: "TAVILY_API_KEY",
    queritApiKey: "QUERIT_API_KEY",
    exaApiKey: "EXA_API_KEY",
    perplexityApiKey: "PERPLEXITY_API_KEY",
    kilocodeApiKey: "KILOCODE_API_KEY",
    youApiKey: "YOU_API_KEY",
    searxngInstanceUrl: "SEARXNG_INSTANCE_URL",
    searxngAllowPrivate: "SEARXNG_ALLOW_PRIVATE",
  };
  const mapped: Record<string, string> = {};
  for (const cfgKey of Object.keys(configKeyMap)) {
    const val = pluginConfig?.[cfgKey];
    if (val && typeof val === "string") {
      mapped[configKeyMap[cfgKey]] = val;
    }
  }

  const definedProcessEnv = Object.entries(process.env).filter(([, v]) => typeof v === "string");
  const processEnv = Object.fromEntries(definedProcessEnv as any);

  return { ...fileEnv, ...processEnv, ...mapped };
}
|
|
58
309
|
|
|
59
|
-
|
|
60
|
-
|
|
310
|
+
/**
 * Returns the credential configured for `provider`, if any.
 *
 * For "perplexity" the Kilocode key takes priority over the Perplexity key;
 * for "searxng" the "credential" is the instance URL.
 */
function getApiKey(provider: ProviderName, env: Record<string, string>): string | undefined {
  switch (provider) {
    case "serper":
      return env.SERPER_API_KEY;
    case "tavily":
      return env.TAVILY_API_KEY;
    case "querit":
      return env.QUERIT_API_KEY;
    case "exa":
      return env.EXA_API_KEY;
    case "perplexity":
      return env.KILOCODE_API_KEY || env.PERPLEXITY_API_KEY;
    case "you":
      return env.YOU_API_KEY;
    case "searxng":
      return env.SEARXNG_INSTANCE_URL;
    default:
      return undefined;
  }
}
|
|
61
322
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
323
|
+
/**
 * Returns the credential for `provider`, insisting that one is configured.
 *
 * @throws ProviderConfigError when no credential (or, for SearXNG, no instance URL) is available.
 */
function validateApiKey(provider: ProviderName, env: Record<string, string>): string {
  const key = getApiKey(provider, env);
  if (key) {
    return key;
  }
  const message =
    provider === "searxng"
      ? "Missing SearXNG instance URL (SEARXNG_INSTANCE_URL or pluginConfig.searxngInstanceUrl)"
      : `Missing API key for ${provider}`;
  throw new ProviderConfigError(message);
}
|
|
331
|
+
|
|
332
|
+
/** Passes through a supported recency filter ("day", "week", "month", "year"); anything else becomes `undefined`. */
function toTimeRange(value?: string): string | undefined {
  switch (value) {
    case "day":
    case "week":
    case "month":
    case "year":
      return value;
    default:
      return undefined;
  }
}
|
|
335
|
+
|
|
336
|
+
/**
 * Derives a human-readable title from a URL for results that lack one.
 *
 * The title is the host without "www.", followed by the last path segment with
 * dashes/underscores turned into spaces and a short file extension removed.
 * Input that is not a parseable URL is returned truncated to 80 characters.
 */
function titleFromUrl(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return url.slice(0, 80);
  }

  const domain = parsed.hostname.replace(/^www\./, "");
  const segments = parsed.pathname.split("/").filter((segment) => Boolean(segment));
  if (segments.length === 0) {
    return domain;
  }

  const lastSegment = segments[segments.length - 1];
  const readable = lastSegment.replace(/[-_]/g, " ").replace(/\.\w{2,4}$/, "");
  if (!readable) {
    return domain;
  }
  return `${domain} — ${readable}`;
}
|
|
347
|
+
|
|
348
|
+
/**
 * Performs an HTTP request and returns the parsed JSON body.
 *
 * A default User-Agent is added unless the caller supplies one. The request is
 * aborted after `timeoutMs`. An empty or non-JSON body on a successful response
 * yields `{}`.
 *
 * @param url Request URL.
 * @param init Fetch options; `signal` is replaced by the internal timeout signal.
 * @param timeoutMs Timeout in milliseconds (default 30000).
 * @returns The parsed response body.
 * @throws ProviderRequestError for non-2xx responses (with `statusCode`, and
 *   `transient` set for statuses in TRANSIENT_HTTP_CODES), for timeouts and for
 *   network failures (both `transient`).
 */
async function httpJson(url: string, init: RequestInit, timeoutMs = 30000): Promise<any> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    // Headers accepts every HeadersInit form (plain object, tuple array, Headers).
    // Object-spreading a Headers instance or tuple array would silently lose entries.
    const headers = new Headers(init.headers);
    if (!headers.has("User-Agent")) headers.set("User-Agent", "ClawdBot-WebSearchPlus/3.0");

    const res = await fetch(url, { ...init, headers, signal: controller.signal });
    const text = await res.text();
    let data: any = null;
    try { data = text ? JSON.parse(text) : {}; } catch {}

    if (!res.ok) {
      const raw = data?.error || data?.message || text || res.statusText;
      // Many APIs return `error` as an object ({ message, code, ... }); interpolating
      // it directly would produce "[object Object]".
      let detail: string;
      if (typeof raw === "string") {
        detail = raw;
      } else if (raw && typeof raw === "object") {
        detail = typeof raw.message === "string" && raw.message ? raw.message : JSON.stringify(raw);
      } else {
        detail = String(raw);
      }
      throw new ProviderRequestError(`${detail} (HTTP ${res.status})`, res.status, TRANSIENT_HTTP_CODES.has(res.status));
    }
    return data ?? {};
  } catch (error: any) {
    if (error?.name === "AbortError") throw new ProviderRequestError(`Request timed out after ${timeoutMs}ms`, undefined, true);
    if (error instanceof ProviderRequestError) throw error;
    throw new ProviderRequestError(`Network error: ${String(error?.message || error)}`, undefined, true);
  } finally {
    clearTimeout(timer);
  }
}
|
|
69
376
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
377
|
+
/**
 * Tells whether an IP address is loopback, link-local, unspecified or in a
 * private range. IPv4-mapped IPv6 addresses are judged by their embedded IPv4
 * address, and an IPv6 zone suffix ("%eth0") is ignored.
 */
function isPrivateAddress(address: string): boolean {
  let ip = address.toLowerCase().split("%")[0];

  // Unwrap ::ffff:a.b.c.d and its hex form ::ffff:hhhh:hhhh.
  const mappedDotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(ip);
  const mappedHex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(ip);
  if (mappedDotted) {
    ip = mappedDotted[1];
  } else if (mappedHex) {
    const hi = parseInt(mappedHex[1], 16);
    const lo = parseInt(mappedHex[2], 16);
    ip = `${hi >> 8}.${hi & 255}.${lo >> 8}.${lo & 255}`;
  }

  if (net.isIPv4(ip)) {
    return (
      /^10\./.test(ip) ||
      /^127\./.test(ip) ||
      /^169\.254\./.test(ip) ||
      /^192\.168\./.test(ip) ||
      /^172\.(1[6-9]|2\d|3[0-1])\./.test(ip) ||
      ip === "0.0.0.0"
    );
  }
  // ::1 loopback, :: unspecified, fc00::/7 unique-local, fe80::/10 link-local
  return ip === "::1" || ip === "::" || /^f[cd][0-9a-f]{2}:/.test(ip) || /^fe[89ab][0-9a-f]:/.test(ip);
}

/**
 * Validates a configured SearXNG instance URL and guards against requests to
 * internal services (SSRF).
 *
 * The URL must be http(s) with a hostname. Cloud metadata endpoints are always
 * rejected. Unless `SEARXNG_ALLOW_PRIVATE` is "1", every address the host
 * resolves to must be public.
 *
 * Fixes over the earlier version: `URL.hostname` keeps the brackets around IPv6
 * literals, so those are stripped before the host is treated as an IP; a
 * trailing root dot no longer bypasses the metadata blocklist; IPv4-mapped IPv6
 * addresses and the whole fe80::/10 range are recognised as internal.
 *
 * NOTE(review): the check resolves the name once, while the later request
 * resolves it again, so DNS rebinding is not prevented here.
 *
 * @param input URL as configured by the user.
 * @param env Effective environment (read for SEARXNG_ALLOW_PRIVATE).
 * @returns The URL serialized without a trailing slash.
 * @throws ProviderConfigError when the URL is malformed, unresolvable or points to a blocked target.
 */
async function validateSearxngUrl(input: string, env: Record<string, string>): Promise<string> {
  let u: URL;
  try {
    u = new URL(input);
  } catch {
    throw new ProviderConfigError("Invalid SearXNG URL");
  }
  if (!["http:", "https:"].includes(u.protocol)) throw new ProviderConfigError(`SearXNG URL must use http or https, got ${u.protocol}`);
  if (!u.hostname) throw new ProviderConfigError("SearXNG URL must include a hostname");

  // Bare host: no IPv6 brackets, no trailing root dot, lower case.
  const host = u.hostname.replace(/^\[|\]$/g, "").replace(/\.$/, "").toLowerCase();

  const blockedHosts = new Set(["169.254.169.254", "metadata.google.internal", "metadata.internal"]);
  if (blockedHosts.has(host)) throw new ProviderConfigError("SearXNG URL blocked: metadata endpoint");

  const allowPrivate = String(env.SEARXNG_ALLOW_PRIVATE || "").trim() === "1";
  if (!allowPrivate) {
    const literalFamily = net.isIP(host);
    const records: Array<{ address: string; family: number }> = literalFamily
      ? [{ address: host, family: literalFamily }]
      : await dns.lookup(host, { all: true, verbatim: true }).catch(() => []);
    // Fail closed: a host that cannot be resolved cannot be vetted.
    if (!records.length) throw new ProviderConfigError(`SearXNG URL blocked: cannot resolve hostname ${u.hostname}`);
    for (const record of records) {
      if (isPrivateAddress(record.address)) {
        throw new ProviderConfigError(`SearXNG URL blocked: ${u.hostname} resolves to private/internal IP ${record.address}`);
      }
    }
  }
  return u.toString().replace(/\/$/, "");
}
|
|
81
407
|
|
|
82
|
-
const
|
|
83
|
-
|
|
408
|
+
/*
 * Query-intent signal tables.
 *
 * Each table maps a regular-expression source string to a weight. The analyzer
 * compiles every key case-insensitively and adds up the weights of the patterns
 * that match a query, so insertion order only affects the order of reported matches.
 */

/** Signals for price, purchase and product-comparison queries (English and German). */
const SHOPPING_SIGNALS: Record<string, number> = {
  // asking for a price
  "\\bhow much\\b": 4.0, "\\bprice of\\b": 4.0, "\\bcost of\\b": 4.0, "\\bprices?\\b": 3.0,
  "\\$\\d+|\\d+\\s*dollars?": 3.0, "€\\d+|\\d+\\s*euros?": 3.0, "£\\d+|\\d+\\s*pounds?": 3.0,
  "\\bpreis(e)?\\b": 3.5, "\\bkosten\\b": 3.0, "\\bwieviel\\b": 3.5, "\\bwie viel\\b": 3.5, "\\bwas kostet\\b": 4.0,
  // intent to buy
  "\\bbuy\\b": 3.5, "\\bpurchase\\b": 3.5, "\\border\\b(?!\\s+by)": 3.0, "\\bshopping\\b": 3.5, "\\bshop for\\b": 3.5,
  "\\bwhere to (buy|get|purchase)\\b": 4.0, "\\bkaufen\\b": 3.5, "\\bbestellen\\b": 3.5, "\\bwo kaufen\\b": 4.0,
  // deals and bargains
  "\\bhändler\\b": 3.0, "\\bshop\\b": 2.5, "\\bdeal(s)?\\b": 3.0, "\\bdiscount(s)?\\b": 3.0, "\\bsale\\b": 2.5,
  "\\bcheap(er|est)?\\b": 3.0, "\\baffordable\\b": 2.5, "\\bbudget\\b": 2.5, "\\bbest price\\b": 3.5,
  "\\bcompare prices\\b": 3.5, "\\bcoupon\\b": 3.0, "\\bgünstig(er|ste)?\\b": 3.0, "\\bbillig(er|ste)?\\b": 3.0,
  "\\bangebot(e)?\\b": 3.0, "\\brabatt\\b": 3.0, "\\baktion\\b": 2.5, "\\bschnäppchen\\b": 3.0,
  // product comparison and reviews
  "\\bvs\\.?\\b": 2.0, "\\bversus\\b": 2.0, "\\bor\\b.*\\bwhich\\b": 2.0, "\\bspecs?\\b": 2.5,
  "\\bspecifications?\\b": 2.5, "\\breview(s)?\\b": 2.0, "\\brating(s)?\\b": 2.0, "\\bunboxing\\b": 2.5,
  "\\btest\\b": 2.5, "\\bbewertung(en)?\\b": 2.5, "\\btechnische daten\\b": 3.0, "\\bspezifikationen\\b": 2.5,
};

/** Signals for explanatory and analytical queries (English and German). */
const RESEARCH_SIGNALS: Record<string, number> = {
  // how / why / what questions
  "\\bhow does\\b": 4.0, "\\bhow do\\b": 3.5, "\\bwhy does\\b": 4.0, "\\bwhy do\\b": 3.5, "\\bwhy is\\b": 3.5,
  "\\bexplain\\b": 4.0, "\\bexplanation\\b": 4.0, "\\bwhat is\\b": 3.0, "\\bwhat are\\b": 3.0, "\\bdefine\\b": 3.5,
  "\\bdefinition of\\b": 3.5, "\\bmeaning of\\b": 3.0, "\\banalyze\\b": 3.5, "\\banalysis\\b": 3.5,
  // comparison and evaluation
  "\\bcompare\\b(?!\\s*prices?)": 3.0, "\\bcomparison\\b": 3.0, "\\bstatus of\\b": 3.5, "\\bstatus\\b": 2.5,
  "\\bwhat happened with\\b": 4.0, "\\bpros and cons\\b": 4.0, "\\badvantages?\\b": 3.0, "\\bdisadvantages?\\b": 3.0,
  "\\bbenefits?\\b": 2.5, "\\bdrawbacks?\\b": 3.0, "\\bdifference between\\b": 3.5, "\\bunderstand\\b": 3.0,
  // learning material and depth
  "\\blearn(ing)?\\b": 2.5, "\\btutorial\\b": 3.0, "\\bguide\\b": 2.5, "\\bhow to\\b": 2.0, "\\bstep by step\\b": 3.0,
  "\\bin[- ]depth\\b": 3.0, "\\bdetailed\\b": 2.5, "\\bcomprehensive\\b": 3.0, "\\bthorough\\b": 2.5,
  "\\bdeep dive\\b": 3.5, "\\boverall\\b": 2.0, "\\bsummary\\b": 2.0, "\\bstudy\\b": 2.5, "\\bresearch shows\\b": 3.5,
  "\\baccording to\\b": 2.5, "\\bevidence\\b": 3.0, "\\bscientific\\b": 3.0, "\\bhistory of\\b": 3.0,
  "\\bbackground\\b": 2.5, "\\bcontext\\b": 2.5, "\\bimplications?\\b": 3.0, "\\bwie funktioniert\\b": 4.0,
  // German
  "\\bwarum\\b": 3.5, "\\berklär(en|ung)?\\b": 4.0, "\\bwas ist\\b": 3.0, "\\bwas sind\\b": 3.0, "\\bbedeutung\\b": 3.0,
  "\\banalyse\\b": 3.5, "\\bvergleich(en)?\\b": 3.0, "\\bvor- und nachteile\\b": 4.0, "\\bvorteile\\b": 3.0,
  "\\bnachteile\\b": 3.0, "\\bunterschied(e)?\\b": 3.5, "\\bverstehen\\b": 3.0, "\\blernen\\b": 2.5,
  "\\banleitung\\b": 3.0, "\\bübersicht\\b": 2.5, "\\bhintergrund\\b": 2.5, "\\bzusammenfassung\\b": 2.5,
};

/** Signals for "find things like X" queries: alternatives, companies, papers, repositories, URLs. */
const DISCOVERY_SIGNALS: Record<string, number> = {
  // similarity and alternatives
  "\\bsimilar to\\b": 5.0, "\\blike\\s+\\w+\\.com": 4.5, "\\balternatives? to\\b": 5.0, "\\bcompetitors? (of|to)\\b": 4.5,
  "\\bcompeting with\\b": 4.0, "\\brivals? (of|to)\\b": 4.0, "\\binstead of\\b": 3.0, "\\breplacement for\\b": 3.5,
  // companies, startups, funding
  "\\bcompanies (like|that|doing|building)\\b": 4.5, "\\bstartups? (like|that|doing|building)\\b": 4.5, "\\bwho else\\b": 4.0,
  "\\bother (companies|startups|tools|apps)\\b": 3.5, "\\bfind (companies|startups|tools|examples?)\\b": 4.5,
  "\\bevents? in\\b": 4.0, "\\bthings to do in\\b": 4.5, "\\bseries [a-d]\\b": 4.0, "\\byc\\b|y combinator": 4.0,
  "\\bfund(ed|ing|raise)\\b": 3.5, "\\bventure\\b": 3.0, "\\bvaluation\\b": 3.0, "\\bresearch papers? (on|about)\\b": 4.0,
  // papers, code, social posts, explicit URLs
  "\\barxiv\\b": 4.5, "\\bgithub (projects?|repos?)\\b": 4.5, "\\bopen source\\b.*\\bprojects?\\b": 4.0,
  "\\btweets? (about|on)\\b": 3.5, "\\bblogs? (about|on|like)\\b": 3.0, "https?://[^\\s]+": 5.0, "\\b\\w+\\.(com|org|io|ai|co|dev)\\b": 3.5,
};

/** Signals for location-bound and time-sensitive queries (English and German). */
const LOCAL_NEWS_SIGNALS: Record<string, number> = {
  // places and local businesses
  "\\bnear me\\b": 4.0, "\\bnearby\\b": 3.5, "\\blocal\\b": 3.0, "\\bin (my )?(city|area|town|neighborhood)\\b": 3.5,
  "\\brestaurants?\\b": 2.5, "\\bhotels?\\b": 2.5, "\\bcafes?\\b": 2.5, "\\bstores?\\b": 2.0, "\\bdirections? to\\b": 3.5,
  "\\bmap of\\b": 3.0, "\\bphone number\\b": 3.0, "\\baddress of\\b": 3.0, "\\bopen(ing)? hours\\b": 3.0,
  // weather and time
  "\\bweather\\b": 4.0, "\\bforecast\\b": 3.5, "\\btemperature\\b": 3.0, "\\btime in\\b": 3.0,
  // news and recency
  "\\blatest\\b": 2.5, "\\brecent\\b": 2.5, "\\btoday\\b": 2.5, "\\bbreaking\\b": 3.5, "\\bnews\\b": 2.5,
  "\\bheadlines?\\b": 3.0, "\\b202[4-9]\\b": 2.0, "\\blast (week|month|year)\\b": 2.0, "\\bin der nähe\\b": 4.0,
  // German
  "\\bin meiner nähe\\b": 4.0, "\\böffnungszeiten\\b": 3.0, "\\badresse von\\b": 3.0, "\\bweg(beschreibung)? nach\\b": 3.5,
  "\\bheute\\b": 2.5, "\\bmorgen\\b": 2.0, "\\baktuell\\b": 2.5, "\\bnachrichten\\b": 3.0,
};

/** Signals for summary-style and up-to-date context queries. */
const RAG_SIGNALS: Record<string, number> = {
  // summaries and key points
  "\\brag\\b": 4.5, "\\bcontext for\\b": 4.0, "\\bsummarize\\b": 3.5, "\\bbrief(ly)?\\b": 3.0, "\\bquick overview\\b": 3.5,
  "\\btl;?dr\\b": 4.0, "\\bkey (points|facts|info)\\b": 3.5, "\\bmain (points|takeaways)\\b": 3.5,
  "\\b(web|online)\\s+and\\s+news\\b": 4.0, "\\ball sources\\b": 3.5, "\\bcomprehensive (search|overview)\\b": 3.5,
  // current state of affairs
  "\\blatest\\s+(news|updates)\\b": 3.0, "\\bcurrent (events|situation|status)\\b": 3.5, "\\bright now\\b": 3.0,
  "\\bas of today\\b": 3.5, "\\bup.to.date\\b": 3.5, "\\breal.time\\b": 4.0, "\\blive\\b": 2.5,
  "\\bwhat'?s happening with\\b": 3.5, "\\bwhat'?s the latest\\b": 4.0, "\\bupdates?\\s+on\\b": 3.5, "\\bstatus of\\b": 3.0,
  "\\bsituation (in|with|around)\\b": 3.5,
};

/** Signals for queries that expect a short, direct answer. */
const DIRECT_ANSWER_SIGNALS: Record<string, number> = {
  "\\bwhat is\\b": 3.0, "\\bwhat are\\b": 2.5, "\\bcurrent status\\b": 4.0, "\\bstatus of\\b": 3.5, "\\bstatus\\b": 2.5,
  "\\bwhat happened with\\b": 4.0, "\\bwhat'?s happening with\\b": 4.0, "\\bas of (today|now)\\b": 4.0, "\\bthis weekend\\b": 3.5,
  "\\bevents? in\\b": 3.5, "\\bthings to do in\\b": 4.0, "\\bnear me\\b": 3.0, "\\bcan you (tell me|summarize|explain)\\b": 3.5,
  // German question words
  "\\bwann\\b": 3.0, "\\bwer\\b": 3.0, "\\bwo\\b": 2.5, "\\bwie viele\\b": 3.0,
};

/** Signals for privacy-minded, multi-source and zero-cost search requests (English and German). */
const PRIVACY_SIGNALS: Record<string, number> = {
  // privacy
  "\\bprivate(ly)?\\b": 4.0, "\\banonymous(ly)?\\b": 4.0, "\\bwithout tracking\\b": 4.5, "\\bno track(ing)?\\b": 4.5,
  "\\bprivacy\\b": 3.5, "\\bprivacy.?focused\\b": 4.5, "\\bprivacy.?first\\b": 4.5, "\\bduckduckgo alternative\\b": 4.5,
  "\\bprivate search\\b": 5.0, "\\bprivat\\b": 4.0, "\\banonym\\b": 4.0, "\\bohne tracking\\b": 4.5,
  // aggregation across engines
  "\\bdatenschutz\\b": 4.0, "\\baggregate results?\\b": 4.0, "\\bmultiple sources?\\b": 4.0, "\\bdiverse (results|perspectives|sources)\\b": 4.0,
  "\\bfrom (all|multiple|different) (engines?|sources?)\\b": 4.5, "\\bmeta.?search\\b": 5.0, "\\ball engines?\\b": 4.0,
  "\\bverschiedene quellen\\b": 4.0, "\\baus mehreren quellen\\b": 4.0, "\\balle suchmaschinen\\b": 4.5,
  // free / self-hosted
  "\\bfree search\\b": 3.5, "\\bno api cost\\b": 4.0, "\\bself.?hosted search\\b": 5.0, "\\bzero cost\\b": 3.5,
  "\\bbudget\\b(?!\\s*(laptop|phone|option))\\b": 2.5, "\\bkostenlos(e)?\\s+suche\\b": 3.5, "\\bkeine api.?kosten\\b": 4.0,
};

/** Signals for multi-source synthesis and report-style research requests. */
const EXA_DEEP_SIGNALS: Record<string, number> = {
  "\\bsynthesi[sz]e\\b": 5.0, "\\bdeep research\\b": 5.0, "\\bcomprehensive (analysis|report|overview|survey)\\b": 4.5,
  "\\bacross (multiple|many|several) (sources|documents|papers)\\b": 4.5, "\\baggregat(e|ing) (information|data|results)\\b": 4.0,
  "\\bcross.?referenc": 4.5, "\\bsec filings?\\b": 4.5, "\\bannual reports?\\b": 4.0, "\\bearnings (call|report|transcript)\\b": 4.5,
  "\\bfinancial analysis\\b": 4.0, "\\bliterature (review|survey)\\b": 5.0, "\\bacademic literature\\b": 4.5,
  "\\bstate of the (art|field|industry)\\b": 4.0, "\\bcompile (a |the )?(report|findings|results)\\b": 4.5,
  "\\bsummariz(e|ing) (research|papers|studies)\\b": 4.0, "\\bmultiple documents?\\b": 4.0, "\\bdossier\\b": 4.5,
  "\\bdue diligence\\b": 4.5, "\\bstructured (output|data|report)\\b": 4.0, "\\bmarket research\\b": 4.0,
  "\\bindustry (report|analysis|overview)\\b": 4.0, "\\bresearch (on|about|into)\\b": 4.0, "\\bwhitepaper\\b": 4.5,
  "\\btechnical report\\b": 4.0, "\\bsurvey of\\b": 4.5, "\\bmeta.?analysis\\b": 5.0, "\\bsystematic review\\b": 5.0,
  "\\bcase study\\b": 3.5, "\\bbenchmark(s|ing)?\\b": 3.5, "\\btiefenrecherche\\b": 5.0, "\\bumfassende (analyse|übersicht|recherche)\\b": 4.5,
  "\\baus mehreren quellen zusammenfassen\\b": 4.5, "\\bmarktforschung\\b": 4.0,
};

/** Signals for requests that involve weighing conflicting sources or complex trade-offs. */
const EXA_DEEP_REASONING_SIGNALS: Record<string, number> = {
  "\\bdeep.?reasoning\\b": 6.0, "\\bcomplex (analysis|reasoning|research)\\b": 4.5, "\\bcontradictions?\\b": 4.5,
  "\\breconcil(e|ing)\\b": 5.0, "\\bcritical(ly)? analyz": 4.5, "\\bweigh(ing)? (the )?evidence\\b": 4.5,
  "\\bcompeting (claims|theories|perspectives)\\b": 4.5, "\\bcomplex financial\\b": 4.5, "\\bregulatory (analysis|compliance|landscape)\\b": 4.5,
  "\\blegal analysis\\b": 4.5, "\\bcomprehensive (due diligence|investigation)\\b": 5.0, "\\bpatent (landscape|analysis|search)\\b": 4.5,
  "\\bmarket intelligence\\b": 4.5, "\\bcompetitive (intelligence|landscape)\\b": 4.5, "\\btrade.?offs?\\b": 4.0,
  "\\bpros and cons of\\b": 4.0, "\\bshould I (use|choose|pick)\\b": 3.5, "\\bwhich is better\\b": 4.0,
  "\\bkomplexe analyse\\b": 4.5, "\\bwidersprüche\\b": 4.5, "\\bquellen abwägen\\b": 4.5, "\\brechtliche analyse\\b": 4.5,
  "\\bvergleich(e|en)?\\b": 3.5,
};

/** Regex sources that recognise consumer-electronics brands and product categories. */
const BRAND_PATTERNS = [
  "\\b(apple|iphone|ipad|macbook|airpods?)\\b", "\\b(samsung|galaxy)\\b", "\\b(google|pixel)\\b", "\\b(microsoft|surface|xbox)\\b",
  "\\b(sony|playstation)\\b", "\\b(nvidia|geforce|rtx)\\b", "\\b(amd|ryzen|radeon)\\b", "\\b(intel|core i[3579])\\b",
  "\\b(dell|hp|lenovo|asus|acer)\\b", "\\b(lg|tcl|hisense)\\b", "\\b(laptop|phone|tablet|tv|monitor|headphones?|earbuds?)\\b",
  "\\b(camera|lens|drone)\\b", "\\b(watch|smartwatch|fitbit|garmin)\\b", "\\b(router|modem|wifi)\\b", "\\b(keyboard|mouse|gaming)\\b",
];
|
|
84
513
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
const
|
|
88
|
-
const
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
514
|
+
/**
 * Heuristic query classifier used for automatic provider selection.
 *
 * Each signal table maps a regular-expression source string to a weight. A query's score for a
 * table is the sum of the weights of every pattern that matches; `analyze` combines those sums
 * into one score per provider and `route` picks the highest-scoring available provider.
 */
class QueryAnalyzer {
  /**
   * Sums the weights of all patterns in `signals` that match `query` (case-insensitively).
   *
   * @param query Raw user query.
   * @param signals Map of regex source string to weight.
   * @returns The total weight plus one `{ pattern, matched, weight }` entry per matching
   *   pattern; `matched` is taken from the lower-cased query.
   */
  calculateSignalScore(query: string, signals: Record<string, number>) {
    const q = query.toLowerCase();
    const matches: any[] = [];
    let total = 0;
    for (const [pattern, weight] of Object.entries(signals)) {
      const regex = new RegExp(pattern, "i");
      const found = q.match(regex);
      if (found) {
        matches.push({ pattern, matched: found[0], weight });
        total += weight;
      }
    }
    return { total, matches };
  }

  /**
   * Returns a shopping bonus: 3 when the query matches a brand pattern and a product indicator,
   * 1.5 when it matches a brand pattern only, 0 otherwise.
   */
  detectProductBrandCombo(query: string): number {
    const hasBrand = BRAND_PATTERNS.some((p) => new RegExp(p, "i").test(query));
    const productIndicators = ["\\b(buy|price|specs?|review|vs|compare)\\b", "\\b(pro|max|plus|mini|ultra|lite)\\b", "\\b\\d+\\s*(gb|tb|inch|mm|hz)\\b"];
    const hasProduct = productIndicators.some((p) => new RegExp(p, "i").test(query));
    if (hasBrand && hasProduct) return 3;
    if (hasBrand) return 1.5;
    return 0;
  }

  /**
   * Returns the first http(s) URL or bare `name.tld` token (for the listed TLDs) found in the
   * query, or `null` when there is none.
   */
  detectUrl(query: string): string | null {
    const found = query.match(/https?:\/\/[^\s]+|\b\w+\.(com|org|io|ai|co|dev|net|app)\b/i);
    return found?.[0] || null;
  }

  /**
   * Derives a complexity score from word count, number of question words and number of clause
   * markers. The query counts as complex when the score exceeds 2.
   */
  assessQueryComplexity(query: string) {
    const words = query.trim().split(/\s+/).filter(Boolean);
    const wordCount = words.length;
    const questionWords = (query.match(/\b(what|why|how|when|where|which|who|whose|whom)\b/gi) || []).length;
    const clauseMarkers = (query.match(/\b(and|but|or|because|since|while|although|if|when)\b/gi) || []).length;
    let complexityScore = 0;
    if (wordCount > 10) complexityScore += 1.5;
    if (wordCount > 20) complexityScore += 1.0;
    if (questionWords > 1) complexityScore += 1.0;
    if (clauseMarkers > 0) complexityScore += clauseMarkers * 0.5;
    return { word_count: wordCount, question_words: questionWords, clause_markers: clauseMarkers, complexity_score: complexityScore, is_complex: complexityScore > 2 };
  }

  /**
   * Sums the weights of the recency patterns that match the query. The query is treated as
   * recency-focused when the sum exceeds 2.
   */
  detectRecencyIntent(query: string) {
    const patterns: Array<[RegExp, number]> = [
      [/\b(latest|newest|recent|current)\b/i, 2.5], [/\b(today|yesterday|this week|this month)\b/i, 3],
      [/\b(202[4-9]|2030)\b/i, 2], [/\b(breaking|live|just|now)\b/i, 3], [/\blast (hour|day|week|month)\b/i, 2.5],
    ];
    let total = 0;
    for (const [regex, weight] of patterns) if (regex.test(query)) total += weight;
    return { is_recency_focused: total > 2, score: total };
  }

  /**
   * Scores the query against every signal table, applies the brand, URL and complexity bonuses,
   * and combines the results into per-provider scores and per-provider matched signals.
   */
  analyze(query: string) {
    const shopping = this.calculateSignalScore(query, SHOPPING_SIGNALS);
    const research = this.calculateSignalScore(query, RESEARCH_SIGNALS);
    const discovery = this.calculateSignalScore(query, DISCOVERY_SIGNALS);
    const localNews = this.calculateSignalScore(query, LOCAL_NEWS_SIGNALS);
    const rag = this.calculateSignalScore(query, RAG_SIGNALS);
    const privacy = this.calculateSignalScore(query, PRIVACY_SIGNALS);
    const direct = this.calculateSignalScore(query, DIRECT_ANSWER_SIGNALS);
    const exaDeep = this.calculateSignalScore(query, EXA_DEEP_SIGNALS);
    const exaDeepReasoning = this.calculateSignalScore(query, EXA_DEEP_REASONING_SIGNALS);

    // Brand/product mentions boost the shopping score.
    const brandBonus = this.detectProductBrandCombo(query);
    if (brandBonus > 0) {
      shopping.total += brandBonus;
      shopping.matches.push({ pattern: "product_brand_combo", matched: "brand + product detected", weight: brandBonus });
    }
    // A URL or domain in the query boosts the discovery score.
    const detectedUrl = this.detectUrl(query);
    if (detectedUrl) {
      discovery.total += 5;
      discovery.matches.push({ pattern: "url_detected", matched: detectedUrl, weight: 5 });
    }
    // Complex queries boost the research score.
    const complexity = this.assessQueryComplexity(query);
    if (complexity.is_complex) {
      research.total += complexity.complexity_score;
      research.matches.push({ pattern: "query_complexity", matched: `complex query (${complexity.word_count} words)`, weight: complexity.complexity_score });
    }
    const recency = this.detectRecencyIntent(query);

    return {
      detected_url: detectedUrl,
      complexity,
      recency_focused: recency.is_recency_focused,
      recency_score: recency.score,
      exa_deep_score: exaDeep.total,
      exa_deep_reasoning_score: exaDeepReasoning.total,
      provider_scores: {
        serper: shopping.total + localNews.total + recency.score * 0.35,
        // For complex queries the complexity score is already inside research.total, so it is
        // only added here when the query is not complex.
        tavily: research.total + (complexity.is_complex ? 0 : complexity.complexity_score) + recency.score * 0.2,
        querit: research.total * 0.65 + rag.total * 0.35 + recency.score * 0.45,
        // The word boundary sits outside the group so that every alternative is anchored on
        // both sides (previously only "similar" had a leading boundary).
        exa: discovery.total + (/\b(similar|alternatives?|examples?)\b/i.test(query) ? 1 : 0) + exaDeep.total * 0.5 + exaDeepReasoning.total * 0.5,
        perplexity: direct.total + localNews.total * 0.4 + recency.score * 0.55,
        you: rag.total + recency.score * 0.25,
        searxng: privacy.total,
      },
      provider_matches: {
        serper: [...shopping.matches, ...localNews.matches],
        tavily: research.matches,
        querit: research.matches,
        exa: [...discovery.matches, ...exaDeep.matches, ...exaDeepReasoning.matches],
        perplexity: direct.matches,
        you: rag.matches,
        searxng: privacy.matches,
      },
    };
  }

  /**
   * Picks the provider with the highest score among `availableProviders`.
   *
   * Ties are broken by a fixed priority order. Confidence blends the normalized top score
   * (capped at 15) with the relative margin over the second-best score.
   *
   * @param query Raw user query.
   * @param availableProviders Providers that may be selected.
   * @returns The chosen provider with confidence, reason, rounded scores and up to five of the
   *   strongest matched signals. With no available providers it returns `"serper"` with
   *   confidence 0 and reason `"no_available_providers"`.
   */
  route(query: string, availableProviders: ProviderName[]) {
    const analysis = this.analyze(query);
    const scores = analysis.provider_scores as Record<ProviderName, number>;
    const available = Object.fromEntries(availableProviders.map((p) => [p, scores[p] ?? 0])) as Record<ProviderName, number>;
    const providers = Object.keys(available) as ProviderName[];
    if (!providers.length) {
      return { provider: "serper" as ProviderName, confidence: 0, confidence_level: "low", reason: "no_available_providers", scores: {}, top_signals: [], exa_depth: "normal" };
    }
    const maxScore = Math.max(...providers.map((p) => available[p]));
    const winners = providers.filter((p) => available[p] === maxScore);
    const priority: ProviderName[] = ["tavily", "querit", "exa", "perplexity", "serper", "you", "searxng"];
    const winner = priority.find((p) => winners.includes(p)) || winners[0];
    const secondBest = [...providers.map((p) => available[p])].sort((a, b) => b - a)[1] || 0;
    const margin = maxScore > 0 ? (maxScore - secondBest) / maxScore : 0;
    const normalizedScore = Math.min(maxScore / 15, 1);
    const confidence = maxScore === 0 ? 0 : Number((normalizedScore * 0.6 + margin * 0.4).toFixed(3));
    // Exa depth is only escalated when exa itself won the routing.
    let exaDepth: "normal" | "deep" | "deep-reasoning" = "normal";
    if (winner === "exa") {
      if ((analysis.exa_deep_reasoning_score || 0) >= 4) exaDepth = "deep-reasoning";
      else if ((analysis.exa_deep_score || 0) >= 4) exaDepth = "deep";
    }
    // Sort a copy: Array.prototype.sort works in place and would reorder the analysis data.
    const rankedSignals = [...(analysis.provider_matches[winner] || [])].sort((a: any, b: any) => b.weight - a.weight);
    return {
      provider: winner,
      confidence,
      confidence_level: confidence >= 0.7 ? "high" : confidence >= 0.4 ? "medium" : "low",
      reason: maxScore === 0 ? "no_signals_matched" : confidence >= 0.7 ? "high_confidence_match" : confidence >= 0.4 ? "moderate_confidence_match" : "low_confidence_match",
      exa_depth: exaDepth,
      scores: Object.fromEntries(providers.map((p) => [p, Number((available[p] || 0).toFixed(2))])),
      top_signals: rankedSignals.slice(0, 5).map((s: any) => ({ matched: s.matched, weight: s.weight })),
      analysis_summary: {
        query_length: query.trim().split(/\s+/).filter(Boolean).length,
        is_complex: analysis.complexity.is_complex,
        has_url: !!analysis.detected_url,
        recency_focused: analysis.recency_focused,
      },
    };
  }
}
|
|
103
655
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
],
|
|
123
|
-
{
|
|
124
|
-
description:
|
|
125
|
-
"Force a specific provider, or 'auto' for smart routing (default: auto)",
|
|
126
|
-
},
|
|
127
|
-
),
|
|
128
|
-
),
|
|
129
|
-
count: Type.Optional(
|
|
130
|
-
Type.Number({ description: "Number of results (default: 5)" }),
|
|
131
|
-
),
|
|
132
|
-
depth: Type.Optional(
|
|
133
|
-
Type.Union(
|
|
134
|
-
[
|
|
135
|
-
Type.Literal("normal"),
|
|
136
|
-
Type.Literal("deep"),
|
|
137
|
-
Type.Literal("deep-reasoning"),
|
|
138
|
-
],
|
|
139
|
-
{
|
|
140
|
-
description:
|
|
141
|
-
"Exa search depth: 'deep' synthesizes across sources (4-12s), 'deep-reasoning' for complex cross-reference analysis (12-50s). When provider is auto, depth may be auto-selected based on query complexity.",
|
|
142
|
-
},
|
|
143
|
-
),
|
|
144
|
-
),
|
|
145
|
-
time_range: Type.Optional(
|
|
146
|
-
Type.Union(
|
|
147
|
-
[
|
|
148
|
-
Type.Literal("day"),
|
|
149
|
-
Type.Literal("week"),
|
|
150
|
-
Type.Literal("month"),
|
|
151
|
-
Type.Literal("year"),
|
|
152
|
-
],
|
|
153
|
-
{
|
|
154
|
-
description:
|
|
155
|
-
"Filter results by recency. Applies to Serper (as tbs), Perplexity (as search_recency_filter), Tavily/You.com (as freshness). Useful for news and current events.",
|
|
156
|
-
},
|
|
157
|
-
),
|
|
158
|
-
),
|
|
159
|
-
include_domains: Type.Optional(
|
|
160
|
-
Type.Array(Type.String(), {
|
|
161
|
-
description:
|
|
162
|
-
"Only include results from these domains (e.g. ['arxiv.org', 'github.com']). Supported by Tavily and Exa.",
|
|
163
|
-
}),
|
|
164
|
-
),
|
|
165
|
-
exclude_domains: Type.Optional(
|
|
166
|
-
Type.Array(Type.String(), {
|
|
167
|
-
description:
|
|
168
|
-
"Exclude results from these domains (e.g. ['reddit.com', 'pinterest.com']). Supported by Tavily and Exa.",
|
|
169
|
-
}),
|
|
170
|
-
),
|
|
171
|
-
}),
|
|
172
|
-
async execute(
|
|
173
|
-
_id: string,
|
|
174
|
-
params: {
|
|
175
|
-
query: string;
|
|
176
|
-
provider?: string;
|
|
177
|
-
count?: number;
|
|
178
|
-
depth?: string;
|
|
179
|
-
time_range?: string;
|
|
180
|
-
include_domains?: string[];
|
|
181
|
-
exclude_domains?: string[];
|
|
182
|
-
},
|
|
183
|
-
) {
|
|
184
|
-
if (!fs.existsSync(scriptPath)) {
|
|
185
|
-
return {
|
|
186
|
-
content: [{ type: "text", text: `Search failed: script not found at ${scriptPath}` }],
|
|
187
|
-
};
|
|
188
|
-
}
|
|
656
|
+
/**
 * Runs a search through the Serper endpoint (`https://google.serper.dev/search`).
 *
 * @param query Search text.
 * @param apiKey Value sent in the `X-API-KEY` header.
 * @param maxResults Requested result count; also caps the returned organic results.
 * @param timeRange Optional `day` / `week` / `month` / `year`; other values are ignored.
 * @returns Organic results with position-based scores, plus the best available answer text,
 *   the knowledge graph and related search queries.
 */
async function searchSerper(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const tbsByRange: Record<string, string> = { day: "qdr:d", week: "qdr:w", month: "qdr:m", year: "qdr:y" };
  const payload: Json = { q: query, gl: "us", hl: "en", num: maxResults, autocorrect: true };
  if (timeRange && tbsByRange[timeRange]) {
    payload.tbs = tbsByRange[timeRange];
  }

  const data = await httpJson("https://google.serper.dev/search", {
    method: "POST",
    headers: { "X-API-KEY": apiKey, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const organic = (data.organic || []).slice(0, maxResults);
  // Score decreases by 0.1 per rank position.
  const results = organic.map((entry: any, rank: number) => ({
    title: entry.title || "",
    url: entry.link || "",
    snippet: entry.snippet || "",
    score: Number((1 - rank * 0.1).toFixed(2)),
    date: entry.date,
  }));

  // Answer preference: answer box answer, answer box snippet, knowledge graph, first result.
  const answer = data?.answerBox?.answer || data?.answerBox?.snippet || data?.knowledgeGraph?.description || results[0]?.snippet || "";
  const relatedSearches = (data.relatedSearches || []).map((related: any) => related.query);

  return { provider: "serper", query, results, images: [], answer, knowledge_graph: data.knowledgeGraph, related_searches: relatedSearches };
}
|
|
665
|
+
|
|
666
|
+
/**
 * Runs a basic-depth search through the Tavily endpoint (`https://api.tavily.com/search`).
 *
 * @param query Search text.
 * @param apiKey Sent as `api_key` inside the JSON body.
 * @param maxResults Requested result count; also caps the returned results.
 * @param includeDomains Optional domain allow-list, sent only when non-empty.
 * @param excludeDomains Optional domain block-list, sent only when non-empty.
 * @returns Results with Tavily's score rounded to three decimals, plus Tavily's answer text.
 */
async function searchTavily(query: string, apiKey: string, maxResults: number, includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const payload: Json = {
    api_key: apiKey,
    query,
    max_results: maxResults,
    search_depth: "basic",
    topic: "general",
    include_images: false,
    include_answer: true,
    include_raw_content: false,
  };
  if (includeDomains && includeDomains.length) {
    payload.include_domains = includeDomains;
  }
  if (excludeDomains && excludeDomains.length) {
    payload.exclude_domains = excludeDomains;
  }

  const data = await httpJson("https://api.tavily.com/search", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const hits = (data.results || []).slice(0, maxResults);
  const results = hits.map((hit: any) => ({
    title: hit.title || "",
    url: hit.url || "",
    snippet: hit.content || "",
    score: Number((hit.score || 0).toFixed(3)),
  }));

  return { provider: "tavily", query, results, images: data.images || [], answer: data.answer || "" };
}
|
|
189
674
|
|
|
190
|
-
|
|
675
|
+
/**
 * Runs a search through the Querit endpoint (`https://api.querit.ai/v1/search`).
 *
 * The request is always filtered to English results for the US; site and time-range filters
 * are added only when the corresponding arguments are provided.
 *
 * @param query Search text.
 * @param apiKey Sent as a bearer token.
 * @param maxResults Requested result count; also caps the returned results.
 * @param timeRange Optional `day` / `week` / `month` / `year`; other values are ignored.
 * @param includeDomains Optional site allow-list.
 * @param excludeDomains Optional site block-list.
 * @throws ProviderRequestError when the response carries `error_msg` or an `error_code`
 *   other than 0 or 200.
 */
async function searchQuerit(query: string, apiKey: string, maxResults: number, timeRange?: string, includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const timeMap: Record<string, string> = { day: "d1", week: "w1", month: "m1", year: "y1" };
  const hasInclude = Boolean(includeDomains?.length);
  const hasExclude = Boolean(excludeDomains?.length);

  const filters: Json = { languages: { include: ["en"] }, geo: { countries: { include: ["US"] } } };
  if (hasInclude || hasExclude) {
    filters.sites = {};
    if (hasInclude) filters.sites.include = includeDomains;
    if (hasExclude) filters.sites.exclude = excludeDomains;
  }
  if (timeRange && timeMap[timeRange]) {
    filters.timeRange = { date: timeMap[timeRange] };
  }

  const payload: Json = { query, count: maxResults, filters };
  const data = await httpJson("https://api.querit.ai/v1/search", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  // Querit can report failures in the body, so inspect it before reading results.
  const badErrorCode = data.error_code != null && ![0, 200].includes(data.error_code);
  if (data.error_msg || badErrorCode) {
    throw new ProviderRequestError(data.error_msg || `Querit request failed with error_code=${data.error_code}`);
  }

  const rows = (data?.results?.result || []).slice(0, maxResults);
  // Score decreases by 0.05 per rank position.
  const results = rows.map((row: any, rank: number) => ({
    title: row.title || titleFromUrl(row.url || ""),
    url: row.url || "",
    snippet: row.snippet || row.page_age || "",
    score: Number((1 - rank * 0.05).toFixed(3)),
    page_time: row.page_time,
    date: row.page_age,
    language: row.language,
  }));

  return {
    provider: "querit",
    query,
    results,
    images: [],
    answer: results[0]?.snippet || "",
    metadata: { search_id: data.search_id, time_range: timeRange && timeMap[timeRange] },
  };
}
|
|
191
691
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
692
|
+
/**
 * Runs a search through the Exa endpoint (`https://api.exa.ai/search`).
 *
 * In `normal` depth a `neural` search with text and highlights is requested. In `deep` /
 * `deep-reasoning` depth the depth name is sent as the search type, and the response's
 * `output` is turned into a leading "synthesis" result followed by the source results.
 *
 * @param query Search text.
 * @param apiKey Sent in the `x-api-key` header.
 * @param maxResults Requested result count; also caps the returned source results.
 * @param exaDepth `normal`, `deep` or `deep-reasoning`.
 * @param includeDomains Optional domain allow-list, sent only when non-empty.
 * @param excludeDomains Optional domain block-list, sent only when non-empty.
 * @returns The mapped results; for deep searches also the grounding citations and metadata.
 */
async function searchExa(query: string, apiKey: string, maxResults: number, exaDepth: "normal" | "deep" | "deep-reasoning", includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const isDeep = exaDepth === "deep" || exaDepth === "deep-reasoning";
  const body: Json = isDeep
    ? { query, numResults: maxResults, type: exaDepth, contents: { text: { maxCharacters: 5000, verbosity: "full" } } }
    : { query, numResults: maxResults, type: "neural", contents: { text: { maxCharacters: 2000, verbosity: "standard" }, highlights: { numSentences: 3, highlightsPerUrl: 2 } } };
  if (includeDomains?.length) body.includeDomains = includeDomains;
  if (excludeDomains?.length) body.excludeDomains = excludeDomains;
  // Deep searches pass a larger third argument to httpJson (presumably a timeout in ms — confirm against httpJson).
  const data = await httpJson("https://api.exa.ai/search", { method: "POST", headers: { "x-api-key": apiKey, "Content-Type": "application/json" }, body: JSON.stringify(body) }, isDeep ? 55000 : 30000);

  if (isDeep) {
    const deepOutput = data.output || {};
    // The synthesized content may be a string or structured data; structured data is serialized.
    const synthesis = typeof deepOutput.content === "string" ? deepOutput.content : deepOutput.content ? JSON.stringify(deepOutput.content) : "";
    // Flatten every citation of every grounding field into one list.
    const grounding: any[] = [];
    for (const field of deepOutput.grounding || []) {
      for (const cite of field.citations || []) grounding.push({ url: cite.url || "", title: cite.title || "", confidence: field.confidence, field: field.field });
    }
    const results: SearchResult[] = [];
    if (synthesis) results.push({ title: `Exa ${exaDepth.replace(/-/g, " ")} synthesis`, url: "", snippet: synthesis, full_synthesis: synthesis, score: 1, grounding: grounding.slice(0, 10), type: "synthesis" });
    for (const item of (data.results || []).slice(0, maxResults)) {
      const snippet = item.text ? String(item.text).slice(0, 800) : (item.highlights || [])[0] || "";
      results.push({ title: item.title || "", url: item.url || "", snippet, score: Number((item.score || 0).toFixed(3)), published_date: item.publishedDate, author: item.author, type: "source" });
    }
    // The fallback is only reached when there is no synthesis, and then no synthesis entry was
    // pushed, so the top source is results[0] (not results[1]).
    const answer = synthesis || results[0]?.snippet || "";
    return { provider: "exa", query, exa_depth: exaDepth, results, images: [], answer, grounding, metadata: { synthesis_length: synthesis.length, source_count: (data.results || []).length } };
  }

  const results = (data.results || []).slice(0, maxResults).map((item: any) => ({
    title: item.title || "",
    url: item.url || "",
    // Prefer the page text; otherwise join the first two highlights.
    snippet: item.text ? String(item.text).slice(0, 800) : Array.isArray(item.highlights) ? item.highlights.slice(0, 2).join(" ... ") : "",
    score: Number((item.score || 0).toFixed(3)),
    published_date: item.publishedDate,
    author: item.author,
  }));
  return { provider: "exa", query, results, images: [], answer: results[0]?.snippet || "" };
}
|
|
720
|
+
|
|
721
|
+
/**
 * Asks the `perplexity/sonar-pro` model for an answer through the chat-completions endpoint at
 * `https://api.kilo.ai/api/gateway/chat/completions`.
 *
 * The answer becomes the first result; each citation becomes a further result. When the
 * response has no `citations` array, URLs are extracted from the answer text instead.
 *
 * @param query User question, sent as the user message.
 * @param apiKey Sent as a bearer token.
 * @param maxResults Upper bound on returned results when an answer is present (the answer
 *   takes one slot, citations the rest).
 * @param timeRange Optional value forwarded as `search_recency_filter`.
 */
async function searchPerplexity(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const model = "perplexity/sonar-pro";
  const payload: Json = {
    model,
    messages: [
      { role: "system", content: "Answer with concise factual summary and include source URLs." },
      { role: "user", content: query },
    ],
    temperature: 0.2,
  };
  if (timeRange) {
    payload.search_recency_filter = timeRange;
  }

  const data = await httpJson("https://api.kilo.ai/api/gateway/chat/completions", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const answer = String(data?.choices?.[0]?.message?.content || "").trim();

  // Use the structured citations when present, otherwise the unique URLs in the answer text.
  const reported = Array.isArray(data?.citations) ? data.citations : [];
  const citations = reported.length ? reported : [...new Set(answer.match(/https?:\/\/[^\s)\]}>"']+/g) || [])];

  const results: SearchResult[] = [];
  if (answer) {
    // Strip "[n]" citation markers from the snippet.
    const cleaned = answer.replace(/\[\d+\]/g, "").trim().slice(0, 500);
    results.push({ title: `Perplexity Answer: ${query.slice(0, 80)}`, url: "https://www.perplexity.ai", snippet: cleaned, score: 1.0 });
  }

  const citationLimit = Math.max(0, maxResults - 1);
  citations.slice(0, citationLimit).forEach((citation: any, index: number) => {
    const isPlainUrl = typeof citation === "string";
    const url = isPlainUrl ? citation : citation?.url || "";
    const title = isPlainUrl ? titleFromUrl(url) : citation?.title || titleFromUrl(url);
    results.push({ title, url, snippet: `Source cited in Perplexity answer [citation ${index + 1}]`, score: Number((0.9 - index * 0.1).toFixed(3)) });
  });

  return { provider: "perplexity", query, results, images: [], answer, metadata: { model, usage: data.usage || {} } };
}
|
|
747
|
+
|
|
748
|
+
/**
 * Runs a search through the You.com endpoint (`https://ydc-index.io/v1/search`).
 *
 * @param query Search text.
 * @param apiKey Sent in the `X-API-KEY` header.
 * @param maxResults Requested result count; also caps the returned web results.
 * @param timeRange Optional value forwarded as the `freshness` query parameter.
 * @returns Web results with position-based scores, up to five news items, and an answer
 *   built from the snippets of the first three web results (at most 1000 characters).
 */
async function searchYou(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const endpoint = new URL("https://ydc-index.io/v1/search");
  const queryParams: Array<[string, string]> = [
    ["query", query],
    ["count", String(maxResults)],
    ["safesearch", "moderate"],
    ["country", "US"],
    ["language", "EN"],
  ];
  if (timeRange) queryParams.push(["freshness", timeRange]);
  for (const [name, value] of queryParams) {
    endpoint.searchParams.set(name, value);
  }

  const data = await httpJson(endpoint.toString(), { method: "GET", headers: { "X-API-KEY": apiKey, Accept: "application/json" } });

  const webHits = data?.results?.web || [];
  const newsHits = data?.results?.news || [];

  // Score decreases by 0.05 per rank position.
  const results = webHits.slice(0, maxResults).map((hit: any, rank: number) => ({
    title: hit.title || "",
    url: hit.url || "",
    snippet: hit?.snippets?.[0] || hit.description || "",
    score: Number((1 - rank * 0.05).toFixed(3)),
    date: hit.page_age,
    source: "web",
    additional_snippets: Array.isArray(hit.snippets) ? hit.snippets.slice(1, 3) : undefined,
    thumbnail: hit.thumbnail_url,
    favicon: hit.favicon_url,
  }));

  const leadingSnippets = results.slice(0, 3).map((r: any) => r.snippet).filter(Boolean);
  const answer = leadingSnippets.join(" ").slice(0, 1000);

  return {
    provider: "you",
    query,
    results,
    news: newsHits.slice(0, 5),
    images: [],
    answer,
    metadata: { search_uuid: data?.metadata?.search_uuid, latency: data?.metadata?.latency },
  };
}
|
|
763
|
+
|
|
764
|
+
/**
 * Runs a search against a SearXNG instance's `/search` JSON endpoint.
 *
 * The instance URL is first passed through `validateSearxngUrl`; the value it returns is used
 * as the base URL.
 *
 * @param query Search text.
 * @param instanceUrl Configured SearXNG instance URL.
 * @param maxResults Caps the returned results.
 * @param timeRange Optional value forwarded as the `time_range` query parameter.
 * @param env Runtime environment handed to `validateSearxngUrl`.
 * @returns Results tagged with engine and category, plus suggestions, corrections and the set
 *   of engines that contributed to the returned results.
 */
async function searchSearxng(query: string, instanceUrl: string, maxResults: number, timeRange: string | undefined, env: Record<string, string>): Promise<SearchResponse> {
  const base = await validateSearxngUrl(instanceUrl, env);

  const endpoint = new URL(`${base}/search`);
  endpoint.searchParams.set("q", query);
  endpoint.searchParams.set("format", "json");
  endpoint.searchParams.set("language", "en");
  endpoint.searchParams.set("safesearch", "0");
  if (timeRange) {
    endpoint.searchParams.set("time_range", timeRange);
  }

  const data = await httpJson(endpoint.toString(), { method: "GET", headers: { Accept: "application/json" } });

  const hits = (data.results || []).slice(0, maxResults);
  const results = hits.map((hit: any, rank: number) => ({
    title: hit.title || "",
    url: hit.url || "",
    snippet: hit.content || "",
    // Use the instance's score when present, otherwise a position-based one.
    score: Number((hit.score ?? (1 - rank * 0.05)).toFixed(3)),
    engine: hit.engine || "unknown",
    category: hit.category || "general",
    date: hit.publishedDate,
  }));
  const enginesUsed = new Set<string>(results.map((r: any) => r.engine));

  // Answer preference: first direct answer, first infobox, first result snippet.
  let answer: string;
  if (Array.isArray(data.answers) && data.answers[0]) {
    answer = String(data.answers[0]);
  } else if (Array.isArray(data.infoboxes) && data.infoboxes[0]) {
    answer = String(data.infoboxes[0].content || data.infoboxes[0].infobox || "");
  } else {
    answer = results[0]?.snippet || "";
  }

  return {
    provider: "searxng",
    query,
    results,
    images: [],
    answer,
    suggestions: data.suggestions || [],
    corrections: data.corrections || [],
    metadata: { number_of_results: data.number_of_results, engines_used: [...enginesUsed], instance_url: base },
  };
}
|
|
781
|
+
|
|
782
|
+
/**
 * Calls `fn`, retrying after transient provider errors.
 *
 * There is one attempt per entry in `RETRY_BACKOFF_MS`, with a sleep of the matching duration
 * between attempts. Retrying stops immediately for errors that are not a transient
 * `ProviderRequestError`, and for status codes 401 and 403.
 *
 * @param fn Operation to run.
 * @returns The first successful result.
 * @throws The last error seen once retrying stops.
 */
async function executeWithRetry(fn: () => Promise<SearchResponse>): Promise<SearchResponse> {
  const maxAttempts = RETRY_BACKOFF_MS.length;
  let failure: any;
  let attempt = 0;
  while (attempt < maxAttempts) {
    try {
      return await fn();
    } catch (error: any) {
      failure = error;
      const retryable =
        error instanceof ProviderRequestError &&
        error.transient &&
        error.statusCode !== 401 &&
        error.statusCode !== 403;
      if (!retryable) break;
      // No point sleeping after the final attempt.
      const isFinalAttempt = attempt >= maxAttempts - 1;
      if (!isFinalAttempt) await sleep(RETRY_BACKOFF_MS[attempt]);
    }
    attempt += 1;
  }
  throw failure;
}
|
|
795
|
+
|
|
796
|
+
/**
 * Plugin entry point: registers the optional `web_search_plus` tool on the host API.
 *
 * The tool's `execute` handler:
 *  1. normalizes the parameters (query, count clamped to 1..10, provider, time range, domains);
 *  2. picks a provider, either the one requested or the one `QueryAnalyzer.route` selects;
 *  3. builds a fallback chain from the configured providers, skipping those in cooldown;
 *  4. returns a cached response when one exists for the chosen provider;
 *  5. otherwise tries the chain with retries, merging and de-duplicating results when more
 *     than one provider succeeded;
 *  6. caches and returns the response as JSON text.
 *
 * Failures are returned as text content rather than thrown.
 *
 * @param api Host plugin API; `api.pluginConfig` and `api.registerTool` are used.
 */
export default function (api: any) {
  const pluginConfig: Record<string, string> = (api.pluginConfig ?? {}) as Record<string, string>;
  const runtimeEnv = getRuntimeEnv(pluginConfig);

  api.registerTool(
    {
      name: "web_search_plus",
      description:
        "Search the web with intelligent multi-provider routing across Serper, Tavily, Querit, Exa, Perplexity, You.com, and SearXNG. Auto-selects the best provider, caches results, retries transient failures, and falls back across providers.",
      parameters: PARAMETERS_SCHEMA,
      async execute(_id: string, params: ToolParams) {
        try {
          const query = String(params.query || "").trim();
          if (!query) return { content: [{ type: "text", text: "Search failed: query is required" }] };

          // A non-numeric count parses to NaN, and Math.max/Math.min propagate NaN, which would
          // leak into request bodies and slice() limits. Fall back to the default of 5 instead.
          const parsedCount = Math.floor(Number(params.count || 5));
          const count = Number.isNaN(parsedCount) ? 5 : Math.max(1, Math.min(10, parsedCount));
          const requestedProvider = (params.provider || "auto") as ProviderName | "auto";
          const timeRange = toTimeRange(params.time_range);
          const includeDomains = Array.isArray(params.include_domains) ? params.include_domains.filter(Boolean) : undefined;
          const excludeDomains = Array.isArray(params.exclude_domains) ? params.exclude_domains.filter(Boolean) : undefined;

          // Only providers with a configured key take part in routing and fallback.
          const allProviders: ProviderName[] = ["serper", "tavily", "querit", "exa", "perplexity", "you", "searxng"];
          const configuredProviders = allProviders.filter((p) => !!getApiKey(p, runtimeEnv));

          let routingInfo: Json;
          let provider: ProviderName;
          if (requestedProvider === "auto") {
            const analyzer = new QueryAnalyzer();
            const routing = analyzer.route(query, configuredProviders);
            provider = routing.provider;
            routingInfo = { auto_routed: true, provider, confidence: routing.confidence, confidence_level: routing.confidence_level, reason: routing.reason, top_signals: routing.top_signals, scores: routing.scores, exa_depth: routing.exa_depth };
          } else {
            provider = requestedProvider;
            routingInfo = { auto_routed: false, provider };
          }

          // Fallback chain: the chosen provider first, then the other configured providers in
          // priority order. Providers currently in cooldown are skipped and reported.
          const priority: ProviderName[] = ["tavily", "querit", "exa", "perplexity", "serper", "you", "searxng"];
          const providersToTry: ProviderName[] = [provider, ...priority.filter((p) => p !== provider && configuredProviders.includes(p))];
          const eligibleProviders: ProviderName[] = [];
          const cooldownSkips: Json[] = [];
          for (const p of providersToTry) {
            const cooldown = providerInCooldown(p);
            if (cooldown.inCooldown) cooldownSkips.push({ provider: p, cooldown_remaining_seconds: cooldown.remaining });
            else eligibleProviders.push(p);
          }
          // If everything is cooling down, still try the chosen provider rather than nothing.
          if (!eligibleProviders.length) eligibleProviders.push(provider);

          // Domain lists are sorted so their order does not affect the cache context.
          const cacheContext = {
            time_range: timeRange,
            include_domains: includeDomains ? [...includeDomains].sort() : null,
            exclude_domains: excludeDomains ? [...excludeDomains].sort() : null,
            exa_depth: params.depth || routingInfo.exa_depth || "normal",
          };

          const cached = cacheGet(query, provider, count, DEFAULT_CACHE_TTL, cacheContext);
          if (cached) {
            const result = { ...cached };
            // Strip internal cache bookkeeping fields before returning.
            for (const key of Object.keys(result)) if (key.startsWith("_cache_")) delete result[key];
            result.cached = true;
            result.cache_age_seconds = Math.floor(Date.now() / 1000 - Number(cached._cache_timestamp || 0));
            result.routing = { ...routingInfo, ...(cooldownSkips.length ? { cooldown_skips: cooldownSkips } : {}) };
            return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput(result)) }] };
          }

          const errors: Json[] = [];
          const successes: Array<[string, SearchResponse]> = [];

          // Dispatches to the provider-specific search function. validateApiKey supplies the
          // credential (for searxng that value is passed on as the instance URL).
          const runProvider = async (p: ProviderName): Promise<SearchResponse> => {
            const key = validateApiKey(p, runtimeEnv);
            if (p === "serper") return searchSerper(query, key, count, timeRange);
            if (p === "tavily") return searchTavily(query, key, count, includeDomains, excludeDomains);
            if (p === "querit") return searchQuerit(query, key, count, timeRange, includeDomains, excludeDomains);
            if (p === "exa") {
              const exaDepth = (params.depth || routingInfo.exa_depth || "normal") as "normal" | "deep" | "deep-reasoning";
              return searchExa(query, key, count, exaDepth, includeDomains, excludeDomains);
            }
            if (p === "perplexity") return searchPerplexity(query, key, count, timeRange);
            if (p === "you") return searchYou(query, key, count, timeRange);
            return searchSearxng(query, key, count, timeRange, runtimeEnv);
          };

          for (const p of eligibleProviders) {
            try {
              const result = await executeWithRetry(() => runProvider(p));
              resetProviderHealth(p);
              successes.push([p, result]);
              // Stop after a success unless an earlier provider failed and this one returned
              // fewer results than requested; in that case keep going to collect more.
              if ((result.results || []).length >= count || errors.length === 0) break;
            } catch (error: any) {
              const message = sanitizeOutput(String(error?.message || error));
              const cooldown = markProviderFailure(p, message);
              errors.push({ provider: p, error: message, cooldown_seconds: cooldown.cooldown_seconds });
            }
          }

          if (!successes.length) {
            return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput({ error: "All providers failed", provider, query, routing: routingInfo, provider_errors: errors, cooldown_skips: cooldownSkips })) }] };
          }

          let result: SearchResponse;
          if (successes.length === 1) {
            result = successes[0][1];
          } else {
            // Several providers succeeded: keep the first response as the base and replace its
            // results with the merged, de-duplicated list.
            result = { ...successes[0][1] };
            const deduped = deduplicateResultsAcrossProviders(successes, count);
            result.results = deduped.results;
            result.deduplicated = deduped.dedupCount > 0;
            result.metadata = { ...(result.metadata || {}), dedup_count: deduped.dedupCount, providers_merged: successes.map(([p]) => p) };
          }

          const successfulProvider = successes[0][0] as ProviderName;
          if (successfulProvider !== provider) {
            routingInfo = { ...routingInfo, fallback_used: true, original_provider: provider, provider: successfulProvider, fallback_errors: errors };
          }
          if (cooldownSkips.length) routingInfo.cooldown_skips = cooldownSkips;
          result.routing = routingInfo;
          result.cached = false;
          // Ensure the dedup fields are always present in the response.
          if (!(result as any).metadata) result.metadata = {};
          if ((result as any).deduplicated == null) (result as any).deduplicated = false;
          if ((result.metadata as any).dedup_count == null) (result.metadata as any).dedup_count = 0;

          cachePut(query, successfulProvider, count, result, cacheContext);

          return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput(result)) }] };
        } catch (error: any) {
          return { content: [{ type: "text", text: `Search failed: ${sanitizeOutput(String(error?.message || error))}` }] };
        }
      },
    },
    { optional: true },
  );
}
|