web-search-plus-plugin 1.4.0 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -1,84 +1,31 @@
1
- import { spawn } from "child_process";
1
+ import crypto from "crypto";
2
2
  import fs from "fs";
3
3
  import path from "path";
4
4
  import { fileURLToPath } from "url";
5
+ import dns from "dns/promises";
6
+ import net from "net";
5
7
 
6
8
  function getPluginDir(): string {
9
+ // When OpenClaw transpiles plugins, import.meta.url may point to a temp dir.
10
+ // Check for the known extension path first.
11
+ const knownPath = path.join(process.env.HOME || "/root", ".openclaw", "extensions", "web-search-plus-plugin");
12
+ if (fs.existsSync(path.join(knownPath, "package.json"))) return knownPath;
7
13
  try {
8
14
  if (typeof __dirname !== "undefined") return __dirname;
9
15
  } catch {}
10
16
  try {
11
17
  return path.dirname(fileURLToPath(import.meta.url));
12
18
  } catch {}
13
- return path.join(process.cwd(), "skills", "web-search-plus-plugin");
14
- }
15
-
16
- const SENSITIVE_PATTERN = /(?:key|token|secret|password|api[_-]?key)\s*[=:]\s*\S+/gi;
17
-
18
- function sanitizeOutput(text: string): string {
19
- return text.replace(SENSITIVE_PATTERN, "[REDACTED]");
20
- }
21
-
22
- function loadEnvFile(envPath: string): Record<string, string> {
23
- if (!fs.existsSync(envPath)) return {};
24
- const env: Record<string, string> = {};
25
- const lines = fs.readFileSync(envPath, "utf8").split("\n");
26
- for (const line of lines) {
27
- const trimmed = line.trim();
28
- if (!trimmed || trimmed.startsWith("#")) continue;
29
- const stripped = trimmed.startsWith("export ") ? trimmed.slice(7) : trimmed;
30
- const eqIdx = stripped.indexOf("=");
31
- if (eqIdx < 0) continue;
32
- const key = stripped.slice(0, eqIdx).trim();
33
- const val = stripped.slice(eqIdx + 1).trim().replace(/^['"]|['"]$/g, "");
34
- if (key) env[key] = val;
35
- }
36
- return env;
37
- }
38
-
39
- function runPython(
40
- args: string[],
41
- env: NodeJS.ProcessEnv,
42
- timeoutMs: number,
43
- ): Promise<{ stdout: string; stderr: string; code: number }> {
44
- return new Promise((resolve) => {
45
- const child = spawn("python3", args, { env, shell: false });
46
- let stdout = "";
47
- let stderr = "";
48
- let settled = false;
49
-
50
- const timer = setTimeout(() => {
51
- if (!settled) {
52
- settled = true;
53
- child.kill();
54
- resolve({ stdout: "", stderr: "Search timed out", code: 1 });
55
- }
56
- }, timeoutMs);
57
-
58
- child.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
59
- child.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
60
-
61
- child.on("close", (code: number | null) => {
62
- if (!settled) {
63
- settled = true;
64
- clearTimeout(timer);
65
- resolve({ stdout, stderr, code: code ?? 1 });
66
- }
67
- });
68
-
69
- child.on("error", (err: Error) => {
70
- if (!settled) {
71
- settled = true;
72
- clearTimeout(timer);
73
- const safeMsg = (err as any).code === "ENOENT" ? "python3 not found" : "Process error";
74
- resolve({ stdout: "", stderr: safeMsg, code: 1 });
75
- }
76
- });
77
- });
19
+ return process.cwd();
78
20
  }
79
21
 
80
22
  const PLUGIN_DIR = getPluginDir();
81
- const scriptPath = path.join(PLUGIN_DIR, "scripts", "search.py");
23
+ const CACHE_DIR = path.join(PLUGIN_DIR, ".cache");
24
+ const PROVIDER_HEALTH_FILE = path.join(CACHE_DIR, "provider_health.json");
25
+ const DEFAULT_CACHE_TTL = 3600;
26
+ const RETRY_BACKOFF_MS = [1000, 3000, 9000];
27
+ const COOLDOWN_STEPS_SECONDS = [60, 300, 1500, 3600];
28
+ const TRANSIENT_HTTP_CODES = new Set([408, 425, 429, 500, 502, 503, 504]);
82
29
 
83
30
  const PARAMETERS_SCHEMA = {
84
31
  type: "object",
@@ -88,36 +35,260 @@ const PARAMETERS_SCHEMA = {
88
35
  provider: {
89
36
  type: "string",
90
37
  enum: ["serper", "tavily", "querit", "exa", "perplexity", "you", "searxng", "auto"],
91
- description: "Force a specific provider, or 'auto' for smart routing (default: auto)",
38
+ description: "Force a provider, or use auto routing (default: auto)",
92
39
  },
93
40
  count: { type: "number", description: "Number of results (default: 5)" },
94
41
  depth: {
95
42
  type: "string",
96
43
  enum: ["normal", "deep", "deep-reasoning"],
97
- description: "Exa search depth: 'deep' synthesizes across sources (4-12s), 'deep-reasoning' for complex cross-reference analysis (12-50s). When provider is auto, depth may be auto-selected based on query complexity.",
44
+ description: "Exa depth when using Exa or when auto-routing chooses Exa.",
98
45
  },
99
46
  time_range: {
100
47
  type: "string",
101
48
  enum: ["day", "week", "month", "year"],
102
- description: "Filter results by recency. Applies to Serper (as tbs), Perplexity (as search_recency_filter), Tavily/You.com (as freshness). Useful for news and current events.",
49
+ description: "Recency filter where supported.",
103
50
  },
104
51
  include_domains: {
105
52
  type: "array",
106
53
  items: { type: "string" },
107
- description: "Only include results from these domains (e.g. ['arxiv.org', 'github.com']). Supported by Tavily and Exa.",
54
+ description: "Only include results from these domains (Tavily, Exa, Querit where supported).",
108
55
  },
109
56
  exclude_domains: {
110
57
  type: "array",
111
58
  items: { type: "string" },
112
- description: "Exclude results from these domains (e.g. ['reddit.com', 'pinterest.com']). Supported by Tavily and Exa.",
59
+ description: "Exclude results from these domains (Tavily, Exa, Querit where supported).",
113
60
  },
114
61
  },
115
62
  };
116
63
 
117
- export default function (api: any) {
118
- // Bridge OpenClaw config fields to env vars expected by search.py
119
- const configEnv: Record<string, string> = {};
120
- const pluginConfig: Record<string, string> = (api.pluginConfig ?? {}) as Record<string, string>;
64
+ type Json = Record<string, any>;
65
+ type ProviderName = "serper" | "tavily" | "querit" | "exa" | "perplexity" | "you" | "searxng";
66
+ type ToolParams = {
67
+ query: string;
68
+ provider?: ProviderName | "auto";
69
+ count?: number;
70
+ depth?: "normal" | "deep" | "deep-reasoning";
71
+ time_range?: "day" | "week" | "month" | "year";
72
+ include_domains?: string[];
73
+ exclude_domains?: string[];
74
+ };
75
+
76
+ type SearchResult = {
77
+ title: string;
78
+ url: string;
79
+ snippet: string;
80
+ score?: number;
81
+ [key: string]: any;
82
+ };
83
+
84
+ type SearchResponse = {
85
+ provider: string;
86
+ query: string;
87
+ results: SearchResult[];
88
+ images?: string[];
89
+ answer?: string;
90
+ metadata?: Json;
91
+ [key: string]: any;
92
+ };
93
+
94
/**
 * Raised when a provider cannot be used because of missing or invalid
 * configuration (for example a missing API key or a rejected instance URL).
 */
class ProviderConfigError extends Error {
  constructor(message?: string) {
    super(message);
    // Set the name explicitly so logs and String(err) identify the error type,
    // in line with ProviderRequestError.
    this.name = "ProviderConfigError";
  }
}
95
/**
 * Error describing a failed provider request.
 *
 * `statusCode` carries the HTTP status when one is known, and `transient`
 * marks failures flagged as temporary by the code that raised the error.
 */
class ProviderRequestError extends Error {
  constructor(
    message: string,
    public statusCode?: number,
    public transient: boolean = false,
  ) {
    super(message);
    this.name = "ProviderRequestError";
  }
}
105
+
106
/**
 * Regular expressions for substrings that look like credentials.
 * Only the query-parameter pattern has capture groups; its first group is the
 * `?key=` / `&token=` prefix that is kept in front of the redaction marker.
 */
const SENSITIVE_PATTERNS: RegExp[] = [
  // Prefixed keys such as sk_..., pk_..., rk_..., api_..., tok_...
  /\b(?:sk|pk|rk|api|tok)_[A-Za-z0-9\-_]{10,}\b/g,
  // HTTP bearer credentials
  /\bBearer\s+[A-Za-z0-9\-._~+/]+=*\b/gi,
  // key=value / key: value assignments
  /\b(?:key|token|secret|password|api[_-]?key)\s*[:=]\s*[^\s,"'}]+/gi,
  // Credentials passed as URL query parameters (group 1 = parameter prefix)
  /([?&](?:api[_-]?key|key|token|access[_-]?token|auth|authorization)=)([^&#\s]+)/gi,
  // Three long dot-separated segments (JWT-shaped strings)
  /\b[A-Za-z0-9_-]{24,}\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}\b/g,
];

/**
 * Recursively redacts credential-looking content.
 *
 * - Strings: every match of SENSITIVE_PATTERNS becomes "[REDACTED]" (the
 *   query-parameter pattern keeps its `?name=` prefix).
 * - Arrays: each element is sanitized.
 * - Objects: values under keys that look like credentials are replaced by
 *   "[REDACTED]"; all other values are sanitized recursively into a new object.
 * - Anything else is returned unchanged.
 *
 * @param input Value to sanitize.
 * @returns A sanitized copy (or the input itself for non-container primitives).
 */
function sanitizeOutput(input: any): any {
  if (typeof input === "string") {
    let out = input;
    for (const pattern of SENSITIVE_PATTERNS) {
      // For patterns without capture groups the second replacer argument is the
      // numeric match offset, not a group. Only a string is a real prefix group;
      // treating a truthy offset as a prefix would leak digits into the output.
      out = out.replace(pattern, (_match: string, firstGroup: unknown) =>
        typeof firstGroup === "string" ? `${firstGroup}[REDACTED]` : "[REDACTED]",
      );
    }
    return out;
  }
  if (Array.isArray(input)) return input.map((v) => sanitizeOutput(v));
  if (input && typeof input === "object") {
    const result: any = {};
    for (const [k, v] of Object.entries(input)) {
      if (/(?:api[_-]?key|token|secret|password|authorization)/i.test(k)) {
        result[k] = "[REDACTED]";
      } else {
        result[k] = sanitizeOutput(v);
      }
    }
    return result;
  }
  return input;
}
136
+
137
/**
 * Creates `dir` together with any missing parent directories.
 * Does nothing when the directory already exists.
 */
function ensureDir(dir: string): void {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
140
+
141
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
144
+
145
/**
 * Reads `file` as UTF-8 and parses it as JSON.
 *
 * @param file Path of the file to read.
 * @param fallback Value returned when the file cannot be read or parsed.
 * @returns The parsed JSON value, or `fallback` on any failure.
 */
function readJsonFile(file: string, fallback: any): any {
  let raw: string;
  try {
    raw = fs.readFileSync(file, "utf8");
  } catch {
    return fallback;
  }
  try {
    return JSON.parse(raw);
  } catch {
    return fallback;
  }
}
152
+
153
/**
 * Serializes `value` as 2-space-indented JSON and writes it to `file` (UTF-8),
 * creating the parent directory first when it does not exist.
 */
function writeJsonFile(file: string, value: any): void {
  const parentDir = path.dirname(file);
  fs.mkdirSync(parentDir, { recursive: true });
  const serialized = JSON.stringify(value, null, 2);
  fs.writeFileSync(file, serialized, "utf8");
}
157
+
158
/** Returns the hex-encoded SHA-256 digest of `input`. */
function sha256(input: string): string {
  return crypto.createHash("sha256").update(input).digest("hex");
}

/**
 * Derives a 32-hex-character cache key from the search inputs.
 *
 * The inputs are merged into one object (entries of `params` override
 * `query`, `provider` and `maxResults` on a name clash), serialized with object
 * keys sorted at every nesting level, and hashed with SHA-256.
 *
 * Keys are sorted through a replacer function rather than by passing a key
 * array to JSON.stringify: a key array acts as an allowlist at every depth, so
 * properties of nested parameter objects would be dropped and different
 * parameters could collide on the same cache key. For flat parameters the
 * serialized form is unchanged.
 *
 * @param query Search query text.
 * @param provider Provider identifier.
 * @param maxResults Requested number of results.
 * @param params Optional extra parameters that influence the result.
 * @returns The first 32 hex characters of the SHA-256 digest.
 */
function buildCacheKey(query: string, provider: string, maxResults: number, params?: Json): string {
  const keyFields = { query, provider, maxResults, ...(params || {}) };
  const canonical = JSON.stringify(keyFields, (_key: string, value: unknown) => {
    if (value !== null && typeof value === "object" && !Array.isArray(value)) {
      const source = value as Record<string, unknown>;
      // Rebuild plain objects with sorted keys so property order never affects the hash.
      return Object.fromEntries(Object.keys(source).sort().map((k) => [k, source[k]]));
    }
    return value;
  });
  return sha256(canonical).slice(0, 32);
}
165
+
166
/** Returns the path of the JSON cache file for `cacheKey` inside CACHE_DIR. */
function getCachePath(cacheKey: string): string {
  const fileName = `${cacheKey}.json`;
  return path.join(CACHE_DIR, fileName);
}
169
+
170
/**
 * Looks up a cached response for the given search inputs.
 *
 * The entry is returned only when its `_cache_timestamp` (seconds since the
 * epoch) is present and not older than `ttl` seconds. A missing, unreadable,
 * unparsable, timestamp-less or expired entry is deleted on a best-effort
 * basis and reported as a miss.
 *
 * @returns The cached payload, or `null` on a miss.
 */
function cacheGet(query: string, provider: string, maxResults: number, ttl: number, params?: Json): any | null {
  const file = getCachePath(buildCacheKey(query, provider, maxResults, params));

  // Best-effort removal of a stale or broken entry; always reports a miss.
  const discard = (): null => {
    try {
      fs.unlinkSync(file);
    } catch {}
    return null;
  };

  let cached: any;
  try {
    cached = JSON.parse(fs.readFileSync(file, "utf8"));
    const storedAt = Number(cached._cache_timestamp || 0);
    const ageSeconds = Date.now() / 1000 - storedAt;
    if (!storedAt || ageSeconds > ttl) return discard();
  } catch {
    return discard();
  }
  return cached;
}
186
+
187
/**
 * Stores `result` in the cache under the key derived from the search inputs.
 *
 * The written payload is `result` plus bookkeeping fields (`_cache_timestamp`
 * in whole seconds since the epoch, `_cache_key`, and the inputs the key was
 * derived from); bookkeeping fields win over same-named fields in `result`.
 */
function cachePut(query: string, provider: string, maxResults: number, result: any, params?: Json): void {
  ensureDir(CACHE_DIR);
  const key = buildCacheKey(query, provider, maxResults, params);
  const bookkeeping = {
    _cache_timestamp: Math.floor(Date.now() / 1000),
    _cache_key: key,
    _cache_query: query,
    _cache_provider: provider,
    _cache_max_results: maxResults,
    _cache_params: params || {},
  };
  writeJsonFile(getCachePath(key), { ...result, ...bookkeeping });
}
202
+
203
/**
 * Loads the persisted provider health state from PROVIDER_HEALTH_FILE.
 *
 * @returns The stored state object, or an empty object when the file is
 *   missing, unreadable, unparsable, or does not contain a JSON object.
 */
function loadProviderHealth(): Json {
  const parsed = readJsonFile(PROVIDER_HEALTH_FILE, {});
  // Valid JSON that is not an object (null, an array, a number, ...) cannot
  // hold per-provider entries; callers index into and assign onto the result,
  // which throws for null. Treat such content like a missing file.
  const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return isPlainObject ? parsed : {};
}
206
+
207
/** Persists the provider health `state` to PROVIDER_HEALTH_FILE as JSON. */
function saveProviderHealth(state: Json): void {
  const target = PROVIDER_HEALTH_FILE;
  writeJsonFile(target, state);
}
210
+
211
/**
 * Reports whether `provider` is currently cooling down after failures.
 *
 * @returns `inCooldown` is true while the stored `cooldown_until` (seconds
 *   since the epoch) lies in the future; `remaining` is the number of seconds
 *   left, never below zero.
 */
function providerInCooldown(provider: string): { inCooldown: boolean; remaining: number } {
  const entry = loadProviderHealth()?.[provider];
  const until = Number(entry?.cooldown_until || 0);
  const nowSeconds = Math.floor(Date.now() / 1000);
  const secondsLeft = until - nowSeconds;
  const active = secondsLeft > 0;
  return { inCooldown: active, remaining: Math.max(0, secondsLeft) };
}
217
+
218
/**
 * Records a failure for `provider` and starts (or extends) its cooldown.
 *
 * The failure count is incremented and selects the cooldown duration from
 * COOLDOWN_STEPS_SECONDS, staying on the last step once the count exceeds the
 * table. The sanitized error message and timestamps are persisted.
 *
 * @param provider Provider identifier.
 * @param message Error message to store (sanitized before persisting).
 * @returns The health entry stored for the provider.
 */
function markProviderFailure(provider: string, message: string): Json {
  const health = loadProviderHealth();
  const nowSeconds = Math.floor(Date.now() / 1000);
  const previousFailures = Number(health?.[provider]?.failure_count || 0);
  const failureCount = previousFailures + 1;
  const stepIndex = Math.min(failureCount - 1, COOLDOWN_STEPS_SECONDS.length - 1);
  const cooldown = COOLDOWN_STEPS_SECONDS[stepIndex];

  health[provider] = {
    failure_count: failureCount,
    cooldown_until: nowSeconds + cooldown,
    cooldown_seconds: cooldown,
    last_error: sanitizeOutput(message),
    last_failure_at: nowSeconds,
  };
  saveProviderHealth(health);
  return health[provider];
}
233
+
234
/**
 * Clears the stored failure state of `provider`.
 * The health file is rewritten only when an entry for the provider existed.
 */
function resetProviderHealth(provider: string): void {
  const health = loadProviderHealth();
  if (!health[provider]) return;
  delete health[provider];
  saveProviderHealth(health);
}
241
+
242
/**
 * Produces a comparison key for a result URL, used to detect duplicates.
 *
 * The key ignores the scheme, a leading `www.`, host letter case, one trailing
 * slash of the path, the fragment, and common tracking parameters (`utm_*`,
 * `fbclid`, `gclid`). Remaining query parameters are kept, in sorted order,
 * because they frequently identify the page itself (e.g. `watch?v=...`,
 * `?id=...`); dropping them would collapse distinct results into one.
 *
 * @param url Result URL as returned by a provider.
 * @returns The normalized key; for input that is not a parseable URL, the
 *   trimmed, lower-cased input.
 */
function normalizeResultUrl(url: string): string {
  try {
    const u = new URL(url.trim());
    const host = u.hostname.replace(/^www\./i, "").toLowerCase();
    const pathname = u.pathname.replace(/\/$/, "");

    const keptParams: string[] = [];
    u.searchParams.forEach((value, key) => {
      // Tracking parameters do not change which page is shown.
      if (!/^(?:utm_.*|fbclid|gclid)$/i.test(key)) keptParams.push(`${key}=${value}`);
    });
    keptParams.sort();

    const base = `${host}${pathname}`;
    return keptParams.length ? `${base}?${keptParams.join("&")}` : base;
  } catch {
    return url.trim().toLowerCase();
  }
}
252
+
253
/**
 * Merges result lists from several providers, dropping repeated URLs.
 *
 * Providers are processed in the given order and the first occurrence of a
 * normalized URL wins. Results whose URL normalizes to an empty key are always
 * kept. Each kept result is tagged with its provider unless it already carries
 * one. Collection stops as soon as `maxResults` results have been gathered.
 *
 * @param resultsByProvider Pairs of provider name and that provider's response.
 * @param maxResults Number of results after which collection stops.
 * @returns The merged results and the number of duplicates skipped.
 */
function deduplicateResultsAcrossProviders(resultsByProvider: Array<[string, SearchResponse]>, maxResults: number): { results: SearchResult[]; dedupCount: number } {
  const unique: SearchResult[] = [];
  const seenKeys = new Set<string>();
  let duplicates = 0;

  for (const [providerName, response] of resultsByProvider) {
    const items = response.results || [];
    for (const entry of items) {
      const key = normalizeResultUrl(entry.url || "");
      if (key) {
        if (seenKeys.has(key)) {
          duplicates += 1;
          continue;
        }
        seenKeys.add(key);
      }
      unique.push({ ...entry, provider: entry.provider || providerName });
      if (unique.length >= maxResults) {
        return { results: unique, dedupCount: duplicates };
      }
    }
  }

  return { results: unique, dedupCount: duplicates };
}
271
+
272
/**
 * Parses a dotenv-style file into a key/value map.
 *
 * Blank lines and lines starting with `#` are skipped, a leading `export ` is
 * dropped, the line is split at the first `=`, and one leading and one trailing
 * quote character (`'` or `"`) are stripped from the value. Lines without `=`
 * or with an empty key are ignored.
 *
 * @param envPath Path of the file to read.
 * @returns The parsed variables; an empty object when the file does not exist.
 */
function loadEnvFile(envPath: string): Record<string, string> {
  const parsed: Record<string, string> = {};
  if (!fs.existsSync(envPath)) return parsed;

  const content = fs.readFileSync(envPath, "utf8");
  content.split(/\r?\n/).forEach((rawLine) => {
    const line = rawLine.trim();
    if (line === "" || line.startsWith("#")) return;

    const assignment = line.startsWith("export ") ? line.slice(7) : line;
    const separator = assignment.indexOf("=");
    if (separator < 0) return;

    const name = assignment.slice(0, separator).trim();
    if (!name) return;

    const rawValue = assignment.slice(separator + 1).trim();
    parsed[name] = rawValue.replace(/^['"]|['"]$/g, "");
  });
  return parsed;
}
287
+
288
+ function getRuntimeEnv(pluginConfig: Record<string, string>): Record<string, string> {
289
+ const envFiles = [path.join(PLUGIN_DIR, ".env")];
290
+ const fileEnv = Object.assign({}, ...envFiles.map(loadEnvFile));
291
+ const mapped: Record<string, string> = {};
121
292
  const configKeyMap: Record<string, string> = {
122
293
  serperApiKey: "SERPER_API_KEY",
123
294
  tavilyApiKey: "TAVILY_API_KEY",
@@ -127,85 +298,629 @@ export default function (api: any) {
127
298
  kilocodeApiKey: "KILOCODE_API_KEY",
128
299
  youApiKey: "YOU_API_KEY",
129
300
  searxngInstanceUrl: "SEARXNG_INSTANCE_URL",
301
+ searxngAllowPrivate: "SEARXNG_ALLOW_PRIVATE",
130
302
  };
131
303
  for (const [cfgKey, envKey] of Object.entries(configKeyMap)) {
132
- const val = pluginConfig[cfgKey];
133
- if (val && typeof val === "string") configEnv[envKey] = val;
304
+ const val = pluginConfig?.[cfgKey];
305
+ if (val && typeof val === "string") mapped[envKey] = val;
134
306
  }
307
+ return { ...fileEnv, ...Object.fromEntries(Object.entries(process.env).filter(([, v]) => typeof v === "string") as any), ...mapped };
308
+ }
309
+
310
/**
 * Returns the credential configured for `provider` in `env`.
 *
 * For SearXNG the "credential" is the instance URL. Perplexity and You.com
 * accept two variable names each; the first non-empty one wins.
 *
 * @param provider Provider to look up.
 * @param env Resolved environment variables.
 * @returns The configured value, or `undefined` when none is set.
 */
function getApiKey(provider: ProviderName, env: Record<string, string>): string | undefined {
  switch (provider) {
    case "serper":
      return env.SERPER_API_KEY;
    case "tavily":
      return env.TAVILY_API_KEY;
    case "querit":
      return env.QUERIT_API_KEY;
    case "exa":
      return env.EXA_API_KEY;
    case "perplexity":
      return env.KILOCODE_API_KEY || env.PERPLEXITY_API_KEY;
    case "you":
      return env.YOU_API_KEY || env.YOUCOM_API_KEY;
    case "searxng":
      return env.SEARXNG_INSTANCE_URL || env.SEARXNG_URL;
    default:
      return undefined;
  }
}
322
+
323
/**
 * Returns the credential for `provider`, failing when none is configured.
 *
 * @param provider Provider to look up.
 * @param env Resolved environment variables.
 * @returns The configured API key (or instance URL for SearXNG).
 * @throws ProviderConfigError when no value is configured for the provider.
 */
function validateApiKey(provider: ProviderName, env: Record<string, string>): string {
  const key = getApiKey(provider, env);
  if (key) return key;

  const message =
    provider === "searxng"
      ? "Missing SearXNG instance URL (SEARXNG_INSTANCE_URL or pluginConfig.searxngInstanceUrl)"
      : `Missing API key for ${provider}`;
  throw new ProviderConfigError(message);
}
331
+
332
/**
 * Passes through a supported time range ("day", "week", "month", "year") and
 * maps every other value, including a missing one, to `undefined`.
 */
function toTimeRange(value?: string): string | undefined {
  switch (value) {
    case "day":
    case "week":
    case "month":
    case "year":
      return value;
    default:
      return undefined;
  }
}
335
+
336
/**
 * Builds a display title from a URL.
 *
 * The title is the host without a leading `www.`, followed by the last path
 * segment when there is one, with dashes and underscores turned into spaces and
 * a trailing 2-4 character file extension removed.
 *
 * @param url URL to describe.
 * @returns `"<domain> — <last segment>"`, just the domain when no usable
 *   segment exists, or the first 80 characters of the input when it is not a
 *   parseable URL.
 */
function titleFromUrl(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return url.slice(0, 80);
  }

  const domain = parsed.hostname.replace(/^www\./, "");
  const segments = parsed.pathname.split("/").filter(Boolean);
  if (segments.length === 0) return domain;

  const slug = segments[segments.length - 1].replace(/[-_]/g, " ").replace(/\.\w{2,4}$/, "");
  return slug ? `${domain} — ${slug}` : domain;
}
347
+
348
/**
 * Performs an HTTP request and returns the response body parsed as JSON.
 *
 * A default `User-Agent` header is sent unless `init.headers` supplies one.
 * The request, including reading the body, is aborted after `timeoutMs`.
 *
 * @param url Request URL.
 * @param init Fetch options; `init.signal` is replaced by the timeout signal.
 * @param timeoutMs Timeout in milliseconds (default 30000).
 * @returns The parsed JSON body, or `{}` when the body is empty or not JSON.
 * @throws ProviderRequestError on a non-2xx status (transient when the status
 *   is in TRANSIENT_HTTP_CODES), on timeout (transient), and on any other
 *   failure, which is reported as a transient network error.
 */
async function httpJson(url: string, init: RequestInit, timeoutMs = 30000): Promise<any> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      ...init,
      headers: {
        "User-Agent": "ClawdBot-WebSearchPlus/3.0",
        ...(init.headers || {}),
      },
      signal: controller.signal,
    });
    const text = await res.text();
    let data: any = null;
    try { data = text ? JSON.parse(text) : {}; } catch {}
    if (!res.ok) {
      const rawDetail = data?.error || data?.message || text || res.statusText;
      // Error payloads are often objects (e.g. { error: { message: "..." } });
      // interpolating them directly would yield "[object Object]".
      let detail: string;
      if (typeof rawDetail === "string") {
        detail = rawDetail;
      } else if (typeof rawDetail?.message === "string" && rawDetail.message) {
        detail = rawDetail.message;
      } else {
        detail = JSON.stringify(rawDetail);
      }
      throw new ProviderRequestError(`${detail} (HTTP ${res.status})`, res.status, TRANSIENT_HTTP_CODES.has(res.status));
    }
    return data ?? {};
  } catch (error: any) {
    if (error?.name === "AbortError") throw new ProviderRequestError(`Request timed out after ${timeoutMs}ms`, undefined, true);
    if (error instanceof ProviderRequestError) throw error;
    throw new ProviderRequestError(`Network error: ${String(error?.message || error)}`, undefined, true);
  } finally {
    // Runs on every path so the timer never outlives the request.
    clearTimeout(timer);
  }
}
376
+
377
/**
 * Validates a SearXNG instance URL and guards against SSRF.
 *
 * The URL must be http(s) with a hostname and must not name a known cloud
 * metadata host. Unless `SEARXNG_ALLOW_PRIVATE` is enabled, the hostname is
 * resolved and rejected when any resolved address is loopback, private,
 * link-local, unspecified, carrier-grade NAT, or an IPv4-mapped IPv6 form of
 * one of those.
 *
 * NOTE(review): the check resolves the name once here; the later request
 * resolves it again, so DNS rebinding is not prevented by this function alone.
 *
 * @param input Candidate instance URL.
 * @param env Resolved environment variables (reads SEARXNG_ALLOW_PRIVATE).
 * @returns The normalized URL without one trailing slash.
 * @throws ProviderConfigError when the URL is invalid or blocked.
 */
async function validateSearxngUrl(input: string, env: Record<string, string>): Promise<string> {
  type ResolvedAddress = { address: string; family: number };

  // True when the address must not be reached without SEARXNG_ALLOW_PRIVATE.
  const isPrivateOrInternal = (address: string): boolean => {
    const ip = address.toLowerCase();

    // IPv4-mapped IPv6 in dotted (::ffff:10.0.0.1) or hex (::ffff:a00:1) form:
    // classify by the embedded IPv4 address.
    const mappedDotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(ip);
    if (mappedDotted) return isPrivateOrInternal(mappedDotted[1]);
    const mappedHex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(ip);
    if (mappedHex) {
      const hi = parseInt(mappedHex[1], 16);
      const lo = parseInt(mappedHex[2], 16);
      return isPrivateOrInternal(`${hi >> 8}.${hi & 255}.${lo >> 8}.${lo & 255}`);
    }

    const isIpv4Private =
      /^0\./.test(ip) || // 0.0.0.0/8 ("this network"), includes 0.0.0.0
      /^10\./.test(ip) ||
      /^127\./.test(ip) ||
      /^169\.254\./.test(ip) ||
      /^192\.168\./.test(ip) ||
      /^172\.(1[6-9]|2\d|3[0-1])\./.test(ip) ||
      /^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\./.test(ip); // 100.64.0.0/10 (CGNAT)
    const isIpv6Private =
      ip === "::1" ||
      ip === "::" ||
      ip.startsWith("fc") ||
      ip.startsWith("fd") ||
      /^fe[89ab]/.test(ip); // fe80::/10 link-local
    return isIpv4Private || isIpv6Private;
  };

  let u: URL;
  try {
    u = new URL(input);
  } catch {
    throw new ProviderConfigError("Invalid SearXNG URL");
  }
  if (!["http:", "https:"].includes(u.protocol)) throw new ProviderConfigError(`SearXNG URL must use http or https, got ${u.protocol}`);
  if (!u.hostname) throw new ProviderConfigError("SearXNG URL must include a hostname");

  // URL.hostname keeps the brackets of IPv6 literals; strip them so the
  // address can be recognised and resolved.
  const lookupHost = u.hostname.replace(/^\[|\]$/g, "");
  // A trailing dot names the same host and must not bypass the blocklist.
  const blocklistHost = lookupHost.replace(/\.$/, "");

  const blockedHosts = new Set(["169.254.169.254", "metadata.google.internal", "metadata.internal"]);
  if (blockedHosts.has(blocklistHost)) throw new ProviderConfigError("SearXNG URL blocked: metadata endpoint");

  // WARNING: Setting SEARXNG_ALLOW_PRIVATE=true disables SSRF protection for SearXNG.
  // Only enable on fully trusted private networks.
  const allowPrivate = ["1", "true", "yes"].includes(String(env.SEARXNG_ALLOW_PRIVATE || "").trim().toLowerCase());
  if (!allowPrivate) {
    const records: ResolvedAddress[] = await dns.lookup(lookupHost, { all: true, verbatim: true }).catch(() => []);
    if (!records.length && net.isIP(lookupHost)) records.push({ address: lookupHost, family: net.isIP(lookupHost) });
    if (!records.length) throw new ProviderConfigError(`SearXNG URL blocked: cannot resolve hostname ${u.hostname}`);
    for (const { address } of records) {
      if (isPrivateOrInternal(address)) {
        throw new ProviderConfigError(`SearXNG URL blocked: ${u.hostname} resolves to private/internal IP ${address}`);
      }
    }
  }
  return u.toString().replace(/\/$/, "");
}
409
+
410
+ const SHOPPING_SIGNALS: Record<string, number> = {
411
+ "\\bhow much\\b": 4.0, "\\bprice of\\b": 4.0, "\\bcost of\\b": 4.0, "\\bprices?\\b": 3.0,
412
+ "\\$\\d+|\\d+\\s*dollars?": 3.0, "€\\d+|\\d+\\s*euros?": 3.0, "£\\d+|\\d+\\s*pounds?": 3.0,
413
+ "\\bpreis(e)?\\b": 3.5, "\\bkosten\\b": 3.0, "\\bwieviel\\b": 3.5, "\\bwie viel\\b": 3.5, "\\bwas kostet\\b": 4.0,
414
+ "\\bbuy\\b": 3.5, "\\bpurchase\\b": 3.5, "\\border\\b(?!\\s+by)": 3.0, "\\bshopping\\b": 3.5, "\\bshop for\\b": 3.5,
415
+ "\\bwhere to (buy|get|purchase)\\b": 4.0, "\\bkaufen\\b": 3.5, "\\bbestellen\\b": 3.5, "\\bwo kaufen\\b": 4.0,
416
+ "\\bhändler\\b": 3.0, "\\bshop\\b": 2.5, "\\bdeal(s)?\\b": 3.0, "\\bdiscount(s)?\\b": 3.0, "\\bsale\\b": 2.5,
417
+ "\\bcheap(er|est)?\\b": 3.0, "\\baffordable\\b": 2.5, "\\bbudget\\b": 2.5, "\\bbest price\\b": 3.5,
418
+ "\\bcompare prices\\b": 3.5, "\\bcoupon\\b": 3.0, "\\bgünstig(er|ste)?\\b": 3.0, "\\bbillig(er|ste)?\\b": 3.0,
419
+ "\\bangebot(e)?\\b": 3.0, "\\brabatt\\b": 3.0, "\\baktion\\b": 2.5, "\\bschnäppchen\\b": 3.0,
420
+ "\\bvs\\.?\\b": 2.0, "\\bversus\\b": 2.0, "\\bor\\b.*\\bwhich\\b": 2.0, "\\bspecs?\\b": 2.5,
421
+ "\\bspecifications?\\b": 2.5, "\\breview(s)?\\b": 2.0, "\\brating(s)?\\b": 2.0, "\\bunboxing\\b": 2.5,
422
+ "\\btest\\b": 2.5, "\\bbewertung(en)?\\b": 2.5, "\\btechnische daten\\b": 3.0, "\\bspezifikationen\\b": 2.5,
423
+ };
424
+ const RESEARCH_SIGNALS: Record<string, number> = {
425
+ "\\bhow does\\b": 4.0, "\\bhow do\\b": 3.5, "\\bwhy does\\b": 4.0, "\\bwhy do\\b": 3.5, "\\bwhy is\\b": 3.5,
426
+ "\\bexplain\\b": 4.0, "\\bexplanation\\b": 4.0, "\\bwhat is\\b": 3.0, "\\bwhat are\\b": 3.0, "\\bdefine\\b": 3.5,
427
+ "\\bdefinition of\\b": 3.5, "\\bmeaning of\\b": 3.0, "\\banalyze\\b": 3.5, "\\banalysis\\b": 3.5,
428
+ "\\bcompare\\b(?!\\s*prices?)": 3.0, "\\bcomparison\\b": 3.0, "\\bstatus of\\b": 3.5, "\\bstatus\\b": 2.5,
429
+ "\\bwhat happened with\\b": 4.0, "\\bpros and cons\\b": 4.0, "\\badvantages?\\b": 3.0, "\\bdisadvantages?\\b": 3.0,
430
+ "\\bbenefits?\\b": 2.5, "\\bdrawbacks?\\b": 3.0, "\\bdifference between\\b": 3.5, "\\bunderstand\\b": 3.0,
431
+ "\\blearn(ing)?\\b": 2.5, "\\btutorial\\b": 3.0, "\\bguide\\b": 2.5, "\\bhow to\\b": 2.0, "\\bstep by step\\b": 3.0,
432
+ "\\bin[- ]depth\\b": 3.0, "\\bdetailed\\b": 2.5, "\\bcomprehensive\\b": 3.0, "\\bthorough\\b": 2.5,
433
+ "\\bdeep dive\\b": 3.5, "\\boverall\\b": 2.0, "\\bsummary\\b": 2.0, "\\bstudy\\b": 2.5, "\\bresearch shows\\b": 3.5,
434
+ "\\baccording to\\b": 2.5, "\\bevidence\\b": 3.0, "\\bscientific\\b": 3.0, "\\bhistory of\\b": 3.0,
435
+ "\\bbackground\\b": 2.5, "\\bcontext\\b": 2.5, "\\bimplications?\\b": 3.0, "\\bwie funktioniert\\b": 4.0,
436
+ "\\bwarum\\b": 3.5, "\\berklär(en|ung)?\\b": 4.0, "\\bwas ist\\b": 3.0, "\\bwas sind\\b": 3.0, "\\bbedeutung\\b": 3.0,
437
+ "\\banalyse\\b": 3.5, "\\bvergleich(en)?\\b": 3.0, "\\bvor- und nachteile\\b": 4.0, "\\bvorteile\\b": 3.0,
438
+ "\\bnachteile\\b": 3.0, "\\bunterschied(e)?\\b": 3.5, "\\bverstehen\\b": 3.0, "\\blernen\\b": 2.5,
439
+ "\\banleitung\\b": 3.0, "\\bübersicht\\b": 2.5, "\\bhintergrund\\b": 2.5, "\\bzusammenfassung\\b": 2.5,
440
+ };
441
+ const DISCOVERY_SIGNALS: Record<string, number> = {
442
+ "\\bsimilar to\\b": 5.0, "\\blike\\s+\\w+\\.com": 4.5, "\\balternatives? to\\b": 5.0, "\\bcompetitors? (of|to)\\b": 4.5,
443
+ "\\bcompeting with\\b": 4.0, "\\brivals? (of|to)\\b": 4.0, "\\binstead of\\b": 3.0, "\\breplacement for\\b": 3.5,
444
+ "\\bcompanies (like|that|doing|building)\\b": 4.5, "\\bstartups? (like|that|doing|building)\\b": 4.5, "\\bwho else\\b": 4.0,
445
+ "\\bother (companies|startups|tools|apps)\\b": 3.5, "\\bfind (companies|startups|tools|examples?)\\b": 4.5,
446
+ "\\bevents? in\\b": 4.0, "\\bthings to do in\\b": 4.5, "\\bseries [a-d]\\b": 4.0, "\\byc\\b|y combinator": 4.0,
447
+ "\\bfund(ed|ing|raise)\\b": 3.5, "\\bventure\\b": 3.0, "\\bvaluation\\b": 3.0, "\\bresearch papers? (on|about)\\b": 4.0,
448
+ "\\barxiv\\b": 4.5, "\\bgithub (projects?|repos?)\\b": 4.5, "\\bopen source\\b.*\\bprojects?\\b": 4.0,
449
+ "\\btweets? (about|on)\\b": 3.5, "\\bblogs? (about|on|like)\\b": 3.0, "https?://[^\\s]+": 5.0, "\\b\\w+\\.(com|org|io|ai|co|dev)\\b": 3.5,
450
+ };
451
+ const LOCAL_NEWS_SIGNALS: Record<string, number> = {
452
+ "\\bnear me\\b": 4.0, "\\bnearby\\b": 3.5, "\\blocal\\b": 3.0, "\\bin (my )?(city|area|town|neighborhood)\\b": 3.5,
453
+ "\\brestaurants?\\b": 2.5, "\\bhotels?\\b": 2.5, "\\bcafes?\\b": 2.5, "\\bstores?\\b": 2.0, "\\bdirections? to\\b": 3.5,
454
+ "\\bmap of\\b": 3.0, "\\bphone number\\b": 3.0, "\\baddress of\\b": 3.0, "\\bopen(ing)? hours\\b": 3.0,
455
+ "\\bweather\\b": 4.0, "\\bforecast\\b": 3.5, "\\btemperature\\b": 3.0, "\\btime in\\b": 3.0,
456
+ "\\blatest\\b": 2.5, "\\brecent\\b": 2.5, "\\btoday\\b": 2.5, "\\bbreaking\\b": 3.5, "\\bnews\\b": 2.5,
457
+ "\\bheadlines?\\b": 3.0, "\\b202[4-9]\\b": 2.0, "\\blast (week|month|year)\\b": 2.0, "\\bin der nähe\\b": 4.0,
458
+ "\\bin meiner nähe\\b": 4.0, "\\böffnungszeiten\\b": 3.0, "\\badresse von\\b": 3.0, "\\bweg(beschreibung)? nach\\b": 3.5,
459
+ "\\bheute\\b": 2.5, "\\bmorgen\\b": 2.0, "\\baktuell\\b": 2.5, "\\bnachrichten\\b": 3.0,
460
+ };
461
+ const RAG_SIGNALS: Record<string, number> = {
462
+ "\\brag\\b": 4.5, "\\bcontext for\\b": 4.0, "\\bsummarize\\b": 3.5, "\\bbrief(ly)?\\b": 3.0, "\\bquick overview\\b": 3.5,
463
+ "\\btl;?dr\\b": 4.0, "\\bkey (points|facts|info)\\b": 3.5, "\\bmain (points|takeaways)\\b": 3.5,
464
+ "\\b(web|online)\\s+and\\s+news\\b": 4.0, "\\ball sources\\b": 3.5, "\\bcomprehensive (search|overview)\\b": 3.5,
465
+ "\\blatest\\s+(news|updates)\\b": 3.0, "\\bcurrent (events|situation|status)\\b": 3.5, "\\bright now\\b": 3.0,
466
+ "\\bas of today\\b": 3.5, "\\bup.to.date\\b": 3.5, "\\breal.time\\b": 4.0, "\\blive\\b": 2.5,
467
+ "\\bwhat'?s happening with\\b": 3.5, "\\bwhat'?s the latest\\b": 4.0, "\\bupdates?\\s+on\\b": 3.5, "\\bstatus of\\b": 3.0,
468
+ "\\bsituation (in|with|around)\\b": 3.5,
469
+ };
470
+ const DIRECT_ANSWER_SIGNALS: Record<string, number> = {
471
+ "\\bwhat is\\b": 3.0, "\\bwhat are\\b": 2.5, "\\bcurrent status\\b": 4.0, "\\bstatus of\\b": 3.5, "\\bstatus\\b": 2.5,
472
+ "\\bwhat happened with\\b": 4.0, "\\bwhat'?s happening with\\b": 4.0, "\\bas of (today|now)\\b": 4.0, "\\bthis weekend\\b": 3.5,
473
+ "\\bevents? in\\b": 3.5, "\\bthings to do in\\b": 4.0, "\\bnear me\\b": 3.0, "\\bcan you (tell me|summarize|explain)\\b": 3.5,
474
+ "\\bwann\\b": 3.0, "\\bwer\\b": 3.0, "\\bwo\\b": 2.5, "\\bwie viele\\b": 3.0,
475
+ };
476
+ const PRIVACY_SIGNALS: Record<string, number> = {
477
+ "\\bprivate(ly)?\\b": 4.0, "\\banonymous(ly)?\\b": 4.0, "\\bwithout tracking\\b": 4.5, "\\bno track(ing)?\\b": 4.5,
478
+ "\\bprivacy\\b": 3.5, "\\bprivacy.?focused\\b": 4.5, "\\bprivacy.?first\\b": 4.5, "\\bduckduckgo alternative\\b": 4.5,
479
+ "\\bprivate search\\b": 5.0, "\\bprivat\\b": 4.0, "\\banonym\\b": 4.0, "\\bohne tracking\\b": 4.5,
480
+ "\\bdatenschutz\\b": 4.0, "\\baggregate results?\\b": 4.0, "\\bmultiple sources?\\b": 4.0, "\\bdiverse (results|perspectives|sources)\\b": 4.0,
481
+ "\\bfrom (all|multiple|different) (engines?|sources?)\\b": 4.5, "\\bmeta.?search\\b": 5.0, "\\ball engines?\\b": 4.0,
482
+ "\\bverschiedene quellen\\b": 4.0, "\\baus mehreren quellen\\b": 4.0, "\\balle suchmaschinen\\b": 4.5,
483
+ "\\bfree search\\b": 3.5, "\\bno api cost\\b": 4.0, "\\bself.?hosted search\\b": 5.0, "\\bzero cost\\b": 3.5,
484
+ "\\bbudget\\b(?!\\s*(laptop|phone|option))\\b": 2.5, "\\bkostenlos(e)?\\s+suche\\b": 3.5, "\\bkeine api.?kosten\\b": 4.0,
485
+ };
486
+ const EXA_DEEP_SIGNALS: Record<string, number> = {
487
+ "\\bsynthesi[sz]e\\b": 5.0, "\\bdeep research\\b": 5.0, "\\bcomprehensive (analysis|report|overview|survey)\\b": 4.5,
488
+ "\\bacross (multiple|many|several) (sources|documents|papers)\\b": 4.5, "\\baggregat(e|ing) (information|data|results)\\b": 4.0,
489
+ "\\bcross.?referenc": 4.5, "\\bsec filings?\\b": 4.5, "\\bannual reports?\\b": 4.0, "\\bearnings (call|report|transcript)\\b": 4.5,
490
+ "\\bfinancial analysis\\b": 4.0, "\\bliterature (review|survey)\\b": 5.0, "\\bacademic literature\\b": 4.5,
491
+ "\\bstate of the (art|field|industry)\\b": 4.0, "\\bcompile (a |the )?(report|findings|results)\\b": 4.5,
492
+ "\\bsummariz(e|ing) (research|papers|studies)\\b": 4.0, "\\bmultiple documents?\\b": 4.0, "\\bdossier\\b": 4.5,
493
+ "\\bdue diligence\\b": 4.5, "\\bstructured (output|data|report)\\b": 4.0, "\\bmarket research\\b": 4.0,
494
+ "\\bindustry (report|analysis|overview)\\b": 4.0, "\\bresearch (on|about|into)\\b": 4.0, "\\bwhitepaper\\b": 4.5,
495
+ "\\btechnical report\\b": 4.0, "\\bsurvey of\\b": 4.5, "\\bmeta.?analysis\\b": 5.0, "\\bsystematic review\\b": 5.0,
496
+ "\\bcase study\\b": 3.5, "\\bbenchmark(s|ing)?\\b": 3.5, "\\btiefenrecherche\\b": 5.0, "\\bumfassende (analyse|übersicht|recherche)\\b": 4.5,
497
+ "\\baus mehreren quellen zusammenfassen\\b": 4.5, "\\bmarktforschung\\b": 4.0,
498
+ };
499
+ const EXA_DEEP_REASONING_SIGNALS: Record<string, number> = {
500
+ "\\bdeep.?reasoning\\b": 6.0, "\\bcomplex (analysis|reasoning|research)\\b": 4.5, "\\bcontradictions?\\b": 4.5,
501
+ "\\breconcil(e|ing)\\b": 5.0, "\\bcritical(ly)? analyz": 4.5, "\\bweigh(ing)? (the )?evidence\\b": 4.5,
502
+ "\\bcompeting (claims|theories|perspectives)\\b": 4.5, "\\bcomplex financial\\b": 4.5, "\\bregulatory (analysis|compliance|landscape)\\b": 4.5,
503
+ "\\blegal analysis\\b": 4.5, "\\bcomprehensive (due diligence|investigation)\\b": 5.0, "\\bpatent (landscape|analysis|search)\\b": 4.5,
504
+ "\\bmarket intelligence\\b": 4.5, "\\bcompetitive (intelligence|landscape)\\b": 4.5, "\\btrade.?offs?\\b": 4.0,
505
+ "\\bpros and cons of\\b": 4.0, "\\bshould I (use|choose|pick)\\b": 3.5, "\\bwhich is better\\b": 4.0,
506
+ "\\bkomplexe analyse\\b": 4.5, "\\bwidersprüche\\b": 4.5, "\\bquellen abwägen\\b": 4.5, "\\brechtliche analyse\\b": 4.5,
507
+ "\\bvergleich(e|en)?\\b": 3.5,
508
+ };
509
+ const BRAND_PATTERNS = [
510
+ "\\b(apple|iphone|ipad|macbook|airpods?)\\b", "\\b(samsung|galaxy)\\b", "\\b(google|pixel)\\b", "\\b(microsoft|surface|xbox)\\b",
511
+ "\\b(sony|playstation)\\b", "\\b(nvidia|geforce|rtx)\\b", "\\b(amd|ryzen|radeon)\\b", "\\b(intel|core i[3579])\\b",
512
+ "\\b(dell|hp|lenovo|asus|acer)\\b", "\\b(lg|tcl|hisense)\\b", "\\b(laptop|phone|tablet|tv|monitor|headphones?|earbuds?)\\b",
513
+ "\\b(camera|lens|drone)\\b", "\\b(watch|smartwatch|fitbit|garmin)\\b", "\\b(router|modem|wifi)\\b", "\\b(keyboard|mouse|gaming)\\b",
514
+ ];
515
+
516
/** One routing signal that fired for a query: the pattern, the text it matched and its weight. */
type QueryAnalyzerSignalMatch = { pattern: string; matched: string; weight: number };

/**
 * Heuristic query classifier used for automatic provider routing.
 *
 * `analyze` scores a query against the module-level signal tables
 * (shopping, research, discovery, ...) and derives one score per provider;
 * `route` picks the best-scoring provider among the ones that are available.
 */
class QueryAnalyzer {
  /** Hints that a query is about a concrete product (purchase intent, model suffix, size/spec). */
  private static readonly PRODUCT_INDICATORS: readonly RegExp[] = [
    /\b(buy|price|specs?|review|vs|compare)\b/i,
    /\b(pro|max|plus|mini|ultra|lite)\b/i,
    /\b\d+\s*(gb|tb|inch|mm|hz)\b/i,
  ];

  /** Recency cues paired with the weight each one contributes. */
  private static readonly RECENCY_PATTERNS: ReadonlyArray<readonly [RegExp, number]> = [
    [/\b(latest|newest|recent|current)\b/i, 2.5],
    [/\b(today|yesterday|this week|this month)\b/i, 3],
    [/\b(202[4-9]|2030)\b/i, 2],
    [/\b(breaking|live|just|now)\b/i, 3],
    [/\blast (hour|day|week|month)\b/i, 2.5],
  ];

  /**
   * Whole-word "similar / alternatives / examples" hint that nudges a query
   * towards Exa. The leading `\b` sits outside the group so that every
   * alternative is anchored, not only "similar".
   */
  private static readonly SIMILARITY_HINT = /\b(similar|alternatives?|examples?)\b/i;

  /** Tie-break order used when several providers share the top score. */
  private static readonly PROVIDER_PRIORITY: readonly ProviderName[] = ["tavily", "querit", "exa", "perplexity", "serper", "you", "searxng"];

  /**
   * Sums the weights of every pattern in `signals` that matches the query.
   *
   * @param query   Raw user query (lower-cased before matching).
   * @param signals Map of regex source string to weight.
   * @returns The summed weight and one match record per pattern that fired.
   */
  calculateSignalScore(query: string, signals: Record<string, number>): { total: number; matches: QueryAnalyzerSignalMatch[] } {
    const q = query.toLowerCase();
    const matches: QueryAnalyzerSignalMatch[] = [];
    let total = 0;
    for (const [pattern, weight] of Object.entries(signals)) {
      const found = q.match(new RegExp(pattern, "i"));
      if (found) {
        matches.push({ pattern, matched: found[0], weight });
        total += weight;
      }
    }
    return { total, matches };
  }

  /**
   * Shopping bonus for brand mentions.
   *
   * @returns 3 when a brand and a product indicator both appear, 1.5 for a brand alone, otherwise 0.
   */
  detectProductBrandCombo(query: string): number {
    const hasBrand = BRAND_PATTERNS.some((p) => new RegExp(p, "i").test(query));
    if (!hasBrand) return 0;
    const hasProduct = QueryAnalyzer.PRODUCT_INDICATORS.some((re) => re.test(query));
    return hasProduct ? 3 : 1.5;
  }

  /** Returns the first http(s) URL or bare domain (a fixed set of TLDs) found in the query, or null. */
  detectUrl(query: string): string | null {
    const found = query.match(/https?:\/\/[^\s]+|\b\w+\.(com|org|io|ai|co|dev|net|app)\b/i);
    return found?.[0] || null;
  }

  /**
   * Rates query complexity from word count, number of question words and
   * number of clause markers. A score above 2 is reported as complex.
   */
  assessQueryComplexity(query: string) {
    const wordCount = query.trim().split(/\s+/).filter(Boolean).length;
    const questionWords = (query.match(/\b(what|why|how|when|where|which|who|whose|whom)\b/gi) || []).length;
    const clauseMarkers = (query.match(/\b(and|but|or|because|since|while|although|if|when)\b/gi) || []).length;
    let complexityScore = 0;
    if (wordCount > 10) complexityScore += 1.5;
    if (wordCount > 20) complexityScore += 1.0;
    if (questionWords > 1) complexityScore += 1.0;
    if (clauseMarkers > 0) complexityScore += clauseMarkers * 0.5;
    return { word_count: wordCount, question_words: questionWords, clause_markers: clauseMarkers, complexity_score: complexityScore, is_complex: complexityScore > 2 };
  }

  /** Sums the weights of all recency cues present; a total above 2 counts as recency-focused. */
  detectRecencyIntent(query: string) {
    let total = 0;
    for (const [regex, weight] of QueryAnalyzer.RECENCY_PATTERNS) if (regex.test(query)) total += weight;
    return { is_recency_focused: total > 2, score: total };
  }

  /**
   * Runs every signal table against the query and combines the results into
   * per-provider scores plus the match records that explain them.
   */
  analyze(query: string) {
    const shopping = this.calculateSignalScore(query, SHOPPING_SIGNALS);
    const research = this.calculateSignalScore(query, RESEARCH_SIGNALS);
    const discovery = this.calculateSignalScore(query, DISCOVERY_SIGNALS);
    const localNews = this.calculateSignalScore(query, LOCAL_NEWS_SIGNALS);
    const rag = this.calculateSignalScore(query, RAG_SIGNALS);
    const privacy = this.calculateSignalScore(query, PRIVACY_SIGNALS);
    const direct = this.calculateSignalScore(query, DIRECT_ANSWER_SIGNALS);
    const exaDeep = this.calculateSignalScore(query, EXA_DEEP_SIGNALS);
    const exaDeepReasoning = this.calculateSignalScore(query, EXA_DEEP_REASONING_SIGNALS);

    const brandBonus = this.detectProductBrandCombo(query);
    if (brandBonus > 0) {
      shopping.total += brandBonus;
      shopping.matches.push({ pattern: "product_brand_combo", matched: "brand + product detected", weight: brandBonus });
    }
    const detectedUrl = this.detectUrl(query);
    if (detectedUrl) {
      discovery.total += 5;
      discovery.matches.push({ pattern: "url_detected", matched: detectedUrl, weight: 5 });
    }
    const complexity = this.assessQueryComplexity(query);
    if (complexity.is_complex) {
      research.total += complexity.complexity_score;
      research.matches.push({ pattern: "query_complexity", matched: `complex query (${complexity.word_count} words)`, weight: complexity.complexity_score });
    }
    const recency = this.detectRecencyIntent(query);
    const similarityBonus = QueryAnalyzer.SIMILARITY_HINT.test(query) ? 1 : 0;

    return {
      detected_url: detectedUrl,
      complexity,
      recency_focused: recency.is_recency_focused,
      recency_score: recency.score,
      exa_deep_score: exaDeep.total,
      exa_deep_reasoning_score: exaDeepReasoning.total,
      provider_scores: {
        serper: shopping.total + localNews.total + recency.score * 0.35,
        // The complexity score is already folded into research.total when the query is complex.
        tavily: research.total + (complexity.is_complex ? 0 : complexity.complexity_score) + recency.score * 0.2,
        querit: research.total * 0.65 + rag.total * 0.35 + recency.score * 0.45,
        exa: discovery.total + similarityBonus + exaDeep.total * 0.5 + exaDeepReasoning.total * 0.5,
        perplexity: direct.total + localNews.total * 0.4 + recency.score * 0.55,
        you: rag.total + recency.score * 0.25,
        searxng: privacy.total,
      },
      provider_matches: {
        serper: [...shopping.matches, ...localNews.matches],
        tavily: research.matches,
        querit: research.matches,
        exa: [...discovery.matches, ...exaDeep.matches, ...exaDeepReasoning.matches],
        perplexity: direct.matches,
        you: rag.matches,
        searxng: privacy.matches,
      },
    };
  }

  /**
   * Chooses a provider for the query among `availableProviders`.
   *
   * The highest score wins; ties are broken by a fixed priority order.
   * Confidence blends the normalised top score (60%) with the relative
   * margin over the runner-up (40%).
   *
   * @returns The routing decision. With no available providers it falls
   *          back to "serper" with confidence 0 and reason "no_available_providers".
   */
  route(query: string, availableProviders: ProviderName[]) {
    const analysis = this.analyze(query);
    const scores = analysis.provider_scores as Record<ProviderName, number>;
    const available = Object.fromEntries(availableProviders.map((p) => [p, scores[p] ?? 0])) as Record<ProviderName, number>;
    const providers = Object.keys(available) as ProviderName[];
    if (!providers.length) {
      return { provider: "serper" as ProviderName, confidence: 0, confidence_level: "low", reason: "no_available_providers", scores: {}, top_signals: [], exa_depth: "normal" };
    }
    const maxScore = Math.max(...providers.map((p) => available[p]));
    const winners = providers.filter((p) => available[p] === maxScore);
    const winner = QueryAnalyzer.PROVIDER_PRIORITY.find((p) => winners.includes(p)) || winners[0];
    const secondBest = providers.map((p) => available[p]).sort((a, b) => b - a)[1] || 0;
    const margin = maxScore > 0 ? (maxScore - secondBest) / maxScore : 0;
    const normalizedScore = Math.min(maxScore / 15, 1);
    const confidence = maxScore === 0 ? 0 : Number((normalizedScore * 0.6 + margin * 0.4).toFixed(3));
    let exaDepth: "normal" | "deep" | "deep-reasoning" = "normal";
    if (winner === "exa") {
      if ((analysis.exa_deep_reasoning_score || 0) >= 4) exaDepth = "deep-reasoning";
      else if ((analysis.exa_deep_score || 0) >= 4) exaDepth = "deep";
    }
    // Sort a copy: the match arrays are shared between providers (tavily and
    // querit both point at the research matches) and must not be reordered in place.
    const rankedSignals = [...(analysis.provider_matches[winner] || [])].sort((a, b) => b.weight - a.weight);
    return {
      provider: winner,
      confidence,
      confidence_level: confidence >= 0.7 ? "high" : confidence >= 0.4 ? "medium" : "low",
      reason: maxScore === 0 ? "no_signals_matched" : confidence >= 0.7 ? "high_confidence_match" : confidence >= 0.4 ? "moderate_confidence_match" : "low_confidence_match",
      exa_depth: exaDepth,
      scores: Object.fromEntries(providers.map((p) => [p, Number((available[p] || 0).toFixed(2))])),
      top_signals: rankedSignals.slice(0, 5).map((s) => ({ matched: s.matched, weight: s.weight })),
      analysis_summary: {
        query_length: query.trim().split(/\s+/).filter(Boolean).length,
        is_complex: analysis.complexity.is_complex,
        has_url: !!analysis.detected_url,
        recency_focused: analysis.recency_focused,
      },
    };
  }
}
657
+
658
/**
 * Queries Serper (Google results) and normalises the response.
 *
 * Result scores are positional: the first hit gets 1 and each later hit 0.1
 * less. The answer is taken from the answer box, then the knowledge graph
 * description, then the first result snippet.
 */
async function searchSerper(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const tbsByRange: Record<string, string> = { day: "qdr:d", week: "qdr:w", month: "qdr:m", year: "qdr:y" };
  const payload: Json = { q: query, gl: "us", hl: "en", num: maxResults, autocorrect: true };
  if (timeRange && tbsByRange[timeRange]) {
    payload.tbs = tbsByRange[timeRange];
  }

  const data = await httpJson("https://google.serper.dev/search", {
    method: "POST",
    headers: { "X-API-KEY": apiKey, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const organic: any[] = (data.organic || []).slice(0, maxResults);
  const results = organic.map((hit: any, rank: number) => ({
    title: hit.title || "",
    url: hit.link || "",
    snippet: hit.snippet || "",
    score: Number((1 - rank * 0.1).toFixed(2)),
    date: hit.date,
  }));

  const answer =
    data?.answerBox?.answer ||
    data?.answerBox?.snippet ||
    data?.knowledgeGraph?.description ||
    results[0]?.snippet ||
    "";
  const relatedSearches = (data.relatedSearches || []).map((entry: any) => entry.query);

  return { provider: "serper", query, results, images: [], answer, knowledge_graph: data.knowledgeGraph, related_searches: relatedSearches };
}
667
+
668
/**
 * Queries Tavily with a basic-depth general search and normalises the response.
 *
 * The API key travels in the JSON body. Domain filters are only sent when
 * the corresponding list is non-empty.
 */
async function searchTavily(query: string, apiKey: string, maxResults: number, includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const payload: Json = {
    api_key: apiKey,
    query,
    max_results: maxResults,
    search_depth: "basic",
    topic: "general",
    include_images: false,
    include_answer: true,
    include_raw_content: false,
  };
  if (includeDomains?.length) {
    payload.include_domains = includeDomains;
  }
  if (excludeDomains?.length) {
    payload.exclude_domains = excludeDomains;
  }

  const data = await httpJson("https://api.tavily.com/search", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const hits: any[] = (data.results || []).slice(0, maxResults);
  const results = hits.map((hit: any) => ({
    title: hit.title || "",
    url: hit.url || "",
    snippet: hit.content || "",
    score: Number((hit.score || 0).toFixed(3)),
  }));

  return { provider: "tavily", query, results, images: data.images || [], answer: data.answer || "" };
}
676
+
677
/**
 * Queries Querit (English, US) and normalises the response.
 *
 * Optional site filters and a time-range filter are added only when given.
 * Throws ProviderRequestError when the response carries an error message or
 * an error code other than 0 or 200.
 */
async function searchQuerit(query: string, apiKey: string, maxResults: number, timeRange?: string, includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const rangeCodes: Record<string, string> = { day: "d1", week: "w1", month: "m1", year: "y1" };

  const filters: Json = { languages: { include: ["en"] }, geo: { countries: { include: ["US"] } } };
  const wantsInclude = Boolean(includeDomains?.length);
  const wantsExclude = Boolean(excludeDomains?.length);
  if (wantsInclude || wantsExclude) {
    filters.sites = {};
    if (wantsInclude) filters.sites.include = includeDomains;
    if (wantsExclude) filters.sites.exclude = excludeDomains;
  }
  if (timeRange && rangeCodes[timeRange]) {
    filters.timeRange = { date: rangeCodes[timeRange] };
  }

  const payload: Json = { query, count: maxResults, filters };
  const data = await httpJson("https://api.querit.ai/v1/search", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const badErrorCode = data.error_code != null && ![0, 200].includes(data.error_code);
  if (data.error_msg || badErrorCode) {
    throw new ProviderRequestError(data.error_msg || `Querit request failed with error_code=${data.error_code}`);
  }

  const hits = data?.results?.result || [];
  const results = hits.slice(0, maxResults).map((hit: any, rank: number) => ({
    title: hit.title || titleFromUrl(hit.url || ""),
    url: hit.url || "",
    snippet: hit.snippet || hit.page_age || "",
    score: Number((1 - rank * 0.05).toFixed(3)),
    page_time: hit.page_time,
    date: hit.page_age,
    language: hit.language,
  }));

  return {
    provider: "querit",
    query,
    results,
    images: [],
    answer: results[0]?.snippet || "",
    metadata: { search_id: data.search_id, time_range: timeRange && rangeCodes[timeRange] },
  };
}
693
+
694
/**
 * Queries Exa and normalises the response.
 *
 * In "normal" depth a neural search with text and highlights is requested.
 * In "deep" / "deep-reasoning" depth the depth name is sent as the search
 * type, and the response's `output.content` (the synthesis) plus its
 * grounding citations are surfaced alongside the source results.
 *
 * @param query          Search query.
 * @param apiKey         Exa API key, sent in the `x-api-key` header.
 * @param maxResults     Maximum number of source results to request and return.
 * @param exaDepth       "normal", "deep" or "deep-reasoning".
 * @param includeDomains Optional allow-list; sent only when non-empty.
 * @param excludeDomains Optional block-list; sent only when non-empty.
 * @returns Normalised search response. In deep mode the synthesis, when
 *          present, is the first result and the answer.
 */
async function searchExa(query: string, apiKey: string, maxResults: number, exaDepth: "normal" | "deep" | "deep-reasoning", includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const isDeep = exaDepth === "deep" || exaDepth === "deep-reasoning";
  const body: Json = isDeep
    ? { query, numResults: maxResults, type: exaDepth, contents: { text: { maxCharacters: 5000, verbosity: "full" } } }
    : { query, numResults: maxResults, type: "neural", contents: { text: { maxCharacters: 2000, verbosity: "standard" }, highlights: { numSentences: 3, highlightsPerUrl: 2 } } };
  if (includeDomains?.length) body.includeDomains = includeDomains;
  if (excludeDomains?.length) body.excludeDomains = excludeDomains;

  // Third argument is larger for deep searches (presumably a timeout in ms — confirm against httpJson).
  const data = await httpJson(
    "https://api.exa.ai/search",
    { method: "POST", headers: { "x-api-key": apiKey, "Content-Type": "application/json" }, body: JSON.stringify(body) },
    isDeep ? 55000 : 30000,
  );

  if (isDeep) {
    const deepOutput = data.output || {};
    // Content may be a string or a structured value; structured content is serialised to JSON.
    const synthesis = typeof deepOutput.content === "string" ? deepOutput.content : deepOutput.content ? JSON.stringify(deepOutput.content) : "";

    // Flatten per-field citations into one list, keeping the field name and confidence.
    const grounding: any[] = [];
    for (const field of deepOutput.grounding || []) {
      for (const cite of field.citations || []) {
        grounding.push({ url: cite.url || "", title: cite.title || "", confidence: field.confidence, field: field.field });
      }
    }

    const results: SearchResult[] = [];
    if (synthesis) {
      results.push({ title: `Exa ${exaDepth.replace(/-/g, " ")} synthesis`, url: "", snippet: synthesis, full_synthesis: synthesis, score: 1, grounding: grounding.slice(0, 10), type: "synthesis" });
    }
    for (const item of (data.results || []).slice(0, maxResults)) {
      const snippet = item.text ? String(item.text).slice(0, 800) : (item.highlights || [])[0] || "";
      results.push({ title: item.title || "", url: item.url || "", snippet, score: Number((item.score || 0).toFixed(3)), published_date: item.publishedDate, author: item.author, type: "source" });
    }

    // The fallback is only reached when there is no synthesis, in which case
    // no synthesis entry was pushed and the first source sits at index 0.
    const answer = synthesis || results[0]?.snippet || "";
    return { provider: "exa", query, exa_depth: exaDepth, results, images: [], answer, grounding, metadata: { synthesis_length: synthesis.length, source_count: (data.results || []).length } };
  }

  const results = (data.results || []).slice(0, maxResults).map((item: any) => ({
    title: item.title || "",
    url: item.url || "",
    snippet: item.text ? String(item.text).slice(0, 800) : Array.isArray(item.highlights) ? item.highlights.slice(0, 2).join(" ... ") : "",
    score: Number((item.score || 0).toFixed(3)),
    published_date: item.publishedDate,
    author: item.author,
  }));
  return { provider: "exa", query, results, images: [], answer: results[0]?.snippet || "" };
}
722
+
723
/**
 * Asks the perplexity/sonar-pro model (through the Kilo gateway chat
 * completions endpoint) and turns the answer plus its citations into
 * search results.
 *
 * The answer itself becomes the first result; up to `maxResults - 1`
 * citations follow. When the response has no `citations` array, URLs are
 * extracted from the answer text instead.
 */
async function searchPerplexity(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const payload: Json = {
    model: "perplexity/sonar-pro",
    messages: [
      { role: "system", content: "Answer with concise factual summary and include source URLs." },
      { role: "user", content: query },
    ],
    temperature: 0.2,
  };
  if (timeRange) {
    payload.search_recency_filter = timeRange;
  }

  const data = await httpJson("https://api.kilo.ai/api/gateway/chat/completions", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const answer = String(data?.choices?.[0]?.message?.content || "").trim();

  let citations = Array.isArray(data?.citations) ? data.citations : [];
  if (!citations.length) {
    // No structured citations: fall back to unique URLs found in the answer text.
    const urlsInAnswer = answer.match(/https?:\/\/[^\s)\]}>"']+/g) || [];
    citations = [...new Set(urlsInAnswer)];
  }

  const results: SearchResult[] = [];
  if (answer) {
    const cleaned = answer.replace(/\[\d+\]/g, "").trim().slice(0, 500);
    results.push({ title: `Perplexity Answer: ${query.slice(0, 80)}`, url: "https://www.perplexity.ai", snippet: cleaned, score: 1.0 });
  }

  const citationBudget = Math.max(0, maxResults - 1);
  citations.slice(0, citationBudget).forEach((citation: any, idx: number) => {
    const isPlainUrl = typeof citation === "string";
    const url = isPlainUrl ? citation : citation?.url || "";
    const title = isPlainUrl ? titleFromUrl(url) : citation?.title || titleFromUrl(url);
    results.push({ title, url, snippet: `Source cited in Perplexity answer [citation ${idx + 1}]`, score: Number((0.9 - idx * 0.1).toFixed(3)) });
  });

  return { provider: "perplexity", query, results, images: [], answer, metadata: { model: payload.model, usage: data.usage || {} } };
}
749
+
750
/**
 * Queries the You.com search index (US, English, moderate safesearch) and
 * normalises the response.
 *
 * Web hits become the results with positional scores; up to five news items
 * are passed through as-is. The answer is the first three result snippets
 * joined together, capped at 1000 characters.
 */
async function searchYou(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const endpoint = new URL("https://ydc-index.io/v1/search");
  const queryParams: Array<[string, string]> = [
    ["query", query],
    ["count", String(maxResults)],
    ["safesearch", "moderate"],
    ["country", "US"],
    ["language", "EN"],
  ];
  if (timeRange) queryParams.push(["freshness", timeRange]);
  for (const [name, value] of queryParams) {
    endpoint.searchParams.set(name, value);
  }

  const data = await httpJson(endpoint.toString(), { method: "GET", headers: { "X-API-KEY": apiKey, Accept: "application/json" } });

  const webHits = data?.results?.web || [];
  const newsHits = data?.results?.news || [];

  const results = webHits.slice(0, maxResults).map((hit: any, rank: number) => ({
    title: hit.title || "",
    url: hit.url || "",
    snippet: hit?.snippets?.[0] || hit.description || "",
    score: Number((1 - rank * 0.05).toFixed(3)),
    date: hit.page_age,
    source: "web",
    additional_snippets: Array.isArray(hit.snippets) ? hit.snippets.slice(1, 3) : undefined,
    thumbnail: hit.thumbnail_url,
    favicon: hit.favicon_url,
  }));

  const leadSnippets = results.slice(0, 3).map((entry: any) => entry.snippet).filter(Boolean);
  const answer = leadSnippets.join(" ").slice(0, 1000);

  return {
    provider: "you",
    query,
    results,
    news: newsHits.slice(0, 5),
    images: [],
    answer,
    metadata: { search_uuid: data?.metadata?.search_uuid, latency: data?.metadata?.latency },
  };
}
765
+
766
/**
 * Queries a SearXNG instance (JSON format, English, safesearch off) and
 * normalises the response.
 *
 * The instance URL is first passed through validateSearxngUrl. The answer
 * comes from the first direct answer, else the first infobox, else the
 * first result snippet. Metadata lists the engines that produced the
 * returned results.
 */
async function searchSearxng(query: string, instanceUrl: string, maxResults: number, timeRange: string | undefined, env: Record<string, string>): Promise<SearchResponse> {
  const base = await validateSearxngUrl(instanceUrl, env);

  const endpoint = new URL(`${base}/search`);
  endpoint.searchParams.set("q", query);
  endpoint.searchParams.set("format", "json");
  endpoint.searchParams.set("language", "en");
  endpoint.searchParams.set("safesearch", "0");
  if (timeRange) {
    endpoint.searchParams.set("time_range", timeRange);
  }

  const data = await httpJson(endpoint.toString(), { method: "GET", headers: { Accept: "application/json" } });

  const results = (data.results || []).slice(0, maxResults).map((hit: any, rank: number) => ({
    title: hit.title || "",
    url: hit.url || "",
    snippet: hit.content || "",
    // Use the instance's own score when present, otherwise a positional one.
    score: Number((hit.score ?? (1 - rank * 0.05)).toFixed(3)),
    engine: hit.engine || "unknown",
    category: hit.category || "general",
    date: hit.publishedDate,
  }));
  const enginesUsed = new Set<string>(results.map((entry: any) => entry.engine));

  let answer: string;
  if (Array.isArray(data.answers) && data.answers[0]) {
    answer = String(data.answers[0]);
  } else if (Array.isArray(data.infoboxes) && data.infoboxes[0]) {
    answer = String(data.infoboxes[0].content || data.infoboxes[0].infobox || "");
  } else {
    answer = results[0]?.snippet || "";
  }

  return {
    provider: "searxng",
    query,
    results,
    images: [],
    answer,
    suggestions: data.suggestions || [],
    corrections: data.corrections || [],
    metadata: { number_of_results: data.number_of_results, engines_used: [...enginesUsed], instance_url: base },
  };
}
783
+
784
/**
 * Runs `fn`, retrying on transient provider errors.
 *
 * One attempt is made per entry in RETRY_BACKOFF_MS, sleeping for the
 * matching backoff between attempts. Only ProviderRequestError instances
 * flagged as transient are retried, and never for status 401 or 403; any
 * other error ends the loop immediately. The last error seen is rethrown.
 */
async function executeWithRetry(fn: () => Promise<SearchResponse>): Promise<SearchResponse> {
  let failure: any;
  let attempt = 0;
  while (attempt < RETRY_BACKOFF_MS.length) {
    try {
      return await fn();
    } catch (error: any) {
      failure = error;
      const retryable =
        error instanceof ProviderRequestError &&
        error.transient &&
        error.statusCode !== 401 &&
        error.statusCode !== 403;
      if (!retryable) break;
      // No point sleeping after the final attempt.
      const isLastAttempt = attempt >= RETRY_BACKOFF_MS.length - 1;
      if (!isLastAttempt) await sleep(RETRY_BACKOFF_MS[attempt]);
    }
    attempt += 1;
  }
  throw failure;
}
797
+
798
+ export default function (api: any) {
799
+ const pluginConfig: Record<string, string> = (api.pluginConfig ?? {}) as Record<string, string>;
800
+ const runtimeEnv = getRuntimeEnv(pluginConfig);
135
801
 
136
802
  api.registerTool(
137
803
  {
138
804
  name: "web_search_plus",
139
805
  description:
140
- "Search the web using multi-provider intelligent routing (Serper/Google, Tavily/Research, Querit/Multilingual AI Search, Exa/Neural+Deep, Perplexity, You.com, SearXNG). Automatically selects the best provider based on query intent. Use for ALL web searches. Set depth='deep' for multi-source synthesis, 'deep-reasoning' for complex cross-document analysis.",
806
+ "Search the web with intelligent multi-provider routing across Serper, Tavily, Querit, Exa, Perplexity, You.com, and SearXNG. Auto-selects the best provider, caches results, retries transient failures, and falls back across providers.",
141
807
  parameters: PARAMETERS_SCHEMA,
142
- async execute(
143
- _id: string,
144
- params: {
145
- query: string;
146
- provider?: string;
147
- count?: number;
148
- depth?: string;
149
- time_range?: string;
150
- include_domains?: string[];
151
- exclude_domains?: string[];
152
- },
153
- ) {
154
- if (!fs.existsSync(scriptPath)) {
155
- return {
156
- content: [{ type: "text", text: `Search failed: script not found at ${scriptPath}` }],
157
- };
158
- }
808
+ async execute(_id: string, params: ToolParams) {
809
+ try {
810
+ const query = String(params.query || "").trim();
811
+ if (!query) return { content: [{ type: "text", text: "Search failed: query is required" }] };
159
812
 
160
- const args = [scriptPath, "--query", params.query, "--compact"];
813
+ const count = Math.max(1, Math.min(10, Math.floor(Number(params.count || 5))));
814
+ const requestedProvider = (params.provider || "auto") as ProviderName | "auto";
815
+ const timeRange = toTimeRange(params.time_range);
816
+ const includeDomains = Array.isArray(params.include_domains) ? params.include_domains.filter(Boolean) : undefined;
817
+ const excludeDomains = Array.isArray(params.exclude_domains) ? params.exclude_domains.filter(Boolean) : undefined;
161
818
 
162
- if (params.provider && params.provider !== "auto") {
163
- args.push("--provider", params.provider);
164
- }
819
+ const allProviders: ProviderName[] = ["serper", "tavily", "querit", "exa", "perplexity", "you", "searxng"];
820
+ const configuredProviders = allProviders.filter((p) => !!getApiKey(p, runtimeEnv));
165
821
 
166
- if (typeof params.count === "number" && Number.isFinite(params.count)) {
167
- args.push("--max-results", String(Math.max(1, Math.floor(params.count))));
168
- }
822
+ let routingInfo: Json;
823
+ let provider: ProviderName;
824
+ if (requestedProvider === "auto") {
825
+ const analyzer = new QueryAnalyzer();
826
+ const routing = analyzer.route(query, configuredProviders);
827
+ provider = routing.provider;
828
+ routingInfo = { auto_routed: true, provider, confidence: routing.confidence, confidence_level: routing.confidence_level, reason: routing.reason, top_signals: routing.top_signals, scores: routing.scores, exa_depth: routing.exa_depth };
829
+ } else {
830
+ provider = requestedProvider;
831
+ routingInfo = { auto_routed: false, provider };
832
+ }
169
833
 
170
- if (params.depth && params.depth !== "normal") {
171
- args.push("--exa-depth", params.depth);
172
- }
834
+ const priority: ProviderName[] = ["tavily", "querit", "exa", "perplexity", "serper", "you", "searxng"];
835
+ const providersToTry: ProviderName[] = [provider, ...priority.filter((p) => p !== provider && configuredProviders.includes(p))];
836
+ const eligibleProviders: ProviderName[] = [];
837
+ const cooldownSkips: Json[] = [];
838
+ for (const p of providersToTry) {
839
+ const cooldown = providerInCooldown(p);
840
+ if (cooldown.inCooldown) cooldownSkips.push({ provider: p, cooldown_remaining_seconds: cooldown.remaining });
841
+ else eligibleProviders.push(p);
842
+ }
843
+ if (!eligibleProviders.length) eligibleProviders.push(provider);
173
844
 
174
- if (params.time_range) {
175
- args.push("--time-range", params.time_range);
176
- args.push("--freshness", params.time_range);
177
- }
845
+ const cacheContext = {
846
+ time_range: timeRange,
847
+ include_domains: includeDomains ? [...includeDomains].sort() : null,
848
+ exclude_domains: excludeDomains ? [...excludeDomains].sort() : null,
849
+ exa_depth: params.depth || routingInfo.exa_depth || "normal",
850
+ };
178
851
 
179
- if (params.include_domains?.length) {
180
- args.push("--include-domains", ...params.include_domains);
181
- }
852
+ const cached = cacheGet(query, provider, count, DEFAULT_CACHE_TTL, cacheContext);
853
+ if (cached) {
854
+ const result = { ...cached };
855
+ for (const key of Object.keys(result)) if (key.startsWith("_cache_")) delete result[key];
856
+ result.cached = true;
857
+ result.cache_age_seconds = Math.floor(Date.now() / 1000 - Number(cached._cache_timestamp || 0));
858
+ result.routing = { ...routingInfo, ...(cooldownSkips.length ? { cooldown_skips: cooldownSkips } : {}) };
859
+ return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput(result)) }] };
860
+ }
182
861
 
183
- if (params.exclude_domains?.length) {
184
- args.push("--exclude-domains", ...params.exclude_domains);
185
- }
862
+ const errors: Json[] = [];
863
+ const successes: Array<[string, SearchResponse]> = [];
186
864
 
187
- const envPaths = [
188
- path.join(PLUGIN_DIR, ".env"),
189
- path.join(PLUGIN_DIR, "..", "web-search-plus", ".env"),
190
- ];
191
- const fileEnv: Record<string, string> = {};
192
- for (const envPath of envPaths) {
193
- Object.assign(fileEnv, loadEnvFile(envPath));
194
- }
195
- const childEnv = { ...process.env, ...configEnv, ...fileEnv };
865
+ const runProvider = async (p: ProviderName): Promise<SearchResponse> => {
866
+ const key = validateApiKey(p, runtimeEnv);
867
+ if (p === "serper") return searchSerper(query, key, count, timeRange);
868
+ if (p === "tavily") return searchTavily(query, key, count, includeDomains, excludeDomains);
869
+ if (p === "querit") return searchQuerit(query, key, count, timeRange, includeDomains, excludeDomains);
870
+ if (p === "exa") {
871
+ const exaDepth = (params.depth || routingInfo.exa_depth || "normal") as "normal" | "deep" | "deep-reasoning";
872
+ return searchExa(query, key, count, exaDepth, includeDomains, excludeDomains);
873
+ }
874
+ if (p === "perplexity") return searchPerplexity(query, key, count, timeRange);
875
+ if (p === "you") return searchYou(query, key, count, timeRange);
876
+ return searchSearxng(query, key, count, timeRange, runtimeEnv);
877
+ };
196
878
 
197
- const result = await runPython(args, childEnv, 75000);
879
+ for (const p of eligibleProviders) {
880
+ try {
881
+ const result = await executeWithRetry(() => runProvider(p));
882
+ resetProviderHealth(p);
883
+ successes.push([p, result]);
884
+ if ((result.results || []).length >= count || errors.length === 0) break;
885
+ } catch (error: any) {
886
+ const message = sanitizeOutput(String(error?.message || error));
887
+ const cooldown = markProviderFailure(p, message);
888
+ errors.push({ provider: p, error: message, cooldown_seconds: cooldown.cooldown_seconds });
889
+ }
890
+ }
198
891
 
199
- if (result.code !== 0) {
200
- const stderr = sanitizeOutput(result.stderr.trim()) || "Unknown error";
201
- return {
202
- content: [{ type: "text", text: `Search failed (exit ${result.code}): ${stderr}` }],
203
- };
204
- }
892
+ if (!successes.length) {
893
+ return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput({ error: "All providers failed", provider, query, routing: routingInfo, provider_errors: errors, cooldown_skips: cooldownSkips })) }] };
894
+ }
895
+
896
+ let result: SearchResponse;
897
+ if (successes.length === 1) {
898
+ result = successes[0][1];
899
+ } else {
900
+ result = { ...successes[0][1] };
901
+ const deduped = deduplicateResultsAcrossProviders(successes, count);
902
+ result.results = deduped.results;
903
+ result.deduplicated = deduped.dedupCount > 0;
904
+ result.metadata = { ...(result.metadata || {}), dedup_count: deduped.dedupCount, providers_merged: successes.map(([p]) => p) };
905
+ }
906
+
907
+ const successfulProvider = successes[0][0] as ProviderName;
908
+ if (successfulProvider !== provider) {
909
+ routingInfo = { ...routingInfo, fallback_used: true, original_provider: provider, provider: successfulProvider, fallback_errors: errors };
910
+ }
911
+ if (cooldownSkips.length) routingInfo.cooldown_skips = cooldownSkips;
912
+ result.routing = routingInfo;
913
+ result.cached = false;
914
+ if (!(result as any).metadata) result.metadata = {};
915
+ if ((result as any).deduplicated == null) (result as any).deduplicated = false;
916
+ if ((result.metadata as any).dedup_count == null) (result.metadata as any).dedup_count = 0;
205
917
 
206
- return {
207
- content: [{ type: "text", text: sanitizeOutput(result.stdout.trim()) || "{}" }],
208
- };
918
+ cachePut(query, successfulProvider, count, result, cacheContext);
919
+
920
+ return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput(result)) }] };
921
+ } catch (error: any) {
922
+ return { content: [{ type: "text", text: `Search failed: ${sanitizeOutput(String(error?.message || error))}` }] };
923
+ }
209
924
  },
210
925
  },
211
926
  { optional: true },