web-search-plus-plugin 1.3.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -1,243 +1,926 @@
1
- import { Type } from "@sinclair/typebox";
2
- import { spawn } from "child_process";
1
+ import crypto from "crypto";
3
2
  import fs from "fs";
4
3
  import path from "path";
5
4
  import { fileURLToPath } from "url";
5
+ import dns from "dns/promises";
6
+ import net from "net";
6
7
 
7
8
  function getPluginDir(): string {
9
+ // When OpenClaw transpiles plugins, import.meta.url may point to a temp dir.
10
+ // Check for the known extension path first.
11
+ const knownPath = path.join(process.env.HOME || "/root", ".openclaw", "extensions", "web-search-plus-plugin");
12
+ if (fs.existsSync(path.join(knownPath, "package.json"))) return knownPath;
8
13
  try {
9
14
  if (typeof __dirname !== "undefined") return __dirname;
10
15
  } catch {}
11
16
  try {
12
17
  return path.dirname(fileURLToPath(import.meta.url));
13
18
  } catch {}
14
- return path.join(process.cwd(), "skills", "web-search-plus-plugin");
19
+ return process.cwd();
15
20
  }
16
21
 
17
- const SENSITIVE_PATTERN = /(?:key|token|secret|password|api[_-]?key)\s*[=:]\s*\S+/gi;
22
+ const PLUGIN_DIR = getPluginDir();
23
+ const CACHE_DIR = path.join(PLUGIN_DIR, ".cache");
24
+ const PROVIDER_HEALTH_FILE = path.join(CACHE_DIR, "provider_health.json");
25
+ const DEFAULT_CACHE_TTL = 3600;
26
+ const RETRY_BACKOFF_MS = [1000, 3000, 9000];
27
+ const COOLDOWN_STEPS_SECONDS = [60, 300, 1500, 3600];
28
+ const TRANSIENT_HTTP_CODES = new Set([408, 425, 429, 500, 502, 503, 504]);
29
+
30
+ const PARAMETERS_SCHEMA = {
31
+ type: "object",
32
+ required: ["query"],
33
+ properties: {
34
+ query: { type: "string", description: "Search query" },
35
+ provider: {
36
+ type: "string",
37
+ enum: ["serper", "tavily", "querit", "exa", "perplexity", "you", "searxng", "auto"],
38
+ description: "Force a provider, or use auto routing (default: auto)",
39
+ },
40
+ count: { type: "number", description: "Number of results (default: 5)" },
41
+ depth: {
42
+ type: "string",
43
+ enum: ["normal", "deep", "deep-reasoning"],
44
+ description: "Exa depth when using Exa or when auto-routing chooses Exa.",
45
+ },
46
+ time_range: {
47
+ type: "string",
48
+ enum: ["day", "week", "month", "year"],
49
+ description: "Recency filter where supported.",
50
+ },
51
+ include_domains: {
52
+ type: "array",
53
+ items: { type: "string" },
54
+ description: "Only include results from these domains (Tavily, Exa, Querit where supported).",
55
+ },
56
+ exclude_domains: {
57
+ type: "array",
58
+ items: { type: "string" },
59
+ description: "Exclude results from these domains (Tavily, Exa, Querit where supported).",
60
+ },
61
+ },
62
+ };
63
+
64
+ type Json = Record<string, any>;
65
+ type ProviderName = "serper" | "tavily" | "querit" | "exa" | "perplexity" | "you" | "searxng";
66
+ type ToolParams = {
67
+ query: string;
68
+ provider?: ProviderName | "auto";
69
+ count?: number;
70
+ depth?: "normal" | "deep" | "deep-reasoning";
71
+ time_range?: "day" | "week" | "month" | "year";
72
+ include_domains?: string[];
73
+ exclude_domains?: string[];
74
+ };
75
+
76
+ type SearchResult = {
77
+ title: string;
78
+ url: string;
79
+ snippet: string;
80
+ score?: number;
81
+ [key: string]: any;
82
+ };
83
+
84
+ type SearchResponse = {
85
+ provider: string;
86
+ query: string;
87
+ results: SearchResult[];
88
+ images?: string[];
89
+ answer?: string;
90
+ metadata?: Json;
91
+ [key: string]: any;
92
+ };
93
+
94
/**
 * Raised when a provider cannot be used because its configuration is missing
 * or invalid (e.g. no API key, bad SearXNG URL).
 *
 * Sets `name` so the error is identifiable in logs and serialized output,
 * consistent with ProviderRequestError.
 */
class ProviderConfigError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args);
    this.name = "ProviderConfigError";
  }
}
95
/**
 * Error raised for a failed provider HTTP request.
 *
 * `statusCode` carries the HTTP status when one was received; `transient`
 * is the flag supplied by the thrower to mark the failure as temporary.
 */
class ProviderRequestError extends Error {
  statusCode?: number;
  transient: boolean;

  constructor(message: string, statusCode?: number, transient = false) {
    super(message);
    Object.assign(this, { name: "ProviderRequestError", statusCode, transient });
  }
}
105
+
106
/**
 * Patterns that identify secrets inside free-form text.
 * Only the query-string pattern has capture groups: group 1 is the
 * `?name=` / `&name=` prefix, which is preserved when redacting.
 */
const SENSITIVE_PATTERNS: RegExp[] = [
  /\b(?:sk|pk|rk|api|tok)_[A-Za-z0-9\-_]{10,}\b/g,
  /\bBearer\s+[A-Za-z0-9\-._~+/]+=*\b/gi,
  /\b(?:key|token|secret|password|api[_-]?key)\s*[:=]\s*[^\s,"'}]+/gi,
  /([?&](?:api[_-]?key|key|token|access[_-]?token|auth|authorization)=)([^&#\s]+)/gi,
  /\b[A-Za-z0-9_-]{24,}\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}\b/g,
];

/**
 * Recursively redacts secrets from a value before it is logged or returned.
 *
 * - Strings: every SENSITIVE_PATTERNS match is replaced with `[REDACTED]`
 *   (keeping the captured `?name=` prefix for query-string matches).
 * - Arrays: each element is sanitized.
 * - Objects: values under secret-looking keys are replaced wholesale; all
 *   other values are sanitized recursively.
 * - Anything else is returned unchanged.
 *
 * @param input Arbitrary value (string, array, object or primitive).
 * @returns A sanitized copy; the input is not mutated.
 */
function sanitizeOutput(input: any): any {
  if (typeof input === "string") {
    let out = input;
    for (const pattern of SENSITIVE_PATTERNS) {
      out = out.replace(pattern, (_match: string, ...rest: unknown[]) => {
        // For patterns WITHOUT capture groups the first extra argument is the
        // numeric match offset, not a group. Only a string is a real prefix;
        // treating the offset as one used to emit e.g. "7[REDACTED]".
        const prefix = rest[0];
        return typeof prefix === "string" && prefix ? `${prefix}[REDACTED]` : "[REDACTED]";
      });
    }
    return out;
  }
  if (Array.isArray(input)) return input.map((v) => sanitizeOutput(v));
  if (input && typeof input === "object") {
    const result: any = {};
    for (const [k, v] of Object.entries(input)) {
      if (/(?:api[_-]?key|token|secret|password|authorization)/i.test(k)) {
        result[k] = "[REDACTED]";
      } else {
        result[k] = sanitizeOutput(v);
      }
    }
    return result;
  }
  return input;
}
136
+
137
/** Creates `dir` (and any missing parents); an already existing directory is not an error. */
function ensureDir(dir: string): void {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
140
+
141
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
144
+
145
/**
 * Reads and parses a JSON file.
 *
 * @param file     Path of the file to read (UTF-8).
 * @param fallback Value returned when the file cannot be read or parsed.
 * @returns The parsed JSON value, or `fallback` on any failure.
 */
function readJsonFile(file: string, fallback: any): any {
  let raw: string;
  try {
    raw = fs.readFileSync(file, "utf8");
  } catch {
    return fallback;
  }
  try {
    return JSON.parse(raw);
  } catch {
    return fallback;
  }
}
152
+
153
/**
 * Serializes `value` as pretty-printed JSON and writes it to `file`,
 * creating the parent directory when needed.
 *
 * The content is written to a temporary sibling file and then renamed over
 * the target, so a concurrent reader never sees a half-written file (which
 * the readers in this module would treat as corrupt).
 *
 * @throws Whatever the underlying fs call throws; the temp file is removed first.
 */
function writeJsonFile(file: string, value: any): void {
  ensureDir(path.dirname(file));
  const tmpFile = `${file}.${process.pid}.${Date.now()}.tmp`;
  try {
    fs.writeFileSync(tmpFile, JSON.stringify(value, null, 2), "utf8");
    fs.renameSync(tmpFile, file);
  } catch (error) {
    // Best-effort cleanup; the original failure is what the caller needs to see.
    try { fs.unlinkSync(tmpFile); } catch {}
    throw error;
  }
}
157
+
158
/** Returns the lowercase hex SHA-256 digest of `input`. */
function sha256(input: string): string {
  return crypto.createHash("sha256").update(input).digest("hex");
}

/**
 * Builds a deterministic 32-hex-character cache key for a search request.
 *
 * The key is the truncated SHA-256 of a canonical JSON encoding of
 * `{ query, provider, maxResults, ...params }` with object keys sorted at
 * every nesting level, so key order never affects the result.
 *
 * The previous implementation passed the sorted top-level key list as the
 * `JSON.stringify` replacer. That array is a whitelist applied at every depth,
 * so nested objects lost all keys not also present at the top level and
 * different requests collided. Flat parameters produce the same key as before.
 *
 * @param params Optional extra request parameters that affect the result.
 */
function buildCacheKey(query: string, provider: string, maxResults: number, params?: Json): string {
  // Canonical JSON: sorted keys, arrays kept in order. Expects plain JSON data.
  const canonical = (value: unknown): string => {
    if (Array.isArray(value)) return `[${value.map(canonical).join(",")}]`;
    if (value !== null && typeof value === "object") {
      const record = value as Record<string, unknown>;
      const members = Object.keys(record)
        .sort()
        .map((name) => `${JSON.stringify(name)}:${canonical(record[name])}`);
      return `{${members.join(",")}}`;
    }
    return JSON.stringify(value);
  };

  const merged = { query, provider, maxResults, ...(params || {}) };
  // Round-trip first so undefined members, toJSON() and non-finite numbers are
  // normalized exactly the way JSON.stringify would treat them.
  const plain: unknown = JSON.parse(JSON.stringify(merged));
  return sha256(canonical(plain)).slice(0, 32);
}
165
+
166
/** Maps a cache key to the path of its JSON file inside CACHE_DIR. */
function getCachePath(cacheKey: string): string {
  const fileName = cacheKey + ".json";
  return path.join(CACHE_DIR, fileName);
}
169
+
170
/**
 * Looks up a cached search response.
 *
 * @param ttl Maximum entry age in seconds.
 * @returns The cached payload (including its `_cache_*` metadata), or `null`
 *          when there is no usable entry. Unreadable, unparsable, untimestamped
 *          or expired entries are deleted (best effort) before returning `null`.
 */
function cacheGet(query: string, provider: string, maxResults: number, ttl: number, params?: Json): any | null {
  const file = getCachePath(buildCacheKey(query, provider, maxResults, params));

  // Remove the entry, ignoring failures (e.g. the file never existed).
  const discard = (): null => {
    try { fs.unlinkSync(file); } catch {}
    return null;
  };

  try {
    const entry = JSON.parse(fs.readFileSync(file, "utf8"));
    const storedAt = Number(entry._cache_timestamp || 0);
    const ageSeconds = Date.now() / 1000 - storedAt;
    const usable = Boolean(storedAt) && !(ageSeconds > ttl);
    return usable ? entry : discard();
  } catch {
    return discard();
  }
}
18
186
 
19
- function sanitizeOutput(text: string): string {
20
- return text.replace(SENSITIVE_PATTERN, "[REDACTED]");
187
/**
 * Stores a search response in the on-disk cache.
 *
 * The stored payload is `result` plus `_cache_*` bookkeeping fields
 * (timestamp in whole seconds, key, query, provider, max results, params).
 */
function cachePut(query: string, provider: string, maxResults: number, result: any, params?: Json): void {
  ensureDir(CACHE_DIR);
  const key = buildCacheKey(query, provider, maxResults, params);
  const bookkeeping = {
    _cache_timestamp: Math.floor(Date.now() / 1000),
    _cache_key: key,
    _cache_query: query,
    _cache_provider: provider,
    _cache_max_results: maxResults,
    _cache_params: params || {},
  };
  writeJsonFile(getCachePath(key), { ...result, ...bookkeeping });
}
202
+
203
/**
 * Loads the persisted provider health state.
 *
 * @returns The stored state object, or an empty object when the file is
 *          missing, unparsable, or holds something other than a plain JSON
 *          object. Without this guard, valid-but-wrong JSON such as `null`
 *          or an array made callers that index or mutate the state throw.
 */
function loadProviderHealth(): Json {
  const state: unknown = readJsonFile(PROVIDER_HEALTH_FILE, {});
  const isPlainObject = state !== null && typeof state === "object" && !Array.isArray(state);
  return isPlainObject ? (state as Json) : {};
}
206
+
207
/** Persists the provider health state to PROVIDER_HEALTH_FILE. */
function saveProviderHealth(state: Json): void {
  const target = PROVIDER_HEALTH_FILE;
  writeJsonFile(target, state);
}
210
+
211
/**
 * Reports whether a provider is currently cooling down after failures.
 *
 * @returns `inCooldown` plus the remaining cooldown in whole seconds
 *          (0 when the provider is not in cooldown).
 */
function providerInCooldown(provider: string): { inCooldown: boolean; remaining: number } {
  const entry = loadProviderHealth()?.[provider];
  const until = Number(entry?.cooldown_until || 0);
  const nowSeconds = Math.floor(Date.now() / 1000);
  const secondsLeft = until - nowSeconds;
  return { inCooldown: secondsLeft > 0, remaining: Math.max(0, secondsLeft) };
}
217
+
218
/**
 * Records a provider failure and puts the provider into cooldown.
 *
 * The cooldown length comes from COOLDOWN_STEPS_SECONDS, indexed by the
 * number of recorded failures and capped at the last step.
 *
 * @param message Error text; stored after passing through sanitizeOutput.
 * @returns The updated health entry for `provider` (already persisted).
 */
function markProviderFailure(provider: string, message: string): Json {
  const state = loadProviderHealth();
  const timestamp = Math.floor(Date.now() / 1000);

  const previousFailures = Number(state?.[provider]?.failure_count || 0);
  const failCount = previousFailures + 1;
  const lastStep = COOLDOWN_STEPS_SECONDS.length - 1;
  const cooldownSeconds = COOLDOWN_STEPS_SECONDS[Math.min(failCount - 1, lastStep)];

  const entry = {
    failure_count: failCount,
    cooldown_until: timestamp + cooldownSeconds,
    cooldown_seconds: cooldownSeconds,
    last_error: sanitizeOutput(message),
    last_failure_at: timestamp,
  };
  state[provider] = entry;
  saveProviderHealth(state);
  return state[provider];
}
233
+
234
/** Clears the stored failure/cooldown entry for `provider`; writes only if an entry existed. */
function resetProviderHealth(provider: string): void {
  const state = loadProviderHealth();
  if (!state[provider]) return;
  delete state[provider];
  saveProviderHealth(state);
}
241
+
242
/**
 * Produces a comparison key for a result URL, used to detect the same page
 * returned by several providers.
 *
 * Ignored: scheme, port, a leading `www.`, host case, one trailing slash,
 * the fragment, and well-known tracking parameters (`utm_*`, click ids).
 * The remaining query parameters are part of the key (sorted by name), so
 * pages distinguished only by their query string (`watch?v=…`, `item?id=…`)
 * are no longer collapsed into one result. URLs without a query string
 * normalize exactly as before.
 *
 * @returns The key, or the trimmed lower-cased input when it is not a valid URL.
 */
function normalizeResultUrl(url: string): string {
  const raw = url.trim();
  try {
    const u = new URL(raw);
    const host = u.hostname.replace(/^www\./i, "").toLowerCase();
    const pathname = u.pathname.replace(/\/$/, "");

    const trackingParam = /^(?:utm_\w+|fbclid|gclid|msclkid|mc_cid|mc_eid)$/i;
    const significant = [...u.searchParams.entries()]
      .filter(([name]) => !trackingParam.test(name))
      // Sort by name only; the stable sort keeps repeated parameters in order.
      .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
      .map(([name, value]) => `${encodeURIComponent(name)}=${encodeURIComponent(value)}`);
    const query = significant.length ? `?${significant.join("&")}` : "";

    return `${host}${pathname}${query}`;
  } catch {
    return raw.toLowerCase();
  }
}
252
+
253
/**
 * Merges per-provider results into a single list without duplicate URLs.
 *
 * Providers are processed in the given order and the first occurrence of a
 * URL wins. Each kept item is tagged with its provider unless it already
 * has a `provider` field. Items whose URL normalizes to an empty key are
 * never treated as duplicates.
 *
 * @param resultsByProvider Ordered `[providerName, response]` pairs.
 * @param maxResults        Cap on merged results; `<= 0` yields an empty list
 *                          (previously one result slipped through because the
 *                          cap was only checked after pushing).
 * @returns The merged results and the number of duplicates dropped.
 */
function deduplicateResultsAcrossProviders(resultsByProvider: Array<[string, SearchResponse]>, maxResults: number): { results: SearchResult[]; dedupCount: number } {
  const deduped: SearchResult[] = [];
  const seen = new Set<string>();
  let dedupCount = 0;
  if (maxResults <= 0) return { results: deduped, dedupCount };

  for (const [provider, data] of resultsByProvider) {
    // Tolerate a missing response object as well as a missing results array.
    for (const item of data?.results || []) {
      const norm = normalizeResultUrl(item.url || "");
      if (norm && seen.has(norm)) {
        dedupCount += 1;
        continue;
      }
      if (norm) seen.add(norm);
      deduped.push({ ...item, provider: item.provider || provider });
      if (deduped.length >= maxResults) return { results: deduped, dedupCount };
    }
  }
  return { results: deduped, dedupCount };
}
22
271
 
23
272
  function loadEnvFile(envPath: string): Record<string, string> {
24
273
  if (!fs.existsSync(envPath)) return {};
25
274
  const env: Record<string, string> = {};
26
- const lines = fs.readFileSync(envPath, "utf8").split("\n");
27
- for (const line of lines) {
275
+ for (const line of fs.readFileSync(envPath, "utf8").split(/\r?\n/)) {
28
276
  const trimmed = line.trim();
29
277
  if (!trimmed || trimmed.startsWith("#")) continue;
30
278
  const stripped = trimmed.startsWith("export ") ? trimmed.slice(7) : trimmed;
31
- const eqIdx = stripped.indexOf("=");
32
- if (eqIdx < 0) continue;
33
- const key = stripped.slice(0, eqIdx).trim();
34
- const val = stripped.slice(eqIdx + 1).trim().replace(/^['"]|['"]$/g, "");
35
- if (key) env[key] = val;
279
+ const idx = stripped.indexOf("=");
280
+ if (idx < 0) continue;
281
+ const key = stripped.slice(0, idx).trim();
282
+ const value = stripped.slice(idx + 1).trim().replace(/^['"]|['"]$/g, "");
283
+ if (key) env[key] = value;
36
284
  }
37
285
  return env;
38
286
  }
39
287
 
40
- function runPython(
41
- args: string[],
42
- env: NodeJS.ProcessEnv,
43
- timeoutMs: number,
44
- ): Promise<{ stdout: string; stderr: string; code: number }> {
45
- return new Promise((resolve) => {
46
- const child = spawn("python3", args, { env, shell: false });
47
- let stdout = "";
48
- let stderr = "";
49
- let settled = false;
50
-
51
- const timer = setTimeout(() => {
52
- if (!settled) {
53
- settled = true;
54
- child.kill();
55
- resolve({ stdout: "", stderr: "Search timed out", code: 1 });
56
- }
57
- }, timeoutMs);
288
/**
 * Builds the effective environment used to look up provider credentials.
 *
 * Sources, lowest to highest precedence:
 *  1. `.env` in the plugin directory, then `.env` in the sibling
 *     `web-search-plus` directory (the later file wins on conflicts);
 *  2. string-valued entries of `process.env`;
 *  3. non-empty string values from `pluginConfig`, mapped to env names.
 */
function getRuntimeEnv(pluginConfig: Record<string, string>): Record<string, string> {
  const fileEnv: Record<string, string> = {};
  const envFiles = [path.join(PLUGIN_DIR, ".env"), path.join(PLUGIN_DIR, "..", "web-search-plus", ".env")];
  for (const envFile of envFiles) {
    Object.assign(fileEnv, loadEnvFile(envFile));
  }

  const processEnv = Object.fromEntries(
    Object.entries(process.env).filter((entry) => typeof entry[1] === "string"),
  ) as Record<string, string>;

  const configKeyMap: Record<string, string> = {
    serperApiKey: "SERPER_API_KEY",
    tavilyApiKey: "TAVILY_API_KEY",
    queritApiKey: "QUERIT_API_KEY",
    exaApiKey: "EXA_API_KEY",
    perplexityApiKey: "PERPLEXITY_API_KEY",
    kilocodeApiKey: "KILOCODE_API_KEY",
    youApiKey: "YOU_API_KEY",
    searxngInstanceUrl: "SEARXNG_INSTANCE_URL",
    searxngAllowPrivate: "SEARXNG_ALLOW_PRIVATE",
  };
  const fromConfig: Record<string, string> = {};
  Object.keys(configKeyMap).forEach((cfgKey) => {
    const candidate = pluginConfig?.[cfgKey];
    if (candidate && typeof candidate === "string") {
      fromConfig[configKeyMap[cfgKey]] = candidate;
    }
  });

  return { ...fileEnv, ...processEnv, ...fromConfig };
}
58
309
 
59
- child.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
60
- child.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
310
/**
 * Returns the credential configured for `provider`, if any.
 *
 * Perplexity prefers `KILOCODE_API_KEY` and falls back to
 * `PERPLEXITY_API_KEY`; for SearXNG the "key" is the instance URL.
 */
function getApiKey(provider: ProviderName, env: Record<string, string>): string | undefined {
  switch (provider) {
    case "serper":
      return env.SERPER_API_KEY;
    case "tavily":
      return env.TAVILY_API_KEY;
    case "querit":
      return env.QUERIT_API_KEY;
    case "exa":
      return env.EXA_API_KEY;
    case "perplexity":
      return env.KILOCODE_API_KEY || env.PERPLEXITY_API_KEY;
    case "you":
      return env.YOU_API_KEY;
    case "searxng":
      return env.SEARXNG_INSTANCE_URL;
    default:
      return undefined;
  }
}
61
322
 
62
- child.on("close", (code: number | null) => {
63
- if (!settled) {
64
- settled = true;
65
- clearTimeout(timer);
66
- resolve({ stdout, stderr, code: code ?? 1 });
67
- }
323
/**
 * Returns the credential for `provider` or fails when none is configured.
 *
 * @throws ProviderConfigError when the key (or, for SearXNG, the instance URL) is missing.
 */
function validateApiKey(provider: ProviderName, env: Record<string, string>): string {
  const key = getApiKey(provider, env);
  if (key) return key;

  const message =
    provider === "searxng"
      ? "Missing SearXNG instance URL (SEARXNG_INSTANCE_URL or pluginConfig.searxngInstanceUrl)"
      : `Missing API key for ${provider}`;
  throw new ProviderConfigError(message);
}
331
+
332
/** Passes through a supported recency filter (`day`/`week`/`month`/`year`); anything else becomes `undefined`. */
function toTimeRange(value?: string): string | undefined {
  switch (value) {
    case "day":
    case "week":
    case "month":
    case "year":
      return value;
    default:
      return undefined;
  }
}
335
+
336
/**
 * Derives a human-readable title from a URL for results that lack one.
 *
 * Format: `<domain> — <last path segment>`, where the domain has a leading
 * `www.` removed and the segment has `-`/`_` turned into spaces and a
 * trailing 2-4 character extension dropped. Only the domain is returned
 * when there is no usable segment.
 *
 * @returns The title, or the first 80 characters of the input when it is not a valid URL.
 */
function titleFromUrl(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return url.slice(0, 80);
  }

  const domain = parsed.hostname.replace(/^www\./, "");
  const segments = parsed.pathname.split("/").filter(Boolean);
  if (segments.length === 0) return domain;

  const tail = segments[segments.length - 1].replace(/[-_]/g, " ").replace(/\.\w{2,4}$/, "");
  if (!tail) return domain;
  return `${domain} — ${tail}`;
}
347
+
348
/**
 * Performs an HTTP request and returns the response body parsed as JSON.
 *
 * A default `User-Agent` is sent unless the caller supplies one. Caller
 * headers are accepted in any form `fetch` allows (plain object, `Headers`
 * instance, tuple array); the previous object spread silently dropped
 * `Headers` instances and tuple arrays.
 *
 * @param url       Request URL.
 * @param init      Standard fetch options (method, headers, body, …).
 * @param timeoutMs Abort the request after this many milliseconds (default 30000).
 * @returns The parsed JSON body; `{}` when the body is empty or not valid JSON.
 * @throws ProviderRequestError on a non-2xx status (with `statusCode`, and
 *         `transient` set for codes in TRANSIENT_HTTP_CODES), on timeout, and
 *         on network failure (both marked transient).
 */
async function httpJson(url: string, init: RequestInit, timeoutMs = 30000): Promise<any> {
  // Render an error payload as text. APIs often send `{ error: { message } }`;
  // interpolating that object directly produced "[object Object]".
  const describe = (value: unknown): string => {
    if (typeof value === "string") return value;
    if (value === null || typeof value !== "object") return "";
    const nested = (value as Record<string, unknown>).message;
    if (typeof nested === "string" && nested) return nested;
    try {
      return JSON.stringify(value);
    } catch {
      return String(value);
    }
  };

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const headers = new Headers(init.headers);
    if (!headers.has("User-Agent")) headers.set("User-Agent", "ClawdBot-WebSearchPlus/3.0");

    const res = await fetch(url, { ...init, headers, signal: controller.signal });
    const text = await res.text();
    let data: any = null;
    try { data = text ? JSON.parse(text) : {}; } catch {}
    if (!res.ok) {
      const detail = describe(data?.error) || describe(data?.message) || text || res.statusText;
      throw new ProviderRequestError(`${detail} (HTTP ${res.status})`, res.status, TRANSIENT_HTTP_CODES.has(res.status));
    }
    return data ?? {};
  } catch (error: any) {
    if (error?.name === "AbortError") throw new ProviderRequestError(`Request timed out after ${timeoutMs}ms`, undefined, true);
    if (error instanceof ProviderRequestError) throw error;
    throw new ProviderRequestError(`Network error: ${String(error?.message || error)}`, undefined, true);
  } finally {
    clearTimeout(timer);
  }
}
69
376
 
70
- child.on("error", (err: Error) => {
71
- if (!settled) {
72
- settled = true;
73
- clearTimeout(timer);
74
- // Only expose the error message, not the full error object (may contain env/args)
75
- const safeMsg = err.code === "ENOENT" ? "python3 not found" : "Process error";
76
- resolve({ stdout: "", stderr: safeMsg, code: 1 });
377
/** Extracts the embedded IPv4 address from an IPv4-mapped IPv6 address (`::ffff:a.b.c.d` or `::ffff:hhhh:hhhh`), else `null`. */
function ipv4FromMappedIpv6(address: string): string | null {
  const lower = address.toLowerCase();
  const dotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(lower);
  if (dotted) return dotted[1];
  const hex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(lower);
  if (!hex) return null;
  const high = parseInt(hex[1], 16);
  const low = parseInt(hex[2], 16);
  return `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`;
}

/** True when `address` is a loopback, private, link-local, shared (CGNAT) or unspecified IP. */
function isInternalIpAddress(address: string): boolean {
  const v4 = net.isIPv4(address) ? address : ipv4FromMappedIpv6(address);
  if (v4) {
    return (
      /^0\./.test(v4) ||
      /^10\./.test(v4) ||
      /^127\./.test(v4) ||
      /^169\.254\./.test(v4) ||
      /^192\.168\./.test(v4) ||
      /^172\.(1[6-9]|2\d|3[0-1])\./.test(v4) ||
      /^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\./.test(v4)
    );
  }
  const lower = address.toLowerCase();
  // ::1 loopback, :: unspecified, fc00::/7 unique-local, fe80::/10 link-local.
  return lower === "::1" || lower === "::" || lower.startsWith("fc") || lower.startsWith("fd") || /^fe[89ab][0-9a-f]:/.test(lower);
}

/**
 * Validates a user-supplied SearXNG instance URL and guards against SSRF.
 *
 * Checks, in order: parsable URL, http/https scheme, non-empty hostname,
 * not a known cloud-metadata endpoint, and (unless `SEARXNG_ALLOW_PRIVATE`
 * is exactly "1") that every resolved address is a public IP.
 *
 * Hardening over the previous version:
 *  - IPv6 literals are unbracketed before the IP/DNS checks;
 *  - IPv4-mapped IPv6 addresses are judged by their embedded IPv4 address;
 *  - a trailing-dot FQDN or a mapped-IPv6 form of a metadata host is still blocked;
 *  - link-local covers all of fe80::/10; 0.0.0.0/8 and 100.64.0.0/10 are internal.
 *
 * NOTE(review): the resolution check cannot prevent DNS rebinding between
 * this validation and the later request.
 *
 * @returns The normalized URL without a trailing slash.
 * @throws ProviderConfigError when any check fails.
 */
async function validateSearxngUrl(input: string, env: Record<string, string>): Promise<string> {
  let u: URL;
  try {
    u = new URL(input);
  } catch {
    throw new ProviderConfigError("Invalid SearXNG URL");
  }
  if (!["http:", "https:"].includes(u.protocol)) throw new ProviderConfigError(`SearXNG URL must use http or https, got ${u.protocol}`);
  if (!u.hostname) throw new ProviderConfigError("SearXNG URL must include a hostname");

  // URL.hostname keeps the [] around IPv6 literals; dns/net want the bare address.
  const bareHost = u.hostname.replace(/^\[|\]$/g, "");
  const blockKey = ipv4FromMappedIpv6(bareHost) ?? bareHost.replace(/\.$/, "").toLowerCase();
  const blockedHosts = new Set(["169.254.169.254", "metadata.google.internal", "metadata.internal"]);
  if (blockedHosts.has(blockKey)) throw new ProviderConfigError("SearXNG URL blocked: metadata endpoint");

  const allowPrivate = String(env.SEARXNG_ALLOW_PRIVATE || "").trim() === "1";
  if (!allowPrivate) {
    const records = await dns.lookup(bareHost, { all: true, verbatim: true }).catch(() => [] as dns.LookupAddress[]);
    if (!records.length && net.isIP(bareHost)) records.push({ address: bareHost, family: net.isIP(bareHost) as 4 | 6 });
    if (!records.length) throw new ProviderConfigError(`SearXNG URL blocked: cannot resolve hostname ${u.hostname}`);
    for (const record of records) {
      if (isInternalIpAddress(record.address)) {
        throw new ProviderConfigError(`SearXNG URL blocked: ${u.hostname} resolves to private/internal IP ${record.address}`);
      }
    }
  }
  return u.toString().replace(/\/$/, "");
}
81
407
 
82
- const PLUGIN_DIR = getPluginDir();
83
- const scriptPath = path.join(PLUGIN_DIR, "scripts", "search.py");
408
+ const SHOPPING_SIGNALS: Record<string, number> = {
409
+ "\\bhow much\\b": 4.0, "\\bprice of\\b": 4.0, "\\bcost of\\b": 4.0, "\\bprices?\\b": 3.0,
410
+ "\\$\\d+|\\d+\\s*dollars?": 3.0, "€\\d+|\\d+\\s*euros?": 3.0, "£\\d+|\\d+\\s*pounds?": 3.0,
411
+ "\\bpreis(e)?\\b": 3.5, "\\bkosten\\b": 3.0, "\\bwieviel\\b": 3.5, "\\bwie viel\\b": 3.5, "\\bwas kostet\\b": 4.0,
412
+ "\\bbuy\\b": 3.5, "\\bpurchase\\b": 3.5, "\\border\\b(?!\\s+by)": 3.0, "\\bshopping\\b": 3.5, "\\bshop for\\b": 3.5,
413
+ "\\bwhere to (buy|get|purchase)\\b": 4.0, "\\bkaufen\\b": 3.5, "\\bbestellen\\b": 3.5, "\\bwo kaufen\\b": 4.0,
414
+ "\\bhändler\\b": 3.0, "\\bshop\\b": 2.5, "\\bdeal(s)?\\b": 3.0, "\\bdiscount(s)?\\b": 3.0, "\\bsale\\b": 2.5,
415
+ "\\bcheap(er|est)?\\b": 3.0, "\\baffordable\\b": 2.5, "\\bbudget\\b": 2.5, "\\bbest price\\b": 3.5,
416
+ "\\bcompare prices\\b": 3.5, "\\bcoupon\\b": 3.0, "\\bgünstig(er|ste)?\\b": 3.0, "\\bbillig(er|ste)?\\b": 3.0,
417
+ "\\bangebot(e)?\\b": 3.0, "\\brabatt\\b": 3.0, "\\baktion\\b": 2.5, "\\bschnäppchen\\b": 3.0,
418
+ "\\bvs\\.?\\b": 2.0, "\\bversus\\b": 2.0, "\\bor\\b.*\\bwhich\\b": 2.0, "\\bspecs?\\b": 2.5,
419
+ "\\bspecifications?\\b": 2.5, "\\breview(s)?\\b": 2.0, "\\brating(s)?\\b": 2.0, "\\bunboxing\\b": 2.5,
420
+ "\\btest\\b": 2.5, "\\bbewertung(en)?\\b": 2.5, "\\btechnische daten\\b": 3.0, "\\bspezifikationen\\b": 2.5,
421
+ };
422
/**
 * Weighted regex signals (English and German) marking a query as
 * research/explanatory. Keys are regex sources; values are the weight added
 * when the pattern matches.
 *
 * JavaScript's `\b` is ASCII-only, so it can never match directly before a
 * word that starts with an umlaut. Such patterns use an explicit
 * "not preceded by a word character" lookbehind instead of a leading `\b`.
 */
const RESEARCH_SIGNALS: Record<string, number> = {
  "\\bhow does\\b": 4.0, "\\bhow do\\b": 3.5, "\\bwhy does\\b": 4.0, "\\bwhy do\\b": 3.5, "\\bwhy is\\b": 3.5,
  "\\bexplain\\b": 4.0, "\\bexplanation\\b": 4.0, "\\bwhat is\\b": 3.0, "\\bwhat are\\b": 3.0, "\\bdefine\\b": 3.5,
  "\\bdefinition of\\b": 3.5, "\\bmeaning of\\b": 3.0, "\\banalyze\\b": 3.5, "\\banalysis\\b": 3.5,
  "\\bcompare\\b(?!\\s*prices?)": 3.0, "\\bcomparison\\b": 3.0, "\\bstatus of\\b": 3.5, "\\bstatus\\b": 2.5,
  "\\bwhat happened with\\b": 4.0, "\\bpros and cons\\b": 4.0, "\\badvantages?\\b": 3.0, "\\bdisadvantages?\\b": 3.0,
  "\\bbenefits?\\b": 2.5, "\\bdrawbacks?\\b": 3.0, "\\bdifference between\\b": 3.5, "\\bunderstand\\b": 3.0,
  "\\blearn(ing)?\\b": 2.5, "\\btutorial\\b": 3.0, "\\bguide\\b": 2.5, "\\bhow to\\b": 2.0, "\\bstep by step\\b": 3.0,
  "\\bin[- ]depth\\b": 3.0, "\\bdetailed\\b": 2.5, "\\bcomprehensive\\b": 3.0, "\\bthorough\\b": 2.5,
  "\\bdeep dive\\b": 3.5, "\\boverall\\b": 2.0, "\\bsummary\\b": 2.0, "\\bstudy\\b": 2.5, "\\bresearch shows\\b": 3.5,
  "\\baccording to\\b": 2.5, "\\bevidence\\b": 3.0, "\\bscientific\\b": 3.0, "\\bhistory of\\b": 3.0,
  "\\bbackground\\b": 2.5, "\\bcontext\\b": 2.5, "\\bimplications?\\b": 3.0, "\\bwie funktioniert\\b": 4.0,
  "\\bwarum\\b": 3.5, "\\berklär(en|ung)?\\b": 4.0, "\\bwas ist\\b": 3.0, "\\bwas sind\\b": 3.0, "\\bbedeutung\\b": 3.0,
  "\\banalyse\\b": 3.5, "\\bvergleich(en)?\\b": 3.0, "\\bvor- und nachteile\\b": 4.0, "\\bvorteile\\b": 3.0,
  "\\bnachteile\\b": 3.0, "\\bunterschied(e)?\\b": 3.5, "\\bverstehen\\b": 3.0, "\\blernen\\b": 2.5,
  // Leading lookbehind instead of \b: "ü" is not an ASCII word character.
  "\\banleitung\\b": 3.0, "(?<![\\wäöüß])übersicht\\b": 2.5, "\\bhintergrund\\b": 2.5, "\\bzusammenfassung\\b": 2.5,
};
439
+ const DISCOVERY_SIGNALS: Record<string, number> = {
440
+ "\\bsimilar to\\b": 5.0, "\\blike\\s+\\w+\\.com": 4.5, "\\balternatives? to\\b": 5.0, "\\bcompetitors? (of|to)\\b": 4.5,
441
+ "\\bcompeting with\\b": 4.0, "\\brivals? (of|to)\\b": 4.0, "\\binstead of\\b": 3.0, "\\breplacement for\\b": 3.5,
442
+ "\\bcompanies (like|that|doing|building)\\b": 4.5, "\\bstartups? (like|that|doing|building)\\b": 4.5, "\\bwho else\\b": 4.0,
443
+ "\\bother (companies|startups|tools|apps)\\b": 3.5, "\\bfind (companies|startups|tools|examples?)\\b": 4.5,
444
+ "\\bevents? in\\b": 4.0, "\\bthings to do in\\b": 4.5, "\\bseries [a-d]\\b": 4.0, "\\byc\\b|y combinator": 4.0,
445
+ "\\bfund(ed|ing|raise)\\b": 3.5, "\\bventure\\b": 3.0, "\\bvaluation\\b": 3.0, "\\bresearch papers? (on|about)\\b": 4.0,
446
+ "\\barxiv\\b": 4.5, "\\bgithub (projects?|repos?)\\b": 4.5, "\\bopen source\\b.*\\bprojects?\\b": 4.0,
447
+ "\\btweets? (about|on)\\b": 3.5, "\\bblogs? (about|on|like)\\b": 3.0, "https?://[^\\s]+": 5.0, "\\b\\w+\\.(com|org|io|ai|co|dev)\\b": 3.5,
448
+ };
449
/**
 * Weighted regex signals (English and German) marking a query as local
 * (places, weather, directions) or news/recency oriented. Keys are regex
 * sources; values are the weight added when the pattern matches.
 *
 * JavaScript's `\b` is ASCII-only, so it can never match directly before a
 * word that starts with an umlaut. Such patterns use an explicit
 * "not preceded by a word character" lookbehind instead of a leading `\b`.
 */
const LOCAL_NEWS_SIGNALS: Record<string, number> = {
  "\\bnear me\\b": 4.0, "\\bnearby\\b": 3.5, "\\blocal\\b": 3.0, "\\bin (my )?(city|area|town|neighborhood)\\b": 3.5,
  "\\brestaurants?\\b": 2.5, "\\bhotels?\\b": 2.5, "\\bcafes?\\b": 2.5, "\\bstores?\\b": 2.0, "\\bdirections? to\\b": 3.5,
  "\\bmap of\\b": 3.0, "\\bphone number\\b": 3.0, "\\baddress of\\b": 3.0, "\\bopen(ing)? hours\\b": 3.0,
  "\\bweather\\b": 4.0, "\\bforecast\\b": 3.5, "\\btemperature\\b": 3.0, "\\btime in\\b": 3.0,
  "\\blatest\\b": 2.5, "\\brecent\\b": 2.5, "\\btoday\\b": 2.5, "\\bbreaking\\b": 3.5, "\\bnews\\b": 2.5,
  "\\bheadlines?\\b": 3.0, "\\b202[4-9]\\b": 2.0, "\\blast (week|month|year)\\b": 2.0, "\\bin der nähe\\b": 4.0,
  // Leading lookbehind instead of \b: "ö" is not an ASCII word character.
  "\\bin meiner nähe\\b": 4.0, "(?<![\\wäöüß])öffnungszeiten\\b": 3.0, "\\badresse von\\b": 3.0, "\\bweg(beschreibung)? nach\\b": 3.5,
  "\\bheute\\b": 2.5, "\\bmorgen\\b": 2.0, "\\baktuell\\b": 2.5, "\\bnachrichten\\b": 3.0,
};
459
+ const RAG_SIGNALS: Record<string, number> = {
460
+ "\\brag\\b": 4.5, "\\bcontext for\\b": 4.0, "\\bsummarize\\b": 3.5, "\\bbrief(ly)?\\b": 3.0, "\\bquick overview\\b": 3.5,
461
+ "\\btl;?dr\\b": 4.0, "\\bkey (points|facts|info)\\b": 3.5, "\\bmain (points|takeaways)\\b": 3.5,
462
+ "\\b(web|online)\\s+and\\s+news\\b": 4.0, "\\ball sources\\b": 3.5, "\\bcomprehensive (search|overview)\\b": 3.5,
463
+ "\\blatest\\s+(news|updates)\\b": 3.0, "\\bcurrent (events|situation|status)\\b": 3.5, "\\bright now\\b": 3.0,
464
+ "\\bas of today\\b": 3.5, "\\bup.to.date\\b": 3.5, "\\breal.time\\b": 4.0, "\\blive\\b": 2.5,
465
+ "\\bwhat'?s happening with\\b": 3.5, "\\bwhat'?s the latest\\b": 4.0, "\\bupdates?\\s+on\\b": 3.5, "\\bstatus of\\b": 3.0,
466
+ "\\bsituation (in|with|around)\\b": 3.5,
467
+ };
468
+ const DIRECT_ANSWER_SIGNALS: Record<string, number> = {
469
+ "\\bwhat is\\b": 3.0, "\\bwhat are\\b": 2.5, "\\bcurrent status\\b": 4.0, "\\bstatus of\\b": 3.5, "\\bstatus\\b": 2.5,
470
+ "\\bwhat happened with\\b": 4.0, "\\bwhat'?s happening with\\b": 4.0, "\\bas of (today|now)\\b": 4.0, "\\bthis weekend\\b": 3.5,
471
+ "\\bevents? in\\b": 3.5, "\\bthings to do in\\b": 4.0, "\\bnear me\\b": 3.0, "\\bcan you (tell me|summarize|explain)\\b": 3.5,
472
+ "\\bwann\\b": 3.0, "\\bwer\\b": 3.0, "\\bwo\\b": 2.5, "\\bwie viele\\b": 3.0,
473
+ };
474
+ const PRIVACY_SIGNALS: Record<string, number> = {
475
+ "\\bprivate(ly)?\\b": 4.0, "\\banonymous(ly)?\\b": 4.0, "\\bwithout tracking\\b": 4.5, "\\bno track(ing)?\\b": 4.5,
476
+ "\\bprivacy\\b": 3.5, "\\bprivacy.?focused\\b": 4.5, "\\bprivacy.?first\\b": 4.5, "\\bduckduckgo alternative\\b": 4.5,
477
+ "\\bprivate search\\b": 5.0, "\\bprivat\\b": 4.0, "\\banonym\\b": 4.0, "\\bohne tracking\\b": 4.5,
478
+ "\\bdatenschutz\\b": 4.0, "\\baggregate results?\\b": 4.0, "\\bmultiple sources?\\b": 4.0, "\\bdiverse (results|perspectives|sources)\\b": 4.0,
479
+ "\\bfrom (all|multiple|different) (engines?|sources?)\\b": 4.5, "\\bmeta.?search\\b": 5.0, "\\ball engines?\\b": 4.0,
480
+ "\\bverschiedene quellen\\b": 4.0, "\\baus mehreren quellen\\b": 4.0, "\\balle suchmaschinen\\b": 4.5,
481
+ "\\bfree search\\b": 3.5, "\\bno api cost\\b": 4.0, "\\bself.?hosted search\\b": 5.0, "\\bzero cost\\b": 3.5,
482
+ "\\bbudget\\b(?!\\s*(laptop|phone|option))\\b": 2.5, "\\bkostenlos(e)?\\s+suche\\b": 3.5, "\\bkeine api.?kosten\\b": 4.0,
483
+ };
484
+ const EXA_DEEP_SIGNALS: Record<string, number> = {
485
+ "\\bsynthesi[sz]e\\b": 5.0, "\\bdeep research\\b": 5.0, "\\bcomprehensive (analysis|report|overview|survey)\\b": 4.5,
486
+ "\\bacross (multiple|many|several) (sources|documents|papers)\\b": 4.5, "\\baggregat(e|ing) (information|data|results)\\b": 4.0,
487
+ "\\bcross.?referenc": 4.5, "\\bsec filings?\\b": 4.5, "\\bannual reports?\\b": 4.0, "\\bearnings (call|report|transcript)\\b": 4.5,
488
+ "\\bfinancial analysis\\b": 4.0, "\\bliterature (review|survey)\\b": 5.0, "\\bacademic literature\\b": 4.5,
489
+ "\\bstate of the (art|field|industry)\\b": 4.0, "\\bcompile (a |the )?(report|findings|results)\\b": 4.5,
490
+ "\\bsummariz(e|ing) (research|papers|studies)\\b": 4.0, "\\bmultiple documents?\\b": 4.0, "\\bdossier\\b": 4.5,
491
+ "\\bdue diligence\\b": 4.5, "\\bstructured (output|data|report)\\b": 4.0, "\\bmarket research\\b": 4.0,
492
+ "\\bindustry (report|analysis|overview)\\b": 4.0, "\\bresearch (on|about|into)\\b": 4.0, "\\bwhitepaper\\b": 4.5,
493
+ "\\btechnical report\\b": 4.0, "\\bsurvey of\\b": 4.5, "\\bmeta.?analysis\\b": 5.0, "\\bsystematic review\\b": 5.0,
494
+ "\\bcase study\\b": 3.5, "\\bbenchmark(s|ing)?\\b": 3.5, "\\btiefenrecherche\\b": 5.0, "\\bumfassende (analyse|übersicht|recherche)\\b": 4.5,
495
+ "\\baus mehreren quellen zusammenfassen\\b": 4.5, "\\bmarktforschung\\b": 4.0,
496
+ };
497
+ const EXA_DEEP_REASONING_SIGNALS: Record<string, number> = {
498
+ "\\bdeep.?reasoning\\b": 6.0, "\\bcomplex (analysis|reasoning|research)\\b": 4.5, "\\bcontradictions?\\b": 4.5,
499
+ "\\breconcil(e|ing)\\b": 5.0, "\\bcritical(ly)? analyz": 4.5, "\\bweigh(ing)? (the )?evidence\\b": 4.5,
500
+ "\\bcompeting (claims|theories|perspectives)\\b": 4.5, "\\bcomplex financial\\b": 4.5, "\\bregulatory (analysis|compliance|landscape)\\b": 4.5,
501
+ "\\blegal analysis\\b": 4.5, "\\bcomprehensive (due diligence|investigation)\\b": 5.0, "\\bpatent (landscape|analysis|search)\\b": 4.5,
502
+ "\\bmarket intelligence\\b": 4.5, "\\bcompetitive (intelligence|landscape)\\b": 4.5, "\\btrade.?offs?\\b": 4.0,
503
+ "\\bpros and cons of\\b": 4.0, "\\bshould I (use|choose|pick)\\b": 3.5, "\\bwhich is better\\b": 4.0,
504
+ "\\bkomplexe analyse\\b": 4.5, "\\bwidersprüche\\b": 4.5, "\\bquellen abwägen\\b": 4.5, "\\brechtliche analyse\\b": 4.5,
505
+ "\\bvergleich(e|en)?\\b": 3.5,
506
+ };
507
+ const BRAND_PATTERNS = [
508
+ "\\b(apple|iphone|ipad|macbook|airpods?)\\b", "\\b(samsung|galaxy)\\b", "\\b(google|pixel)\\b", "\\b(microsoft|surface|xbox)\\b",
509
+ "\\b(sony|playstation)\\b", "\\b(nvidia|geforce|rtx)\\b", "\\b(amd|ryzen|radeon)\\b", "\\b(intel|core i[3579])\\b",
510
+ "\\b(dell|hp|lenovo|asus|acer)\\b", "\\b(lg|tcl|hisense)\\b", "\\b(laptop|phone|tablet|tv|monitor|headphones?|earbuds?)\\b",
511
+ "\\b(camera|lens|drone)\\b", "\\b(watch|smartwatch|fitbit|garmin)\\b", "\\b(router|modem|wifi)\\b", "\\b(keyboard|mouse|gaming)\\b",
512
+ ];
84
513
 
85
- export default function (api: any) {
86
- // Bridge OpenClaw config fields to env vars expected by search.py
87
- const configEnv: Record<string, string> = {};
88
- const pluginConfig: Record<string, string> = (api.pluginConfig ?? {}) as Record<string, string>;
89
- const configKeyMap: Record<string, string> = {
90
- serperApiKey: "SERPER_API_KEY",
91
- tavilyApiKey: "TAVILY_API_KEY",
92
- queritApiKey: "QUERIT_API_KEY",
93
- exaApiKey: "EXA_API_KEY",
94
- perplexityApiKey: "PERPLEXITY_API_KEY",
95
- kilocodeApiKey: "KILOCODE_API_KEY",
96
- youApiKey: "YOU_API_KEY",
97
- searxngInstanceUrl: "SEARXNG_INSTANCE_URL",
514
+ class QueryAnalyzer {
515
+ calculateSignalScore(query: string, signals: Record<string, number>) {
516
+ const q = query.toLowerCase();
517
+ const matches: any[] = [];
518
+ let total = 0;
519
+ for (const [pattern, weight] of Object.entries(signals)) {
520
+ const regex = new RegExp(pattern, "i");
521
+ const found = q.match(regex);
522
+ if (found) {
523
+ matches.push({ pattern, matched: found[0], weight });
524
+ total += weight;
525
+ }
526
+ }
527
+ return { total, matches };
528
+ }
529
+ detectProductBrandCombo(query: string): number {
530
+ const hasBrand = BRAND_PATTERNS.some((p) => new RegExp(p, "i").test(query));
531
+ const productIndicators = ["\\b(buy|price|specs?|review|vs|compare)\\b", "\\b(pro|max|plus|mini|ultra|lite)\\b", "\\b\\d+\\s*(gb|tb|inch|mm|hz)\\b"];
532
+ const hasProduct = productIndicators.some((p) => new RegExp(p, "i").test(query));
533
+ if (hasBrand && hasProduct) return 3;
534
+ if (hasBrand) return 1.5;
535
+ return 0;
536
+ }
537
+ detectUrl(query: string): string | null {
538
+ const found = query.match(/https?:\/\/[^\s]+|\b\w+\.(com|org|io|ai|co|dev|net|app)\b/i);
539
+ return found?.[0] || null;
540
+ }
541
+ assessQueryComplexity(query: string) {
542
+ const words = query.trim().split(/\s+/).filter(Boolean);
543
+ const wordCount = words.length;
544
+ const questionWords = (query.match(/\b(what|why|how|when|where|which|who|whose|whom)\b/gi) || []).length;
545
+ const clauseMarkers = (query.match(/\b(and|but|or|because|since|while|although|if|when)\b/gi) || []).length;
546
+ let complexityScore = 0;
547
+ if (wordCount > 10) complexityScore += 1.5;
548
+ if (wordCount > 20) complexityScore += 1.0;
549
+ if (questionWords > 1) complexityScore += 1.0;
550
+ if (clauseMarkers > 0) complexityScore += clauseMarkers * 0.5;
551
+ return { word_count: wordCount, question_words: questionWords, clause_markers: clauseMarkers, complexity_score: complexityScore, is_complex: complexityScore > 2 };
552
+ }
553
/**
 * Estimates how much the query cares about fresh results.
 *
 * Each pattern contributes its weight at most once; the weights of all
 * matching patterns are summed. The query is recency-focused when the sum
 * is strictly greater than 2.
 *
 * @param query - Raw search query.
 * @returns The summed score and the `is_recency_focused` flag.
 */
detectRecencyIntent(query: string) {
  const weightedPatterns: ReadonlyArray<readonly [RegExp, number]> = [
    [/\b(latest|newest|recent|current)\b/i, 2.5],
    [/\b(today|yesterday|this week|this month)\b/i, 3],
    [/\b(202[4-9]|2030)\b/i, 2],
    [/\b(breaking|live|just|now)\b/i, 3],
    [/\blast (hour|day|week|month)\b/i, 2.5],
  ];

  const score = weightedPatterns.reduce(
    (sum, [pattern, weight]) => (pattern.test(query) ? sum + weight : sum),
    0,
  );

  return { is_recency_focused: score > 2, score };
}
562
/**
 * Scores a query against every signal family and turns the totals into
 * per-provider routing scores.
 *
 * Besides the plain signal tables, three bonuses are folded in:
 * - a brand/product bonus is added to the shopping signals,
 * - a detected URL adds a fixed weight of 5 to the discovery signals,
 * - for complex queries the complexity score is added to the research signals.
 *
 * @param query - Raw search query.
 * @returns Detected URL, complexity and recency details, Exa depth scores,
 *          and the `provider_scores` / `provider_matches` maps keyed by provider name.
 */
analyze(query: string) {
  const shopping = this.calculateSignalScore(query, SHOPPING_SIGNALS);
  const research = this.calculateSignalScore(query, RESEARCH_SIGNALS);
  const discovery = this.calculateSignalScore(query, DISCOVERY_SIGNALS);
  const localNews = this.calculateSignalScore(query, LOCAL_NEWS_SIGNALS);
  const rag = this.calculateSignalScore(query, RAG_SIGNALS);
  const privacy = this.calculateSignalScore(query, PRIVACY_SIGNALS);
  const direct = this.calculateSignalScore(query, DIRECT_ANSWER_SIGNALS);
  const exaDeep = this.calculateSignalScore(query, EXA_DEEP_SIGNALS);
  const exaDeepReasoning = this.calculateSignalScore(query, EXA_DEEP_REASONING_SIGNALS);

  const brandBonus = this.detectProductBrandCombo(query);
  if (brandBonus > 0) {
    shopping.total += brandBonus;
    shopping.matches.push({ pattern: "product_brand_combo", matched: "brand + product detected", weight: brandBonus });
  }

  const detectedUrl = this.detectUrl(query);
  if (detectedUrl) {
    discovery.total += 5;
    discovery.matches.push({ pattern: "url_detected", matched: detectedUrl, weight: 5 });
  }

  const complexity = this.assessQueryComplexity(query);
  if (complexity.is_complex) {
    research.total += complexity.complexity_score;
    research.matches.push({
      pattern: "query_complexity",
      matched: `complex query (${complexity.word_count} words)`,
      weight: complexity.complexity_score,
    });
  }

  const recency = this.detectRecencyIntent(query);

  // The word boundary must wrap the whole alternation. Previously only
  // "similar" was anchored on the left, so words that merely end in
  // "examples"/"alternatives" (e.g. "counterexamples") earned the Exa bonus.
  const similarityBonus = /\b(similar|alternatives?|examples?)\b/i.test(query) ? 1 : 0;

  // For complex queries the complexity score is already part of research.total,
  // so it is only added separately for tavily when the query is not complex.
  const tavilyComplexityShare = complexity.is_complex ? 0 : complexity.complexity_score;

  return {
    detected_url: detectedUrl,
    complexity,
    recency_focused: recency.is_recency_focused,
    recency_score: recency.score,
    exa_deep_score: exaDeep.total,
    exa_deep_reasoning_score: exaDeepReasoning.total,
    provider_scores: {
      serper: shopping.total + localNews.total + recency.score * 0.35,
      tavily: research.total + tavilyComplexityShare + recency.score * 0.2,
      querit: research.total * 0.65 + rag.total * 0.35 + recency.score * 0.45,
      exa: discovery.total + similarityBonus + exaDeep.total * 0.5 + exaDeepReasoning.total * 0.5,
      perplexity: direct.total + localNews.total * 0.4 + recency.score * 0.55,
      you: rag.total + recency.score * 0.25,
      searxng: privacy.total,
    },
    provider_matches: {
      serper: [...shopping.matches, ...localNews.matches],
      tavily: research.matches,
      querit: research.matches,
      exa: [...discovery.matches, ...exaDeep.matches, ...exaDeepReasoning.matches],
      perplexity: direct.matches,
      you: rag.matches,
      searxng: privacy.matches,
    },
  };
}
617
/**
 * Picks the provider to use for a query from the providers that are available.
 *
 * The highest-scoring available provider wins; ties are broken by the fixed
 * priority order tavily, querit, exa, perplexity, serper, you, searxng.
 * Confidence blends the normalized top score (capped at 15 points, weight 0.6)
 * with the relative margin over the runner-up (weight 0.4).
 *
 * @param query - Raw search query.
 * @param availableProviders - Providers that may be chosen.
 * @returns Routing decision with confidence, per-provider scores, the top
 *          five signals of the winner, and the Exa depth. When no provider is
 *          available, a zero-confidence "serper" placeholder is returned.
 */
route(query: string, availableProviders: ProviderName[]) {
  const analysis = this.analyze(query);
  const allScores = analysis.provider_scores as Record<ProviderName, number>;

  // Restrict the score table to providers that can actually be used.
  const available = {} as Record<ProviderName, number>;
  for (const name of availableProviders) available[name] = allScores[name] ?? 0;
  const providers = Object.keys(available) as ProviderName[];

  if (providers.length === 0) {
    return {
      provider: "serper" as ProviderName,
      confidence: 0,
      confidence_level: "low",
      reason: "no_available_providers",
      scores: {},
      top_signals: [],
      exa_depth: "normal",
    };
  }

  const availableScores = providers.map((name) => available[name]);
  const maxScore = Math.max(...availableScores);
  const winners = providers.filter((name) => available[name] === maxScore);

  // Tie-break order between equally scored providers.
  const priority: ProviderName[] = ["tavily", "querit", "exa", "perplexity", "serper", "you", "searxng"];
  const winner = priority.find((name) => winners.includes(name)) || winners[0];

  const rankedScores = [...availableScores].sort((a, b) => b - a);
  const secondBest = rankedScores[1] || 0;
  const margin = maxScore > 0 ? (maxScore - secondBest) / maxScore : 0;
  const normalizedScore = Math.min(maxScore / 15, 1);
  const confidence = maxScore === 0 ? 0 : Number((normalizedScore * 0.6 + margin * 0.4).toFixed(3));

  // Deep modes only apply when Exa itself won the routing.
  let exaDepth: "normal" | "deep" | "deep-reasoning" = "normal";
  if (winner === "exa") {
    if ((analysis.exa_deep_reasoning_score || 0) >= 4) exaDepth = "deep-reasoning";
    else if ((analysis.exa_deep_score || 0) >= 4) exaDepth = "deep";
  }

  const confidenceLevel = confidence >= 0.7 ? "high" : confidence >= 0.4 ? "medium" : "low";
  let reason = "low_confidence_match";
  if (maxScore === 0) reason = "no_signals_matched";
  else if (confidence >= 0.7) reason = "high_confidence_match";
  else if (confidence >= 0.4) reason = "moderate_confidence_match";

  const roundedScores = Object.fromEntries(
    providers.map((name) => [name, Number((available[name] || 0).toFixed(2))]),
  );
  const topSignals = [...(analysis.provider_matches[winner] || [])]
    .sort((a: any, b: any) => b.weight - a.weight)
    .slice(0, 5)
    .map((signal: any) => ({ matched: signal.matched, weight: signal.weight }));

  return {
    provider: winner,
    confidence,
    confidence_level: confidenceLevel,
    reason,
    exa_depth: exaDepth,
    scores: roundedScores,
    top_signals: topSignals,
    analysis_summary: {
      query_length: query.trim().split(/\s+/).filter(Boolean).length,
      is_complex: analysis.complexity.is_complex,
      has_url: !!analysis.detected_url,
      recency_focused: analysis.recency_focused,
    },
  };
}
654
+ }
103
655
 
104
- api.registerTool(
105
- {
106
- name: "web_search_plus",
107
- description:
108
- "Search the web using multi-provider intelligent routing (Serper/Google, Tavily/Research, Querit/Multilingual AI Search, Exa/Neural+Deep, Perplexity, You.com, SearXNG). Automatically selects the best provider based on query intent. Use for ALL web searches. Set depth='deep' for multi-source synthesis, 'deep-reasoning' for complex cross-document analysis.",
109
- parameters: Type.Object({
110
- query: Type.String({ description: "Search query" }),
111
- provider: Type.Optional(
112
- Type.Union(
113
- [
114
- Type.Literal("serper"),
115
- Type.Literal("tavily"),
116
- Type.Literal("querit"),
117
- Type.Literal("exa"),
118
- Type.Literal("perplexity"),
119
- Type.Literal("you"),
120
- Type.Literal("searxng"),
121
- Type.Literal("auto"),
122
- ],
123
- {
124
- description:
125
- "Force a specific provider, or 'auto' for smart routing (default: auto)",
126
- },
127
- ),
128
- ),
129
- count: Type.Optional(
130
- Type.Number({ description: "Number of results (default: 5)" }),
131
- ),
132
- depth: Type.Optional(
133
- Type.Union(
134
- [
135
- Type.Literal("normal"),
136
- Type.Literal("deep"),
137
- Type.Literal("deep-reasoning"),
138
- ],
139
- {
140
- description:
141
- "Exa search depth: 'deep' synthesizes across sources (4-12s), 'deep-reasoning' for complex cross-reference analysis (12-50s). When provider is auto, depth may be auto-selected based on query complexity.",
142
- },
143
- ),
144
- ),
145
- time_range: Type.Optional(
146
- Type.Union(
147
- [
148
- Type.Literal("day"),
149
- Type.Literal("week"),
150
- Type.Literal("month"),
151
- Type.Literal("year"),
152
- ],
153
- {
154
- description:
155
- "Filter results by recency. Applies to Serper (as tbs), Perplexity (as search_recency_filter), Tavily/You.com (as freshness). Useful for news and current events.",
156
- },
157
- ),
158
- ),
159
- include_domains: Type.Optional(
160
- Type.Array(Type.String(), {
161
- description:
162
- "Only include results from these domains (e.g. ['arxiv.org', 'github.com']). Supported by Tavily and Exa.",
163
- }),
164
- ),
165
- exclude_domains: Type.Optional(
166
- Type.Array(Type.String(), {
167
- description:
168
- "Exclude results from these domains (e.g. ['reddit.com', 'pinterest.com']). Supported by Tavily and Exa.",
169
- }),
170
- ),
171
- }),
172
- async execute(
173
- _id: string,
174
- params: {
175
- query: string;
176
- provider?: string;
177
- count?: number;
178
- depth?: string;
179
- time_range?: string;
180
- include_domains?: string[];
181
- exclude_domains?: string[];
182
- },
183
- ) {
184
- if (!fs.existsSync(scriptPath)) {
185
- return {
186
- content: [{ type: "text", text: `Search failed: script not found at ${scriptPath}` }],
187
- };
188
- }
656
/**
 * Queries the Serper endpoint and normalizes the response.
 *
 * @param query - Search text, sent as `q`.
 * @param apiKey - Sent in the `X-API-KEY` header.
 * @param maxResults - Requested as `num` and used to cap the organic results.
 * @param timeRange - Optional `day`/`week`/`month`/`year`; mapped to a `tbs`
 *                    filter. Any other value is ignored.
 * @returns Normalized response. `answer` falls back from the answer box to the
 *          knowledge-graph description to the first result snippet.
 */
async function searchSerper(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const tbsByRange: Record<string, string> = { day: "qdr:d", week: "qdr:w", month: "qdr:m", year: "qdr:y" };
  const payload: Json = { q: query, gl: "us", hl: "en", num: maxResults, autocorrect: true };
  if (timeRange && tbsByRange[timeRange]) payload.tbs = tbsByRange[timeRange];

  const data = await httpJson("https://google.serper.dev/search", {
    method: "POST",
    headers: { "X-API-KEY": apiKey, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  // Score is purely positional: 1.0 for the first hit, dropping 0.1 per rank.
  const organic = (data.organic || []).slice(0, maxResults);
  const results = organic.map((item: any, rank: number) => ({
    title: item.title || "",
    url: item.link || "",
    snippet: item.snippet || "",
    score: Number((1 - rank * 0.1).toFixed(2)),
    date: item.date,
  }));

  const answerBox = data?.answerBox;
  const answer = answerBox?.answer || answerBox?.snippet || data?.knowledgeGraph?.description || results[0]?.snippet || "";

  return {
    provider: "serper",
    query,
    results,
    images: [],
    answer,
    knowledge_graph: data.knowledgeGraph,
    related_searches: (data.relatedSearches || []).map((related: any) => related.query),
  };
}
665
+
666
/**
 * Queries the Tavily search endpoint and normalizes the response.
 *
 * @param query - Search text.
 * @param apiKey - Sent in the request body as `api_key`.
 * @param maxResults - Requested as `max_results` and used to cap the results.
 * @param includeDomains - Optional allow-list; only sent when non-empty.
 * @param excludeDomains - Optional deny-list; only sent when non-empty.
 * @returns Normalized response using the `answer` and `images` fields of the reply.
 */
async function searchTavily(query: string, apiKey: string, maxResults: number, includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const payload: Json = {
    api_key: apiKey,
    query,
    max_results: maxResults,
    search_depth: "basic",
    topic: "general",
    include_images: false,
    include_answer: true,
    include_raw_content: false,
  };
  if (includeDomains?.length) payload.include_domains = includeDomains;
  if (excludeDomains?.length) payload.exclude_domains = excludeDomains;

  const data = await httpJson("https://api.tavily.com/search", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const hits = (data.results || []).slice(0, maxResults);
  const results = hits.map((item: any) => ({
    title: item.title || "",
    url: item.url || "",
    snippet: item.content || "",
    score: Number((item.score || 0).toFixed(3)),
  }));

  return { provider: "tavily", query, results, images: data.images || [], answer: data.answer || "" };
}
189
674
 
190
- const args = [scriptPath, "--query", params.query, "--compact"];
675
/**
 * Queries the Querit search endpoint and normalizes the response.
 *
 * @param query - Search text.
 * @param apiKey - Sent as a Bearer token.
 * @param maxResults - Requested as `count` and used to cap the results.
 * @param timeRange - Optional `day`/`week`/`month`/`year`; mapped to a
 *                    `timeRange.date` filter. Other values are ignored.
 * @param includeDomains - Optional site allow-list; only sent when non-empty.
 * @param excludeDomains - Optional site deny-list; only sent when non-empty.
 * @returns Normalized response; `answer` is the first result's snippet.
 * @throws ProviderRequestError when the reply carries `error_msg` or an
 *         `error_code` other than 0 or 200.
 */
async function searchQuerit(query: string, apiKey: string, maxResults: number, timeRange?: string, includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const dateCodeByRange: Record<string, string> = { day: "d1", week: "w1", month: "m1", year: "y1" };
  const dateCode = timeRange && dateCodeByRange[timeRange];

  const filters: Json = { languages: { include: ["en"] }, geo: { countries: { include: ["US"] } } };
  const hasInclude = Boolean(includeDomains?.length);
  const hasExclude = Boolean(excludeDomains?.length);
  if (hasInclude || hasExclude) {
    const sites: Json = {};
    if (hasInclude) sites.include = includeDomains;
    if (hasExclude) sites.exclude = excludeDomains;
    filters.sites = sites;
  }
  if (dateCode) filters.timeRange = { date: dateCode };

  const payload: Json = { query, count: maxResults, filters };
  const data = await httpJson("https://api.querit.ai/v1/search", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  // The API can report failures inside an otherwise successful HTTP reply.
  const hasErrorCode = data.error_code != null && ![0, 200].includes(data.error_code);
  if (data.error_msg || hasErrorCode) {
    throw new ProviderRequestError(data.error_msg || `Querit request failed with error_code=${data.error_code}`);
  }

  const rawHits = data?.results?.result || [];
  const results = rawHits.slice(0, maxResults).map((item: any, rank: number) => ({
    title: item.title || titleFromUrl(item.url || ""),
    url: item.url || "",
    snippet: item.snippet || item.page_age || "",
    // Positional score: 1.0 for the first hit, dropping 0.05 per rank.
    score: Number((1 - rank * 0.05).toFixed(3)),
    page_time: item.page_time,
    date: item.page_age,
    language: item.language,
  }));

  return {
    provider: "querit",
    query,
    results,
    images: [],
    answer: results[0]?.snippet || "",
    metadata: { search_id: data.search_id, time_range: dateCode },
  };
}
191
691
 
192
- if (params.provider && params.provider !== "auto") {
193
- args.push("--provider", params.provider);
194
- }
692
/**
 * Queries the Exa search endpoint and normalizes the response.
 *
 * In `normal` depth a neural search with text and highlights is requested.
 * In `deep` / `deep-reasoning` depth the depth name is sent as the search
 * `type`, the request timeout is raised to 55s (30s otherwise), and the
 * reply's `output.content` is surfaced as a leading "synthesis" result
 * followed by the individual sources.
 *
 * @param query - Search text.
 * @param apiKey - Sent in the `x-api-key` header.
 * @param maxResults - Requested as `numResults` and used to cap the source results.
 * @param exaDepth - `normal`, `deep`, or `deep-reasoning`.
 * @param includeDomains - Optional allow-list; only sent when non-empty.
 * @param excludeDomains - Optional deny-list; only sent when non-empty.
 * @returns Normalized response. In deep modes `answer` is the synthesis, or
 *          the first source snippet when no synthesis was returned.
 */
async function searchExa(query: string, apiKey: string, maxResults: number, exaDepth: "normal" | "deep" | "deep-reasoning", includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const isDeep = exaDepth === "deep" || exaDepth === "deep-reasoning";
  const body: Json = isDeep
    ? { query, numResults: maxResults, type: exaDepth, contents: { text: { maxCharacters: 5000, verbosity: "full" } } }
    : { query, numResults: maxResults, type: "neural", contents: { text: { maxCharacters: 2000, verbosity: "standard" }, highlights: { numSentences: 3, highlightsPerUrl: 2 } } };
  if (includeDomains?.length) body.includeDomains = includeDomains;
  if (excludeDomains?.length) body.excludeDomains = excludeDomains;

  const data = await httpJson(
    "https://api.exa.ai/search",
    { method: "POST", headers: { "x-api-key": apiKey, "Content-Type": "application/json" }, body: JSON.stringify(body) },
    isDeep ? 55000 : 30000,
  );

  if (isDeep) {
    const deepOutput = data.output || {};
    // Non-string content is serialized so it can still be shown as text.
    const synthesis = typeof deepOutput.content === "string" ? deepOutput.content : deepOutput.content ? JSON.stringify(deepOutput.content) : "";

    // Flatten per-field citations into one list, keeping the field's confidence.
    const grounding: any[] = [];
    for (const field of deepOutput.grounding || []) {
      for (const cite of field.citations || []) {
        grounding.push({ url: cite.url || "", title: cite.title || "", confidence: field.confidence, field: field.field });
      }
    }

    const sourceResults: SearchResult[] = [];
    for (const item of (data.results || []).slice(0, maxResults)) {
      const snippet = item.text ? String(item.text).slice(0, 800) : (item.highlights || [])[0] || "";
      sourceResults.push({ title: item.title || "", url: item.url || "", snippet, score: Number((item.score || 0).toFixed(3)), published_date: item.publishedDate, author: item.author, type: "source" });
    }

    const results: SearchResult[] = [];
    if (synthesis) {
      results.push({ title: `Exa ${exaDepth.replace(/-/g, " ")} synthesis`, url: "", snippet: synthesis, full_synthesis: synthesis, score: 1, grounding: grounding.slice(0, 10), type: "synthesis" });
    }
    results.push(...sourceResults);

    // Without a synthesis the sources start at index 0, so the fallback must
    // use the first source (the previous `results[1]` skipped it).
    const answer = synthesis || sourceResults[0]?.snippet || "";

    return {
      provider: "exa",
      query,
      exa_depth: exaDepth,
      results,
      images: [],
      answer,
      grounding,
      metadata: { synthesis_length: synthesis.length, source_count: (data.results || []).length },
    };
  }

  const results = (data.results || []).slice(0, maxResults).map((item: any) => ({
    title: item.title || "",
    url: item.url || "",
    snippet: item.text ? String(item.text).slice(0, 800) : Array.isArray(item.highlights) ? item.highlights.slice(0, 2).join(" ... ") : "",
    score: Number((item.score || 0).toFixed(3)),
    published_date: item.publishedDate,
    author: item.author,
  }));
  return { provider: "exa", query, results, images: [], answer: results[0]?.snippet || "" };
}
720
+
721
/**
 * Asks the `perplexity/sonar-pro` model (via the Kilo gateway chat-completions
 * endpoint) and reshapes the reply into search results.
 *
 * The answer text becomes the first result; cited sources follow. When the
 * reply has no `citations` array, URLs are extracted from the answer text.
 *
 * @param query - Sent as the user message.
 * @param apiKey - Sent as a Bearer token.
 * @param maxResults - Total result budget; citations get `maxResults - 1` slots.
 * @param timeRange - Optional; forwarded as `search_recency_filter`.
 * @returns Normalized response with the full answer and model/usage metadata.
 */
async function searchPerplexity(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const payload: Json = {
    model: "perplexity/sonar-pro",
    messages: [
      { role: "system", content: "Answer with concise factual summary and include source URLs." },
      { role: "user", content: query },
    ],
    temperature: 0.2,
  };
  if (timeRange) payload.search_recency_filter = timeRange;

  const data = await httpJson("https://api.kilo.ai/api/gateway/chat/completions", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const answer = String(data?.choices?.[0]?.message?.content || "").trim();

  let citations = Array.isArray(data?.citations) ? data.citations : [];
  if (citations.length === 0) {
    // Fall back to unique URLs mentioned inline in the answer.
    const inlineUrls = answer.match(/https?:\/\/[^\s)\]}>"']+/g) || [];
    citations = [...new Set(inlineUrls)];
  }

  const results: SearchResult[] = [];
  if (answer) {
    results.push({
      title: `Perplexity Answer: ${query.slice(0, 80)}`,
      url: "https://www.perplexity.ai",
      // Strip numeric citation markers such as [1] from the snippet.
      snippet: answer.replace(/\[\d+\]/g, "").trim().slice(0, 500),
      score: 1.0,
    });
  }

  const citationBudget = Math.max(0, maxResults - 1);
  citations.slice(0, citationBudget).forEach((citation: any, index: number) => {
    const isPlainUrl = typeof citation === "string";
    const url = isPlainUrl ? citation : citation?.url || "";
    const title = isPlainUrl ? titleFromUrl(url) : citation?.title || titleFromUrl(url);
    results.push({
      title,
      url,
      snippet: `Source cited in Perplexity answer [citation ${index + 1}]`,
      score: Number((0.9 - index * 0.1).toFixed(3)),
    });
  });

  return {
    provider: "perplexity",
    query,
    results,
    images: [],
    answer,
    metadata: { model: payload.model, usage: data.usage || {} },
  };
}
747
+
748
/**
 * Queries the You.com index search endpoint and normalizes the response.
 *
 * @param query - Search text.
 * @param apiKey - Sent in the `X-API-KEY` header.
 * @param maxResults - Requested as `count` and used to cap the web results.
 * @param timeRange - Optional; forwarded as the `freshness` query parameter.
 * @returns Normalized response. `answer` joins the snippets of the first three
 *          web results (max 1000 chars); up to five news items are passed through.
 */
async function searchYou(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const endpoint = new URL("https://ydc-index.io/v1/search");
  const queryParams: Array<[string, string]> = [
    ["query", query],
    ["count", String(maxResults)],
    ["safesearch", "moderate"],
    ["country", "US"],
    ["language", "EN"],
  ];
  if (timeRange) queryParams.push(["freshness", timeRange]);
  for (const [name, value] of queryParams) endpoint.searchParams.set(name, value);

  const data = await httpJson(endpoint.toString(), {
    method: "GET",
    headers: { "X-API-KEY": apiKey, Accept: "application/json" },
  });

  const web = data?.results?.web || [];
  const news = data?.results?.news || [];

  const results = web.slice(0, maxResults).map((item: any, rank: number) => ({
    title: item.title || "",
    url: item.url || "",
    snippet: item?.snippets?.[0] || item.description || "",
    // Positional score: 1.0 for the first hit, dropping 0.05 per rank.
    score: Number((1 - rank * 0.05).toFixed(3)),
    date: item.page_age,
    source: "web",
    additional_snippets: Array.isArray(item.snippets) ? item.snippets.slice(1, 3) : undefined,
    thumbnail: item.thumbnail_url,
    favicon: item.favicon_url,
  }));

  const leadingSnippets = results.slice(0, 3).map((entry: any) => entry.snippet).filter(Boolean);
  const answer = leadingSnippets.join(" ").slice(0, 1000);

  return {
    provider: "you",
    query,
    results,
    news: news.slice(0, 5),
    images: [],
    answer,
    metadata: { search_uuid: data?.metadata?.search_uuid, latency: data?.metadata?.latency },
  };
}
763
+
764
/**
 * Queries a SearXNG instance's JSON search endpoint and normalizes the response.
 *
 * The instance URL is first passed through `validateSearxngUrl`; the value it
 * returns is used as the base URL and reported in the metadata.
 *
 * @param query - Search text, sent as `q`.
 * @param instanceUrl - Base URL of the SearXNG instance.
 * @param maxResults - Caps the number of results taken from the reply.
 * @param timeRange - Optional; forwarded as `time_range`.
 * @param env - Environment map handed to `validateSearxngUrl`.
 * @returns Normalized response. `answer` prefers the first entry of `answers`,
 *          then the first infobox, then the first result snippet.
 */
async function searchSearxng(query: string, instanceUrl: string, maxResults: number, timeRange: string | undefined, env: Record<string, string>): Promise<SearchResponse> {
  const base = await validateSearxngUrl(instanceUrl, env);

  const endpoint = new URL(`${base}/search`);
  const queryParams: Array<[string, string]> = [
    ["q", query],
    ["format", "json"],
    ["language", "en"],
    ["safesearch", "0"],
  ];
  if (timeRange) queryParams.push(["time_range", timeRange]);
  for (const [name, value] of queryParams) endpoint.searchParams.set(name, value);

  const data = await httpJson(endpoint.toString(), { method: "GET", headers: { Accept: "application/json" } });

  // Collect the distinct engines that contributed to the returned results.
  const enginesUsed = new Set<string>();
  const results = (data.results || []).slice(0, maxResults).map((item: any, rank: number) => {
    const engine = item.engine || "unknown";
    enginesUsed.add(engine);
    return {
      title: item.title || "",
      url: item.url || "",
      snippet: item.content || "",
      // Use the instance's score when present, otherwise a positional one.
      score: Number((item.score ?? (1 - rank * 0.05)).toFixed(3)),
      engine,
      category: item.category || "general",
      date: item.publishedDate,
    };
  });

  let answer: string;
  if (Array.isArray(data.answers) && data.answers[0]) {
    answer = String(data.answers[0]);
  } else if (Array.isArray(data.infoboxes) && data.infoboxes[0]) {
    answer = String(data.infoboxes[0].content || data.infoboxes[0].infobox || "");
  } else {
    answer = results[0]?.snippet || "";
  }

  return {
    provider: "searxng",
    query,
    results,
    images: [],
    answer,
    suggestions: data.suggestions || [],
    corrections: data.corrections || [],
    metadata: { number_of_results: data.number_of_results, engines_used: [...enginesUsed], instance_url: base },
  };
}
781
+
782
/**
 * Runs a provider call, retrying transient failures with backoff.
 *
 * The call is attempted up to `RETRY_BACKOFF_MS.length` times. A retry happens
 * only for a `ProviderRequestError` flagged `transient` whose status code is
 * neither 401 nor 403; any other error ends the loop immediately. Between
 * attempts the function sleeps for the backoff entry matching the failed
 * attempt; there is no sleep after the final attempt.
 *
 * @param fn - The provider call to execute.
 * @returns The first successful response.
 * @throws The last error seen once retries are exhausted or not applicable.
 */
async function executeWithRetry(fn: () => Promise<SearchResponse>): Promise<SearchResponse> {
  const maxAttempts = RETRY_BACKOFF_MS.length;
  let lastError: any;
  let attempt = 0;

  while (attempt < maxAttempts) {
    try {
      return await fn();
    } catch (error: any) {
      lastError = error;
      const retryable =
        error instanceof ProviderRequestError &&
        error.transient &&
        error.statusCode !== 401 &&
        error.statusCode !== 403;
      if (!retryable) break;

      const isLastAttempt = attempt >= maxAttempts - 1;
      if (!isLastAttempt) await sleep(RETRY_BACKOFF_MS[attempt]);
    }
    attempt += 1;
  }

  throw lastError;
}
795
+
796
/**
 * Plugin entry point: registers the `web_search_plus` tool with the host API.
 *
 * The tool's `execute` handler:
 * 1. normalizes the parameters (query, count clamped to 1..10, time range, domain filters),
 * 2. picks a provider, either the requested one or via `QueryAnalyzer.route` over the
 *    providers for which `getApiKey` returns a value,
 * 3. builds a fallback order and drops providers that `providerInCooldown` reports as cooling down,
 * 4. returns a cached response when `cacheGet` yields one,
 * 5. otherwise tries the eligible providers in order (each wrapped in `executeWithRetry`),
 *    merging results across providers when an earlier provider failed and the
 *    successful one returned fewer results than requested,
 * 6. stores the final response with `cachePut` and returns it as JSON text.
 *
 * All failures are reported as text content rather than thrown.
 *
 * @param api - Host plugin API; `pluginConfig` and `registerTool` are used.
 */
export default function (api: any) {
  const pluginConfig: Record<string, string> = (api.pluginConfig ?? {}) as Record<string, string>;
  const runtimeEnv = getRuntimeEnv(pluginConfig);

  api.registerTool(
    {
      name: "web_search_plus",
      description:
        "Search the web with intelligent multi-provider routing across Serper, Tavily, Querit, Exa, Perplexity, You.com, and SearXNG. Auto-selects the best provider, caches results, retries transient failures, and falls back across providers.",
      parameters: PARAMETERS_SCHEMA,
      async execute(_id: string, params: ToolParams) {
        try {
          const query = String(params.query || "").trim();
          if (!query) return { content: [{ type: "text", text: "Search failed: query is required" }] };

          // A non-numeric count would otherwise turn into NaN and leak into
          // provider requests and result slicing; fall back to the default of 5.
          const requestedCount = Number(params.count || 5);
          const count = Number.isNaN(requestedCount) ? 5 : Math.max(1, Math.min(10, Math.floor(requestedCount)));
          const requestedProvider = (params.provider || "auto") as ProviderName | "auto";
          const timeRange = toTimeRange(params.time_range);
          const includeDomains = Array.isArray(params.include_domains) ? params.include_domains.filter(Boolean) : undefined;
          const excludeDomains = Array.isArray(params.exclude_domains) ? params.exclude_domains.filter(Boolean) : undefined;

          const allProviders: ProviderName[] = ["serper", "tavily", "querit", "exa", "perplexity", "you", "searxng"];
          const configuredProviders = allProviders.filter((p) => !!getApiKey(p, runtimeEnv));

          let routingInfo: Json;
          let provider: ProviderName;
          if (requestedProvider === "auto") {
            const analyzer = new QueryAnalyzer();
            const routing = analyzer.route(query, configuredProviders);
            provider = routing.provider;
            routingInfo = {
              auto_routed: true,
              provider,
              confidence: routing.confidence,
              confidence_level: routing.confidence_level,
              reason: routing.reason,
              top_signals: routing.top_signals,
              scores: routing.scores,
              exa_depth: routing.exa_depth,
            };
          } else {
            provider = requestedProvider;
            routingInfo = { auto_routed: false, provider };
          }

          // An explicit depth wins over the routed one; used for both the cache key and the Exa call.
          const exaDepth = (params.depth || routingInfo.exa_depth || "normal") as "normal" | "deep" | "deep-reasoning";

          // Chosen provider first, then the remaining configured providers in priority order.
          const priority: ProviderName[] = ["tavily", "querit", "exa", "perplexity", "serper", "you", "searxng"];
          const providersToTry: ProviderName[] = [provider, ...priority.filter((p) => p !== provider && configuredProviders.includes(p))];
          const eligibleProviders: ProviderName[] = [];
          const cooldownSkips: Json[] = [];
          for (const p of providersToTry) {
            const cooldown = providerInCooldown(p);
            if (cooldown.inCooldown) cooldownSkips.push({ provider: p, cooldown_remaining_seconds: cooldown.remaining });
            else eligibleProviders.push(p);
          }
          // If every candidate is cooling down, still attempt the chosen provider.
          if (!eligibleProviders.length) eligibleProviders.push(provider);

          const cacheContext = {
            time_range: timeRange,
            include_domains: includeDomains ? [...includeDomains].sort() : null,
            exclude_domains: excludeDomains ? [...excludeDomains].sort() : null,
            exa_depth: exaDepth,
          };

          const cached = cacheGet(query, provider, count, DEFAULT_CACHE_TTL, cacheContext);
          if (cached) {
            const result = { ...cached };
            // Internal cache bookkeeping fields are not part of the tool output.
            for (const key of Object.keys(result)) if (key.startsWith("_cache_")) delete result[key];
            result.cached = true;
            result.cache_age_seconds = Math.floor(Date.now() / 1000 - Number(cached._cache_timestamp || 0));
            result.routing = { ...routingInfo, ...(cooldownSkips.length ? { cooldown_skips: cooldownSkips } : {}) };
            return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput(result)) }] };
          }

          const errors: Json[] = [];
          const successes: Array<[string, SearchResponse]> = [];

          // Dispatches to the provider-specific search function with the parameters it supports.
          const runProvider = async (p: ProviderName): Promise<SearchResponse> => {
            const key = validateApiKey(p, runtimeEnv);
            if (p === "serper") return searchSerper(query, key, count, timeRange);
            if (p === "tavily") return searchTavily(query, key, count, includeDomains, excludeDomains);
            if (p === "querit") return searchQuerit(query, key, count, timeRange, includeDomains, excludeDomains);
            if (p === "exa") return searchExa(query, key, count, exaDepth, includeDomains, excludeDomains);
            if (p === "perplexity") return searchPerplexity(query, key, count, timeRange);
            if (p === "you") return searchYou(query, key, count, timeRange);
            return searchSearxng(query, key, count, timeRange, runtimeEnv);
          };

          for (const p of eligibleProviders) {
            try {
              const result = await executeWithRetry(() => runProvider(p));
              resetProviderHealth(p);
              successes.push([p, result]);
              // Stop on the first success unless an earlier provider failed and
              // this one came back short, in which case keep collecting to merge.
              if ((result.results || []).length >= count || errors.length === 0) break;
            } catch (error: any) {
              const message = sanitizeOutput(String(error?.message || error));
              const cooldown = markProviderFailure(p, message);
              errors.push({ provider: p, error: message, cooldown_seconds: cooldown.cooldown_seconds });
            }
          }

          if (!successes.length) {
            return {
              content: [
                {
                  type: "text",
                  text: JSON.stringify(
                    sanitizeOutput({ error: "All providers failed", provider, query, routing: routingInfo, provider_errors: errors, cooldown_skips: cooldownSkips }),
                  ),
                },
              ],
            };
          }

          let result: SearchResponse;
          if (successes.length === 1) {
            result = successes[0][1];
          } else {
            // Several providers answered: keep the first response as the base and merge result lists.
            result = { ...successes[0][1] };
            const deduped = deduplicateResultsAcrossProviders(successes, count);
            result.results = deduped.results;
            result.deduplicated = deduped.dedupCount > 0;
            result.metadata = { ...(result.metadata || {}), dedup_count: deduped.dedupCount, providers_merged: successes.map(([p]) => p) };
          }

          const successfulProvider = successes[0][0] as ProviderName;
          if (successfulProvider !== provider) {
            routingInfo = { ...routingInfo, fallback_used: true, original_provider: provider, provider: successfulProvider, fallback_errors: errors };
          }
          if (cooldownSkips.length) routingInfo.cooldown_skips = cooldownSkips;
          result.routing = routingInfo;
          result.cached = false;
          if (!(result as any).metadata) result.metadata = {};
          if ((result as any).deduplicated == null) (result as any).deduplicated = false;
          if ((result.metadata as any).dedup_count == null) (result.metadata as any).dedup_count = 0;

          cachePut(query, successfulProvider, count, result, cacheContext);

          return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput(result)) }] };
        } catch (error: any) {
          return { content: [{ type: "text", text: `Search failed: ${sanitizeOutput(String(error?.message || error))}` }] };
        }
      },
    },
    { optional: true },
  );
}