web-search-plus-plugin 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -33
- package/index.ts +848 -135
- package/openclaw.plugin.json +1 -2
- package/package.json +1 -2
- package/scripts/search.py +0 -2940
- package/scripts/setup.py +0 -463
package/index.ts
CHANGED
|
@@ -1,84 +1,31 @@
|
|
|
1
|
-
import
|
|
1
|
+
import crypto from "crypto";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import path from "path";
|
|
4
4
|
import { fileURLToPath } from "url";
|
|
5
|
+
import dns from "dns/promises";
|
|
6
|
+
import net from "net";
|
|
5
7
|
|
|
6
8
|
function getPluginDir(): string {
|
|
9
|
+
// When OpenClaw transpiles plugins, import.meta.url may point to a temp dir.
|
|
10
|
+
// Check for the known extension path first.
|
|
11
|
+
const knownPath = path.join(process.env.HOME || "/root", ".openclaw", "extensions", "web-search-plus-plugin");
|
|
12
|
+
if (fs.existsSync(path.join(knownPath, "package.json"))) return knownPath;
|
|
7
13
|
try {
|
|
8
14
|
if (typeof __dirname !== "undefined") return __dirname;
|
|
9
15
|
} catch {}
|
|
10
16
|
try {
|
|
11
17
|
return path.dirname(fileURLToPath(import.meta.url));
|
|
12
18
|
} catch {}
|
|
13
|
-
return
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
const SENSITIVE_PATTERN = /(?:key|token|secret|password|api[_-]?key)\s*[=:]\s*\S+/gi;
|
|
17
|
-
|
|
18
|
-
function sanitizeOutput(text: string): string {
|
|
19
|
-
return text.replace(SENSITIVE_PATTERN, "[REDACTED]");
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
function loadEnvFile(envPath: string): Record<string, string> {
|
|
23
|
-
if (!fs.existsSync(envPath)) return {};
|
|
24
|
-
const env: Record<string, string> = {};
|
|
25
|
-
const lines = fs.readFileSync(envPath, "utf8").split("\n");
|
|
26
|
-
for (const line of lines) {
|
|
27
|
-
const trimmed = line.trim();
|
|
28
|
-
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
29
|
-
const stripped = trimmed.startsWith("export ") ? trimmed.slice(7) : trimmed;
|
|
30
|
-
const eqIdx = stripped.indexOf("=");
|
|
31
|
-
if (eqIdx < 0) continue;
|
|
32
|
-
const key = stripped.slice(0, eqIdx).trim();
|
|
33
|
-
const val = stripped.slice(eqIdx + 1).trim().replace(/^['"]|['"]$/g, "");
|
|
34
|
-
if (key) env[key] = val;
|
|
35
|
-
}
|
|
36
|
-
return env;
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
function runPython(
|
|
40
|
-
args: string[],
|
|
41
|
-
env: NodeJS.ProcessEnv,
|
|
42
|
-
timeoutMs: number,
|
|
43
|
-
): Promise<{ stdout: string; stderr: string; code: number }> {
|
|
44
|
-
return new Promise((resolve) => {
|
|
45
|
-
const child = spawn("python3", args, { env, shell: false });
|
|
46
|
-
let stdout = "";
|
|
47
|
-
let stderr = "";
|
|
48
|
-
let settled = false;
|
|
49
|
-
|
|
50
|
-
const timer = setTimeout(() => {
|
|
51
|
-
if (!settled) {
|
|
52
|
-
settled = true;
|
|
53
|
-
child.kill();
|
|
54
|
-
resolve({ stdout: "", stderr: "Search timed out", code: 1 });
|
|
55
|
-
}
|
|
56
|
-
}, timeoutMs);
|
|
57
|
-
|
|
58
|
-
child.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
|
|
59
|
-
child.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
|
|
60
|
-
|
|
61
|
-
child.on("close", (code: number | null) => {
|
|
62
|
-
if (!settled) {
|
|
63
|
-
settled = true;
|
|
64
|
-
clearTimeout(timer);
|
|
65
|
-
resolve({ stdout, stderr, code: code ?? 1 });
|
|
66
|
-
}
|
|
67
|
-
});
|
|
68
|
-
|
|
69
|
-
child.on("error", (err: Error) => {
|
|
70
|
-
if (!settled) {
|
|
71
|
-
settled = true;
|
|
72
|
-
clearTimeout(timer);
|
|
73
|
-
const safeMsg = (err as any).code === "ENOENT" ? "python3 not found" : "Process error";
|
|
74
|
-
resolve({ stdout: "", stderr: safeMsg, code: 1 });
|
|
75
|
-
}
|
|
76
|
-
});
|
|
77
|
-
});
|
|
19
|
+
return process.cwd();
|
|
78
20
|
}
|
|
79
21
|
|
|
80
22
|
const PLUGIN_DIR = getPluginDir();
|
|
81
|
-
const
|
|
23
|
+
/** Directory inside the plugin folder where JSON cache entries are written. */
const CACHE_DIR = path.join(PLUGIN_DIR, ".cache");

/** JSON file (inside the cache directory) holding per-provider failure/cooldown state. */
const PROVIDER_HEALTH_FILE = path.join(CACHE_DIR, "provider_health.json");

/** Default cache lifetime (3600); presumably seconds, matching the second-based cache timestamps — confirm at the call site. */
const DEFAULT_CACHE_TTL = 60 * 60;

/** Retry delays; milliseconds according to the name (usage is outside this view). */
const RETRY_BACKOFF_MS = [1000, 3000, 9000];

/** Cooldown length in seconds, indexed by consecutive failure count (the last step is reused once exhausted). */
const COOLDOWN_STEPS_SECONDS = [60, 5 * 60, 25 * 60, 60 * 60];

/** HTTP status codes that mark a failed provider request as transient. */
const TRANSIENT_HTTP_CODES = new Set([408, 425, 429, 500, 502, 503, 504]);
|
|
82
29
|
|
|
83
30
|
const PARAMETERS_SCHEMA = {
|
|
84
31
|
type: "object",
|
|
@@ -88,36 +35,260 @@ const PARAMETERS_SCHEMA = {
|
|
|
88
35
|
provider: {
|
|
89
36
|
type: "string",
|
|
90
37
|
enum: ["serper", "tavily", "querit", "exa", "perplexity", "you", "searxng", "auto"],
|
|
91
|
-
description: "Force a
|
|
38
|
+
description: "Force a provider, or use auto routing (default: auto)",
|
|
92
39
|
},
|
|
93
40
|
count: { type: "number", description: "Number of results (default: 5)" },
|
|
94
41
|
depth: {
|
|
95
42
|
type: "string",
|
|
96
43
|
enum: ["normal", "deep", "deep-reasoning"],
|
|
97
|
-
description: "Exa
|
|
44
|
+
description: "Exa depth when using Exa or when auto-routing chooses Exa.",
|
|
98
45
|
},
|
|
99
46
|
time_range: {
|
|
100
47
|
type: "string",
|
|
101
48
|
enum: ["day", "week", "month", "year"],
|
|
102
|
-
description: "
|
|
49
|
+
description: "Recency filter where supported.",
|
|
103
50
|
},
|
|
104
51
|
include_domains: {
|
|
105
52
|
type: "array",
|
|
106
53
|
items: { type: "string" },
|
|
107
|
-
description: "Only include results from these domains (
|
|
54
|
+
description: "Only include results from these domains (Tavily, Exa, Querit where supported).",
|
|
108
55
|
},
|
|
109
56
|
exclude_domains: {
|
|
110
57
|
type: "array",
|
|
111
58
|
items: { type: "string" },
|
|
112
|
-
description: "Exclude results from these domains (
|
|
59
|
+
description: "Exclude results from these domains (Tavily, Exa, Querit where supported).",
|
|
113
60
|
},
|
|
114
61
|
},
|
|
115
62
|
};
|
|
116
63
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
64
|
+
/** Loose JSON-like object. */
type Json = { [key: string]: any };

/** Concrete search back-ends (excludes the "auto" routing pseudo-provider). */
type ProviderName =
  | "serper"
  | "tavily"
  | "querit"
  | "exa"
  | "perplexity"
  | "you"
  | "searxng";

/** Parameters accepted by the search tool; mirrors the tool's parameter schema. */
type ToolParams = {
  query: string;
  provider?: ProviderName | "auto";
  count?: number;
  depth?: "normal" | "deep" | "deep-reasoning";
  time_range?: "day" | "week" | "month" | "year";
  include_domains?: string[];
  exclude_domains?: string[];
};

/** One normalized hit; provider-specific extras ride along via the index signature. */
type SearchResult = {
  title: string;
  url: string;
  snippet: string;
  score?: number;
  [key: string]: any;
};

/** Normalized provider response. */
type SearchResponse = {
  provider: string;
  query: string;
  results: SearchResult[];
  images?: string[];
  answer?: string;
  metadata?: Json;
  [key: string]: any;
};
|
|
93
|
+
|
|
94
|
+
/**
 * Raised when a provider cannot be used because of configuration
 * (missing API key, invalid or blocked SearXNG URL).
 *
 * Sets `name` explicitly, like ProviderRequestError, so logs and serialized
 * errors show "ProviderConfigError" instead of the generic "Error".
 */
class ProviderConfigError extends Error {
  constructor(message?: string) {
    super(message);
    this.name = "ProviderConfigError";
  }
}
|
|
95
|
+
/**
 * Failure of an HTTP request to a search provider.
 *
 * `statusCode` carries the HTTP status when one was received; `transient`
 * flags the failure as temporary (timeouts, network errors, transient codes).
 */
class ProviderRequestError extends Error {
  statusCode?: number;
  transient: boolean;

  constructor(message: string, statusCode?: number, transient = false) {
    super(message);
    // Assignment order is kept so the own-property order stays the same.
    this.name = "ProviderRequestError";
    this.statusCode = statusCode;
    this.transient = transient;
  }
}
|
|
105
|
+
|
|
106
|
+
/**
 * Patterns for secrets embedded in free text. Only the URL query-parameter
 * pattern has capture groups: group 1 is the `?name=` / `&name=` prefix that
 * is kept so the parameter name stays visible after redaction.
 */
const SENSITIVE_PATTERNS: RegExp[] = [
  // Prefixed API keys such as sk_..., pk_..., tok_...
  /\b(?:sk|pk|rk|api|tok)_[A-Za-z0-9\-_]{10,}\b/g,
  // Authorization bearer tokens
  /\bBearer\s+[A-Za-z0-9\-._~+/]+=*\b/gi,
  // key=value / key: value assignments
  /\b(?:key|token|secret|password|api[_-]?key)\s*[:=]\s*[^\s,"'}]+/gi,
  // Credentials passed as URL query parameters (group 1 = parameter prefix)
  /([?&](?:api[_-]?key|key|token|access[_-]?token|auth|authorization)=)([^&#\s]+)/gi,
  // Three dot-separated base64url segments (JWT-shaped)
  /\b[A-Za-z0-9_-]{24,}\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}\b/g,
];

/**
 * Recursively redacts secrets from a value.
 *
 * - strings: every SENSITIVE_PATTERNS match is replaced by "[REDACTED]"
 *   (keeping the parameter-name prefix for the URL query pattern);
 * - arrays: each element is sanitized;
 * - objects: values under keys that look like credentials are replaced
 *   wholesale, all other values are sanitized recursively;
 * - anything else is returned unchanged.
 *
 * @param input Arbitrary value.
 * @returns A sanitized copy (strings and primitives are returned by value).
 */
function sanitizeOutput(input: any): any {
  if (typeof input === "string") {
    let out = input;
    for (const pattern of SENSITIVE_PATTERNS) {
      // For patterns without capture groups, the second replacer argument is
      // the numeric match offset, not a group. Only a string is a real prefix;
      // treating a non-zero offset as one would leak "<offset>[REDACTED]".
      out = out.replace(pattern, (_match: string, prefix: unknown) =>
        typeof prefix === "string" ? `${prefix}[REDACTED]` : "[REDACTED]",
      );
    }
    return out;
  }
  if (Array.isArray(input)) return input.map((v) => sanitizeOutput(v));
  if (input && typeof input === "object") {
    const sensitiveKey = /(?:api[_-]?key|token|secret|password|authorization)/i;
    const result: any = {};
    for (const [k, v] of Object.entries(input)) {
      result[k] = sensitiveKey.test(k) ? "[REDACTED]" : sanitizeOutput(v);
    }
    return result;
  }
  return input;
}
|
|
136
|
+
|
|
137
|
+
/** Creates `dir` together with any missing parent directories; a no-op when it already exists. */
function ensureDir(dir: string): void {
  const options = { recursive: true };
  fs.mkdirSync(dir, options);
}
|
|
140
|
+
|
|
141
|
+
/** Returns a promise that resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
|
144
|
+
|
|
145
|
+
/**
 * Reads and parses a JSON file.
 *
 * @param file Path of the file to read.
 * @param fallback Value returned when the file cannot be read or parsed.
 * @returns The parsed JSON value, or `fallback` on any error.
 */
function readJsonFile(file: string, fallback: any): any {
  let parsed: any;
  try {
    const raw = fs.readFileSync(file, "utf8");
    parsed = JSON.parse(raw);
  } catch {
    return fallback;
  }
  return parsed;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Writes `value` to `file` as pretty-printed (2-space) UTF-8 JSON, creating
 * the parent directory first when needed.
 */
function writeJsonFile(file: string, value: any): void {
  const parentDir = path.dirname(file);
  ensureDir(parentDir);
  const serialized = JSON.stringify(value, null, 2);
  fs.writeFileSync(file, serialized, "utf8");
}
|
|
157
|
+
|
|
158
|
+
/** Returns the lowercase hex SHA-256 digest of `input`. */
function sha256(input: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
|
|
161
|
+
|
|
162
|
+
/**
 * Builds a deterministic 32-hex-character cache key for a search request.
 *
 * The request is serialized to canonical JSON (object keys sorted at every
 * level) and hashed with SHA-256. The previous implementation passed the
 * sorted top-level keys as the `JSON.stringify` replacer array; that array is
 * an allow-list applied at every nesting level, so keys of nested objects in
 * `params` were silently dropped and different requests could share a key.
 * For flat `params` the resulting key is unchanged.
 *
 * @param query Search query.
 * @param provider Provider name.
 * @param maxResults Requested result count.
 * @param params Extra request parameters; entries override the three fields
 *   above when they use the same names (unchanged from the original spread).
 * @returns First 32 hex characters of the SHA-256 digest.
 */
function buildCacheKey(query: string, provider: string, maxResults: number, params?: Json): string {
  // JSON serialization with keys sorted recursively; undefined/function
  // members are omitted in objects and become null in arrays, like JSON.stringify.
  const canonicalize = (value: unknown): string | undefined => {
    if (value === null || typeof value !== "object") return JSON.stringify(value);
    const withToJson = value as { toJSON?: () => unknown };
    if (typeof withToJson.toJSON === "function") return canonicalize(withToJson.toJSON());
    if (Array.isArray(value)) {
      return `[${value.map((item) => canonicalize(item) ?? "null").join(",")}]`;
    }
    const record = value as Record<string, unknown>;
    const members: string[] = [];
    for (const key of Object.keys(record).sort()) {
      const encoded = canonicalize(record[key]);
      if (encoded !== undefined) members.push(`${JSON.stringify(key)}:${encoded}`);
    }
    return `{${members.join(",")}}`;
  };

  const canonical = canonicalize({ query, provider, maxResults, ...(params || {}) }) ?? "";
  return sha256(canonical).slice(0, 32);
}
|
|
165
|
+
|
|
166
|
+
/** Maps a cache key to its `<key>.json` file inside the cache directory. */
function getCachePath(cacheKey: string): string {
  const fileName = `${cacheKey}.json`;
  return path.join(CACHE_DIR, fileName);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Looks up a cached response.
 *
 * @param query Search query.
 * @param provider Provider name.
 * @param maxResults Requested result count.
 * @param ttl Maximum entry age in seconds.
 * @param params Extra request parameters that are part of the cache key.
 * @returns The cached payload (including its `_cache_*` bookkeeping fields),
 *   or null when there is no usable entry. Entries that are expired, lack a
 *   timestamp, or cannot be read/parsed are deleted on a best-effort basis.
 */
function cacheGet(query: string, provider: string, maxResults: number, ttl: number, params?: Json): any | null {
  const file = getCachePath(buildCacheKey(query, provider, maxResults, params));
  try {
    const entry = JSON.parse(fs.readFileSync(file, "utf8"));
    const storedAt = Number(entry._cache_timestamp || 0);
    const ageSeconds = Date.now() / 1000 - storedAt;
    // `!(age > ttl)` rather than `age <= ttl` keeps NaN handling unchanged.
    if (storedAt && !(ageSeconds > ttl)) return entry;
  } catch {
    // Unreadable or corrupt entry: fall through to removal.
  }
  try {
    fs.unlinkSync(file);
  } catch {}
  return null;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Stores a response in the file cache.
 *
 * The written payload is `result` plus `_cache_*` bookkeeping fields
 * (timestamp in epoch seconds, key, and the request that produced it).
 */
function cachePut(query: string, provider: string, maxResults: number, result: any, params?: Json): void {
  ensureDir(CACHE_DIR);
  const cacheKey = buildCacheKey(query, provider, maxResults, params);
  const storedAt = Math.floor(Date.now() / 1000);
  const bookkeeping = {
    _cache_timestamp: storedAt,
    _cache_key: cacheKey,
    _cache_query: query,
    _cache_provider: provider,
    _cache_max_results: maxResults,
    _cache_params: params || {},
  };
  // Bookkeeping fields are spread last so they win over same-named result fields.
  writeJsonFile(getCachePath(cacheKey), { ...result, ...bookkeeping });
}
|
|
202
|
+
|
|
203
|
+
/** Reads the provider health state file; yields `{}` when it is missing or unreadable. */
function loadProviderHealth(): Json {
  const emptyState = {};
  return readJsonFile(PROVIDER_HEALTH_FILE, emptyState);
}
|
|
206
|
+
|
|
207
|
+
/** Persists the provider health state to the health file. */
function saveProviderHealth(state: Json): void {
  const target = PROVIDER_HEALTH_FILE;
  writeJsonFile(target, state);
}
|
|
210
|
+
|
|
211
|
+
/**
 * Reports whether `provider` is currently cooling down after failures.
 *
 * @returns `inCooldown` plus the remaining time in whole seconds (never negative).
 */
function providerInCooldown(provider: string): { inCooldown: boolean; remaining: number } {
  const health = loadProviderHealth();
  const nowSeconds = Math.floor(Date.now() / 1000);
  const until = Number(health?.[provider]?.cooldown_until || 0);
  const secondsLeft = until - nowSeconds;
  const inCooldown = secondsLeft > 0;
  return { inCooldown, remaining: Math.max(0, secondsLeft) };
}
|
|
217
|
+
|
|
218
|
+
/**
 * Records a failure for `provider` and starts or extends its cooldown.
 *
 * The cooldown length follows COOLDOWN_STEPS_SECONDS, indexed by the
 * consecutive failure count and capped at the last step.
 *
 * @param provider Provider name.
 * @param message Error text; sanitized before being stored.
 * @returns The updated health record for the provider.
 */
function markProviderFailure(provider: string, message: string): Json {
  const health = loadProviderHealth();
  const nowSeconds = Math.floor(Date.now() / 1000);
  const failures = Number(health?.[provider]?.failure_count || 0) + 1;
  const stepIndex = Math.min(failures - 1, COOLDOWN_STEPS_SECONDS.length - 1);
  const cooldown = COOLDOWN_STEPS_SECONDS[stepIndex];

  const record = {
    failure_count: failures,
    cooldown_until: nowSeconds + cooldown,
    cooldown_seconds: cooldown,
    last_error: sanitizeOutput(message),
    last_failure_at: nowSeconds,
  };
  health[provider] = record;
  saveProviderHealth(health);
  return health[provider];
}
|
|
233
|
+
|
|
234
|
+
/** Clears any stored failure/cooldown record for `provider`; writes only when one existed. */
function resetProviderHealth(provider: string): void {
  const health = loadProviderHealth();
  if (!health[provider]) return;
  delete health[provider];
  saveProviderHealth(health);
}
|
|
241
|
+
|
|
242
|
+
/**
 * Produces a comparison key for a result URL, used to detect duplicates.
 *
 * The key is the lowercased host without a leading "www.", the path without
 * one trailing slash, and the query string with tracking parameters removed
 * and the remaining parameters sorted. Scheme and fragment are ignored.
 *
 * The query string used to be dropped entirely, which made distinct pages
 * that differ only by query (e.g. `/item?id=1` and `/item?id=2`, or
 * `/watch?v=...`) collapse into one key and be discarded as duplicates.
 *
 * @param url Raw URL string.
 * @returns The normalized key; for unparseable input, the trimmed lowercased input.
 */
function normalizeResultUrl(url: string): string {
  // Parameters that only carry campaign/click tracking and never identify content.
  const trackingParam = /^(?:utm_[a-z0-9_]+|fbclid|gclid|msclkid|mc_cid|mc_eid)$/i;
  try {
    const u = new URL(url.trim());
    const host = u.hostname.replace(/^www\./i, "").toLowerCase();
    const pathname = u.pathname.replace(/\/$/, "");
    const query = Array.from(u.searchParams)
      .filter(([name]) => !trackingParam.test(name))
      .sort(([nameA, valueA], [nameB, valueB]) =>
        nameA === nameB ? (valueA < valueB ? -1 : valueA > valueB ? 1 : 0) : nameA < nameB ? -1 : 1,
      )
      .map(([name, value]) => `${encodeURIComponent(name)}=${encodeURIComponent(value)}`)
      .join("&");
    return query ? `${host}${pathname}?${query}` : `${host}${pathname}`;
  } catch {
    return url.trim().toLowerCase();
  }
}
|
|
252
|
+
|
|
253
|
+
/**
 * Merges results from several providers, dropping repeated URLs.
 *
 * Providers are processed in the given order and the first occurrence of a
 * normalized URL wins. Results whose URL normalizes to an empty string are
 * always kept. Each kept result is tagged with its provider unless it already
 * has a `provider` field.
 *
 * @param resultsByProvider `[providerName, response]` pairs in priority order.
 * @param maxResults Collection stops as soon as this many results are kept.
 * @returns The merged list and the number of duplicates skipped.
 */
function deduplicateResultsAcrossProviders(resultsByProvider: Array<[string, SearchResponse]>, maxResults: number): { results: SearchResult[]; dedupCount: number } {
  const unique: SearchResult[] = [];
  const seenKeys = new Set<string>();
  let dedupCount = 0;

  for (const [providerName, response] of resultsByProvider) {
    for (const entry of response.results || []) {
      const key = normalizeResultUrl(entry.url || "");
      if (key) {
        if (seenKeys.has(key)) {
          dedupCount += 1;
          continue;
        }
        seenKeys.add(key);
      }
      unique.push({ ...entry, provider: entry.provider || providerName });
      if (unique.length >= maxResults) return { results: unique, dedupCount };
    }
  }
  return { results: unique, dedupCount };
}
|
|
271
|
+
|
|
272
|
+
/**
 * Parses a dotenv-style file into a key/value map.
 *
 * Supported syntax: `KEY=value`, an optional `export ` prefix, `#` comment
 * lines, blank lines, and values wrapped in one pair of matching single or
 * double quotes. Lines without `=` or with an empty key are ignored.
 *
 * Changes from the previous implementation:
 * - quotes are removed only when the value is wrapped in a matching pair, so
 *   a value that merely starts or ends with a quote character is preserved;
 * - a path that is missing or is a directory yields `{}` instead of throwing
 *   from the read that followed the `existsSync` check.
 *
 * @param envPath Path of the env file.
 * @returns Parsed variables; `{}` when the file does not exist.
 * @throws Any other read error (e.g. permission denied).
 */
function loadEnvFile(envPath: string): Record<string, string> {
  let contents: string;
  try {
    contents = fs.readFileSync(envPath, "utf8");
  } catch (error: unknown) {
    const code = (error as { code?: string } | null)?.code;
    if (code === "ENOENT" || code === "ENOTDIR" || code === "EISDIR") return {};
    throw error;
  }

  const unquote = (raw: string): string => {
    const first = raw[0];
    const wrapped = raw.length >= 2 && (first === '"' || first === "'") && raw[raw.length - 1] === first;
    return wrapped ? raw.slice(1, -1) : raw;
  };

  const env: Record<string, string> = {};
  for (const line of contents.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith("#")) continue;
    const stripped = trimmed.startsWith("export ") ? trimmed.slice(7) : trimmed;
    const idx = stripped.indexOf("=");
    if (idx < 0) continue;
    const key = stripped.slice(0, idx).trim();
    if (key) env[key] = unquote(stripped.slice(idx + 1).trim());
  }
  return env;
}
|
|
287
|
+
|
|
288
|
+
function getRuntimeEnv(pluginConfig: Record<string, string>): Record<string, string> {
|
|
289
|
+
const envFiles = [path.join(PLUGIN_DIR, ".env"), path.join(PLUGIN_DIR, "..", "web-search-plus", ".env")];
|
|
290
|
+
const fileEnv = Object.assign({}, ...envFiles.map(loadEnvFile));
|
|
291
|
+
const mapped: Record<string, string> = {};
|
|
121
292
|
const configKeyMap: Record<string, string> = {
|
|
122
293
|
serperApiKey: "SERPER_API_KEY",
|
|
123
294
|
tavilyApiKey: "TAVILY_API_KEY",
|
|
@@ -127,85 +298,627 @@ export default function (api: any) {
|
|
|
127
298
|
kilocodeApiKey: "KILOCODE_API_KEY",
|
|
128
299
|
youApiKey: "YOU_API_KEY",
|
|
129
300
|
searxngInstanceUrl: "SEARXNG_INSTANCE_URL",
|
|
301
|
+
searxngAllowPrivate: "SEARXNG_ALLOW_PRIVATE",
|
|
130
302
|
};
|
|
131
303
|
for (const [cfgKey, envKey] of Object.entries(configKeyMap)) {
|
|
132
|
-
const val = pluginConfig[cfgKey];
|
|
133
|
-
if (val && typeof val === "string")
|
|
304
|
+
const val = pluginConfig?.[cfgKey];
|
|
305
|
+
if (val && typeof val === "string") mapped[envKey] = val;
|
|
134
306
|
}
|
|
307
|
+
return { ...fileEnv, ...Object.fromEntries(Object.entries(process.env).filter(([, v]) => typeof v === "string") as any), ...mapped };
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
/**
 * Looks up the credential for a provider in the resolved environment.
 *
 * For "perplexity" the Kilocode key is preferred over the Perplexity key;
 * for "searxng" the "key" is the instance URL.
 *
 * @returns The configured value, or undefined when none is set.
 */
function getApiKey(provider: ProviderName, env: Record<string, string>): string | undefined {
  const {
    SERPER_API_KEY,
    TAVILY_API_KEY,
    QUERIT_API_KEY,
    EXA_API_KEY,
    KILOCODE_API_KEY,
    PERPLEXITY_API_KEY,
    YOU_API_KEY,
    SEARXNG_INSTANCE_URL,
  } = env;
  const credentials: Record<ProviderName, string | undefined> = {
    serper: SERPER_API_KEY,
    tavily: TAVILY_API_KEY,
    querit: QUERIT_API_KEY,
    exa: EXA_API_KEY,
    perplexity: KILOCODE_API_KEY || PERPLEXITY_API_KEY,
    you: YOU_API_KEY,
    searxng: SEARXNG_INSTANCE_URL,
  };
  return credentials[provider];
}
|
|
322
|
+
|
|
323
|
+
/**
 * Returns the credential for `provider`, failing when it is not configured.
 *
 * @throws ProviderConfigError when no value is set (with a SearXNG-specific
 *   message for the "searxng" provider).
 */
function validateApiKey(provider: ProviderName, env: Record<string, string>): string {
  const credential = getApiKey(provider, env);
  if (credential) return credential;

  const reason =
    provider === "searxng"
      ? "Missing SearXNG instance URL (SEARXNG_INSTANCE_URL or pluginConfig.searxngInstanceUrl)"
      : `Missing API key for ${provider}`;
  throw new ProviderConfigError(reason);
}
|
|
331
|
+
|
|
332
|
+
/** Passes through a supported recency filter ("day", "week", "month", "year"); anything else becomes undefined. */
function toTimeRange(value?: string): string | undefined {
  if (!value) return undefined;
  const supported = ["day", "week", "month", "year"];
  return supported.includes(value) ? value : undefined;
}
|
|
335
|
+
|
|
336
|
+
/**
 * Derives a display title from a URL.
 *
 * Yields "<domain> — <last path segment>" where the segment has dashes and
 * underscores turned into spaces and a trailing 2–4 character extension
 * removed; yields just the domain when no usable segment exists. For input
 * that is not a valid URL, the first 80 characters are returned.
 */
function titleFromUrl(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return url.slice(0, 80);
  }
  const domain = parsed.hostname.replace(/^www\./, "");
  const segments = parsed.pathname.split("/").filter(Boolean);
  if (segments.length === 0) return domain;

  const slug = segments[segments.length - 1].replace(/[-_]/g, " ").replace(/\.\w{2,4}$/, "");
  return slug ? `${domain} — ${slug}` : domain;
}
|
|
347
|
+
|
|
348
|
+
/**
 * Performs an HTTP request and returns the parsed JSON body.
 *
 * A default User-Agent is sent unless `init.headers` overrides it. The
 * timeout covers both the request and reading the body.
 *
 * @param url Request URL.
 * @param init Fetch options (method, headers, body, ...).
 * @param timeoutMs Abort the request after this many milliseconds.
 * @returns The parsed body; `{}` when the body is empty or not valid JSON.
 * @throws ProviderRequestError on non-2xx status (carrying `statusCode`, and
 *   `transient` when the status is in TRANSIENT_HTTP_CODES), on timeout, and
 *   on network errors (both flagged transient).
 */
async function httpJson(url: string, init: RequestInit, timeoutMs = 30000): Promise<any> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  try {
    const res = await fetch(url, {
      ...init,
      headers: {
        "User-Agent": "ClawdBot-WebSearchPlus/3.0",
        ...(init.headers || {}),
      },
      signal: controller.signal,
    });
    const text = await res.text();
    let data: any = null;
    try { data = text ? JSON.parse(text) : {}; } catch {}
    if (!res.ok) {
      const raw = data?.error || data?.message || text || res.statusText;
      // Providers often return `{ error: { message: "..." } }`; interpolating
      // that object directly would produce "[object Object]".
      const detail =
        typeof raw === "string"
          ? raw
          : typeof raw?.message === "string" && raw.message
            ? raw.message
            : JSON.stringify(raw);
      throw new ProviderRequestError(`${detail} (HTTP ${res.status})`, res.status, TRANSIENT_HTTP_CODES.has(res.status));
    }
    return data ?? {};
  } catch (error: any) {
    if (error?.name === "AbortError") throw new ProviderRequestError(`Request timed out after ${timeoutMs}ms`, undefined, true);
    if (error instanceof ProviderRequestError) throw error;
    throw new ProviderRequestError(`Network error: ${String(error?.message || error)}`, undefined, true);
  } finally {
    clearTimeout(timer);
  }
}
|
|
376
|
+
|
|
377
|
+
/**
 * Converts an IPv4-mapped IPv6 address (`::ffff:a.b.c.d` or `::ffff:hhhh:hhhh`)
 * to dotted IPv4 form; returns undefined for any other address.
 */
function mappedIpv4FromIpv6(address: string): string | undefined {
  const dotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(address);
  if (dotted) return dotted[1];
  const hex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(address);
  if (!hex) return undefined;
  const high = parseInt(hex[1], 16);
  const low = parseInt(hex[2], 16);
  return `${high >> 8}.${high & 255}.${low >> 8}.${low & 255}`;
}

/**
 * True for loopback, link-local, RFC 1918 private, unique-local (fc00::/7),
 * and unspecified / "this network" (0.0.0.0/8) addresses, including IPv4
 * addresses presented in IPv4-mapped IPv6 form.
 */
function isPrivateOrInternalAddress(address: string): boolean {
  const lower = address.toLowerCase();
  const ipv4 = net.isIPv4(lower) ? lower : mappedIpv4FromIpv6(lower);
  if (ipv4) {
    const [a, b] = ipv4.split(".").map(Number);
    return (
      a === 0 ||
      a === 10 ||
      a === 127 ||
      (a === 169 && b === 254) ||
      (a === 192 && b === 168) ||
      (a === 172 && b >= 16 && b <= 31)
    );
  }
  return (
    lower === "::1" ||
    lower === "::" ||
    lower.startsWith("fc") ||
    lower.startsWith("fd") ||
    /^fe[89ab][0-9a-f]:/.test(lower) // fe80::/10, not just the fe80: prefix
  );
}

/**
 * Validates the configured SearXNG instance URL and guards against SSRF.
 *
 * Checks, in order: the URL parses, uses http/https, has a hostname, is not a
 * known cloud metadata endpoint, and — unless `SEARXNG_ALLOW_PRIVATE` is "1" —
 * does not resolve to a private or internal address.
 *
 * Hardening over the previous version:
 * - the hostname is compared/resolved without IPv6 brackets and without a
 *   trailing root dot (`metadata.google.internal.` used to bypass the
 *   metadata block, and every bracketed IPv6 literal failed to resolve);
 * - IPv4-mapped IPv6 addresses (`::ffff:127.0.0.1`) are unwrapped before the
 *   private-range check;
 * - the whole fe80::/10 link-local range is covered.
 *
 * NOTE(review): the address is resolved here and again by the later request,
 * so DNS rebinding between the two lookups is not prevented by this check.
 *
 * @param input URL as configured.
 * @param env Resolved environment (reads `SEARXNG_ALLOW_PRIVATE`).
 * @returns The normalized URL without a trailing slash.
 * @throws ProviderConfigError when any check fails.
 */
async function validateSearxngUrl(input: string, env: Record<string, string>): Promise<string> {
  let u: URL;
  try {
    u = new URL(input);
  } catch {
    throw new ProviderConfigError("Invalid SearXNG URL");
  }
  if (!["http:", "https:"].includes(u.protocol)) throw new ProviderConfigError(`SearXNG URL must use http or https, got ${u.protocol}`);
  if (!u.hostname) throw new ProviderConfigError("SearXNG URL must include a hostname");

  // URL.hostname keeps the brackets of IPv6 literals and a trailing root dot.
  const host = u.hostname.replace(/^\[|\]$/g, "").replace(/\.$/, "");

  const blockedHosts = new Set(["169.254.169.254", "metadata.google.internal", "metadata.internal"]);
  if (blockedHosts.has(host)) throw new ProviderConfigError("SearXNG URL blocked: metadata endpoint");

  const allowPrivate = String(env.SEARXNG_ALLOW_PRIVATE || "").trim() === "1";
  if (!allowPrivate) {
    let records: Array<{ address: string; family: number }> = [];
    try {
      records = await dns.lookup(host, { all: true, verbatim: true });
    } catch {
      records = [];
    }
    if (!records.length && net.isIP(host)) records.push({ address: host, family: net.isIP(host) });
    if (!records.length) throw new ProviderConfigError(`SearXNG URL blocked: cannot resolve hostname ${u.hostname}`);
    for (const record of records) {
      if (isPrivateOrInternalAddress(record.address)) {
        throw new ProviderConfigError(`SearXNG URL blocked: ${u.hostname} resolves to private/internal IP ${record.address}`);
      }
    }
  }
  return u.toString().replace(/\/$/, "");
}
|
|
407
|
+
|
|
408
|
+
/**
 * Weighted regex signals (English and German) for shopping / product intent.
 * Keys are regex sources compiled case-insensitively; values are score weights.
 */
const SHOPPING_SIGNALS: Record<string, number> = {
  // Price questions and currency amounts
  "\\bhow much\\b": 4.0, "\\bprice of\\b": 4.0, "\\bcost of\\b": 4.0, "\\bprices?\\b": 3.0,
  "\\$\\d+|\\d+\\s*dollars?": 3.0, "€\\d+|\\d+\\s*euros?": 3.0, "£\\d+|\\d+\\s*pounds?": 3.0,
  "\\bpreis(e)?\\b": 3.5, "\\bkosten\\b": 3.0, "\\bwieviel\\b": 3.5, "\\bwie viel\\b": 3.5, "\\bwas kostet\\b": 4.0,

  // Purchase intent
  "\\bbuy\\b": 3.5, "\\bpurchase\\b": 3.5, "\\border\\b(?!\\s+by)": 3.0, "\\bshopping\\b": 3.5, "\\bshop for\\b": 3.5,
  "\\bwhere to (buy|get|purchase)\\b": 4.0, "\\bkaufen\\b": 3.5, "\\bbestellen\\b": 3.5, "\\bwo kaufen\\b": 4.0,

  // Deals and bargains
  "\\bhändler\\b": 3.0, "\\bshop\\b": 2.5, "\\bdeal(s)?\\b": 3.0, "\\bdiscount(s)?\\b": 3.0, "\\bsale\\b": 2.5,
  "\\bcheap(er|est)?\\b": 3.0, "\\baffordable\\b": 2.5, "\\bbudget\\b": 2.5, "\\bbest price\\b": 3.5,
  "\\bcompare prices\\b": 3.5, "\\bcoupon\\b": 3.0, "\\bgünstig(er|ste)?\\b": 3.0, "\\bbillig(er|ste)?\\b": 3.0,
  "\\bangebot(e)?\\b": 3.0, "\\brabatt\\b": 3.0, "\\baktion\\b": 2.5, "\\bschnäppchen\\b": 3.0,

  // Product comparison, specs and reviews
  "\\bvs\\.?\\b": 2.0, "\\bversus\\b": 2.0, "\\bor\\b.*\\bwhich\\b": 2.0, "\\bspecs?\\b": 2.5,
  "\\bspecifications?\\b": 2.5, "\\breview(s)?\\b": 2.0, "\\brating(s)?\\b": 2.0, "\\bunboxing\\b": 2.5,
  "\\btest\\b": 2.5, "\\bbewertung(en)?\\b": 2.5, "\\btechnische daten\\b": 3.0, "\\bspezifikationen\\b": 2.5,
};
|
|
422
|
+
/**
 * Weighted regex signals (English and German) for explanatory / research intent.
 * Keys are regex sources compiled case-insensitively; values are score weights.
 *
 * Patterns that begin with a non-ASCII letter must not start with `\b`:
 * without Unicode-aware word classes, `ü` is not a word character, so
 * `\bübersicht` can only match when glued to a preceding ASCII word character
 * and never at the start of a word. A lookbehind is used instead.
 */
const RESEARCH_SIGNALS: Record<string, number> = {
  // How / why / what questions and definitions
  "\\bhow does\\b": 4.0, "\\bhow do\\b": 3.5, "\\bwhy does\\b": 4.0, "\\bwhy do\\b": 3.5, "\\bwhy is\\b": 3.5,
  "\\bexplain\\b": 4.0, "\\bexplanation\\b": 4.0, "\\bwhat is\\b": 3.0, "\\bwhat are\\b": 3.0, "\\bdefine\\b": 3.5,
  "\\bdefinition of\\b": 3.5, "\\bmeaning of\\b": 3.0, "\\banalyze\\b": 3.5, "\\banalysis\\b": 3.5,

  // Comparison and evaluation
  "\\bcompare\\b(?!\\s*prices?)": 3.0, "\\bcomparison\\b": 3.0, "\\bstatus of\\b": 3.5, "\\bstatus\\b": 2.5,
  "\\bwhat happened with\\b": 4.0, "\\bpros and cons\\b": 4.0, "\\badvantages?\\b": 3.0, "\\bdisadvantages?\\b": 3.0,
  "\\bbenefits?\\b": 2.5, "\\bdrawbacks?\\b": 3.0, "\\bdifference between\\b": 3.5, "\\bunderstand\\b": 3.0,

  // Learning material and depth
  "\\blearn(ing)?\\b": 2.5, "\\btutorial\\b": 3.0, "\\bguide\\b": 2.5, "\\bhow to\\b": 2.0, "\\bstep by step\\b": 3.0,
  "\\bin[- ]depth\\b": 3.0, "\\bdetailed\\b": 2.5, "\\bcomprehensive\\b": 3.0, "\\bthorough\\b": 2.5,
  "\\bdeep dive\\b": 3.5, "\\boverall\\b": 2.0, "\\bsummary\\b": 2.0, "\\bstudy\\b": 2.5, "\\bresearch shows\\b": 3.5,
  "\\baccording to\\b": 2.5, "\\bevidence\\b": 3.0, "\\bscientific\\b": 3.0, "\\bhistory of\\b": 3.0,
  "\\bbackground\\b": 2.5, "\\bcontext\\b": 2.5, "\\bimplications?\\b": 3.0,

  // German
  "\\bwie funktioniert\\b": 4.0,
  "\\bwarum\\b": 3.5, "\\berklär(en|ung)?\\b": 4.0, "\\bwas ist\\b": 3.0, "\\bwas sind\\b": 3.0, "\\bbedeutung\\b": 3.0,
  "\\banalyse\\b": 3.5, "\\bvergleich(en)?\\b": 3.0, "\\bvor- und nachteile\\b": 4.0, "\\bvorteile\\b": 3.0,
  "\\bnachteile\\b": 3.0, "\\bunterschied(e)?\\b": 3.5, "\\bverstehen\\b": 3.0, "\\blernen\\b": 2.5,
  "\\banleitung\\b": 3.0, "(?<![\\wäöüß])übersicht\\b": 2.5, "\\bhintergrund\\b": 2.5, "\\bzusammenfassung\\b": 2.5,
};
|
|
439
|
+
/**
 * Weighted regex signals for discovery intent: similar sites, alternatives,
 * companies, funding, papers, repositories, and queries that contain a URL.
 * Keys are regex sources compiled case-insensitively; values are score weights.
 */
const DISCOVERY_SIGNALS: Record<string, number> = {
  // Similarity, alternatives and competitors
  "\\bsimilar to\\b": 5.0, "\\blike\\s+\\w+\\.com": 4.5, "\\balternatives? to\\b": 5.0, "\\bcompetitors? (of|to)\\b": 4.5,
  "\\bcompeting with\\b": 4.0, "\\brivals? (of|to)\\b": 4.0, "\\binstead of\\b": 3.0, "\\breplacement for\\b": 3.5,

  // Finding companies, tools and events
  "\\bcompanies (like|that|doing|building)\\b": 4.5, "\\bstartups? (like|that|doing|building)\\b": 4.5, "\\bwho else\\b": 4.0,
  "\\bother (companies|startups|tools|apps)\\b": 3.5, "\\bfind (companies|startups|tools|examples?)\\b": 4.5,
  "\\bevents? in\\b": 4.0, "\\bthings to do in\\b": 4.5,

  // Funding and venture
  "\\bseries [a-d]\\b": 4.0, "\\byc\\b|y combinator": 4.0,
  "\\bfund(ed|ing|raise)\\b": 3.5, "\\bventure\\b": 3.0, "\\bvaluation\\b": 3.0,

  // Papers, repositories and social content
  "\\bresearch papers? (on|about)\\b": 4.0,
  "\\barxiv\\b": 4.5, "\\bgithub (projects?|repos?)\\b": 4.5, "\\bopen source\\b.*\\bprojects?\\b": 4.0,
  "\\btweets? (about|on)\\b": 3.5, "\\bblogs? (about|on|like)\\b": 3.0,

  // Explicit URLs and bare domains
  "https?://[^\\s]+": 5.0, "\\b\\w+\\.(com|org|io|ai|co|dev)\\b": 3.5,
};
|
|
449
|
+
/**
 * Weighted regex signals (English and German) for local lookups and
 * time-sensitive news. Keys are regex sources compiled case-insensitively;
 * values are score weights.
 *
 * Patterns that begin with a non-ASCII letter must not start with `\b`:
 * without Unicode-aware word classes, `ö` is not a word character, so
 * `\böffnungszeiten` can never match at the start of a word. A lookbehind is
 * used instead.
 */
const LOCAL_NEWS_SIGNALS: Record<string, number> = {
  // Places and local businesses
  "\\bnear me\\b": 4.0, "\\bnearby\\b": 3.5, "\\blocal\\b": 3.0, "\\bin (my )?(city|area|town|neighborhood)\\b": 3.5,
  "\\brestaurants?\\b": 2.5, "\\bhotels?\\b": 2.5, "\\bcafes?\\b": 2.5, "\\bstores?\\b": 2.0, "\\bdirections? to\\b": 3.5,
  "\\bmap of\\b": 3.0, "\\bphone number\\b": 3.0, "\\baddress of\\b": 3.0, "\\bopen(ing)? hours\\b": 3.0,

  // Weather and time
  "\\bweather\\b": 4.0, "\\bforecast\\b": 3.5, "\\btemperature\\b": 3.0, "\\btime in\\b": 3.0,

  // Recency and news
  "\\blatest\\b": 2.5, "\\brecent\\b": 2.5, "\\btoday\\b": 2.5, "\\bbreaking\\b": 3.5, "\\bnews\\b": 2.5,
  "\\bheadlines?\\b": 3.0, "\\b202[4-9]\\b": 2.0, "\\blast (week|month|year)\\b": 2.0,

  // German
  "\\bin der nähe\\b": 4.0,
  "\\bin meiner nähe\\b": 4.0, "(?<![\\wäöüß])öffnungszeiten\\b": 3.0, "\\badresse von\\b": 3.0, "\\bweg(beschreibung)? nach\\b": 3.5,
  "\\bheute\\b": 2.5, "\\bmorgen\\b": 2.0, "\\baktuell\\b": 2.5, "\\bnachrichten\\b": 3.0,
};
|
|
459
|
+
/**
 * Weighted regex signals for summary / context-gathering ("RAG"-style) and
 * real-time queries. Keys are regex sources compiled case-insensitively;
 * values are score weights.
 */
const RAG_SIGNALS: Record<string, number> = {
  // Summaries and condensed context
  "\\brag\\b": 4.5, "\\bcontext for\\b": 4.0, "\\bsummarize\\b": 3.5, "\\bbrief(ly)?\\b": 3.0, "\\bquick overview\\b": 3.5,
  "\\btl;?dr\\b": 4.0, "\\bkey (points|facts|info)\\b": 3.5, "\\bmain (points|takeaways)\\b": 3.5,

  // Broad, multi-source coverage
  "\\b(web|online)\\s+and\\s+news\\b": 4.0, "\\ball sources\\b": 3.5, "\\bcomprehensive (search|overview)\\b": 3.5,

  // Current state of affairs
  "\\blatest\\s+(news|updates)\\b": 3.0, "\\bcurrent (events|situation|status)\\b": 3.5, "\\bright now\\b": 3.0,
  "\\bas of today\\b": 3.5, "\\bup.to.date\\b": 3.5, "\\breal.time\\b": 4.0, "\\blive\\b": 2.5,
  "\\bwhat'?s happening with\\b": 3.5, "\\bwhat'?s the latest\\b": 4.0, "\\bupdates?\\s+on\\b": 3.5, "\\bstatus of\\b": 3.0,
  "\\bsituation (in|with|around)\\b": 3.5,
};
|
|
468
|
+
/**
 * Weighted regex signals (English and German) for questions that want a
 * direct answer. Keys are regex sources compiled case-insensitively; values
 * are score weights.
 */
const DIRECT_ANSWER_SIGNALS: Record<string, number> = {
  // Factual and status questions
  "\\bwhat is\\b": 3.0, "\\bwhat are\\b": 2.5, "\\bcurrent status\\b": 4.0, "\\bstatus of\\b": 3.5, "\\bstatus\\b": 2.5,
  "\\bwhat happened with\\b": 4.0, "\\bwhat'?s happening with\\b": 4.0, "\\bas of (today|now)\\b": 4.0, "\\bthis weekend\\b": 3.5,

  // Local activities and conversational requests
  "\\bevents? in\\b": 3.5, "\\bthings to do in\\b": 4.0, "\\bnear me\\b": 3.0, "\\bcan you (tell me|summarize|explain)\\b": 3.5,

  // German question words
  "\\bwann\\b": 3.0, "\\bwer\\b": 3.0, "\\bwo\\b": 2.5, "\\bwie viele\\b": 3.0,
};
|
|
474
|
+
/**
 * Weighted regex signals (English and German) for privacy-focused,
 * multi-engine, or zero-cost search requests. Keys are regex sources compiled
 * case-insensitively; values are score weights.
 */
const PRIVACY_SIGNALS: Record<string, number> = {
  // Privacy and anonymity
  "\\bprivate(ly)?\\b": 4.0, "\\banonymous(ly)?\\b": 4.0, "\\bwithout tracking\\b": 4.5, "\\bno track(ing)?\\b": 4.5,
  "\\bprivacy\\b": 3.5, "\\bprivacy.?focused\\b": 4.5, "\\bprivacy.?first\\b": 4.5, "\\bduckduckgo alternative\\b": 4.5,
  "\\bprivate search\\b": 5.0, "\\bprivat\\b": 4.0, "\\banonym\\b": 4.0, "\\bohne tracking\\b": 4.5,
  "\\bdatenschutz\\b": 4.0,

  // Aggregated / multi-engine results
  "\\baggregate results?\\b": 4.0, "\\bmultiple sources?\\b": 4.0, "\\bdiverse (results|perspectives|sources)\\b": 4.0,
  "\\bfrom (all|multiple|different) (engines?|sources?)\\b": 4.5, "\\bmeta.?search\\b": 5.0, "\\ball engines?\\b": 4.0,
  "\\bverschiedene quellen\\b": 4.0, "\\baus mehreren quellen\\b": 4.0, "\\balle suchmaschinen\\b": 4.5,

  // Free / self-hosted search
  "\\bfree search\\b": 3.5, "\\bno api cost\\b": 4.0, "\\bself.?hosted search\\b": 5.0, "\\bzero cost\\b": 3.5,
  "\\bbudget\\b(?!\\s*(laptop|phone|option))\\b": 2.5, "\\bkostenlos(e)?\\s+suche\\b": 3.5, "\\bkeine api.?kosten\\b": 4.0,
};
|
|
484
|
+
/**
 * Weighted regex signals (English and German) for multi-source synthesis and
 * report-style research; by name, these feed the Exa "deep" depth decision.
 * Keys are regex sources compiled case-insensitively; values are score weights.
 */
const EXA_DEEP_SIGNALS: Record<string, number> = {
  // Synthesis across sources
  "\\bsynthesi[sz]e\\b": 5.0, "\\bdeep research\\b": 5.0, "\\bcomprehensive (analysis|report|overview|survey)\\b": 4.5,
  "\\bacross (multiple|many|several) (sources|documents|papers)\\b": 4.5, "\\baggregat(e|ing) (information|data|results)\\b": 4.0,
  "\\bcross.?referenc": 4.5,

  // Financial documents
  "\\bsec filings?\\b": 4.5, "\\bannual reports?\\b": 4.0, "\\bearnings (call|report|transcript)\\b": 4.5,
  "\\bfinancial analysis\\b": 4.0,

  // Academic and literature work
  "\\bliterature (review|survey)\\b": 5.0, "\\bacademic literature\\b": 4.5,
  "\\bstate of the (art|field|industry)\\b": 4.0, "\\bcompile (a |the )?(report|findings|results)\\b": 4.5,
  "\\bsummariz(e|ing) (research|papers|studies)\\b": 4.0, "\\bmultiple documents?\\b": 4.0, "\\bdossier\\b": 4.5,

  // Business and market reports
  "\\bdue diligence\\b": 4.5, "\\bstructured (output|data|report)\\b": 4.0, "\\bmarket research\\b": 4.0,
  "\\bindustry (report|analysis|overview)\\b": 4.0, "\\bresearch (on|about|into)\\b": 4.0, "\\bwhitepaper\\b": 4.5,
  "\\btechnical report\\b": 4.0, "\\bsurvey of\\b": 4.5, "\\bmeta.?analysis\\b": 5.0, "\\bsystematic review\\b": 5.0,
  "\\bcase study\\b": 3.5, "\\bbenchmark(s|ing)?\\b": 3.5,

  // German
  "\\btiefenrecherche\\b": 5.0, "\\bumfassende (analyse|übersicht|recherche)\\b": 4.5,
  "\\baus mehreren quellen zusammenfassen\\b": 4.5, "\\bmarktforschung\\b": 4.0,
};
|
|
497
|
+
/**
 * Weighted regex signals (English and German) for queries that need weighing
 * of conflicting evidence or complex analysis; by name, these feed the Exa
 * "deep-reasoning" depth decision. Keys are regex sources compiled
 * case-insensitively; values are score weights.
 */
const EXA_DEEP_REASONING_SIGNALS: Record<string, number> = {
  // Reasoning over conflicting evidence
  "\\bdeep.?reasoning\\b": 6.0, "\\bcomplex (analysis|reasoning|research)\\b": 4.5, "\\bcontradictions?\\b": 4.5,
  "\\breconcil(e|ing)\\b": 5.0, "\\bcritical(ly)? analyz": 4.5, "\\bweigh(ing)? (the )?evidence\\b": 4.5,
  "\\bcompeting (claims|theories|perspectives)\\b": 4.5,

  // Financial, regulatory and legal analysis
  "\\bcomplex financial\\b": 4.5, "\\bregulatory (analysis|compliance|landscape)\\b": 4.5,
  "\\blegal analysis\\b": 4.5, "\\bcomprehensive (due diligence|investigation)\\b": 5.0, "\\bpatent (landscape|analysis|search)\\b": 4.5,
  "\\bmarket intelligence\\b": 4.5, "\\bcompetitive (intelligence|landscape)\\b": 4.5,

  // Decisions and trade-offs
  "\\btrade.?offs?\\b": 4.0,
  "\\bpros and cons of\\b": 4.0, "\\bshould I (use|choose|pick)\\b": 3.5, "\\bwhich is better\\b": 4.0,

  // German
  "\\bkomplexe analyse\\b": 4.5, "\\bwidersprüche\\b": 4.5, "\\bquellen abwägen\\b": 4.5, "\\brechtliche analyse\\b": 4.5,
  "\\bvergleich(e|en)?\\b": 3.5,
};
|
|
507
|
+
/**
 * Regex sources naming consumer-electronics brands and product categories.
 * A query matching any of them is treated as brand/product related by the
 * analyzer's shopping heuristics. Patterns are compiled case-insensitively.
 */
const BRAND_PATTERNS = [
  // Brands and their flagship product lines
  "\\b(apple|iphone|ipad|macbook|airpods?)\\b",
  "\\b(samsung|galaxy)\\b",
  "\\b(google|pixel)\\b",
  "\\b(microsoft|surface|xbox)\\b",
  "\\b(sony|playstation)\\b",
  "\\b(nvidia|geforce|rtx)\\b",
  "\\b(amd|ryzen|radeon)\\b",
  "\\b(intel|core i[3579])\\b",
  "\\b(dell|hp|lenovo|asus|acer)\\b",
  "\\b(lg|tcl|hisense)\\b",
  // Generic product categories
  "\\b(laptop|phone|tablet|tv|monitor|headphones?|earbuds?)\\b",
  "\\b(camera|lens|drone)\\b",
  "\\b(watch|smartwatch|fitbit|garmin)\\b",
  "\\b(router|modem|wifi)\\b",
  "\\b(keyboard|mouse|gaming)\\b",
];
|
|
513
|
+
|
|
514
|
+
/**
 * Heuristic query classifier used for automatic provider routing.
 *
 * `analyze` scores a query against the module's signal tables and a few
 * structural heuristics (brand/product mention, URL, complexity, recency),
 * then combines those into one score per provider. `route` picks the
 * highest-scoring provider among those available and reports a confidence.
 */
class QueryAnalyzer {
  /** Signal regexes compiled once per pattern source instead of once per call. */
  private static readonly compiledPatterns = new Map<string, RegExp>();

  /** Wording that indicates a concrete product query (purchase terms, model suffixes, sizes). */
  private static readonly PRODUCT_INDICATORS: readonly RegExp[] = [
    /\b(buy|price|specs?|review|vs|compare)\b/i,
    /\b(pro|max|plus|mini|ultra|lite)\b/i,
    /\b\d+\s*(gb|tb|inch|mm|hz)\b/i,
  ];

  /** Recency wording with the weight each pattern contributes. */
  private static readonly RECENCY_PATTERNS: ReadonlyArray<readonly [RegExp, number]> = [
    [/\b(latest|newest|recent|current)\b/i, 2.5],
    [/\b(today|yesterday|this week|this month)\b/i, 3],
    [/\b(202[4-9]|2030)\b/i, 2],
    [/\b(breaking|live|just|now)\b/i, 3],
    [/\blast (hour|day|week|month)\b/i, 2.5],
  ];

  /** Returns the cached case-insensitive RegExp for `pattern`, compiling it on first use. */
  private static toRegex(pattern: string): RegExp {
    let regex = QueryAnalyzer.compiledPatterns.get(pattern);
    if (!regex) {
      // No "g" flag, so the cached instance carries no lastIndex state between uses.
      regex = new RegExp(pattern, "i");
      QueryAnalyzer.compiledPatterns.set(pattern, regex);
    }
    return regex;
  }

  /**
   * Sums the weights of every signal pattern that matches the query.
   *
   * @param query   Raw user query.
   * @param signals Map of regex source → weight.
   * @returns `total` weight and one `{ pattern, matched, weight }` entry per
   *          matching pattern; `matched` is taken from the lower-cased query.
   */
  calculateSignalScore(query: string, signals: Record<string, number>) {
    const q = query.toLowerCase();
    const matches: any[] = [];
    let total = 0;
    for (const [pattern, weight] of Object.entries(signals)) {
      const found = q.match(QueryAnalyzer.toRegex(pattern));
      if (found) {
        matches.push({ pattern, matched: found[0], weight });
        total += weight;
      }
    }
    return { total, matches };
  }

  /**
   * Shopping bonus for brand/product mentions.
   *
   * @returns 3 when the query matches a brand pattern and a product indicator,
   *          1.5 for a brand pattern alone, otherwise 0.
   */
  detectProductBrandCombo(query: string): number {
    const hasBrand = BRAND_PATTERNS.some((p) => QueryAnalyzer.toRegex(p).test(query));
    if (!hasBrand) return 0;
    const hasProduct = QueryAnalyzer.PRODUCT_INDICATORS.some((regex) => regex.test(query));
    return hasProduct ? 3 : 1.5;
  }

  /**
   * Finds the first http(s) URL or bare domain with one of a fixed set of TLDs.
   *
   * @returns The matched text, or `null` when nothing URL-like is present.
   */
  detectUrl(query: string): string | null {
    const found = query.match(/https?:\/\/[^\s]+|\b\w+\.(com|org|io|ai|co|dev|net|app)\b/i);
    return found?.[0] || null;
  }

  /**
   * Estimates structural complexity from word count, question words and
   * clause markers. A query is "complex" when its score exceeds 2.
   */
  assessQueryComplexity(query: string) {
    const words = query.trim().split(/\s+/).filter(Boolean);
    const wordCount = words.length;
    const questionWords = (query.match(/\b(what|why|how|when|where|which|who|whose|whom)\b/gi) || []).length;
    const clauseMarkers = (query.match(/\b(and|but|or|because|since|while|although|if|when)\b/gi) || []).length;
    let complexityScore = 0;
    if (wordCount > 10) complexityScore += 1.5;
    if (wordCount > 20) complexityScore += 1.0;
    if (questionWords > 1) complexityScore += 1.0;
    if (clauseMarkers > 0) complexityScore += clauseMarkers * 0.5;
    return {
      word_count: wordCount,
      question_words: questionWords,
      clause_markers: clauseMarkers,
      complexity_score: complexityScore,
      is_complex: complexityScore > 2,
    };
  }

  /**
   * Scores how strongly the query asks for fresh information.
   *
   * @returns Summed weight of matching recency patterns and whether it exceeds 2.
   */
  detectRecencyIntent(query: string) {
    let total = 0;
    for (const [regex, weight] of QueryAnalyzer.RECENCY_PATTERNS) {
      if (regex.test(query)) total += weight;
    }
    return { is_recency_focused: total > 2, score: total };
  }

  /**
   * Runs every heuristic and combines them into per-provider scores.
   *
   * @returns Detected URL, complexity and recency details, Exa depth scores,
   *          `provider_scores` and the matches that produced them.
   */
  analyze(query: string) {
    const shopping = this.calculateSignalScore(query, SHOPPING_SIGNALS);
    const research = this.calculateSignalScore(query, RESEARCH_SIGNALS);
    const discovery = this.calculateSignalScore(query, DISCOVERY_SIGNALS);
    const localNews = this.calculateSignalScore(query, LOCAL_NEWS_SIGNALS);
    const rag = this.calculateSignalScore(query, RAG_SIGNALS);
    const privacy = this.calculateSignalScore(query, PRIVACY_SIGNALS);
    const direct = this.calculateSignalScore(query, DIRECT_ANSWER_SIGNALS);
    const exaDeep = this.calculateSignalScore(query, EXA_DEEP_SIGNALS);
    const exaDeepReasoning = this.calculateSignalScore(query, EXA_DEEP_REASONING_SIGNALS);

    const brandBonus = this.detectProductBrandCombo(query);
    if (brandBonus > 0) {
      shopping.total += brandBonus;
      shopping.matches.push({ pattern: "product_brand_combo", matched: "brand + product detected", weight: brandBonus });
    }
    const detectedUrl = this.detectUrl(query);
    if (detectedUrl) {
      discovery.total += 5;
      discovery.matches.push({ pattern: "url_detected", matched: detectedUrl, weight: 5 });
    }
    const complexity = this.assessQueryComplexity(query);
    if (complexity.is_complex) {
      research.total += complexity.complexity_score;
      research.matches.push({ pattern: "query_complexity", matched: `complex query (${complexity.word_count} words)`, weight: complexity.complexity_score });
    }
    const recency = this.detectRecencyIntent(query);

    // The leading \b must sit outside the group so that all three alternatives
    // are anchored at a word start; otherwise "counterexamples" would match.
    const similarityBonus = /\b(similar|alternatives?|examples?)\b/i.test(query) ? 1 : 0;

    return {
      detected_url: detectedUrl,
      complexity,
      recency_focused: recency.is_recency_focused,
      recency_score: recency.score,
      exa_deep_score: exaDeep.total,
      exa_deep_reasoning_score: exaDeepReasoning.total,
      provider_scores: {
        serper: shopping.total + localNews.total + recency.score * 0.35,
        // research.total already contains the complexity score for complex queries.
        tavily: research.total + (complexity.is_complex ? 0 : complexity.complexity_score) + recency.score * 0.2,
        querit: research.total * 0.65 + rag.total * 0.35 + recency.score * 0.45,
        exa: discovery.total + similarityBonus + exaDeep.total * 0.5 + exaDeepReasoning.total * 0.5,
        perplexity: direct.total + localNews.total * 0.4 + recency.score * 0.55,
        you: rag.total + recency.score * 0.25,
        searxng: privacy.total,
      },
      provider_matches: {
        serper: [...shopping.matches, ...localNews.matches],
        tavily: research.matches,
        querit: research.matches,
        exa: [...discovery.matches, ...exaDeep.matches, ...exaDeepReasoning.matches],
        perplexity: direct.matches,
        you: rag.matches,
        searxng: privacy.matches,
      },
    };
  }

  /**
   * Chooses a provider for the query among `availableProviders`.
   *
   * Ties on the top score are broken by a fixed priority order. Confidence
   * blends the normalized top score (capped at 15 points) with the relative
   * margin over the runner-up.
   *
   * @returns The selected provider with confidence, reason, per-provider
   *          scores, the strongest signals and the suggested Exa depth. With no
   *          available providers it returns "serper" at confidence 0.
   */
  route(query: string, availableProviders: ProviderName[]) {
    const analysis = this.analyze(query);
    const scores = analysis.provider_scores as Record<ProviderName, number>;
    const available = Object.fromEntries(availableProviders.map((p) => [p, scores[p] ?? 0])) as Record<ProviderName, number>;
    const providers = Object.keys(available) as ProviderName[];
    if (!providers.length) {
      return { provider: "serper" as ProviderName, confidence: 0, confidence_level: "low", reason: "no_available_providers", scores: {}, top_signals: [], exa_depth: "normal" };
    }
    const maxScore = Math.max(...providers.map((p) => available[p]));
    const winners = providers.filter((p) => available[p] === maxScore);
    const priority: ProviderName[] = ["tavily", "querit", "exa", "perplexity", "serper", "you", "searxng"];
    const winner = priority.find((p) => winners.includes(p)) || winners[0];
    const secondBest = providers.map((p) => available[p]).sort((a, b) => b - a)[1] || 0;
    const margin = maxScore > 0 ? (maxScore - secondBest) / maxScore : 0;
    const normalizedScore = Math.min(maxScore / 15, 1);
    const confidence = maxScore === 0 ? 0 : Number((normalizedScore * 0.6 + margin * 0.4).toFixed(3));
    let exaDepth: "normal" | "deep" | "deep-reasoning" = "normal";
    if (winner === "exa") {
      if ((analysis.exa_deep_reasoning_score || 0) >= 4) exaDepth = "deep-reasoning";
      else if ((analysis.exa_deep_score || 0) >= 4) exaDepth = "deep";
    }
    // Sort a copy: tavily and querit share the same matches array in the analysis.
    const rankedSignals = [...(analysis.provider_matches[winner] || [])].sort((a: any, b: any) => b.weight - a.weight);
    return {
      provider: winner,
      confidence,
      confidence_level: confidence >= 0.7 ? "high" : confidence >= 0.4 ? "medium" : "low",
      reason: maxScore === 0 ? "no_signals_matched" : confidence >= 0.7 ? "high_confidence_match" : confidence >= 0.4 ? "moderate_confidence_match" : "low_confidence_match",
      exa_depth: exaDepth,
      scores: Object.fromEntries(providers.map((p) => [p, Number((available[p] || 0).toFixed(2))])),
      top_signals: rankedSignals.slice(0, 5).map((s: any) => ({ matched: s.matched, weight: s.weight })),
      analysis_summary: {
        query_length: query.trim().split(/\s+/).filter(Boolean).length,
        is_complex: analysis.complexity.is_complex,
        has_url: !!analysis.detected_url,
        recency_focused: analysis.recency_focused,
      },
    };
  }
}
|
|
655
|
+
|
|
656
|
+
/**
 * Queries the Serper search endpoint and maps its response to a SearchResponse.
 *
 * @param query      Search text.
 * @param apiKey     Sent in the X-API-KEY header.
 * @param maxResults Requested and returned result cap.
 * @param timeRange  Optional "day" | "week" | "month" | "year"; other values are ignored.
 * @returns Organic results with position-derived scores, plus an answer taken
 *          from the answer box, knowledge graph or first snippet.
 */
async function searchSerper(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const tbsByRange: Record<string, string> = { day: "qdr:d", week: "qdr:w", month: "qdr:m", year: "qdr:y" };
  const payload: Json = { q: query, gl: "us", hl: "en", num: maxResults, autocorrect: true };
  if (timeRange && tbsByRange[timeRange]) {
    payload.tbs = tbsByRange[timeRange];
  }

  const data = await httpJson("https://google.serper.dev/search", {
    method: "POST",
    headers: { "X-API-KEY": apiKey, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  // Score decays by 0.1 per rank position.
  const results = (data.organic || []).slice(0, maxResults).map((item: any, i: number) => ({
    title: item.title || "",
    url: item.link || "",
    snippet: item.snippet || "",
    score: Number((1 - i * 0.1).toFixed(2)),
    date: item.date,
  }));

  const answerBox = data?.answerBox;
  const answer = answerBox?.answer || answerBox?.snippet || data?.knowledgeGraph?.description || results[0]?.snippet || "";
  const relatedSearches = (data.relatedSearches || []).map((r: any) => r.query);

  return { provider: "serper", query, results, images: [], answer, knowledge_graph: data.knowledgeGraph, related_searches: relatedSearches };
}
|
|
665
|
+
|
|
666
|
+
/**
 * Queries the Tavily search endpoint (basic depth, general topic, answer included).
 *
 * @param query          Search text.
 * @param apiKey         Sent in the request body as `api_key`.
 * @param maxResults     Requested and returned result cap.
 * @param includeDomains Optional allow-list; only sent when non-empty.
 * @param excludeDomains Optional block-list; only sent when non-empty.
 * @returns Results with Tavily's own score rounded to three decimals, plus its answer.
 */
async function searchTavily(query: string, apiKey: string, maxResults: number, includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const payload: Json = {
    api_key: apiKey,
    query,
    max_results: maxResults,
    search_depth: "basic",
    topic: "general",
    include_images: false,
    include_answer: true,
    include_raw_content: false,
  };
  if (includeDomains?.length) {
    payload.include_domains = includeDomains;
  }
  if (excludeDomains?.length) {
    payload.exclude_domains = excludeDomains;
  }

  const data = await httpJson("https://api.tavily.com/search", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  const results = (data.results || []).slice(0, maxResults).map((item: any) => ({
    title: item.title || "",
    url: item.url || "",
    snippet: item.content || "",
    score: Number((item.score || 0).toFixed(3)),
  }));

  return { provider: "tavily", query, results, images: data.images || [], answer: data.answer || "" };
}
|
|
674
|
+
|
|
675
|
+
/**
 * Queries the Querit search endpoint with English/US filters.
 *
 * @param query          Search text.
 * @param apiKey         Sent as a Bearer token.
 * @param maxResults     Requested and returned result cap.
 * @param timeRange      Optional "day" | "week" | "month" | "year"; other values are ignored.
 * @param includeDomains Optional site allow-list.
 * @param excludeDomains Optional site block-list.
 * @returns Results with position-derived scores; the answer is the first snippet.
 * @throws ProviderRequestError when the payload carries `error_msg` or an
 *         `error_code` other than 0 or 200.
 */
async function searchQuerit(query: string, apiKey: string, maxResults: number, timeRange?: string, includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const timeMap: Record<string, string> = { day: "d1", week: "w1", month: "m1", year: "y1" };
  const mappedRange = timeRange && timeMap[timeRange];

  const filters: Json = { languages: { include: ["en"] }, geo: { countries: { include: ["US"] } } };
  const wantsInclude = !!includeDomains?.length;
  const wantsExclude = !!excludeDomains?.length;
  if (wantsInclude || wantsExclude) {
    const sites: Json = {};
    if (wantsInclude) sites.include = includeDomains;
    if (wantsExclude) sites.exclude = excludeDomains;
    filters.sites = sites;
  }
  if (mappedRange) {
    filters.timeRange = { date: mappedRange };
  }

  const payload: Json = { query, count: maxResults, filters };
  const data = await httpJson("https://api.querit.ai/v1/search", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  // The error check runs on the parsed body, so a successful HTTP exchange can still fail here.
  const hasBadCode = data.error_code != null && ![0, 200].includes(data.error_code);
  if (data.error_msg || hasBadCode) {
    throw new ProviderRequestError(data.error_msg || `Querit request failed with error_code=${data.error_code}`);
  }

  const raw = data?.results?.result || [];
  const results = raw.slice(0, maxResults).map((item: any, i: number) => ({
    title: item.title || titleFromUrl(item.url || ""),
    url: item.url || "",
    snippet: item.snippet || item.page_age || "",
    score: Number((1 - i * 0.05).toFixed(3)),
    page_time: item.page_time,
    date: item.page_age,
    language: item.language,
  }));

  return {
    provider: "querit",
    query,
    results,
    images: [],
    answer: results[0]?.snippet || "",
    metadata: { search_id: data.search_id, time_range: mappedRange },
  };
}
|
|
691
|
+
|
|
692
|
+
/**
 * Queries the Exa search endpoint in normal (neural) or deep mode.
 *
 * Deep modes ("deep", "deep-reasoning") request longer text, use a longer
 * timeout, and return a synthesis entry (when one is provided) followed by
 * the source results, together with flattened grounding citations.
 *
 * @param query          Search text.
 * @param apiKey         Sent in the x-api-key header.
 * @param maxResults     Requested and returned source cap.
 * @param exaDepth       "normal", "deep" or "deep-reasoning".
 * @param includeDomains Optional allow-list; only sent when non-empty.
 * @param excludeDomains Optional block-list; only sent when non-empty.
 * @returns Normalized SearchResponse. In deep mode the answer is the synthesis,
 *          falling back to the first source snippet when no synthesis exists.
 */
async function searchExa(query: string, apiKey: string, maxResults: number, exaDepth: "normal" | "deep" | "deep-reasoning", includeDomains?: string[], excludeDomains?: string[]): Promise<SearchResponse> {
  const isDeep = exaDepth === "deep" || exaDepth === "deep-reasoning";
  const body: Json = isDeep
    ? { query, numResults: maxResults, type: exaDepth, contents: { text: { maxCharacters: 5000, verbosity: "full" } } }
    : { query, numResults: maxResults, type: "neural", contents: { text: { maxCharacters: 2000, verbosity: "standard" }, highlights: { numSentences: 3, highlightsPerUrl: 2 } } };
  if (includeDomains?.length) body.includeDomains = includeDomains;
  if (excludeDomains?.length) body.excludeDomains = excludeDomains;

  // Deep modes get a longer timeout (55 s vs 30 s).
  const data = await httpJson(
    "https://api.exa.ai/search",
    { method: "POST", headers: { "x-api-key": apiKey, "Content-Type": "application/json" }, body: JSON.stringify(body) },
    isDeep ? 55000 : 30000,
  );

  if (isDeep) {
    const deepOutput = data.output || {};
    // Non-string content is serialized so the synthesis is always a string.
    const synthesis = typeof deepOutput.content === "string" ? deepOutput.content : deepOutput.content ? JSON.stringify(deepOutput.content) : "";

    // Flatten per-field citations into a single list.
    const grounding: any[] = [];
    for (const field of deepOutput.grounding || []) {
      for (const cite of field.citations || []) {
        grounding.push({ url: cite.url || "", title: cite.title || "", confidence: field.confidence, field: field.field });
      }
    }

    const sources: SearchResult[] = (data.results || []).slice(0, maxResults).map((item: any) => ({
      title: item.title || "",
      url: item.url || "",
      snippet: item.text ? String(item.text).slice(0, 800) : (item.highlights || [])[0] || "",
      score: Number((item.score || 0).toFixed(3)),
      published_date: item.publishedDate,
      author: item.author,
      type: "source",
    }));

    const results: SearchResult[] = [];
    if (synthesis) {
      results.push({ title: `Exa ${exaDepth.replace(/-/g, " ")} synthesis`, url: "", snippet: synthesis, full_synthesis: synthesis, score: 1, grounding: grounding.slice(0, 10), type: "synthesis" });
    }
    results.push(...sources);

    // Without a synthesis there is no leading synthesis entry, so the fallback
    // must be the first source rather than the second element of `results`.
    const answer = synthesis || sources[0]?.snippet || "";

    return {
      provider: "exa",
      query,
      exa_depth: exaDepth,
      results,
      images: [],
      answer,
      grounding,
      metadata: { synthesis_length: synthesis.length, source_count: (data.results || []).length },
    };
  }

  const results = (data.results || []).slice(0, maxResults).map((item: any) => ({
    title: item.title || "",
    url: item.url || "",
    snippet: item.text ? String(item.text).slice(0, 800) : Array.isArray(item.highlights) ? item.highlights.slice(0, 2).join(" ... ") : "",
    score: Number((item.score || 0).toFixed(3)),
    published_date: item.publishedDate,
    author: item.author,
  }));
  return { provider: "exa", query, results, images: [], answer: results[0]?.snippet || "" };
}
|
|
720
|
+
|
|
721
|
+
/**
 * Asks the "perplexity/sonar-pro" model through the Kilo gateway's
 * chat-completions endpoint and converts the answer and its citations into
 * search results.
 *
 * The first result is the answer itself (citation markers like "[1]" removed,
 * truncated to 500 characters); the remaining results are the cited sources.
 * When the response carries no `citations` array, URLs are extracted from the
 * answer text instead.
 *
 * @param query      Question sent as the user message.
 * @param apiKey     Sent as a Bearer token.
 * @param maxResults Total result budget; citations get `maxResults - 1` slots.
 * @param timeRange  Optional value forwarded as `search_recency_filter`.
 * @returns Normalized SearchResponse including the full answer and usage metadata.
 */
async function searchPerplexity(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const body: Json = {
    model: "perplexity/sonar-pro",
    messages: [
      { role: "system", content: "Answer with concise factual summary and include source URLs." },
      { role: "user", content: query },
    ],
    temperature: 0.2,
  };
  if (timeRange) body.search_recency_filter = timeRange;

  const data = await httpJson("https://api.kilo.ai/api/gateway/chat/completions", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
    body: JSON.stringify(body),
  });

  const answer = String(data?.choices?.[0]?.message?.content || "").trim();
  let citations = Array.isArray(data?.citations) ? data.citations : [];
  if (!citations.length) {
    const matches: string[] = answer.match(/https?:\/\/[^\s)\]}>"']+/g) || [];
    // A URL at the end of a sentence or list item drags its punctuation along
    // ("https://example.com/a."); strip it so the link is usable and duplicates
    // that differ only by that punctuation collapse to one entry.
    citations = [...new Set(matches.map((u) => u.replace(/[.,;:!?]+$/, "")))];
  }

  const results: SearchResult[] = [];
  if (answer) {
    results.push({ title: `Perplexity Answer: ${query.slice(0, 80)}`, url: "https://www.perplexity.ai", snippet: answer.replace(/\[\d+\]/g, "").trim().slice(0, 500), score: 1.0 });
  }
  for (const [i, citation] of citations.slice(0, Math.max(0, maxResults - 1)).entries()) {
    // Citations may be plain URL strings or objects with url/title fields.
    const url = typeof citation === "string" ? citation : citation?.url || "";
    const title = typeof citation === "string" ? titleFromUrl(url) : citation?.title || titleFromUrl(url);
    results.push({ title, url, snippet: `Source cited in Perplexity answer [citation ${i + 1}]`, score: Number((0.9 - i * 0.1).toFixed(3)) });
  }

  return { provider: "perplexity", query, results, images: [], answer, metadata: { model: body.model, usage: data.usage || {} } };
}
|
|
747
|
+
|
|
748
|
+
/**
 * Queries the You.com index search endpoint (US/EN, moderate safesearch).
 *
 * @param query      Search text.
 * @param apiKey     Sent in the X-API-KEY header.
 * @param maxResults Requested and returned web-result cap.
 * @param timeRange  Optional value forwarded as the `freshness` parameter.
 * @returns Web results with position-derived scores, up to five news items,
 *          and an answer built from the first three snippets (max 1000 chars).
 */
async function searchYou(query: string, apiKey: string, maxResults: number, timeRange?: string): Promise<SearchResponse> {
  const endpoint = new URL("https://ydc-index.io/v1/search");
  const queryParams: Array<[string, string]> = [
    ["query", query],
    ["count", String(maxResults)],
    ["safesearch", "moderate"],
    ["country", "US"],
    ["language", "EN"],
  ];
  if (timeRange) {
    queryParams.push(["freshness", timeRange]);
  }
  for (const [name, value] of queryParams) {
    endpoint.searchParams.set(name, value);
  }

  const data = await httpJson(endpoint.toString(), { method: "GET", headers: { "X-API-KEY": apiKey, Accept: "application/json" } });
  const web = data?.results?.web || [];
  const news = data?.results?.news || [];

  const results = web.slice(0, maxResults).map((item: any, i: number) => {
    const extraSnippets = Array.isArray(item.snippets) ? item.snippets.slice(1, 3) : undefined;
    return {
      title: item.title || "",
      url: item.url || "",
      snippet: item?.snippets?.[0] || item.description || "",
      score: Number((1 - i * 0.05).toFixed(3)),
      date: item.page_age,
      source: "web",
      additional_snippets: extraSnippets,
      thumbnail: item.thumbnail_url,
      favicon: item.favicon_url,
    };
  });

  // There is no answer field to read here, so one is stitched from the leading snippets.
  const leadingSnippets = results.slice(0, 3).map((r: any) => r.snippet).filter(Boolean);
  const answer = leadingSnippets.join(" ").slice(0, 1000);

  return {
    provider: "you",
    query,
    results,
    news: news.slice(0, 5),
    images: [],
    answer,
    metadata: { search_uuid: data?.metadata?.search_uuid, latency: data?.metadata?.latency },
  };
}
|
|
763
|
+
|
|
764
|
+
/**
 * Queries a SearXNG instance's JSON search endpoint.
 *
 * The instance URL is passed through `validateSearxngUrl` first and the
 * returned base is what the request is built from.
 *
 * @param query       Search text.
 * @param instanceUrl Base URL of the SearXNG instance.
 * @param maxResults  Returned result cap.
 * @param timeRange   Optional value forwarded as `time_range`.
 * @param env         Runtime environment handed to the URL validator.
 * @returns Results tagged with engine and category, an answer taken from
 *          answers, infoboxes or the first snippet, plus suggestions,
 *          corrections and instance metadata.
 */
async function searchSearxng(query: string, instanceUrl: string, maxResults: number, timeRange: string | undefined, env: Record<string, string>): Promise<SearchResponse> {
  const base = await validateSearxngUrl(instanceUrl, env);

  const endpoint = new URL(`${base}/search`);
  const queryParams: Array<[string, string]> = [
    ["q", query],
    ["format", "json"],
    ["language", "en"],
    ["safesearch", "0"],
  ];
  if (timeRange) {
    queryParams.push(["time_range", timeRange]);
  }
  for (const [name, value] of queryParams) {
    endpoint.searchParams.set(name, value);
  }

  const data = await httpJson(endpoint.toString(), { method: "GET", headers: { Accept: "application/json" } });

  const results = (data.results || []).slice(0, maxResults).map((item: any, i: number) => ({
    title: item.title || "",
    url: item.url || "",
    snippet: item.content || "",
    // Use the instance's score when present, else a position-derived one.
    score: Number((item.score ?? (1 - i * 0.05)).toFixed(3)),
    engine: item.engine || "unknown",
    category: item.category || "general",
    date: item.publishedDate,
  }));
  const enginesUsed = new Set<string>(results.map((r: any) => r.engine));

  let answer: string;
  if (Array.isArray(data.answers) && data.answers[0]) {
    answer = String(data.answers[0]);
  } else if (Array.isArray(data.infoboxes) && data.infoboxes[0]) {
    answer = String(data.infoboxes[0].content || data.infoboxes[0].infobox || "");
  } else {
    answer = results[0]?.snippet || "";
  }

  return {
    provider: "searxng",
    query,
    results,
    images: [],
    answer,
    suggestions: data.suggestions || [],
    corrections: data.corrections || [],
    metadata: { number_of_results: data.number_of_results, engines_used: [...enginesUsed], instance_url: base },
  };
}
|
|
781
|
+
|
|
782
|
+
/**
 * Runs a provider call, retrying transient failures with the delays listed in
 * RETRY_BACKOFF_MS (one attempt per entry, no delay after the last attempt).
 *
 * Only ProviderRequestError instances flagged `transient` are retried; any
 * other error, and HTTP 401/403 responses, abort immediately.
 *
 * @param fn Provider call to execute.
 * @returns The first successful response.
 * @throws The last error raised by `fn`.
 */
async function executeWithRetry(fn: () => Promise<SearchResponse>): Promise<SearchResponse> {
  // Always try at least once: with an empty backoff table the loop would
  // otherwise never call fn and this function would throw `undefined`.
  const maxAttempts = Math.max(1, RETRY_BACKOFF_MS.length);
  let lastError: unknown;
  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    try {
      return await fn();
    } catch (error: unknown) {
      lastError = error;
      const retryable =
        error instanceof ProviderRequestError &&
        error.transient &&
        error.statusCode !== 401 &&
        error.statusCode !== 403;
      if (!retryable) break;
      if (attempt < maxAttempts - 1) await sleep(RETRY_BACKOFF_MS[attempt]);
    }
  }
  throw lastError;
}
|
|
795
|
+
|
|
796
|
+
export default function (api: any) {
|
|
797
|
+
const pluginConfig: Record<string, string> = (api.pluginConfig ?? {}) as Record<string, string>;
|
|
798
|
+
const runtimeEnv = getRuntimeEnv(pluginConfig);
|
|
135
799
|
|
|
136
800
|
api.registerTool(
|
|
137
801
|
{
|
|
138
802
|
name: "web_search_plus",
|
|
139
803
|
description:
|
|
140
|
-
"Search the web
|
|
804
|
+
"Search the web with intelligent multi-provider routing across Serper, Tavily, Querit, Exa, Perplexity, You.com, and SearXNG. Auto-selects the best provider, caches results, retries transient failures, and falls back across providers.",
|
|
141
805
|
parameters: PARAMETERS_SCHEMA,
|
|
142
|
-
async execute(
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
query:
|
|
146
|
-
provider?: string;
|
|
147
|
-
count?: number;
|
|
148
|
-
depth?: string;
|
|
149
|
-
time_range?: string;
|
|
150
|
-
include_domains?: string[];
|
|
151
|
-
exclude_domains?: string[];
|
|
152
|
-
},
|
|
153
|
-
) {
|
|
154
|
-
if (!fs.existsSync(scriptPath)) {
|
|
155
|
-
return {
|
|
156
|
-
content: [{ type: "text", text: `Search failed: script not found at ${scriptPath}` }],
|
|
157
|
-
};
|
|
158
|
-
}
|
|
806
|
+
async execute(_id: string, params: ToolParams) {
|
|
807
|
+
try {
|
|
808
|
+
const query = String(params.query || "").trim();
|
|
809
|
+
if (!query) return { content: [{ type: "text", text: "Search failed: query is required" }] };
|
|
159
810
|
|
|
160
|
-
|
|
811
|
+
const count = Math.max(1, Math.min(10, Math.floor(Number(params.count || 5))));
|
|
812
|
+
const requestedProvider = (params.provider || "auto") as ProviderName | "auto";
|
|
813
|
+
const timeRange = toTimeRange(params.time_range);
|
|
814
|
+
const includeDomains = Array.isArray(params.include_domains) ? params.include_domains.filter(Boolean) : undefined;
|
|
815
|
+
const excludeDomains = Array.isArray(params.exclude_domains) ? params.exclude_domains.filter(Boolean) : undefined;
|
|
161
816
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
}
|
|
817
|
+
const allProviders: ProviderName[] = ["serper", "tavily", "querit", "exa", "perplexity", "you", "searxng"];
|
|
818
|
+
const configuredProviders = allProviders.filter((p) => !!getApiKey(p, runtimeEnv));
|
|
165
819
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
820
|
+
let routingInfo: Json;
|
|
821
|
+
let provider: ProviderName;
|
|
822
|
+
if (requestedProvider === "auto") {
|
|
823
|
+
const analyzer = new QueryAnalyzer();
|
|
824
|
+
const routing = analyzer.route(query, configuredProviders);
|
|
825
|
+
provider = routing.provider;
|
|
826
|
+
routingInfo = { auto_routed: true, provider, confidence: routing.confidence, confidence_level: routing.confidence_level, reason: routing.reason, top_signals: routing.top_signals, scores: routing.scores, exa_depth: routing.exa_depth };
|
|
827
|
+
} else {
|
|
828
|
+
provider = requestedProvider;
|
|
829
|
+
routingInfo = { auto_routed: false, provider };
|
|
830
|
+
}
|
|
169
831
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
832
|
+
const priority: ProviderName[] = ["tavily", "querit", "exa", "perplexity", "serper", "you", "searxng"];
|
|
833
|
+
const providersToTry: ProviderName[] = [provider, ...priority.filter((p) => p !== provider && configuredProviders.includes(p))];
|
|
834
|
+
const eligibleProviders: ProviderName[] = [];
|
|
835
|
+
const cooldownSkips: Json[] = [];
|
|
836
|
+
for (const p of providersToTry) {
|
|
837
|
+
const cooldown = providerInCooldown(p);
|
|
838
|
+
if (cooldown.inCooldown) cooldownSkips.push({ provider: p, cooldown_remaining_seconds: cooldown.remaining });
|
|
839
|
+
else eligibleProviders.push(p);
|
|
840
|
+
}
|
|
841
|
+
if (!eligibleProviders.length) eligibleProviders.push(provider);
|
|
173
842
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
843
|
+
const cacheContext = {
|
|
844
|
+
time_range: timeRange,
|
|
845
|
+
include_domains: includeDomains ? [...includeDomains].sort() : null,
|
|
846
|
+
exclude_domains: excludeDomains ? [...excludeDomains].sort() : null,
|
|
847
|
+
exa_depth: params.depth || routingInfo.exa_depth || "normal",
|
|
848
|
+
};
|
|
178
849
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
850
|
+
const cached = cacheGet(query, provider, count, DEFAULT_CACHE_TTL, cacheContext);
|
|
851
|
+
if (cached) {
|
|
852
|
+
const result = { ...cached };
|
|
853
|
+
for (const key of Object.keys(result)) if (key.startsWith("_cache_")) delete result[key];
|
|
854
|
+
result.cached = true;
|
|
855
|
+
result.cache_age_seconds = Math.floor(Date.now() / 1000 - Number(cached._cache_timestamp || 0));
|
|
856
|
+
result.routing = { ...routingInfo, ...(cooldownSkips.length ? { cooldown_skips: cooldownSkips } : {}) };
|
|
857
|
+
return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput(result)) }] };
|
|
858
|
+
}
|
|
182
859
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
}
|
|
860
|
+
const errors: Json[] = [];
|
|
861
|
+
const successes: Array<[string, SearchResponse]> = [];
|
|
186
862
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
863
|
+
const runProvider = async (p: ProviderName): Promise<SearchResponse> => {
|
|
864
|
+
const key = validateApiKey(p, runtimeEnv);
|
|
865
|
+
if (p === "serper") return searchSerper(query, key, count, timeRange);
|
|
866
|
+
if (p === "tavily") return searchTavily(query, key, count, includeDomains, excludeDomains);
|
|
867
|
+
if (p === "querit") return searchQuerit(query, key, count, timeRange, includeDomains, excludeDomains);
|
|
868
|
+
if (p === "exa") {
|
|
869
|
+
const exaDepth = (params.depth || routingInfo.exa_depth || "normal") as "normal" | "deep" | "deep-reasoning";
|
|
870
|
+
return searchExa(query, key, count, exaDepth, includeDomains, excludeDomains);
|
|
871
|
+
}
|
|
872
|
+
if (p === "perplexity") return searchPerplexity(query, key, count, timeRange);
|
|
873
|
+
if (p === "you") return searchYou(query, key, count, timeRange);
|
|
874
|
+
return searchSearxng(query, key, count, timeRange, runtimeEnv);
|
|
875
|
+
};
|
|
196
876
|
|
|
197
|
-
|
|
877
|
+
for (const p of eligibleProviders) {
|
|
878
|
+
try {
|
|
879
|
+
const result = await executeWithRetry(() => runProvider(p));
|
|
880
|
+
resetProviderHealth(p);
|
|
881
|
+
successes.push([p, result]);
|
|
882
|
+
if ((result.results || []).length >= count || errors.length === 0) break;
|
|
883
|
+
} catch (error: any) {
|
|
884
|
+
const message = sanitizeOutput(String(error?.message || error));
|
|
885
|
+
const cooldown = markProviderFailure(p, message);
|
|
886
|
+
errors.push({ provider: p, error: message, cooldown_seconds: cooldown.cooldown_seconds });
|
|
887
|
+
}
|
|
888
|
+
}
|
|
198
889
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
890
|
+
if (!successes.length) {
|
|
891
|
+
return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput({ error: "All providers failed", provider, query, routing: routingInfo, provider_errors: errors, cooldown_skips: cooldownSkips })) }] };
|
|
892
|
+
}
|
|
893
|
+
|
|
894
|
+
let result: SearchResponse;
|
|
895
|
+
if (successes.length === 1) {
|
|
896
|
+
result = successes[0][1];
|
|
897
|
+
} else {
|
|
898
|
+
result = { ...successes[0][1] };
|
|
899
|
+
const deduped = deduplicateResultsAcrossProviders(successes, count);
|
|
900
|
+
result.results = deduped.results;
|
|
901
|
+
result.deduplicated = deduped.dedupCount > 0;
|
|
902
|
+
result.metadata = { ...(result.metadata || {}), dedup_count: deduped.dedupCount, providers_merged: successes.map(([p]) => p) };
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
const successfulProvider = successes[0][0] as ProviderName;
|
|
906
|
+
if (successfulProvider !== provider) {
|
|
907
|
+
routingInfo = { ...routingInfo, fallback_used: true, original_provider: provider, provider: successfulProvider, fallback_errors: errors };
|
|
908
|
+
}
|
|
909
|
+
if (cooldownSkips.length) routingInfo.cooldown_skips = cooldownSkips;
|
|
910
|
+
result.routing = routingInfo;
|
|
911
|
+
result.cached = false;
|
|
912
|
+
if (!(result as any).metadata) result.metadata = {};
|
|
913
|
+
if ((result as any).deduplicated == null) (result as any).deduplicated = false;
|
|
914
|
+
if ((result.metadata as any).dedup_count == null) (result.metadata as any).dedup_count = 0;
|
|
205
915
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
916
|
+
cachePut(query, successfulProvider, count, result, cacheContext);
|
|
917
|
+
|
|
918
|
+
return { content: [{ type: "text", text: JSON.stringify(sanitizeOutput(result)) }] };
|
|
919
|
+
} catch (error: any) {
|
|
920
|
+
return { content: [{ type: "text", text: `Search failed: ${sanitizeOutput(String(error?.message || error))}` }] };
|
|
921
|
+
}
|
|
209
922
|
},
|
|
210
923
|
},
|
|
211
924
|
{ optional: true },
|