web-search-plus-plugin 1.1.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.template +16 -0
- package/index.ts +50 -5
- package/openclaw.plugin.json +21 -3
- package/package.json +4 -4
- package/scripts/search.py +228 -36
package/.env.template
CHANGED
|
@@ -1,4 +1,20 @@
|
|
|
1
|
+
# Serper (Google Search) — https://serper.dev
|
|
1
2
|
SERPER_API_KEY=your-serper-key-here
|
|
3
|
+
|
|
4
|
+
# Tavily (Research Search) — https://tavily.com
|
|
2
5
|
TAVILY_API_KEY=your-tavily-key-here
|
|
6
|
+
|
|
7
|
+
# Exa (Neural/Deep Search) — https://exa.ai
|
|
3
8
|
EXA_API_KEY=your-exa-key-here
|
|
9
|
+
|
|
10
|
+
# Perplexity (Direct answers) — https://www.perplexity.ai/settings/api
|
|
4
11
|
PERPLEXITY_API_KEY=your-perplexity-key-here
|
|
12
|
+
|
|
13
|
+
# Kilo Gateway (alternative Perplexity route) — https://api.kilo.ai
|
|
14
|
+
KILOCODE_API_KEY=your-kilocode-key-here
|
|
15
|
+
|
|
16
|
+
# You.com — https://api.you.com
|
|
17
|
+
YOU_API_KEY=your-you-api-key-here
|
|
18
|
+
|
|
19
|
+
# SearXNG (self-hosted, no API key needed)
|
|
20
|
+
SEARXNG_INSTANCE_URL=https://your-searxng-instance.example.com
|
package/index.ts
CHANGED
|
@@ -35,11 +35,28 @@ const PLUGIN_DIR = getPluginDir();
|
|
|
35
35
|
const scriptPath = path.join(PLUGIN_DIR, "scripts", "search.py");
|
|
36
36
|
|
|
37
37
|
export default function (api: any) {
|
|
38
|
+
// Bridge OpenClaw config fields to env vars expected by search.py
|
|
39
|
+
const configEnv: Record<string, string> = {};
|
|
40
|
+
const pluginConfig: Record<string, string> = (api as any)?.config ?? {};
|
|
41
|
+
const configKeyMap: Record<string, string> = {
|
|
42
|
+
serperApiKey: "SERPER_API_KEY",
|
|
43
|
+
tavilyApiKey: "TAVILY_API_KEY",
|
|
44
|
+
exaApiKey: "EXA_API_KEY",
|
|
45
|
+
perplexityApiKey: "PERPLEXITY_API_KEY",
|
|
46
|
+
kilocodeApiKey: "KILOCODE_API_KEY",
|
|
47
|
+
youApiKey: "YOU_API_KEY",
|
|
48
|
+
searxngInstanceUrl: "SEARXNG_INSTANCE_URL",
|
|
49
|
+
};
|
|
50
|
+
for (const [cfgKey, envKey] of Object.entries(configKeyMap)) {
|
|
51
|
+
const val = pluginConfig[cfgKey];
|
|
52
|
+
if (val && typeof val === "string") configEnv[envKey] = val;
|
|
53
|
+
}
|
|
54
|
+
|
|
38
55
|
api.registerTool(
|
|
39
56
|
{
|
|
40
57
|
name: "web_search_plus",
|
|
41
58
|
description:
|
|
42
|
-
"Search the web using multi-provider routing (Serper/Google, Tavily/Research, Exa/Neural). Automatically
|
|
59
|
+
"Search the web using multi-provider intelligent routing (Serper/Google, Tavily/Research, Exa/Neural+Deep, Perplexity, You.com, SearXNG). Automatically selects the best provider based on query intent. Use for ALL web searches. Set depth='deep' for multi-source synthesis, 'deep-reasoning' for complex cross-document analysis.",
|
|
43
60
|
parameters: Type.Object({
|
|
44
61
|
query: Type.String({ description: "Search query" }),
|
|
45
62
|
provider: Type.Optional(
|
|
@@ -48,6 +65,9 @@ export default function (api: any) {
|
|
|
48
65
|
Type.Literal("serper"),
|
|
49
66
|
Type.Literal("tavily"),
|
|
50
67
|
Type.Literal("exa"),
|
|
68
|
+
Type.Literal("perplexity"),
|
|
69
|
+
Type.Literal("you"),
|
|
70
|
+
Type.Literal("searxng"),
|
|
51
71
|
Type.Literal("auto"),
|
|
52
72
|
],
|
|
53
73
|
{
|
|
@@ -59,10 +79,28 @@ export default function (api: any) {
|
|
|
59
79
|
count: Type.Optional(
|
|
60
80
|
Type.Number({ description: "Number of results (default: 5)" }),
|
|
61
81
|
),
|
|
82
|
+
depth: Type.Optional(
|
|
83
|
+
Type.Union(
|
|
84
|
+
[
|
|
85
|
+
Type.Literal("normal"),
|
|
86
|
+
Type.Literal("deep"),
|
|
87
|
+
Type.Literal("deep-reasoning"),
|
|
88
|
+
],
|
|
89
|
+
{
|
|
90
|
+
description:
|
|
91
|
+
"Exa search depth: 'deep' synthesizes across sources (4-12s), 'deep-reasoning' for complex cross-reference analysis (12-50s). Only applies when routed to Exa.",
|
|
92
|
+
},
|
|
93
|
+
),
|
|
94
|
+
),
|
|
62
95
|
}),
|
|
63
96
|
async execute(
|
|
64
97
|
_id: string,
|
|
65
|
-
params: {
|
|
98
|
+
params: {
|
|
99
|
+
query: string;
|
|
100
|
+
provider?: string;
|
|
101
|
+
count?: number;
|
|
102
|
+
depth?: string;
|
|
103
|
+
},
|
|
66
104
|
) {
|
|
67
105
|
const args = [scriptPath, "--query", params.query, "--compact"];
|
|
68
106
|
|
|
@@ -71,7 +109,14 @@ export default function (api: any) {
|
|
|
71
109
|
}
|
|
72
110
|
|
|
73
111
|
if (typeof params.count === "number" && Number.isFinite(params.count)) {
|
|
74
|
-
args.push(
|
|
112
|
+
args.push(
|
|
113
|
+
"--max-results",
|
|
114
|
+
String(Math.max(1, Math.floor(params.count))),
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if (params.depth && params.depth !== "normal") {
|
|
119
|
+
args.push("--exa-depth", params.depth);
|
|
75
120
|
}
|
|
76
121
|
|
|
77
122
|
const envPaths = [
|
|
@@ -82,11 +127,11 @@ export default function (api: any) {
|
|
|
82
127
|
for (const envPath of envPaths) {
|
|
83
128
|
Object.assign(fileEnv, loadEnvFile(envPath));
|
|
84
129
|
}
|
|
85
|
-
const childEnv = { ...process.env, ...fileEnv };
|
|
130
|
+
const childEnv = { ...process.env, ...configEnv, ...fileEnv };
|
|
86
131
|
|
|
87
132
|
try {
|
|
88
133
|
const child = spawnSync("python3", args, {
|
|
89
|
-
timeout:
|
|
134
|
+
timeout: 65000,
|
|
90
135
|
env: childEnv,
|
|
91
136
|
shell: false,
|
|
92
137
|
encoding: "utf8",
|
package/openclaw.plugin.json
CHANGED
|
@@ -1,11 +1,29 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "web-search-plus-plugin",
|
|
3
|
+
"kind": "skill",
|
|
3
4
|
"name": "Web Search Plus",
|
|
4
|
-
"version": "1.
|
|
5
|
-
"description": "Multi-provider web search (Serper/Tavily/
|
|
5
|
+
"version": "1.2.0",
|
|
6
|
+
"description": "Multi-provider web search (Serper/Google, Tavily, Exa/Neural+Deep, Perplexity, You.com, SearXNG) with intelligent auto-routing",
|
|
6
7
|
"configSchema": {
|
|
7
8
|
"type": "object",
|
|
8
9
|
"additionalProperties": false,
|
|
9
|
-
"properties": {
|
|
10
|
+
"properties": {
|
|
11
|
+
"serperApiKey": { "type": "string" },
|
|
12
|
+
"tavilyApiKey": { "type": "string" },
|
|
13
|
+
"exaApiKey": { "type": "string" },
|
|
14
|
+
"perplexityApiKey": { "type": "string" },
|
|
15
|
+
"kilocodeApiKey": { "type": "string" },
|
|
16
|
+
"youApiKey": { "type": "string" },
|
|
17
|
+
"searxngInstanceUrl": { "type": "string" }
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"uiHints": {
|
|
21
|
+
"serperApiKey": { "label": "Serper API Key", "placeholder": "sk-...", "sensitive": true },
|
|
22
|
+
"tavilyApiKey": { "label": "Tavily API Key", "placeholder": "tvly-...", "sensitive": true },
|
|
23
|
+
"exaApiKey": { "label": "Exa API Key", "placeholder": "exa-...", "sensitive": true },
|
|
24
|
+
"perplexityApiKey": { "label": "Perplexity API Key", "placeholder": "pplx-...", "sensitive": true },
|
|
25
|
+
"kilocodeApiKey": { "label": "Kilo Gateway API Key", "placeholder": "...", "sensitive": true },
|
|
26
|
+
"youApiKey": { "label": "You.com API Key", "placeholder": "...", "sensitive": true },
|
|
27
|
+
"searxngInstanceUrl": { "label": "SearXNG Instance URL", "placeholder": "https://searx.example.com", "sensitive": false }
|
|
10
28
|
}
|
|
11
29
|
}
|
package/package.json
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "web-search-plus-plugin",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "OpenClaw plugin: multi-provider web search (Serper/Google, Tavily, Exa, Perplexity)
|
|
3
|
+
"version": "1.2.0",
|
|
4
|
+
"description": "OpenClaw plugin: multi-provider web search (Serper/Google, Tavily, Exa/Neural+Deep, Perplexity, You.com, SearXNG) with intelligent auto-routing",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
7
7
|
"files": [
|
|
8
8
|
"index.ts",
|
|
9
9
|
"openclaw.plugin.json",
|
|
10
|
-
"scripts
|
|
10
|
+
"scripts/*.py",
|
|
11
11
|
".env.template",
|
|
12
12
|
"README.md",
|
|
13
13
|
"LICENSE"
|
|
14
14
|
],
|
|
15
|
-
"keywords": ["openclaw", "plugin", "search", "serper", "tavily", "exa", "perplexity", "web-search"],
|
|
15
|
+
"keywords": ["openclaw", "plugin", "search", "serper", "tavily", "exa", "exa-deep", "perplexity", "you", "searxng", "web-search", "auto-routing"],
|
|
16
16
|
"repository": {
|
|
17
17
|
"type": "git",
|
|
18
18
|
"url": "https://github.com/robbyczgw-cla/web-search-plus-plugin"
|
package/scripts/search.py
CHANGED
|
@@ -289,7 +289,9 @@ DEFAULT_CONFIG = {
|
|
|
289
289
|
"topic": "general"
|
|
290
290
|
},
|
|
291
291
|
"exa": {
|
|
292
|
-
"type": "neural"
|
|
292
|
+
"type": "neural",
|
|
293
|
+
"depth": "normal",
|
|
294
|
+
"verbosity": "standard"
|
|
293
295
|
},
|
|
294
296
|
"perplexity": {
|
|
295
297
|
"api_url": "https://api.kilo.ai/api/gateway/chat/completions",
|
|
@@ -351,12 +353,13 @@ def get_api_key(provider: str, config: Dict[str, Any] = None) -> Optional[str]:
|
|
|
351
353
|
return key
|
|
352
354
|
|
|
353
355
|
# Then check environment
|
|
356
|
+
if provider == "perplexity":
|
|
357
|
+
return os.environ.get("PERPLEXITY_API_KEY") or os.environ.get("KILOCODE_API_KEY")
|
|
354
358
|
key_map = {
|
|
355
359
|
"serper": "SERPER_API_KEY",
|
|
356
360
|
"tavily": "TAVILY_API_KEY",
|
|
357
361
|
"exa": "EXA_API_KEY",
|
|
358
362
|
"you": "YOU_API_KEY",
|
|
359
|
-
"perplexity": "KILOCODE_API_KEY",
|
|
360
363
|
}
|
|
361
364
|
return os.environ.get(key_map.get(provider, ""))
|
|
362
365
|
|
|
@@ -848,7 +851,61 @@ class QueryAnalyzer:
|
|
|
848
851
|
r'\bkostenlos(e)?\s+suche\b': 3.5,
|
|
849
852
|
r'\bkeine api.?kosten\b': 4.0,
|
|
850
853
|
}
|
|
851
|
-
|
|
854
|
+
|
|
855
|
+
# Exa Deep Search signals → deep multi-source synthesis
|
|
856
|
+
EXA_DEEP_SIGNALS = {
|
|
857
|
+
r'\bsynthesi[sz]e\b': 5.0,
|
|
858
|
+
r'\bdeep research\b': 5.0,
|
|
859
|
+
r'\bcomprehensive (analysis|report|overview|survey)\b': 4.5,
|
|
860
|
+
r'\bacross (multiple|many|several) (sources|documents|papers)\b': 4.5,
|
|
861
|
+
r'\baggregat(e|ing) (information|data|results)\b': 4.0,
|
|
862
|
+
r'\bcross.?referenc': 4.5,
|
|
863
|
+
r'\bsec filings?\b': 4.5,
|
|
864
|
+
r'\bannual reports?\b': 4.0,
|
|
865
|
+
r'\bearnings (call|report|transcript)\b': 4.5,
|
|
866
|
+
r'\bfinancial analysis\b': 4.0,
|
|
867
|
+
r'\bliterature (review|survey)\b': 5.0,
|
|
868
|
+
r'\bacademic literature\b': 4.5,
|
|
869
|
+
r'\bstate of the (art|field|industry)\b': 4.0,
|
|
870
|
+
r'\bcompile (a |the )?(report|findings|results)\b': 4.5,
|
|
871
|
+
r'\bsummariz(e|ing) (research|papers|studies)\b': 4.0,
|
|
872
|
+
r'\bmultiple documents?\b': 4.0,
|
|
873
|
+
r'\bdossier\b': 4.5,
|
|
874
|
+
r'\bdue diligence\b': 4.5,
|
|
875
|
+
r'\bstructured (output|data|report)\b': 4.0,
|
|
876
|
+
r'\bmarket research\b': 4.0,
|
|
877
|
+
r'\bindustry (report|analysis|overview)\b': 4.0,
|
|
878
|
+
# German
|
|
879
|
+
r'\btiefenrecherche\b': 5.0,
|
|
880
|
+
r'\bumfassende (analyse|übersicht|recherche)\b': 4.5,
|
|
881
|
+
r'\baus mehreren quellen zusammenfassen\b': 4.5,
|
|
882
|
+
r'\bmarktforschung\b': 4.0,
|
|
883
|
+
}
|
|
884
|
+
|
|
885
|
+
# Exa Deep Reasoning signals → complex cross-reference analysis
|
|
886
|
+
EXA_DEEP_REASONING_SIGNALS = {
|
|
887
|
+
r'\bdeep.?reasoning\b': 6.0,
|
|
888
|
+
r'\bcomplex (analysis|reasoning|research)\b': 4.5,
|
|
889
|
+
r'\bcontradictions?\b': 4.5,
|
|
890
|
+
r'\breconcil(e|ing)\b': 5.0,
|
|
891
|
+
r'\bcritical(ly)? analyz': 4.5,
|
|
892
|
+
r'\bweigh(ing)? (the )?evidence\b': 4.5,
|
|
893
|
+
r'\bcompeting (claims|theories|perspectives)\b': 4.5,
|
|
894
|
+
r'\bcomplex financial\b': 4.5,
|
|
895
|
+
r'\bregulatory (analysis|compliance|landscape)\b': 4.5,
|
|
896
|
+
r'\blegal analysis\b': 4.5,
|
|
897
|
+
r'\bcomprehensive (due diligence|investigation)\b': 5.0,
|
|
898
|
+
r'\bpatent (landscape|analysis|search)\b': 4.5,
|
|
899
|
+
r'\bmarket intelligence\b': 4.5,
|
|
900
|
+
r'\bcompetitive (intelligence|landscape)\b': 4.5,
|
|
901
|
+
# German
|
|
902
|
+
r'\bkomplexe analyse\b': 4.5,
|
|
903
|
+
r'\bwidersprüche\b': 4.5,
|
|
904
|
+
r'\bquellen abwägen\b': 4.5,
|
|
905
|
+
r'\brechtliche analyse\b': 4.5,
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
|
|
852
909
|
# Brand/product patterns for shopping detection
|
|
853
910
|
BRAND_PATTERNS = [
|
|
854
911
|
# Tech brands
|
|
@@ -1033,7 +1090,13 @@ class QueryAnalyzer:
|
|
|
1033
1090
|
direct_answer_score, direct_answer_matches = self._calculate_signal_score(
|
|
1034
1091
|
query, self.DIRECT_ANSWER_SIGNALS
|
|
1035
1092
|
)
|
|
1036
|
-
|
|
1093
|
+
exa_deep_score, exa_deep_matches = self._calculate_signal_score(
|
|
1094
|
+
query, self.EXA_DEEP_SIGNALS
|
|
1095
|
+
)
|
|
1096
|
+
exa_deep_reasoning_score, exa_deep_reasoning_matches = self._calculate_signal_score(
|
|
1097
|
+
query, self.EXA_DEEP_REASONING_SIGNALS
|
|
1098
|
+
)
|
|
1099
|
+
|
|
1037
1100
|
# Apply product/brand bonus to shopping
|
|
1038
1101
|
brand_bonus = self._detect_product_brand_combo(query)
|
|
1039
1102
|
if brand_bonus > 0:
|
|
@@ -1071,7 +1134,7 @@ class QueryAnalyzer:
|
|
|
1071
1134
|
provider_scores = {
|
|
1072
1135
|
"serper": shopping_score + local_news_score + (recency_score * 0.35),
|
|
1073
1136
|
"tavily": research_score + (complexity["complexity_score"] if not complexity["is_complex"] else 0) + (0.2 * recency_score),
|
|
1074
|
-
"exa": discovery_score + (1.0 if re.search(r"\b(similar|alternatives?|examples?)\b", query, re.IGNORECASE) else 0.0),
|
|
1137
|
+
"exa": discovery_score + (1.0 if re.search(r"\b(similar|alternatives?|examples?)\b", query, re.IGNORECASE) else 0.0) + (exa_deep_score * 0.5) + (exa_deep_reasoning_score * 0.5),
|
|
1075
1138
|
"perplexity": direct_answer_score + (local_news_score * 0.4) + (recency_score * 0.55),
|
|
1076
1139
|
"you": rag_score + (recency_score * 0.25), # You.com good for real-time + RAG
|
|
1077
1140
|
"searxng": privacy_score, # SearXNG for privacy/multi-source queries
|
|
@@ -1081,7 +1144,7 @@ class QueryAnalyzer:
|
|
|
1081
1144
|
provider_matches = {
|
|
1082
1145
|
"serper": shopping_matches + local_news_matches,
|
|
1083
1146
|
"tavily": research_matches,
|
|
1084
|
-
"exa": discovery_matches,
|
|
1147
|
+
"exa": discovery_matches + exa_deep_matches + exa_deep_reasoning_matches,
|
|
1085
1148
|
"perplexity": direct_answer_matches,
|
|
1086
1149
|
"you": rag_matches,
|
|
1087
1150
|
"searxng": privacy_matches,
|
|
@@ -1095,6 +1158,8 @@ class QueryAnalyzer:
|
|
|
1095
1158
|
"complexity": complexity,
|
|
1096
1159
|
"recency_focused": is_recency,
|
|
1097
1160
|
"recency_score": recency_score,
|
|
1161
|
+
"exa_deep_score": exa_deep_score,
|
|
1162
|
+
"exa_deep_reasoning_score": exa_deep_reasoning_score,
|
|
1098
1163
|
}
|
|
1099
1164
|
|
|
1100
1165
|
def route(self, query: str) -> Dict[str, Any]:
|
|
@@ -1107,8 +1172,8 @@ class QueryAnalyzer:
|
|
|
1107
1172
|
# Filter to available providers
|
|
1108
1173
|
disabled = set(self.auto_config.get("disabled_providers", []))
|
|
1109
1174
|
available = {
|
|
1110
|
-
p: s for p, s in scores.items()
|
|
1111
|
-
if p not in disabled and
|
|
1175
|
+
p: s for p, s in scores.items()
|
|
1176
|
+
if p not in disabled and get_api_key(p, self.config)
|
|
1112
1177
|
}
|
|
1113
1178
|
|
|
1114
1179
|
if not available:
|
|
@@ -1179,18 +1244,29 @@ class QueryAnalyzer:
|
|
|
1179
1244
|
# (user might want similar search)
|
|
1180
1245
|
pass # Keep current winner but note it
|
|
1181
1246
|
|
|
1247
|
+
# Determine Exa search depth when routed to Exa
|
|
1248
|
+
exa_depth = "normal"
|
|
1249
|
+
if winner == "exa":
|
|
1250
|
+
deep_r_score = analysis.get("exa_deep_reasoning_score", 0)
|
|
1251
|
+
deep_score = analysis.get("exa_deep_score", 0)
|
|
1252
|
+
if deep_r_score >= 4.0:
|
|
1253
|
+
exa_depth = "deep-reasoning"
|
|
1254
|
+
elif deep_score >= 4.0:
|
|
1255
|
+
exa_depth = "deep"
|
|
1256
|
+
|
|
1182
1257
|
# Build detailed routing result
|
|
1183
1258
|
threshold = self.auto_config.get("confidence_threshold", 0.3)
|
|
1184
|
-
|
|
1259
|
+
|
|
1185
1260
|
return {
|
|
1186
1261
|
"provider": winner,
|
|
1187
1262
|
"confidence": confidence,
|
|
1188
1263
|
"confidence_level": "high" if confidence >= 0.7 else "medium" if confidence >= 0.4 else "low",
|
|
1189
1264
|
"reason": reason,
|
|
1265
|
+
"exa_depth": exa_depth,
|
|
1190
1266
|
"scores": {p: round(s, 2) for p, s in available.items()},
|
|
1191
1267
|
"winning_score": round(max_score, 2),
|
|
1192
1268
|
"top_signals": [
|
|
1193
|
-
{"matched": s["matched"], "weight": s["weight"]}
|
|
1269
|
+
{"matched": s["matched"], "weight": s["weight"]}
|
|
1194
1270
|
for s in top_signals
|
|
1195
1271
|
],
|
|
1196
1272
|
"below_threshold": confidence < threshold,
|
|
@@ -1227,6 +1303,7 @@ def explain_routing(query: str, config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
1227
1303
|
"confidence": routing["confidence"],
|
|
1228
1304
|
"confidence_level": routing["confidence_level"],
|
|
1229
1305
|
"reason": routing["reason"],
|
|
1306
|
+
"exa_depth": routing.get("exa_depth", "normal"),
|
|
1230
1307
|
},
|
|
1231
1308
|
"scores": routing["scores"],
|
|
1232
1309
|
"top_signals": routing["top_signals"],
|
|
@@ -1235,6 +1312,8 @@ def explain_routing(query: str, config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
1235
1312
|
"research_signals": len(analysis["provider_matches"]["tavily"]),
|
|
1236
1313
|
"discovery_signals": len(analysis["provider_matches"]["exa"]),
|
|
1237
1314
|
"rag_signals": len(analysis["provider_matches"]["you"]),
|
|
1315
|
+
"exa_deep_score": round(analysis.get("exa_deep_score", 0), 2),
|
|
1316
|
+
"exa_deep_reasoning_score": round(analysis.get("exa_deep_reasoning_score", 0), 2),
|
|
1238
1317
|
},
|
|
1239
1318
|
"query_analysis": {
|
|
1240
1319
|
"word_count": analysis["complexity"]["word_count"],
|
|
@@ -1252,8 +1331,8 @@ def explain_routing(query: str, config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
1252
1331
|
if matches
|
|
1253
1332
|
},
|
|
1254
1333
|
"available_providers": [
|
|
1255
|
-
p for p in ["serper", "tavily", "exa", "perplexity", "you", "searxng"]
|
|
1256
|
-
if
|
|
1334
|
+
p for p in ["serper", "tavily", "exa", "perplexity", "you", "searxng"]
|
|
1335
|
+
if get_api_key(p, config) and p not in config.get("auto_routing", {}).get("disabled_providers", [])
|
|
1257
1336
|
]
|
|
1258
1337
|
}
|
|
1259
1338
|
|
|
@@ -1544,7 +1623,7 @@ def search_tavily(
|
|
|
1544
1623
|
|
|
1545
1624
|
|
|
1546
1625
|
# =============================================================================
|
|
1547
|
-
# Exa (Neural/Semantic Search)
|
|
1626
|
+
# Exa (Neural/Semantic/Deep Search)
|
|
1548
1627
|
# =============================================================================
|
|
1549
1628
|
|
|
1550
1629
|
def search_exa(
|
|
@@ -1552,22 +1631,43 @@ def search_exa(
|
|
|
1552
1631
|
api_key: str,
|
|
1553
1632
|
max_results: int = 5,
|
|
1554
1633
|
search_type: str = "neural",
|
|
1634
|
+
exa_depth: str = "normal",
|
|
1555
1635
|
category: Optional[str] = None,
|
|
1556
1636
|
start_date: Optional[str] = None,
|
|
1557
1637
|
end_date: Optional[str] = None,
|
|
1558
1638
|
similar_url: Optional[str] = None,
|
|
1559
1639
|
include_domains: Optional[List[str]] = None,
|
|
1560
1640
|
exclude_domains: Optional[List[str]] = None,
|
|
1641
|
+
text_verbosity: str = "standard",
|
|
1561
1642
|
) -> dict:
|
|
1562
|
-
"""Search using Exa (Neural/Semantic Search).
|
|
1643
|
+
"""Search using Exa (Neural/Semantic/Deep Search).
|
|
1644
|
+
|
|
1645
|
+
exa_depth controls synthesis level:
|
|
1646
|
+
- "normal": standard search (neural/fast/auto/keyword/instant)
|
|
1647
|
+
- "deep": multi-source synthesis with grounding (4-12s, $12/1k)
|
|
1648
|
+
- "deep-reasoning": cross-reference reasoning with grounding (12-50s, $15/1k)
|
|
1649
|
+
"""
|
|
1650
|
+
is_deep = exa_depth in ("deep", "deep-reasoning")
|
|
1651
|
+
|
|
1563
1652
|
if similar_url:
|
|
1653
|
+
# findSimilar does not support deep search types
|
|
1564
1654
|
endpoint = "https://api.exa.ai/findSimilar"
|
|
1565
|
-
body = {
|
|
1655
|
+
body: Dict[str, Any] = {
|
|
1566
1656
|
"url": similar_url,
|
|
1567
1657
|
"numResults": max_results,
|
|
1568
1658
|
"contents": {
|
|
1569
|
-
"text": {"maxCharacters":
|
|
1570
|
-
"highlights":
|
|
1659
|
+
"text": {"maxCharacters": 2000, "verbosity": text_verbosity},
|
|
1660
|
+
"highlights": {"numSentences": 3, "highlightsPerUrl": 2},
|
|
1661
|
+
},
|
|
1662
|
+
}
|
|
1663
|
+
elif is_deep:
|
|
1664
|
+
endpoint = "https://api.exa.ai/search"
|
|
1665
|
+
body = {
|
|
1666
|
+
"query": query,
|
|
1667
|
+
"numResults": max_results,
|
|
1668
|
+
"type": exa_depth,
|
|
1669
|
+
"contents": {
|
|
1670
|
+
"text": {"maxCharacters": 5000, "verbosity": "full"},
|
|
1571
1671
|
},
|
|
1572
1672
|
}
|
|
1573
1673
|
else:
|
|
@@ -1577,11 +1677,11 @@ def search_exa(
|
|
|
1577
1677
|
"numResults": max_results,
|
|
1578
1678
|
"type": search_type,
|
|
1579
1679
|
"contents": {
|
|
1580
|
-
"text": {"maxCharacters":
|
|
1581
|
-
"highlights":
|
|
1680
|
+
"text": {"maxCharacters": 2000, "verbosity": text_verbosity},
|
|
1681
|
+
"highlights": {"numSentences": 3, "highlightsPerUrl": 2},
|
|
1582
1682
|
},
|
|
1583
1683
|
}
|
|
1584
|
-
|
|
1684
|
+
|
|
1585
1685
|
if category:
|
|
1586
1686
|
body["category"] = category
|
|
1587
1687
|
if start_date:
|
|
@@ -1592,19 +1692,91 @@ def search_exa(
|
|
|
1592
1692
|
body["includeDomains"] = include_domains
|
|
1593
1693
|
if exclude_domains:
|
|
1594
1694
|
body["excludeDomains"] = exclude_domains
|
|
1595
|
-
|
|
1695
|
+
|
|
1596
1696
|
headers = {
|
|
1597
1697
|
"x-api-key": api_key,
|
|
1598
1698
|
"Content-Type": "application/json",
|
|
1599
1699
|
}
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1700
|
+
|
|
1701
|
+
timeout = 55 if is_deep else 30
|
|
1702
|
+
data = make_request(endpoint, headers, body, timeout=timeout)
|
|
1703
|
+
|
|
1603
1704
|
results = []
|
|
1705
|
+
|
|
1706
|
+
# Deep search: primary content in output field with grounding citations
|
|
1707
|
+
if is_deep:
|
|
1708
|
+
deep_output = data.get("output", {})
|
|
1709
|
+
synthesized_text = ""
|
|
1710
|
+
grounding_citations: List[Dict[str, Any]] = []
|
|
1711
|
+
|
|
1712
|
+
if isinstance(deep_output.get("content"), str):
|
|
1713
|
+
synthesized_text = deep_output["content"]
|
|
1714
|
+
elif isinstance(deep_output.get("content"), dict):
|
|
1715
|
+
synthesized_text = json.dumps(deep_output["content"], ensure_ascii=False)
|
|
1716
|
+
|
|
1717
|
+
for field_citation in deep_output.get("grounding", []):
|
|
1718
|
+
for cite in field_citation.get("citations", []):
|
|
1719
|
+
grounding_citations.append({
|
|
1720
|
+
"url": cite.get("url", ""),
|
|
1721
|
+
"title": cite.get("title", ""),
|
|
1722
|
+
"confidence": field_citation.get("confidence", ""),
|
|
1723
|
+
"field": field_citation.get("field", ""),
|
|
1724
|
+
})
|
|
1725
|
+
|
|
1726
|
+
# Primary synthesized result
|
|
1727
|
+
if synthesized_text:
|
|
1728
|
+
results.append({
|
|
1729
|
+
"title": f"Exa {exa_depth.replace('-', ' ').title()} Synthesis",
|
|
1730
|
+
"url": "",
|
|
1731
|
+
"snippet": synthesized_text[:2000],
|
|
1732
|
+
"full_synthesis": synthesized_text,
|
|
1733
|
+
"score": 1.0,
|
|
1734
|
+
"grounding": grounding_citations[:10],
|
|
1735
|
+
"type": "synthesis",
|
|
1736
|
+
})
|
|
1737
|
+
|
|
1738
|
+
# Supporting source documents
|
|
1739
|
+
for item in data.get("results", [])[:max_results]:
|
|
1740
|
+
text_content = item.get("text", "") or ""
|
|
1741
|
+
highlights = item.get("highlights", [])
|
|
1742
|
+
snippet = text_content[:800] if text_content else (highlights[0] if highlights else "")
|
|
1743
|
+
results.append({
|
|
1744
|
+
"title": item.get("title", ""),
|
|
1745
|
+
"url": item.get("url", ""),
|
|
1746
|
+
"snippet": snippet,
|
|
1747
|
+
"score": round(item.get("score", 0.0), 3),
|
|
1748
|
+
"published_date": item.get("publishedDate"),
|
|
1749
|
+
"author": item.get("author"),
|
|
1750
|
+
"type": "source",
|
|
1751
|
+
})
|
|
1752
|
+
|
|
1753
|
+
answer = synthesized_text[:1000] if synthesized_text else (results[1]["snippet"] if len(results) > 1 else "")
|
|
1754
|
+
|
|
1755
|
+
return {
|
|
1756
|
+
"provider": "exa",
|
|
1757
|
+
"query": query,
|
|
1758
|
+
"exa_depth": exa_depth,
|
|
1759
|
+
"results": results,
|
|
1760
|
+
"images": [],
|
|
1761
|
+
"answer": answer,
|
|
1762
|
+
"grounding": grounding_citations,
|
|
1763
|
+
"metadata": {
|
|
1764
|
+
"synthesis_length": len(synthesized_text),
|
|
1765
|
+
"source_count": len(data.get("results", [])),
|
|
1766
|
+
},
|
|
1767
|
+
}
|
|
1768
|
+
|
|
1769
|
+
# Standard search result parsing
|
|
1604
1770
|
for item in data.get("results", [])[:max_results]:
|
|
1771
|
+
text_content = item.get("text", "") or ""
|
|
1605
1772
|
highlights = item.get("highlights", [])
|
|
1606
|
-
|
|
1607
|
-
|
|
1773
|
+
if text_content:
|
|
1774
|
+
snippet = text_content[:800]
|
|
1775
|
+
elif highlights:
|
|
1776
|
+
snippet = " ... ".join(highlights[:2])
|
|
1777
|
+
else:
|
|
1778
|
+
snippet = ""
|
|
1779
|
+
|
|
1608
1780
|
results.append({
|
|
1609
1781
|
"title": item.get("title", ""),
|
|
1610
1782
|
"url": item.get("url", ""),
|
|
@@ -1613,9 +1785,9 @@ def search_exa(
|
|
|
1613
1785
|
"published_date": item.get("publishedDate"),
|
|
1614
1786
|
"author": item.get("author"),
|
|
1615
1787
|
})
|
|
1616
|
-
|
|
1788
|
+
|
|
1617
1789
|
answer = results[0]["snippet"] if results else ""
|
|
1618
|
-
|
|
1790
|
+
|
|
1619
1791
|
return {
|
|
1620
1792
|
"provider": "exa",
|
|
1621
1793
|
"query": query if not similar_url else f"Similar to: {similar_url}",
|
|
@@ -2123,9 +2295,22 @@ Full docs: See README.md and SKILL.md
|
|
|
2123
2295
|
# Exa-specific
|
|
2124
2296
|
exa_config = config.get("exa", {})
|
|
2125
2297
|
parser.add_argument(
|
|
2126
|
-
"--exa-type",
|
|
2127
|
-
default=exa_config.get("type", "neural"),
|
|
2128
|
-
choices=["neural", "keyword"]
|
|
2298
|
+
"--exa-type",
|
|
2299
|
+
default=exa_config.get("type", "neural"),
|
|
2300
|
+
choices=["neural", "fast", "auto", "keyword", "instant"],
|
|
2301
|
+
help="Exa search type (for standard search, ignored when --exa-depth is set)"
|
|
2302
|
+
)
|
|
2303
|
+
parser.add_argument(
|
|
2304
|
+
"--exa-depth",
|
|
2305
|
+
default=exa_config.get("depth", "normal"),
|
|
2306
|
+
choices=["normal", "deep", "deep-reasoning"],
|
|
2307
|
+
help="Exa search depth: deep (synthesized, 4-12s), deep-reasoning (cross-reference, 12-50s)"
|
|
2308
|
+
)
|
|
2309
|
+
parser.add_argument(
|
|
2310
|
+
"--exa-verbosity",
|
|
2311
|
+
default=exa_config.get("verbosity", "standard"),
|
|
2312
|
+
choices=["compact", "standard", "full"],
|
|
2313
|
+
help="Exa text verbosity for content extraction"
|
|
2129
2314
|
)
|
|
2130
2315
|
parser.add_argument(
|
|
2131
2316
|
"--category",
|
|
@@ -2157,10 +2342,9 @@ Full docs: See README.md and SKILL.md
|
|
|
2157
2342
|
help="You.com: fetch full page content"
|
|
2158
2343
|
)
|
|
2159
2344
|
parser.add_argument(
|
|
2160
|
-
"--
|
|
2345
|
+
"--no-news",
|
|
2161
2346
|
action="store_true",
|
|
2162
|
-
default
|
|
2163
|
-
help="You.com: include news results (default: true)"
|
|
2347
|
+
help="You.com: exclude news results (included by default)"
|
|
2164
2348
|
)
|
|
2165
2349
|
|
|
2166
2350
|
# SearXNG-specific
|
|
@@ -2324,17 +2508,23 @@ Full docs: See README.md and SKILL.md
|
|
|
2324
2508
|
include_raw_content=args.raw_content,
|
|
2325
2509
|
)
|
|
2326
2510
|
elif prov == "exa":
|
|
2511
|
+
# CLI --exa-depth overrides; fallback to auto-routing suggestion
|
|
2512
|
+
exa_depth = args.exa_depth
|
|
2513
|
+
if exa_depth == "normal" and routing_info.get("exa_depth") in ("deep", "deep-reasoning"):
|
|
2514
|
+
exa_depth = routing_info["exa_depth"]
|
|
2327
2515
|
return search_exa(
|
|
2328
2516
|
query=args.query or "",
|
|
2329
2517
|
api_key=key,
|
|
2330
2518
|
max_results=args.max_results,
|
|
2331
2519
|
search_type=args.exa_type,
|
|
2520
|
+
exa_depth=exa_depth,
|
|
2332
2521
|
category=args.category,
|
|
2333
2522
|
start_date=args.start_date,
|
|
2334
2523
|
end_date=args.end_date,
|
|
2335
2524
|
similar_url=args.similar_url,
|
|
2336
2525
|
include_domains=args.include_domains,
|
|
2337
2526
|
exclude_domains=args.exclude_domains,
|
|
2527
|
+
text_verbosity=args.exa_verbosity,
|
|
2338
2528
|
)
|
|
2339
2529
|
elif prov == "perplexity":
|
|
2340
2530
|
perplexity_config = config.get("perplexity", {})
|
|
@@ -2355,7 +2545,7 @@ Full docs: See README.md and SKILL.md
|
|
|
2355
2545
|
language=args.language,
|
|
2356
2546
|
freshness=args.freshness,
|
|
2357
2547
|
safesearch=args.you_safesearch,
|
|
2358
|
-
include_news=args.
|
|
2548
|
+
include_news=not args.no_news,
|
|
2359
2549
|
livecrawl=args.livecrawl,
|
|
2360
2550
|
)
|
|
2361
2551
|
elif prov == "searxng":
|
|
@@ -2402,9 +2592,11 @@ Full docs: See README.md and SKILL.md
|
|
|
2402
2592
|
"time_range": args.time_range,
|
|
2403
2593
|
"topic": args.topic,
|
|
2404
2594
|
"search_engines": sorted(args.engines) if args.engines else None,
|
|
2405
|
-
"include_news":
|
|
2595
|
+
"include_news": not args.no_news,
|
|
2406
2596
|
"search_type": args.search_type,
|
|
2407
2597
|
"exa_type": args.exa_type,
|
|
2598
|
+
"exa_depth": args.exa_depth,
|
|
2599
|
+
"exa_verbosity": args.exa_verbosity,
|
|
2408
2600
|
"category": args.category,
|
|
2409
2601
|
"similar_url": args.similar_url,
|
|
2410
2602
|
}
|