web-search-plus-plugin 1.1.6 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.template +16 -0
- package/index.ts +50 -5
- package/openclaw.plugin.json +21 -3
- package/package.json +4 -4
- package/scripts/search.py +259 -36
package/.env.template
CHANGED
|
@@ -1,4 +1,20 @@
|
|
|
1
|
+
# Serper (Google Search) — https://serper.dev
|
|
1
2
|
SERPER_API_KEY=your-serper-key-here
|
|
3
|
+
|
|
4
|
+
# Tavily (Research Search) — https://tavily.com
|
|
2
5
|
TAVILY_API_KEY=your-tavily-key-here
|
|
6
|
+
|
|
7
|
+
# Exa (Neural/Deep Search) — https://exa.ai
|
|
3
8
|
EXA_API_KEY=your-exa-key-here
|
|
9
|
+
|
|
10
|
+
# Perplexity (Direct answers) — https://www.perplexity.ai/settings/api
|
|
4
11
|
PERPLEXITY_API_KEY=your-perplexity-key-here
|
|
12
|
+
|
|
13
|
+
# Kilo Gateway (alternative Perplexity route) — https://api.kilo.ai
|
|
14
|
+
KILOCODE_API_KEY=your-kilocode-key-here
|
|
15
|
+
|
|
16
|
+
# You.com — https://api.you.com
|
|
17
|
+
YOU_API_KEY=your-you-api-key-here
|
|
18
|
+
|
|
19
|
+
# SearXNG (self-hosted, no API key needed)
|
|
20
|
+
SEARXNG_INSTANCE_URL=https://your-searxng-instance.example.com
|
package/index.ts
CHANGED
|
@@ -35,11 +35,28 @@ const PLUGIN_DIR = getPluginDir();
|
|
|
35
35
|
const scriptPath = path.join(PLUGIN_DIR, "scripts", "search.py");
|
|
36
36
|
|
|
37
37
|
export default function (api: any) {
|
|
38
|
+
// Bridge OpenClaw config fields to env vars expected by search.py
|
|
39
|
+
const configEnv: Record<string, string> = {};
|
|
40
|
+
const pluginConfig: Record<string, string> = (api as any)?.config ?? {};
|
|
41
|
+
const configKeyMap: Record<string, string> = {
|
|
42
|
+
serperApiKey: "SERPER_API_KEY",
|
|
43
|
+
tavilyApiKey: "TAVILY_API_KEY",
|
|
44
|
+
exaApiKey: "EXA_API_KEY",
|
|
45
|
+
perplexityApiKey: "PERPLEXITY_API_KEY",
|
|
46
|
+
kilocodeApiKey: "KILOCODE_API_KEY",
|
|
47
|
+
youApiKey: "YOU_API_KEY",
|
|
48
|
+
searxngInstanceUrl: "SEARXNG_INSTANCE_URL",
|
|
49
|
+
};
|
|
50
|
+
for (const [cfgKey, envKey] of Object.entries(configKeyMap)) {
|
|
51
|
+
const val = pluginConfig[cfgKey];
|
|
52
|
+
if (val && typeof val === "string") configEnv[envKey] = val;
|
|
53
|
+
}
|
|
54
|
+
|
|
38
55
|
api.registerTool(
|
|
39
56
|
{
|
|
40
57
|
name: "web_search_plus",
|
|
41
58
|
description:
|
|
42
|
-
"Search the web using multi-provider routing (Serper/Google, Tavily/Research, Exa/Neural). Automatically
|
|
59
|
+
"Search the web using multi-provider intelligent routing (Serper/Google, Tavily/Research, Exa/Neural+Deep, Perplexity, You.com, SearXNG). Automatically selects the best provider based on query intent. Use for ALL web searches. Set depth='deep' for multi-source synthesis, 'deep-reasoning' for complex cross-document analysis.",
|
|
43
60
|
parameters: Type.Object({
|
|
44
61
|
query: Type.String({ description: "Search query" }),
|
|
45
62
|
provider: Type.Optional(
|
|
@@ -48,6 +65,9 @@ export default function (api: any) {
|
|
|
48
65
|
Type.Literal("serper"),
|
|
49
66
|
Type.Literal("tavily"),
|
|
50
67
|
Type.Literal("exa"),
|
|
68
|
+
Type.Literal("perplexity"),
|
|
69
|
+
Type.Literal("you"),
|
|
70
|
+
Type.Literal("searxng"),
|
|
51
71
|
Type.Literal("auto"),
|
|
52
72
|
],
|
|
53
73
|
{
|
|
@@ -59,10 +79,28 @@ export default function (api: any) {
|
|
|
59
79
|
count: Type.Optional(
|
|
60
80
|
Type.Number({ description: "Number of results (default: 5)" }),
|
|
61
81
|
),
|
|
82
|
+
depth: Type.Optional(
|
|
83
|
+
Type.Union(
|
|
84
|
+
[
|
|
85
|
+
Type.Literal("normal"),
|
|
86
|
+
Type.Literal("deep"),
|
|
87
|
+
Type.Literal("deep-reasoning"),
|
|
88
|
+
],
|
|
89
|
+
{
|
|
90
|
+
description:
|
|
91
|
+
"Exa search depth: 'deep' synthesizes across sources (4-12s), 'deep-reasoning' for complex cross-reference analysis (12-50s). Only applies when routed to Exa.",
|
|
92
|
+
},
|
|
93
|
+
),
|
|
94
|
+
),
|
|
62
95
|
}),
|
|
63
96
|
async execute(
|
|
64
97
|
_id: string,
|
|
65
|
-
params: {
|
|
98
|
+
params: {
|
|
99
|
+
query: string;
|
|
100
|
+
provider?: string;
|
|
101
|
+
count?: number;
|
|
102
|
+
depth?: string;
|
|
103
|
+
},
|
|
66
104
|
) {
|
|
67
105
|
const args = [scriptPath, "--query", params.query, "--compact"];
|
|
68
106
|
|
|
@@ -71,7 +109,14 @@ export default function (api: any) {
|
|
|
71
109
|
}
|
|
72
110
|
|
|
73
111
|
if (typeof params.count === "number" && Number.isFinite(params.count)) {
|
|
74
|
-
args.push(
|
|
112
|
+
args.push(
|
|
113
|
+
"--max-results",
|
|
114
|
+
String(Math.max(1, Math.floor(params.count))),
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if (params.depth && params.depth !== "normal") {
|
|
119
|
+
args.push("--exa-depth", params.depth);
|
|
75
120
|
}
|
|
76
121
|
|
|
77
122
|
const envPaths = [
|
|
@@ -82,11 +127,11 @@ export default function (api: any) {
|
|
|
82
127
|
for (const envPath of envPaths) {
|
|
83
128
|
Object.assign(fileEnv, loadEnvFile(envPath));
|
|
84
129
|
}
|
|
85
|
-
const childEnv = { ...process.env, ...fileEnv };
|
|
130
|
+
const childEnv = { ...process.env, ...configEnv, ...fileEnv };
|
|
86
131
|
|
|
87
132
|
try {
|
|
88
133
|
const child = spawnSync("python3", args, {
|
|
89
|
-
timeout:
|
|
134
|
+
timeout: 65000,
|
|
90
135
|
env: childEnv,
|
|
91
136
|
shell: false,
|
|
92
137
|
encoding: "utf8",
|
package/openclaw.plugin.json
CHANGED
|
@@ -1,11 +1,29 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "web-search-plus-plugin",
|
|
3
|
+
"kind": "skill",
|
|
3
4
|
"name": "Web Search Plus",
|
|
4
|
-
"version": "1.
|
|
5
|
-
"description": "Multi-provider web search (Serper/Tavily/
|
|
5
|
+
"version": "1.2.1",
|
|
6
|
+
"description": "Multi-provider web search (Serper/Google, Tavily, Exa/Neural+Deep, Perplexity, You.com, SearXNG) with intelligent auto-routing",
|
|
6
7
|
"configSchema": {
|
|
7
8
|
"type": "object",
|
|
8
9
|
"additionalProperties": false,
|
|
9
|
-
"properties": {
|
|
10
|
+
"properties": {
|
|
11
|
+
"serperApiKey": { "type": "string" },
|
|
12
|
+
"tavilyApiKey": { "type": "string" },
|
|
13
|
+
"exaApiKey": { "type": "string" },
|
|
14
|
+
"perplexityApiKey": { "type": "string" },
|
|
15
|
+
"kilocodeApiKey": { "type": "string" },
|
|
16
|
+
"youApiKey": { "type": "string" },
|
|
17
|
+
"searxngInstanceUrl": { "type": "string" }
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"uiHints": {
|
|
21
|
+
"serperApiKey": { "label": "Serper API Key", "placeholder": "sk-...", "sensitive": true },
|
|
22
|
+
"tavilyApiKey": { "label": "Tavily API Key", "placeholder": "tvly-...", "sensitive": true },
|
|
23
|
+
"exaApiKey": { "label": "Exa API Key", "placeholder": "exa-...", "sensitive": true },
|
|
24
|
+
"perplexityApiKey": { "label": "Perplexity API Key", "placeholder": "pplx-...", "sensitive": true },
|
|
25
|
+
"kilocodeApiKey": { "label": "Kilo Gateway API Key", "placeholder": "...", "sensitive": true },
|
|
26
|
+
"youApiKey": { "label": "You.com API Key", "placeholder": "...", "sensitive": true },
|
|
27
|
+
"searxngInstanceUrl": { "label": "SearXNG Instance URL", "placeholder": "https://searx.example.com", "sensitive": false }
|
|
10
28
|
}
|
|
11
29
|
}
|
package/package.json
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "web-search-plus-plugin",
|
|
3
|
-
"version": "1.1
|
|
4
|
-
"description": "OpenClaw plugin: multi-provider web search (Serper/Google, Tavily, Exa, Perplexity)
|
|
3
|
+
"version": "1.2.1",
|
|
4
|
+
"description": "OpenClaw plugin: multi-provider web search (Serper/Google, Tavily, Exa/Neural+Deep, Perplexity, You.com, SearXNG) with intelligent auto-routing",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
|
7
7
|
"files": [
|
|
8
8
|
"index.ts",
|
|
9
9
|
"openclaw.plugin.json",
|
|
10
|
-
"scripts
|
|
10
|
+
"scripts/*.py",
|
|
11
11
|
".env.template",
|
|
12
12
|
"README.md",
|
|
13
13
|
"LICENSE"
|
|
14
14
|
],
|
|
15
|
-
"keywords": ["openclaw", "plugin", "search", "serper", "tavily", "exa", "perplexity", "web-search"],
|
|
15
|
+
"keywords": ["openclaw", "plugin", "search", "serper", "tavily", "exa", "exa-deep", "perplexity", "you", "searxng", "web-search", "auto-routing"],
|
|
16
16
|
"repository": {
|
|
17
17
|
"type": "git",
|
|
18
18
|
"url": "https://github.com/robbyczgw-cla/web-search-plus-plugin"
|
package/scripts/search.py
CHANGED
|
@@ -289,7 +289,9 @@ DEFAULT_CONFIG = {
|
|
|
289
289
|
"topic": "general"
|
|
290
290
|
},
|
|
291
291
|
"exa": {
|
|
292
|
-
"type": "neural"
|
|
292
|
+
"type": "neural",
|
|
293
|
+
"depth": "normal",
|
|
294
|
+
"verbosity": "standard"
|
|
293
295
|
},
|
|
294
296
|
"perplexity": {
|
|
295
297
|
"api_url": "https://api.kilo.ai/api/gateway/chat/completions",
|
|
@@ -351,12 +353,13 @@ def get_api_key(provider: str, config: Dict[str, Any] = None) -> Optional[str]:
|
|
|
351
353
|
return key
|
|
352
354
|
|
|
353
355
|
# Then check environment
|
|
356
|
+
if provider == "perplexity":
|
|
357
|
+
return os.environ.get("PERPLEXITY_API_KEY") or os.environ.get("KILOCODE_API_KEY")
|
|
354
358
|
key_map = {
|
|
355
359
|
"serper": "SERPER_API_KEY",
|
|
356
360
|
"tavily": "TAVILY_API_KEY",
|
|
357
361
|
"exa": "EXA_API_KEY",
|
|
358
362
|
"you": "YOU_API_KEY",
|
|
359
|
-
"perplexity": "KILOCODE_API_KEY",
|
|
360
363
|
}
|
|
361
364
|
return os.environ.get(key_map.get(provider, ""))
|
|
362
365
|
|
|
@@ -750,6 +753,19 @@ class QueryAnalyzer:
|
|
|
750
753
|
r'\bheadlines?\b': 3.0,
|
|
751
754
|
r'\b202[4-9]\b': 2.0, # Current year mentions
|
|
752
755
|
r'\blast (week|month|year)\b': 2.0,
|
|
756
|
+
|
|
757
|
+
# German local patterns
|
|
758
|
+
r'\bin der nähe\b': 4.0,
|
|
759
|
+
r'\bin meiner nähe\b': 4.0,
|
|
760
|
+
r'\böffnungszeiten\b': 3.0,
|
|
761
|
+
r'\badresse von\b': 3.0,
|
|
762
|
+
r'\bweg(beschreibung)? nach\b': 3.5,
|
|
763
|
+
|
|
764
|
+
# German news/recency patterns
|
|
765
|
+
r'\bheute\b': 2.5,
|
|
766
|
+
r'\bmorgen\b': 2.0,
|
|
767
|
+
r'\baktuell\b': 2.5,
|
|
768
|
+
r'\bnachrichten\b': 3.0,
|
|
753
769
|
}
|
|
754
770
|
|
|
755
771
|
# RAG/AI signals → You.com
|
|
@@ -802,6 +818,11 @@ class QueryAnalyzer:
|
|
|
802
818
|
r'\bthings to do in\b': 4.0,
|
|
803
819
|
r'\bnear me\b': 3.0,
|
|
804
820
|
r'\bcan you (tell me|summarize|explain)\b': 3.5,
|
|
821
|
+
# German
|
|
822
|
+
r'\bwann\b': 3.0,
|
|
823
|
+
r'\bwer\b': 3.0,
|
|
824
|
+
r'\bwo\b': 2.5,
|
|
825
|
+
r'\bwie viele\b': 3.0,
|
|
805
826
|
}
|
|
806
827
|
|
|
807
828
|
# Privacy/Multi-source signals → SearXNG (self-hosted meta-search)
|
|
@@ -848,7 +869,74 @@ class QueryAnalyzer:
|
|
|
848
869
|
r'\bkostenlos(e)?\s+suche\b': 3.5,
|
|
849
870
|
r'\bkeine api.?kosten\b': 4.0,
|
|
850
871
|
}
|
|
851
|
-
|
|
872
|
+
|
|
873
|
+
# Exa Deep Search signals → deep multi-source synthesis
|
|
874
|
+
EXA_DEEP_SIGNALS = {
|
|
875
|
+
r'\bsynthesi[sz]e\b': 5.0,
|
|
876
|
+
r'\bdeep research\b': 5.0,
|
|
877
|
+
r'\bcomprehensive (analysis|report|overview|survey)\b': 4.5,
|
|
878
|
+
r'\bacross (multiple|many|several) (sources|documents|papers)\b': 4.5,
|
|
879
|
+
r'\baggregat(e|ing) (information|data|results)\b': 4.0,
|
|
880
|
+
r'\bcross.?referenc': 4.5,
|
|
881
|
+
r'\bsec filings?\b': 4.5,
|
|
882
|
+
r'\bannual reports?\b': 4.0,
|
|
883
|
+
r'\bearnings (call|report|transcript)\b': 4.5,
|
|
884
|
+
r'\bfinancial analysis\b': 4.0,
|
|
885
|
+
r'\bliterature (review|survey)\b': 5.0,
|
|
886
|
+
r'\bacademic literature\b': 4.5,
|
|
887
|
+
r'\bstate of the (art|field|industry)\b': 4.0,
|
|
888
|
+
r'\bcompile (a |the )?(report|findings|results)\b': 4.5,
|
|
889
|
+
r'\bsummariz(e|ing) (research|papers|studies)\b': 4.0,
|
|
890
|
+
r'\bmultiple documents?\b': 4.0,
|
|
891
|
+
r'\bdossier\b': 4.5,
|
|
892
|
+
r'\bdue diligence\b': 4.5,
|
|
893
|
+
r'\bstructured (output|data|report)\b': 4.0,
|
|
894
|
+
r'\bmarket research\b': 4.0,
|
|
895
|
+
r'\bindustry (report|analysis|overview)\b': 4.0,
|
|
896
|
+
r'\bresearch (on|about|into)\b': 4.0,
|
|
897
|
+
r'\bwhitepaper\b': 4.5,
|
|
898
|
+
r'\btechnical report\b': 4.0,
|
|
899
|
+
r'\bsurvey of\b': 4.5,
|
|
900
|
+
r'\bmeta.?analysis\b': 5.0,
|
|
901
|
+
r'\bsystematic review\b': 5.0,
|
|
902
|
+
r'\bcase study\b': 3.5,
|
|
903
|
+
r'\bbenchmark(s|ing)?\b': 3.5,
|
|
904
|
+
# German
|
|
905
|
+
r'\btiefenrecherche\b': 5.0,
|
|
906
|
+
r'\bumfassende (analyse|übersicht|recherche)\b': 4.5,
|
|
907
|
+
r'\baus mehreren quellen zusammenfassen\b': 4.5,
|
|
908
|
+
r'\bmarktforschung\b': 4.0,
|
|
909
|
+
}
|
|
910
|
+
|
|
911
|
+
# Exa Deep Reasoning signals → complex cross-reference analysis
|
|
912
|
+
EXA_DEEP_REASONING_SIGNALS = {
|
|
913
|
+
r'\bdeep.?reasoning\b': 6.0,
|
|
914
|
+
r'\bcomplex (analysis|reasoning|research)\b': 4.5,
|
|
915
|
+
r'\bcontradictions?\b': 4.5,
|
|
916
|
+
r'\breconcil(e|ing)\b': 5.0,
|
|
917
|
+
r'\bcritical(ly)? analyz': 4.5,
|
|
918
|
+
r'\bweigh(ing)? (the )?evidence\b': 4.5,
|
|
919
|
+
r'\bcompeting (claims|theories|perspectives)\b': 4.5,
|
|
920
|
+
r'\bcomplex financial\b': 4.5,
|
|
921
|
+
r'\bregulatory (analysis|compliance|landscape)\b': 4.5,
|
|
922
|
+
r'\blegal analysis\b': 4.5,
|
|
923
|
+
r'\bcomprehensive (due diligence|investigation)\b': 5.0,
|
|
924
|
+
r'\bpatent (landscape|analysis|search)\b': 4.5,
|
|
925
|
+
r'\bmarket intelligence\b': 4.5,
|
|
926
|
+
r'\bcompetitive (intelligence|landscape)\b': 4.5,
|
|
927
|
+
r'\btrade.?offs?\b': 4.0,
|
|
928
|
+
r'\bpros and cons of\b': 4.0,
|
|
929
|
+
r'\bshould I (use|choose|pick)\b': 3.5,
|
|
930
|
+
r'\bwhich is better\b': 4.0,
|
|
931
|
+
# German
|
|
932
|
+
r'\bkomplexe analyse\b': 4.5,
|
|
933
|
+
r'\bwidersprüche\b': 4.5,
|
|
934
|
+
r'\bquellen abwägen\b': 4.5,
|
|
935
|
+
r'\brechtliche analyse\b': 4.5,
|
|
936
|
+
r'\bvergleich(e|en)?\b': 3.5,
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
|
|
852
940
|
# Brand/product patterns for shopping detection
|
|
853
941
|
BRAND_PATTERNS = [
|
|
854
942
|
# Tech brands
|
|
@@ -1033,7 +1121,13 @@ class QueryAnalyzer:
|
|
|
1033
1121
|
direct_answer_score, direct_answer_matches = self._calculate_signal_score(
|
|
1034
1122
|
query, self.DIRECT_ANSWER_SIGNALS
|
|
1035
1123
|
)
|
|
1036
|
-
|
|
1124
|
+
exa_deep_score, exa_deep_matches = self._calculate_signal_score(
|
|
1125
|
+
query, self.EXA_DEEP_SIGNALS
|
|
1126
|
+
)
|
|
1127
|
+
exa_deep_reasoning_score, exa_deep_reasoning_matches = self._calculate_signal_score(
|
|
1128
|
+
query, self.EXA_DEEP_REASONING_SIGNALS
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1037
1131
|
# Apply product/brand bonus to shopping
|
|
1038
1132
|
brand_bonus = self._detect_product_brand_combo(query)
|
|
1039
1133
|
if brand_bonus > 0:
|
|
@@ -1071,7 +1165,7 @@ class QueryAnalyzer:
|
|
|
1071
1165
|
provider_scores = {
|
|
1072
1166
|
"serper": shopping_score + local_news_score + (recency_score * 0.35),
|
|
1073
1167
|
"tavily": research_score + (complexity["complexity_score"] if not complexity["is_complex"] else 0) + (0.2 * recency_score),
|
|
1074
|
-
"exa": discovery_score + (1.0 if re.search(r"\b(similar|alternatives?|examples?)\b", query, re.IGNORECASE) else 0.0),
|
|
1168
|
+
"exa": discovery_score + (1.0 if re.search(r"\b(similar|alternatives?|examples?)\b", query, re.IGNORECASE) else 0.0) + (exa_deep_score * 0.5) + (exa_deep_reasoning_score * 0.5),
|
|
1075
1169
|
"perplexity": direct_answer_score + (local_news_score * 0.4) + (recency_score * 0.55),
|
|
1076
1170
|
"you": rag_score + (recency_score * 0.25), # You.com good for real-time + RAG
|
|
1077
1171
|
"searxng": privacy_score, # SearXNG for privacy/multi-source queries
|
|
@@ -1081,7 +1175,7 @@ class QueryAnalyzer:
|
|
|
1081
1175
|
provider_matches = {
|
|
1082
1176
|
"serper": shopping_matches + local_news_matches,
|
|
1083
1177
|
"tavily": research_matches,
|
|
1084
|
-
"exa": discovery_matches,
|
|
1178
|
+
"exa": discovery_matches + exa_deep_matches + exa_deep_reasoning_matches,
|
|
1085
1179
|
"perplexity": direct_answer_matches,
|
|
1086
1180
|
"you": rag_matches,
|
|
1087
1181
|
"searxng": privacy_matches,
|
|
@@ -1095,6 +1189,8 @@ class QueryAnalyzer:
|
|
|
1095
1189
|
"complexity": complexity,
|
|
1096
1190
|
"recency_focused": is_recency,
|
|
1097
1191
|
"recency_score": recency_score,
|
|
1192
|
+
"exa_deep_score": exa_deep_score,
|
|
1193
|
+
"exa_deep_reasoning_score": exa_deep_reasoning_score,
|
|
1098
1194
|
}
|
|
1099
1195
|
|
|
1100
1196
|
def route(self, query: str) -> Dict[str, Any]:
|
|
@@ -1107,8 +1203,8 @@ class QueryAnalyzer:
|
|
|
1107
1203
|
# Filter to available providers
|
|
1108
1204
|
disabled = set(self.auto_config.get("disabled_providers", []))
|
|
1109
1205
|
available = {
|
|
1110
|
-
p: s for p, s in scores.items()
|
|
1111
|
-
if p not in disabled and
|
|
1206
|
+
p: s for p, s in scores.items()
|
|
1207
|
+
if p not in disabled and get_api_key(p, self.config)
|
|
1112
1208
|
}
|
|
1113
1209
|
|
|
1114
1210
|
if not available:
|
|
@@ -1179,18 +1275,29 @@ class QueryAnalyzer:
|
|
|
1179
1275
|
# (user might want similar search)
|
|
1180
1276
|
pass # Keep current winner but note it
|
|
1181
1277
|
|
|
1278
|
+
# Determine Exa search depth when routed to Exa
|
|
1279
|
+
exa_depth = "normal"
|
|
1280
|
+
if winner == "exa":
|
|
1281
|
+
deep_r_score = analysis.get("exa_deep_reasoning_score", 0)
|
|
1282
|
+
deep_score = analysis.get("exa_deep_score", 0)
|
|
1283
|
+
if deep_r_score >= 4.0:
|
|
1284
|
+
exa_depth = "deep-reasoning"
|
|
1285
|
+
elif deep_score >= 4.0:
|
|
1286
|
+
exa_depth = "deep"
|
|
1287
|
+
|
|
1182
1288
|
# Build detailed routing result
|
|
1183
1289
|
threshold = self.auto_config.get("confidence_threshold", 0.3)
|
|
1184
|
-
|
|
1290
|
+
|
|
1185
1291
|
return {
|
|
1186
1292
|
"provider": winner,
|
|
1187
1293
|
"confidence": confidence,
|
|
1188
1294
|
"confidence_level": "high" if confidence >= 0.7 else "medium" if confidence >= 0.4 else "low",
|
|
1189
1295
|
"reason": reason,
|
|
1296
|
+
"exa_depth": exa_depth,
|
|
1190
1297
|
"scores": {p: round(s, 2) for p, s in available.items()},
|
|
1191
1298
|
"winning_score": round(max_score, 2),
|
|
1192
1299
|
"top_signals": [
|
|
1193
|
-
{"matched": s["matched"], "weight": s["weight"]}
|
|
1300
|
+
{"matched": s["matched"], "weight": s["weight"]}
|
|
1194
1301
|
for s in top_signals
|
|
1195
1302
|
],
|
|
1196
1303
|
"below_threshold": confidence < threshold,
|
|
@@ -1227,6 +1334,7 @@ def explain_routing(query: str, config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
1227
1334
|
"confidence": routing["confidence"],
|
|
1228
1335
|
"confidence_level": routing["confidence_level"],
|
|
1229
1336
|
"reason": routing["reason"],
|
|
1337
|
+
"exa_depth": routing.get("exa_depth", "normal"),
|
|
1230
1338
|
},
|
|
1231
1339
|
"scores": routing["scores"],
|
|
1232
1340
|
"top_signals": routing["top_signals"],
|
|
@@ -1235,6 +1343,8 @@ def explain_routing(query: str, config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
1235
1343
|
"research_signals": len(analysis["provider_matches"]["tavily"]),
|
|
1236
1344
|
"discovery_signals": len(analysis["provider_matches"]["exa"]),
|
|
1237
1345
|
"rag_signals": len(analysis["provider_matches"]["you"]),
|
|
1346
|
+
"exa_deep_score": round(analysis.get("exa_deep_score", 0), 2),
|
|
1347
|
+
"exa_deep_reasoning_score": round(analysis.get("exa_deep_reasoning_score", 0), 2),
|
|
1238
1348
|
},
|
|
1239
1349
|
"query_analysis": {
|
|
1240
1350
|
"word_count": analysis["complexity"]["word_count"],
|
|
@@ -1252,8 +1362,8 @@ def explain_routing(query: str, config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
1252
1362
|
if matches
|
|
1253
1363
|
},
|
|
1254
1364
|
"available_providers": [
|
|
1255
|
-
p for p in ["serper", "tavily", "exa", "perplexity", "you", "searxng"]
|
|
1256
|
-
if
|
|
1365
|
+
p for p in ["serper", "tavily", "exa", "perplexity", "you", "searxng"]
|
|
1366
|
+
if get_api_key(p, config) and p not in config.get("auto_routing", {}).get("disabled_providers", [])
|
|
1257
1367
|
]
|
|
1258
1368
|
}
|
|
1259
1369
|
|
|
@@ -1544,7 +1654,7 @@ def search_tavily(
|
|
|
1544
1654
|
|
|
1545
1655
|
|
|
1546
1656
|
# =============================================================================
|
|
1547
|
-
# Exa (Neural/Semantic Search)
|
|
1657
|
+
# Exa (Neural/Semantic/Deep Search)
|
|
1548
1658
|
# =============================================================================
|
|
1549
1659
|
|
|
1550
1660
|
def search_exa(
|
|
@@ -1552,22 +1662,43 @@ def search_exa(
|
|
|
1552
1662
|
api_key: str,
|
|
1553
1663
|
max_results: int = 5,
|
|
1554
1664
|
search_type: str = "neural",
|
|
1665
|
+
exa_depth: str = "normal",
|
|
1555
1666
|
category: Optional[str] = None,
|
|
1556
1667
|
start_date: Optional[str] = None,
|
|
1557
1668
|
end_date: Optional[str] = None,
|
|
1558
1669
|
similar_url: Optional[str] = None,
|
|
1559
1670
|
include_domains: Optional[List[str]] = None,
|
|
1560
1671
|
exclude_domains: Optional[List[str]] = None,
|
|
1672
|
+
text_verbosity: str = "standard",
|
|
1561
1673
|
) -> dict:
|
|
1562
|
-
"""Search using Exa (Neural/Semantic Search).
|
|
1674
|
+
"""Search using Exa (Neural/Semantic/Deep Search).
|
|
1675
|
+
|
|
1676
|
+
exa_depth controls synthesis level:
|
|
1677
|
+
- "normal": standard search (neural/fast/auto/keyword/instant)
|
|
1678
|
+
- "deep": multi-source synthesis with grounding (4-12s, $12/1k)
|
|
1679
|
+
- "deep-reasoning": cross-reference reasoning with grounding (12-50s, $15/1k)
|
|
1680
|
+
"""
|
|
1681
|
+
is_deep = exa_depth in ("deep", "deep-reasoning")
|
|
1682
|
+
|
|
1563
1683
|
if similar_url:
|
|
1684
|
+
# findSimilar does not support deep search types
|
|
1564
1685
|
endpoint = "https://api.exa.ai/findSimilar"
|
|
1565
|
-
body = {
|
|
1686
|
+
body: Dict[str, Any] = {
|
|
1566
1687
|
"url": similar_url,
|
|
1567
1688
|
"numResults": max_results,
|
|
1568
1689
|
"contents": {
|
|
1569
|
-
"text": {"maxCharacters":
|
|
1570
|
-
"highlights":
|
|
1690
|
+
"text": {"maxCharacters": 2000, "verbosity": text_verbosity},
|
|
1691
|
+
"highlights": {"numSentences": 3, "highlightsPerUrl": 2},
|
|
1692
|
+
},
|
|
1693
|
+
}
|
|
1694
|
+
elif is_deep:
|
|
1695
|
+
endpoint = "https://api.exa.ai/search"
|
|
1696
|
+
body = {
|
|
1697
|
+
"query": query,
|
|
1698
|
+
"numResults": max_results,
|
|
1699
|
+
"type": exa_depth,
|
|
1700
|
+
"contents": {
|
|
1701
|
+
"text": {"maxCharacters": 5000, "verbosity": "full"},
|
|
1571
1702
|
},
|
|
1572
1703
|
}
|
|
1573
1704
|
else:
|
|
@@ -1577,11 +1708,11 @@ def search_exa(
|
|
|
1577
1708
|
"numResults": max_results,
|
|
1578
1709
|
"type": search_type,
|
|
1579
1710
|
"contents": {
|
|
1580
|
-
"text": {"maxCharacters":
|
|
1581
|
-
"highlights":
|
|
1711
|
+
"text": {"maxCharacters": 2000, "verbosity": text_verbosity},
|
|
1712
|
+
"highlights": {"numSentences": 3, "highlightsPerUrl": 2},
|
|
1582
1713
|
},
|
|
1583
1714
|
}
|
|
1584
|
-
|
|
1715
|
+
|
|
1585
1716
|
if category:
|
|
1586
1717
|
body["category"] = category
|
|
1587
1718
|
if start_date:
|
|
@@ -1592,19 +1723,91 @@ def search_exa(
|
|
|
1592
1723
|
body["includeDomains"] = include_domains
|
|
1593
1724
|
if exclude_domains:
|
|
1594
1725
|
body["excludeDomains"] = exclude_domains
|
|
1595
|
-
|
|
1726
|
+
|
|
1596
1727
|
headers = {
|
|
1597
1728
|
"x-api-key": api_key,
|
|
1598
1729
|
"Content-Type": "application/json",
|
|
1599
1730
|
}
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1731
|
+
|
|
1732
|
+
timeout = 55 if is_deep else 30
|
|
1733
|
+
data = make_request(endpoint, headers, body, timeout=timeout)
|
|
1734
|
+
|
|
1603
1735
|
results = []
|
|
1736
|
+
|
|
1737
|
+
# Deep search: primary content in output field with grounding citations
|
|
1738
|
+
if is_deep:
|
|
1739
|
+
deep_output = data.get("output", {})
|
|
1740
|
+
synthesized_text = ""
|
|
1741
|
+
grounding_citations: List[Dict[str, Any]] = []
|
|
1742
|
+
|
|
1743
|
+
if isinstance(deep_output.get("content"), str):
|
|
1744
|
+
synthesized_text = deep_output["content"]
|
|
1745
|
+
elif isinstance(deep_output.get("content"), dict):
|
|
1746
|
+
synthesized_text = json.dumps(deep_output["content"], ensure_ascii=False)
|
|
1747
|
+
|
|
1748
|
+
for field_citation in deep_output.get("grounding", []):
|
|
1749
|
+
for cite in field_citation.get("citations", []):
|
|
1750
|
+
grounding_citations.append({
|
|
1751
|
+
"url": cite.get("url", ""),
|
|
1752
|
+
"title": cite.get("title", ""),
|
|
1753
|
+
"confidence": field_citation.get("confidence", ""),
|
|
1754
|
+
"field": field_citation.get("field", ""),
|
|
1755
|
+
})
|
|
1756
|
+
|
|
1757
|
+
# Primary synthesized result
|
|
1758
|
+
if synthesized_text:
|
|
1759
|
+
results.append({
|
|
1760
|
+
"title": f"Exa {exa_depth.replace('-', ' ').title()} Synthesis",
|
|
1761
|
+
"url": "",
|
|
1762
|
+
"snippet": synthesized_text[:2000],
|
|
1763
|
+
"full_synthesis": synthesized_text,
|
|
1764
|
+
"score": 1.0,
|
|
1765
|
+
"grounding": grounding_citations[:10],
|
|
1766
|
+
"type": "synthesis",
|
|
1767
|
+
})
|
|
1768
|
+
|
|
1769
|
+
# Supporting source documents
|
|
1770
|
+
for item in data.get("results", [])[:max_results]:
|
|
1771
|
+
text_content = item.get("text", "") or ""
|
|
1772
|
+
highlights = item.get("highlights", [])
|
|
1773
|
+
snippet = text_content[:800] if text_content else (highlights[0] if highlights else "")
|
|
1774
|
+
results.append({
|
|
1775
|
+
"title": item.get("title", ""),
|
|
1776
|
+
"url": item.get("url", ""),
|
|
1777
|
+
"snippet": snippet,
|
|
1778
|
+
"score": round(item.get("score", 0.0), 3),
|
|
1779
|
+
"published_date": item.get("publishedDate"),
|
|
1780
|
+
"author": item.get("author"),
|
|
1781
|
+
"type": "source",
|
|
1782
|
+
})
|
|
1783
|
+
|
|
1784
|
+
answer = synthesized_text[:1000] if synthesized_text else (results[1]["snippet"] if len(results) > 1 else "")
|
|
1785
|
+
|
|
1786
|
+
return {
|
|
1787
|
+
"provider": "exa",
|
|
1788
|
+
"query": query,
|
|
1789
|
+
"exa_depth": exa_depth,
|
|
1790
|
+
"results": results,
|
|
1791
|
+
"images": [],
|
|
1792
|
+
"answer": answer,
|
|
1793
|
+
"grounding": grounding_citations,
|
|
1794
|
+
"metadata": {
|
|
1795
|
+
"synthesis_length": len(synthesized_text),
|
|
1796
|
+
"source_count": len(data.get("results", [])),
|
|
1797
|
+
},
|
|
1798
|
+
}
|
|
1799
|
+
|
|
1800
|
+
# Standard search result parsing
|
|
1604
1801
|
for item in data.get("results", [])[:max_results]:
|
|
1802
|
+
text_content = item.get("text", "") or ""
|
|
1605
1803
|
highlights = item.get("highlights", [])
|
|
1606
|
-
|
|
1607
|
-
|
|
1804
|
+
if text_content:
|
|
1805
|
+
snippet = text_content[:800]
|
|
1806
|
+
elif highlights:
|
|
1807
|
+
snippet = " ... ".join(highlights[:2])
|
|
1808
|
+
else:
|
|
1809
|
+
snippet = ""
|
|
1810
|
+
|
|
1608
1811
|
results.append({
|
|
1609
1812
|
"title": item.get("title", ""),
|
|
1610
1813
|
"url": item.get("url", ""),
|
|
@@ -1613,9 +1816,9 @@ def search_exa(
|
|
|
1613
1816
|
"published_date": item.get("publishedDate"),
|
|
1614
1817
|
"author": item.get("author"),
|
|
1615
1818
|
})
|
|
1616
|
-
|
|
1819
|
+
|
|
1617
1820
|
answer = results[0]["snippet"] if results else ""
|
|
1618
|
-
|
|
1821
|
+
|
|
1619
1822
|
return {
|
|
1620
1823
|
"provider": "exa",
|
|
1621
1824
|
"query": query if not similar_url else f"Similar to: {similar_url}",
|
|
@@ -2123,9 +2326,22 @@ Full docs: See README.md and SKILL.md
|
|
|
2123
2326
|
# Exa-specific
|
|
2124
2327
|
exa_config = config.get("exa", {})
|
|
2125
2328
|
parser.add_argument(
|
|
2126
|
-
"--exa-type",
|
|
2127
|
-
default=exa_config.get("type", "neural"),
|
|
2128
|
-
choices=["neural", "keyword"]
|
|
2329
|
+
"--exa-type",
|
|
2330
|
+
default=exa_config.get("type", "neural"),
|
|
2331
|
+
choices=["neural", "fast", "auto", "keyword", "instant"],
|
|
2332
|
+
help="Exa search type (for standard search, ignored when --exa-depth is set)"
|
|
2333
|
+
)
|
|
2334
|
+
parser.add_argument(
|
|
2335
|
+
"--exa-depth",
|
|
2336
|
+
default=exa_config.get("depth", "normal"),
|
|
2337
|
+
choices=["normal", "deep", "deep-reasoning"],
|
|
2338
|
+
help="Exa search depth: deep (synthesized, 4-12s), deep-reasoning (cross-reference, 12-50s)"
|
|
2339
|
+
)
|
|
2340
|
+
parser.add_argument(
|
|
2341
|
+
"--exa-verbosity",
|
|
2342
|
+
default=exa_config.get("verbosity", "standard"),
|
|
2343
|
+
choices=["compact", "standard", "full"],
|
|
2344
|
+
help="Exa text verbosity for content extraction"
|
|
2129
2345
|
)
|
|
2130
2346
|
parser.add_argument(
|
|
2131
2347
|
"--category",
|
|
@@ -2157,10 +2373,9 @@ Full docs: See README.md and SKILL.md
|
|
|
2157
2373
|
help="You.com: fetch full page content"
|
|
2158
2374
|
)
|
|
2159
2375
|
parser.add_argument(
|
|
2160
|
-
"--
|
|
2376
|
+
"--no-news",
|
|
2161
2377
|
action="store_true",
|
|
2162
|
-
default
|
|
2163
|
-
help="You.com: include news results (default: true)"
|
|
2378
|
+
help="You.com: exclude news results (included by default)"
|
|
2164
2379
|
)
|
|
2165
2380
|
|
|
2166
2381
|
# SearXNG-specific
|
|
@@ -2324,17 +2539,23 @@ Full docs: See README.md and SKILL.md
|
|
|
2324
2539
|
include_raw_content=args.raw_content,
|
|
2325
2540
|
)
|
|
2326
2541
|
elif prov == "exa":
|
|
2542
|
+
# CLI --exa-depth overrides; fallback to auto-routing suggestion
|
|
2543
|
+
exa_depth = args.exa_depth
|
|
2544
|
+
if exa_depth == "normal" and routing_info.get("exa_depth") in ("deep", "deep-reasoning"):
|
|
2545
|
+
exa_depth = routing_info["exa_depth"]
|
|
2327
2546
|
return search_exa(
|
|
2328
2547
|
query=args.query or "",
|
|
2329
2548
|
api_key=key,
|
|
2330
2549
|
max_results=args.max_results,
|
|
2331
2550
|
search_type=args.exa_type,
|
|
2551
|
+
exa_depth=exa_depth,
|
|
2332
2552
|
category=args.category,
|
|
2333
2553
|
start_date=args.start_date,
|
|
2334
2554
|
end_date=args.end_date,
|
|
2335
2555
|
similar_url=args.similar_url,
|
|
2336
2556
|
include_domains=args.include_domains,
|
|
2337
2557
|
exclude_domains=args.exclude_domains,
|
|
2558
|
+
text_verbosity=args.exa_verbosity,
|
|
2338
2559
|
)
|
|
2339
2560
|
elif prov == "perplexity":
|
|
2340
2561
|
perplexity_config = config.get("perplexity", {})
|
|
@@ -2355,7 +2576,7 @@ Full docs: See README.md and SKILL.md
|
|
|
2355
2576
|
language=args.language,
|
|
2356
2577
|
freshness=args.freshness,
|
|
2357
2578
|
safesearch=args.you_safesearch,
|
|
2358
|
-
include_news=args.
|
|
2579
|
+
include_news=not args.no_news,
|
|
2359
2580
|
livecrawl=args.livecrawl,
|
|
2360
2581
|
)
|
|
2361
2582
|
elif prov == "searxng":
|
|
@@ -2402,9 +2623,11 @@ Full docs: See README.md and SKILL.md
|
|
|
2402
2623
|
"time_range": args.time_range,
|
|
2403
2624
|
"topic": args.topic,
|
|
2404
2625
|
"search_engines": sorted(args.engines) if args.engines else None,
|
|
2405
|
-
"include_news":
|
|
2626
|
+
"include_news": not args.no_news,
|
|
2406
2627
|
"search_type": args.search_type,
|
|
2407
2628
|
"exa_type": args.exa_type,
|
|
2629
|
+
"exa_depth": args.exa_depth,
|
|
2630
|
+
"exa_verbosity": args.exa_verbosity,
|
|
2408
2631
|
"category": args.category,
|
|
2409
2632
|
"similar_url": args.similar_url,
|
|
2410
2633
|
}
|