web-search-plus-plugin 1.2.0 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +44 -2
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/scripts/search.py +91 -28
package/index.ts
CHANGED
|
@@ -88,10 +88,36 @@ export default function (api: any) {
|
|
|
88
88
|
],
|
|
89
89
|
{
|
|
90
90
|
description:
|
|
91
|
-
"Exa search depth: 'deep' synthesizes across sources (4-12s), 'deep-reasoning' for complex cross-reference analysis (12-50s).",
|
|
91
|
+
"Exa search depth: 'deep' synthesizes across sources (4-12s), 'deep-reasoning' for complex cross-reference analysis (12-50s). When provider is auto, depth may be auto-selected based on query complexity.",
|
|
92
92
|
},
|
|
93
93
|
),
|
|
94
94
|
),
|
|
95
|
+
time_range: Type.Optional(
|
|
96
|
+
Type.Union(
|
|
97
|
+
[
|
|
98
|
+
Type.Literal("day"),
|
|
99
|
+
Type.Literal("week"),
|
|
100
|
+
Type.Literal("month"),
|
|
101
|
+
Type.Literal("year"),
|
|
102
|
+
],
|
|
103
|
+
{
|
|
104
|
+
description:
|
|
105
|
+
"Filter results by recency. Applies to Serper (as tbs), Perplexity (as search_recency_filter), Tavily/You.com (as freshness). Useful for news and current events.",
|
|
106
|
+
},
|
|
107
|
+
),
|
|
108
|
+
),
|
|
109
|
+
include_domains: Type.Optional(
|
|
110
|
+
Type.Array(Type.String(), {
|
|
111
|
+
description:
|
|
112
|
+
"Only include results from these domains (e.g. ['arxiv.org', 'github.com']). Supported by Tavily and Exa.",
|
|
113
|
+
}),
|
|
114
|
+
),
|
|
115
|
+
exclude_domains: Type.Optional(
|
|
116
|
+
Type.Array(Type.String(), {
|
|
117
|
+
description:
|
|
118
|
+
"Exclude results from these domains (e.g. ['reddit.com', 'pinterest.com']). Supported by Tavily and Exa.",
|
|
119
|
+
}),
|
|
120
|
+
),
|
|
95
121
|
}),
|
|
96
122
|
async execute(
|
|
97
123
|
_id: string,
|
|
@@ -100,6 +126,9 @@ export default function (api: any) {
|
|
|
100
126
|
provider?: string;
|
|
101
127
|
count?: number;
|
|
102
128
|
depth?: string;
|
|
129
|
+
time_range?: string;
|
|
130
|
+
include_domains?: string[];
|
|
131
|
+
exclude_domains?: string[];
|
|
103
132
|
},
|
|
104
133
|
) {
|
|
105
134
|
const args = [scriptPath, "--query", params.query, "--compact"];
|
|
@@ -119,6 +148,19 @@ export default function (api: any) {
|
|
|
119
148
|
args.push("--exa-depth", params.depth);
|
|
120
149
|
}
|
|
121
150
|
|
|
151
|
+
if (params.time_range) {
|
|
152
|
+
args.push("--time-range", params.time_range);
|
|
153
|
+
args.push("--freshness", params.time_range);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
if (params.include_domains?.length) {
|
|
157
|
+
args.push("--include-domains", ...params.include_domains);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
if (params.exclude_domains?.length) {
|
|
161
|
+
args.push("--exclude-domains", ...params.exclude_domains);
|
|
162
|
+
}
|
|
163
|
+
|
|
122
164
|
const envPaths = [
|
|
123
165
|
path.join(PLUGIN_DIR, ".env"),
|
|
124
166
|
path.join(PLUGIN_DIR, "..", "web-search-plus", ".env"),
|
|
@@ -131,7 +173,7 @@ export default function (api: any) {
|
|
|
131
173
|
|
|
132
174
|
try {
|
|
133
175
|
const child = spawnSync("python3", args, {
|
|
134
|
-
timeout:
|
|
176
|
+
timeout: 75000,
|
|
135
177
|
env: childEnv,
|
|
136
178
|
shell: false,
|
|
137
179
|
encoding: "utf8",
|
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "web-search-plus-plugin",
|
|
3
3
|
"kind": "skill",
|
|
4
4
|
"name": "Web Search Plus",
|
|
5
|
-
"version": "1.2.
|
|
5
|
+
"version": "1.2.2",
|
|
6
6
|
"description": "Multi-provider web search (Serper/Google, Tavily, Exa/Neural+Deep, Perplexity, You.com, SearXNG) with intelligent auto-routing",
|
|
7
7
|
"configSchema": {
|
|
8
8
|
"type": "object",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "web-search-plus-plugin",
|
|
3
|
-
"version": "1.2.0",
|
|
3
|
+
"version": "1.2.2",
|
|
4
4
|
"description": "OpenClaw plugin: multi-provider web search (Serper/Google, Tavily, Exa/Neural+Deep, Perplexity, You.com, SearXNG) with intelligent auto-routing",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.ts",
|
package/scripts/search.py
CHANGED
|
@@ -459,17 +459,15 @@ def validate_api_key(provider: str, config: Dict[str, Any] = None) -> str:
|
|
|
459
459
|
],
|
|
460
460
|
"provider": provider
|
|
461
461
|
}
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
462
|
+
raise ProviderConfigError(json.dumps(error_msg))
|
|
463
|
+
|
|
465
464
|
# Validate URL format
|
|
466
465
|
if not key.startswith(("http://", "https://")):
|
|
467
|
-
|
|
466
|
+
raise ProviderConfigError(json.dumps({
|
|
468
467
|
"error": "SearXNG instance URL must start with http:// or https://",
|
|
469
468
|
"provided": key,
|
|
470
469
|
"provider": provider
|
|
471
|
-
}
|
|
472
|
-
sys.exit(1)
|
|
470
|
+
}))
|
|
473
471
|
|
|
474
472
|
return key
|
|
475
473
|
|
|
@@ -500,16 +498,14 @@ def validate_api_key(provider: str, config: Dict[str, Any] = None) -> str:
|
|
|
500
498
|
],
|
|
501
499
|
"provider": provider
|
|
502
500
|
}
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
501
|
+
raise ProviderConfigError(json.dumps(error_msg))
|
|
502
|
+
|
|
506
503
|
if len(key) < 10:
|
|
507
|
-
|
|
504
|
+
raise ProviderConfigError(json.dumps({
|
|
508
505
|
"error": f"API key for {provider} appears invalid (too short)",
|
|
509
506
|
"provider": provider
|
|
510
|
-
}
|
|
511
|
-
|
|
512
|
-
|
|
507
|
+
}))
|
|
508
|
+
|
|
513
509
|
return key
|
|
514
510
|
|
|
515
511
|
|
|
@@ -753,6 +749,19 @@ class QueryAnalyzer:
|
|
|
753
749
|
r'\bheadlines?\b': 3.0,
|
|
754
750
|
r'\b202[4-9]\b': 2.0, # Current year mentions
|
|
755
751
|
r'\blast (week|month|year)\b': 2.0,
|
|
752
|
+
|
|
753
|
+
# German local patterns
|
|
754
|
+
r'\bin der nähe\b': 4.0,
|
|
755
|
+
r'\bin meiner nähe\b': 4.0,
|
|
756
|
+
r'\böffnungszeiten\b': 3.0,
|
|
757
|
+
r'\badresse von\b': 3.0,
|
|
758
|
+
r'\bweg(beschreibung)? nach\b': 3.5,
|
|
759
|
+
|
|
760
|
+
# German news/recency patterns
|
|
761
|
+
r'\bheute\b': 2.5,
|
|
762
|
+
r'\bmorgen\b': 2.0,
|
|
763
|
+
r'\baktuell\b': 2.5,
|
|
764
|
+
r'\bnachrichten\b': 3.0,
|
|
756
765
|
}
|
|
757
766
|
|
|
758
767
|
# RAG/AI signals → You.com
|
|
@@ -805,6 +814,11 @@ class QueryAnalyzer:
|
|
|
805
814
|
r'\bthings to do in\b': 4.0,
|
|
806
815
|
r'\bnear me\b': 3.0,
|
|
807
816
|
r'\bcan you (tell me|summarize|explain)\b': 3.5,
|
|
817
|
+
# German
|
|
818
|
+
r'\bwann\b': 3.0,
|
|
819
|
+
r'\bwer\b': 3.0,
|
|
820
|
+
r'\bwo\b': 2.5,
|
|
821
|
+
r'\bwie viele\b': 3.0,
|
|
808
822
|
}
|
|
809
823
|
|
|
810
824
|
# Privacy/Multi-source signals → SearXNG (self-hosted meta-search)
|
|
@@ -875,6 +889,14 @@ class QueryAnalyzer:
|
|
|
875
889
|
r'\bstructured (output|data|report)\b': 4.0,
|
|
876
890
|
r'\bmarket research\b': 4.0,
|
|
877
891
|
r'\bindustry (report|analysis|overview)\b': 4.0,
|
|
892
|
+
r'\bresearch (on|about|into)\b': 4.0,
|
|
893
|
+
r'\bwhitepaper\b': 4.5,
|
|
894
|
+
r'\btechnical report\b': 4.0,
|
|
895
|
+
r'\bsurvey of\b': 4.5,
|
|
896
|
+
r'\bmeta.?analysis\b': 5.0,
|
|
897
|
+
r'\bsystematic review\b': 5.0,
|
|
898
|
+
r'\bcase study\b': 3.5,
|
|
899
|
+
r'\bbenchmark(s|ing)?\b': 3.5,
|
|
878
900
|
# German
|
|
879
901
|
r'\btiefenrecherche\b': 5.0,
|
|
880
902
|
r'\bumfassende (analyse|übersicht|recherche)\b': 4.5,
|
|
@@ -898,11 +920,16 @@ class QueryAnalyzer:
|
|
|
898
920
|
r'\bpatent (landscape|analysis|search)\b': 4.5,
|
|
899
921
|
r'\bmarket intelligence\b': 4.5,
|
|
900
922
|
r'\bcompetitive (intelligence|landscape)\b': 4.5,
|
|
923
|
+
r'\btrade.?offs?\b': 4.0,
|
|
924
|
+
r'\bpros and cons of\b': 4.0,
|
|
925
|
+
r'\bshould I (use|choose|pick)\b': 3.5,
|
|
926
|
+
r'\bwhich is better\b': 4.0,
|
|
901
927
|
# German
|
|
902
928
|
r'\bkomplexe analyse\b': 4.5,
|
|
903
929
|
r'\bwidersprüche\b': 4.5,
|
|
904
930
|
r'\bquellen abwägen\b': 4.5,
|
|
905
931
|
r'\brechtliche analyse\b': 4.5,
|
|
932
|
+
r'\bvergleich(e|en)?\b': 3.5,
|
|
906
933
|
}
|
|
907
934
|
|
|
908
935
|
|
|
@@ -1339,6 +1366,11 @@ def explain_routing(query: str, config: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
1339
1366
|
|
|
1340
1367
|
|
|
1341
1368
|
|
|
1369
|
+
class ProviderConfigError(Exception):
|
|
1370
|
+
"""Raised when a provider is missing or has an invalid API key/config."""
|
|
1371
|
+
pass
|
|
1372
|
+
|
|
1373
|
+
|
|
1342
1374
|
class ProviderRequestError(Exception):
|
|
1343
1375
|
"""Structured provider error with retry/cooldown metadata."""
|
|
1344
1376
|
|
|
@@ -1406,6 +1438,24 @@ def reset_provider_health(provider: str) -> None:
|
|
|
1406
1438
|
_save_provider_health(state)
|
|
1407
1439
|
|
|
1408
1440
|
|
|
1441
|
+
def _title_from_url(url: str) -> str:
|
|
1442
|
+
"""Derive a readable title from a URL when none is provided."""
|
|
1443
|
+
try:
|
|
1444
|
+
parsed = urlparse(url)
|
|
1445
|
+
domain = parsed.netloc.replace("www.", "")
|
|
1446
|
+
# Use last meaningful path segment as context
|
|
1447
|
+
segments = [s for s in parsed.path.strip("/").split("/") if s]
|
|
1448
|
+
if segments:
|
|
1449
|
+
last = segments[-1].replace("-", " ").replace("_", " ")
|
|
1450
|
+
# Strip file extensions
|
|
1451
|
+
last = re.sub(r'\.\w{2,4}$', '', last)
|
|
1452
|
+
if last:
|
|
1453
|
+
return f"{domain} — {last[:80]}"
|
|
1454
|
+
return domain
|
|
1455
|
+
except Exception:
|
|
1456
|
+
return url[:60]
|
|
1457
|
+
|
|
1458
|
+
|
|
1409
1459
|
def normalize_result_url(url: str) -> str:
|
|
1410
1460
|
if not url:
|
|
1411
1461
|
return ""
|
|
@@ -1728,7 +1778,7 @@ def search_exa(
|
|
|
1728
1778
|
results.append({
|
|
1729
1779
|
"title": f"Exa {exa_depth.replace('-', ' ').title()} Synthesis",
|
|
1730
1780
|
"url": "",
|
|
1731
|
-
"snippet": synthesized_text
|
|
1781
|
+
"snippet": synthesized_text,
|
|
1732
1782
|
"full_synthesis": synthesized_text,
|
|
1733
1783
|
"score": 1.0,
|
|
1734
1784
|
"grounding": grounding_citations[:10],
|
|
@@ -1750,7 +1800,7 @@ def search_exa(
|
|
|
1750
1800
|
"type": "source",
|
|
1751
1801
|
})
|
|
1752
1802
|
|
|
1753
|
-
answer = synthesized_text
|
|
1803
|
+
answer = synthesized_text if synthesized_text else (results[1]["snippet"] if len(results) > 1 else "")
|
|
1754
1804
|
|
|
1755
1805
|
return {
|
|
1756
1806
|
"provider": "exa",
|
|
@@ -1845,13 +1895,17 @@ def search_perplexity(
|
|
|
1845
1895
|
message = choices[0].get("message", {}) if choices else {}
|
|
1846
1896
|
answer = (message.get("content") or "").strip()
|
|
1847
1897
|
|
|
1848
|
-
|
|
1849
|
-
|
|
1850
|
-
|
|
1851
|
-
|
|
1852
|
-
|
|
1853
|
-
|
|
1854
|
-
|
|
1898
|
+
# Prefer the structured citations array from Perplexity API response
|
|
1899
|
+
api_citations = data.get("citations", [])
|
|
1900
|
+
|
|
1901
|
+
# Fallback: extract URLs from answer text if API doesn't provide citations
|
|
1902
|
+
if not api_citations:
|
|
1903
|
+
api_citations = []
|
|
1904
|
+
seen = set()
|
|
1905
|
+
for u in re.findall(r"https?://[^\s)\]}>\"']+", answer):
|
|
1906
|
+
if u not in seen:
|
|
1907
|
+
seen.add(u)
|
|
1908
|
+
api_citations.append(u)
|
|
1855
1909
|
|
|
1856
1910
|
results = []
|
|
1857
1911
|
|
|
@@ -1866,12 +1920,19 @@ def search_perplexity(
|
|
|
1866
1920
|
"score": 1.0,
|
|
1867
1921
|
})
|
|
1868
1922
|
|
|
1869
|
-
#
|
|
1870
|
-
for i,
|
|
1923
|
+
# Source results from citations
|
|
1924
|
+
for i, citation in enumerate(api_citations[:max_results - 1]):
|
|
1925
|
+
# citations can be plain URL strings or dicts with url/title
|
|
1926
|
+
if isinstance(citation, str):
|
|
1927
|
+
url = citation
|
|
1928
|
+
title = _title_from_url(url)
|
|
1929
|
+
else:
|
|
1930
|
+
url = citation.get("url", "")
|
|
1931
|
+
title = citation.get("title") or _title_from_url(url)
|
|
1871
1932
|
results.append({
|
|
1872
|
-
"title":
|
|
1873
|
-
"url":
|
|
1874
|
-
"snippet": "
|
|
1933
|
+
"title": title,
|
|
1934
|
+
"url": url,
|
|
1935
|
+
"snippet": f"Source cited in Perplexity answer [citation {i+1}]",
|
|
1875
1936
|
"score": round(0.9 - i * 0.1, 3),
|
|
1876
1937
|
})
|
|
1877
1938
|
|
|
@@ -2463,9 +2524,11 @@ Full docs: See README.md and SKILL.md
|
|
|
2463
2524
|
disabled_providers = auto_config.get("disabled_providers", [])
|
|
2464
2525
|
|
|
2465
2526
|
# Start with the selected provider, then try others in priority order
|
|
2527
|
+
# Only include providers that have a configured API key (except the primary,
|
|
2528
|
+
# which gets a clear error if unconfigured and no fallback succeeds)
|
|
2466
2529
|
providers_to_try = [provider]
|
|
2467
2530
|
for p in provider_priority:
|
|
2468
|
-
if p not in providers_to_try and p not in disabled_providers:
|
|
2531
|
+
if p not in providers_to_try and p not in disabled_providers and get_api_key(p, config):
|
|
2469
2532
|
providers_to_try.append(p)
|
|
2470
2533
|
|
|
2471
2534
|
# Skip providers currently in cooldown
|