web-search-plus-plugin 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -88,10 +88,36 @@ export default function (api: any) {
88
88
  ],
89
89
  {
90
90
  description:
91
- "Exa search depth: 'deep' synthesizes across sources (4-12s), 'deep-reasoning' for complex cross-reference analysis (12-50s). Only applies when routed to Exa.",
91
+ "Exa search depth: 'deep' synthesizes across sources (4-12s), 'deep-reasoning' for complex cross-reference analysis (12-50s). When provider is auto, depth may be auto-selected based on query complexity.",
92
92
  },
93
93
  ),
94
94
  ),
95
+ time_range: Type.Optional(
96
+ Type.Union(
97
+ [
98
+ Type.Literal("day"),
99
+ Type.Literal("week"),
100
+ Type.Literal("month"),
101
+ Type.Literal("year"),
102
+ ],
103
+ {
104
+ description:
105
+ "Filter results by recency. Applies to Serper (as tbs), Perplexity (as search_recency_filter), Tavily/You.com (as freshness). Useful for news and current events.",
106
+ },
107
+ ),
108
+ ),
109
+ include_domains: Type.Optional(
110
+ Type.Array(Type.String(), {
111
+ description:
112
+ "Only include results from these domains (e.g. ['arxiv.org', 'github.com']). Supported by Tavily and Exa.",
113
+ }),
114
+ ),
115
+ exclude_domains: Type.Optional(
116
+ Type.Array(Type.String(), {
117
+ description:
118
+ "Exclude results from these domains (e.g. ['reddit.com', 'pinterest.com']). Supported by Tavily and Exa.",
119
+ }),
120
+ ),
95
121
  }),
96
122
  async execute(
97
123
  _id: string,
@@ -100,6 +126,9 @@ export default function (api: any) {
100
126
  provider?: string;
101
127
  count?: number;
102
128
  depth?: string;
129
+ time_range?: string;
130
+ include_domains?: string[];
131
+ exclude_domains?: string[];
103
132
  },
104
133
  ) {
105
134
  const args = [scriptPath, "--query", params.query, "--compact"];
@@ -119,6 +148,19 @@ export default function (api: any) {
119
148
  args.push("--exa-depth", params.depth);
120
149
  }
121
150
 
151
+ if (params.time_range) {
152
+ args.push("--time-range", params.time_range);
153
+ args.push("--freshness", params.time_range);
154
+ }
155
+
156
+ if (params.include_domains?.length) {
157
+ args.push("--include-domains", ...params.include_domains);
158
+ }
159
+
160
+ if (params.exclude_domains?.length) {
161
+ args.push("--exclude-domains", ...params.exclude_domains);
162
+ }
163
+
122
164
  const envPaths = [
123
165
  path.join(PLUGIN_DIR, ".env"),
124
166
  path.join(PLUGIN_DIR, "..", "web-search-plus", ".env"),
@@ -131,7 +173,7 @@ export default function (api: any) {
131
173
 
132
174
  try {
133
175
  const child = spawnSync("python3", args, {
134
- timeout: 65000,
176
+ timeout: 75000,
135
177
  env: childEnv,
136
178
  shell: false,
137
179
  encoding: "utf8",
@@ -2,7 +2,7 @@
2
2
  "id": "web-search-plus-plugin",
3
3
  "kind": "skill",
4
4
  "name": "Web Search Plus",
5
- "version": "1.2.0",
5
+ "version": "1.2.2",
6
6
  "description": "Multi-provider web search (Serper/Google, Tavily, Exa/Neural+Deep, Perplexity, You.com, SearXNG) with intelligent auto-routing",
7
7
  "configSchema": {
8
8
  "type": "object",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "web-search-plus-plugin",
3
- "version": "1.2.0",
3
+ "version": "1.2.2",
4
4
  "description": "OpenClaw plugin: multi-provider web search (Serper/Google, Tavily, Exa/Neural+Deep, Perplexity, You.com, SearXNG) with intelligent auto-routing",
5
5
  "type": "module",
6
6
  "main": "index.ts",
package/scripts/search.py CHANGED
@@ -459,17 +459,15 @@ def validate_api_key(provider: str, config: Dict[str, Any] = None) -> str:
459
459
  ],
460
460
  "provider": provider
461
461
  }
462
- print(json.dumps(error_msg, indent=2), file=sys.stderr)
463
- sys.exit(1)
464
-
462
+ raise ProviderConfigError(json.dumps(error_msg))
463
+
465
464
  # Validate URL format
466
465
  if not key.startswith(("http://", "https://")):
467
- print(json.dumps({
466
+ raise ProviderConfigError(json.dumps({
468
467
  "error": "SearXNG instance URL must start with http:// or https://",
469
468
  "provided": key,
470
469
  "provider": provider
471
- }, indent=2), file=sys.stderr)
472
- sys.exit(1)
470
+ }))
473
471
 
474
472
  return key
475
473
 
@@ -500,16 +498,14 @@ def validate_api_key(provider: str, config: Dict[str, Any] = None) -> str:
500
498
  ],
501
499
  "provider": provider
502
500
  }
503
- print(json.dumps(error_msg, indent=2), file=sys.stderr)
504
- sys.exit(1)
505
-
501
+ raise ProviderConfigError(json.dumps(error_msg))
502
+
506
503
  if len(key) < 10:
507
- print(json.dumps({
504
+ raise ProviderConfigError(json.dumps({
508
505
  "error": f"API key for {provider} appears invalid (too short)",
509
506
  "provider": provider
510
- }, indent=2), file=sys.stderr)
511
- sys.exit(1)
512
-
507
+ }))
508
+
513
509
  return key
514
510
 
515
511
 
@@ -753,6 +749,19 @@ class QueryAnalyzer:
753
749
  r'\bheadlines?\b': 3.0,
754
750
  r'\b202[4-9]\b': 2.0, # Current year mentions
755
751
  r'\blast (week|month|year)\b': 2.0,
752
+
753
+ # German local patterns
754
+ r'\bin der nähe\b': 4.0,
755
+ r'\bin meiner nähe\b': 4.0,
756
+ r'\böffnungszeiten\b': 3.0,
757
+ r'\badresse von\b': 3.0,
758
+ r'\bweg(beschreibung)? nach\b': 3.5,
759
+
760
+ # German news/recency patterns
761
+ r'\bheute\b': 2.5,
762
+ r'\bmorgen\b': 2.0,
763
+ r'\baktuell\b': 2.5,
764
+ r'\bnachrichten\b': 3.0,
756
765
  }
757
766
 
758
767
  # RAG/AI signals → You.com
@@ -805,6 +814,11 @@ class QueryAnalyzer:
805
814
  r'\bthings to do in\b': 4.0,
806
815
  r'\bnear me\b': 3.0,
807
816
  r'\bcan you (tell me|summarize|explain)\b': 3.5,
817
+ # German
818
+ r'\bwann\b': 3.0,
819
+ r'\bwer\b': 3.0,
820
+ r'\bwo\b': 2.5,
821
+ r'\bwie viele\b': 3.0,
808
822
  }
809
823
 
810
824
  # Privacy/Multi-source signals → SearXNG (self-hosted meta-search)
@@ -875,6 +889,14 @@ class QueryAnalyzer:
875
889
  r'\bstructured (output|data|report)\b': 4.0,
876
890
  r'\bmarket research\b': 4.0,
877
891
  r'\bindustry (report|analysis|overview)\b': 4.0,
892
+ r'\bresearch (on|about|into)\b': 4.0,
893
+ r'\bwhitepaper\b': 4.5,
894
+ r'\btechnical report\b': 4.0,
895
+ r'\bsurvey of\b': 4.5,
896
+ r'\bmeta.?analysis\b': 5.0,
897
+ r'\bsystematic review\b': 5.0,
898
+ r'\bcase study\b': 3.5,
899
+ r'\bbenchmark(s|ing)?\b': 3.5,
878
900
  # German
879
901
  r'\btiefenrecherche\b': 5.0,
880
902
  r'\bumfassende (analyse|übersicht|recherche)\b': 4.5,
@@ -898,11 +920,16 @@ class QueryAnalyzer:
898
920
  r'\bpatent (landscape|analysis|search)\b': 4.5,
899
921
  r'\bmarket intelligence\b': 4.5,
900
922
  r'\bcompetitive (intelligence|landscape)\b': 4.5,
923
+ r'\btrade.?offs?\b': 4.0,
924
+ r'\bpros and cons of\b': 4.0,
925
+ r'\bshould I (use|choose|pick)\b': 3.5,
926
+ r'\bwhich is better\b': 4.0,
901
927
  # German
902
928
  r'\bkomplexe analyse\b': 4.5,
903
929
  r'\bwidersprüche\b': 4.5,
904
930
  r'\bquellen abwägen\b': 4.5,
905
931
  r'\brechtliche analyse\b': 4.5,
932
+ r'\bvergleich(e|en)?\b': 3.5,
906
933
  }
907
934
 
908
935
 
@@ -1339,6 +1366,11 @@ def explain_routing(query: str, config: Dict[str, Any]) -> Dict[str, Any]:
1339
1366
 
1340
1367
 
1341
1368
 
1369
class ProviderConfigError(Exception):
    """Raised when a provider is missing or has an invalid API key/config.

    ``validate_api_key`` raises this with a JSON-encoded error payload as the
    exception message (a missing key, a key that is too short, or a SearXNG
    instance URL that does not start with ``http://`` or ``https://``), so
    ``str(exc)`` yields that JSON string.
    """
1372
+
1373
+
1342
1374
  class ProviderRequestError(Exception):
1343
1375
  """Structured provider error with retry/cooldown metadata."""
1344
1376
 
@@ -1406,6 +1438,24 @@ def reset_provider_health(provider: str) -> None:
1406
1438
  _save_provider_health(state)
1407
1439
 
1408
1440
 
1441
+ def _title_from_url(url: str) -> str:
1442
+ """Derive a readable title from a URL when none is provided."""
1443
+ try:
1444
+ parsed = urlparse(url)
1445
+ domain = parsed.netloc.replace("www.", "")
1446
+ # Use last meaningful path segment as context
1447
+ segments = [s for s in parsed.path.strip("/").split("/") if s]
1448
+ if segments:
1449
+ last = segments[-1].replace("-", " ").replace("_", " ")
1450
+ # Strip file extensions
1451
+ last = re.sub(r'\.\w{2,4}$', '', last)
1452
+ if last:
1453
+ return f"{domain} — {last[:80]}"
1454
+ return domain
1455
+ except Exception:
1456
+ return url[:60]
1457
+
1458
+
1409
1459
  def normalize_result_url(url: str) -> str:
1410
1460
  if not url:
1411
1461
  return ""
@@ -1728,7 +1778,7 @@ def search_exa(
1728
1778
  results.append({
1729
1779
  "title": f"Exa {exa_depth.replace('-', ' ').title()} Synthesis",
1730
1780
  "url": "",
1731
- "snippet": synthesized_text[:2000],
1781
+ "snippet": synthesized_text,
1732
1782
  "full_synthesis": synthesized_text,
1733
1783
  "score": 1.0,
1734
1784
  "grounding": grounding_citations[:10],
@@ -1750,7 +1800,7 @@ def search_exa(
1750
1800
  "type": "source",
1751
1801
  })
1752
1802
 
1753
- answer = synthesized_text[:1000] if synthesized_text else (results[1]["snippet"] if len(results) > 1 else "")
1803
+ answer = synthesized_text if synthesized_text else (results[1]["snippet"] if len(results) > 1 else "")
1754
1804
 
1755
1805
  return {
1756
1806
  "provider": "exa",
@@ -1845,13 +1895,17 @@ def search_perplexity(
1845
1895
  message = choices[0].get("message", {}) if choices else {}
1846
1896
  answer = (message.get("content") or "").strip()
1847
1897
 
1848
- urls = re.findall(r"https?://[^\s)\]}>\"']+", answer)
1849
- unique_urls = []
1850
- seen = set()
1851
- for u in urls:
1852
- if u not in seen:
1853
- seen.add(u)
1854
- unique_urls.append(u)
1898
+ # Prefer the structured citations array from Perplexity API response
1899
+ api_citations = data.get("citations", [])
1900
+
1901
+ # Fallback: extract URLs from answer text if API doesn't provide citations
1902
+ if not api_citations:
1903
+ api_citations = []
1904
+ seen = set()
1905
+ for u in re.findall(r"https?://[^\s)\]}>\"']+", answer):
1906
+ if u not in seen:
1907
+ seen.add(u)
1908
+ api_citations.append(u)
1855
1909
 
1856
1910
  results = []
1857
1911
 
@@ -1866,12 +1920,19 @@ def search_perplexity(
1866
1920
  "score": 1.0,
1867
1921
  })
1868
1922
 
1869
- # Additional results: extracted source URLs
1870
- for i, u in enumerate(unique_urls[:max_results - 1]):
1923
+ # Source results from citations
1924
+ for i, citation in enumerate(api_citations[:max_results - 1]):
1925
+ # citations can be plain URL strings or dicts with url/title
1926
+ if isinstance(citation, str):
1927
+ url = citation
1928
+ title = _title_from_url(url)
1929
+ else:
1930
+ url = citation.get("url", "")
1931
+ title = citation.get("title") or _title_from_url(url)
1871
1932
  results.append({
1872
- "title": f"Source {i+1}",
1873
- "url": u,
1874
- "snippet": "Referenced source from Perplexity answer",
1933
+ "title": title,
1934
+ "url": url,
1935
+ "snippet": f"Source cited in Perplexity answer [citation {i+1}]",
1875
1936
  "score": round(0.9 - i * 0.1, 3),
1876
1937
  })
1877
1938
 
@@ -2463,9 +2524,11 @@ Full docs: See README.md and SKILL.md
2463
2524
  disabled_providers = auto_config.get("disabled_providers", [])
2464
2525
 
2465
2526
  # Start with the selected provider, then try others in priority order
2527
+ # Only include providers that have a configured API key (except the primary,
2528
+ # which gets a clear error if unconfigured and no fallback succeeds)
2466
2529
  providers_to_try = [provider]
2467
2530
  for p in provider_priority:
2468
- if p not in providers_to_try and p not in disabled_providers:
2531
+ if p not in providers_to_try and p not in disabled_providers and get_api_key(p, config):
2469
2532
  providers_to_try.append(p)
2470
2533
 
2471
2534
  # Skip providers currently in cooldown