@juicesharp/rpiv-web-tools 1.11.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -5
- package/package.json +2 -2
- package/providers/factory.ts +10 -1
- package/providers/index.ts +31 -4
- package/providers/searxng.ts +242 -0
- package/providers/types.ts +54 -0
- package/web-tools.ts +99 -13
package/README.md
CHANGED
|
@@ -8,14 +8,14 @@
|
|
|
8
8
|
</a>
|
|
9
9
|
</div>
|
|
10
10
|
|
|
11
|
-
Let the model search the web and read pages. `rpiv-web-tools` adds `web_search` and `web_fetch` tools to [Pi Agent](https://github.com/badlogic/pi-mono) with pluggable providers (Brave, Tavily, Serper, Exa, Jina, Firecrawl), plus `/web-search-config` for interactive provider selection and API-key setup.
|
|
11
|
+
Let the model search the web and read pages. `rpiv-web-tools` adds `web_search` and `web_fetch` tools to [Pi Agent](https://github.com/badlogic/pi-mono) with pluggable providers (Brave, Tavily, Serper, Exa, Jina, Firecrawl, [SearXNG](https://docs.searxng.org/)), plus `/web-search-config` for interactive provider selection and API-key setup.
|
|
12
12
|
|
|
13
13
|

|
|
14
14
|
|
|
15
15
|
## Features
|
|
16
16
|
|
|
17
|
-
- **
|
|
18
|
-
- **Per-provider fetch strategy** - Brave and
|
|
17
|
+
- **Seven pluggable providers** - Brave, Tavily, Serper, Exa, Jina, Firecrawl, and self-hosted SearXNG. Pick one as the active backend; switch any time without losing the others' keys.
|
|
18
|
+
- **Per-provider fetch strategy** - Brave, Serper, and SearXNG read the URL directly and strip HTML to text; Tavily/Exa/Jina/Firecrawl use their native extraction endpoints (markdown for Jina/Firecrawl, plain text for Tavily/Exa).
|
|
19
19
|
- **Read any URL** - fetch http/https pages with HTML-to-text extraction, or get the raw response with `raw: true` (honoured by Brave/Serper/SearXNG; extraction providers always return their parsed text).
|
|
20
20
|
- **Large-page spillover** - oversized responses truncate inline and spill the full body to a temp file the model can read on demand.
|
|
21
21
|
- **SSRF guard** - refuses loopback, RFC 1918, link-local, and cloud-metadata addresses (`localhost`, `127.0.0.0/8`, `10.0.0.0/8`, `169.254.0.0/16`, `172.16.0.0/12`, `192.168.0.0/16`, `::1`, `fc00::/7`, `fe80::/10`).
|
|
@@ -53,7 +53,7 @@ Returns:
|
|
|
53
53
|
content: [{ type: "text", text: string }], // markdown list of "**title**\n url\n snippet"
|
|
54
54
|
details: {
|
|
55
55
|
query: string,
|
|
56
|
-
backend: "brave" | "tavily" | "serper" | "exa" | "jina" | "firecrawl",
|
|
56
|
+
backend: "brave" | "tavily" | "serper" | "exa" | "jina" | "firecrawl" | "searxng",
|
|
57
57
|
resultCount: number,
|
|
58
58
|
results?: Array<{ title: string, url: string, snippet: string }>,
|
|
59
59
|
}
|
|
@@ -100,12 +100,49 @@ Throws on invalid URL, non-http(s) protocol, private/loopback hostnames (SSRF gu
|
|
|
100
100
|
|
|
101
101
|
First match wins:
|
|
102
102
|
|
|
103
|
-
1. The active provider's environment variable: `BRAVE_SEARCH_API_KEY`, `TAVILY_API_KEY`, `SERPER_API_KEY`, `EXA_API_KEY`, `JINA_API_KEY`, or `
|
|
103
|
+
1. The active provider's environment variable: `BRAVE_SEARCH_API_KEY`, `TAVILY_API_KEY`, `SERPER_API_KEY`, `EXA_API_KEY`, `JINA_API_KEY`, `FIRECRAWL_API_KEY`, or `SEARXNG_API_KEY`
|
|
104
104
|
2. `apiKeys.<provider>` field in `~/.config/rpiv-web-tools/config.json`
|
|
105
105
|
3. Legacy `apiKey` field (Brave only — auto-migrated to the new shape on next save)
|
|
106
106
|
|
|
107
107
|
The active provider is `config.provider` (set by `/web-search-config`); falls back to `brave` if absent.
|
|
108
108
|
|
|
109
|
+
## SearXNG (self-hosted)
|
|
110
|
+
|
|
111
|
+
SearXNG is the only provider that talks to an instance you control, so it needs a base URL instead of (or in addition to) an API key.
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
export SEARXNG_URL=http://localhost:8080
|
|
115
|
+
# Optional: only if your instance sits behind a Bearer-auth reverse proxy
|
|
116
|
+
export SEARXNG_API_KEY=…
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Resolution order for the URL: `SEARXNG_URL` env var → `baseUrls.searxng` in `~/.config/rpiv-web-tools/config.json` → default `http://localhost:8080`. `/web-search-config` prompts for the URL first and the (optional) API key second.
|
|
120
|
+
|
|
121
|
+
Your instance must have `json` enabled in `settings.yml` under `search.formats` — default SearXNG installs ship with JSON disabled and will return `403 Forbidden` otherwise (per the [SearXNG search API docs](https://docs.searxng.org/dev/search_api.html)). The provider surfaces that case with an actionable hint. SearXNG's `web_fetch` reuses the same raw-HTTP + HTML-to-text pipeline as Brave/Serper, so URLs returned by `web_search` can be fetched without any extra setup.
|
|
122
|
+
|
|
123
|
+
The SSRF guard (which refuses loopback and RFC-1918 addresses) applies to URLs `web_fetch` retrieves on the model's behalf, not to the SearXNG search endpoint itself: a `SEARXNG_URL` pointing at `http://localhost:8080` or another private host is intentionally reachable, since SearXNG is self-hosted by design.
|
|
124
|
+
|
|
125
|
+
### Running SearXNG locally with Docker
|
|
126
|
+
|
|
127
|
+
The `searxng/searxng` entrypoint **overwrites** `/etc/searxng/settings.yml` on first start with the bundled default (ships with `formats: [html]` only). Pre-populating the mounted file doesn't stick — wait for the entrypoint, then patch:
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
mkdir -p ~/.searxng
|
|
131
|
+
docker run -d --name searxng --restart unless-stopped \
|
|
132
|
+
-p 8080:8080 -v "$HOME/.searxng":/etc/searxng \
|
|
133
|
+
-e BASE_URL=http://localhost:8080/ searxng/searxng:latest
|
|
134
|
+
sleep 5 # wait for entrypoint to write settings.yml
|
|
135
|
+
sed -i.bak '/^ formats:$/,/^[^ ]/ { /- html/a\
|
|
136
|
+
- json
|
|
137
|
+
}' ~/.searxng/settings.yml
|
|
138
|
+
docker restart searxng
|
|
139
|
+
|
|
140
|
+
# Sanity check — a number > 0 means it's wired correctly
|
|
141
|
+
curl -sf 'http://localhost:8080/search?q=hello&format=json' | jq '.results | length'
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
`403` means JSON is still disabled — re-check `~/.searxng/settings.yml`. Works identically on Docker Desktop or OrbStack. For a throwaway test instance, swap `~/.searxng` for `/tmp/searxng` and drop `--restart unless-stopped`.
|
|
145
|
+
|
|
109
146
|
## Executor guidance overrides
|
|
110
147
|
|
|
111
148
|
Override the `promptSnippet` / `promptGuidelines` the model sees for each tool by editing `~/.config/rpiv-web-tools/config.json`. Note the per-tool nesting under `guidance.web_search` / `guidance.web_fetch` — this differs from the flat `guidance` shape used by single-tool siblings (`rpiv-advisor`, `rpiv-todo`, `rpiv-ask-user-question`):
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@juicesharp/rpiv-web-tools",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.13.0",
|
|
4
4
|
"description": "Pi extension. Web search and fetch for the model with pluggable providers (Brave, Tavily, Serper, Exa, Jina, Firecrawl).",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
]
|
|
49
49
|
},
|
|
50
50
|
"dependencies": {
|
|
51
|
-
"@juicesharp/rpiv-config": "^1.
|
|
51
|
+
"@juicesharp/rpiv-config": "^1.13.0"
|
|
52
52
|
},
|
|
53
53
|
"peerDependencies": {
|
|
54
54
|
"@earendil-works/pi-coding-agent": "*",
|
package/providers/factory.ts
CHANGED
|
@@ -2,11 +2,18 @@ import { BraveProvider } from "./brave.js";
|
|
|
2
2
|
import { ExaProvider } from "./exa.js";
|
|
3
3
|
import { FirecrawlProvider } from "./firecrawl.js";
|
|
4
4
|
import { JinaProvider } from "./jina.js";
|
|
5
|
+
import { SearxngProvider } from "./searxng.js";
|
|
5
6
|
import { SerperProvider } from "./serper.js";
|
|
6
7
|
import { TavilyProvider } from "./tavily.js";
|
|
7
8
|
import type { SearchProvider } from "./types.js";
|
|
8
9
|
|
|
9
|
-
export
|
|
10
|
+
export interface ProviderCredentials {
|
|
11
|
+
apiKey?: string;
|
|
12
|
+
baseUrl?: string;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export function createSearchProvider(name: string, creds: ProviderCredentials): SearchProvider {
|
|
16
|
+
const apiKey = creds.apiKey ?? "";
|
|
10
17
|
switch (name) {
|
|
11
18
|
case "brave":
|
|
12
19
|
return new BraveProvider(apiKey);
|
|
@@ -20,6 +27,8 @@ export function createSearchProvider(name: string, apiKey: string): SearchProvid
|
|
|
20
27
|
return new JinaProvider(apiKey);
|
|
21
28
|
case "firecrawl":
|
|
22
29
|
return new FirecrawlProvider(apiKey);
|
|
30
|
+
case "searxng":
|
|
31
|
+
return new SearxngProvider({ apiKey: creds.apiKey, baseUrl: creds.baseUrl ?? "" });
|
|
23
32
|
default:
|
|
24
33
|
throw new Error(`Unknown search provider: "${name}"`);
|
|
25
34
|
}
|
package/providers/index.ts
CHANGED
|
@@ -2,23 +2,50 @@ import { BRAVE_PROVIDER_META } from "./brave.js";
|
|
|
2
2
|
import { EXA_PROVIDER_META } from "./exa.js";
|
|
3
3
|
import { FIRECRAWL_PROVIDER_META } from "./firecrawl.js";
|
|
4
4
|
import { JINA_PROVIDER_META } from "./jina.js";
|
|
5
|
+
import { SEARXNG_PROVIDER_META } from "./searxng.js";
|
|
5
6
|
import { SERPER_PROVIDER_META } from "./serper.js";
|
|
6
7
|
import { TAVILY_PROVIDER_META } from "./tavily.js";
|
|
8
|
+
import type { ProviderMeta } from "./types.js";
|
|
7
9
|
|
|
8
10
|
export { BRAVE_API_KEY_ENV_VAR, BRAVE_PROVIDER_META, BraveProvider } from "./brave.js";
|
|
9
11
|
export { EXA_API_KEY_ENV_VAR, EXA_PROVIDER_META, ExaProvider } from "./exa.js";
|
|
10
|
-
export { createSearchProvider } from "./factory.js";
|
|
12
|
+
export { createSearchProvider, type ProviderCredentials } from "./factory.js";
|
|
11
13
|
export { FIRECRAWL_API_KEY_ENV_VAR, FIRECRAWL_PROVIDER_META, FirecrawlProvider } from "./firecrawl.js";
|
|
12
14
|
export { JINA_API_KEY_ENV_VAR, JINA_PROVIDER_META, JinaProvider } from "./jina.js";
|
|
15
|
+
export {
|
|
16
|
+
configureSearxng,
|
|
17
|
+
SEARXNG_API_KEY_ENV_VAR,
|
|
18
|
+
SEARXNG_DEFAULT_URL,
|
|
19
|
+
SEARXNG_PROVIDER_META,
|
|
20
|
+
SEARXNG_URL_ENV_VAR,
|
|
21
|
+
type SearxngConfigChange,
|
|
22
|
+
type SearxngConfigCurrent,
|
|
23
|
+
type SearxngConfigUi,
|
|
24
|
+
SearxngProvider,
|
|
25
|
+
} from "./searxng.js";
|
|
13
26
|
export { SERPER_API_KEY_ENV_VAR, SERPER_PROVIDER_META, SerperProvider } from "./serper.js";
|
|
14
27
|
export { TAVILY_API_KEY_ENV_VAR, TAVILY_PROVIDER_META, TavilyProvider } from "./tavily.js";
|
|
15
|
-
export type {
|
|
28
|
+
export type {
|
|
29
|
+
FetchResponse,
|
|
30
|
+
ProviderConfigChange,
|
|
31
|
+
ProviderConfigCurrent,
|
|
32
|
+
ProviderConfigUi,
|
|
33
|
+
ProviderMeta,
|
|
34
|
+
SearchProvider,
|
|
35
|
+
SearchResponse,
|
|
36
|
+
SearchResult,
|
|
37
|
+
} from "./types.js";
|
|
16
38
|
|
|
17
|
-
|
|
39
|
+
// Typed as readonly ProviderMeta[] (not `as const`) so iterators can access
|
|
40
|
+
// the optional META fields (baseUrlEnvVar, defaultBaseUrl, configure) without
|
|
41
|
+
// per-element narrowing. Individual META consts still expose their narrow
|
|
42
|
+
// literal types when imported directly.
|
|
43
|
+
export const PROVIDERS: readonly ProviderMeta[] = [
|
|
18
44
|
BRAVE_PROVIDER_META,
|
|
19
45
|
TAVILY_PROVIDER_META,
|
|
20
46
|
SERPER_PROVIDER_META,
|
|
21
47
|
EXA_PROVIDER_META,
|
|
22
48
|
JINA_PROVIDER_META,
|
|
23
49
|
FIRECRAWL_PROVIDER_META,
|
|
24
|
-
|
|
50
|
+
SEARXNG_PROVIDER_META,
|
|
51
|
+
];
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import { assertTextContentType, extractBodyAsText, fetchUrlOrThrow } from "./fetch-helpers.js";
|
|
2
|
+
import {
|
|
3
|
+
type FetchResponse,
|
|
4
|
+
isCancellation,
|
|
5
|
+
type ProviderConfigChange,
|
|
6
|
+
type ProviderConfigCurrent,
|
|
7
|
+
type ProviderConfigUi,
|
|
8
|
+
type ProviderMeta,
|
|
9
|
+
type SearchProvider,
|
|
10
|
+
type SearchResponse,
|
|
11
|
+
type SearchResult,
|
|
12
|
+
} from "./types.js";
|
|
13
|
+
|
|
14
|
+
export const SEARXNG_API_KEY_ENV_VAR = "SEARXNG_API_KEY";
|
|
15
|
+
export const SEARXNG_URL_ENV_VAR = "SEARXNG_URL";
|
|
16
|
+
export const SEARXNG_DEFAULT_URL = "http://localhost:8080";
|
|
17
|
+
|
|
18
|
+
// SearXNG search API knobs (per https://docs.searxng.org/dev/search_api.html).
|
|
19
|
+
const SEARXNG_SEARCH_PATH = "/search";
|
|
20
|
+
const SEARXNG_FORMAT_JSON = "json";
|
|
21
|
+
const SEARXNG_SAFESEARCH_OFF = "0"; // 0/1/2 = none/moderate/strict
|
|
22
|
+
|
|
23
|
+
// Number of leading + trailing characters preserved when masking a Bearer key
|
|
24
|
+
// in the config prompt. Mirrors API_KEY_MASK_VISIBLE_CHARS in web-tools.ts.
|
|
25
|
+
const MASK_VISIBLE_CHARS = 4;
|
|
26
|
+
|
|
27
|
+
// SearXNG-specific aliases of the generic config shapes — preserved for
|
|
28
|
+
// backward compatibility with the symbols exported in v1.11.0. New providers
|
|
29
|
+
// should consume the generic ProviderConfig* types from ./types.js directly.
|
|
30
|
+
export type SearxngConfigUi = ProviderConfigUi;
|
|
31
|
+
export type SearxngConfigCurrent = ProviderConfigCurrent;
|
|
32
|
+
export type SearxngConfigChange = ProviderConfigChange;
|
|
33
|
+
|
|
34
|
+
export const SEARXNG_PROVIDER_META: ProviderMeta = {
|
|
35
|
+
name: "searxng",
|
|
36
|
+
label: "SearXNG",
|
|
37
|
+
envVar: SEARXNG_API_KEY_ENV_VAR,
|
|
38
|
+
baseUrlEnvVar: SEARXNG_URL_ENV_VAR,
|
|
39
|
+
defaultBaseUrl: SEARXNG_DEFAULT_URL,
|
|
40
|
+
configure: (ui, current) => configureSearxng(ui, current),
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
/** One entry of the `results` array in a SearXNG JSON response; every field may be absent. */
interface SearxngRawResult {
	title?: string;
	url?: string;
	content?: string;
}

/** Subset of the SearXNG JSON response that this provider reads. */
interface SearxngRawResponse {
	results?: SearxngRawResult[];
}

/**
 * Maps a raw SearXNG payload onto the provider-neutral result shape.
 *
 * The payload comes straight from `res.json()` and is not validated upstream,
 * so this function tolerates a `null` payload, a missing or non-array
 * `results` field, and non-object entries instead of throwing a TypeError.
 *
 * @param raw - Parsed response body.
 * @param maxResults - Upper bound on the number of results returned; applied
 *   with `Array.prototype.slice(0, maxResults)` after malformed entries are dropped.
 * @returns Results with `content` renamed to `snippet` and missing fields
 *   defaulted to the empty string.
 */
function normalizeSearxngResults(raw: SearxngRawResponse, maxResults: number): SearchResult[] {
	const entries: unknown = raw?.results;
	if (!Array.isArray(entries)) return [];

	return entries
		// Drop null / primitive entries first so they do not use up result slots.
		.filter((entry): entry is SearxngRawResult => typeof entry === "object" && entry !== null)
		.slice(0, maxResults)
		.map((entry) => ({
			title: entry.title ?? "",
			url: entry.url ?? "",
			snippet: entry.content ?? "",
		}));
}
|
|
60
|
+
|
|
61
|
+
/**
 * Returns `url` with every trailing "/" removed.
 *
 * Uses a linear scan from the end of the string. The regex form `/\/+$/`
 * produces the same output but backtracks quadratically when the input
 * contains a long run of slashes that is not at the very end.
 *
 * @param url - Any string; an all-slash or empty input yields "".
 */
function stripTrailingSlashes(url: string): string {
	const SLASH = 47; // "/".charCodeAt(0)
	let end = url.length;
	while (end > 0 && url.charCodeAt(end - 1) === SLASH) end--;
	return end === url.length ? url : url.slice(0, end);
}
|
|
64
|
+
|
|
65
|
+
// Reject anything that isn't an http(s) URL — a user-supplied SEARXNG_URL
|
|
66
|
+
// must not be allowed to silently become `file://`, `javascript:`, `data:`
|
|
67
|
+
// or any other scheme that `new URL()` accepts but we'd misuse downstream.
|
|
68
|
+
/**
 * Validates that `url` parses as an absolute URL whose scheme is http or https.
 *
 * @param url - Candidate base URL.
 * @throws Error when `new URL(url)` rejects the input, or when the parsed
 *   protocol is anything other than "http:" / "https:".
 */
function assertHttpUrl(url: string): void {
	const tryParse = (): URL | undefined => {
		try {
			return new URL(url);
		} catch {
			return undefined;
		}
	};

	const candidate = tryParse();
	if (candidate === undefined) {
		throw new Error(`${SEARXNG_URL_ENV_VAR} is not a valid URL (got: ${url})`);
	}

	const scheme = candidate.protocol;
	if (scheme === "http:" || scheme === "https:") return;

	throw new Error(
		`${SEARXNG_URL_ENV_VAR} must use http:// or https:// (got: ${scheme.replace(":", "")}://)`,
	);
}
|
|
81
|
+
|
|
82
|
+
// 401 ≈ reverse-proxy auth rejected the Bearer token. 403 from a default
|
|
83
|
+
// SearXNG install almost always means JSON output is disabled — the docs
|
|
84
|
+
// explicitly warn that "Requesting an unset format will return a 403
|
|
85
|
+
// Forbidden error". Surface the actionable fix for each.
|
|
86
|
+
/**
 * Returns a parenthesised, leading-space hint to append to a search error
 * message for the given HTTP status, or "" when there is no specific advice.
 *
 * @param status - HTTP status code of the failed search response.
 */
function hintForSearchStatus(status: number): string {
	switch (status) {
		case 401:
			return ` (the SearXNG instance's reverse-proxy rejected the Bearer token; check ${SEARXNG_API_KEY_ENV_VAR} or apiKeys.searxng)`;
		case 403:
			return " (the SearXNG instance may have JSON output disabled; enable 'json' under 'search.formats' in its settings.yml)";
		default:
			return "";
	}
}
|
|
95
|
+
|
|
96
|
+
/** Constructor options for SearxngProvider. */
interface SearxngProviderOptions {
	/** Optional Bearer token; blank or whitespace-only values are treated as unset. */
	apiKey?: string;
	/** Base URL of the SearXNG instance. An empty value is accepted here and rejected by search(). */
	baseUrl: string;
}

/**
 * SearchProvider backed by a SearXNG instance.
 *
 * search() issues a GET to `<baseUrl>/search` with `format=json`. fetch()
 * never contacts the SearXNG instance; it retrieves the requested URL through
 * the shared fetch helpers.
 */
export class SearxngProvider implements SearchProvider {
	readonly name = "searxng";
	readonly label = "SearXNG";
	readonly envVar = SEARXNG_API_KEY_ENV_VAR;

	private readonly apiKey?: string;
	private readonly baseUrl: string;

	/**
	 * @param options - API key and base URL; both are trimmed, and trailing
	 *   slashes are removed from the base URL.
	 * @throws Error when a non-empty base URL fails assertHttpUrl().
	 */
	constructor(options: SearxngProviderOptions) {
		this.apiKey = options.apiKey?.trim() || undefined;
		const trimmed = stripTrailingSlashes(options.baseUrl?.trim() ?? "");
		// An empty base URL is tolerated at construction so fetch() stays usable;
		// search() rejects it via requireBaseUrl().
		if (trimmed) assertHttpUrl(trimmed);
		this.baseUrl = trimmed;
	}

	/**
	 * Runs a search against the configured instance.
	 *
	 * @param query - Search terms, sent as the `q` parameter.
	 * @param maxResults - Cap applied client-side to the first page of results.
	 * @param signal - Optional abort signal forwarded to the HTTP request.
	 * @throws Error when no base URL is configured, when the response status is
	 *   not ok, or when a successful response does not contain valid JSON.
	 */
	async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResponse> {
		this.requireBaseUrl();
		const res = await fetch(this.buildSearchUrl(query), {
			method: "GET",
			headers: this.buildAuthHeaders(),
			signal,
		});
		if (!res.ok) throw await this.searchApiError(res);
		const raw = await this.parseSearchBody(res);
		return { query, results: normalizeSearxngResults(raw, maxResults) };
	}

	// No guard: SearXNG's fetch() wraps the built-in HTTP+htmlToText pipeline
	// and does not call the SearXNG instance — same contract as Brave/Serper.
	/**
	 * Retrieves `url` and returns its body as text.
	 *
	 * @param url - Page to retrieve.
	 * @param raw - Forwarded to extractBodyAsText().
	 * @param signal - Optional abort signal forwarded to fetchUrlOrThrow().
	 * @returns Extracted text and title plus the response's content-type and
	 *   content-length headers; `contentLength` is undefined when the header is
	 *   missing or is not a non-negative finite number.
	 */
	async fetch(url: string, raw: boolean, signal?: AbortSignal): Promise<FetchResponse> {
		const res = await fetchUrlOrThrow(url, signal);
		const contentType = res.headers.get("content-type") ?? "";
		assertTextContentType(contentType);

		const { text, title } = await extractBodyAsText(res, contentType, raw);
		const contentLengthHeader = res.headers.get("content-length");
		// Number("abc") is NaN; never hand NaN or a negative length to callers.
		const declaredLength = contentLengthHeader ? Number(contentLengthHeader) : undefined;
		const contentLength =
			declaredLength !== undefined && Number.isFinite(declaredLength) && declaredLength >= 0
				? declaredLength
				: undefined;
		return {
			text,
			title,
			contentType: contentType || undefined,
			contentLength,
		};
	}

	/** Throws when the provider was constructed without a base URL. */
	private requireBaseUrl(): void {
		if (!this.baseUrl) {
			throw new Error(
				`${SEARXNG_URL_ENV_VAR} is not set. Run /web-search-config to configure, or export the env var.`,
			);
		}
	}

	// The SearXNG API exposes only `pageno` for pagination, not `count`/`limit`
	// (https://docs.searxng.org/dev/search_api.html), so we ask for a single
	// page and slice to maxResults client-side.
	private buildSearchUrl(query: string): string {
		const url = new URL(`${this.baseUrl}${SEARXNG_SEARCH_PATH}`);
		url.searchParams.set("q", query);
		url.searchParams.set("format", SEARXNG_FORMAT_JSON);
		url.searchParams.set("safesearch", SEARXNG_SAFESEARCH_OFF);
		return url.toString();
	}

	// SearXNG itself has no native auth; the optional Bearer key is for
	// instances fronted by a reverse-proxy that gates on Authorization.
	private buildAuthHeaders(): Record<string, string> {
		const headers: Record<string, string> = { Accept: "application/json" };
		if (this.apiKey) headers.Authorization = `Bearer ${this.apiKey}`;
		return headers;
	}

	/**
	 * Parses a successful search response as JSON. A JSON syntax failure (for
	 * example an HTML page served with a 2xx status) is converted into an Error
	 * that names the provider; any other failure, such as an aborted body read,
	 * is rethrown unchanged.
	 */
	private async parseSearchBody(res: Response): Promise<SearxngRawResponse> {
		try {
			return (await res.json()) as SearxngRawResponse;
		} catch (err: unknown) {
			if (!(err instanceof SyntaxError)) throw err;
			throw new Error(
				`${this.label} Search API returned a body that is not valid JSON (${res.status}); check that ${SEARXNG_URL_ENV_VAR} or baseUrls.searxng points at a SearXNG instance`,
			);
		}
	}

	/** Builds the Error for a non-ok search response, including the status hint and the response body. */
	private async searchApiError(res: Response): Promise<Error> {
		const body = await res.text();
		return new Error(`${this.label} Search API error (${res.status})${hintForSearchStatus(res.status)}: ${body}`);
	}
}
|
|
177
|
+
|
|
178
|
+
// ---------------------------------------------------------------------------
|
|
179
|
+
// /web-search-config helper — SearXNG branch
|
|
180
|
+
// ---------------------------------------------------------------------------
|
|
181
|
+
// SEARXNG_PROVIDER_META.configure wires configureSearxng() in; the orchestrator
|
|
182
|
+
// dispatches generically through ProviderMeta.configure without naming
|
|
183
|
+
// SearXNG specifically.
|
|
184
|
+
|
|
185
|
+
// Based on web-tools.ts:maskApiKey. Duplicated here to keep providers/* free
// of web-tools internals; consolidate if this ever grows.
/**
 * Masks a secret for display, keeping only its first and last few characters.
 *
 * Keys no longer than twice the visible-character count are masked entirely:
 * slicing head and tail from such a key would overlap and print every
 * character of the secret.
 *
 * @param key - Secret to mask.
 * @param visibleChars - Characters preserved at each end; defaults to
 *   MASK_VISIBLE_CHARS.
 * @returns `head...tail` for long keys, otherwise one `*` per character.
 */
function maskKey(key: string, visibleChars: number = MASK_VISIBLE_CHARS): string {
	// A non-positive count must not fall through: key.slice(-0) is the whole key.
	if (visibleChars <= 0 || key.length <= visibleChars * 2) {
		return "*".repeat(key.length);
	}
	const head = key.slice(0, visibleChars);
	const tail = key.slice(-visibleChars);
	return `${head}...${tail}`;
}
|
|
192
|
+
|
|
193
|
+
// Returns the resolved URL string, or `undefined` if the user cancelled.
|
|
194
|
+
// Empty input keeps the current URL or falls back to SEARXNG_DEFAULT_URL.
|
|
195
|
+
/**
 * Asks for the SearXNG base URL.
 *
 * @param ui - Prompt surface.
 * @param current - Currently configured URL, if any; trimmed before use.
 * @returns The trimmed answer; when the answer is blank, the current URL if
 *   there is one, otherwise SEARXNG_DEFAULT_URL. `undefined` on cancellation.
 */
async function promptForBaseUrl(ui: ProviderConfigUi, current: string | undefined): Promise<string | undefined> {
	const existing = current?.trim();

	let placeholder: string;
	if (existing) {
		placeholder = `Press Enter to keep current (${existing}), or type new URL`;
	} else {
		placeholder = `Press Enter for default (${SEARXNG_DEFAULT_URL}), or type instance URL`;
	}

	const answer = await ui.input("SearXNG base URL", placeholder);
	if (isCancellation(answer)) return undefined;

	const typed = answer.trim();
	if (typed) return typed;
	if (existing) return existing;
	return SEARXNG_DEFAULT_URL;
}
|
|
206
|
+
|
|
207
|
+
// Returns the resolved key string, `null` to leave unset, or `undefined` if
|
|
208
|
+
// the user cancelled. Empty input keeps the current key or leaves it unset.
|
|
209
|
+
/**
 * Asks for the optional Bearer API key.
 *
 * @param ui - Prompt surface.
 * @param current - Currently configured key, if any; blank values count as unset.
 * @returns The trimmed answer; when the answer is blank, the current key if
 *   there is one, otherwise `null`. `undefined` on cancellation.
 */
async function promptForOptionalKey(
	ui: ProviderConfigUi,
	current: string | undefined,
): Promise<string | null | undefined> {
	const existing = current?.trim() || undefined;

	let placeholder = "Press Enter to leave unset, or type a key";
	if (existing) {
		// Show only a masked form of the stored key in the prompt.
		placeholder = `Press Enter to keep current (${maskKey(existing)}), or type new key`;
	}

	const answer = await ui.input(
		"SearXNG API key (optional — for instances behind a Bearer-auth proxy)",
		placeholder,
	);
	if (isCancellation(answer)) return undefined;

	const typed = answer.trim();
	return typed || existing || null;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Interactive SearXNG setup: asks for the base URL first, then for the
 * optional Bearer API key.
 *
 * This helper only runs the prompts; it does not load, merge or save any
 * configuration and emits no notifications — that is left to the caller.
 *
 * @param ui - Prompt surface used for both questions.
 * @param current - Currently persisted base URL and API key, if any.
 * @returns The chosen `{ baseUrl, apiKey }` (`apiKey` is `null` when left
 *   unset), or `null` when either prompt is cancelled.
 */
export async function configureSearxng(
	ui: SearxngConfigUi,
	current: SearxngConfigCurrent,
): Promise<SearxngConfigChange | null> {
	const chosenUrl = await promptForBaseUrl(ui, current.baseUrl);
	if (chosenUrl !== undefined) {
		// The key prompt is only shown once the URL prompt was answered.
		const chosenKey = await promptForOptionalKey(ui, current.apiKey);
		if (chosenKey !== undefined) {
			return { baseUrl: chosenUrl, apiKey: chosenKey };
		}
	}
	return null;
}
|
package/providers/types.ts
CHANGED
|
@@ -23,3 +23,57 @@ export interface SearchProvider {
|
|
|
23
23
|
search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResponse>;
|
|
24
24
|
fetch(url: string, raw: boolean, signal?: AbortSignal): Promise<FetchResponse>;
|
|
25
25
|
}
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// PROVIDER_META + per-provider configure() contract
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
// User input from a ProviderConfigUi prompt. Both `null` and `undefined`
|
|
32
|
+
// indicate the user cancelled (different UI implementations may return
|
|
33
|
+
// either); use isCancellation() to test instead of comparing manually.
|
|
34
|
+
export type UserInput = string | null | undefined;

/**
 * Type guard reporting whether a prompt result represents a cancellation.
 *
 * @param input - Value returned by a ProviderConfigUi prompt.
 * @returns `true` for `null` or `undefined`; `false` for any string,
 *   including the empty string.
 */
export function isCancellation(input: UserInput): input is null | undefined {
	return input === null || input === undefined;
}
|
|
39
|
+
|
|
40
|
+
// Minimal UI surface a provider's configure() helper is allowed to depend on.
|
|
41
|
+
// Intentionally narrow so providers/ stays free of web-tools internals (no
|
|
42
|
+
// circular import) and so the contract can grow deliberately if a future
|
|
43
|
+
// provider needs more.
|
|
44
|
+
export interface ProviderConfigUi {
|
|
45
|
+
input(label: string, placeholder: string): Promise<UserInput>;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// What the orchestrator hands to configure(): the provider's currently
|
|
49
|
+
// persisted state (if any).
|
|
50
|
+
export interface ProviderConfigCurrent {
|
|
51
|
+
baseUrl?: string;
|
|
52
|
+
apiKey?: string;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// What configure() returns for the orchestrator to merge into WebToolsConfig.
|
|
56
|
+
// `null` apiKey = "leave unset"; absent baseUrl = "this provider has no URL
|
|
57
|
+
// knob"; whole-result `null` = "user cancelled, do not persist".
|
|
58
|
+
export interface ProviderConfigChange {
|
|
59
|
+
baseUrl?: string;
|
|
60
|
+
apiKey?: string | null;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Per-provider metadata declared alongside each provider's class. Drives
|
|
64
|
+
// generic dispatch in web-tools.ts so adding a new provider doesn't require
|
|
65
|
+
// touching the orchestrator.
|
|
66
|
+
//
|
|
67
|
+
// envVar — the API-key env var (omit if the provider has no key)
|
|
68
|
+
// baseUrlEnvVar — the URL env var (set for self-hosted providers)
|
|
69
|
+
// defaultBaseUrl — fallback URL when neither env nor config supplies one
|
|
70
|
+
// configure — interactive setup; if present, /web-search-config
|
|
71
|
+
// dispatches here instead of the default single-key prompt
|
|
72
|
+
export interface ProviderMeta {
|
|
73
|
+
name: string;
|
|
74
|
+
label: string;
|
|
75
|
+
envVar?: string;
|
|
76
|
+
baseUrlEnvVar?: string;
|
|
77
|
+
defaultBaseUrl?: string;
|
|
78
|
+
configure?(ui: ProviderConfigUi, current: ProviderConfigCurrent): Promise<ProviderConfigChange | null>;
|
|
79
|
+
}
|
package/web-tools.ts
CHANGED
|
@@ -28,7 +28,7 @@ import { configPath, loadJsonConfig, saveJsonConfig, validateGuidanceFields } fr
|
|
|
28
28
|
import { Type } from "typebox";
|
|
29
29
|
import { createSearchProvider } from "./providers/factory.js";
|
|
30
30
|
import { PROVIDERS } from "./providers/index.js";
|
|
31
|
-
import type { SearchResult } from "./providers/types.js";
|
|
31
|
+
import type { ProviderMeta, SearchProvider, SearchResult } from "./providers/types.js";
|
|
32
32
|
|
|
33
33
|
// ---------------------------------------------------------------------------
|
|
34
34
|
// Tunables and external surface
|
|
@@ -55,6 +55,12 @@ const UNSET_LABEL = "(not set)";
|
|
|
55
55
|
|
|
56
56
|
const DEFAULT_PROVIDER_NAME = "brave";
|
|
57
57
|
|
|
58
|
+
// Brave is the only provider whose key was historically stored at the top
|
|
59
|
+
// level (config.apiKey) before the per-provider apiKeys map. The legacy
|
|
60
|
+
// field is auto-migrated to apiKeys.brave on the next save by
|
|
61
|
+
// /web-search-config (the dispatch deletes apiKey from the saved object).
|
|
62
|
+
const LEGACY_TOP_LEVEL_KEY_PROVIDER = "brave";
|
|
63
|
+
|
|
58
64
|
// ---------------------------------------------------------------------------
|
|
59
65
|
// Config file persistence
|
|
60
66
|
// ---------------------------------------------------------------------------
|
|
@@ -69,6 +75,7 @@ interface WebToolsGuidance {
|
|
|
69
75
|
interface WebToolsConfig {
|
|
70
76
|
provider?: string;
|
|
71
77
|
apiKeys?: Record<string, string>;
|
|
78
|
+
baseUrls?: Record<string, string>;
|
|
72
79
|
apiKey?: string; // legacy — kept for backward compat
|
|
73
80
|
guidance?: WebToolsGuidance;
|
|
74
81
|
}
|
|
@@ -112,19 +119,44 @@ function resolveProviderApiKey(providerName: string, config: WebToolsConfig): st
|
|
|
112
119
|
const meta = PROVIDERS.find((p) => p.name === providerName);
|
|
113
120
|
if (!meta) return undefined;
|
|
114
121
|
|
|
115
|
-
const envKey = process.env[meta.envVar]?.trim();
|
|
122
|
+
const envKey = meta.envVar ? process.env[meta.envVar]?.trim() : undefined;
|
|
116
123
|
if (envKey) return envKey;
|
|
117
124
|
|
|
118
125
|
const configKey = config.apiKeys?.[providerName]?.trim();
|
|
119
126
|
if (configKey) return configKey;
|
|
120
127
|
|
|
121
|
-
if (providerName ===
|
|
128
|
+
if (providerName === LEGACY_TOP_LEVEL_KEY_PROVIDER) {
|
|
122
129
|
return config.apiKey?.trim() || undefined;
|
|
123
130
|
}
|
|
124
131
|
|
|
125
132
|
return undefined;
|
|
126
133
|
}
|
|
127
134
|
|
|
135
|
+
// Generic per-provider base-URL resolution: env → config.baseUrls[name] →
|
|
136
|
+
// meta.defaultBaseUrl → "". Providers without baseUrlEnvVar (hosted ones)
|
|
137
|
+
// short-circuit to "". The orchestrator only calls this for providers that
|
|
138
|
+
// declare baseUrlEnvVar, so the empty-string fallback is a safety net rather
|
|
139
|
+
// than a runtime path.
|
|
140
|
+
/**
 * Resolves the base URL for a provider.
 *
 * Precedence: the environment variable named by `meta.baseUrlEnvVar`, then
 * `config.baseUrls[meta.name]`, then `meta.defaultBaseUrl`. Values are
 * trimmed, and a blank value falls through to the next source.
 *
 * @param meta - Provider metadata.
 * @param config - Loaded web-tools configuration.
 * @returns The resolved URL, or "" when the provider declares no
 *   `baseUrlEnvVar` or no source supplies a value.
 */
function resolveProviderBaseUrl(meta: ProviderMeta, config: WebToolsConfig): string {
	const envVarName = meta.baseUrlEnvVar;
	if (!envVarName) return "";

	const fromEnv = process.env[envVarName]?.trim();
	// `||` keeps the config lookup lazy: it only runs when the env value is blank.
	const chosen = fromEnv || config.baseUrls?.[meta.name]?.trim();
	return chosen || (meta.defaultBaseUrl ?? "");
}
|
|
148
|
+
|
|
149
|
+
// Centralized instantiation: load active provider name + creds, build via
|
|
150
|
+
// the factory. Called by both registerWebSearchTool and registerWebFetchTool.
|
|
151
|
+
/**
 * Builds the active search provider from the loaded configuration.
 *
 * The provider name is `config.provider`, or DEFAULT_PROVIDER_NAME when that
 * is null or undefined. A base URL is resolved only for providers whose
 * metadata declares `baseUrlEnvVar`; for all others `baseUrl` is passed as
 * `undefined`. A missing API key is passed as "".
 *
 * @param config - Loaded web-tools configuration.
 * @returns The resolved provider name and the provider instance.
 */
function instantiateActiveProvider(config: WebToolsConfig): { providerName: string; provider: SearchProvider } {
	const activeName = config.provider ?? DEFAULT_PROVIDER_NAME;
	const resolvedKey = resolveProviderApiKey(activeName, config) ?? "";
	const descriptor = PROVIDERS.find((candidate) => candidate.name === activeName);

	let baseUrl: string | undefined;
	if (descriptor?.baseUrlEnvVar) {
		baseUrl = resolveProviderBaseUrl(descriptor, config);
	}

	return {
		providerName: activeName,
		provider: createSearchProvider(activeName, { apiKey: resolvedKey, baseUrl }),
	};
}
|
|
159
|
+
|
|
128
160
|
function maskApiKey(key: string | undefined): string {
|
|
129
161
|
if (!key) return UNSET_LABEL;
|
|
130
162
|
const head = key.slice(0, API_KEY_MASK_VISIBLE_CHARS);
|
|
@@ -261,9 +293,7 @@ export function registerWebSearchTool(pi: ExtensionAPI): void {
|
|
|
261
293
|
async execute(_toolCallId, params, signal, onUpdate, _ctx) {
|
|
262
294
|
const maxResults = clampSearchResultCount(params.max_results);
|
|
263
295
|
const config = loadConfig();
|
|
264
|
-
const providerName = config
|
|
265
|
-
const apiKey = resolveProviderApiKey(providerName, config);
|
|
266
|
-
const provider = createSearchProvider(providerName, apiKey ?? "");
|
|
296
|
+
const { providerName, provider } = instantiateActiveProvider(config);
|
|
267
297
|
|
|
268
298
|
onUpdate?.({
|
|
269
299
|
content: [{ type: "text", text: `Searching ${provider.label} for: "${params.query}"...` }],
|
|
@@ -351,9 +381,7 @@ export function registerWebFetchTool(pi: ExtensionAPI): void {
|
|
|
351
381
|
});
|
|
352
382
|
|
|
353
383
|
const config = loadConfig();
|
|
354
|
-
const
|
|
355
|
-
const apiKey = resolveProviderApiKey(providerName, config);
|
|
356
|
-
const provider = createSearchProvider(providerName, apiKey ?? "");
|
|
384
|
+
const { provider } = instantiateActiveProvider(config);
|
|
357
385
|
|
|
358
386
|
const { text: bodyText, title, contentType, contentLength } = await provider.fetch(url, raw, signal);
|
|
359
387
|
|
|
@@ -432,15 +460,27 @@ function formatShowConfigMessage(current: WebToolsConfig): string {
|
|
|
432
460
|
lines.push(` active provider: ${providerName}`);
|
|
433
461
|
|
|
434
462
|
for (const meta of PROVIDERS) {
|
|
435
|
-
const envKey = process.env[meta.envVar]?.trim();
|
|
463
|
+
const envKey = meta.envVar ? process.env[meta.envVar]?.trim() : undefined;
|
|
436
464
|
const configKey = current.apiKeys?.[meta.name]?.trim();
|
|
437
|
-
const legacyKey = meta.name ===
|
|
465
|
+
const legacyKey = meta.name === LEGACY_TOP_LEVEL_KEY_PROVIDER ? current.apiKey?.trim() : undefined;
|
|
438
466
|
const resolved = envKey ?? configKey ?? legacyKey;
|
|
439
467
|
lines.push(
|
|
440
468
|
` ${meta.name}: ${maskApiKey(resolved)} (env: ${maskApiKey(envKey)}, config: ${maskApiKey(configKey ?? legacyKey)})`,
|
|
441
469
|
);
|
|
442
470
|
}
|
|
443
471
|
|
|
472
|
+
// One URL line per provider that declares baseUrlEnvVar. Today this is
|
|
473
|
+
// only SearXNG, but a second self-hosted provider lands without touching
|
|
474
|
+
// this loop.
|
|
475
|
+
for (const meta of PROVIDERS) {
|
|
476
|
+
if (!meta.baseUrlEnvVar) continue;
|
|
477
|
+
const envUrl = process.env[meta.baseUrlEnvVar]?.trim();
|
|
478
|
+
const configUrl = current.baseUrls?.[meta.name]?.trim();
|
|
479
|
+
const resolvedUrl = envUrl || configUrl || meta.defaultBaseUrl || "";
|
|
480
|
+
const urlSource = envUrl ? "env" : configUrl ? "config" : "default";
|
|
481
|
+
lines.push(` ${meta.name} url: ${resolvedUrl} (source: ${urlSource})`);
|
|
482
|
+
}
|
|
483
|
+
|
|
444
484
|
return lines.join("\n");
|
|
445
485
|
}
|
|
446
486
|
|
|
@@ -465,7 +505,15 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI): void {
|
|
|
465
505
|
...PROVIDERS.filter((p) => p.name === activeProvider),
|
|
466
506
|
...PROVIDERS.filter((p) => p.name !== activeProvider),
|
|
467
507
|
];
|
|
468
|
-
const hasKey = (p:
|
|
508
|
+
const hasKey = (p: ProviderMeta) => {
	// Providers without a base-URL env var count as configured exactly when
	// resolveProviderApiKey returns something other than undefined.
	if (!p.baseUrlEnvVar) {
		return resolveProviderApiKey(p.name, current) !== undefined;
	}

	// Self-hosted providers count as configured once a non-blank base URL
	// comes from the environment or the saved config. The provider's default
	// URL is deliberately not consulted here, so an untouched setting still
	// reads as "not configured".
	const urlFromEnv = process.env[p.baseUrlEnvVar]?.trim();
	if (urlFromEnv) {
		return true;
	}
	const urlFromConfig = current.baseUrls?.[p.name]?.trim();
	return Boolean(urlFromConfig);
};
|
|
469
517
|
const labelOf = (p: (typeof PROVIDERS)[number]) => {
|
|
470
518
|
const markers: string[] = [];
|
|
471
519
|
if (p.name === activeProvider) markers.push("✓");
|
|
@@ -490,8 +538,46 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI): void {
|
|
|
490
538
|
}
|
|
491
539
|
const selectedProvider = selectedMeta.name;
|
|
492
540
|
|
|
541
|
+
// Providers that declare a `configure` callback own their prompt flow
|
|
542
|
+
// (e.g. SearXNG: URL prompt then optional Bearer key). The orchestrator
|
|
543
|
+
// dispatches generically and owns persistence + notifications.
|
|
544
|
+
if (selectedMeta.configure) {
|
|
545
|
+
const result = await selectedMeta.configure(ctx.ui, {
|
|
546
|
+
baseUrl: current.baseUrls?.[selectedProvider],
|
|
547
|
+
apiKey: current.apiKeys?.[selectedProvider],
|
|
548
|
+
});
|
|
549
|
+
if (!result) {
|
|
550
|
+
ctx.ui.notify("Web search config unchanged", "info");
|
|
551
|
+
return;
|
|
552
|
+
}
|
|
553
|
+
const toSave: WebToolsConfig = {
|
|
554
|
+
...current,
|
|
555
|
+
provider: selectedProvider,
|
|
556
|
+
...(result.baseUrl !== undefined && {
|
|
557
|
+
baseUrls: { ...current.baseUrls, [selectedProvider]: result.baseUrl },
|
|
558
|
+
}),
|
|
559
|
+
...(result.apiKey ? { apiKeys: { ...current.apiKeys, [selectedProvider]: result.apiKey } } : {}),
|
|
560
|
+
};
|
|
561
|
+
delete (toSave as { apiKey?: string }).apiKey;
|
|
562
|
+
if (!saveConfig(toSave)) {
|
|
563
|
+
ctx.ui.notify(
|
|
564
|
+
`Failed to save ${selectedMeta.label} config to ${CONFIG_PATH} — disk write failed`,
|
|
565
|
+
"error",
|
|
566
|
+
);
|
|
567
|
+
return;
|
|
568
|
+
}
|
|
569
|
+
ctx.ui.notify(
|
|
570
|
+
result.baseUrl
|
|
571
|
+
? `Saved ${selectedMeta.label} config (url: ${result.baseUrl}) to ${CONFIG_PATH}`
|
|
572
|
+
: `Saved ${selectedMeta.label} config to ${CONFIG_PATH}`,
|
|
573
|
+
"info",
|
|
574
|
+
);
|
|
575
|
+
return;
|
|
576
|
+
}
|
|
577
|
+
|
|
493
578
|
const existingKey =
|
|
494
|
-
current.apiKeys?.[selectedProvider] ??
|
|
579
|
+
current.apiKeys?.[selectedProvider] ??
|
|
580
|
+
(selectedProvider === LEGACY_TOP_LEVEL_KEY_PROVIDER ? current.apiKey : undefined);
|
|
495
581
|
const input = await ctx.ui.input(
|
|
496
582
|
`${selectedMeta.label} API key`,
|
|
497
583
|
existingKey ? `Press Enter to keep current (${maskApiKey(existingKey)}), or type new key` : "...",
|