@juicesharp/rpiv-web-tools 1.10.2 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,17 +8,14 @@
8
8
  </a>
9
9
  </div>
10
10
 
11
- [![npm version](https://img.shields.io/npm/v/@juicesharp/rpiv-web-tools.svg)](https://www.npmjs.com/package/@juicesharp/rpiv-web-tools)
12
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
13
-
14
- Let the model search the web and read pages. `rpiv-web-tools` adds `web_search` and `web_fetch` tools to [Pi Agent](https://github.com/badlogic/pi-mono) with pluggable providers (Brave, Tavily, Serper, Exa, Jina, Firecrawl), plus `/web-search-config` for interactive provider selection and API-key setup.
11
+ Let the model search the web and read pages. `rpiv-web-tools` adds `web_search` and `web_fetch` tools to [Pi Agent](https://github.com/badlogic/pi-mono) with pluggable providers (Brave, Tavily, Serper, Exa, Jina, Firecrawl, [SearXNG](https://docs.searxng.org/)), plus `/web-search-config` for interactive provider selection and API-key setup.
15
12
 
16
13
  ![Provider selection prompt](https://raw.githubusercontent.com/juicesharp/rpiv-mono/main/packages/rpiv-web-tools/docs/config.jpg)
17
14
 
18
15
  ## Features
19
16
 
20
- - **Six pluggable providers** - Brave, Tavily, Serper, Exa, Jina, Firecrawl. Pick one as the active backend; switch any time without losing the others' keys.
21
- - **Per-provider fetch strategy** - Brave and Serper read the URL directly and strip HTML to text; Tavily/Exa/Jina/Firecrawl use their native extraction endpoints (markdown for Jina/Firecrawl, plain text for Tavily/Exa).
17
+ - **Seven pluggable providers** - Brave, Tavily, Serper, Exa, Jina, Firecrawl, and self-hosted SearXNG. Pick one as the active backend; switch any time without losing the others' keys.
18
+ - **Per-provider fetch strategy** - Brave, Serper, and SearXNG read the URL directly and strip HTML to text; Tavily/Exa/Jina/Firecrawl use their native extraction endpoints (markdown for Jina/Firecrawl, plain text for Tavily/Exa).
22
19
  - **Read any URL** - fetch http/https pages with HTML-to-text extraction, or get the raw response with `raw: true` (honoured by Brave/Serper/SearXNG; extraction providers always return their parsed text).
23
20
  - **Large-page spillover** - oversized responses truncate inline and spill the full body to a temp file the model can read on demand.
24
21
  - **SSRF guard** - refuses loopback, RFC 1918, link-local, and cloud-metadata addresses (`localhost`, `127.0.0.0/8`, `10.0.0.0/8`, `169.254.0.0/16`, `172.16.0.0/12`, `192.168.0.0/16`, `::1`, `fc00::/7`, `fe80::/10`).
@@ -56,7 +53,7 @@ Returns:
56
53
  content: [{ type: "text", text: string }], // markdown list of "**title**\n url\n snippet"
57
54
  details: {
58
55
  query: string,
59
- backend: "brave" | "tavily" | "serper" | "exa" | "jina" | "firecrawl",
56
+ backend: "brave" | "tavily" | "serper" | "exa" | "jina" | "firecrawl" | "searxng",
60
57
  resultCount: number,
61
58
  results?: Array<{ title: string, url: string, snippet: string }>,
62
59
  }
@@ -103,12 +100,49 @@ Throws on invalid URL, non-http(s) protocol, private/loopback hostnames (SSRF gu
103
100
 
104
101
  First match wins:
105
102
 
106
- 1. The active provider's environment variable: `BRAVE_SEARCH_API_KEY`, `TAVILY_API_KEY`, `SERPER_API_KEY`, `EXA_API_KEY`, `JINA_API_KEY`, or `FIRECRAWL_API_KEY`
103
+ 1. The active provider's environment variable: `BRAVE_SEARCH_API_KEY`, `TAVILY_API_KEY`, `SERPER_API_KEY`, `EXA_API_KEY`, `JINA_API_KEY`, `FIRECRAWL_API_KEY`, or `SEARXNG_API_KEY`
107
104
  2. `apiKeys.<provider>` field in `~/.config/rpiv-web-tools/config.json`
108
105
  3. Legacy `apiKey` field (Brave only — auto-migrated to the new shape on next save)
109
106
 
110
107
  The active provider is `config.provider` (set by `/web-search-config`); falls back to `brave` if absent.
111
108
 
109
+ ## SearXNG (self-hosted)
110
+
111
+ SearXNG is the only provider that talks to an instance you control, so it needs a base URL instead of (or in addition to) an API key.
112
+
113
+ ```bash
114
+ export SEARXNG_URL=http://localhost:8080
115
+ # Optional: only if your instance sits behind a Bearer-auth reverse proxy
116
+ export SEARXNG_API_KEY=…
117
+ ```
118
+
119
+ Resolution order for the URL: `SEARXNG_URL` env var → `baseUrls.searxng` in `~/.config/rpiv-web-tools/config.json` → default `http://localhost:8080`. `/web-search-config` prompts for the URL first and the (optional) API key second.
120
+
121
+ Your instance must have `json` enabled in `settings.yml` under `search.formats` — default SearXNG installs ship with JSON disabled and will return `403 Forbidden` otherwise (per the [SearXNG search API docs](https://docs.searxng.org/dev/search_api.html)). The provider surfaces that case with an actionable hint. SearXNG's `web_fetch` reuses the same raw-HTTP + HTML-to-text pipeline as Brave/Serper, so URLs returned by `web_search` can be fetched without any extra setup.
122
+
123
+ The SSRF guard (which refuses loopback and RFC-1918 addresses) applies to URLs `web_fetch` retrieves on the model's behalf, not to the SearXNG search endpoint itself: a `SEARXNG_URL` pointing at `http://localhost:8080` or another private host is intentionally reachable, since SearXNG is self-hosted by design.
124
+
125
+ ### Running SearXNG locally with Docker
126
+
127
+ The `searxng/searxng` entrypoint **overwrites** `/etc/searxng/settings.yml` on first start with the bundled default (ships with `formats: [html]` only). Pre-populating the mounted file doesn't stick — wait for the entrypoint, then patch:
128
+
129
+ ```bash
130
+ mkdir -p ~/.searxng
131
+ docker run -d --name searxng --restart unless-stopped \
132
+ -p 8080:8080 -v "$HOME/.searxng":/etc/searxng \
133
+ -e BASE_URL=http://localhost:8080/ searxng/searxng:latest
134
+ sleep 5 # wait for entrypoint to write settings.yml
135
+ sed -i.bak '/^ formats:$/,/^[^ ]/ { /- html/a\
136
+ - json
137
+ }' ~/.searxng/settings.yml
138
+ docker restart searxng
139
+
140
+ # Sanity check — a number > 0 means it's wired correctly
141
+ curl -sf 'http://localhost:8080/search?q=hello&format=json' | jq '.results | length'
142
+ ```
143
+
144
+ `403` means JSON is still disabled — re-check `~/.searxng/settings.yml`. Works identically on Docker Desktop or OrbStack. For a throwaway test instance, swap `~/.searxng` for `/tmp/searxng` and drop `--restart unless-stopped`.
145
+
112
146
  ## Executor guidance overrides
113
147
 
114
148
  Override the `promptSnippet` / `promptGuidelines` the model sees for each tool by editing `~/.config/rpiv-web-tools/config.json`. Note the per-tool nesting under `guidance.web_search` / `guidance.web_fetch` — this differs from the flat `guidance` shape used by single-tool siblings (`rpiv-advisor`, `rpiv-todo`, `rpiv-ask-user-question`):
@@ -145,4 +179,7 @@ The guard is host-literal only; it does NOT resolve DNS or validate redirects. A
145
179
 
146
180
  ## License
147
181
 
182
+ [![npm version](https://img.shields.io/npm/v/@juicesharp/rpiv-web-tools.svg)](https://www.npmjs.com/package/@juicesharp/rpiv-web-tools)
183
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
184
+
148
185
  MIT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juicesharp/rpiv-web-tools",
3
- "version": "1.10.2",
3
+ "version": "1.12.0",
4
4
  "description": "Pi extension. Web search and fetch for the model with pluggable providers (Brave, Tavily, Serper, Exa, Jina, Firecrawl, SearXNG).",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -48,7 +48,7 @@
48
48
  ]
49
49
  },
50
50
  "dependencies": {
51
- "@juicesharp/rpiv-config": "^1.10.2"
51
+ "@juicesharp/rpiv-config": "^1.12.0"
52
52
  },
53
53
  "peerDependencies": {
54
54
  "@earendil-works/pi-coding-agent": "*",
@@ -2,11 +2,18 @@ import { BraveProvider } from "./brave.js";
2
2
  import { ExaProvider } from "./exa.js";
3
3
  import { FirecrawlProvider } from "./firecrawl.js";
4
4
  import { JinaProvider } from "./jina.js";
5
+ import { SearxngProvider } from "./searxng.js";
5
6
  import { SerperProvider } from "./serper.js";
6
7
  import { TavilyProvider } from "./tavily.js";
7
8
  import type { SearchProvider } from "./types.js";
8
9
 
9
- export function createSearchProvider(name: string, apiKey: string): SearchProvider {
10
+ export interface ProviderCredentials {
11
+ apiKey?: string;
12
+ baseUrl?: string;
13
+ }
14
+
15
+ export function createSearchProvider(name: string, creds: ProviderCredentials): SearchProvider {
16
+ const apiKey = creds.apiKey ?? "";
10
17
  switch (name) {
11
18
  case "brave":
12
19
  return new BraveProvider(apiKey);
@@ -20,6 +27,8 @@ export function createSearchProvider(name: string, apiKey: string): SearchProvid
20
27
  return new JinaProvider(apiKey);
21
28
  case "firecrawl":
22
29
  return new FirecrawlProvider(apiKey);
30
+ case "searxng":
31
+ return new SearxngProvider({ apiKey: creds.apiKey, baseUrl: creds.baseUrl ?? "" });
23
32
  default:
24
33
  throw new Error(`Unknown search provider: "${name}"`);
25
34
  }
@@ -2,23 +2,50 @@ import { BRAVE_PROVIDER_META } from "./brave.js";
2
2
  import { EXA_PROVIDER_META } from "./exa.js";
3
3
  import { FIRECRAWL_PROVIDER_META } from "./firecrawl.js";
4
4
  import { JINA_PROVIDER_META } from "./jina.js";
5
+ import { SEARXNG_PROVIDER_META } from "./searxng.js";
5
6
  import { SERPER_PROVIDER_META } from "./serper.js";
6
7
  import { TAVILY_PROVIDER_META } from "./tavily.js";
8
+ import type { ProviderMeta } from "./types.js";
7
9
 
8
10
  export { BRAVE_API_KEY_ENV_VAR, BRAVE_PROVIDER_META, BraveProvider } from "./brave.js";
9
11
  export { EXA_API_KEY_ENV_VAR, EXA_PROVIDER_META, ExaProvider } from "./exa.js";
10
- export { createSearchProvider } from "./factory.js";
12
+ export { createSearchProvider, type ProviderCredentials } from "./factory.js";
11
13
  export { FIRECRAWL_API_KEY_ENV_VAR, FIRECRAWL_PROVIDER_META, FirecrawlProvider } from "./firecrawl.js";
12
14
  export { JINA_API_KEY_ENV_VAR, JINA_PROVIDER_META, JinaProvider } from "./jina.js";
15
+ export {
16
+ configureSearxng,
17
+ SEARXNG_API_KEY_ENV_VAR,
18
+ SEARXNG_DEFAULT_URL,
19
+ SEARXNG_PROVIDER_META,
20
+ SEARXNG_URL_ENV_VAR,
21
+ type SearxngConfigChange,
22
+ type SearxngConfigCurrent,
23
+ type SearxngConfigUi,
24
+ SearxngProvider,
25
+ } from "./searxng.js";
13
26
  export { SERPER_API_KEY_ENV_VAR, SERPER_PROVIDER_META, SerperProvider } from "./serper.js";
14
27
  export { TAVILY_API_KEY_ENV_VAR, TAVILY_PROVIDER_META, TavilyProvider } from "./tavily.js";
15
- export type { FetchResponse, SearchProvider, SearchResponse, SearchResult } from "./types.js";
28
+ export type {
29
+ FetchResponse,
30
+ ProviderConfigChange,
31
+ ProviderConfigCurrent,
32
+ ProviderConfigUi,
33
+ ProviderMeta,
34
+ SearchProvider,
35
+ SearchResponse,
36
+ SearchResult,
37
+ } from "./types.js";
16
38
 
17
- export const PROVIDERS = [
39
+ // Typed as readonly ProviderMeta[] (not `as const`) so iterators can access
40
+ // the optional META fields (baseUrlEnvVar, defaultBaseUrl, configure) without
41
+ // per-element narrowing. Individual META consts still expose their narrow
42
+ // literal types when imported directly.
43
+ export const PROVIDERS: readonly ProviderMeta[] = [
18
44
  BRAVE_PROVIDER_META,
19
45
  TAVILY_PROVIDER_META,
20
46
  SERPER_PROVIDER_META,
21
47
  EXA_PROVIDER_META,
22
48
  JINA_PROVIDER_META,
23
49
  FIRECRAWL_PROVIDER_META,
24
- ] as const;
50
+ SEARXNG_PROVIDER_META,
51
+ ];
@@ -0,0 +1,242 @@
1
+ import { assertTextContentType, extractBodyAsText, fetchUrlOrThrow } from "./fetch-helpers.js";
2
+ import {
3
+ type FetchResponse,
4
+ isCancellation,
5
+ type ProviderConfigChange,
6
+ type ProviderConfigCurrent,
7
+ type ProviderConfigUi,
8
+ type ProviderMeta,
9
+ type SearchProvider,
10
+ type SearchResponse,
11
+ type SearchResult,
12
+ } from "./types.js";
13
+
14
+ export const SEARXNG_API_KEY_ENV_VAR = "SEARXNG_API_KEY";
15
+ export const SEARXNG_URL_ENV_VAR = "SEARXNG_URL";
16
+ export const SEARXNG_DEFAULT_URL = "http://localhost:8080";
17
+
18
+ // SearXNG search API knobs (per https://docs.searxng.org/dev/search_api.html).
19
+ const SEARXNG_SEARCH_PATH = "/search";
20
+ const SEARXNG_FORMAT_JSON = "json";
21
+ const SEARXNG_SAFESEARCH_OFF = "0"; // 0/1/2 = none/moderate/strict
22
+
23
+ // Number of leading + trailing characters preserved when masking a Bearer key
24
+ // in the config prompt. Mirrors API_KEY_MASK_VISIBLE_CHARS in web-tools.ts.
25
+ const MASK_VISIBLE_CHARS = 4;
26
+
27
+ // SearXNG-specific aliases of the generic config shapes — preserved for
28
+ // backward compatibility with the symbols exported in v1.11.0. New providers
29
+ // should consume the generic ProviderConfig* types from ./types.js directly.
30
+ export type SearxngConfigUi = ProviderConfigUi;
31
+ export type SearxngConfigCurrent = ProviderConfigCurrent;
32
+ export type SearxngConfigChange = ProviderConfigChange;
33
+
34
+ export const SEARXNG_PROVIDER_META: ProviderMeta = {
35
+ name: "searxng",
36
+ label: "SearXNG",
37
+ envVar: SEARXNG_API_KEY_ENV_VAR,
38
+ baseUrlEnvVar: SEARXNG_URL_ENV_VAR,
39
+ defaultBaseUrl: SEARXNG_DEFAULT_URL,
40
+ configure: (ui, current) => configureSearxng(ui, current),
41
+ };
42
+
43
+ interface SearxngRawResult {
44
+ title?: string;
45
+ url?: string;
46
+ content?: string;
47
+ }
48
+
49
+ interface SearxngRawResponse {
50
+ results?: SearxngRawResult[];
51
+ }
52
+
53
// Converts a raw SearXNG JSON payload into at most `maxResults` SearchResult
// entries (title/url/snippet, with SearXNG's `content` mapped to `snippet`).
//
// The payload is external JSON that was only type-asserted, never validated,
// so nothing about its shape is trusted here:
//   - a missing or non-array `results` yields an empty list instead of throwing
//   - non-object entries and non-string fields degrade to empty strings
//   - a negative `maxResults` yields no results (a raw slice(0, -n) would
//     silently return "all but the last n" instead)
function normalizeSearxngResults(raw: SearxngRawResponse, maxResults: number): SearchResult[] {
	const rows: unknown = raw?.results;
	if (!Array.isArray(rows)) return [];

	const textOrEmpty = (value: unknown): string => (typeof value === "string" ? value : "");
	const limit = Math.max(0, maxResults);

	return rows.slice(0, limit).map((row: unknown) => {
		const fields = (row !== null && typeof row === "object" ? row : {}) as Record<string, unknown>;
		return {
			title: textOrEmpty(fields.title),
			url: textOrEmpty(fields.url),
			snippet: textOrEmpty(fields.content),
		};
	});
}
60
+
61
// Removes every trailing "/" from `url` so a path can be appended without
// producing a double slash. Uses a linear scan from the end rather than
// /\/+$/, which backtracks quadratically on long slash runs that are not at
// the end of the string.
function stripTrailingSlashes(url: string): string {
	const SLASH = 47; // "/".charCodeAt(0)
	let end = url.length;
	while (end > 0 && url.charCodeAt(end - 1) === SLASH) end--;
	return url.slice(0, end);
}
64
+
65
+ // Reject anything that isn't an http(s) URL — a user-supplied SEARXNG_URL
66
+ // must not be allowed to silently become `file://`, `javascript:`, `data:`
67
+ // or any other scheme that `new URL()` accepts but we'd misuse downstream.
68
+ function assertHttpUrl(url: string): void {
69
+ let parsed: URL;
70
+ try {
71
+ parsed = new URL(url);
72
+ } catch {
73
+ throw new Error(`${SEARXNG_URL_ENV_VAR} is not a valid URL (got: ${url})`);
74
+ }
75
+ if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
76
+ throw new Error(
77
+ `${SEARXNG_URL_ENV_VAR} must use http:// or https:// (got: ${parsed.protocol.replace(":", "")}://)`,
78
+ );
79
+ }
80
+ }
81
+
82
+ // 401 ≈ reverse-proxy auth rejected the Bearer token. 403 from a default
83
+ // SearXNG install almost always means JSON output is disabled — the docs
84
+ // explicitly warn that "Requesting an unset format will return a 403
85
+ // Forbidden error". Surface the actionable fix for each.
86
+ function hintForSearchStatus(status: number): string {
87
+ if (status === 401) {
88
+ return ` (the SearXNG instance's reverse-proxy rejected the Bearer token; check ${SEARXNG_API_KEY_ENV_VAR} or apiKeys.searxng)`;
89
+ }
90
+ if (status === 403) {
91
+ return " (the SearXNG instance may have JSON output disabled; enable 'json' under 'search.formats' in its settings.yml)";
92
+ }
93
+ return "";
94
+ }
95
+
96
+ interface SearxngProviderOptions {
97
+ apiKey?: string;
98
+ baseUrl: string;
99
+ }
100
+
101
/**
 * SearchProvider backed by a self-hosted SearXNG instance.
 *
 * search() queries the instance's JSON search endpoint; fetch() retrieves the
 * target URL directly through the shared fetch helpers and never contacts the
 * SearXNG instance.
 */
export class SearxngProvider implements SearchProvider {
	readonly name = "searxng";
	readonly label = "SearXNG";
	readonly envVar = SEARXNG_API_KEY_ENV_VAR;

	private readonly apiKey?: string;
	private readonly baseUrl: string;

	/**
	 * @param options apiKey is an optional Bearer token (blank/whitespace is
	 *   treated as unset); baseUrl is the instance URL, stored with trailing
	 *   slashes stripped. An empty baseUrl is accepted here and only rejected
	 *   when search() is called.
	 * @throws Error if a non-empty baseUrl is not a valid http(s) URL.
	 */
	constructor(options: SearxngProviderOptions) {
		this.apiKey = options.apiKey?.trim() || undefined;
		const trimmed = stripTrailingSlashes(options.baseUrl?.trim() ?? "");
		if (trimmed) assertHttpUrl(trimmed);
		this.baseUrl = trimmed;
	}

	/**
	 * Runs `query` against the instance and returns at most `maxResults` results.
	 *
	 * @throws Error if no base URL is configured, if the instance answers with a
	 *   non-2xx status, or if a 2xx response body is not JSON.
	 */
	async search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResponse> {
		this.requireBaseUrl();
		const res = await fetch(this.buildSearchUrl(query), {
			method: "GET",
			headers: this.buildAuthHeaders(),
			signal,
		});
		if (!res.ok) throw await this.searchApiError(res);
		const raw = await this.readSearchJson(res);
		return { query, results: normalizeSearxngResults(raw, maxResults) };
	}

	// No guard: SearXNG's fetch() wraps the built-in HTTP+htmlToText pipeline
	// and does not call the SearXNG instance — same contract as Brave/Serper.
	async fetch(url: string, raw: boolean, signal?: AbortSignal): Promise<FetchResponse> {
		const res = await fetchUrlOrThrow(url, signal);
		const contentType = res.headers.get("content-type") ?? "";
		assertTextContentType(contentType);

		const { text, title } = await extractBodyAsText(res, contentType, raw);
		const contentLengthHeader = res.headers.get("content-length");
		// A malformed Content-Length header must not surface as NaN; report it
		// as unknown instead.
		const parsedLength = contentLengthHeader ? Number(contentLengthHeader) : Number.NaN;
		return {
			text,
			title,
			contentType: contentType || undefined,
			contentLength: Number.isFinite(parsedLength) ? parsedLength : undefined,
		};
	}

	private requireBaseUrl(): void {
		if (!this.baseUrl) {
			throw new Error(
				`${SEARXNG_URL_ENV_VAR} is not set. Run /web-search-config to configure, or export the env var.`,
			);
		}
	}

	// The SearXNG API exposes only `pageno` for pagination, not `count`/`limit`
	// (https://docs.searxng.org/dev/search_api.html), so we ask for a single
	// page and slice to maxResults client-side.
	private buildSearchUrl(query: string): string {
		const url = new URL(`${this.baseUrl}${SEARXNG_SEARCH_PATH}`);
		url.searchParams.set("q", query);
		url.searchParams.set("format", SEARXNG_FORMAT_JSON);
		url.searchParams.set("safesearch", SEARXNG_SAFESEARCH_OFF);
		return url.toString();
	}

	// SearXNG itself has no native auth; the optional Bearer key is for
	// instances fronted by a reverse-proxy that gates on Authorization.
	private buildAuthHeaders(): Record<string, string> {
		const headers: Record<string, string> = { Accept: "application/json" };
		if (this.apiKey) headers.Authorization = `Bearer ${this.apiKey}`;
		return headers;
	}

	// Parses a 2xx search response. A body that is not JSON (e.g. an HTML page
	// served by a proxy, or a URL that is not a SearXNG instance) is reported
	// with a setup hint instead of a bare SyntaxError. Any other failure, such
	// as an abort while reading the body, is rethrown untouched. A literal
	// `null` body is treated as an empty payload.
	private async readSearchJson(res: Response): Promise<SearxngRawResponse> {
		try {
			return ((await res.json()) ?? {}) as SearxngRawResponse;
		} catch (err: unknown) {
			if (err instanceof SyntaxError) {
				throw new Error(
					`${this.label} Search API returned a non-JSON response (${res.status}); verify that ${SEARXNG_URL_ENV_VAR} points at a SearXNG instance with 'json' enabled under 'search.formats'`,
				);
			}
			throw err;
		}
	}

	// Builds the Error for a non-2xx search response: status, an actionable hint
	// for 401/403, then the response body.
	private async searchApiError(res: Response): Promise<Error> {
		const body = await res.text();
		return new Error(`${this.label} Search API error (${res.status})${hintForSearchStatus(res.status)}: ${body}`);
	}
}
177
+
178
+ // ---------------------------------------------------------------------------
179
+ // /web-search-config helper — SearXNG branch
180
+ // ---------------------------------------------------------------------------
181
+ // SEARXNG_PROVIDER_META.configure wires configureSearxng() in; the orchestrator
182
+ // dispatches generically through ProviderMeta.configure without naming
183
+ // SearXNG specifically.
184
+
185
// Masks a Bearer key for display in the config prompt, keeping only the first
// and last MASK_VISIBLE_CHARS characters. Modelled on web-tools.ts:maskApiKey
// and duplicated here to keep providers/* free of web-tools internals;
// consolidate if this ever grows.
//
// Keys too short for the head and tail slices to be disjoint are masked in
// full: slicing them would echo the entire secret back to the screen
// (e.g. "abcd" -> "abcd...abcd").
function maskKey(key: string): string {
	if (key.length <= MASK_VISIBLE_CHARS * 2) return "...";
	const head = key.slice(0, MASK_VISIBLE_CHARS);
	const tail = key.slice(-MASK_VISIBLE_CHARS);
	return `${head}...${tail}`;
}
192
+
193
+ // Returns the resolved URL string, or `undefined` if the user cancelled.
194
+ // Empty input keeps the current URL or falls back to SEARXNG_DEFAULT_URL.
195
+ async function promptForBaseUrl(ui: ProviderConfigUi, current: string | undefined): Promise<string | undefined> {
196
+ const existing = current?.trim();
197
+ const input = await ui.input(
198
+ "SearXNG base URL",
199
+ existing
200
+ ? `Press Enter to keep current (${existing}), or type new URL`
201
+ : `Press Enter for default (${SEARXNG_DEFAULT_URL}), or type instance URL`,
202
+ );
203
+ if (isCancellation(input)) return undefined;
204
+ return input.trim() || existing || SEARXNG_DEFAULT_URL;
205
+ }
206
+
207
+ // Returns the resolved key string, `null` to leave unset, or `undefined` if
208
+ // the user cancelled. Empty input keeps the current key or leaves it unset.
209
+ async function promptForOptionalKey(
210
+ ui: ProviderConfigUi,
211
+ current: string | undefined,
212
+ ): Promise<string | null | undefined> {
213
+ const existing = current?.trim() || undefined;
214
+ const input = await ui.input(
215
+ "SearXNG API key (optional — for instances behind a Bearer-auth proxy)",
216
+ existing
217
+ ? `Press Enter to keep current (${maskKey(existing)}), or type new key`
218
+ : "Press Enter to leave unset, or type a key",
219
+ );
220
+ if (isCancellation(input)) return undefined;
221
+ return input.trim() || existing || null;
222
+ }
223
+
224
+ /**
225
+ * Prompts the user for the SearXNG base URL and optional Bearer API key.
226
+ * Returns `null` if the user cancels at either prompt.
227
+ *
228
+ * The caller owns persistence (loading/merging/saving WebToolsConfig) and
229
+ * user-visible notifications. This helper only handles the prompt flow.
230
+ */
231
+ export async function configureSearxng(
232
+ ui: SearxngConfigUi,
233
+ current: SearxngConfigCurrent,
234
+ ): Promise<SearxngConfigChange | null> {
235
+ const baseUrl = await promptForBaseUrl(ui, current.baseUrl);
236
+ if (baseUrl === undefined) return null;
237
+
238
+ const apiKey = await promptForOptionalKey(ui, current.apiKey);
239
+ if (apiKey === undefined) return null;
240
+
241
+ return { baseUrl, apiKey };
242
+ }
@@ -23,3 +23,57 @@ export interface SearchProvider {
23
23
  search(query: string, maxResults: number, signal?: AbortSignal): Promise<SearchResponse>;
24
24
  fetch(url: string, raw: boolean, signal?: AbortSignal): Promise<FetchResponse>;
25
25
  }
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // PROVIDER_META + per-provider configure() contract
29
+ // ---------------------------------------------------------------------------
30
+
31
+ // User input from a ProviderConfigUi prompt. Both `null` and `undefined`
32
+ // indicate the user cancelled (different UI implementations may return
33
+ // either); use isCancellation() to test instead of comparing manually.
34
+ export type UserInput = string | null | undefined;
35
+
36
+ export function isCancellation(input: UserInput): input is null | undefined {
37
+ return input == null;
38
+ }
39
+
40
+ // Minimal UI surface a provider's configure() helper is allowed to depend on.
41
+ // Intentionally narrow so providers/ stays free of web-tools internals (no
42
+ // circular import) and so the contract can grow deliberately if a future
43
+ // provider needs more.
44
+ export interface ProviderConfigUi {
45
+ input(label: string, placeholder: string): Promise<UserInput>;
46
+ }
47
+
48
+ // What the orchestrator hands to configure(): the provider's currently
49
+ // persisted state (if any).
50
+ export interface ProviderConfigCurrent {
51
+ baseUrl?: string;
52
+ apiKey?: string;
53
+ }
54
+
55
+ // What configure() returns for the orchestrator to merge into WebToolsConfig.
56
+ // `null` apiKey = "leave unset"; absent baseUrl = "this provider has no URL
57
+ // knob"; whole-result `null` = "user cancelled, do not persist".
58
+ export interface ProviderConfigChange {
59
+ baseUrl?: string;
60
+ apiKey?: string | null;
61
+ }
62
+
63
+ // Per-provider metadata declared alongside each provider's class. Drives
64
+ // generic dispatch in web-tools.ts so adding a new provider doesn't require
65
+ // touching the orchestrator.
66
+ //
67
+ // envVar — the API-key env var (omit if the provider has no key)
68
+ // baseUrlEnvVar — the URL env var (set for self-hosted providers)
69
+ // defaultBaseUrl — fallback URL when neither env nor config supplies one
70
+ // configure — interactive setup; if present, /web-search-config
71
+ // dispatches here instead of the default single-key prompt
72
+ export interface ProviderMeta {
73
+ name: string;
74
+ label: string;
75
+ envVar?: string;
76
+ baseUrlEnvVar?: string;
77
+ defaultBaseUrl?: string;
78
+ configure?(ui: ProviderConfigUi, current: ProviderConfigCurrent): Promise<ProviderConfigChange | null>;
79
+ }
package/web-tools.ts CHANGED
@@ -28,7 +28,7 @@ import { configPath, loadJsonConfig, saveJsonConfig, validateGuidanceFields } fr
28
28
  import { Type } from "typebox";
29
29
  import { createSearchProvider } from "./providers/factory.js";
30
30
  import { PROVIDERS } from "./providers/index.js";
31
- import type { SearchResult } from "./providers/types.js";
31
+ import type { ProviderMeta, SearchProvider, SearchResult } from "./providers/types.js";
32
32
 
33
33
  // ---------------------------------------------------------------------------
34
34
  // Tunables and external surface
@@ -55,6 +55,12 @@ const UNSET_LABEL = "(not set)";
55
55
 
56
56
  const DEFAULT_PROVIDER_NAME = "brave";
57
57
 
58
+ // Brave is the only provider whose key was historically stored at the top
59
+ // level (config.apiKey) before the per-provider apiKeys map. The legacy
60
+ // field is auto-migrated to apiKeys.brave on the next save by
61
+ // /web-search-config (the dispatch deletes apiKey from the saved object).
62
+ const LEGACY_TOP_LEVEL_KEY_PROVIDER = "brave";
63
+
58
64
  // ---------------------------------------------------------------------------
59
65
  // Config file persistence
60
66
  // ---------------------------------------------------------------------------
@@ -69,6 +75,7 @@ interface WebToolsGuidance {
69
75
  interface WebToolsConfig {
70
76
  provider?: string;
71
77
  apiKeys?: Record<string, string>;
78
+ baseUrls?: Record<string, string>;
72
79
  apiKey?: string; // legacy — kept for backward compat
73
80
  guidance?: WebToolsGuidance;
74
81
  }
@@ -112,19 +119,44 @@ function resolveProviderApiKey(providerName: string, config: WebToolsConfig): st
112
119
  const meta = PROVIDERS.find((p) => p.name === providerName);
113
120
  if (!meta) return undefined;
114
121
 
115
- const envKey = process.env[meta.envVar]?.trim();
122
+ const envKey = meta.envVar ? process.env[meta.envVar]?.trim() : undefined;
116
123
  if (envKey) return envKey;
117
124
 
118
125
  const configKey = config.apiKeys?.[providerName]?.trim();
119
126
  if (configKey) return configKey;
120
127
 
121
- if (providerName === "brave") {
128
+ if (providerName === LEGACY_TOP_LEVEL_KEY_PROVIDER) {
122
129
  return config.apiKey?.trim() || undefined;
123
130
  }
124
131
 
125
132
  return undefined;
126
133
  }
127
134
 
135
+ // Generic per-provider base-URL resolution: env → config.baseUrls[name] →
136
+ // meta.defaultBaseUrl → "". Providers without baseUrlEnvVar (hosted ones)
137
+ // short-circuit to "". The orchestrator only calls this for providers that
138
+ // declare baseUrlEnvVar, so the empty-string fallback is a safety net rather
139
+ // than a runtime path.
140
+ function resolveProviderBaseUrl(meta: ProviderMeta, config: WebToolsConfig): string {
141
+ if (!meta.baseUrlEnvVar) return "";
142
+ const envUrl = process.env[meta.baseUrlEnvVar]?.trim();
143
+ if (envUrl) return envUrl;
144
+ const configUrl = config.baseUrls?.[meta.name]?.trim();
145
+ if (configUrl) return configUrl;
146
+ return meta.defaultBaseUrl ?? "";
147
+ }
148
+
149
+ // Centralized instantiation: load active provider name + creds, build via
150
+ // the factory. Called by both registerWebSearchTool and registerWebFetchTool.
151
+ function instantiateActiveProvider(config: WebToolsConfig): { providerName: string; provider: SearchProvider } {
152
+ const providerName = config.provider ?? DEFAULT_PROVIDER_NAME;
153
+ const apiKey = resolveProviderApiKey(providerName, config);
154
+ const meta = PROVIDERS.find((p) => p.name === providerName);
155
+ const baseUrl = meta?.baseUrlEnvVar ? resolveProviderBaseUrl(meta, config) : undefined;
156
+ const provider = createSearchProvider(providerName, { apiKey: apiKey ?? "", baseUrl });
157
+ return { providerName, provider };
158
+ }
159
+
128
160
  function maskApiKey(key: string | undefined): string {
129
161
  if (!key) return UNSET_LABEL;
130
162
  const head = key.slice(0, API_KEY_MASK_VISIBLE_CHARS);
@@ -261,9 +293,7 @@ export function registerWebSearchTool(pi: ExtensionAPI): void {
261
293
  async execute(_toolCallId, params, signal, onUpdate, _ctx) {
262
294
  const maxResults = clampSearchResultCount(params.max_results);
263
295
  const config = loadConfig();
264
- const providerName = config.provider ?? DEFAULT_PROVIDER_NAME;
265
- const apiKey = resolveProviderApiKey(providerName, config);
266
- const provider = createSearchProvider(providerName, apiKey ?? "");
296
+ const { providerName, provider } = instantiateActiveProvider(config);
267
297
 
268
298
  onUpdate?.({
269
299
  content: [{ type: "text", text: `Searching ${provider.label} for: "${params.query}"...` }],
@@ -351,9 +381,7 @@ export function registerWebFetchTool(pi: ExtensionAPI): void {
351
381
  });
352
382
 
353
383
  const config = loadConfig();
354
- const providerName = config.provider ?? DEFAULT_PROVIDER_NAME;
355
- const apiKey = resolveProviderApiKey(providerName, config);
356
- const provider = createSearchProvider(providerName, apiKey ?? "");
384
+ const { provider } = instantiateActiveProvider(config);
357
385
 
358
386
  const { text: bodyText, title, contentType, contentLength } = await provider.fetch(url, raw, signal);
359
387
 
@@ -432,15 +460,27 @@ function formatShowConfigMessage(current: WebToolsConfig): string {
432
460
  lines.push(` active provider: ${providerName}`);
433
461
 
434
462
  for (const meta of PROVIDERS) {
435
- const envKey = process.env[meta.envVar]?.trim();
463
+ const envKey = meta.envVar ? process.env[meta.envVar]?.trim() : undefined;
436
464
  const configKey = current.apiKeys?.[meta.name]?.trim();
437
- const legacyKey = meta.name === "brave" ? current.apiKey?.trim() : undefined;
465
+ const legacyKey = meta.name === LEGACY_TOP_LEVEL_KEY_PROVIDER ? current.apiKey?.trim() : undefined;
438
466
  const resolved = envKey ?? configKey ?? legacyKey;
439
467
  lines.push(
440
468
  ` ${meta.name}: ${maskApiKey(resolved)} (env: ${maskApiKey(envKey)}, config: ${maskApiKey(configKey ?? legacyKey)})`,
441
469
  );
442
470
  }
443
471
 
472
+ // One URL line per provider that declares baseUrlEnvVar. Today this is
473
+ // only SearXNG, but a second self-hosted provider lands without touching
474
+ // this loop.
475
+ for (const meta of PROVIDERS) {
476
+ if (!meta.baseUrlEnvVar) continue;
477
+ const envUrl = process.env[meta.baseUrlEnvVar]?.trim();
478
+ const configUrl = current.baseUrls?.[meta.name]?.trim();
479
+ const resolvedUrl = envUrl || configUrl || meta.defaultBaseUrl || "";
480
+ const urlSource = envUrl ? "env" : configUrl ? "config" : "default";
481
+ lines.push(` ${meta.name} url: ${resolvedUrl} (source: ${urlSource})`);
482
+ }
483
+
444
484
  return lines.join("\n");
445
485
  }
446
486
 
@@ -465,7 +505,15 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI): void {
465
505
  ...PROVIDERS.filter((p) => p.name === activeProvider),
466
506
  ...PROVIDERS.filter((p) => p.name !== activeProvider),
467
507
  ];
468
- const hasKey = (p: (typeof PROVIDERS)[number]) => resolveProviderApiKey(p.name, current) !== undefined;
508
+ const hasKey = (p: ProviderMeta) => {
509
+ // Self-hosted providers are "configured" once they have a base URL
510
+ // (env or config). The bare default URL doesn't count — it's just a
511
+ // hint that the user hasn't touched the setting yet.
512
+ if (p.baseUrlEnvVar) {
513
+ return Boolean(process.env[p.baseUrlEnvVar]?.trim() || current.baseUrls?.[p.name]?.trim());
514
+ }
515
+ return resolveProviderApiKey(p.name, current) !== undefined;
516
+ };
469
517
  const labelOf = (p: (typeof PROVIDERS)[number]) => {
470
518
  const markers: string[] = [];
471
519
  if (p.name === activeProvider) markers.push("✓");
@@ -490,8 +538,46 @@ export function registerWebSearchConfigCommand(pi: ExtensionAPI): void {
490
538
  }
491
539
  const selectedProvider = selectedMeta.name;
492
540
 
541
+ // Providers that declare a `configure` callback own their prompt flow
542
+ // (e.g. SearXNG: URL prompt then optional Bearer key). The orchestrator
543
+ // dispatches generically and owns persistence + notifications.
544
+ if (selectedMeta.configure) {
545
+ const result = await selectedMeta.configure(ctx.ui, {
546
+ baseUrl: current.baseUrls?.[selectedProvider],
547
+ apiKey: current.apiKeys?.[selectedProvider],
548
+ });
549
+ if (!result) {
550
+ ctx.ui.notify("Web search config unchanged", "info");
551
+ return;
552
+ }
553
+ const toSave: WebToolsConfig = {
554
+ ...current,
555
+ provider: selectedProvider,
556
+ ...(result.baseUrl !== undefined && {
557
+ baseUrls: { ...current.baseUrls, [selectedProvider]: result.baseUrl },
558
+ }),
559
+ ...(result.apiKey ? { apiKeys: { ...current.apiKeys, [selectedProvider]: result.apiKey } } : {}),
560
+ };
561
+ delete (toSave as { apiKey?: string }).apiKey;
562
+ if (!saveConfig(toSave)) {
563
+ ctx.ui.notify(
564
+ `Failed to save ${selectedMeta.label} config to ${CONFIG_PATH} — disk write failed`,
565
+ "error",
566
+ );
567
+ return;
568
+ }
569
+ ctx.ui.notify(
570
+ result.baseUrl
571
+ ? `Saved ${selectedMeta.label} config (url: ${result.baseUrl}) to ${CONFIG_PATH}`
572
+ : `Saved ${selectedMeta.label} config to ${CONFIG_PATH}`,
573
+ "info",
574
+ );
575
+ return;
576
+ }
577
+
493
578
  const existingKey =
494
- current.apiKeys?.[selectedProvider] ?? (selectedProvider === "brave" ? current.apiKey : undefined);
579
+ current.apiKeys?.[selectedProvider] ??
580
+ (selectedProvider === LEGACY_TOP_LEVEL_KEY_PROVIDER ? current.apiKey : undefined);
495
581
  const input = await ctx.ui.input(
496
582
  `${selectedMeta.label} API key`,
497
583
  existingKey ? `Press Enter to keep current (${maskApiKey(existingKey)}), or type new key` : "...",