pi-web-toolkit 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.3.3] - 2026-06-28
11
+
12
+ ### Added
13
+
14
+ - `install.sh` bootstrap installer for one-command pi-web-toolkit setup, including dependency verification, SearXNG endpoint selection, toolkit config writing, optional Firecrawl setup, local development install mode, and `--doctor` diagnostics.
15
+ - Toolkit config support at `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json`, with environment variables taking precedence for SearXNG endpoints, Firecrawl fallback enablement, Firecrawl runner selection, and external CLI command paths.
16
+ - Public SearXNG endpoint discovery from `searx.space` with JSON API verification, plus an explicit isolated local Docker SearXNG option.
17
+ - Explicit Firecrawl runner selection through `firecrawlRunner` / `PI_WEB_FIRECRAWL_RUNNER`, supporting `installed`, `npx`, and `bunx`.
18
+ - Regression tests for toolkit config precedence and installer behavior, including public endpoint and local Docker flows.
19
+
20
+ ### Changed
21
+
22
+ - README and guide now present the bootstrap installer as the primary installation path while keeping manual setup as an advanced option.
23
+ - External CLI wrappers can use configured absolute command paths, reducing reliance on shell profile/PATH changes after the installer runs.
24
+ - `web_search` fallback behavior is covered by regression tests so missing optional Firecrawl runners do not appear as the primary search backend failure.
25
+
10
26
  ## [0.3.2] - 2026-06-25
11
27
 
12
28
  ### Fixed
@@ -157,7 +173,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
157
173
  - `web_browse` — interactive browser automation via agent-browser.
158
174
  - LLM-optimized `promptGuidelines` and `promptSnippet` for every tool.
159
175
 
160
- [Unreleased]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.3.2...HEAD
176
+ [Unreleased]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.3.3...HEAD
177
+ [0.3.3]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.3.2...v0.3.3
161
178
  [0.3.2]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.3.1...v0.3.2
162
179
  [0.3.1]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.3.0...v0.3.1
163
180
  [0.3.0]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.2.2...v0.3.0
package/README.md CHANGED
@@ -22,7 +22,7 @@ Web research toolkit for [pi](https://pi.dev) agents. Search via SearXNG, fetch
22
22
  | **`firecrawl_scrape`** | [firecrawl-cli](https://github.com/firecrawl/cli) (keyless) | Cloud single-page fetch (anti-bot / JS / PDF) | — |
23
23
  | **`firecrawl_interact`** | [firecrawl-cli](https://github.com/firecrawl/cli) (keyless) | Cloud natural-language page interaction | — |
24
24
 
25
- > **Firecrawl fallback.** `web_search`, `web_fetch`, and `web_browse` are the local-first primary tools and automatically retry through Firecrawl Keyless (1,000 free credits/month, no API key) only when their local backend errors out or search returns nothing. The three `firecrawl_*` tools are fallback-only escape hatches; agents are instructed not to call them first unless you explicitly ask for Firecrawl/cloud behavior or a local-first tool already failed. Disable fallback use with `PI_WEB_FIRECRAWL_FALLBACK=0`. Install the optional CLI: `npm install -g firecrawl-cli`.
25
+ > **Firecrawl fallback.** `web_search`, `web_fetch`, and `web_browse` are the local-first primary tools and automatically retry through Firecrawl Keyless (1,000 free credits/month, no API key) only when their local backend errors out or search returns nothing. The three `firecrawl_*` tools are fallback-only escape hatches; agents are instructed not to call them first unless you explicitly ask for Firecrawl/cloud behavior or a local-first tool already failed. Disable fallback use with `PI_WEB_FIRECRAWL_FALLBACK=0` or toolkit config `"firecrawlFallback": false`. Install the optional CLI: `npm install -g firecrawl-cli`.
26
26
 
27
27
  ## Tools Preview
28
28
 
@@ -46,154 +46,149 @@ A quick look at how pi renders toolkit calls while an agent searches, fetches, b
46
46
  </tr>
47
47
  </table>
48
48
 
49
- ## Install with Pi Agent
49
+ ## Quick Start
50
50
 
51
- Copy and send the prompt below to Pi. It will install this package and its external dependencies for you.
51
+ ### Install
52
52
 
53
- ```text
54
- Install pi-web-toolkit and its external dependencies. Complete and verify every
55
- step yourself; do not rely on web browsing or external documentation. Inspect
56
- the machine first and reuse working installations. Ask before using sudo,
57
- changing shell profiles, overwriting configuration, or modifying existing
58
- services or containers.
59
-
60
- 1. Ensure Node.js 22+, npm, Docker, OpenSSL, curl, uv, and Pi are installed, and
61
- that Docker is running. Install only missing or incompatible prerequisites.
62
- 2. Configure SearXNG:
63
- - Test SEARXNG_URL when set, then http://localhost:8080.
64
- - Verify /search?q=test&format=json returns JSON with a results array.
65
- - If neither endpoint works, first ensure no existing container or config
66
- would be overwritten, then create a local-only instance by running:
67
-
68
- mkdir -p "$HOME/.config/searxng"
69
- cat > "$HOME/.config/searxng/settings.yml" <<'YAML'
70
- use_default_settings: true
71
-
72
- search:
73
- formats:
74
- - html
75
- - json
76
- YAML
77
-
78
- docker run -d \
79
- --name searxng \
80
- --restart unless-stopped \
81
- -p 127.0.0.1:8080:8080 \
82
- -e FORCE_OWNERSHIP=false \
83
- -e SEARXNG_SECRET="$(openssl rand -hex 32)" \
84
- -v "$HOME/.config/searxng/settings.yml:/etc/searxng/settings.yml:ro" \
85
- docker.io/searxng/searxng:latest
86
-
87
- - Verify the selected endpoint by running:
88
-
89
- SEARXNG_ENDPOINT="${SEARXNG_URL:-http://localhost:8080}"
90
- curl -fsS --get "${SEARXNG_ENDPOINT%/}/search" \
91
- --data-urlencode "q=test" \
92
- --data "format=json" |
93
- grep -q '"results"' && echo "SearXNG JSON API ready"
94
-
95
- - Pi uses http://localhost:8080 by default. Set SEARXNG_URL before starting
96
- Pi only when using another endpoint.
97
- 3. Install and verify Scrapling:
98
- uv tool install "scrapling[all]"
99
- scrapling install
100
- scrapling --help
101
- 4. Install and verify agent-browser:
102
- npm install -g agent-browser
103
- agent-browser install
104
- agent-browser doctor
105
- On Linux, use agent-browser install --with-deps if required.
106
- 5. Optionally install firecrawl-cli for the keyless cloud fallback (no API key
107
- needed; the fallback degrades gracefully if it is absent):
108
- npm install -g firecrawl-cli
109
- 6. After all dependencies pass verification, install the package:
110
- pi install npm:pi-web-toolkit
111
-
112
- Report what was installed or reused, all verification results, the SearXNG
113
- endpoint Pi will use, and whether Pi must be restarted. Do not report success
114
- until every check passes.
53
+ Run the bootstrap installer:
54
+
55
+ ```bash
56
+ curl -fsSL https://raw.githubusercontent.com/Wade11s/pi-web-toolkit/main/install.sh | bash
115
57
  ```
116
58
 
117
- ## Quick Start
59
+ This is the normal install path. It installs the pi package, configures external runtime dependencies, verifies everything, and writes persistent runtime options to `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json`.
60
+
61
+ When it finishes, **restart pi** so the package is loaded. If pi-web-toolkit was already loaded and only toolkit config changed, `/reload` may also work.
62
+
63
+ ### What the installer does
64
+
65
+ The installer:
118
66
 
119
- ### 1. Install external dependencies
67
+ - Checks Node.js 22+, npm, Pi, curl, OpenSSL, and uv.
68
+ - Installs or reuses Scrapling and agent-browser.
69
+ - Configures a JSON-capable SearXNG endpoint for `web_search`.
70
+ - Optionally installs `firecrawl-cli` for the Firecrawl Keyless fallback.
71
+ - Writes toolkit config with the selected endpoint and discovered CLI paths.
72
+ - Installs the pi package with `pi install npm:pi-web-toolkit`.
73
+ - Runs final verification before reporting success.
120
74
 
121
- The commands below assume a POSIX shell with Docker, OpenSSL, curl, uv, and Node.js 22+ with npm.
75
+ The installer is conservative. It does **not** silently install Docker, Node.js, Pi, Homebrew, or OS packages, nor does it silently use sudo, change shell profiles, or overwrite user-managed SearXNG resources.
76
+
77
+ ### Common installer options
78
+
79
+ When piping from curl, pass flags after `bash -s --`:
122
80
 
123
81
  ```bash
124
- # SearXNG (for search; local-only instance with the required JSON API)
125
- mkdir -p "$HOME/.config/searxng"
126
- cat > "$HOME/.config/searxng/settings.yml" <<'YAML'
127
- use_default_settings: true
128
-
129
- search:
130
- formats:
131
- - html
132
- - json
133
- YAML
134
-
135
- docker run -d \
136
- --name searxng \
137
- --restart unless-stopped \
138
- -p 127.0.0.1:8080:8080 \
139
- -e FORCE_OWNERSHIP=false \
140
- -e SEARXNG_SECRET="$(openssl rand -hex 32)" \
141
- -v "$HOME/.config/searxng/settings.yml:/etc/searxng/settings.yml:ro" \
142
- docker.io/searxng/searxng:latest
143
- export SEARXNG_URL="http://127.0.0.1:8080"
82
+ curl -fsSL https://raw.githubusercontent.com/Wade11s/pi-web-toolkit/main/install.sh | bash -s -- --yes --searxng-url https://searxng.example.com --no-firecrawl
83
+ ```
84
+
85
+ If you have cloned the repo, run `./install.sh` directly with the same flags:
86
+
87
+ | Goal | Command |
88
+ |------|---------|
89
+ | Use an existing/self-hosted SearXNG endpoint | `./install.sh --searxng-url https://searxng.example.com` |
90
+ | Non-interactive install with a known endpoint | `./install.sh --yes --searxng-url https://searxng.example.com --no-firecrawl` |
91
+ | Explicitly auto-select a verified public SearXNG endpoint | `./install.sh --yes --auto-searxng public --no-firecrawl` |
92
+ | Start/reuse isolated local Docker SearXNG | `./install.sh --yes --auto-searxng local-docker --searxng-port 8080 --no-firecrawl` |
93
+ | Install optional Firecrawl Keyless fallback with global CLI | `./install.sh --with-firecrawl --firecrawl-runner installed` |
94
+ | Enable Firecrawl fallback through opt-in `npx` runner | `./install.sh --with-firecrawl --firecrawl-runner npx` |
95
+ | Enable Firecrawl fallback through opt-in `bunx` runner | `./install.sh --with-firecrawl --firecrawl-runner bunx` |
96
+ | Verify readiness without changing anything | `./install.sh --doctor` |
97
+ | Install from the current checkout | `./install.sh --local` |
98
+
99
+ ### SearXNG endpoint choices
100
+
101
+ `web_search` needs a SearXNG endpoint that supports JSON search responses:
102
+
103
+ ```bash
104
+ curl -fsS --get "https://searxng.example.com/search" \
105
+ --data-urlencode "q=searxng" \
106
+ --data "format=json" | grep -q '"results"'
107
+ ```
108
+
109
+ The installer can use:
110
+
111
+ - An existing/self-hosted endpoint passed with `--searxng-url`.
112
+ - A working local endpoint such as `http://localhost:8080`.
113
+ - A public endpoint discovered from `searx.space`, ranked by health signals, then verified with `format=json`.
114
+ - An isolated local Docker endpoint using container `pi-web-toolkit-searxng` and config under the toolkit config directory.
115
+
116
+ Public endpoints are not silently selected by default because search queries leave your machine. Use `--auto-searxng public` only when that trade-off is acceptable.
117
+
118
+ ### Manual install (advanced)
119
+
120
+ If you prefer to install dependencies yourself:
121
+
122
+ ```bash
123
+ # SearXNG endpoint: provide an existing JSON-capable endpoint, or run your own
124
+ export SEARXNG_URL="https://searxng.example.com"
144
125
 
145
126
  # scrapling (for fetch & batch fetch)
146
127
  uv tool install "scrapling[all]"
147
128
  scrapling install
148
129
 
149
130
  # agent-browser (for browse)
150
- npm i -g agent-browser && agent-browser install
151
- # On Linux hosts missing browser system libraries: agent-browser install --with-deps
131
+ npm i -g agent-browser
132
+ agent-browser install
133
+ agent-browser doctor
152
134
 
153
- # firecrawl-cli (OPTIONAL — enables the keyless cloud fallback; no API key needed)
135
+ # firecrawl-cli (optional cloud fallback; no API key needed)
154
136
  npm i -g firecrawl-cli
137
+
138
+ # pi package
139
+ pi install npm:pi-web-toolkit
155
140
  ```
156
141
 
157
- **Verify dependencies:**
142
+ A SearXNG endpoint must support `format=json`:
143
+
158
144
  ```bash
159
- # SearXNG
160
145
  curl -fsS --get "$SEARXNG_URL/search" \
161
146
  --data-urlencode "q=searxng" \
162
- --data "format=json" |
163
- grep -q '"results"' && echo "SearXNG JSON API ready"
147
+ --data "format=json" | grep -q '"results"'
148
+ ```
164
149
 
165
- # scrapling
166
- scrapling --help
150
+ ## Configuration
167
151
 
168
- # agent-browser
169
- agent-browser doctor
170
- ```
152
+ Runtime configuration is resolved in this order: environment variables first, then the toolkit config file written by the installer, then built-in defaults. No build step is required.
171
153
 
172
- ### 2. Install the extension
173
- #### From npm
174
- ```bash
175
- pi install npm:pi-web-toolkit
176
- ```
177
- #### From GitHub
178
- ```bash
179
- pi install git:github.com/Wade11s/pi-web-toolkit
180
- ```
154
+ Default toolkit config path:
181
155
 
182
- ## Configuration
156
+ ```text
157
+ ${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json
158
+ ```
183
159
 
184
- `web_search` reads its SearXNG endpoint from an environment variable. Set it before starting pi; no build step is required.
160
+ Example:
161
+
162
+ ```json
163
+ {
164
+ "searxngUrl": "https://searxng.example.com",
165
+ "firecrawlFallback": false,
166
+ "firecrawlRunner": "installed",
167
+ "commands": {
168
+ "scrapling": "/Users/alice/.local/bin/scrapling",
169
+ "agentBrowser": "/Users/alice/.npm-global/bin/agent-browser",
170
+ "firecrawl": "/Users/alice/.npm-global/bin/firecrawl"
171
+ }
172
+ }
173
+ ```
185
174
 
186
- | Variable | Default | Used By | Description |
187
- |----------|---------|---------|-------------|
188
- | `SEARXNG_URL` | `http://localhost:8080` | `web_search` | Your SearXNG instance endpoint |
189
- | `PI_WEB_FIRECRAWL_FALLBACK` | `1` (on) | all tools | Set to `0`/`false`/`no`/`off` to disable the optional Firecrawl keyless cloud fallback for a strict local-only policy. |
175
+ | Variable | Toolkit config key | Default | Used By | Description |
176
+ |----------|--------------------|---------|---------|-------------|
177
+ | `SEARXNG_URL` | `searxngUrl` | `http://localhost:8080` | `web_search` | SearXNG endpoint. Must support `/search?q=...&format=json`. |
178
+ | `PI_WEB_FIRECRAWL_FALLBACK` | `firecrawlFallback` | `true` | all Firecrawl fallback paths | Set env to `0`/`false`/`no`/`off`, or config to `false`, to disable cloud fallback. |
179
+ | `PI_WEB_FIRECRAWL_RUNNER` | `firecrawlRunner` | `installed` | all Firecrawl fallback paths | Firecrawl runner: `installed`, `npx`, or `bunx`. `npx`/`bunx` are opt-in because they may run or download packages at fallback time. |
180
+ | `SCRAPLING_BIN` | `commands.scrapling` | `scrapling` | `web_fetch`, `web_batch_fetch` | Scrapling executable path. |
181
+ | `AGENT_BROWSER_BIN` | `commands.agentBrowser` | `agent-browser` | `web_browse` | agent-browser executable path. |
182
+ | `FIRECRAWL_BIN` | `commands.firecrawl` | `firecrawl` | `firecrawl_*`, fallback paths | Firecrawl CLI executable path. |
183
+ | `PI_WEB_TOOLKIT_CONFIG` | — | `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json` | all tools | Override the toolkit config file location. |
190
184
 
191
- Set before starting pi:
185
+ Set env vars before starting pi when you need a temporary override:
192
186
 
193
187
  ```bash
194
188
  export SEARXNG_URL="https://searxng.example.com"
195
- # Optional: disable the Firecrawl cloud fallback entirely
189
+ export SCRAPLING_BIN="$HOME/.local/bin/scrapling"
196
190
  export PI_WEB_FIRECRAWL_FALLBACK=0
191
+ export PI_WEB_FIRECRAWL_RUNNER=npx
197
192
  ```
198
193
 
199
194
  ### Optional: Firecrawl keyless fallback
@@ -206,8 +201,40 @@ Install the optional CLI (the fallback degrades gracefully if it is absent):
206
201
  npm install -g firecrawl-cli
207
202
  ```
208
203
 
204
+ Alternatively, opt into a runner that executes the official CLI on demand:
205
+
206
+ ```json
207
+ { "firecrawlRunner": "npx" }
208
+ ```
209
+
210
+ Allowed runners are `installed`, `npx`, and `bunx`. The default is `installed`; `npx` and `bunx` are never selected automatically because they may run or download packages at fallback time.
211
+
209
212
  The fallback is **keyless-only**: it never reads or stores an API key, and spawns the CLI under an isolated temporary `HOME` with the key env stripped. **Privacy:** when the fallback runs, the URL and page content are sent to Firecrawl's cloud.
210
213
 
214
+ ## Troubleshooting
215
+
216
+ Run doctor mode when an install fails, when filing an issue, or when you want to verify an existing setup. It is verify-only: it does not install dependencies, write config, start containers, or run `pi install`.
217
+
218
+ ```bash
219
+ ./install.sh --doctor
220
+ # or, without cloning:
221
+ curl -fsSL https://raw.githubusercontent.com/Wade11s/pi-web-toolkit/main/install.sh | bash -s -- --doctor
222
+ ```
223
+
224
+ Common failures:
225
+
226
+ | Symptom | Fix |
227
+ |---------|-----|
228
+ | Node.js is too old | Install Node.js 22+ and retry. |
229
+ | `uv` is missing | Install uv, then rerun the installer. |
230
+ | SearXNG returns HTML/403 instead of JSON | Use another endpoint or enable `search.formats: json` on your SearXNG instance. |
231
+ | Docker local SearXNG fails | Start Docker first, or use `--searxng-url` / `--auto-searxng public`. |
232
+ | `agent-browser doctor` fails on Linux | Rerun with `--agent-browser-with-deps` or install the missing browser system libraries manually. |
233
+ | Firecrawl fallback says runner missing | Install `firecrawl-cli`, choose `--firecrawl-runner npx`, choose `--firecrawl-runner bunx`, or disable fallback with `--no-firecrawl`. |
234
+ | pi does not show the tools after install | Restart pi. |
235
+
236
+ To remove the pi package, run `pi remove npm:pi-web-toolkit`. To remove the toolkit config, delete `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json`. If the installer created a local SearXNG instance, manually remove the `pi-web-toolkit-searxng` container and the toolkit SearXNG config directory.
237
+
211
238
  ## Project Structure
212
239
 
213
240
  ```
@@ -216,6 +243,7 @@ pi-web-toolkit/
216
243
  │ ├── index.ts # Unified entry point — registers all 7 tools (4 local + 3 Firecrawl keyless)
217
244
  │ ├── utils/
218
245
  │ │ ├── cli-runner.ts # Unified CLI process spawning with timeout/AbortSignal/env
246
+ │ │ ├── config.ts # Toolkit config and external CLI path resolution
219
247
  │ │ ├── content-preview.ts # Intelligent content extraction from scraped pages
220
248
  │ │ ├── output-sink.ts # Truncation + temp-file fallback
221
249
  │ │ ├── render-helpers.ts # URL abbreviations, text normalization, error formatting for TUI
@@ -232,7 +260,10 @@ pi-web-toolkit/
232
260
  │ └── firecrawl_interact.ts # Firecrawl keyless natural-language interaction (escape hatch)
233
261
  ├── test/
234
262
  │ ├── agent-browser/ # agent-browser output parser regression tests
263
+ │ ├── config/ # Toolkit config precedence tests
235
264
  │ ├── content-preview/ # Content preview fixtures, baselines & snapshots
265
+ │ ├── installer/ # Bootstrap installer behavior tests
266
+ │ ├── web-search/ # SearXNG-first fallback behavior tests
236
267
  │ └── README.md # Test suite structure and conventions
237
268
  ├── docs/
238
269
  │ ├── tools.md # Full parameter specs
@@ -241,6 +272,7 @@ pi-web-toolkit/
241
272
  ├── AGENTS.md
242
273
  ├── CONTEXT.md
243
274
  ├── CHANGELOG.md
275
+ ├── install.sh
244
276
  ├── package.json
245
277
  ├── README.md
246
278
  ├── tsconfig.json
@@ -251,7 +283,7 @@ pi-web-toolkit/
251
283
  - **Unified registration** — `index.ts` is the single source of truth for what pi loads.
252
284
  - **Shared utilities** — `utils/` modules encapsulate CLI spawning, content extraction, output truncation, TUI formatting, and common registration patterns; tool files import only from `utils/`, never from each other.
253
285
  - **Per-tool isolation** — each tool owns its own schema, execute logic, and TUI renderer; no cross-imports except via `utils/`.
254
- - **Runtime config** — environment variables are read at execute time, not build time.
286
+ - **Runtime config** — environment variables and toolkit config are read at execute time, not build time.
255
287
 
256
288
  ## Reference
257
289
 
@@ -2,4 +2,4 @@
2
2
 
3
3
  pi-web-toolkit was local-first and self-hosted by design: SearXNG, scrapling, and agent-browser all run on the user's machine, and the README guaranteed "100% open-source. No required API keys or paid services." We decided to add **Firecrawl Keyless** as a strictly optional, fallback-only cloud layer: when a local backend errors out (or `web_search` returns nothing), the same tool transparently retries through the official `firecrawl-cli` in keyless mode, and three explicit `firecrawl_*` tools are exposed for capabilities the local backends lack.
4
4
 
5
- This is hard to reverse once users and the agent come to rely on the fallback, surprising to a reader who assumes a local-only toolkit, and the result of a real trade-off (zero-config reliability vs. cloud egress, a privacy surface, and a third-party dependency). The fallback defaults **on**, is **keyless-only** (no API key, no signup, no stored credentials — the CLI is spawned under an isolated temp `HOME` with the key env stripped), and is **opt-out-able** via `PI_WEB_FIRECRAWL_FALLBACK=0`. We drive `firecrawl-cli` (an official Firecrawl client) rather than hand-rolling REST because Firecrawl only grants the free keyless tier to official clients, and we restrict it to the keyless endpoints (`/search`, `/scrape`, `/interact`); API-key mode, self-hosted URLs, OAuth, and non-keyless endpoints (`/map`, `/crawl`, `/batch/scrape`, etc.) are deliberately out of scope. The decision and the graceful-skip behavior (never leave the user worse off than the local tool already did) are encoded in the Firecrawl CLI wrapper module.
5
+ This is hard to reverse once users and the agent come to rely on the fallback, surprising to a reader who assumes a local-only toolkit, and the result of a real trade-off (zero-config reliability vs. cloud egress, a privacy surface, and a third-party dependency). The fallback defaults **on**, is **keyless-only** (no API key, no signup, no stored credentials — the CLI is spawned under an isolated temp `HOME` with the key env stripped), and is **opt-out-able** via `PI_WEB_FIRECRAWL_FALLBACK=0`. We drive `firecrawl-cli` (an official Firecrawl client) rather than hand-rolling REST because Firecrawl only grants the free keyless tier to official clients, and we restrict it to the keyless endpoints (`/search`, `/scrape`, `/interact`); API-key mode, self-hosted URLs, OAuth, and non-keyless endpoints (`/map`, `/crawl`, `/batch/scrape`, etc.) are deliberately out of scope. The default Firecrawl runner is an installed `firecrawl` executable; `npx` and `bunx` runners are explicit opt-ins because they may run or download packages at fallback time. The decision and the graceful-skip behavior (never leave the user worse off than the local tool already did) are encoded in the Firecrawl CLI wrapper module.
@@ -0,0 +1,3 @@
1
+ # Toolkit config for installer selections
2
+
3
+ The installer writes selected pi-web-toolkit runtime options, especially the SearXNG endpoint, Firecrawl fallback policy, Firecrawl runner, and discovered external CLI paths, to `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json` instead of modifying shell profiles or relying only on transient environment variables. Environment variables keep highest precedence, but the toolkit config gives installer choices persistent effect after restarting pi without changing the user's shell startup files.
@@ -0,0 +1,3 @@
1
+ # Conservative installer prerequisites
2
+
3
+ The bootstrap installer automates user-level dependency setup and pi-web-toolkit configuration, but it does not silently install or alter system-level prerequisites such as Node.js, Pi, Docker, Homebrew, or OS package-manager state. It asks before optional setup steps such as a local Docker SearXNG container and otherwise reports precise remediation commands, preserving user control over system-wide changes while still making the common path easier.
@@ -0,0 +1,3 @@
1
+ # Interactive SearXNG endpoint discovery
2
+
3
+ The bootstrap installer discovers public SearXNG candidates from `searx.space`, ranks them by health signals, and verifies the JSON search API before presenting them to the user. It does not silently choose a public endpoint unless explicitly requested, because using a remote SearXNG service changes the user's privacy and reliability profile; local or custom endpoints remain first-class choices.
package/docs/guide.md CHANGED
@@ -44,9 +44,25 @@ User asks about something external / current
44
44
 
45
45
  ---
46
46
 
47
+ ## Installation and endpoint selection
48
+
49
+ Use the root `install.sh` bootstrap installer for ordinary installs. It verifies prerequisites, installs or reuses Scrapling and agent-browser, configures a JSON-capable SearXNG endpoint, writes toolkit config, installs the pi package, and performs final verification.
50
+
51
+ SearXNG endpoint selection is endpoint-first:
52
+
53
+ 1. `--searxng-url` or `SEARXNG_URL`
54
+ 2. existing toolkit config
55
+ 3. working localhost endpoints
56
+ 4. explicit public discovery with `--auto-searxng public`
57
+ 5. explicit isolated local Docker setup with `--auto-searxng local-docker`
58
+
59
+ Public SearXNG endpoints are discovered from `searx.space`, ranked by health signals, then verified with `/search?q=...&format=json` before use. The installer does not silently choose a public endpoint unless explicitly requested. Use `./install.sh --doctor` for verify-only diagnostics; normal installation already performs final verification.
60
+
61
+ ---
62
+
47
63
  ## Firecrawl Keyless fallback
48
64
 
49
- When a local backend cannot do the job, the tools automatically retry through **Firecrawl Keyless** (1,000 free credits/month, no API key, no signup) before giving up. It is **fallback-only** — never the primary path — and is **opt-out-able** with `PI_WEB_FIRECRAWL_FALLBACK=0`. Requires the optional `firecrawl-cli` (`npm install -g firecrawl-cli`); if it is absent the tools simply surface the original local error. Agents should call `web_search`/`web_fetch`/`web_browse` first and call `firecrawl_*` directly only after the corresponding local-first tool failed, or when the user explicitly asks for Firecrawl/cloud behavior.
65
+ When a local backend cannot do the job, the tools automatically retry through **Firecrawl Keyless** (1,000 free credits/month, no API key, no signup) before giving up. It is **fallback-only** — never the primary path — and is **opt-out-able** with `PI_WEB_FIRECRAWL_FALLBACK=0` or toolkit config `"firecrawlFallback": false`. Requires the optional `firecrawl-cli` (`npm install -g firecrawl-cli`) or an explicit `firecrawlRunner` of `npx`/`bunx`; if no runner is available the tools simply surface the original local error. Agents should call `web_search`/`web_fetch`/`web_browse` first and call `firecrawl_*` directly only after the corresponding local-first tool failed, or when the user explicitly asks for Firecrawl/cloud behavior.
50
66
 
51
67
  | Tool | Falls back to Firecrawl when… |
52
68
  |------|-------------------------------|
@@ -57,10 +73,10 @@ When a local backend cannot do the job, the tools automatically retry through **
57
73
 
58
74
  The three `firecrawl_*` tools are fallback-only explicit escape hatches for capabilities the local backends lack (`github`/`research`/`pdf` search categories, cloud rendering, natural-language interaction). They are not the first step for ordinary URL reading; `web_fetch` already performs Firecrawl fallback internally when local fetching fails.
59
75
 
60
- **Graceful skip.** If the fallback itself cannot help — the CLI is missing, the IP is flagged as suspicious, the keyless quota is exhausted, or the fallback is disabled — the tool falls through to the original local-tool error so the user is never left worse off.
76
+ **Graceful skip.** If the fallback itself cannot help — the runner is missing, the IP is flagged as suspicious, the keyless quota is exhausted, or the fallback is disabled — the tool falls through to the original local-tool error so the user is never left worse off. `firecrawlRunner` defaults to `installed`; `npx` and `bunx` are opt-in because they may run or download packages at fallback time.
61
77
 
62
78
  **Credit budgeting.** Search ≈ 2 credits / 10 results, scrape ≈ 1 credit / page, interact ≈ 2 credits/min (code-only) or ≈ 7 credits/min (AI prompt). Results report `creditsUsed` where the source provides it. The fallback stays conservative (small limits) against the 1,000 credits/month allowance.
63
79
 
64
- **Privacy.** Firecrawl is a cloud service: when the fallback runs, the URL/query and page content leave the machine. Set `PI_WEB_FIRECRAWL_FALLBACK=0` to enforce a strict local-only, no-cloud-egress policy. The fallback is **keyless-only** — it never reads, stores, or sends an API key, and spawns the CLI under an isolated temporary `HOME`.
80
+ **Privacy.** Firecrawl is a cloud service: when the fallback runs, the URL/query and page content leave the machine. Set `PI_WEB_FIRECRAWL_FALLBACK=0` or toolkit config `"firecrawlFallback": false` to enforce a strict local-only, no-cloud-egress policy. The fallback is **keyless-only** — it never reads, stores, or sends an API key, and spawns the CLI under an isolated temporary `HOME`.
65
81
 
66
82
  ---
package/docs/tools.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Tool Reference
2
2
 
3
+ ## Runtime configuration
4
+
5
+ Tools resolve runtime configuration in this order: environment variables, toolkit config, then built-in defaults. The installer writes toolkit config to `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json`; override that path with `PI_WEB_TOOLKIT_CONFIG`.
6
+
7
+ | Variable | Toolkit config key | Default | Used By |
8
+ |----------|--------------------|---------|---------|
9
+ | `SEARXNG_URL` | `searxngUrl` | `http://localhost:8080` | `web_search` |
10
+ | `PI_WEB_FIRECRAWL_FALLBACK` | `firecrawlFallback` | `true` | Firecrawl fallback paths |
11
+ | `PI_WEB_FIRECRAWL_RUNNER` | `firecrawlRunner` | `installed` | Firecrawl fallback paths |
12
+ | `SCRAPLING_BIN` | `commands.scrapling` | `scrapling` | `web_fetch`, `web_batch_fetch` |
13
+ | `AGENT_BROWSER_BIN` | `commands.agentBrowser` | `agent-browser` | `web_browse` |
14
+ | `FIRECRAWL_BIN` | `commands.firecrawl` | `firecrawl` | `firecrawl_*` and fallback paths |
15
+
16
+ If toolkit config exists but is malformed, tools fail with a clear config error instead of silently ignoring the file. `firecrawlRunner` accepts `installed`, `npx`, or `bunx`; `npx` and `bunx` are opt-in because they may run or download packages at fallback time.
17
+
3
18
  ## `web_search`
4
19
 
5
20
  Search the web via SearXNG. Returns ranked results with title, URL, and snippet. Automatically aggregates up to 3 pages of SearXNG results when more than ~20 are needed.
@@ -170,7 +185,7 @@ User: "Compare Python asyncio, Trio, and curio"
170
185
 
171
186
  These three tools talk to [Firecrawl](https://www.firecrawl.dev) in **keyless** mode: 1,000 free credits/month, **no API key and no signup**. They require the optional `firecrawl-cli` (`npm install -g firecrawl-cli`). **Privacy:** the URL/query/page content is sent to Firecrawl's cloud.
172
187
 
173
- They double as the implementation of the automatic fallback: `web_search`/`web_fetch`/`web_browse` retry through Firecrawl keyless when their local backend fails (or search returns nothing). Do not use `firecrawl_*` as the first attempt for ordinary search, URL reading, or page interaction; use the corresponding local-first tool first unless the user explicitly asks for Firecrawl/cloud behavior. Disable all Firecrawl usage with `PI_WEB_FIRECRAWL_FALLBACK=0`.
188
+ They double as the implementation of the automatic fallback: `web_search`/`web_fetch`/`web_browse` retry through Firecrawl keyless when their local backend fails (or search returns nothing). Do not use `firecrawl_*` as the first attempt for ordinary search, URL reading, or page interaction; use the corresponding local-first tool first unless the user explicitly asks for Firecrawl/cloud behavior. Disable all Firecrawl usage with `PI_WEB_FIRECRAWL_FALLBACK=0` or toolkit config `"firecrawlFallback": false`.
174
189
 
175
190
  ### `firecrawl_search`
176
191
 
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  import { runCLI } from "./cli-runner";
9
+ import { getToolkitCommand } from "./config";
9
10
 
10
11
  export interface BrowseAction {
11
12
  type: "click" | "fill" | "type" | "press" | "wait" | "wait_selector" | "scroll";
@@ -180,7 +181,7 @@ export async function runAgentBrowserBatch(
180
181
 
181
182
  try {
182
183
  const result = await runCLI({
183
- command: "agent-browser",
184
+ command: getToolkitCommand("agentBrowser"),
184
185
  args,
185
186
  stdin: JSON.stringify(commands),
186
187
  timeout: options.timeout,
@@ -199,7 +200,7 @@ export async function runAgentBrowserBatch(
199
200
  );
200
201
  }
201
202
  } catch (err: any) {
202
- if (err.message === "agent-browser is not installed") {
203
+ if (typeof err.message === "string" && err.message.includes("is not installed")) {
203
204
  throw new Error(
204
205
  "agent-browser is not installed.\n\nInstall it with:\n npm i -g agent-browser && agent-browser install\n\nThen run: agent-browser doctor"
205
206
  );
@@ -211,7 +212,7 @@ export async function runAgentBrowserBatch(
211
212
  export async function closeAgentBrowserSession(session: string, signal?: AbortSignal): Promise<void> {
212
213
  try {
213
214
  await runCLI({
214
- command: "agent-browser",
215
+ command: getToolkitCommand("agentBrowser"),
215
216
  args: ["--session", session, "close"],
216
217
  signal,
217
218
  });