pi-web-toolkit 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,34 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.3.3] - 2026-06-28
11
+
12
+ ### Added
13
+
14
+ - `install.sh` bootstrap installer for one-command pi-web-toolkit setup, including dependency verification, SearXNG endpoint selection, toolkit config writing, optional Firecrawl setup, local development install mode, and `--doctor` diagnostics.
15
+ - Toolkit config support at `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json`, with environment variables taking precedence for SearXNG endpoints, Firecrawl fallback enablement, Firecrawl runner selection, and external CLI command paths.
16
+ - Public SearXNG endpoint discovery from `searx.space` with JSON API verification, plus an explicit isolated local Docker SearXNG option.
17
+ - Explicit Firecrawl runner selection through `firecrawlRunner` / `PI_WEB_FIRECRAWL_RUNNER`, supporting `installed`, `npx`, and `bunx`.
18
+ - Regression tests for toolkit config precedence and installer behavior, including public endpoint and local Docker flows.
19
+
20
+ ### Changed
21
+
22
+ - README and guide now present the bootstrap installer as the primary installation path while keeping manual setup as an advanced option.
23
+ - External CLI wrappers can use configured absolute command paths, reducing reliance on shell profile/PATH changes after installer runs.
24
+ - `web_search` fallback behavior is covered by regression tests so missing optional Firecrawl runners do not appear as the primary search backend failure.
25
+
26
+ ## [0.3.2] - 2026-06-25
27
+
28
+ ### Fixed
29
+
30
+ - Kept the agent's web-tool selection local-first: ordinary URL reads now prefer `web_fetch`, discovery prefers `web_search`, and interaction prefers `web_browse`; `firecrawl_*` tools are documented and prompted as fallback-only unless explicitly requested.
31
+ - Fixed `firecrawl_scrape` and `firecrawl_interact` partial-result rendering type-check errors caused by reading `details` before declaration.
32
+
33
+ ### Changed
34
+
35
+ - Reduced web-tool prompt metadata overhead by consolidating shared routing rules and shortening per-tool `promptSnippet`/`promptGuidelines` text.
36
+ - Added a tool-routing prompt regression test and included it in `npm test`.
37
+
10
38
  ## [0.3.1] - 2026-06-23
11
39
 
12
40
  ### Changed
@@ -145,7 +173,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
145
173
  - `web_browse` — interactive browser automation via agent-browser.
146
174
  - LLM-optimized `promptGuidelines` and `promptSnippet` for every tool.
147
175
 
148
- [Unreleased]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.2.2...HEAD
176
+ [Unreleased]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.3.3...HEAD
177
+ [0.3.3]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.3.2...v0.3.3
178
+ [0.3.2]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.3.1...v0.3.2
179
+ [0.3.1]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.3.0...v0.3.1
180
+ [0.3.0]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.2.2...v0.3.0
149
181
  [0.2.2]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.2.1...v0.2.2
150
182
  [0.2.1]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.2.0...v0.2.1
151
183
  [0.2.0]: https://github.com/Wade11s/pi-web-toolkit/compare/v0.1.2...v0.2.0
package/README.md CHANGED
@@ -22,7 +22,7 @@ Web research toolkit for [pi](https://pi.dev) agents. Search via SearXNG, fetch
22
22
  | **`firecrawl_scrape`** | [firecrawl-cli](https://github.com/firecrawl/cli) (keyless) | Cloud single-page fetch (anti-bot / JS / PDF) | — |
23
23
  | **`firecrawl_interact`** | [firecrawl-cli](https://github.com/firecrawl/cli) (keyless) | Cloud natural-language page interaction | — |
24
24
 
25
- > **Firecrawl fallback.** `web_search`, `web_fetch`, and `web_browse` automatically retry through Firecrawl Keyless (1,000 free credits/month, no API key) when their local backend errors out or search returns nothing. The three `firecrawl_*` tools are explicit escape hatches. Disable it with `PI_WEB_FIRECRAWL_FALLBACK=0`. Install the optional CLI: `npm install -g firecrawl-cli`.
25
+ > **Firecrawl fallback.** `web_search`, `web_fetch`, and `web_browse` are the local-first primary tools and automatically retry through Firecrawl Keyless (1,000 free credits/month, no API key) only when their local backend errors out or search returns nothing. The three `firecrawl_*` tools are fallback-only escape hatches; agents are instructed not to call them first unless you explicitly ask for Firecrawl/cloud behavior or a local-first tool already failed. Disable fallback use with `PI_WEB_FIRECRAWL_FALLBACK=0` or toolkit config `"firecrawlFallback": false`. Install the optional CLI: `npm install -g firecrawl-cli`.
26
26
 
27
27
  ## Tools Preview
28
28
 
@@ -46,159 +46,154 @@ A quick look at how pi renders toolkit calls while an agent searches, fetches, b
46
46
  </tr>
47
47
  </table>
48
48
 
49
- ## Install with Pi Agent
49
+ ## Quick Start
50
50
 
51
- Copy and send the prompt below to Pi. It will install this package and its external dependencies for you.
51
+ ### Install
52
52
 
53
- ```text
54
- Install pi-web-toolkit and its external dependencies. Complete and verify every
55
- step yourself; do not rely on web browsing or external documentation. Inspect
56
- the machine first and reuse working installations. Ask before using sudo,
57
- changing shell profiles, overwriting configuration, or modifying existing
58
- services or containers.
59
-
60
- 1. Ensure Node.js 22+, npm, Docker, OpenSSL, curl, uv, and Pi are installed, and
61
- that Docker is running. Install only missing or incompatible prerequisites.
62
- 2. Configure SearXNG:
63
- - Test SEARXNG_URL when set, then http://localhost:8080.
64
- - Verify /search?q=test&format=json returns JSON with a results array.
65
- - If neither endpoint works, first ensure no existing container or config
66
- would be overwritten, then create a local-only instance by running:
67
-
68
- mkdir -p "$HOME/.config/searxng"
69
- cat > "$HOME/.config/searxng/settings.yml" <<'YAML'
70
- use_default_settings: true
71
-
72
- search:
73
- formats:
74
- - html
75
- - json
76
- YAML
77
-
78
- docker run -d \
79
- --name searxng \
80
- --restart unless-stopped \
81
- -p 127.0.0.1:8080:8080 \
82
- -e FORCE_OWNERSHIP=false \
83
- -e SEARXNG_SECRET="$(openssl rand -hex 32)" \
84
- -v "$HOME/.config/searxng/settings.yml:/etc/searxng/settings.yml:ro" \
85
- docker.io/searxng/searxng:latest
86
-
87
- - Verify the selected endpoint by running:
88
-
89
- SEARXNG_ENDPOINT="${SEARXNG_URL:-http://localhost:8080}"
90
- curl -fsS --get "${SEARXNG_ENDPOINT%/}/search" \
91
- --data-urlencode "q=test" \
92
- --data "format=json" |
93
- grep -q '"results"' && echo "SearXNG JSON API ready"
94
-
95
- - Pi uses http://localhost:8080 by default. Set SEARXNG_URL before starting
96
- Pi only when using another endpoint.
97
- 3. Install and verify Scrapling:
98
- uv tool install "scrapling[all]"
99
- scrapling install
100
- scrapling --help
101
- 4. Install and verify agent-browser:
102
- npm install -g agent-browser
103
- agent-browser install
104
- agent-browser doctor
105
- On Linux, use agent-browser install --with-deps if required.
106
- 5. Optionally install firecrawl-cli for the keyless cloud fallback (no API key
107
- needed; the fallback degrades gracefully if it is absent):
108
- npm install -g firecrawl-cli
109
- 6. After all dependencies pass verification, install the package:
110
- pi install npm:pi-web-toolkit
111
-
112
- Report what was installed or reused, all verification results, the SearXNG
113
- endpoint Pi will use, and whether Pi must be restarted. Do not report success
114
- until every check passes.
53
+ Run the bootstrap installer:
54
+
55
+ ```bash
56
+ curl -fsSL https://raw.githubusercontent.com/Wade11s/pi-web-toolkit/main/install.sh | bash
115
57
  ```
116
58
 
117
- ## Quick Start
59
+ This is the normal install path. It installs the pi package, configures external runtime dependencies, verifies everything, and writes persistent runtime options to `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json`.
60
+
61
+ When it finishes, **restart pi** so the package is loaded. If pi-web-toolkit was already loaded and only toolkit config changed, `/reload` may also work.
62
+
63
+ ### What the installer does
64
+
65
+ The installer:
118
66
 
119
- ### 1. Install external dependencies
67
+ - Checks Node.js 22+, npm, Pi, curl, OpenSSL, and uv.
68
+ - Installs or reuses Scrapling and agent-browser.
69
+ - Configures a JSON-capable SearXNG endpoint for `web_search`.
70
+ - Optionally installs `firecrawl-cli` for the Firecrawl Keyless fallback.
71
+ - Writes toolkit config with the selected endpoint and discovered CLI paths.
72
+ - Installs the pi package with `pi install npm:pi-web-toolkit`.
73
+ - Runs final verification before reporting success.
120
74
 
121
- The commands below assume a POSIX shell with Docker, OpenSSL, curl, uv, and Node.js 22+ with npm.
75
+ The installer is conservative. It does **not** silently install Docker, Node.js, Pi, Homebrew, or OS packages, and it does not use sudo, change shell profiles, or overwrite user-managed SearXNG resources.
76
+
77
+ ### Common installer options
78
+
79
+ When piping from curl, pass flags after `bash -s --`:
122
80
 
123
81
  ```bash
124
- # SearXNG (for search; local-only instance with the required JSON API)
125
- mkdir -p "$HOME/.config/searxng"
126
- cat > "$HOME/.config/searxng/settings.yml" <<'YAML'
127
- use_default_settings: true
128
-
129
- search:
130
- formats:
131
- - html
132
- - json
133
- YAML
134
-
135
- docker run -d \
136
- --name searxng \
137
- --restart unless-stopped \
138
- -p 127.0.0.1:8080:8080 \
139
- -e FORCE_OWNERSHIP=false \
140
- -e SEARXNG_SECRET="$(openssl rand -hex 32)" \
141
- -v "$HOME/.config/searxng/settings.yml:/etc/searxng/settings.yml:ro" \
142
- docker.io/searxng/searxng:latest
143
- export SEARXNG_URL="http://127.0.0.1:8080"
82
+ curl -fsSL https://raw.githubusercontent.com/Wade11s/pi-web-toolkit/main/install.sh | bash -s -- --yes --searxng-url https://searxng.example.com --no-firecrawl
83
+ ```
84
+
85
+ If you have cloned the repo, pass the same flags to `./install.sh` directly:
86
+
87
+ | Goal | Command |
88
+ |------|---------|
89
+ | Use an existing/self-hosted SearXNG endpoint | `./install.sh --searxng-url https://searxng.example.com` |
90
+ | Non-interactive install with a known endpoint | `./install.sh --yes --searxng-url https://searxng.example.com --no-firecrawl` |
91
+ | Explicitly auto-select a verified public SearXNG endpoint | `./install.sh --yes --auto-searxng public --no-firecrawl` |
92
+ | Start/reuse isolated local Docker SearXNG | `./install.sh --yes --auto-searxng local-docker --searxng-port 8080 --no-firecrawl` |
93
+ | Install optional Firecrawl Keyless fallback with global CLI | `./install.sh --with-firecrawl --firecrawl-runner installed` |
94
+ | Enable Firecrawl fallback through opt-in `npx` runner | `./install.sh --with-firecrawl --firecrawl-runner npx` |
95
+ | Enable Firecrawl fallback through opt-in `bunx` runner | `./install.sh --with-firecrawl --firecrawl-runner bunx` |
96
+ | Verify readiness without changing anything | `./install.sh --doctor` |
97
+ | Install from the current checkout | `./install.sh --local` |
98
+
99
+ ### SearXNG endpoint choices
100
+
101
+ `web_search` needs a SearXNG endpoint that supports JSON search responses:
102
+
103
+ ```bash
104
+ curl -fsS --get "https://searxng.example.com/search" \
105
+ --data-urlencode "q=searxng" \
106
+ --data "format=json" | grep -q '"results"'
107
+ ```
108
+
109
+ The installer can use:
110
+
111
+ - An existing/self-hosted endpoint passed with `--searxng-url`.
112
+ - A working local endpoint such as `http://localhost:8080`.
113
+ - A public endpoint discovered from `searx.space`, ranked by health signals, then verified with `format=json`.
114
+ - An isolated local Docker endpoint using container `pi-web-toolkit-searxng` and config under the toolkit config directory.
115
+
116
+ Public endpoints are not silently selected by default because search queries leave your machine. Use `--auto-searxng public` only when that trade-off is acceptable.
117
+
118
+ ### Manual install (advanced)
119
+
120
+ If you prefer to install dependencies yourself:
121
+
122
+ ```bash
123
+ # SearXNG endpoint: provide an existing JSON-capable endpoint, or run your own
124
+ export SEARXNG_URL="https://searxng.example.com"
144
125
 
145
126
  # scrapling (for fetch & batch fetch)
146
127
  uv tool install "scrapling[all]"
147
128
  scrapling install
148
129
 
149
130
  # agent-browser (for browse)
150
- npm i -g agent-browser && agent-browser install
151
- # On Linux hosts missing browser system libraries: agent-browser install --with-deps
131
+ npm i -g agent-browser
132
+ agent-browser install
133
+ agent-browser doctor
152
134
 
153
- # firecrawl-cli (OPTIONAL — enables the keyless cloud fallback; no API key needed)
135
+ # firecrawl-cli (optional cloud fallback; no API key needed)
154
136
  npm i -g firecrawl-cli
137
+
138
+ # pi package
139
+ pi install npm:pi-web-toolkit
155
140
  ```
156
141
 
157
- **Verify dependencies:**
142
+ A SearXNG endpoint must support `format=json`:
143
+
158
144
  ```bash
159
- # SearXNG
160
145
  curl -fsS --get "$SEARXNG_URL/search" \
161
146
  --data-urlencode "q=searxng" \
162
- --data "format=json" |
163
- grep -q '"results"' && echo "SearXNG JSON API ready"
147
+ --data "format=json" | grep -q '"results"'
148
+ ```
164
149
 
165
- # scrapling
166
- scrapling --help
150
+ ## Configuration
167
151
 
168
- # agent-browser
169
- agent-browser doctor
170
- ```
152
+ Runtime configuration is resolved in this order: environment variables first, then the toolkit config file written by the installer, then built-in defaults. No build step is required.
171
153
 
172
- ### 2. Install the extension
173
- #### From npm
174
- ```bash
175
- pi install npm:pi-web-toolkit
176
- ```
177
- #### From GitHub
178
- ```bash
179
- pi install git:github.com/Wade11s/pi-web-toolkit
180
- ```
154
+ Default toolkit config path:
181
155
 
182
- ## Configuration
156
+ ```text
157
+ ${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json
158
+ ```
183
159
 
184
- `web_search` reads its SearXNG endpoint from an environment variable. Set it before starting pi; no build step is required.
160
+ Example:
161
+
162
+ ```json
163
+ {
164
+ "searxngUrl": "https://searxng.example.com",
165
+ "firecrawlFallback": false,
166
+ "firecrawlRunner": "installed",
167
+ "commands": {
168
+ "scrapling": "/Users/alice/.local/bin/scrapling",
169
+ "agentBrowser": "/Users/alice/.npm-global/bin/agent-browser",
170
+ "firecrawl": "/Users/alice/.npm-global/bin/firecrawl"
171
+ }
172
+ }
173
+ ```
185
174
 
186
- | Variable | Default | Used By | Description |
187
- |----------|---------|---------|-------------|
188
- | `SEARXNG_URL` | `http://localhost:8080` | `web_search` | Your SearXNG instance endpoint |
189
- | `PI_WEB_FIRECRAWL_FALLBACK` | `1` (on) | all tools | Set to `0`/`false`/`no`/`off` to disable the optional Firecrawl keyless cloud fallback for a strict local-only policy. |
175
+ | Variable | Toolkit config key | Default | Used By | Description |
176
+ |----------|--------------------|---------|---------|-------------|
177
+ | `SEARXNG_URL` | `searxngUrl` | `http://localhost:8080` | `web_search` | SearXNG endpoint. Must support `/search?q=...&format=json`. |
178
+ | `PI_WEB_FIRECRAWL_FALLBACK` | `firecrawlFallback` | `true` | all Firecrawl fallback paths | Set env to `0`/`false`/`no`/`off`, or config to `false`, to disable cloud fallback. |
179
+ | `PI_WEB_FIRECRAWL_RUNNER` | `firecrawlRunner` | `installed` | all Firecrawl fallback paths | Firecrawl runner: `installed`, `npx`, or `bunx`. `npx`/`bunx` are opt-in because they may run or download packages at fallback time. |
180
+ | `SCRAPLING_BIN` | `commands.scrapling` | `scrapling` | `web_fetch`, `web_batch_fetch` | Scrapling executable path. |
181
+ | `AGENT_BROWSER_BIN` | `commands.agentBrowser` | `agent-browser` | `web_browse` | agent-browser executable path. |
182
+ | `FIRECRAWL_BIN` | `commands.firecrawl` | `firecrawl` | `firecrawl_*`, fallback paths | Firecrawl CLI executable path. |
183
+ | `PI_WEB_TOOLKIT_CONFIG` | — | `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json` | all tools | Override the toolkit config file location. |
190
184
 
191
- Set before starting pi:
185
+ Set env vars before starting pi when you need a temporary override:
192
186
 
193
187
  ```bash
194
188
  export SEARXNG_URL="https://searxng.example.com"
195
- # Optional: disable the Firecrawl cloud fallback entirely
189
+ export SCRAPLING_BIN="$HOME/.local/bin/scrapling"
196
190
  export PI_WEB_FIRECRAWL_FALLBACK=0
191
+ export PI_WEB_FIRECRAWL_RUNNER=npx
197
192
  ```
198
193
 
199
194
  ### Optional: Firecrawl keyless fallback
200
195
 
201
- When a local backend (`web_search`/`web_fetch`/`web_browse`) fails or returns nothing, the tools automatically retry through [Firecrawl Keyless](https://www.firecrawl.dev/blog/firecrawl-keyless-launch) — 1,000 free credits/month, **no API key, no signup**. The `firecrawl_*` tools are explicit escape hatches for capabilities the local backends lack (search categories, cloud rendering, natural-language interaction).
196
+ When a local backend (`web_search`/`web_fetch`/`web_browse`) fails or returns nothing, the tools automatically retry through [Firecrawl Keyless](https://www.firecrawl.dev/blog/firecrawl-keyless-launch) — 1,000 free credits/month, **no API key, no signup**. The `firecrawl_*` tools are fallback-only explicit escape hatches for capabilities the local backends lack (search categories, cloud rendering, natural-language interaction). Agents should use `web_fetch`/`web_search`/`web_browse` first unless you explicitly request Firecrawl/cloud behavior.
202
197
 
203
198
  Install the optional CLI (the fallback degrades gracefully if it is absent):
204
199
 
@@ -206,8 +201,40 @@ Install the optional CLI (the fallback degrades gracefully if it is absent):
206
201
  npm install -g firecrawl-cli
207
202
  ```
208
203
 
204
+ Alternatively, opt into a runner that executes the official CLI on demand:
205
+
206
+ ```json
207
+ { "firecrawlRunner": "npx" }
208
+ ```
209
+
210
+ Allowed runners are `installed`, `npx`, and `bunx`. The default is `installed`; `npx` and `bunx` are never selected automatically because they may run or download packages at fallback time.
211
+
209
212
  The fallback is **keyless-only**: it never reads or stores an API key, and spawns the CLI under an isolated temporary `HOME` with the key env stripped. **Privacy:** when the fallback runs, the URL and page content are sent to Firecrawl's cloud.
210
213
 
214
+ ## Troubleshooting
215
+
216
+ Run doctor mode when an install fails, when filing an issue, or when you want to verify an existing setup. It is verify-only: it does not install dependencies, write config, start containers, or run `pi install`.
217
+
218
+ ```bash
219
+ ./install.sh --doctor
220
+ # or, without cloning:
221
+ curl -fsSL https://raw.githubusercontent.com/Wade11s/pi-web-toolkit/main/install.sh | bash -s -- --doctor
222
+ ```
223
+
224
+ Common failures:
225
+
226
+ | Symptom | Fix |
227
+ |---------|-----|
228
+ | Node.js is too old | Install Node.js 22+ and retry. |
229
+ | `uv` is missing | Install uv, then rerun the installer. |
230
+ | SearXNG returns HTML/403 instead of JSON | Use another endpoint or enable `search.formats: json` on your SearXNG instance. |
231
+ | Docker local SearXNG fails | Start Docker first, or use `--searxng-url` / `--auto-searxng public`. |
232
+ | `agent-browser doctor` fails on Linux | Rerun with `--agent-browser-with-deps` or install the missing browser system libraries manually. |
233
+ | Firecrawl fallback says runner missing | Install `firecrawl-cli`, choose `--firecrawl-runner npx`, choose `--firecrawl-runner bunx`, or disable fallback with `--no-firecrawl`. |
234
+ | pi does not show the tools after install | Restart pi. |
235
+
236
+ To remove the pi package, run `pi remove npm:pi-web-toolkit`. To remove the toolkit config, delete `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json`. If the installer created a local Docker SearXNG instance, manually remove the `pi-web-toolkit-searxng` container and the toolkit SearXNG config directory.
237
+
211
238
  ## Project Structure
212
239
 
213
240
  ```
@@ -216,6 +243,7 @@ pi-web-toolkit/
216
243
  │ ├── index.ts # Unified entry point — registers all 7 tools (4 local + 3 Firecrawl keyless)
217
244
  │ ├── utils/
218
245
  │ │ ├── cli-runner.ts # Unified CLI process spawning with timeout/AbortSignal/env
246
+ │ │ ├── config.ts # Toolkit config and external CLI path resolution
219
247
  │ │ ├── content-preview.ts # Intelligent content extraction from scraped pages
220
248
  │ │ ├── output-sink.ts # Truncation + temp-file fallback
221
249
  │ │ ├── render-helpers.ts # URL abbreviations, text normalization, error formatting for TUI
@@ -232,7 +260,10 @@ pi-web-toolkit/
232
260
  │ └── firecrawl_interact.ts # Firecrawl keyless natural-language interaction (escape hatch)
233
261
  ├── test/
234
262
  │ ├── agent-browser/ # agent-browser output parser regression tests
263
+ │ ├── config/ # Toolkit config precedence tests
235
264
  │ ├── content-preview/ # Content preview fixtures, baselines & snapshots
265
+ │ ├── installer/ # Bootstrap installer behavior tests
266
+ │ ├── web-search/ # SearXNG-first fallback behavior tests
236
267
  │ └── README.md # Test suite structure and conventions
237
268
  ├── docs/
238
269
  │ ├── tools.md # Full parameter specs
@@ -241,6 +272,7 @@ pi-web-toolkit/
241
272
  ├── AGENTS.md
242
273
  ├── CONTEXT.md
243
274
  ├── CHANGELOG.md
275
+ ├── install.sh
244
276
  ├── package.json
245
277
  ├── README.md
246
278
  ├── tsconfig.json
@@ -251,7 +283,7 @@ pi-web-toolkit/
251
283
  - **Unified registration** — `index.ts` is the single source of truth for what pi loads.
252
284
  - **Shared utilities** — `utils/` modules encapsulate CLI spawning, content extraction, output truncation, TUI formatting, and common registration patterns; tool files import only from `utils/`, never from each other.
253
285
  - **Per-tool isolation** — each tool owns its own schema, execute logic, and TUI renderer; no cross-imports except via `utils/`.
254
- - **Runtime config** — environment variables are read at execute time, not build time.
286
+ - **Runtime config** — environment variables and toolkit config are read at execute time, not build time.
255
287
 
256
288
  ## Reference
257
289
 
@@ -2,4 +2,4 @@
2
2
 
3
3
  pi-web-toolkit was local-first and self-hosted by design: SearXNG, scrapling, and agent-browser all run on the user's machine, and the README guaranteed "100% open-source. No required API keys or paid services." We decided to add **Firecrawl Keyless** as a strictly optional, fallback-only cloud layer: when a local backend errors out (or `web_search` returns nothing), the same tool transparently retries through the official `firecrawl-cli` in keyless mode, and three explicit `firecrawl_*` tools are exposed for capabilities the local backends lack.
4
4
 
5
- This is hard to reverse once users and the agent come to rely on the fallback, surprising to a reader who assumes a local-only toolkit, and the result of a real trade-off (zero-config reliability vs. cloud egress, a privacy surface, and a third-party dependency). The fallback defaults **on**, is **keyless-only** (no API key, no signup, no stored credentials — the CLI is spawned under an isolated temp `HOME` with the key env stripped), and is **opt-out-able** via `PI_WEB_FIRECRAWL_FALLBACK=0`. We drive `firecrawl-cli` (an official Firecrawl client) rather than hand-rolling REST because Firecrawl only grants the free keyless tier to official clients, and we restrict it to the keyless endpoints (`/search`, `/scrape`, `/interact`); API-key mode, self-hosted URLs, OAuth, and non-keyless endpoints (`/map`, `/crawl`, `/batch/scrape`, etc.) are deliberately out of scope. The decision and the graceful-skip behavior (never leave the user worse off than the local tool already did) are encoded in the Firecrawl CLI wrapper module.
5
+ This is hard to reverse once users and the agent come to rely on the fallback, surprising to a reader who assumes a local-only toolkit, and the result of a real trade-off (zero-config reliability vs. cloud egress, a privacy surface, and a third-party dependency). The fallback defaults **on**, is **keyless-only** (no API key, no signup, no stored credentials — the CLI is spawned under an isolated temp `HOME` with the key env stripped), and is **opt-out-able** via `PI_WEB_FIRECRAWL_FALLBACK=0`. We drive `firecrawl-cli` (an official Firecrawl client) rather than hand-rolling REST because Firecrawl only grants the free keyless tier to official clients, and we restrict it to the keyless endpoints (`/search`, `/scrape`, `/interact`); API-key mode, self-hosted URLs, OAuth, and non-keyless endpoints (`/map`, `/crawl`, `/batch/scrape`, etc.) are deliberately out of scope. The default Firecrawl runner is an installed `firecrawl` executable; `npx` and `bunx` runners are explicit opt-ins because they may run or download packages at fallback time. The decision and the graceful-skip behavior (never leave the user worse off than the local tool already did) are encoded in the Firecrawl CLI wrapper module.
@@ -0,0 +1,3 @@
1
+ # Toolkit config for installer selections
2
+
3
+ The installer writes selected pi-web-toolkit runtime options, especially the SearXNG endpoint, Firecrawl fallback policy, Firecrawl runner, and discovered external CLI paths, to `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json` instead of modifying shell profiles or relying only on transient environment variables. Environment variables keep highest precedence, but the toolkit config gives installer choices persistent effect after restarting pi without changing the user's shell startup files.
@@ -0,0 +1,3 @@
1
+ # Conservative installer prerequisites
2
+
3
+ The bootstrap installer automates user-level dependency setup and pi-web-toolkit configuration, but it does not silently install or alter system-level prerequisites such as Node.js, Pi, Docker, Homebrew, or OS package-manager state. It asks before optional setup steps such as a local Docker SearXNG container and otherwise reports precise remediation commands, preserving user control over system-wide changes while still making the common path easier.
@@ -0,0 +1,3 @@
1
+ # Interactive SearXNG endpoint discovery
2
+
3
+ The bootstrap installer discovers public SearXNG candidates from `searx.space`, ranks them by health signals, and verifies the JSON search API before presenting them to the user. It does not silently choose a public endpoint unless explicitly requested, because using a remote SearXNG service changes the user's privacy and reliability profile; local or custom endpoints remain first-class choices.
package/docs/guide.md CHANGED
@@ -44,9 +44,25 @@ User asks about something external / current
44
44
 
45
45
  ---
46
46
 
47
+ ## Installation and endpoint selection
48
+
49
+ Use the root `install.sh` bootstrap installer for ordinary installs. It verifies prerequisites, installs or reuses Scrapling and agent-browser, configures a JSON-capable SearXNG endpoint, writes toolkit config, installs the pi package, and performs final verification.
50
+
51
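+ SearXNG endpoint selection is endpoint-first: explicitly provided or already-working endpoints are tried before any new endpoint is discovered or provisioned, in this order: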
52
+
53
+ 1. `--searxng-url` or `SEARXNG_URL`
54
+ 2. existing toolkit config
55
+ 3. working localhost endpoints
56
+ 4. explicit public discovery with `--auto-searxng public`
57
+ 5. explicit isolated local Docker setup with `--auto-searxng local-docker`
58
+
59
+ Public SearXNG endpoints are discovered from `searx.space`, ranked by health signals, then verified with `/search?q=...&format=json` before use. The installer does not silently choose a public endpoint unless explicitly requested. Use `./install.sh --doctor` for verify-only diagnostics; normal installation already performs final verification.
60
+
61
+ ---
62
+
47
63
  ## Firecrawl Keyless fallback
48
64
 
49
- When a local backend cannot do the job, the tools automatically retry through **Firecrawl Keyless** (1,000 free credits/month, no API key, no signup) before giving up. It is **fallback-only** — never the primary path — and is **opt-out-able** with `PI_WEB_FIRECRAWL_FALLBACK=0`. Requires the optional `firecrawl-cli` (`npm install -g firecrawl-cli`); if it is absent the tools simply surface the original local error.
65
+ When a local backend cannot do the job, the tools automatically retry through **Firecrawl Keyless** (1,000 free credits/month, no API key, no signup) before giving up. It is **fallback-only** — never the primary path — and is **opt-out-able** with `PI_WEB_FIRECRAWL_FALLBACK=0` or toolkit config `"firecrawlFallback": false`. Requires the optional `firecrawl-cli` (`npm install -g firecrawl-cli`) or an explicit `firecrawlRunner` of `npx`/`bunx`; if no runner is available the tools simply surface the original local error. Agents should call `web_search`/`web_fetch`/`web_browse` first and call `firecrawl_*` directly only after the corresponding local-first tool failed, or when the user explicitly asks for Firecrawl/cloud behavior.
50
66
 
51
67
  | Tool | Falls back to Firecrawl when… |
52
68
  |------|-------------------------------|
@@ -55,12 +71,12 @@ When a local backend cannot do the job, the tools automatically retry through **
55
71
  | `web_browse` | agent-browser is missing or its batch fails (not on caller validation errors) |
56
72
  | `web_batch_fetch` | (no fallback — Firecrawl batch scrape is not keyless) |
57
73
 
58
- The three `firecrawl_*` tools are the explicit escape hatches for capabilities the local backends lack (`github`/`research`/`pdf` search categories, cloud rendering, natural-language interaction).
74
+ The three `firecrawl_*` tools are fallback-only explicit escape hatches for capabilities the local backends lack (`github`/`research`/`pdf` search categories, cloud rendering, natural-language interaction). They are not the first step for ordinary URL reading; `web_fetch` already performs Firecrawl fallback internally when local fetching fails.
59
75
 
60
- **Graceful skip.** If the fallback itself cannot help — the CLI is missing, the IP is flagged as suspicious, the keyless quota is exhausted, or the fallback is disabled — the tool falls through to the original local-tool error so the user is never left worse off.
76
+ **Graceful skip.** If the fallback itself cannot help — the runner is missing, the IP is flagged as suspicious, the keyless quota is exhausted, or the fallback is disabled — the tool falls through to the original local-tool error so the user is never left worse off. `firecrawlRunner` defaults to `installed`; `npx` and `bunx` are opt-in because they may run or download packages at fallback time.
61
77
 
62
78
  **Credit budgeting.** Search ≈ 2 credits / 10 results, scrape ≈ 1 credit / page, interact ≈ 2 credits/min (code-only) or ≈ 7 credits/min (AI prompt). Results report `creditsUsed` where the source provides it. The fallback stays conservative (small limits) against the 1,000 credits/month allowance.
63
79
 
64
- **Privacy.** Firecrawl is a cloud service: when the fallback runs, the URL/query and page content leave the machine. Set `PI_WEB_FIRECRAWL_FALLBACK=0` to enforce a strict local-only, no-cloud-egress policy. The fallback is **keyless-only** — it never reads, stores, or sends an API key, and spawns the CLI under an isolated temporary `HOME`.
80
+ **Privacy.** Firecrawl is a cloud service: when the fallback runs, the URL/query and page content leave the machine. Set `PI_WEB_FIRECRAWL_FALLBACK=0` or toolkit config `"firecrawlFallback": false` to enforce a strict local-only, no-cloud-egress policy. The fallback is **keyless-only** — it never reads, stores, or sends an API key, and spawns the CLI under an isolated temporary `HOME`.
65
81
 
66
82
  ---
package/docs/tools.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Tool Reference
2
2
 
3
+ ## Runtime configuration
4
+
5
+ Tools resolve runtime configuration in this order of precedence, highest first: environment variables, toolkit config, then built-in defaults. The installer writes toolkit config to `${XDG_CONFIG_HOME:-~/.config}/pi-web-toolkit/config.json`; override that path with `PI_WEB_TOOLKIT_CONFIG`.
6
+
7
+ | Variable | Toolkit config key | Default | Used By |
8
+ |----------|--------------------|---------|---------|
9
+ | `SEARXNG_URL` | `searxngUrl` | `http://localhost:8080` | `web_search` |
10
+ | `PI_WEB_FIRECRAWL_FALLBACK` | `firecrawlFallback` | `true` | Firecrawl fallback paths |
11
+ | `PI_WEB_FIRECRAWL_RUNNER` | `firecrawlRunner` | `installed` | Firecrawl fallback paths |
12
+ | `SCRAPLING_BIN` | `commands.scrapling` | `scrapling` | `web_fetch`, `web_batch_fetch` |
13
+ | `AGENT_BROWSER_BIN` | `commands.agentBrowser` | `agent-browser` | `web_browse` |
14
+ | `FIRECRAWL_BIN` | `commands.firecrawl` | `firecrawl` | `firecrawl_*` and fallback paths |
15
+
16
+ If the toolkit config file exists but is malformed, tools fail with a clear config error instead of silently ignoring the file. `firecrawlRunner` accepts `installed`, `npx`, or `bunx`; `npx` and `bunx` are opt-in because they may run or download packages at fallback time.
17
+
3
18
  ## `web_search`
4
19
 
5
20
  Search the web via SearXNG. Returns ranked results with title, URL, and snippet. Automatically aggregates up to 3 pages of SearXNG results when more than ~20 are needed.
@@ -12,7 +27,7 @@ Search the web via SearXNG. Returns ranked results with title, URL, and snippet.
12
27
  }
13
28
  ```
14
29
 
15
- **When to use:** The user asks about current events, facts, or anything requiring up-to-date information and has not already provided the source URLs.
30
+ **When to use:** The user asks about current events, facts, or anything requiring up-to-date information and has not already provided the source URLs. Use `web_search` before `firecrawl_search`; `web_search` already performs Firecrawl fallback internally when SearXNG fails or returns nothing.
16
31
 
17
32
  **Empty results behavior:** When no results are found, `web_search` includes any query **suggestions** provided by SearXNG. The agent can use them to refine and retry the search.
18
33
 
@@ -35,9 +50,10 @@ Fetch a single page and convert it to clean markdown. Uses Scrapling's browser-b
35
50
  ```
36
51
 
37
52
  **When to use:**
38
- - After `web_search` finds a relevant result
53
+ - As the first attempt for a user-provided URL or after `web_search` finds a relevant result
39
54
  - The page is static or loads its content on first request
40
55
  - You need to read **one** article, doc, or blog post
56
+ - Before `firecrawl_scrape`; `web_fetch` already performs Firecrawl fallback internally when the local fetcher fails
41
57
 
42
58
  **Example flow:**
43
59
  ```
@@ -77,10 +93,12 @@ Uses the [agent-browser](https://github.com/vercel-labs/agent-browser) CLI with
77
93
  When `selector` is omitted, the tool returns agent-browser's interactive accessibility snapshot rather than full page text.
78
94
 
79
95
  **When to use:**
96
+ - As the first attempt when the page requires interaction
80
97
  - The page requires **clicking** before showing target content (e.g. "Load more", pagination, tab switching)
81
98
  - The page requires **filling a form** (e.g. search box, login)
82
99
  - The page requires **scrolling** to load lazy content (infinite scroll)
83
100
  - The page requires **waiting** for JS to render content (SPA)
101
+ - Before `firecrawl_interact`; `web_browse` already performs Firecrawl fallback internally when local browser automation fails
84
102
 
85
103
  **Example flows:**
86
104
 
@@ -163,11 +181,11 @@ User: "Compare Python asyncio, Trio, and curio"
163
181
 
164
182
  ---
165
183
 
166
- ## Firecrawl keyless tools (optional cloud escape hatches)
184
+ ## Firecrawl keyless tools (optional fallback-only cloud escape hatches)
167
185
 
168
186
  These three tools talk to [Firecrawl](https://www.firecrawl.dev) in **keyless** mode: 1,000 free credits/month, **no API key and no signup**. They require the optional `firecrawl-cli` (`npm install -g firecrawl-cli`). **Privacy:** the URL/query/page content is sent to Firecrawl's cloud.
169
187
 
170
- They double as the implementation of the automatic fallback: `web_search`/`web_fetch`/`web_browse` retry through Firecrawl keyless when their local backend fails (or search returns nothing). Disable all Firecrawl usage with `PI_WEB_FIRECRAWL_FALLBACK=0`.
188
+ They double as the implementation of the automatic fallback: `web_search`/`web_fetch`/`web_browse` retry through Firecrawl keyless when their local backend fails (or search returns nothing). Do not use `firecrawl_*` as the first attempt for ordinary search, URL reading, or page interaction; use the corresponding local-first tool first unless the user explicitly asks for Firecrawl/cloud behavior. Disable all Firecrawl usage with `PI_WEB_FIRECRAWL_FALLBACK=0` or toolkit config `"firecrawlFallback": false`.
171
189
 
172
190
  ### `firecrawl_search`
173
191
 
@@ -187,7 +205,7 @@ Cloud web search via Firecrawl keyless, with capabilities the local SearXNG tool
187
205
  }
188
206
  ```
189
207
 
190
- **When to use:** `web_search` failed or returned nothing; or you need `github`/`research`/`pdf` categories, images/news sources, or domain scoping that SearXNG does not provide.
208
+ **When to use:** `web_search` failed or returned nothing; you need `github`/`research`/`pdf` categories, images/news sources, or domain scoping that SearXNG does not provide; or the user explicitly asked for Firecrawl/cloud search. Do not use it before `web_search` for ordinary discovery.
191
209
 
192
210
  ### `firecrawl_scrape`
193
211
 
@@ -203,7 +221,7 @@ Cloud single-page fetch via Firecrawl keyless (anti-bot bypass, JS rendering, PD
203
221
  }
204
222
  ```
205
223
 
206
- **When to use:** `web_fetch` failed on an anti-bot-protected, JavaScript-heavy, or PDF page.
224
+ **When to use:** `web_fetch` failed on an anti-bot-protected, JavaScript-heavy, or PDF page, or the user explicitly asked for Firecrawl/cloud scraping. Do not use it before `web_fetch` for ordinary URL reading.
207
225
 
208
226
  ### `firecrawl_interact`
209
227
 
@@ -219,6 +237,6 @@ Open a URL in a live Firecrawl browser session and drive it with a natural-langu
219
237
  }
220
238
  ```
221
239
 
222
- **When to use:** `web_browse` cannot run (agent-browser missing / OS deps missing), or you want natural-language page interaction without hand-written CSS selectors. Write each prompt as a single, focused task.
240
+ **When to use:** `web_browse` cannot run (agent-browser missing / OS deps missing), you need natural-language page interaction without hand-written CSS selectors, or the user explicitly asked for Firecrawl/cloud interaction. Do not use it before `web_browse` for ordinary page interaction. Write each prompt as a single, focused task.
223
241
 
224
242
  ---
@@ -39,18 +39,18 @@ const firecrawlInteractTool = defineTool({
39
39
  name: "firecrawl_interact",
40
40
  label: "Firecrawl Interact",
41
41
  description: [
42
+ "Fallback-only cloud browser interaction via Firecrawl keyless.",
43
+ "Do not use firecrawl_interact as the first attempt for ordinary page interaction; use web_browse first.",
42
44
  "Open a URL in a live Firecrawl browser session and drive it with a natural-language",
43
- "prompt (or code), returning the result. Keyless no API key, no signup.",
44
- "Use firecrawl_interact when the local web_browse cannot run, or when you want",
45
- "natural-language page interaction without CSS selectors.",
45
+ "prompt (or code), returning the result. Use only when web_browse cannot run,",
46
+ "when the user explicitly asks for Firecrawl/cloud interaction, or when you need natural-language page interaction without CSS selectors.",
46
47
  "Privacy: the URL, page content, and prompt are sent to Firecrawl's cloud.",
47
48
  `Output is truncated to ${DEFAULT_MAX_LINES} lines or ${formatSize(DEFAULT_MAX_BYTES)}; if truncated, full output is saved to a temp file.`,
48
49
  ].join(" "),
49
- promptSnippet: "Drive a page via Firecrawl keyless (natural-language interaction)",
50
+ promptSnippet: "Fallback-only Firecrawl interaction",
50
51
  promptGuidelines: [
51
- "Prefer web_browse first; reach for firecrawl_interact when web_browse can't run or you want NL interaction.",
52
- "Write each prompt as a single, focused task; the session can be reused across calls.",
53
- "Always pass the full URL including https://.",
52
+ "Use firecrawl_interact only after web_browse fails, for needed NL interaction, or explicit cloud interaction.",
53
+ "Keep firecrawl_interact prompt/code focused.",
54
54
  ],
55
55
  parameters: FirecrawlInteractParamsSchema,
56
56
 
@@ -95,13 +95,6 @@ const firecrawlInteractTool = defineTool({
95
95
 
96
96
  renderResult(result, { expanded, isPartial }, theme, context) {
97
97
  const isError = context?.isError ?? false;
98
-
99
- if (isPartial) {
100
- const domain = details?.url ? getDomain(details.url) : "";
101
- const label = domain ? `Interacting with ${domain} via Firecrawl...` : "Interacting via Firecrawl...";
102
- return new Text(theme.fg("warning", label), 0, 0);
103
- }
104
-
105
98
  const details = result.details as {
106
99
  url?: string;
107
100
  output?: string;
@@ -111,6 +104,12 @@ const firecrawlInteractTool = defineTool({
111
104
  creditsUsed?: number;
112
105
  } | undefined;
113
106
 
107
+ if (isPartial) {
108
+ const domain = details?.url ? getDomain(details.url) : "";
109
+ const label = domain ? `Interacting with ${domain} via Firecrawl...` : "Interacting via Firecrawl...";
110
+ return new Text(theme.fg("warning", label), 0, 0);
111
+ }
112
+
114
113
  if (isError) {
115
114
  const errText = getErrorText(result);
116
115
  let text = theme.fg("error", "✗ Firecrawl interact failed");