webveil 0.0.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +326 -0
- package/dist/cli.d.ts +58 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +91 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/backends/custom.d.ts +15 -0
- package/dist/core/backends/custom.d.ts.map +1 -0
- package/dist/core/backends/custom.js +106 -0
- package/dist/core/backends/custom.js.map +1 -0
- package/dist/core/backends/registry.d.ts +13 -0
- package/dist/core/backends/registry.d.ts.map +1 -0
- package/dist/core/backends/registry.js +31 -0
- package/dist/core/backends/registry.js.map +1 -0
- package/dist/core/backends/searxng.d.ts +8 -0
- package/dist/core/backends/searxng.d.ts.map +1 -0
- package/dist/core/backends/searxng.js +43 -0
- package/dist/core/backends/searxng.js.map +1 -0
- package/dist/core/backends/tavily-compat.d.ts +10 -0
- package/dist/core/backends/tavily-compat.d.ts.map +1 -0
- package/dist/core/backends/tavily-compat.js +85 -0
- package/dist/core/backends/tavily-compat.js.map +1 -0
- package/dist/core/backends/types.d.ts +48 -0
- package/dist/core/backends/types.d.ts.map +1 -0
- package/dist/core/backends/types.js +5 -0
- package/dist/core/backends/types.js.map +1 -0
- package/dist/core/baseurl.d.ts +42 -0
- package/dist/core/baseurl.d.ts.map +1 -0
- package/dist/core/baseurl.js +79 -0
- package/dist/core/baseurl.js.map +1 -0
- package/dist/core/config.d.ts +39 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +72 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/egress.d.ts +46 -0
- package/dist/core/egress.d.ts.map +1 -0
- package/dist/core/egress.js +113 -0
- package/dist/core/egress.js.map +1 -0
- package/dist/core/extract.d.ts +45 -0
- package/dist/core/extract.d.ts.map +1 -0
- package/dist/core/extract.js +36 -0
- package/dist/core/extract.js.map +1 -0
- package/dist/core/fetch.d.ts +42 -0
- package/dist/core/fetch.d.ts.map +1 -0
- package/dist/core/fetch.js +76 -0
- package/dist/core/fetch.js.map +1 -0
- package/dist/core/http.d.ts +8 -0
- package/dist/core/http.d.ts.map +1 -0
- package/dist/core/http.js +49 -0
- package/dist/core/http.js.map +1 -0
- package/dist/core/search.d.ts +34 -0
- package/dist/core/search.d.ts.map +1 -0
- package/dist/core/search.js +92 -0
- package/dist/core/search.js.map +1 -0
- package/dist/core/security.d.ts +35 -0
- package/dist/core/security.d.ts.map +1 -0
- package/dist/core/security.js +141 -0
- package/dist/core/security.js.map +1 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +40 -0
- package/dist/index.js.map +1 -0
- package/package.json +62 -2
- package/src/cli.ts +106 -0
- package/src/core/backends/custom.ts +159 -0
- package/src/core/backends/registry.ts +41 -0
- package/src/core/backends/searxng.ts +70 -0
- package/src/core/backends/tavily-compat.ts +156 -0
- package/src/core/backends/types.ts +61 -0
- package/src/core/baseurl.ts +104 -0
- package/src/core/config.ts +106 -0
- package/src/core/egress.ts +134 -0
- package/src/core/extract.ts +82 -0
- package/src/core/fetch.ts +132 -0
- package/src/core/http.ts +62 -0
- package/src/core/search.ts +140 -0
- package/src/core/security.ts +141 -0
- package/src/index.ts +82 -0
package/README.md
ADDED
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
# webveil
|
|
2
|
+
|
|
3
|
+
**Anonymous-capable, self-hosted, account-free** web **search + fetch** for AI agents.
|
|
4
|
+
|
|
5
|
+
webveil replaces account-bound tools (notably Ollama's `web_search` / `web_fetch`, which
|
|
6
|
+
proxy a hosted service and sign every request with your account identity) with a
|
|
7
|
+
self-hosted path that has **no account, no API key**, and an **egress you control**
|
|
8
|
+
(direct, HTTP proxy, or SOCKS5/Tor) so searches and fetches can be anonymous. It also
|
|
9
|
+
works perfectly well non-anonymously (direct egress).
|
|
10
|
+
|
|
11
|
+
## Packages
|
|
12
|
+
|
|
13
|
+
webveil is a pnpm workspace monorepo. The **core** (`search()` / `fetch()`) is plain and
|
|
14
|
+
framework-agnostic. Two thin frontends wrap that same core:
|
|
15
|
+
|
|
16
|
+
- **[`webveil`](packages/webveil)**, an [incur](https://github.com/wevm/incur)-based
|
|
17
|
+
**CLI + MCP server** (`--mcp`, skills, `--llms`, TOON output). Pi-agnostic; usable by any
|
|
18
|
+
agent (pi via pi-mcp-adapter, Claude Code, Cursor, Codex, bash). Has a `webveil` bin.
|
|
19
|
+
- **[`pi-webveil`](packages/pi-webveil)**, a **pi extension** registering `web_search` and
|
|
20
|
+
`web_fetch` tools that call the core in-process. A drop-in replacement for Ollama's tools
|
|
21
|
+
(same names), which is the original motivation. Depends on `webveil` via `workspace:*`.
|
|
22
|
+
|
|
23
|
+
## Quick start
|
|
24
|
+
|
|
25
|
+
webveil needs a **backend** to get results from. The zero-config default is a local
|
|
26
|
+
**SearXNG** at `http://127.0.0.1:8080` on `direct` egress (non-anonymous). There is
|
|
27
|
+
**no** zero-setup + anonymous + real-web-results option in the ecosystem, see
|
|
28
|
+
[`work/notes/ideas/default-backend-policy-account-vs-origin.md`](work/notes/ideas/default-backend-policy-account-vs-origin.md);
|
|
29
|
+
SearXNG (you run it) is the closest, `tavily-compat` (needs an account/key) is the other.
|
|
30
|
+
|
|
31
|
+
### Run SearXNG (matches the default with no config)
|
|
32
|
+
|
|
33
|
+
```sh
|
|
34
|
+
# Docker: the container binds 8080 internally; map host 8080 -> container 8080
|
|
35
|
+
# so it matches webveil's default baseUrl exactly.
|
|
36
|
+
docker run -d --name searxng -p 8080:8080 searxng/searxng
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Then `webveil search "…"` / `web_fetch` work with no config.
|
|
40
|
+
|
|
41
|
+
> **Port gotcha (you WILL hit this):** SearXNG's default port depends on how you install
|
|
42
|
+
> it. A bare-metal / pip / source install defaults to **8888** (`settings.yml`
|
|
43
|
+
> `server.port: 8888`). The Docker image binds **8080** internally regardless (its
|
|
44
|
+
> entrypoint forces `0.0.0.0:8080`). SearXNG's own docs suggest `docker run … -p 8888:8080`
|
|
45
|
+
> (host 8888 → container 8080). webveil's default expects **8080**. If your instance is on
|
|
46
|
+
> any other port, point webveil at it:
|
|
47
|
+
>
|
|
48
|
+
> ```sh
|
|
49
|
+
> export WEBVEIL_BASE_URL=http://127.0.0.1:8888 # or wherever your instance listens
|
|
50
|
+
> ```
|
|
51
|
+
>
|
|
52
|
+
> or set `baseUrl` in `.pi/webveil.json` (see config seam below).
|
|
53
|
+
|
|
54
|
+
### Other SearXNG install options
|
|
55
|
+
|
|
56
|
+
webveil needs something to point `baseUrl` at: an **HTTP `host:port`**, or (script install)
|
|
57
|
+
the **Unix socket** itself. How you get one:
|
|
58
|
+
|
|
59
|
+
- **Docker (above)**, binds a real TCP port directly; simplest if you only need webveil.
|
|
60
|
+
- **Install script as a background service** (`sudo -H ./utils/searxng.sh install all`,
|
|
61
|
+
see <https://docs.searxng.org/admin/installation-scripts.html>), sets SearXNG up as a
|
|
62
|
+
systemd/uWSGI service. **Gotcha:** by default this listens on a **Unix socket**
|
|
63
|
+
(`socket = /usr/local/searxng/run/socket`), NOT a TCP port. And, crucially, that default
|
|
64
|
+
socket speaks the **native uwsgi protocol, NOT HTTP** (`socket = …`, not `http-socket =
|
|
65
|
+
…`), so even a `curl --unix-socket … http://localhost/` returns HTTP 000. webveil's
|
|
66
|
+
`unix:` baseUrl speaks **HTTP over a unix socket** via undici, so it CANNOT reach that
|
|
67
|
+
default uwsgi socket directly. Three ways to reach the install-script instance:
|
|
68
|
+
- **Point webveil straight at an HTTP unix socket** (no proxy, no extra process), once the
|
|
69
|
+
socket actually speaks HTTP. The install-script default does NOT, so first make uWSGI
|
|
70
|
+
serve HTTP on the socket: in the generated `.ini`, replace
|
|
71
|
+
`socket = /usr/local/searxng/run/socket` with
|
|
72
|
+
`http-socket = /usr/local/searxng/run/socket` (HTTP over the socket instead of the
|
|
73
|
+
uwsgi protocol). THEN point webveil at it with a `unix:` URL naming the socket file:
|
|
74
|
+
```sh
|
|
75
|
+
export WEBVEIL_BASE_URL=unix:/usr/local/searxng/run/socket
|
|
76
|
+
```
|
|
77
|
+
webveil dials the socket directly over undici (`Agent({connect:{socketPath}})`, no
|
|
78
|
+
extra dependency) and issues its normal `/search?...&format=json` request. The grammar
|
|
79
|
+
is `unix:<socketPath>[:<httpPath>]`: the socket file path, then an OPTIONAL `:` +
|
|
80
|
+
base path (mount point) the SearXNG app lives under (defaults to `/`, so the example
|
|
81
|
+
above requests `/search`; a non-root mount is `unix:/usr/local/searxng/run/socket:/searxng`).
|
|
82
|
+
(`unix:` works against ANY HTTP-on-a-unix-socket server, e.g. a Caddy/nginx upstream
|
|
83
|
+
bound to a socket; the uwsgi-vs-`http-socket` distinction above is the SearXNG-specific
|
|
84
|
+
catch.)
|
|
85
|
+
**Egress must be `direct`** for this: a Unix socket is inherently local, so combining a
|
|
86
|
+
`unix:` baseUrl with `egress=http`/`socks5` fails loud (proxying a local hop is fake
|
|
87
|
+
anonymity, see "Where does anonymity live?" below; proxy SearXNG's `outgoing.proxies`
|
|
88
|
+
instead and keep webveil `direct`).
|
|
89
|
+
- **Front it with a reverse proxy** (this is what the SearXNG docs' nginx/apache step is
|
|
90
|
+
for, it bridges HTTP-on-a-port to the uWSGI socket, serving BOTH the browser UI and
|
|
91
|
+
webveil). **Any HTTP server works**, the docs say so explicitly; **Caddy is fine** and
|
|
92
|
+
a good pick if you already run it. Plain Caddy `reverse_proxy` speaks **HTTP** to its
|
|
93
|
+
upstream, so point it at a Unix-socket `http-socket` (as in the example below) or a TCP `http-socket`:
|
|
94
|
+
```caddy
|
|
95
|
+
searxng.example.com {
|
|
96
|
+
reverse_proxy unix//usr/local/searxng/run/socket # plain reverse_proxy = HTTP, so the socket must be http-socket = (not the uwsgi socket =)
|
|
97
|
+
}
|
|
98
|
+
```
|
|
99
|
+
Then point webveil at the Caddy address. (Set SearXNG's `server.base_url` in
|
|
100
|
+
`settings.yml` to match, and keep the limiter in mind, see below.) If you want a Caddy
|
|
101
|
+
frontend AND webveil-direct, the simplest path is ONE `http-socket` that both consume
|
|
102
|
+
(Caddy's HTTP `reverse_proxy` and webveil's `unix:` both speak HTTP to it); you only
|
|
103
|
+
need the uwsgi `socket = ` form if Caddy uses an explicit uwsgi transport.
|
|
104
|
+
- **Or make uWSGI listen on a TCP port** instead of the socket: in the generated
|
|
105
|
+
`.ini`, replace `socket = …/run/socket` with `http-socket = 127.0.0.1:8888`, then point
|
|
106
|
+
webveil at `http://127.0.0.1:8888`. Good when you want ONLY webveil (no public web UI /
|
|
107
|
+
TLS).
|
|
108
|
+
|
|
109
|
+
> **You will also need to enable the JSON API and (for a local instance) disable the
|
|
110
|
+
> limiter.** A fresh script install ships with `server.limiter: true` and often no `json`
|
|
111
|
+
> output format, so webveil gets `429 TOO MANY REQUESTS` or an HTML page. In SearXNG's
|
|
112
|
+
> `settings.yml` set `server.limiter: false` + `server.public_instance: false` (safe for a
|
|
113
|
+
> LOCAL, socket-only instance, NOT internet-exposed) and add `json` under `search.formats:`
|
|
114
|
+
> (`[html, json]`), then restart uWSGI. This applies to EVERY option above, it is a
|
|
115
|
+
> SearXNG-side requirement, not a webveil one.
|
|
116
|
+
|
|
117
|
+
Full SearXNG install options (Docker, Compose, script, bare-metal): the official docs at
|
|
118
|
+
<https://docs.searxng.org/admin/installation.html>. Install topology + the
|
|
119
|
+
uwsgi-vs-`http-socket`, limiter, and reverse-proxy details captured in
|
|
120
|
+
[`work/notes/findings/searxng-install-topology.md`](work/notes/findings/searxng-install-topology.md)
|
|
121
|
+
and
|
|
122
|
+
[`work/notes/findings/searxng-script-socket-is-uwsgi-not-http.md`](work/notes/findings/searxng-script-socket-is-uwsgi-not-http.md).
|
|
123
|
+
|
|
124
|
+
### Where does anonymity live? (read before turning on egress)
|
|
125
|
+
|
|
126
|
+
**webveil's egress only anonymizes webveil's OWN outbound hop** (webveil → backend, and
|
|
127
|
+
`web_fetch` → the target URL). It does NOT anonymize what a backend does next. This has a
|
|
128
|
+
load-bearing consequence for SearXNG:
|
|
129
|
+
|
|
130
|
+
- A **local** SearXNG makes its actual search-engine requests (→ Google/Bing/…) from
|
|
131
|
+
**its own process, on your machine, with your real IP**. That hop is OUTSIDE webveil's
|
|
132
|
+
egress. So setting `WEBVEIL_EGRESS=socks5` while `baseUrl` is `127.0.0.1` does **NOT**
|
|
133
|
+
make your searches anonymous: webveil would just be proxying a pointless localhost call,
|
|
134
|
+
while SearXNG crawls the web from your real IP. That is **false confidence**, the worst
|
|
135
|
+
outcome.
|
|
136
|
+
- **webveil refuses this combo (fail-loud):** a non-`direct` egress (`http`/`socks5`) with
|
|
137
|
+
a **loopback `baseUrl`** is rejected with an error, rather than silently giving you fake
|
|
138
|
+
anonymity. (A *remote* SearXNG over SOCKS is legitimate and allowed; the guard keys on
|
|
139
|
+
loopback specifically.)
|
|
140
|
+
|
|
141
|
+
So the correct setups:
|
|
142
|
+
|
|
143
|
+
| Goal | webveil egress | backend | Who anonymizes the web hop |
|
|
144
|
+
| --- | --- | --- | --- |
|
|
145
|
+
| Local SearXNG, anonymous searches | `direct` | local SearXNG | **SearXNG itself**, set its `outgoing.proxies` (Tor/SOCKS) in `settings.yml` |
|
|
146
|
+
| Remote SearXNG, hide your IP from it | `socks5` | the **remote** SearXNG url | webveil's hop (Mullvad/Tor) |
|
|
147
|
+
| Anonymous `web_fetch` of arbitrary URLs | `socks5` | (any) | webveil's hop |
|
|
148
|
+
| Non-anonymous everyday use | `direct` | local SearXNG | nobody (honest) |
|
|
149
|
+
|
|
150
|
+
Rule of thumb: **proxy the hop that actually reaches the public internet.** For a
|
|
151
|
+
self-hosted SearXNG that hop is SearXNG's, so the proxy goes on SearXNG
|
|
152
|
+
(`outgoing.proxies`), and webveil stays `direct`. webveil's `socks5` mode is for *remote*
|
|
153
|
+
backends and for `web_fetch`. See
|
|
154
|
+
[`work/notes/findings/webveil-anonymity-boundary.md`](work/notes/findings/webveil-anonymity-boundary.md).
|
|
155
|
+
|
|
156
|
+
## How it works (seams)
|
|
157
|
+
|
|
158
|
+
- **core**, the framework-agnostic `search(query, opts)` and `fetch(url, opts)` functions.
|
|
159
|
+
Both frontends call the same core.
|
|
160
|
+
- **backend seam**, where results/content come from: `searxng` (keyless self-hosted
|
|
161
|
+
metasearch), `tavily-compat` (a generic Tavily-shaped `/search` + `/extract`), and
|
|
162
|
+
`custom` (a local command via a JSON stdin/stdout contract). The backend is handed a
|
|
163
|
+
proxied `http` helper so it cannot bypass egress.
|
|
164
|
+
- **egress seam**, how outbound HTTP leaves the machine: `direct`, `http` (undici
|
|
165
|
+
`ProxyAgent`), or `socks5` (Tor `127.0.0.1:9050`, Mullvad `10.64.0.1:1080`). SOCKS5 is
|
|
166
|
+
the mode that matters for anonymity. webveil fails loud if a configured proxy cannot be built.
|
|
167
|
+
**Egress is per-request and scoped to webveil ONLY**, it is NOT a system-wide proxy. It
|
|
168
|
+
governs webveil's own search/fetch traffic (and the `fetch` it injects into distilly),
|
|
169
|
+
and nothing else: your shell, `git push`, the browser, and the OS are untouched. So
|
|
170
|
+
webveil on `socks5` does NOT route your `git push` through the proxy. See
|
|
171
|
+
[Anonymous egress](#anonymous-egress-mullvad--tor) and
|
|
172
|
+
[`work/notes/findings/mullvad-socks5-egress-mechanics.md`](work/notes/findings/mullvad-socks5-egress-mechanics.md).
|
|
173
|
+
- **config seam**, per-folder resolution: env > nearest `.pi/webveil.json` walking up from
|
|
174
|
+
cwd > global `~/.pi/agent/webveil.json` > defaults. Per folder = per account/egress.
|
|
175
|
+
- **extractor seam**, `urlToMarkdown` via `distilly/fetch` by default, injected with
|
|
176
|
+
webveil's egress-bound `fetch`; a backend's own `/extract` (Tavily-compat) may override
|
|
177
|
+
it. Owns the context-friendly markdown + size presets (`s`/`m`/`l`/`f`). See
|
|
178
|
+
[`docs/adr/0001`](docs/adr/0001-extractor-uses-distilly-fetch-with-injected-egress.md).
|
|
179
|
+
- **security**, an SSRF guard lives in the egress fetch, so it covers distilly's
|
|
180
|
+
rule-rewritten requests too.
|
|
181
|
+
|
|
182
|
+
## Anonymous egress (Mullvad / Tor)
|
|
183
|
+
|
|
184
|
+
By default webveil uses `direct` egress (your real IP, non-anonymous). Anonymity is
|
|
185
|
+
**opt-in**: it is enabled ONLY when you set it in config/env. webveil never auto-enables a
|
|
186
|
+
proxy (silent anonymity would be a footgun in the other direction).
|
|
187
|
+
|
|
188
|
+
Enable SOCKS5 egress for webveil:
|
|
189
|
+
|
|
190
|
+
```sh
|
|
191
|
+
export WEBVEIL_EGRESS=socks5
|
|
192
|
+
export WEBVEIL_EGRESS_URL=socks5://10.64.0.1:1080 # Mullvad
|
|
193
|
+
# or socks5://127.0.0.1:9050 # Tor
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
or per folder in `.pi/webveil.json`:
|
|
197
|
+
|
|
198
|
+
```json
|
|
199
|
+
{ "egress": { "mode": "socks5", "url": "socks5://10.64.0.1:1080" } }
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Two layers keep your `git push` (and everything else) off the proxy
|
|
203
|
+
|
|
204
|
+
A common worry: "if I route through Mullvad, will my `git push` to GitHub leak under the
|
|
205
|
+
VPN exit IP?" With webveil, **no**, for two independent reasons:
|
|
206
|
+
|
|
207
|
+
1. **webveil's egress is per-request and webveil-only.** It applies the SOCKS5 dispatcher
|
|
208
|
+
inside its own search/fetch code; it does not install a system proxy. `git`, your shell,
|
|
209
|
+
and the OS are never touched. webveil on `socks5` proxies webveil's traffic and nothing
|
|
210
|
+
else.
|
|
211
|
+
2. **You configure split routing** (below) so that even at the OS level, only the proxy IP
|
|
212
|
+
goes through the tunnel.
|
|
213
|
+
|
|
214
|
+
### Mullvad: use the SOCKS5 proxy WITHOUT tunnelling all your traffic
|
|
215
|
+
|
|
216
|
+
Mullvad's SOCKS5 proxy at `10.64.0.1:1080` **only exists while a Mullvad WireGuard tunnel
|
|
217
|
+
is up** (it is reachable only through the tunnel). The trick is to keep the tunnel up but
|
|
218
|
+
tell WireGuard NOT to route your normal traffic through it, only the proxy IP. Add this to
|
|
219
|
+
your Mullvad WireGuard `.conf` (`[Interface]` section):
|
|
220
|
+
|
|
221
|
+
```ini
|
|
222
|
+
Table = off
|
|
223
|
+
PostUp = ip -4 route add 10.64.0.1/32 dev %i; ip -4 route add 10.124.0.0/22 dev %i
|
|
224
|
+
PreDown = ip -4 route delete 10.64.0.1/32 dev %i; ip -4 route delete 10.124.0.0/22 dev %i
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
`Table = off` stops WireGuard from grabbing the default route; the manual routes send ONLY
|
|
228
|
+
Mullvad's SOCKS5 proxy IPs through the tunnel (`10.124.0.0/22` is the multihop range).
|
|
229
|
+
Result: webveil's SOCKS5 requests exit via Mullvad; all other traffic (git, browser, OS)
|
|
230
|
+
uses your normal ISP connection. (Simpler alternative: leave WireGuard's routing alone and
|
|
231
|
+
rely on layer 1, but split routing is the belt-and-braces version.)
|
|
232
|
+
|
|
233
|
+
Verify the proxy works: `curl https://ipv4.am.i.mullvad.net --socks5-hostname 10.64.0.1`
|
|
234
|
+
should return a Mullvad exit IP; a plain `curl https://am.i.mullvad.net` should return your
|
|
235
|
+
real IP (proving only the proxy is tunnelled).
|
|
236
|
+
|
|
237
|
+
### "Different exit identity for webveil than for the rest of the machine"
|
|
238
|
+
|
|
239
|
+
If you want webveil to exit somewhere different from your system, you have options, but be
|
|
240
|
+
clear on what is and isn't possible (see
|
|
241
|
+
[`work/notes/findings/mullvad-socks5-egress-mechanics.md`](work/notes/findings/mullvad-socks5-egress-mechanics.md)):
|
|
242
|
+
|
|
243
|
+
- **Different exit LOCATION, same account (easy).** Point webveil at a specific multihop
|
|
244
|
+
SOCKS5 host so it exits elsewhere than your tunnel's entry:
|
|
245
|
+
`WEBVEIL_EGRESS_URL=socks5://us-nyc-wg-socks5-001.relays.mullvad.net:1080`. Your tunnel
|
|
246
|
+
enters where your Mullvad app is connected; webveil's traffic exits in NYC. Same Mullvad
|
|
247
|
+
account, unlinkable-by-location.
|
|
248
|
+
- **Two DIFFERENT Mullvad ACCOUNTS at once (hard, not a webveil feature).** Mullvad's
|
|
249
|
+
SOCKS5 proxy is a property of the ONE active WireGuard tunnel, which is tied to ONE
|
|
250
|
+
account's key. SOCKS5 multihop changes exit location, NOT account. To run account A
|
|
251
|
+
system-wide AND account B for webveil simultaneously, you must isolate them at the OS
|
|
252
|
+
level: run webveil inside its own network namespace / VM / container that has its own
|
|
253
|
+
WireGuard tunnel on account B, while the host runs account A. That is infrastructure work
|
|
254
|
+
outside webveil. For most people, "don't link my searches to my git" is already solved by
|
|
255
|
+
split routing above (searches exit via Mullvad, git stays on your real IP, not correlated
|
|
256
|
+
by exit IP), without needing a second account.
|
|
257
|
+
|
|
258
|
+
### Tor
|
|
259
|
+
|
|
260
|
+
`WEBVEIL_EGRESS_URL=socks5://127.0.0.1:9050` with the Tor daemon running. Same per-request,
|
|
261
|
+
webveil-only scoping applies.
|
|
262
|
+
|
|
263
|
+
> **Caveat:** webveil's `socks5` mode is NOT a whole-machine VPN. Do not assume enabling it
|
|
264
|
+
> anonymizes anything other than webveil. Conversely, a system-wide full-tunnel VPN under
|
|
265
|
+
> your logged-in identity is the thing that CAN deanonymize a `git push`; webveil's scoped
|
|
266
|
+
> egress deliberately avoids that.
|
|
267
|
+
|
|
268
|
+
## License
|
|
269
|
+
|
|
270
|
+
AGPL-3.0-or-later. webveil depends on `distilly` (MIT, the local HTML-to-markdown
|
|
271
|
+
extractor; webveil uses its networked `distilly/fetch` entrypoint with an injected egress
|
|
272
|
+
fetch) and `incur` (MIT). MIT code may be used by AGPL software; `distilly` stays
|
|
273
|
+
GPL/AGPL-free so it remains cleanly reusable under MIT. See [`LICENSE`](LICENSE) and
|
|
274
|
+
[`COPYRIGHT`](COPYRIGHT).
|
|
275
|
+
|
|
276
|
+
## Size discipline (per-module LOC)
|
|
277
|
+
|
|
278
|
+
Every module stays small with one responsibility. Per-module LOC is tracked here as a
|
|
279
|
+
first-class quality signal. `target` is the rough ceiling from `CONTEXT.md` (a ceiling, not
|
|
280
|
+
a promise); `LOC` is the actual line count of the listed source file as built (not the compiled `dist/` output).
|
|
281
|
+
|
|
282
|
+
### `packages/webveil` (core + CLI/MCP frontend)
|
|
283
|
+
|
|
284
|
+
| module | LOC | target |
|
|
285
|
+
| ---------------------------------- | ---: | -----: |
|
|
286
|
+
| src/index.ts (barrel) | 82 | - |
|
|
287
|
+
| src/cli.ts (incur frontend) | 106 | ~80 |
|
|
288
|
+
| src/core/search.ts | 104 | ~90 |
|
|
289
|
+
| src/core/fetch.ts | 132 | ~90 |
|
|
290
|
+
| src/core/config.ts | 106 | ~80 |
|
|
291
|
+
| src/core/egress.ts | 106 | ~70 |
|
|
292
|
+
| src/core/http.ts | 62 | ~60 |
|
|
293
|
+
| src/core/extract.ts | 82 | ~60 |
|
|
294
|
+
| src/core/security.ts (SSRF guard) | 141 | - |
|
|
295
|
+
| src/core/backends/types.ts | 61 | ~40 |
|
|
296
|
+
| src/core/backends/registry.ts | 41 | ~60 |
|
|
297
|
+
| src/core/backends/searxng.ts | 70 | ~90 |
|
|
298
|
+
| src/core/backends/tavily-compat.ts | 156 | ~90 |
|
|
299
|
+
| src/core/backends/custom.ts | 159 | ~70 |
|
|
300
|
+
| **subtotal** | 1408 | |
|
|
301
|
+
|
|
302
|
+
### `packages/pi-webveil` (pi extension frontend)
|
|
303
|
+
|
|
304
|
+
| module | LOC | target |
|
|
305
|
+
| ------------ | --: | -----: |
|
|
306
|
+
| src/index.ts | 168 | ~90 |
|
|
307
|
+
|
|
308
|
+
**Total own source: 1576 LOC** (excluding deps).
|
|
309
|
+
|
|
310
|
+
> Reality vs. target: several modules currently exceed their `CONTEXT.md` ceilings (notably
|
|
311
|
+
> `tavily-compat.ts`, `custom.ts`, `pi-webveil/src/index.ts`), and two built modules
|
|
312
|
+
> (`index.ts` barrel and `security.ts` SSRF guard) were not in the original target list. The
|
|
313
|
+
> table above reflects the modules as actually built. For calibration, comparable pi
|
|
314
|
+
> web-search extensions: `pi-searxng-search` 350 LOC (1 backend, no egress, no fetch),
|
|
315
|
+
> `leing2021/pi-search` 1714, `pi-search-hub` 9047, `pi-web-providers` 18961. webveil
|
|
316
|
+
> delivers a 3-backend + egress + fetch + per-folder-config tool by leaning on `incur`
|
|
317
|
+
> (CLI/MCP/skills) and `distilly` (extraction).
|
|
318
|
+
|
|
319
|
+
## Develop
|
|
320
|
+
|
|
321
|
+
```sh
|
|
322
|
+
pnpm install
|
|
323
|
+
pnpm build
|
|
324
|
+
pnpm test
|
|
325
|
+
pnpm format:check
|
|
326
|
+
```
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Cli } from 'incur';
|
|
3
|
+
import { search as coreSearch } from './core/search.js';
|
|
4
|
+
import { fetch as coreFetch } from './core/fetch.js';
|
|
5
|
+
/**
|
|
6
|
+
 * Optional overrides for the two core functions the CLI commands call, seamed so tests can inject fakes.
|
|
7
|
+
 * Any member left unset defaults to the real core; a test passes spies to assert the wiring.
|
|
8
|
+
 */
|
|
9
|
+
export interface CliDeps {
|
|
10
|
+
/** Replacement for the core `search` imported from `./core/search.js`. */
search?: typeof coreSearch;
|
|
11
|
+
/** Replacement for the core `fetch` imported from `./core/fetch.js`. */
fetch?: typeof coreFetch;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
 * Build the webveil CLI. Returns the incur `Cli` so a caller (the bin entry or
|
|
15
|
+
 * a test) decides how to serve it. The `search`/`fetch` commands forward to the
|
|
16
|
+
 * injected core, normalizing nothing themselves — the core already deduped,
|
|
17
|
+
 * clamped, and size-bounded.
 *
 * @param deps - Optional core overrides; members left unset use the real core functions.
 * @returns The incur `Cli` with a `search` command (`query` arg, optional
 *   `maxResults` option) and a `fetch` command (`url` arg, optional `size`
 *   option restricted to `"s" | "m" | "l" | "f"`).
|
|
18
|
+
 */
|
|
19
|
+
export declare function createCli(deps?: CliDeps): Cli.Cli<{
|
|
20
|
+
search: {
|
|
21
|
+
args: {
|
|
22
|
+
query: string;
|
|
23
|
+
};
|
|
24
|
+
options: {
|
|
25
|
+
maxResults?: number | undefined;
|
|
26
|
+
};
|
|
27
|
+
};
|
|
28
|
+
} & {
|
|
29
|
+
fetch: {
|
|
30
|
+
args: {
|
|
31
|
+
url: string;
|
|
32
|
+
};
|
|
33
|
+
options: {
|
|
34
|
+
size?: "s" | "m" | "l" | "f" | undefined;
|
|
35
|
+
};
|
|
36
|
+
};
|
|
37
|
+
}, undefined, undefined, undefined>;
|
|
38
|
+
/**
 * The default CLI instance. Its type is exactly what `createCli` returns, so
 * it is derived from that declaration instead of being spelled out a second
 * time.
 */
declare const cli: ReturnType<typeof createCli>;
export default cli;
|
|
58
|
+
//# sourceMappingURL=cli.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAmBA,OAAO,EAAC,GAAG,EAAI,MAAM,OAAO,CAAC;AAC7B,OAAO,EAAC,MAAM,IAAI,UAAU,EAAC,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAC,KAAK,IAAI,SAAS,EAAC,MAAM,iBAAiB,CAAC;AAEnD;;;GAGG;AACH,MAAM,WAAW,OAAO;IACvB,MAAM,CAAC,EAAE,OAAO,UAAU,CAAC;IAC3B,KAAK,CAAC,EAAE,OAAO,SAAS,CAAC;CACzB;AAKD;;;;;GAKG;AACH,wBAAgB,SAAS,CAAC,IAAI,GAAE,OAAY;;;;;;;;;;;;;;;;;;oCA4C3C;AAKD,QAAA,MAAM,GAAG;;;;;;;;;;;;;;;;;;mCAAc,CAAC;AAexB,eAAe,GAAG,CAAC"}
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// webveil — the incur-based CLI + MCP frontend. ONE `Cli.create()` definition
|
|
3
|
+
// yields the CLI, an MCP server (`--mcp`), skills (`skills add`), a `--llms`
|
|
4
|
+
// manifest, TOON output, and token pagination for free (incur). Pi-agnostic:
|
|
5
|
+
// any agent (pi via pi-mcp-adapter, Claude Code, Cursor, Codex, bash) consumes
|
|
6
|
+
// it the same way. The `webveil` bin points at the built `dist/cli.js`.
|
|
7
|
+
//
|
|
8
|
+
// This is the THIN frontend: each command only parses argv/options and calls
|
|
9
|
+
// the SAME framework-agnostic core (`search()` / `fetch()`) the pi extension
|
|
10
|
+
// calls. The core owns config/egress/backend/extraction; this file owns no
|
|
11
|
+
// network logic of its own.
|
|
12
|
+
//
|
|
13
|
+
// Testability: `createCli(deps)` takes the core functions as injectable deps so
|
|
14
|
+
// a test wires fakes and asserts the commands call the core (via `cli.serve`
|
|
15
|
+
// with custom argv/stdout) WITHOUT touching the network. The bottom of the file
|
|
16
|
+
// builds the real CLI and serves it when run as the bin.
|
|
17
|
+
import { realpathSync } from 'node:fs';
import { argv } from 'node:process';
|
|
18
|
+
import { fileURLToPath } from 'node:url';
|
|
19
|
+
import { Cli, z } from 'incur';
|
|
20
|
+
import { search as coreSearch } from './core/search.js';
|
|
21
|
+
import { fetch as coreFetch } from './core/fetch.js';
|
|
22
|
+
/**
 * The size presets the `fetch` command's `size` option accepts (passed to
 * `z.enum` when the command is defined), mirroring the core's `FetchSize`.
 */
|
|
23
|
+
const SIZES = ['s', 'm', 'l', 'f'];
|
|
24
|
+
/**
 * Assemble the webveil command-line interface on top of incur.
 *
 * The incur `Cli` is handed back unserved, so the caller (the bin entry or a
 * test) chooses how to run it. Both commands are pass-throughs: they hand the
 * parsed args/options to the injected core function and normalize nothing
 * themselves, leaving deduping, clamping and size-bounding to the core.
 *
 * @param deps Optional `search` / `fetch` overrides; a nullish member falls
 *   back to the real core function.
 * @returns The incur `Cli` with the `search` and `fetch` commands registered.
 */
export function createCli(deps = {}) {
    const runSearch = deps.search ?? coreSearch;
    const runFetch = deps.fetch ?? coreFetch;

    const program = Cli.create('webveil', {
        description: 'Anonymous-capable, self-hosted, account-free web search + fetch for agents.',
    });

    // `search`: one positional query plus an optional result cap (alias `n`).
    const withSearch = program.command('search', {
        description: 'Search the web via the configured backend and egress.',
        args: z.object({ query: z.string().describe('The search query') }),
        options: z.object({
            maxResults: z.coerce.number().optional().describe('Maximum number of results to return'),
        }),
        alias: { maxResults: 'n' },
        run: async (ctx) => {
            const { query } = ctx.args;
            const { maxResults } = ctx.options;
            const results = await runSearch(query, { maxResults });
            return { results };
        },
    });

    // `fetch`: one positional URL plus an optional size preset (alias `s`).
    return withSearch.command('fetch', {
        description: 'Fetch a URL as clean, size-bounded markdown via the configured egress.',
        args: z.object({ url: z.string().describe('The URL to fetch') }),
        options: z.object({
            size: z.enum(SIZES).optional().describe('Page-size budget preset: s | m | l | f'),
        }),
        alias: { size: 's' },
        run: async (ctx) => {
            const { url } = ctx.args;
            const { size } = ctx.options;
            return runFetch(url, { size });
        },
    });
}
|
|
72
|
+
// The real CLI, built with no injected deps so it uses the real core (also `export default` so `incur gen` can import it for typed
|
|
73
|
+
// CTAs). Serving is GUARDED to the bin entry below, so importing this module in
|
|
74
|
+
// a test never consumes `process.argv` or exits the process.
|
|
75
|
+
const cli = createCli();
|
|
76
|
+
/**
 * True when this module is the process entry (the `webveil` bin), not imported.
 *
 * Node resolves symlinks for the entry module by default, so `import.meta.url`
 * names the real file, while `argv[1]` keeps the path as it was invoked. A
 * package-manager bin is usually a symlink (or lives under a symlinked package
 * directory), so the raw paths can differ even though this file IS the entry.
 * The invoked path is therefore also compared after resolving it with
 * `realpathSync`.
 *
 * @returns `true` only when `argv[1]` (as given or with symlinks resolved)
 *   points at this file; `false` when there is no entry path or any path
 *   operation throws (e.g. the entry path does not exist on disk).
 */
function isMain() {
    const entry = argv[1];
    if (!entry)
        return false;
    try {
        const self = fileURLToPath(import.meta.url);
        // Exact match first: covers direct invocation and --preserve-symlinks-main.
        if (self === entry)
            return true;
        // Symlinked launch: resolve the invoked path to its real location.
        return self === realpathSync(entry);
    }
    catch {
        return false;
    }
}
|
|
88
|
+
// Serve only when this file is the process entry; a plain import just
// receives the default-exported `cli` without anything being served.
if (isMain())
|
|
89
|
+
cli.serve();
|
|
90
|
+
export default cli;
|
|
91
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,8EAA8E;AAC9E,6EAA6E;AAC7E,6EAA6E;AAC7E,+EAA+E;AAC/E,wEAAwE;AACxE,EAAE;AACF,6EAA6E;AAC7E,6EAA6E;AAC7E,2EAA2E;AAC3E,4BAA4B;AAC5B,EAAE;AACF,gFAAgF;AAChF,6EAA6E;AAC7E,gFAAgF;AAChF,yDAAyD;AAEzD,OAAO,EAAC,IAAI,EAAC,MAAM,cAAc,CAAC;AAClC,OAAO,EAAC,aAAa,EAAC,MAAM,UAAU,CAAC;AACvC,OAAO,EAAC,GAAG,EAAE,CAAC,EAAC,MAAM,OAAO,CAAC;AAC7B,OAAO,EAAC,MAAM,IAAI,UAAU,EAAC,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAC,KAAK,IAAI,SAAS,EAAC,MAAM,iBAAiB,CAAC;AAWnD,0EAA0E;AAC1E,MAAM,KAAK,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAU,CAAC;AAE5C;;;;;GAKG;AACH,MAAM,UAAU,SAAS,CAAC,OAAgB,EAAE;IAC3C,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,UAAU,CAAC;IACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,SAAS,CAAC;IAEtC,OAAO,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE;QAC5B,WAAW,EACV,6EAA6E;KAC9E,CAAC;SACA,OAAO,CAAC,QAAQ,EAAE;QAClB,WAAW,EAAE,uDAAuD;QACpE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;YACd,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,kBAAkB,CAAC;SAC9C,CAAC;QACF,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC;YACjB,UAAU,EAAE,CAAC,CAAC,MAAM;iBAClB,MAAM,EAAE;iBACR,QAAQ,EAAE;iBACV,QAAQ,CAAC,qCAAqC,CAAC;SACjD,CAAC;QACF,KAAK,EAAE,EAAC,UAAU,EAAE,GAAG,EAAC;QACxB,KAAK,CAAC,GAAG,CAAC,CAAC;YACV,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE;gBAC1C,UAAU,EAAE,CAAC,CAAC,OAAO,CAAC,UAAU;aAChC,CAAC,CAAC;YACH,OAAO,EAAC,OAAO,EAAC,CAAC;QAClB,CAAC;KACD,CAAC;SACD,OAAO,CAAC,OAAO,EAAE;QACjB,WAAW,EACV,wEAAwE;QACzE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;YACd,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,kBAAkB,CAAC;SAC5C,CAAC;QACF,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC;YACjB,IAAI,EAAE,CAAC;iBACL,IAAI,CAAC,KAAK,CAAC;iBACX,QAAQ,EAAE;iBACV,QAAQ,CAAC,wCAAwC,CAAC;SACpD,CAAC;QACF,KAAK,EAAE,EAAC,IAAI,EAAE,GAAG,EAAC;QAClB,KAAK,CAAC,GAAG,CAAC,CAAC;YACV,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,EAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAC,CAAC,CAAC;QAClD,CAAC;KACD,CAAC,CAAC;AACL,CAAC;AAED,6EAA6E;AAC7E,gFAAgF;AAChF,6DAA6D;AAC7D,MAAM,GAAG,GAAG,SAAS,EAAE,CAAC;AAExB,oFAAoF;AACpF,SAAS,MAAM;IACd,MAAM,KAAK,
GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACtB,IAAI,CAAC,KAAK;QAAE,OAAO,KAAK,CAAC;IACzB,IAAI,CAAC;QACJ,OAAO,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC;IACjD,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,KAAK,CAAC;IACd,CAAC;AACF,CAAC;AAED,IAAI,MAAM,EAAE;IAAE,GAAG,CAAC,KAAK,EAAE,CAAC;AAE1B,eAAe,GAAG,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { spawn as defaultSpawn } from 'node:child_process';
|
|
2
|
+
import type { Config } from '../config.js';
|
|
3
|
+
import type { Backend } from './types.js';
|
|
4
|
+
/**
|
|
5
|
+
* Type of the `spawn` function the custom backend calls. It is an alias of the
|
|
6
|
+
* full `node:child_process` `spawn` signature, exposed so a test can inject a
* fake instead of starting a real subprocess.
|
|
7
|
+
*/
|
|
8
|
+
export type SpawnFn = typeof defaultSpawn;
|
|
9
|
+
/**
|
|
10
|
+
* Build a custom backend bound to the configured command. The command owns its
|
|
11
|
+
* own I/O; webveil hands it the request as JSON on stdin and parses
|
|
12
|
+
* SearchResult[] from stdout, failing clearly on malformed output.
*
* @param config Resolved config; `config.baseUrl` carries the command line,
* split on whitespace (first token = executable, rest = args).
* @param spawn Spawn function to use; when omitted, `node:child_process`
* `spawn` is used. Pass a fake in tests.
* @returns A Backend whose `search` runs the command once per call.
* @throws {Error} At construction time, if `config.baseUrl` holds no command.
|
|
13
|
+
*/
|
|
14
|
+
export declare function createCustomBackend(config: Config, spawn?: SpawnFn): Backend;
|
|
15
|
+
//# sourceMappingURL=custom.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"custom.d.ts","sourceRoot":"","sources":["../../../src/core/backends/custom.ts"],"names":[],"mappings":"AAoBA,OAAO,EAAC,KAAK,IAAI,YAAY,EAAC,MAAM,oBAAoB,CAAC;AACzD,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAC,OAAO,EAAoC,MAAM,YAAY,CAAC;AAe3E;;;GAGG;AACH,MAAM,MAAM,OAAO,GAAG,OAAO,YAAY,CAAC;AAsF1C;;;;GAIG;AACH,wBAAgB,mBAAmB,CAClC,MAAM,EAAE,MAAM,EACd,KAAK,GAAE,OAAsB,GAC3B,OAAO,CAuBT"}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
// custom backend — the local-command escape hatch (contract lifted from
|
|
2
|
+
// pi-web-providers' custom-wrapper). Instead of an HTTP source, it spawns a
|
|
3
|
+
// configured local command, writes the request as JSON to its stdin, and parses
|
|
4
|
+
// `SearchResult[]` from its stdout. This lets any local script be a backend.
|
|
5
|
+
//
|
|
6
|
+
// Egress note: this backend owns its own I/O (the spawned command does whatever
|
|
7
|
+
// it wants), so the handed `http` helper is unused here — there is no outbound
|
|
8
|
+
// HTTP for webveil to proxy. It still returns the normalized SearchResult shape.
|
|
9
|
+
//
|
|
10
|
+
// Command source: the configured `baseUrl` carries the command line, parsed as a
|
|
11
|
+
// whitespace-separated argv (first token = executable, rest = args), matching how
|
|
12
|
+
// the other backends read `baseUrl` as "where results come from". (Recorded
|
|
13
|
+
// decision; see the task's Decisions block.)
|
|
14
|
+
//
|
|
15
|
+
// Contract:
|
|
16
|
+
// stdin <- JSON: {"query": string, "maxResults"?: number}
|
|
17
|
+
// stdout -> JSON: SearchResult[] (each {title, url, snippet?})
|
|
18
|
+
// Malformed stdout (non-JSON, not an array, or entries missing url/title) FAILS
|
|
19
|
+
// CLEARLY — it never silently returns an empty list.
|
|
20
|
+
import { spawn as defaultSpawn } from 'node:child_process';
|
|
21
|
+
/**
 * Narrow an arbitrary value to a usable string.
 *
 * @param value Anything (typically a field read from parsed JSON).
 * @returns `value` itself when it is a non-empty string, otherwise `undefined`.
 */
function str(value) {
    if (typeof value !== 'string')
        return undefined;
    return value.length === 0 ? undefined : value;
}
|
|
24
|
+
/**
 * Parse the configured command line into `[executable, args]`.
 *
 * The command line is split on runs of whitespace. There is no quoting or
 * escaping, so a single argument cannot itself contain whitespace.
 *
 * @param baseUrl The configured command line (first token = executable).
 * @returns A two-element array: the executable, then the array of remaining
 *   arguments (possibly empty).
 * @throws {Error} If `baseUrl` is not a string or contains no tokens.
 */
function parseCommand(baseUrl) {
    // A missing / non-string baseUrl must surface the same clear configuration
    // error as an empty one, not a TypeError from calling `.trim()` on it.
    const parts = typeof baseUrl === 'string'
        ? baseUrl.trim().split(/\s+/).filter(Boolean)
        : [];
    if (parts.length === 0)
        throw new Error('custom: no command configured (set baseUrl to the command to run)');
    const [exe, ...args] = parts;
    return [exe, args];
}
|
|
31
|
+
/**
 * Convert one entry of the command's output array into a SearchResult.
 *
 * The custom contract is explicit, so a bad entry is reported as an error
 * instead of being skipped.
 *
 * @param entry One element of the parsed stdout array.
 * @param index Position of `entry` in that array; used in error messages.
 * @returns `{title, url}`, plus `snippet` when the entry has a non-empty one.
 * @throws {Error} If `entry` is not an object, or lacks a non-empty string
 *   `url` or `title`.
 */
function toResult(entry, index) {
    if (entry === null || typeof entry !== 'object') {
        throw new Error(`custom: malformed output — result[${index}] is not an object`);
    }
    const link = str(entry.url);
    const heading = str(entry.title);
    if (!(link && heading)) {
        throw new Error(`custom: malformed output — result[${index}] is missing a url or title`);
    }
    const excerpt = str(entry.snippet);
    if (!excerpt) {
        return { title: heading, url: link };
    }
    return { title: heading, url: link, snippet: excerpt };
}
|
|
47
|
+
/**
 * Turn the command's raw stdout into SearchResult[].
 *
 * @param stdout Everything the command wrote to stdout.
 * @returns One normalized result per element of the JSON array.
 * @throws {Error} If stdout is blank, is not valid JSON, is not a JSON array,
 *   or contains an entry that `toResult` rejects.
 */
function parseOutput(stdout) {
    const text = stdout.trim();
    if (text === '') {
        throw new Error('custom: command produced no output');
    }
    let data;
    try {
        data = JSON.parse(text);
    }
    catch (cause) {
        throw new Error(`custom: malformed output — stdout is not valid JSON: ${cause.message}`);
    }
    if (Array.isArray(data)) {
        return data.map((entry, index) => toResult(entry, index));
    }
    throw new Error('custom: malformed output — expected a JSON array of results');
}
|
|
63
|
+
/**
 * Spawn the command, write the request to its stdin as JSON, and collect
 * everything it writes to stdout and stderr.
 *
 * @param spawn   Spawn function to call (injectable so tests can pass a fake).
 * @param exe     Executable to run.
 * @param args    Arguments passed to the executable.
 * @param request Request object; serialized with `JSON.stringify` onto stdin.
 * @param signal  Optional abort signal, forwarded to `spawn` as-is.
 * @returns A promise that resolves with `{ stdout, stderr, code }` when the
 *   child emits 'close', and rejects with a `custom: failed to spawn …` error
 *   when the child emits 'error'.
 */
function runCommand(spawn, exe, args, request, signal) {
    return new Promise((resolve, reject) => {
        const child = spawn(exe, args, {
            stdio: ['pipe', 'pipe', 'pipe'],
            signal,
        });
        // Keep the raw chunks and decode once at the end. Decoding chunk by
        // chunk corrupts any multi-byte UTF-8 character whose bytes happen to be
        // split across two 'data' events.
        const stdoutChunks = [];
        const stderrChunks = [];
        const toBuffer = (chunk) => Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk));
        const decode = (chunks) => Buffer.concat(chunks).toString('utf8');
        child.stdout?.on('data', (chunk) => stdoutChunks.push(toBuffer(chunk)));
        child.stderr?.on('data', (chunk) => stderrChunks.push(toBuffer(chunk)));
        child.on('error', (err) => reject(new Error(`custom: failed to spawn '${exe}': ${err.message}`)));
        child.on('close', (code) => resolve({
            stdout: decode(stdoutChunks),
            stderr: decode(stderrChunks),
            code,
        }));
        child.stdin?.on('error', () => {
            // A command that exits before reading stdin closes the pipe; ignore the
            // EPIPE here and let the close handler report via exit code/stderr.
        });
        child.stdin?.end(JSON.stringify(request));
    });
}
|
|
83
|
+
/**
 * Create the custom (local-command) backend.
 *
 * The command line is read from `config.baseUrl` and parsed once, up front.
 * Each `search` call spawns the command, sends `{query, maxResults?}` as JSON
 * on stdin, and parses SearchResult[] from stdout.
 *
 * @param config Resolved config; `config.baseUrl` carries the command line.
 * @param spawn  Spawn function; defaults to `node:child_process` `spawn`.
 * @returns A backend object exposing an async `search(query, _http, options)`.
 *   The `_http` helper is unused. `search` throws when the command exits with
 *   a non-zero code (stderr is appended to the message when non-blank) or when
 *   its stdout is malformed, and truncates results to `options.maxResults`
 *   when that is set.
 */
export function createCustomBackend(config, spawn = defaultSpawn) {
    const [exe, args] = parseCommand(config.baseUrl);
    return {
        async search(query, _http, options = {}) {
            const { maxResults, signal } = options;
            const request = maxResults === undefined ? { query } : { query, maxResults };
            const run = await runCommand(spawn, exe, args, request, signal);
            if (run.code !== 0) {
                const detail = run.stderr.trim();
                const suffix = detail ? `: ${detail}` : '';
                throw new Error(`custom: command '${exe}' exited with code ${run.code}${suffix}`);
            }
            const results = parseOutput(run.stdout);
            if (maxResults === undefined) {
                return results;
            }
            return results.slice(0, maxResults);
        },
    };
}
|
|
106
|
+
//# sourceMappingURL=custom.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"custom.js","sourceRoot":"","sources":["../../../src/core/backends/custom.ts"],"names":[],"mappings":"AAAA,wEAAwE;AACxE,4EAA4E;AAC5E,gFAAgF;AAChF,6EAA6E;AAC7E,EAAE;AACF,gFAAgF;AAChF,+EAA+E;AAC/E,iFAAiF;AACjF,EAAE;AACF,iFAAiF;AACjF,kFAAkF;AAClF,4EAA4E;AAC5E,6CAA6C;AAC7C,EAAE;AACF,YAAY;AACZ,6DAA6D;AAC7D,kEAAkE;AAClE,gFAAgF;AAChF,qDAAqD;AAErD,OAAO,EAAC,KAAK,IAAI,YAAY,EAAC,MAAM,oBAAoB,CAAC;AAuBzD,SAAS,GAAG,CAAC,KAAc;IAC1B,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;AAC1E,CAAC;AAED,oEAAoE;AACpE,SAAS,YAAY,CAAC,OAAe;IACpC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC1D,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QACrB,MAAM,IAAI,KAAK,CACd,mEAAmE,CACnE,CAAC;IACH,OAAO,CAAC,KAAK,CAAC,CAAC,CAAE,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AACpC,CAAC;AAED;;;;GAIG;AACH,SAAS,QAAQ,CAAC,KAAc,EAAE,KAAa;IAC9C,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI;QAC9C,MAAM,IAAI,KAAK,CACd,qCAAqC,KAAK,oBAAoB,CAC9D,CAAC;IACH,MAAM,GAAG,GAAG,KAAgC,CAAC;IAC7C,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACzB,MAAM,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK;QACjB,MAAM,IAAI,KAAK,CACd,qCAAqC,KAAK,6BAA6B,CACvE,CAAC;IACH,MAAM,OAAO,GAAG,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACjC,OAAO,OAAO,CAAC,CAAC,CAAC,EAAC,KAAK,EAAE,GAAG,EAAE,OAAO,EAAC,CAAC,CAAC,CAAC,EAAC,KAAK,EAAE,GAAG,EAAC,CAAC;AACvD,CAAC;AAED,kFAAkF;AAClF,SAAS,WAAW,CAAC,MAAc;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC;IAC9B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QACvB,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;IACvD,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC9B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CACd,wDAAyD,KAAe,CAAC,OAAO,EAAE,CAClF,CAAC;IACH,CAAC;IACD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC;QACzB,MAAM,IAAI,KAAK,CACd,6DAA6D,CAC7D,CAAC;IACH,OAAO,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;AAC7B,CAAC;AAED,gFAAgF;AAChF,SAAS,UAAU,CAClB,KAAc,EACd,GAAW,EACX,IAAc,EACd,OAAsB,
EACtB,MAAoB;IAEpB,OAAO,IAAI,OAAO,CAAa,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QAClD,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE;YAC9B,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;YAC/B,MAAM;SACN,CAAC,CAAC;QACH,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC/D,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC/D,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE,CACzB,MAAM,CAAC,IAAI,KAAK,CAAC,4BAA4B,GAAG,MAAM,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CACrE,CAAC;QACF,KAAK,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,EAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAC,CAAC,CAAC,CAAC;QAC7D,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YAC7B,wEAAwE;YACxE,oEAAoE;QACrE,CAAC,CAAC,CAAC;QACH,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CAClC,MAAc,EACd,QAAiB,YAAY;IAE7B,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACjD,OAAO;QACN,KAAK,CAAC,MAAM,CACX,KAAa,EACb,KAAW,EACX,UAAyB,EAAE;YAE3B,MAAM,OAAO,GAAkB,EAAC,KAAK,EAAC,CAAC;YACvC,IAAI,OAAO,CAAC,UAAU,KAAK,SAAS;gBACnC,OAAO,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;YACzC,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YACxE,IAAI,GAAG,CAAC,IAAI,KAAK,CAAC;gBACjB,MAAM,IAAI,KAAK,CACd,oBAAoB,GAAG,sBAAsB,GAAG,CAAC,IAAI,EAAE;oBACtD,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CACpD,CAAC;YACH,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACxC,OAAO,OAAO,CAAC,UAAU,KAAK,SAAS;gBACtC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC;gBACtC,CAAC,CAAC,OAAO,CAAC;QACZ,CAAC;KACD,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { Config } from '../config.js';
|
|
2
|
+
import type { Backend } from './types.js';
|
|
3
|
+
/**
 * Signature of a backend factory: given the resolved config (which knows the
 * backend's baseUrl / apiKey), it returns a constructed Backend.
 */
|
|
4
|
+
export type BackendFactory = (config: Config) => Backend;
|
|
5
|
+
/**
 * The backend names the registry can resolve.
 *
 * @returns The resolvable backend names (presumably the same list `getBackend`
 * reports when it rejects an unknown name — confirm against the implementation).
 */
|
|
6
|
+
export declare function backendNames(): string[];
|
|
7
|
+
/**
|
|
8
|
+
* Resolve a backend name to a constructed Backend. Throws clearly on an unknown
|
|
9
|
+
* name (listing the known ones) so a misconfigured `backend` fails loud, never
|
|
10
|
+
* silently no-ops.
*
* @param name Backend name to resolve.
* @param config Resolved config (presumably handed to the matching backend
* factory — confirm against the implementation).
* @returns The constructed Backend for `name`.
* @throws If `name` is not a known backend; the error lists the known names.
|
|
11
|
+
*/
|
|
12
|
+
export declare function getBackend(name: string, config: Config): Backend;
|
|
13
|
+
//# sourceMappingURL=registry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../../src/core/backends/registry.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAC,OAAO,EAAC,MAAM,YAAY,CAAC;AAKxC,8EAA8E;AAC9E,MAAM,MAAM,cAAc,GAAG,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC;AASzD,kDAAkD;AAClD,wBAAgB,YAAY,IAAI,MAAM,EAAE,CAEvC;AAED;;;;GAIG;AACH,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO,CAOhE"}
|