pi-web-scout 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 pi-web-scout contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,126 @@
1
+ # pi-web-scout
2
+
3
+ No-key web search extension for [Pi](https://pi.dev). It registers the `web_search` and `web_read` tools and starts with providers that do not require API keys.
4
+
5
+ ## Goals
6
+
7
+ - No install lifecycle scripts.
8
+ - No native dependencies.
9
+ - No shell execution.
10
+ - No credential command execution.
11
+ - Provider architecture ready for future keyed APIs such as Brave, Serper, Tavily, Exa, etc.
12
+
13
+ ## Try locally
14
+
15
+ ```bash
16
+ pi -e ./pi-web-scout
17
+ ```
18
+
19
+ ## Commands
20
+
21
+ ```text
22
+ /web-scout-status
23
+ ```
24
+
25
+ Shows active config and provider status.
26
+
27
+ ## Tool
28
+
29
+ `web_search`
30
+
31
+ ```json
32
+ {
33
+ "query": "latest TypeScript release notes",
34
+ "max_results": 5,
35
+ "provider": "auto",
36
+ "mode": "first_success"
37
+ }
38
+ ```
39
+
40
+ `web_read`
41
+
42
+ ```json
43
+ {
44
+ "url": "https://example.com/docs",
45
+ "max_chars": 12000
46
+ }
47
+ ```
48
+
49
+ `web_read` fetches public HTTP(S) pages, follows validated redirects, blocks localhost/private/metadata IPs, strips noisy HTML, and returns readable text. It does not run JavaScript or launch a browser.
50
+
51
+ Recommended model flow: call `web_search` first, choose relevant result URLs, then call `web_read` on the best sources instead of relying on snippets alone.
52
+
53
+ Current no-key providers:
54
+
55
+ - `duckduckgo` — DuckDuckGo HTML endpoint.
56
+ - `marginalia` — Marginalia Search public endpoint.
57
+ - `jina` — Jina Search endpoint without an API key.
58
+
59
+ Current keyed providers, disabled by default:
60
+
61
+ - `brave` — Brave Search API, env-only key resolution.
62
+
63
+ Planned keyed providers:
64
+
65
+ - Serper, Tavily, Exa, etc.
66
+
67
+ ## Config
68
+
69
+ Optional project config:
70
+
71
+ `.pi/pi-web-scout.json`
72
+
73
+ ```json
74
+ {
75
+ "enabled": true,
76
+ "defaultProvider": "auto",
77
+ "fallbackChain": ["duckduckgo", "marginalia", "jina"],
78
+ "maxResults": 5,
79
+ "providers": {
80
+ "duckduckgo": { "enabled": true },
81
+ "marginalia": { "enabled": true },
82
+ "jina": { "enabled": true },
83
+ "brave": { "enabled": false, "apiKeyEnv": "PI_WEB_SCOUT_BRAVE_API_KEY" }
84
+ }
85
+ }
86
+ ```
87
+
88
+ The extension currently reads project config only and does not write config files. Invalid config fails fast with a clear error.
89
+
90
+ `mode: "combine"` queries all enabled providers in the fallback chain, deduplicates URLs, and ranks repeated results higher with a simple reciprocal-rank score.
91
+
92
+ ## Security notes
93
+
94
+ This package intentionally avoids:
95
+
96
+ - `postinstall`, `preinstall`, `prepare`
97
+ - `child_process`
98
+ - `eval` / `new Function`
99
+ - shell-based credential resolution
100
+ - writes outside the project
101
+ - browser automation / JavaScript execution
102
+
103
+ Search queries are sent to the selected provider.
104
+
105
+ ## Brave API key
106
+
107
+ Brave keys are available from the Brave Search API dashboard:
108
+
109
+ <https://api.search.brave.com/app/keys>
110
+
111
+ After creating a key, export it before starting Pi:
112
+
113
+ ```bash
114
+ export PI_WEB_SCOUT_BRAVE_API_KEY="..."
115
+ ```
116
+
117
+ Then enable Brave in `.pi/pi-web-scout.json`:
118
+
119
+ ```json
120
+ {
121
+ "fallbackChain": ["brave", "duckduckgo", "marginalia", "jina"],
122
+ "providers": {
123
+ "brave": { "enabled": true, "apiKeyEnv": "PI_WEB_SCOUT_BRAVE_API_KEY" }
124
+ }
125
+ }
126
+ ```
package/SECURITY.md ADDED
@@ -0,0 +1,49 @@
1
+ # Security Policy
2
+
3
+ `pi-web-scout` is a Pi extension. Pi extensions run with the user's local permissions, so review changes before installing or publishing.
4
+
5
+ ## Design constraints
6
+
7
+ This package intentionally avoids:
8
+
9
+ - npm lifecycle install scripts (`preinstall`, `install`, `postinstall`, `prepare`)
10
+ - shell execution (`child_process`, `exec`, `spawn`)
11
+ - `eval` / `new Function`
12
+ - native runtime dependencies
13
+ - browser automation / JavaScript execution
14
+ - credential commands such as `!pass show ...`
15
+ - reading SSH keys, cookies, keychains, `~/.config`, `~/.codex`, or `~/.pi` secrets
16
+ - writing config/cache/state files at runtime
17
+
18
+ ## Network behavior
19
+
20
+ Tools send user-provided search/read requests to selected providers:
21
+
22
+ - DuckDuckGo HTML search
23
+ - Marginalia public search
24
+ - Jina search
25
+ - Brave Search API only when explicitly enabled and keyed via env var
26
+ - `web_read` fetches public HTTP(S) URLs supplied by the user/model
27
+
28
+ ## Credentials
29
+
30
+ Keyed providers use environment variables only. Literal keys in config and shell commands are unsupported by design.
31
+
32
+ Current keyed env var:
33
+
34
+ ```bash
35
+ PI_WEB_SCOUT_BRAVE_API_KEY
36
+ ```
37
+
38
+ ## SSRF notes
39
+
40
+ `web_read` blocks common local/private/metadata targets and validates every redirect hop. It does not perform custom DNS resolution, so DNS rebinding protection is best-effort.
41
+
42
+ ## Reporting
43
+
44
+ When reporting a vulnerability, please include:
45
+
46
+ - package version / commit
47
+ - minimal reproduction
48
+ - affected tool or provider
49
+ - expected vs actual behavior
package/index.ts ADDED
@@ -0,0 +1,6 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { registerWebSearchTool } from "./src/tool.ts";
3
+
4
/**
 * Default export used as the extension entry point.
 *
 * All registration work is delegated to `registerWebSearchTool`.
 *
 * @param pi - Extension API handle that is passed straight through.
 */
export default function piWebScoutExtension(pi: ExtensionAPI): void {
  registerWebSearchTool(pi);
}
package/package.json ADDED
@@ -0,0 +1,59 @@
1
+ {
2
+ "name": "pi-web-scout",
3
+ "version": "0.1.0",
4
+ "description": "No-key web search extension for Pi, with provider architecture ready for keyed search APIs.",
5
+ "type": "module",
6
+ "main": "./index.ts",
7
+ "exports": {
8
+ ".": {
9
+ "import": "./index.ts",
10
+ "types": "./index.ts"
11
+ }
12
+ },
13
+ "files": [
14
+ "index.ts",
15
+ "src",
16
+ "README.md",
17
+ "package.json",
18
+ "LICENSE",
19
+ "SECURITY.md"
20
+ ],
21
+ "keywords": [
22
+ "pi-package",
23
+ "pi-extension",
24
+ "pi",
25
+ "web-search",
26
+ "search"
27
+ ],
28
+ "license": "MIT",
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "git+ssh://git@github.com/alcovegan/pi-web-scout.git"
32
+ },
33
+ "bugs": {
34
+ "url": "https://github.com/alcovegan/pi-web-scout/issues"
35
+ },
36
+ "homepage": "https://github.com/alcovegan/pi-web-scout#readme",
37
+ "pi": {
38
+ "extensions": [
39
+ "./index.ts"
40
+ ]
41
+ },
42
+ "scripts": {
43
+ "check:safety": "node tests/safety.test.mjs",
44
+ "test": "node --experimental-strip-types --test tests/*.test.mjs"
45
+ },
46
+ "peerDependencies": {
47
+ "@earendil-works/pi-coding-agent": "*",
48
+ "@earendil-works/pi-ai": "*",
49
+ "typebox": "*"
50
+ },
51
+ "peerDependenciesMeta": {
52
+ "@earendil-works/pi-ai": {
53
+ "optional": true
54
+ },
55
+ "typebox": {
56
+ "optional": true
57
+ }
58
+ }
59
+ }
package/src/config.ts ADDED
@@ -0,0 +1,112 @@
1
+ import { existsSync, readFileSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import type { PiWebSearchConfig, ProviderId, ResolvedConfig } from "./types.ts";
4
+ import { clampInt } from "./safety.ts";
5
+
6
+ const CONFIG_FILE = "pi-web-scout.json";
7
+ const PROVIDERS = ["duckduckgo", "marginalia", "jina", "brave"] as const;
8
+
9
+ const DEFAULT_CONFIG: ResolvedConfig = {
10
+ enabled: true,
11
+ defaultProvider: "auto",
12
+ fallbackChain: ["duckduckgo", "marginalia", "jina"],
13
+ maxResults: 5,
14
+ providers: {
15
+ duckduckgo: { enabled: true },
16
+ marginalia: { enabled: true },
17
+ jina: { enabled: true },
18
+ brave: { enabled: false, apiKeyEnv: "PI_WEB_SCOUT_BRAVE_API_KEY" },
19
+ },
20
+ };
21
+
22
/** Type guard: true only for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
25
+
26
/** Type guard: accepts the literal "auto" or any entry of PROVIDERS. */
function isProviderId(value: unknown): value is ProviderId {
  if (value === "auto") return true;
  return PROVIDERS.some((known) => known === value);
}
29
+
30
/** Type guard: accepts only entries of PROVIDERS (never "auto"). */
function isConcreteProvider(value: unknown): value is (typeof PROVIDERS)[number] {
  return PROVIDERS.some((known) => known === value);
}
33
+
34
/**
 * Build the path of the project config file.
 *
 * @param cwd - Directory that contains the `.pi` folder.
 * @returns `cwd` joined with `.pi` and the config file name.
 */
export function getConfigPath(cwd: string): string {
  const segments = [cwd, ".pi", CONFIG_FILE];
  return join(...segments);
}
37
+
38
/**
 * Load the project config from the path returned by `getConfigPath(cwd)` and
 * overlay it on a deep copy of DEFAULT_CONFIG.
 *
 * When the file does not exist, an untouched copy of DEFAULT_CONFIG is returned.
 *
 * @param cwd - Directory that contains the `.pi` folder.
 * @returns The resolved config; `fallbackChain` is never empty.
 * @throws Error when the file cannot be read or parsed, is not a JSON object,
 *   or is rejected by `validateConfigShape`.
 */
export function loadConfig(cwd: string): ResolvedConfig {
  const path = getConfigPath(cwd);
  if (!existsSync(path)) return structuredClone(DEFAULT_CONFIG);

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(path, "utf-8"));
  } catch (error) {
    // Do not assume the thrown value is an Error instance.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid ${path}: ${reason}`);
  }
  if (!isRecord(parsed)) throw new Error(`Invalid ${path}: expected a JSON object`);

  validateConfigShape(parsed, path);
  const user = parsed as PiWebSearchConfig;

  const resolved = structuredClone(DEFAULT_CONFIG);
  if (user.enabled !== undefined) resolved.enabled = user.enabled;
  if (user.defaultProvider !== undefined) resolved.defaultProvider = user.defaultProvider;
  if (user.maxResults !== undefined) resolved.maxResults = clampInt(user.maxResults, 5, 1, 20);

  if (user.fallbackChain !== undefined) {
    // De-duplicate while keeping the user's order.
    const chain = user.fallbackChain.filter(isConcreteProvider);
    resolved.fallbackChain = [...new Set(chain)];
  }

  if (user.providers) {
    for (const provider of PROVIDERS) {
      const providerConfig = user.providers[provider];
      if (providerConfig?.enabled !== undefined) resolved.providers[provider].enabled = providerConfig.enabled;
      if (providerConfig?.apiKeyEnv !== undefined) resolved.providers[provider].apiKeyEnv = providerConfig.apiKeyEnv;
    }
  }

  // Providers that are switched off must not stay in the chain.
  resolved.fallbackChain = resolved.fallbackChain.filter((p) => resolved.providers[p].enabled);
  if (resolved.fallbackChain.length === 0) {
    // Prefer providers the user actually left enabled over blindly restoring
    // "duckduckgo", which may be the very provider that was disabled.
    const enabledDefaults = DEFAULT_CONFIG.fallbackChain.filter((p) => resolved.providers[p].enabled);
    const enabledAny = PROVIDERS.filter((p) => resolved.providers[p].enabled);
    if (enabledDefaults.length > 0) resolved.fallbackChain = enabledDefaults;
    else if (enabledAny.length > 0) resolved.fallbackChain = [...enabledAny];
    // NOTE(review): nothing is enabled at all; keep the historical non-empty
    // chain so callers that index into it keep working — confirm desired behavior.
    else resolved.fallbackChain = ["duckduckgo"];
  }
  if (resolved.defaultProvider !== "auto" && !resolved.providers[resolved.defaultProvider].enabled) {
    resolved.defaultProvider = "auto";
  }
  return resolved;
}
78
+
79
/**
 * Validate the raw parsed config object and fail fast on the first problem.
 *
 * Checks `enabled`, `defaultProvider`, `maxResults`, `fallbackChain` and
 * `providers`; keys that are absent are skipped.
 *
 * @param config - Parsed JSON object from the config file.
 * @param path - Config file path, used only to prefix error messages.
 * @throws Error describing the first invalid field.
 */
function validateConfigShape(config: Record<string, unknown>, path: string): void {
  if (config.enabled !== undefined && typeof config.enabled !== "boolean") {
    throw new Error(`Invalid ${path}: enabled must be boolean`);
  }
  if (config.defaultProvider !== undefined && !isProviderId(config.defaultProvider)) {
    throw new Error(`Invalid ${path}: defaultProvider must be one of auto, ${PROVIDERS.join(", ")}`);
  }
  if (config.maxResults !== undefined) {
    // Require a real JSON number: Number() coercion would also let "5", true
    // or [5] through, contradicting the error message below.
    const n = config.maxResults;
    if (typeof n !== "number" || !Number.isFinite(n) || n < 1 || n > 20) {
      throw new Error(`Invalid ${path}: maxResults must be a number from 1 to 20`);
    }
  }
  if (config.fallbackChain !== undefined) {
    if (!Array.isArray(config.fallbackChain)) throw new Error(`Invalid ${path}: fallbackChain must be an array`);
    const invalid = config.fallbackChain.find((value) => !isConcreteProvider(value));
    if (invalid !== undefined) throw new Error(`Invalid ${path}: unknown provider in fallbackChain: ${String(invalid)}`);
  }
  if (config.providers !== undefined) {
    if (!isRecord(config.providers)) throw new Error(`Invalid ${path}: providers must be an object`);
    for (const [name, value] of Object.entries(config.providers)) {
      if (!isConcreteProvider(name)) throw new Error(`Invalid ${path}: unknown provider: ${name}`);
      if (!isRecord(value)) throw new Error(`Invalid ${path}: providers.${name} must be an object`);
      if (value.enabled !== undefined && typeof value.enabled !== "boolean") {
        throw new Error(`Invalid ${path}: providers.${name}.enabled must be boolean`);
      }
      if (value.apiKeyEnv !== undefined) {
        // Only a variable NAME is accepted here, never a literal key.
        if (typeof value.apiKeyEnv !== "string" || !/^[A-Z_][A-Z0-9_]*$/.test(value.apiKeyEnv)) {
          throw new Error(`Invalid ${path}: providers.${name}.apiKeyEnv must be an ALL_CAPS environment variable name`);
        }
      }
    }
  }
}
@@ -0,0 +1,36 @@
1
+ import type { ResolvedProviderConfig, SearchProvider } from "./types.ts";
2
+
3
+ const ENV_NAME_PATTERN = /^[A-Z_][A-Z0-9_]*$/;
4
+
5
+ export interface ResolvedCredential {
6
+ value?: string;
7
+ source?: string;
8
+ error?: string;
9
+ }
10
+
11
/** Returns true when `name` matches ENV_NAME_PATTERN. */
export function validateEnvName(name: string): boolean {
  return ENV_NAME_PATTERN.exec(name) !== null;
}
14
+
15
/**
 * Look up a provider's API key, using environment variables as the only source.
 *
 * Intentionally NOT supported:
 * - literal keys in config
 * - shell commands like !pass show ...
 * - keychain/cookie/SSH/config-file discovery
 *
 * @param provider - Provider whose `requiresKey` / `defaultKeyEnv` are consulted.
 * @param config - Resolved provider config; `apiKeyEnv` wins over the provider default.
 * @returns `{}` for keyless providers, `{ value, source }` on success, or
 *   `{ error }` describing why no key could be resolved.
 */
export function resolveProviderCredential(
  provider: SearchProvider,
  config: ResolvedProviderConfig,
): ResolvedCredential {
  if (!provider.requiresKey) return {};

  const fail = (error: string): ResolvedCredential => ({ error });

  const envName = config.apiKeyEnv ?? provider.defaultKeyEnv;
  if (!envName) return fail(`${provider.id} requires an API key env var`);
  if (!validateEnvName(envName)) {
    return fail(`${provider.id} apiKeyEnv is not a valid environment variable name: ${envName}`);
  }

  const raw = process.env[envName];
  const value = raw === undefined ? undefined : raw.trim();
  // An unset variable and a blank one are treated the same way.
  if (!value) return fail(`${provider.id} requires ${envName} to be set`);
  return { value, source: `env:${envName}` };
}
package/src/format.ts ADDED
@@ -0,0 +1,36 @@
1
+ import type { ProviderRunResult, SearchResult } from "./types.ts";
2
+
3
/**
 * Render search results as a markdown-style text report.
 *
 * @param query - Original query; line breaks are flattened to spaces for the heading.
 * @param results - Results to list, numbered in array order.
 * @param providerLabel - Text shown on the "Provider:" line.
 * @param runs - Optional per-provider outcomes, listed under "Provider runs:".
 * @returns The report with trailing whitespace removed.
 */
export function formatSearchResults(
  query: string,
  results: SearchResult[],
  providerLabel: string,
  runs: ProviderRunResult[] = [],
): string {
  const safeQuery = query.split(/[\r\n]+/).join(" ").trim();
  const header = [`## Web search: ${safeQuery}`, `Provider: ${providerLabel}`, `Results: ${results.length}`];

  const describeRun = (run: ProviderRunResult): string => {
    if (run.error) return `- ${run.provider}: failed: ${run.error}`;
    const count = run.results.length;
    return `- ${run.provider}: ${count} result${count === 1 ? "" : "s"}`;
  };
  const runSection = runs.length > 0 ? ["", "Provider runs:", ...runs.map(describeRun)] : [];

  const resultSection = results.flatMap((result, index) => {
    const entry = [`### ${index + 1}. ${result.title || "Untitled"}`, result.url];
    // Show the plural form only when several providers returned this result.
    const multiSource = result.providers && result.providers.length > 1;
    entry.push(multiSource ? `Sources: ${result.providers.join(", ")}` : `Source: ${result.provider}`);
    if (result.snippet) entry.push(result.snippet);
    entry.push("");
    return entry;
  });

  return [...header, ...runSection, "", ...resultSection].join("\n").trimEnd();
}
32
+
33
/**
 * Join provider failures into one line of the form `provider: message; ...`.
 * Entries without an `error` are reported as "unknown error".
 *
 * @returns The joined line, or an empty string for an empty list.
 */
export function formatProviderErrors(errors: ProviderRunResult[]): string {
  const parts: string[] = [];
  for (const failure of errors) {
    parts.push(`${failure.provider}: ${failure.error ?? "unknown error"}`);
  }
  return parts.join("; ");
}
@@ -0,0 +1,53 @@
1
+ import type { SearchProvider, SearchRequest, SearchResult } from "../types.ts";
2
+
3
+ const ENDPOINT = "https://api.search.brave.com/res/v1/web/search";
4
+ const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
5
+
6
+ interface BraveItem {
7
+ title?: unknown;
8
+ url?: unknown;
9
+ description?: unknown;
10
+ age?: unknown;
11
+ }
12
+
13
/**
 * Brave Search API provider. Requires an API key, which is sent in the
 * `X-Subscription-Token` header.
 */
export const braveProvider: SearchProvider = {
  id: "brave",
  label: "Brave Search API",
  requiresKey: true,
  defaultKeyEnv: "PI_WEB_SCOUT_BRAVE_API_KEY",
  /**
   * Query the Brave endpoint and map `web.results` to search results.
   *
   * @throws Error when `request.apiKey` is missing or the response is not OK.
   */
  async search(request: SearchRequest): Promise<SearchResult[]> {
    if (!request.apiKey) throw new Error("Brave API key is missing");

    const url = new URL(ENDPOINT);
    url.searchParams.set("q", request.query);
    // Never request more than 20 results, whatever the caller asks for.
    url.searchParams.set("count", String(Math.min(request.maxResults, 20)));

    const response = await fetch(url.toString(), {
      headers: {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "User-Agent": USER_AGENT,
        "X-Subscription-Token": request.apiKey,
      },
      signal: request.signal,
    });

    if (!response.ok) throw new Error(`Brave returned HTTP ${response.status}`);

    // The payload is untrusted JSON: narrow step by step so that a `null`
    // body or `null` list entries yield no results instead of a TypeError.
    const data: unknown = await response.json();
    const web = typeof data === "object" && data !== null ? (data as { web?: unknown }).web : undefined;
    const items = typeof web === "object" && web !== null ? (web as { results?: unknown }).results : undefined;

    return (Array.isArray(items) ? items : [])
      .slice(0, request.maxResults)
      .map((raw: unknown, index) => {
        const item = (typeof raw === "object" && raw !== null ? raw : {}) as BraveItem;
        return {
          title: stringValue(item.title),
          url: stringValue(item.url),
          snippet: stringValue(item.description),
          provider: "brave",
          // Rank is the position in the response, assigned before filtering.
          rank: index + 1,
        };
      })
      .filter((result) => result.title.length > 0 && result.url.length > 0);
  },
};
50
+
51
/** Returns the trimmed string, or "" when `value` is not a string. */
function stringValue(value: unknown): string {
  if (typeof value !== "string") return "";
  return value.trim();
}
@@ -0,0 +1,68 @@
1
+ import type { SearchProvider, SearchRequest, SearchResult } from "../types.ts";
2
+ import { stripTags } from "../safety.ts";
3
+
4
+ const ENDPOINT = "https://html.duckduckgo.com/html/";
5
+ const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
6
+
7
/** Keyless provider that fetches the DuckDuckGo HTML endpoint and parses the page. */
export const duckDuckGoProvider: SearchProvider = {
  id: "duckduckgo",
  label: "DuckDuckGo HTML",
  requiresKey: false,
  /**
   * Fetch the result page for `request.query` and hand the HTML to the parser.
   *
   * @throws Error when the response status is not OK.
   */
  async search(request: SearchRequest): Promise<SearchResult[]> {
    const queryString = new URLSearchParams({ q: request.query, no_redirect: "1" }).toString();
    const headers = {
      "Accept": "text/html,application/xhtml+xml",
      "User-Agent": USER_AGENT,
    };

    const response = await fetch(`${ENDPOINT}?${queryString}`, { headers, signal: request.signal });
    if (response.ok) {
      const html = await response.text();
      return parseDuckDuckGoHtml(html, request.maxResults);
    }
    throw new Error(`DuckDuckGo returned HTTP ${response.status}`);
  },
};
28
+
29
/**
 * Extract search results from a DuckDuckGo HTML page using regular expressions.
 *
 * The page is cut into blocks at every `<div>` whose class contains "result";
 * a block contributes a result only when it holds a `result__a` link with a
 * non-empty URL and title.
 *
 * @param html - Raw page markup.
 * @param maxResults - Upper bound on the number of results returned.
 * @returns Results ranked 1..n in page order.
 */
export function parseDuckDuckGoHtml(html: string, maxResults: number): SearchResult[] {
  const resultBlockRegex = /<div[^>]+class="[^"]*result[^"]*"[\s\S]*?(?=<div[^>]+class="[^"]*result[^"]*"|<\/body>|$)/gi;
  const linkPattern = /<a[^>]+class="[^"]*result__a[^"]*"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i;
  const anchorSnippetPattern = /<a[^>]+class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/a>/i;
  const cellSnippetPattern = /<td[^>]+class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/td>/i;

  const collected: SearchResult[] = [];
  for (const [block] of html.matchAll(resultBlockRegex)) {
    if (collected.length >= maxResults) break;

    const link = linkPattern.exec(block);
    if (link === null) continue;

    const url = decodeDuckDuckGoUrl(link[1] ?? "");
    const title = stripTags(link[2] ?? "");
    if (url === "" || title === "") continue;

    // The snippet is either an anchor or a table cell, depending on the layout.
    const snippetMatch = anchorSnippetPattern.exec(block) ?? cellSnippetPattern.exec(block);

    collected.push({
      title,
      url,
      snippet: snippetMatch ? stripTags(snippetMatch[1] ?? "") : undefined,
      provider: "duckduckgo",
      rank: collected.length + 1,
    });
  }
  return collected;
}
58
+
59
/**
 * Turn a DuckDuckGo result href into the target URL.
 *
 * The href is entity-decoded and resolved against https://duckduckgo.com; when
 * it carries a `uddg` query parameter, that parameter's value is the target.
 *
 * @param url - Raw `href` attribute value from the result link.
 * @returns The `uddg` target if present, otherwise the resolved href, or the
 *   entity-decoded input when it cannot be parsed as a URL.
 */
function decodeDuckDuckGoUrl(url: string): string {
  const decoded = stripTags(url);
  try {
    const parsed = new URL(decoded, "https://duckduckgo.com");
    // searchParams.get() already percent-decodes the value. Decoding it again
    // would corrupt targets that legitimately contain escapes (%25, %26, %20)
    // and throw on a bare "%".
    const target = parsed.searchParams.get("uddg");
    return target ? target : parsed.toString();
  } catch {
    return decoded;
  }
}
@@ -0,0 +1,54 @@
1
+ import type { SearchProvider, SearchRequest, SearchResult } from "../types.ts";
2
+
3
+ const ENDPOINT = "https://s.jina.ai/";
4
+ const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
5
+
6
+ interface JinaItem {
7
+ title?: unknown;
8
+ url?: unknown;
9
+ content?: unknown;
10
+ description?: unknown;
11
+ }
12
+
13
/** Keyless provider that queries the Jina search endpoint and expects JSON. */
export const jinaProvider: SearchProvider = {
  id: "jina",
  label: "Jina Search",
  requiresKey: false,
  /**
   * Send the query and pass the decoded JSON body to the response parser.
   *
   * @throws Error when the response status is not OK.
   */
  async search(request: SearchRequest): Promise<SearchResult[]> {
    const target = new URL(ENDPOINT);
    target.searchParams.set("q", request.query);
    target.searchParams.set("format", "json");

    const headers = {
      "Accept": "application/json",
      "User-Agent": USER_AGENT,
    };
    const response = await fetch(target.toString(), { headers, signal: request.signal });

    if (response.ok) {
      const payload: unknown = await response.json();
      return parseJinaResponse(payload, request.maxResults);
    }
    throw new Error(`Jina returned HTTP ${response.status}`);
  },
};
37
+
38
/**
 * Convert a decoded Jina JSON payload into search results.
 *
 * Reads the `data` array; anything else (including `null` or a non-object
 * payload) yields an empty list. Entries without a URL are dropped, and the
 * URL doubles as the title when no title is given.
 *
 * @param data - Untrusted decoded JSON.
 * @param maxResults - Number of leading entries to consider.
 * @returns Results with snippets capped at 700 characters.
 */
export function parseJinaResponse(data: unknown, maxResults: number): SearchResult[] {
  // Narrow defensively: `data` comes straight from the network.
  const list = typeof data === "object" && data !== null ? (data as { data?: unknown }).data : undefined;
  return (Array.isArray(list) ? list : [])
    .slice(0, maxResults)
    .map((raw: unknown, index) => {
      // A null or primitive entry is treated as an empty item and filtered out below.
      const item = (typeof raw === "object" && raw !== null ? raw : {}) as JinaItem;
      return {
        title: stringValue(item.title) || stringValue(item.url),
        url: stringValue(item.url),
        snippet: (stringValue(item.content) || stringValue(item.description)).slice(0, 700),
        provider: "jina",
        rank: index + 1,
      };
    })
    .filter((result) => result.title.length > 0 && result.url.length > 0);
}
51
+
52
/** Returns the trimmed string, or "" when `value` is not a string. */
function stringValue(value: unknown): string {
  if (typeof value !== "string") return "";
  return value.trim();
}
@@ -0,0 +1,51 @@
1
+ import type { SearchProvider, SearchRequest, SearchResult } from "../types.ts";
2
+
3
+ const BASE_URL = "https://api.marginalia.nu/public/search";
4
+ const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
5
+
6
+ interface MarginaliaItem {
7
+ title?: unknown;
8
+ url?: unknown;
9
+ description?: unknown;
10
+ }
11
+
12
/** Keyless provider that queries the Marginalia public search API. */
export const marginaliaProvider: SearchProvider = {
  id: "marginalia",
  label: "Marginalia Search",
  requiresKey: false,
  /**
   * Send the query (as a path segment) and pass the decoded JSON to the parser.
   *
   * @throws Error when the response status is not OK.
   */
  async search(request: SearchRequest): Promise<SearchResult[]> {
    // The request never asks for more than 20 entries.
    const count = Math.min(request.maxResults, 20);
    const encodedQuery = encodeURIComponent(request.query);
    const endpoint = `${BASE_URL}/${encodedQuery}?index=0&count=${count}`;

    const headers = {
      "Accept": "application/json",
      "User-Agent": USER_AGENT,
    };
    const response = await fetch(endpoint, { headers, signal: request.signal });

    if (response.ok) {
      const payload: unknown = await response.json();
      return parseMarginaliaResponse(payload, request.maxResults);
    }
    throw new Error(`Marginalia returned HTTP ${response.status}`);
  },
};
34
+
35
/**
 * Convert a decoded Marginalia JSON payload into search results.
 *
 * Reads the `results` array; anything else (including `null` or a non-object
 * payload) yields an empty list. Entries lacking a title or URL are dropped.
 *
 * @param data - Untrusted decoded JSON.
 * @param maxResults - Number of leading entries to consider.
 */
export function parseMarginaliaResponse(data: unknown, maxResults: number): SearchResult[] {
  // Narrow defensively: `data` comes straight from the network.
  const list = typeof data === "object" && data !== null ? (data as { results?: unknown }).results : undefined;
  return (Array.isArray(list) ? list : [])
    .slice(0, maxResults)
    .map((raw: unknown, index) => {
      // A null or primitive entry is treated as an empty item and filtered out below.
      const item = (typeof raw === "object" && raw !== null ? raw : {}) as MarginaliaItem;
      return {
        title: stringValue(item.title),
        url: stringValue(item.url),
        snippet: stringValue(item.description),
        provider: "marginalia",
        rank: index + 1,
      };
    })
    .filter((result) => result.title.length > 0 && result.url.length > 0);
}
48
+
49
/** Returns the trimmed string, or "" when `value` is not a string. */
function stringValue(value: unknown): string {
  if (typeof value !== "string") return "";
  return value.trim();
}
@@ -0,0 +1,20 @@
1
+ import type { ProviderId, SearchProvider } from "../types.ts";
2
+ import { braveProvider } from "./brave.ts";
3
+ import { duckDuckGoProvider } from "./duckduckgo.ts";
4
+ import { jinaProvider } from "./jina.ts";
5
+ import { marginaliaProvider } from "./marginalia.ts";
6
+
7
+ const providers = new Map<string, SearchProvider>([
8
+ [duckDuckGoProvider.id, duckDuckGoProvider],
9
+ [marginaliaProvider.id, marginaliaProvider],
10
+ [jinaProvider.id, jinaProvider],
11
+ [braveProvider.id, braveProvider],
12
+ ]);
13
+
14
/**
 * Look up a registered provider by id.
 *
 * @returns The provider, or `undefined` when the id is not in the registry map.
 */
export function getProvider(id: Exclude<ProviderId, "auto">): SearchProvider | undefined {
  const match = providers.get(id);
  return match;
}
17
+
18
/** Returns a fresh array of all registered providers, in registry insertion order. */
export function listProviders(): SearchProvider[] {
  return Array.from(providers.values());
}
package/src/read.ts ADDED
@@ -0,0 +1,102 @@
1
+ import { clampInt, sanitizeError } from "./safety.ts";
2
+ import { validatePublicHttpUrl } from "./url-safety.ts";
3
+ import { extractReadableText } from "./readability.ts";
4
+
5
+ const MAX_REDIRECTS = 8;
6
+ const MAX_BYTES = 1_000_000;
7
+ const DEFAULT_TIMEOUT_MS = 15_000;
8
+ const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
9
+
10
+ export interface WebReadOptions {
11
+ url: string;
12
+ maxChars?: number;
13
+ signal?: AbortSignal;
14
+ }
15
+
16
+ export interface WebReadResult {
17
+ requestedUrl: string;
18
+ finalUrl: string;
19
+ title?: string;
20
+ contentType: string;
21
+ status: number;
22
+ text: string;
23
+ truncated: boolean;
24
+ bytesRead: number;
25
+ }
26
+
27
/**
 * Fetch a page and return its readable text.
 *
 * @param options - `url` to read, optional `maxChars` (clamped to
 *   1,000–50,000, default 12,000) and optional abort `signal`.
 * @returns The page result; `requestedUrl` is always the caller's `options.url`.
 * @throws Error when the URL is blocked, the redirect chain is invalid, or the
 *   fetch fails.
 */
export async function readWebPage(options: WebReadOptions): Promise<WebReadResult> {
  const maxChars = clampInt(options.maxChars, 12_000, 1_000, 50_000);
  const page = await fetchWithRedirects(options.url, maxChars, options.signal, 0, new Set());
  // fetchWithRedirects fills requestedUrl with the URL of the hop it finally
  // fetched, so after a redirect it would no longer be what the caller asked for.
  return { ...page, requestedUrl: options.url };
}
31
+
32
/**
 * Fetch `url`, following redirects manually so that every hop is validated
 * with `validatePublicHttpUrl` before it is requested.
 *
 * @param url - URL to fetch for this hop.
 * @param maxChars - Maximum number of characters of extracted text to return.
 * @param signal - Optional caller abort signal, combined with a per-hop timeout.
 * @param depth - Number of redirects followed so far.
 * @param seen - Normalized URLs already visited, used for loop detection.
 * @throws Error when a URL is blocked, a loop or too many redirects are
 *   detected, a redirect lacks a Location header, or the final status is not OK.
 */
async function fetchWithRedirects(
  url: string,
  maxChars: number,
  signal: AbortSignal | undefined,
  depth: number,
  seen: Set<string>,
): Promise<WebReadResult> {
  const validation = validatePublicHttpUrl(url);
  if (!validation.valid || !validation.url) throw new Error(`URL blocked: ${validation.reason ?? "invalid URL"}`);
  const normalized = validation.url.toString();
  if (seen.has(normalized)) throw new Error("redirect loop detected");
  if (depth > MAX_REDIRECTS) throw new Error(`too many redirects (>${MAX_REDIRECTS})`);
  seen.add(normalized);

  // Each hop gets its own timeout; the caller's signal still aborts everything.
  const timeout = AbortSignal.timeout(DEFAULT_TIMEOUT_MS);
  const combinedSignal = signal ? AbortSignal.any([signal, timeout]) : timeout;

  const response = await fetch(normalized, {
    // Redirects are handled below so the next hop can be validated first.
    redirect: "manual",
    signal: combinedSignal,
    headers: {
      "Accept": "text/html,application/xhtml+xml,text/plain,application/json;q=0.8,*/*;q=0.1",
      "Accept-Encoding": "gzip, deflate",
      "User-Agent": USER_AGENT,
    },
  });

  if (response.status >= 300 && response.status < 400) {
    const location = response.headers.get("location");
    // The redirect body is never read; cancel it so the connection is released.
    await response.body?.cancel().catch(() => undefined);
    if (!location) throw new Error(`redirect without location: HTTP ${response.status}`);
    return await fetchWithRedirects(new URL(location, normalized).toString(), maxChars, signal, depth + 1, seen);
  }

  if (!response.ok) {
    // Same as above: release the unread error body before bailing out.
    await response.body?.cancel().catch(() => undefined);
    throw new Error(`fetch failed: HTTP ${response.status}`);
  }

  const contentType = response.headers.get("content-type") ?? "text/plain";
  const { text: raw, bytesRead } = await readLimitedBody(response);
  const extracted = extractReadableText(raw, contentType);
  const truncated = extracted.text.length > maxChars;
  const text = truncated ? `${extracted.text.slice(0, maxChars)}\n\n[... truncated at ${maxChars} chars ...]` : extracted.text;

  return {
    requestedUrl: url,
    finalUrl: response.url || normalized,
    title: extracted.title,
    contentType,
    status: response.status,
    text,
    truncated,
    bytesRead,
  };
}
84
+
85
/**
 * Read a response body as text, decoding at most MAX_BYTES bytes.
 *
 * @param response - Response whose body has not been consumed yet.
 * @returns The decoded text (with a truncation marker appended when data was
 *   dropped) and the number of bytes that were decoded.
 */
async function readLimitedBody(response: Response): Promise<{ text: string; bytesRead: number }> {
  const reader = response.body?.getReader();
  // No stream available: fall back to text(); a failure there becomes the
  // (sanitized) error message instead of an exception.
  if (!reader) return { text: await response.text().catch((error) => sanitizeError(error)), bytesRead: 0 };

  const decoder = new TextDecoder();
  let text = "";
  let bytesRead = 0;
  let truncated = false;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      const remaining = MAX_BYTES - bytesRead;
      if (value.byteLength > remaining) {
        // Decode only what still fits so the cap is exact, then stop.
        text += decoder.decode(value.subarray(0, remaining), { stream: true });
        bytesRead += remaining;
        truncated = true;
        break;
      }
      bytesRead += value.byteLength;
      text += decoder.decode(value, { stream: true });
    }
  } finally {
    // Release the stream on every path, including when read() rejects.
    await reader.cancel().catch(() => undefined);
  }
  // Flush bytes the streaming decoder may still be holding.
  text += decoder.decode();
  // Only claim truncation when bytes were actually discarded.
  if (truncated) text += "\n[... response body truncated at 1MB ...]";
  return { text, bytesRead };
}
@@ -0,0 +1,58 @@
1
+ import { decodeHtml, stripTags } from "./safety.ts";
2
+
3
+ export interface ExtractedPage {
4
+ title?: string;
5
+ text: string;
6
+ }
7
+
8
/**
 * Reduce a fetched document to readable plain text.
 *
 * Content whose type does not mention "html" or "xml" is only
 * whitespace-normalized. Markup has script/style/noscript/svg/nav/footer/header
 * elements removed, block-level tags turned into line breaks, the main content
 * region selected, and remaining tags and entities stripped.
 *
 * @param content - Raw response body.
 * @param contentType - Value of the Content-Type header.
 * @returns The extracted text and, for markup, the `<title>` if present.
 */
export function extractReadableText(content: string, contentType: string): ExtractedPage {
  if (!/html|xml/i.test(contentType)) {
    return { text: normalizeWhitespace(content) };
  }

  const title = extractTitle(content);
  const html = content
    .replace(/<script\b[\s\S]*?<\/script>/gi, " ")
    .replace(/<style\b[\s\S]*?<\/style>/gi, " ")
    .replace(/<noscript\b[\s\S]*?<\/noscript>/gi, " ")
    .replace(/<svg\b[\s\S]*?<\/svg>/gi, " ")
    .replace(/<nav\b[\s\S]*?<\/nav>/gi, " ")
    .replace(/<footer\b[\s\S]*?<\/footer>/gi, " ")
    .replace(/<header\b[\s\S]*?<\/header>/gi, " ")
    .replace(/<(br|p|div|section|article|li|tr|h[1-6])\b[^>]*>/gi, "\n")
    .replace(/<\/((p|div|section|article|li|tr|h[1-6]))>/gi, "\n");

  const main = pickMainContent(html);
  // Strip tags and decode entities here rather than through stripTags():
  // stripTags() collapses every whitespace run to one space, which would erase
  // the line breaks inserted above and flatten the page into a single line.
  const text = decodeHtml(main.replace(/<[^>]*>/g, " "));
  return { title, text: normalizeWhitespace(text) };
}
28
+
29
/**
 * Extract the text of the first `<title>` element.
 *
 * @returns The cleaned title, or `undefined` when there is none or it is empty.
 */
function extractTitle(html: string): string | undefined {
  const match = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  // stripTags() already decodes HTML entities; decoding a second time would
  // turn a literal "&amp;lt;" in the title into "<".
  const title = match ? normalizeWhitespace(stripTags(match[1] ?? "")) : "";
  return title || undefined;
}
34
+
35
/**
 * Choose the region of the document most likely to hold the main text.
 *
 * Tries `<main>`, then `<article>`, then `<body>`, and accepts the first one
 * whose tag-stripped text is longer than 200 characters.
 *
 * @returns The inner markup of the chosen element, or `html` unchanged when
 *   no candidate qualifies.
 */
function pickMainContent(html: string): string {
  const patterns = [
    /<main\b[^>]*>([\s\S]*?)<\/main>/i,
    /<article\b[^>]*>([\s\S]*?)<\/article>/i,
    /<body\b[^>]*>([\s\S]*?)<\/body>/i,
  ];
  for (const pattern of patterns) {
    const inner = pattern.exec(html)?.[1];
    if (!inner) continue;
    if (stripTags(inner).length > 200) return inner;
  }
  return html;
}
47
+
48
/**
 * Tidy whitespace: drop carriage returns, squeeze runs of spaces and tabs,
 * trim every line, and allow at most one blank line in a row.
 */
function normalizeWhitespace(text: string): string {
  const squeezed = text
    .replace(/\r/g, "")
    .replace(/[ \t]+/g, " ")
    .replace(/\n{3,}/g, "\n\n");

  const trimmedLines = squeezed.split("\n").map((line) => line.trim());

  const kept: string[] = [];
  trimmedLines.forEach((line, index) => {
    // Keep a blank line only when the line directly before it had content.
    const previousHasText = index > 0 && trimmedLines[index - 1] !== "";
    if (line !== "" || previousHasText) kept.push(line);
  });

  return kept.join("\n").trim();
}
package/src/safety.ts ADDED
@@ -0,0 +1,47 @@
1
+ const SECRET_PATTERNS: RegExp[] = [
2
+ /(authorization|x-api-key|api[-_]?key|token|secret|password)["']?\s*[:=]\s*["']?[^\s"']{8,}/gi,
3
+ /(bearer|token)\s+[a-z0-9._\/-]{8,}/gi,
4
+ ];
5
+
6
/**
 * Turn an arbitrary thrown value into a short message that is safe to show.
 *
 * Text matching SECRET_PATTERNS is replaced by "<keyword> [redacted]" and the
 * result is limited to 500 characters.
 *
 * @param error - Anything that was thrown or rejected.
 */
export function sanitizeError(error: unknown): string {
  const message = error instanceof Error ? error.message : String(error);
  // Redact BEFORE truncating: cutting first could shorten a secret below the
  // length the patterns require and leave the fragment in the output.
  let safe = message;
  for (const pattern of SECRET_PATTERNS) safe = safe.replace(pattern, "$1 [redacted]");
  return safe.slice(0, 500);
}
12
+
13
/**
 * Coerce a loosely typed value to an integer within `[min, max]`.
 *
 * Numbers and non-blank numeric strings are floored and clamped. Everything
 * else (undefined, null, booleans, blank strings, objects, NaN, ±Infinity)
 * yields `fallback`.
 *
 * @param value - Untrusted input.
 * @param fallback - Returned as-is when `value` is not usable.
 * @param min - Inclusive lower bound.
 * @param max - Inclusive upper bound.
 */
export function clampInt(value: unknown, fallback: number, min: number, max: number): number {
  let n: number;
  if (typeof value === "number") {
    n = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    n = Number(value);
  } else {
    // Number(null), Number(""), Number(false) and Number([]) are all 0; without
    // this branch they would be clamped to `min` instead of using the fallback.
    return fallback;
  }
  if (!Number.isFinite(n)) return fallback;
  return Math.max(min, Math.min(max, Math.floor(n)));
}
18
+
19
/**
 * Produce a comparison key for a URL: fragment removed, trailing slashes
 * dropped from non-root paths, and the whole string lowercased.
 *
 * @param url - URL as returned by a provider.
 * @returns The key, or the trimmed lowercased input when it is not a valid URL.
 */
export function normalizeUrlForDedupe(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return url.trim().toLowerCase();
  }

  parsed.hash = "";
  const path = parsed.pathname;
  // Leave the root path "/" untouched.
  if (path.length > 1 && path.endsWith("/")) {
    parsed.pathname = path.replace(/\/+$/, "");
  }
  return parsed.toString().toLowerCase();
}
31
+
32
/**
 * Decode a small set of HTML entities in a single pass.
 *
 * Handles `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;` plus decimal
 * (`&#39;`) and hexadecimal (`&#x27;`) numeric references. Unknown names and
 * numeric references outside the Unicode range are left untouched.
 *
 * @param value - Text that may contain entities.
 */
export function decodeHtml(value: string): string {
  const named = new Map<string, string>([
    ["amp", "&"],
    ["lt", "<"],
    ["gt", ">"],
    ["quot", '"'],
    ["apos", "'"],
    ["nbsp", "\u00a0"],
  ]);

  // One pass over the input: the output of one replacement is never rescanned,
  // so "&amp;lt;" decodes to "&lt;" and not to "<".
  return value.replace(
    /&(?:([a-z]+)|#(\d+)|#x([0-9a-f]+));/gi,
    (match: string, name: string | undefined, dec: string | undefined, hex: string | undefined) => {
      if (name !== undefined) return named.get(name) ?? match;

      const codePoint = dec !== undefined ? Number(dec) : parseInt(hex ?? "", 16);
      // fromCodePoint throws above U+10FFFF; keep such references verbatim.
      if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) return match;
      // fromCodePoint (unlike fromCharCode) handles characters beyond the BMP.
      return String.fromCodePoint(codePoint);
    },
  );
}
44
+
45
/**
 * Convert an HTML fragment to a single line of plain text: tags become spaces,
 * entities are decoded, and whitespace runs collapse to one space.
 */
export function stripTags(html: string): string {
  const withoutTags = html.replace(/<[^>]*>/g, " ");
  const decoded = decodeHtml(withoutTags);
  return decoded.replace(/\s+/g, " ").trim();
}
package/src/tool.ts ADDED
@@ -0,0 +1,249 @@
1
+ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
2
+ import { StringEnum } from "@earendil-works/pi-ai";
3
+ import { Type } from "typebox";
4
+ import { getConfigPath, loadConfig } from "./config.ts";
5
+ import { resolveProviderCredential } from "./credentials.ts";
6
+ import { formatProviderErrors, formatSearchResults } from "./format.ts";
7
+ import { getProvider, listProviders } from "./providers/registry.ts";
8
+ import { readWebPage } from "./read.ts";
9
+ import { clampInt, normalizeUrlForDedupe, sanitizeError } from "./safety.ts";
10
+ import type { ProviderId, ProviderRunResult, ResolvedConfig, SearchMode, SearchResult } from "./types.ts";
11
+
12
/** Values accepted by the `provider` parameter of the `web_search` tool. */
const PROVIDERS = ["auto", "duckduckgo", "marginalia", "jina", "brave"] as const;
/** Values accepted by the `mode` parameter of the `web_search` tool. */
const MODES = ["first_success", "combine"] as const;

/**
 * Registers the `web-scout-status` command together with the `web_search` and
 * `web_read` tools on the given Pi extension API.
 *
 * `web_search` resolves the provider list (an explicit provider, or the
 * enabled entries of the configured fallback chain for "auto"), runs it in
 * first-success or combined mode, and throws when no results were found.
 * `web_read` delegates to `readWebPage` and prefixes the extracted text with a
 * small metadata header.
 *
 * @param pi - Extension API to register the command and tools on.
 */
export function registerWebSearchTool(pi: ExtensionAPI): void {
  registerStatusCommand(pi);

  pi.registerTool({
    name: "web_search",
    label: "Web Scout Search",
    description:
      "Search the web using no-key search providers. Defaults to DuckDuckGo HTML and is architected for future keyed providers.",
    promptSnippet: "web_search: search the public web without requiring API keys.",
    promptGuidelines: [
      "Use web_search when current or source-backed information is needed.",
      "Prefer web_search over guessing facts that may have changed recently.",
      "Use web_search with mode=combine when broad coverage matters more than speed.",
    ],
    parameters: Type.Object({
      query: Type.String({ description: "Search query" }),
      max_results: Type.Optional(Type.Number({ description: "Number of results, 1-20. Default comes from config or 5." })),
      provider: Type.Optional(StringEnum(PROVIDERS, { description: "Search provider. Use auto unless a provider is requested." })),
      mode: Type.Optional(StringEnum(MODES, { description: "first_success uses fallback order; combine merges enabled providers." })),
    }),
    async execute(_toolCallId, params, signal, _onUpdate, ctx) {
      const config = loadConfig(ctx.cwd);
      if (!config.enabled) throw new Error("pi-web-scout is disabled in .pi/pi-web-scout.json");

      const query = String(params.query ?? "").trim();
      if (!query) throw new Error("query must be non-empty");

      const maxResults = clampInt(params.max_results, config.maxResults, 1, 20);
      const provider = (params.provider ?? config.defaultProvider) as ProviderId;
      const mode = (params.mode ?? "first_success") as SearchMode;
      // Optional chaining: an identifier without a config entry is reported as
      // disabled instead of crashing on a property access of `undefined`.
      if (provider !== "auto" && !config.providers[provider]?.enabled) {
        throw new Error(`Provider ${provider} is disabled in .pi/pi-web-scout.json`);
      }

      // "auto" honours the per-provider `enabled` flag exactly like an explicit
      // provider selection does, so disabled providers are never contacted.
      const providerIds = provider === "auto"
        ? config.fallbackChain.filter((id) => config.providers[id]?.enabled)
        : [provider];
      if (providerIds.length === 0) {
        throw new Error("No enabled providers in fallbackChain; check .pi/pi-web-scout.json");
      }

      // Combined mode only makes sense across several providers; a single
      // explicitly requested provider always takes the first-success path.
      const runResult = mode === "combine" && provider === "auto"
        ? await runCombined(providerIds, query, maxResults, signal, config)
        : await runFirstSuccess(providerIds, query, maxResults, signal, config);

      if (runResult.results.length === 0) {
        const errors = formatProviderErrors(runResult.runs.filter((r) => r.error));
        throw new Error(errors ? `No web search results. Provider errors: ${errors}` : "No web search results.");
      }

      return {
        content: [{ type: "text", text: formatSearchResults(query, runResult.results, runResult.providerLabel, runResult.runs) }],
        details: {
          query,
          provider: runResult.provider,
          providerLabel: runResult.providerLabel,
          mode,
          resultCount: runResult.results.length,
          // Per-provider summary only; the merged result list follows below.
          runs: runResult.runs.map((run) => ({
            provider: run.provider,
            resultCount: run.results.length,
            error: run.error,
          })),
          results: runResult.results,
        },
      };
    },
  });

  pi.registerTool({
    name: "web_read",
    label: "Web Read",
    description: "Fetch a public HTTP(S) URL and extract readable text with SSRF protection. No browser or JavaScript execution.",
    promptSnippet: "web_read: read and extract text from a specific public URL.",
    promptGuidelines: [
      "Use web_read after web_search when snippets are insufficient and a source needs to be read.",
      "Do not use web_read for localhost, private network, or metadata URLs; those are blocked.",
    ],
    parameters: Type.Object({
      url: Type.String({ description: "Public HTTP(S) URL to read" }),
      max_chars: Type.Optional(Type.Number({ description: "Maximum extracted characters, 1000-50000. Default 12000." })),
    }),
    async execute(_toolCallId, params, signal) {
      const url = String(params.url ?? "").trim();
      if (!url) throw new Error("url must be non-empty");
      const result = await readWebPage({ url, maxChars: params.max_chars, signal });
      // Metadata header; the trailing empty entry yields the newline that
      // separates the header from the extracted text.
      const header = [
        `# ${result.title || "Web page"}`,
        `URL: ${result.finalUrl}`,
        `Status: HTTP ${result.status}`,
        `Content-Type: ${result.contentType}`,
        result.truncated ? "Truncated: yes" : "Truncated: no",
        "",
      ].join("\n");
      return {
        content: [{ type: "text", text: header + result.text }],
        details: result,
      };
    },
  });
}
110
+
111
/**
 * Registers the `web-scout-status` command, which reports the loaded
 * configuration and, per known provider, whether it is enabled and where its
 * API key (if one is required) comes from.
 *
 * Any failure while building or showing the report is shown as a sanitized
 * error notification instead of being thrown.
 *
 * @param pi - Extension API to register the command on.
 */
function registerStatusCommand(pi: ExtensionAPI): void {
  pi.registerCommand("web-scout-status", {
    description: "Show pi-web-scout configuration and provider status.",
    handler: async (_args, ctx) => {
      try {
        const config = loadConfig(ctx.cwd);
        const report: string[] = [];
        report.push("pi-web-scout status");
        report.push(`config: ${getConfigPath(ctx.cwd)}`);
        report.push(`enabled: ${config.enabled}`);
        report.push(`defaultProvider: ${config.defaultProvider}`);
        report.push(`fallbackChain: ${config.fallbackChain.join(", ")}`);
        report.push(`maxResults: ${config.maxResults}`);
        report.push("");
        report.push("providers:");

        for (const provider of listProviders()) {
          const providerConfig = config.providers[provider.id];
          const state = providerConfig.enabled ? "enabled" : "disabled";
          const credential = resolveProviderCredential(provider, providerConfig);

          // Only the key's source or the reason it is missing is shown,
          // never the key itself.
          let keyStatus = "no key";
          if (provider.requiresKey) {
            keyStatus = credential.value
              ? `key ${credential.source}`
              : `key missing (${credential.error ?? "not configured"})`;
          }

          report.push(`- ${provider.id}: ${state}, ${keyStatus}, ${provider.label}`);
        }

        ctx.ui.notify(report.join("\n"), "info");
      } catch (error) {
        ctx.ui.notify(sanitizeError(error), "error");
      }
    },
  });
}
145
+
146
/**
 * Queries providers one after another and returns the results of the first
 * provider that yields at least one result.
 *
 * Unknown providers, providers lacking a required key, and providers that
 * throw or return nothing are recorded in `runs` and skipped. Each provider
 * call is bounded by a 15 second timeout combined with the caller's signal.
 * Once the caller's signal is aborted the remaining providers are not
 * contacted.
 *
 * @param providerIds - Provider identifiers in fallback order.
 * @param query - Search query.
 * @param maxResults - Maximum number of results requested from a provider.
 * @param signal - Optional cancellation signal from the caller.
 * @param config - Resolved configuration used for per-provider settings.
 * @returns The winning provider with its results, or provider "none" with an
 *   empty result list; `runs` always lists every attempt made.
 */
async function runFirstSuccess(
  providerIds: string[],
  query: string,
  maxResults: number,
  signal: AbortSignal | undefined,
  config: ResolvedConfig,
): Promise<{ provider: string; providerLabel: string; results: SearchResult[]; runs: ProviderRunResult[] }> {
  const runs: ProviderRunResult[] = [];

  for (const providerId of providerIds) {
    // After a cancellation, falling through to the next provider would only
    // issue requests nobody is waiting for. Record why the chain stopped.
    if (signal?.aborted) {
      runs.push({ provider: providerId, results: [], error: "search aborted" });
      break;
    }

    // The chain is typed as plain strings here; the registry decides whether
    // the identifier is known.
    const provider = getProvider(providerId as never);
    if (!provider) {
      runs.push({ provider: providerId, results: [], error: "unknown provider" });
      continue;
    }

    try {
      const providerConfig = config.providers[provider.id];
      const credential = resolveProviderCredential(provider, providerConfig);
      if (provider.requiresKey && !credential.value) {
        runs.push({ provider: provider.id, results: [], error: credential.error ?? "missing API key" });
        continue;
      }
      const results = await withTimeoutSignal(signal, 15_000, (combinedSignal) =>
        provider.search({ query, maxResults, signal: combinedSignal, apiKey: credential.value })
      );
      runs.push({ provider: provider.id, results, keySource: credential.source });
      if (results.length > 0) return { provider: provider.id, providerLabel: provider.label, results, runs };
    } catch (error) {
      // A failing provider must not end the chain; keep a sanitized reason.
      runs.push({ provider: provider.id, results: [], error: sanitizeError(error) });
    }
  }

  return { provider: "none", providerLabel: "none", results: [], runs };
}
181
+
182
/**
 * Queries all given providers concurrently and merges their results.
 *
 * A provider that is unknown, lacks a required key, or throws contributes an
 * empty result list plus an error entry; it never rejects the whole search.
 * The merged list is cut to `maxResults`.
 *
 * @param providerIds - Provider identifiers to query.
 * @param query - Search query.
 * @param maxResults - Per-provider request size and cap of the merged list.
 * @param signal - Optional cancellation signal from the caller.
 * @param config - Resolved configuration used for per-provider settings.
 * @returns Merged results labelled "combined" together with every run.
 */
async function runCombined(
  providerIds: string[],
  query: string,
  maxResults: number,
  signal: AbortSignal | undefined,
  config: ResolvedConfig,
): Promise<{ provider: string; providerLabel: string; results: SearchResult[]; runs: ProviderRunResult[] }> {
  // Runs a single provider and converts every failure into a result record.
  const runOne = async (providerId: string): Promise<ProviderRunResult> => {
    const provider = getProvider(providerId as never);
    if (!provider) {
      return { provider: providerId, results: [], error: "unknown provider" };
    }

    try {
      const credential = resolveProviderCredential(provider, config.providers[provider.id]);
      const keyMissing = provider.requiresKey && !credential.value;
      if (keyMissing) {
        return { provider: provider.id, results: [], error: credential.error ?? "missing API key" };
      }

      const found = await withTimeoutSignal(signal, 15_000, (combinedSignal) =>
        provider.search({ query, maxResults, signal: combinedSignal, apiKey: credential.value })
      );
      return { provider: provider.id, results: found, keySource: credential.source };
    } catch (error) {
      return { provider: provider.id, results: [], error: sanitizeError(error) };
    }
  };

  const runs = await Promise.all(providerIds.map((providerId) => runOne(providerId)));
  const merged = combineResults(runs).slice(0, maxResults);

  return { provider: "combined", providerLabel: "combined", results: merged, runs };
}
212
+
213
/**
 * Merges the results of several provider runs into one ranked list.
 *
 * Results are grouped by normalized URL. Every occurrence adds
 * `1 / (60 + max(1, rank))` to the URL's score, and the occurrence with the
 * longest snippet is kept as the representative. The output is ordered by
 * score, then by the number of providers that reported the URL, and ranks are
 * renumbered from 1.
 *
 * @param runs - Provider runs whose results should be merged.
 * @returns De-duplicated results carrying `providers` and `score`.
 */
function combineResults(runs: ProviderRunResult[]): SearchResult[] {
  interface MergedEntry {
    result: SearchResult;
    providers: Set<string>;
    score: number;
  }

  const merged = new Map<string, MergedEntry>();

  for (const { provider: source, results } of runs) {
    for (const candidate of results) {
      const dedupeKey = normalizeUrlForDedupe(candidate.url);
      const weight = 1 / (60 + Math.max(1, candidate.rank));
      const entry = merged.get(dedupeKey);

      if (entry) {
        entry.providers.add(source);
        entry.score += weight;
        // Prefer whichever occurrence carries the longer snippet.
        const incomingLength = candidate.snippet?.length ?? 0;
        const currentLength = entry.result.snippet?.length ?? 0;
        if (incomingLength > currentLength) entry.result = candidate;
      } else {
        merged.set(dedupeKey, { result: candidate, providers: new Set([source]), score: weight });
      }
    }
  }

  const ordered = Array.from(merged.values());
  ordered.sort((a, b) => {
    const scoreDelta = b.score - a.score;
    // `||` keeps the original tie-break: any falsy delta falls through to
    // the provider count.
    return scoreDelta || (b.providers.size - a.providers.size);
  });

  return ordered.map((entry, position) => {
    const reportedBy = Array.from(entry.providers);
    return {
      ...entry.result,
      rank: position + 1,
      // First provider in insertion order, i.e. the first one that reported the URL.
      provider: reportedBy[0] ?? entry.result.provider,
      providers: [...reportedBy].sort(),
      score: Number(entry.score.toFixed(6)),
    };
  });
}
240
+
241
/**
 * Runs `fn` with a signal that aborts when the caller's signal aborts or when
 * `timeoutMs` elapses, whichever happens first.
 *
 * The deadline is enforced rather than advisory: the work is raced against
 * the abort event, so the returned promise rejects with the signal's reason
 * even when `fn` ignores the signal it is given. If the signal is already
 * aborted, `fn` is not called at all.
 *
 * @param signal - Optional caller-supplied cancellation signal.
 * @param timeoutMs - Deadline in milliseconds.
 * @param fn - Work to run; receives the combined signal.
 * @returns Whatever `fn` resolves to.
 * @throws The abort reason (a `TimeoutError` or the caller's abort reason)
 *   when the combined signal aborts first; otherwise whatever `fn` throws.
 */
async function withTimeoutSignal<T>(
  signal: AbortSignal | undefined,
  timeoutMs: number,
  fn: (signal: AbortSignal) => Promise<T>,
): Promise<T> {
  const timeoutSignal = AbortSignal.timeout(timeoutMs);
  const combined = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
  if (combined.aborted) throw combined.reason;

  // The promise executor runs synchronously, so `onAbort` holds the real
  // listener by the time it is registered below.
  let onAbort = (): void => {};
  const aborted = new Promise<never>((_resolve, reject) => {
    onAbort = () => reject(combined.reason);
  });
  combined.addEventListener("abort", onAbort, { once: true });

  try {
    return await Promise.race([fn(combined), aborted]);
  } finally {
    combined.removeEventListener("abort", onAbort);
  }
}
package/src/types.ts ADDED
@@ -0,0 +1,65 @@
1
/** Identifier of a search provider; "auto" selects the configured fallback chain. */
export type ProviderId = "auto" | "duckduckgo" | "marginalia" | "jina" | "brave";

/** A provider identifier that names an actual provider, i.e. anything but "auto". */
type ConcreteProviderId = Exclude<ProviderId, "auto">;

/** Result aggregation strategy: first provider with results, or a merge of several providers. */
export type SearchMode = "first_success" | "combine";

/** One search hit. */
export interface SearchResult {
  title: string;
  url: string;
  snippet?: string;
  provider: string;
  rank: number;
  /** Providers that returned this URL in combine mode. */
  providers?: string[];
  /** Internal combine score, exposed in details for diagnostics. */
  score?: number;
}

/** Input handed to a provider's `search` method. */
export interface SearchRequest {
  query: string;
  maxResults: number;
  signal?: AbortSignal;
  /** Resolved provider API key. Present only for keyed providers. */
  apiKey?: string;
}

/** Outcome of asking one provider: its results, or an error describing why there are none. */
export interface ProviderRunResult {
  provider: string;
  results: SearchResult[];
  error?: string;
  keySource?: string;
}

/** Contract every search provider implements. */
export interface SearchProvider {
  id: ConcreteProviderId;
  label: string;
  requiresKey: boolean;
  defaultKeyEnv?: string;
  search(request: SearchRequest): Promise<SearchResult[]>;
}

/** Per-provider settings as written in the configuration file. */
export interface ProviderConfig {
  enabled?: boolean;
  /** Environment variable name only. Literal keys and shell commands are intentionally unsupported. */
  apiKeyEnv?: string;
}

/** Per-provider settings after resolution; `enabled` is always present. */
export interface ResolvedProviderConfig {
  enabled: boolean;
  apiKeyEnv?: string;
}

/** Shape of the user-supplied configuration; every field is optional. */
export interface PiWebSearchConfig {
  enabled?: boolean;
  defaultProvider?: ProviderId;
  fallbackChain?: ConcreteProviderId[];
  maxResults?: number;
  providers?: Partial<Record<ConcreteProviderId, ProviderConfig>>;
}

/** Configuration after resolution; every field is present. */
export interface ResolvedConfig {
  enabled: boolean;
  defaultProvider: ProviderId;
  fallbackChain: ConcreteProviderId[];
  maxResults: number;
  providers: Record<ConcreteProviderId, ResolvedProviderConfig>;
}
@@ -0,0 +1,60 @@
1
/** Hostnames that are rejected outright: loopback names and cloud metadata endpoints. */
const BLOCKED_HOSTNAMES = new Set([
  "localhost",
  "127.0.0.1",
  "0.0.0.0",
  "::1",
  "[::1]",
  "metadata.google.internal",
  "metadata.azure.com",
  "169.254.169.254",
  "100.100.100.200",
]);

/** Outcome of {@link validatePublicHttpUrl}. */
export interface UrlValidationResult {
  valid: boolean;
  /** Parsed URL; present only when `valid` is true. */
  url?: URL;
  /** Human-readable rejection reason; present only when `valid` is false. */
  reason?: string;
}

/**
 * Checks that `input` is an http(s) URL whose host is not a loopback,
 * private, link-local, multicast or metadata address.
 *
 * The check is purely lexical: it inspects the hostname as written (after URL
 * normalization) and performs no DNS resolution.
 *
 * @param input - URL string to validate.
 * @returns `{ valid: true, url }` or `{ valid: false, reason }`.
 */
export function validatePublicHttpUrl(input: string): UrlValidationResult {
  let parsed: URL;
  try {
    parsed = new URL(input);
  } catch {
    return { valid: false, reason: "invalid URL" };
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return { valid: false, reason: `disallowed URL scheme: ${parsed.protocol}` };
  }

  // A fully qualified name such as "localhost." denotes the same host as
  // "localhost"; drop trailing dots so it cannot slip past the exact-match list.
  const hostname = parsed.hostname.toLowerCase().replace(/\.+$/, "");
  if (!hostname) return { valid: false, reason: "invalid URL" };

  // Names under ".localhost" are reserved for loopback use, so they are
  // treated like "localhost" itself.
  if (BLOCKED_HOSTNAMES.has(hostname) || hostname.endsWith(".localhost")) {
    return { valid: false, reason: `blocked hostname: ${hostname}` };
  }
  if (isBlockedIpv4(hostname)) return { valid: false, reason: `blocked IP address: ${hostname}` };
  if (isBlockedIpv6(hostname)) return { valid: false, reason: `blocked IPv6 address: ${hostname}` };
  if (/^\d+$/.test(hostname)) return { valid: false, reason: "numeric hostnames are not allowed" };

  return { valid: true, url: parsed };
}

/**
 * Reports whether `hostname` is a dotted-quad IPv4 literal in a non-public
 * range. Anything that is not a dotted quad yields false; a dotted quad with
 * an out-of-range octet is blocked.
 */
function isBlockedIpv4(hostname: string): boolean {
  if (!/^(\d{1,3}\.){3}\d{1,3}$/.test(hostname)) return false;
  const octets = hostname.split(".").map(Number);
  if (octets.some((n) => !Number.isInteger(n) || n < 0 || n > 255)) return true;
  const [a, b] = octets as [number, number, number, number];
  return (
    a === 0 || // 0.0.0.0/8
    a === 10 || // 10.0.0.0/8
    a === 127 || // 127.0.0.0/8 loopback
    (a === 169 && b === 254) || // 169.254.0.0/16 link-local, incl. metadata endpoint
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
    (a === 192 && b === 168) || // 192.168.0.0/16
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 shared address space
    a >= 224 // multicast and reserved
  );
}

/**
 * Reports whether `hostname` is an IPv6 literal (with or without brackets) in
 * a non-public range: unspecified/loopback and the rest of ::/96, link-local
 * fe80::/10, unique-local fc00::/7, multicast ff00::/8, and IPv4-mapped or
 * NAT64 addresses whose embedded IPv4 address is itself blocked.
 *
 * Hostnames without a colon are not IPv6 literals and are never blocked here,
 * so ordinary domain names that merely start with "fc" or "fd" pass. A literal
 * that cannot be parsed is blocked.
 */
function isBlockedIpv6(hostname: string): boolean {
  const host = hostname.replace(/^\[/, "").replace(/\]$/, "");
  if (!host.includes(":")) return false;

  const groups = parseIpv6Groups(host);
  if (!groups) return true; // fail closed on anything unparseable
  const [g0 = 0, g1 = 0, g2 = 0, g3 = 0, g4 = 0, g5 = 0, g6 = 0, g7 = 0] = groups;

  if ((g0 & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((g0 & 0xfe00) === 0xfc00) return true; // fc00::/7 unique local
  if ((g0 & 0xff00) === 0xff00) return true; // ff00::/8 multicast

  const embeddedIpv4 = `${g6 >> 8}.${g6 & 0xff}.${g7 >> 8}.${g7 & 0xff}`;
  const upperGroupsZero = g0 === 0 && g1 === 0 && g2 === 0 && g3 === 0 && g4 === 0;
  // ::/96 holds "::", "::1" and the deprecated IPv4-compatible addresses.
  if (upperGroupsZero && g5 === 0) return true;
  // ::ffff:0:0/96, IPv4-mapped: as public as the IPv4 address it wraps.
  if (upperGroupsZero && g5 === 0xffff) return isBlockedIpv4(embeddedIpv4);
  // 64:ff9b::/96, NAT64 well-known prefix: also wraps an IPv4 address.
  if (g0 === 0x64 && g1 === 0xff9b && g2 === 0 && g3 === 0 && g4 === 0 && g5 === 0) {
    return isBlockedIpv4(embeddedIpv4);
  }
  return false;
}

/**
 * Expands an IPv6 literal (no brackets) into its eight 16-bit groups.
 * Accepts "::" compression and a trailing dotted-quad. Returns undefined when
 * the text is not a well-formed address.
 */
function parseIpv6Groups(host: string): number[] | undefined {
  let text = host;

  // Rewrite a trailing dotted quad ("::ffff:127.0.0.1") as two hex groups.
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    const quad = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(tail);
    if (!quad) return undefined;
    const [a = 0, b = 0, c = 0, d = 0] = quad.slice(1).map(Number);
    if (a > 255 || b > 255 || c > 255 || d > 255) return undefined;
    text = `${text.slice(0, lastColon + 1)}${((a << 8) | b).toString(16)}:${((c << 8) | d).toString(16)}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return undefined; // "::" may appear at most once
  const head = parseHextets(halves[0] ?? "");
  const rest = halves.length === 2 ? parseHextets(halves[1] ?? "") : [];
  if (!head || !rest) return undefined;

  if (halves.length === 1) return head.length === 8 ? head : undefined;
  const zeroCount = 8 - head.length - rest.length;
  if (zeroCount < 1) return undefined;
  return [...head, ...new Array<number>(zeroCount).fill(0), ...rest];
}

/** Parses a colon-separated run of 1-4 digit hex groups; the empty string yields no groups. */
function parseHextets(side: string): number[] | undefined {
  if (side === "") return [];
  const groups: number[] = [];
  for (const part of side.split(":")) {
    if (!/^[0-9a-f]{1,4}$/i.test(part)) return undefined;
    groups.push(parseInt(part, 16));
  }
  return groups;
}