pi-web-scout 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +126 -0
- package/SECURITY.md +49 -0
- package/index.ts +6 -0
- package/package.json +59 -0
- package/src/config.ts +112 -0
- package/src/credentials.ts +36 -0
- package/src/format.ts +36 -0
- package/src/providers/brave.ts +53 -0
- package/src/providers/duckduckgo.ts +68 -0
- package/src/providers/jina.ts +54 -0
- package/src/providers/marginalia.ts +51 -0
- package/src/providers/registry.ts +20 -0
- package/src/read.ts +102 -0
- package/src/readability.ts +58 -0
- package/src/safety.ts +47 -0
- package/src/tool.ts +249 -0
- package/src/types.ts +65 -0
- package/src/url-safety.ts +60 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 pi-web-scout contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# pi-web-scout
|
|
2
|
+
|
|
3
|
+
No-key web search extension for [Pi](https://pi.dev). It registers the `web_search` and `web_read` tools and ships with search providers that do not require API keys.
|
|
4
|
+
|
|
5
|
+
## Goals
|
|
6
|
+
|
|
7
|
+
- No install lifecycle scripts.
|
|
8
|
+
- No native dependencies.
|
|
9
|
+
- No shell execution.
|
|
10
|
+
- No credential command execution.
|
|
11
|
+
- Provider architecture ready for future keyed APIs such as Brave, Serper, Tavily, Exa, etc.
|
|
12
|
+
|
|
13
|
+
## Try locally
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pi -e ./pi-web-scout
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Commands
|
|
20
|
+
|
|
21
|
+
```text
|
|
22
|
+
/web-scout-status
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Shows active config and provider status.
|
|
26
|
+
|
|
27
|
+
## Tools
|
|
28
|
+
|
|
29
|
+
`web_search`
|
|
30
|
+
|
|
31
|
+
```json
|
|
32
|
+
{
|
|
33
|
+
"query": "latest TypeScript release notes",
|
|
34
|
+
"max_results": 5,
|
|
35
|
+
"provider": "auto",
|
|
36
|
+
"mode": "first_success"
|
|
37
|
+
}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
`web_read`
|
|
41
|
+
|
|
42
|
+
```json
|
|
43
|
+
{
|
|
44
|
+
"url": "https://example.com/docs",
|
|
45
|
+
"max_chars": 12000
|
|
46
|
+
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
`web_read` fetches public HTTP(S) pages, follows validated redirects, blocks localhost/private/metadata IPs, strips noisy HTML, and returns readable text. It does not run JavaScript or launch a browser.
|
|
50
|
+
|
|
51
|
+
Recommended model flow: call `web_search` first, choose relevant result URLs, then call `web_read` on the best sources instead of relying on snippets alone.
|
|
52
|
+
|
|
53
|
+
Current no-key providers:
|
|
54
|
+
|
|
55
|
+
- `duckduckgo` — DuckDuckGo HTML endpoint.
|
|
56
|
+
- `marginalia` — Marginalia Search public endpoint.
|
|
57
|
+
- `jina` — Jina Search endpoint without an API key.
|
|
58
|
+
|
|
59
|
+
Current keyed providers, disabled by default:
|
|
60
|
+
|
|
61
|
+
- `brave` — Brave Search API, env-only key resolution.
|
|
62
|
+
|
|
63
|
+
Planned keyed providers:
|
|
64
|
+
|
|
65
|
+
- Serper, Tavily, Exa, etc.
|
|
66
|
+
|
|
67
|
+
## Config
|
|
68
|
+
|
|
69
|
+
Optional project config:
|
|
70
|
+
|
|
71
|
+
`.pi/pi-web-scout.json`
|
|
72
|
+
|
|
73
|
+
```json
|
|
74
|
+
{
|
|
75
|
+
"enabled": true,
|
|
76
|
+
"defaultProvider": "auto",
|
|
77
|
+
"fallbackChain": ["duckduckgo", "marginalia", "jina"],
|
|
78
|
+
"maxResults": 5,
|
|
79
|
+
"providers": {
|
|
80
|
+
"duckduckgo": { "enabled": true },
|
|
81
|
+
"marginalia": { "enabled": true },
|
|
82
|
+
"jina": { "enabled": true },
|
|
83
|
+
"brave": { "enabled": false, "apiKeyEnv": "PI_WEB_SCOUT_BRAVE_API_KEY" }
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
The extension currently reads project config only and does not write config files. Invalid config fails fast with a clear error.
|
|
89
|
+
|
|
90
|
+
`mode: "combine"` queries all enabled providers in the fallback chain, deduplicates URLs, and ranks repeated results higher with a simple reciprocal-rank score.
|
|
91
|
+
|
|
92
|
+
## Security notes
|
|
93
|
+
|
|
94
|
+
This package intentionally avoids:
|
|
95
|
+
|
|
96
|
+
- `postinstall`, `preinstall`, `prepare`
|
|
97
|
+
- `child_process`
|
|
98
|
+
- `eval` / `new Function`
|
|
99
|
+
- shell-based credential resolution
|
|
100
|
+
- writes outside the project
|
|
101
|
+
- browser automation / JavaScript execution
|
|
102
|
+
|
|
103
|
+
Search queries are sent to the selected provider.
|
|
104
|
+
|
|
105
|
+
## Brave API key
|
|
106
|
+
|
|
107
|
+
Brave keys are available from the Brave Search API dashboard:
|
|
108
|
+
|
|
109
|
+
<https://api.search.brave.com/app/keys>
|
|
110
|
+
|
|
111
|
+
After creating a key, export it before starting Pi:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
export PI_WEB_SCOUT_BRAVE_API_KEY="..."
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Then enable Brave in `.pi/pi-web-scout.json`:
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{
|
|
121
|
+
"fallbackChain": ["brave", "duckduckgo", "marginalia", "jina"],
|
|
122
|
+
"providers": {
|
|
123
|
+
"brave": { "enabled": true, "apiKeyEnv": "PI_WEB_SCOUT_BRAVE_API_KEY" }
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
```
|
package/SECURITY.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
`pi-web-scout` is a Pi extension. Pi extensions run with the user's local permissions, so review changes before installing or publishing.
|
|
4
|
+
|
|
5
|
+
## Design constraints
|
|
6
|
+
|
|
7
|
+
This package intentionally avoids:
|
|
8
|
+
|
|
9
|
+
- npm lifecycle install scripts (`preinstall`, `install`, `postinstall`, `prepare`)
|
|
10
|
+
- shell execution (`child_process`, `exec`, `spawn`)
|
|
11
|
+
- `eval` / `new Function`
|
|
12
|
+
- native runtime dependencies
|
|
13
|
+
- browser automation / JavaScript execution
|
|
14
|
+
- credential commands such as `!pass show ...`
|
|
15
|
+
- reading SSH keys, cookies, keychains, `~/.config`, `~/.codex`, or `~/.pi` secrets
|
|
16
|
+
- writing config/cache/state files at runtime
|
|
17
|
+
|
|
18
|
+
## Network behavior
|
|
19
|
+
|
|
20
|
+
Tools send user-provided search/read requests to selected providers:
|
|
21
|
+
|
|
22
|
+
- DuckDuckGo HTML search
|
|
23
|
+
- Marginalia public search
|
|
24
|
+
- Jina search
|
|
25
|
+
- Brave Search API only when explicitly enabled and keyed via env var
|
|
26
|
+
- `web_read` fetches public HTTP(S) URLs supplied by the user/model
|
|
27
|
+
|
|
28
|
+
## Credentials
|
|
29
|
+
|
|
30
|
+
Keyed providers use environment variables only. Literal keys in config and shell commands are unsupported by design.
|
|
31
|
+
|
|
32
|
+
Current keyed env var:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
PI_WEB_SCOUT_BRAVE_API_KEY
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## SSRF notes
|
|
39
|
+
|
|
40
|
+
`web_read` blocks common local/private/metadata targets and validates every redirect hop. It does not perform custom DNS resolution, so DNS rebinding protection is best-effort.
|
|
41
|
+
|
|
42
|
+
## Reporting
|
|
43
|
+
|
|
44
|
+
When reporting a vulnerability, please include:
|
|
45
|
+
|
|
46
|
+
- package version / commit
|
|
47
|
+
- minimal reproduction
|
|
48
|
+
- affected tool or provider
|
|
49
|
+
- expected vs actual behavior
|
package/index.ts
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pi-web-scout",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "No-key web search extension for Pi, with provider architecture ready for keyed search APIs.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./index.ts",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": {
|
|
9
|
+
"import": "./index.ts",
|
|
10
|
+
"types": "./index.ts"
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
"files": [
|
|
14
|
+
"index.ts",
|
|
15
|
+
"src",
|
|
16
|
+
"README.md",
|
|
17
|
+
"package.json",
|
|
18
|
+
"LICENSE",
|
|
19
|
+
"SECURITY.md"
|
|
20
|
+
],
|
|
21
|
+
"keywords": [
|
|
22
|
+
"pi-package",
|
|
23
|
+
"pi-extension",
|
|
24
|
+
"pi",
|
|
25
|
+
"web-search",
|
|
26
|
+
"search"
|
|
27
|
+
],
|
|
28
|
+
"license": "MIT",
|
|
29
|
+
"repository": {
|
|
30
|
+
"type": "git",
|
|
31
|
+
"url": "git+ssh://git@github.com/alcovegan/pi-web-scout.git"
|
|
32
|
+
},
|
|
33
|
+
"bugs": {
|
|
34
|
+
"url": "https://github.com/alcovegan/pi-web-scout/issues"
|
|
35
|
+
},
|
|
36
|
+
"homepage": "https://github.com/alcovegan/pi-web-scout#readme",
|
|
37
|
+
"pi": {
|
|
38
|
+
"extensions": [
|
|
39
|
+
"./index.ts"
|
|
40
|
+
]
|
|
41
|
+
},
|
|
42
|
+
"scripts": {
|
|
43
|
+
"check:safety": "node tests/safety.test.mjs",
|
|
44
|
+
"test": "node --experimental-strip-types --test tests/*.test.mjs"
|
|
45
|
+
},
|
|
46
|
+
"peerDependencies": {
|
|
47
|
+
"@earendil-works/pi-coding-agent": "*",
|
|
48
|
+
"@earendil-works/pi-ai": "*",
|
|
49
|
+
"typebox": "*"
|
|
50
|
+
},
|
|
51
|
+
"peerDependenciesMeta": {
|
|
52
|
+
"@earendil-works/pi-ai": {
|
|
53
|
+
"optional": true
|
|
54
|
+
},
|
|
55
|
+
"typebox": {
|
|
56
|
+
"optional": true
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
package/src/config.ts
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import type { PiWebSearchConfig, ProviderId, ResolvedConfig } from "./types.ts";
|
|
4
|
+
import { clampInt } from "./safety.ts";
|
|
5
|
+
|
|
6
|
+
const CONFIG_FILE = "pi-web-scout.json";
|
|
7
|
+
const PROVIDERS = ["duckduckgo", "marginalia", "jina", "brave"] as const;
|
|
8
|
+
|
|
9
|
+
const DEFAULT_CONFIG: ResolvedConfig = {
|
|
10
|
+
enabled: true,
|
|
11
|
+
defaultProvider: "auto",
|
|
12
|
+
fallbackChain: ["duckduckgo", "marginalia", "jina"],
|
|
13
|
+
maxResults: 5,
|
|
14
|
+
providers: {
|
|
15
|
+
duckduckgo: { enabled: true },
|
|
16
|
+
marginalia: { enabled: true },
|
|
17
|
+
jina: { enabled: true },
|
|
18
|
+
brave: { enabled: false, apiKeyEnv: "PI_WEB_SCOUT_BRAVE_API_KEY" },
|
|
19
|
+
},
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
/**
 * Type guard for plain JSON-style objects.
 *
 * @param value - Any value, typically the result of `JSON.parse`.
 * @returns `true` when `value` is a non-null object that is not an array.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
|
|
25
|
+
|
|
26
|
+
/**
 * Type guard for provider selectors.
 *
 * @param value - Candidate value from user config.
 * @returns `true` for the literal "auto" or any entry of PROVIDERS.
 */
function isProviderId(value: unknown): value is ProviderId {
  if (value === "auto") return true;
  return PROVIDERS.some((known) => known === value);
}
|
|
29
|
+
|
|
30
|
+
/**
 * Type guard for concrete provider names (the pseudo-provider "auto" is rejected).
 *
 * @param value - Candidate value from user config.
 * @returns `true` only when `value` is one of the entries of PROVIDERS.
 */
function isConcreteProvider(value: unknown): value is (typeof PROVIDERS)[number] {
  for (const known of PROVIDERS) {
    if (known === value) return true;
  }
  return false;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Build the path of the project-level config file.
 *
 * @param cwd - Project directory.
 * @returns `<cwd>/.pi/<CONFIG_FILE>` joined with the platform separator.
 */
export function getConfigPath(cwd: string): string {
  const segments = [cwd, ".pi", CONFIG_FILE];
  return join(...segments);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Load the project config from `<cwd>/.pi/pi-web-scout.json` and merge it over
 * a deep copy of DEFAULT_CONFIG.
 *
 * After merging, disabled providers are removed from the fallback chain. If
 * that leaves the chain empty, the first *enabled* provider (in PROVIDERS
 * order) is used, so a user who disabled duckduckgo is not silently routed to
 * it; "duckduckgo" remains the last-resort value only when every provider is
 * disabled. A `defaultProvider` that points at a disabled provider is reset
 * to "auto".
 *
 * @param cwd - Project directory that may contain the `.pi` folder.
 * @returns The resolved configuration; a copy of the defaults when no config
 *   file exists.
 * @throws Error when the file cannot be read or parsed as JSON, is not a JSON
 *   object, or fails validateConfigShape.
 */
export function loadConfig(cwd: string): ResolvedConfig {
  const path = getConfigPath(cwd);
  if (!existsSync(path)) return structuredClone(DEFAULT_CONFIG);

  let parsed: unknown;
  try {
    parsed = JSON.parse(readFileSync(path, "utf-8"));
  } catch (error) {
    // Narrow instead of casting: a thrown value is not guaranteed to be an Error.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Invalid ${path}: ${reason}`);
  }
  if (!isRecord(parsed)) throw new Error(`Invalid ${path}: expected a JSON object`);

  validateConfigShape(parsed, path);
  const user = parsed as PiWebSearchConfig;

  const resolved = structuredClone(DEFAULT_CONFIG);
  if (user.enabled !== undefined) resolved.enabled = user.enabled;
  if (user.defaultProvider !== undefined) resolved.defaultProvider = user.defaultProvider;
  if (user.maxResults !== undefined) resolved.maxResults = clampInt(user.maxResults, 5, 1, 20);

  if (user.fallbackChain !== undefined) {
    // De-duplicate while keeping the user's ordering.
    const chain = user.fallbackChain.filter(isConcreteProvider);
    resolved.fallbackChain = [...new Set(chain)];
  }

  if (user.providers) {
    for (const provider of PROVIDERS) {
      const providerConfig = user.providers[provider];
      if (providerConfig?.enabled !== undefined) resolved.providers[provider].enabled = providerConfig.enabled;
      if (providerConfig?.apiKeyEnv !== undefined) resolved.providers[provider].apiKeyEnv = providerConfig.apiKeyEnv;
    }
  }

  // Only enabled providers may take part in the fallback chain.
  resolved.fallbackChain = resolved.fallbackChain.filter((p) => resolved.providers[p].enabled);
  if (resolved.fallbackChain.length === 0) {
    // Prefer a provider the user actually left enabled over a hard-coded one.
    const firstEnabled = PROVIDERS.find((p) => resolved.providers[p].enabled);
    resolved.fallbackChain = [firstEnabled ?? "duckduckgo"];
  }
  if (resolved.defaultProvider !== "auto" && !resolved.providers[resolved.defaultProvider].enabled) {
    resolved.defaultProvider = "auto";
  }
  return resolved;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Validate the raw, user-supplied config object and fail fast on the first
 * problem found. Only the shape is checked here; merging with defaults is done
 * by the caller.
 *
 * @param config - Parsed JSON object read from the config file.
 * @param path - Config file path, used only to prefix error messages.
 * @throws Error naming the first invalid field.
 */
function validateConfigShape(config: Record<string, unknown>, path: string): void {
  if (config.enabled !== undefined && typeof config.enabled !== "boolean") {
    throw new Error(`Invalid ${path}: enabled must be boolean`);
  }
  if (config.defaultProvider !== undefined && !isProviderId(config.defaultProvider)) {
    throw new Error(`Invalid ${path}: defaultProvider must be one of auto, ${PROVIDERS.join(", ")}`);
  }
  if (config.maxResults !== undefined) {
    const n = config.maxResults;
    // Require a real JSON number: Number() coercion would accept "5", true or [5].
    if (typeof n !== "number" || !Number.isFinite(n) || n < 1 || n > 20) {
      throw new Error(`Invalid ${path}: maxResults must be a number from 1 to 20`);
    }
  }
  if (config.fallbackChain !== undefined) {
    if (!Array.isArray(config.fallbackChain)) throw new Error(`Invalid ${path}: fallbackChain must be an array`);
    const invalid = config.fallbackChain.find((value) => !isConcreteProvider(value));
    if (invalid !== undefined) throw new Error(`Invalid ${path}: unknown provider in fallbackChain: ${String(invalid)}`);
  }
  if (config.providers !== undefined) {
    if (!isRecord(config.providers)) throw new Error(`Invalid ${path}: providers must be an object`);
    for (const [name, value] of Object.entries(config.providers)) {
      if (!isConcreteProvider(name)) throw new Error(`Invalid ${path}: unknown provider: ${name}`);
      if (!isRecord(value)) throw new Error(`Invalid ${path}: providers.${name} must be an object`);
      if (value.enabled !== undefined && typeof value.enabled !== "boolean") {
        throw new Error(`Invalid ${path}: providers.${name}.enabled must be boolean`);
      }
      if (value.apiKeyEnv !== undefined) {
        // Only an environment variable *name* is accepted here, never a literal key.
        if (typeof value.apiKeyEnv !== "string" || !/^[A-Z_][A-Z0-9_]*$/.test(value.apiKeyEnv)) {
          throw new Error(`Invalid ${path}: providers.${name}.apiKeyEnv must be an ALL_CAPS environment variable name`);
        }
      }
    }
  }
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { ResolvedProviderConfig, SearchProvider } from "./types.ts";
|
|
2
|
+
|
|
3
|
+
const ENV_NAME_PATTERN = /^[A-Z_][A-Z0-9_]*$/;
|
|
4
|
+
|
|
5
|
+
/**
 * Outcome of resolving a provider credential. All fields are optional: an
 * empty object is returned for providers that do not require a key.
 */
export interface ResolvedCredential {
  /** The trimmed credential value, present on success. */
  value?: string;
  /** Where the value came from, formatted as `env:<NAME>`. */
  source?: string;
  /** Human-readable reason when no credential could be resolved. */
  error?: string;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Check whether a string matches ENV_NAME_PATTERN.
 *
 * @param name - Candidate environment variable name.
 * @returns `true` when the whole string matches the pattern.
 */
export function validateEnvName(name: string): boolean {
  const match = ENV_NAME_PATTERN.exec(name);
  return match !== null;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Look up a provider's API key, using environment variables as the only source.
 *
 * Intentionally not supported:
 * - literal keys in config
 * - shell commands like !pass show ...
 * - keychain/cookie/SSH/config-file discovery
 *
 * @param provider - Provider whose `requiresKey` / `defaultKeyEnv` are consulted.
 * @param config - Resolved provider config; `apiKeyEnv` overrides the provider default.
 * @returns `{}` for keyless providers, `{ value, source }` on success, or
 *   `{ error }` describing why no key is available.
 */
export function resolveProviderCredential(
  provider: SearchProvider,
  config: ResolvedProviderConfig,
): ResolvedCredential {
  if (provider.requiresKey) {
    const envName = config.apiKeyEnv ?? provider.defaultKeyEnv;
    if (!envName) {
      return { error: `${provider.id} requires an API key env var` };
    }
    if (!validateEnvName(envName)) {
      return { error: `${provider.id} apiKeyEnv is not a valid environment variable name: ${envName}` };
    }

    // Whitespace-only values are treated the same as an unset variable.
    const raw = process.env[envName];
    const value = raw?.trim();
    if (!value) {
      return { error: `${provider.id} requires ${envName} to be set` };
    }
    return { value, source: `env:${envName}` };
  }

  // Keyless providers never need a credential.
  return {};
}
|
package/src/format.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { ProviderRunResult, SearchResult } from "./types.ts";
|
|
2
|
+
|
|
3
|
+
/**
 * Render search results as a Markdown report.
 *
 * Layout: a heading with the query, the provider label and result count, an
 * optional "Provider runs" list (one line per run), then one `###` section per
 * result containing its URL, source(s) and snippet.
 *
 * @param query - Original query; line breaks are collapsed to spaces.
 * @param results - Results to list, in display order.
 * @param providerLabel - Text shown on the `Provider:` line.
 * @param runs - Optional per-provider outcomes; omitted from the report when empty.
 * @returns The report with trailing whitespace removed.
 */
export function formatSearchResults(
  query: string,
  results: SearchResult[],
  providerLabel: string,
  runs: ProviderRunResult[] = [],
): string {
  // Collapse line breaks so the query stays on the heading line.
  const heading = query.replace(/[\r\n]+/g, " ").trim();
  const out: string[] = [];
  out.push(`## Web search: ${heading}`);
  out.push(`Provider: ${providerLabel}`);
  out.push(`Results: ${results.length}`);

  if (runs.length > 0) {
    out.push("");
    out.push("Provider runs:");
    runs.forEach((run) => {
      let status: string;
      if (run.error) {
        status = `failed: ${run.error}`;
      } else {
        const count = run.results.length;
        status = `${count} result${count === 1 ? "" : "s"}`;
      }
      out.push(`- ${run.provider}: ${status}`);
    });
  }
  out.push("");

  results.forEach((result, position) => {
    out.push(`### ${position + 1}. ${result.title || "Untitled"}`);
    out.push(result.url);

    // List every contributing provider only when more than one returned this result.
    const contributors = result.providers;
    if (contributors && contributors.length > 1) {
      out.push(`Sources: ${contributors.join(", ")}`);
    } else {
      out.push(`Source: ${result.provider}`);
    }

    if (result.snippet) out.push(result.snippet);
    out.push("");
  });

  return out.join("\n").trimEnd();
}
|
|
32
|
+
|
|
33
|
+
/**
 * Summarise failed provider runs on a single line.
 *
 * @param errors - Provider runs to report.
 * @returns `provider: message` pairs joined by "; " ("unknown error" when a
 *   run carries no message); an empty string for an empty list.
 */
export function formatProviderErrors(errors: ProviderRunResult[]): string {
  const parts: string[] = [];
  for (const failure of errors) {
    parts.push(`${failure.provider}: ${failure.error ?? "unknown error"}`);
  }
  return parts.join("; ");
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import type { SearchProvider, SearchRequest, SearchResult } from "../types.ts";
|
|
2
|
+
|
|
3
|
+
const ENDPOINT = "https://api.search.brave.com/res/v1/web/search";
|
|
4
|
+
const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
|
|
5
|
+
|
|
6
|
+
/**
 * One entry of `web.results` in a Brave Search API response. Every field is
 * typed `unknown` because the payload is untrusted JSON.
 */
interface BraveItem {
  /** Result title. */
  title?: unknown;
  /** Result URL. */
  url?: unknown;
  /** Text used as the result snippet. */
  description?: unknown;
  /** Declared for completeness; not read by the code in this file. */
  age?: unknown;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Brave Search API provider. Requires an API key, which is sent in the
 * `X-Subscription-Token` header.
 */
export const braveProvider: SearchProvider = {
  id: "brave",
  label: "Brave Search API",
  requiresKey: true,
  defaultKeyEnv: "PI_WEB_SCOUT_BRAVE_API_KEY",

  /**
   * Run a web search against Brave.
   *
   * @param request - Query, result limit, API key and optional abort signal.
   * @returns Results that have both a title and a URL, ranked by response position.
   * @throws Error when the key is missing or Brave answers with a non-2xx status.
   */
  async search(request: SearchRequest): Promise<SearchResult[]> {
    const { apiKey, query, maxResults, signal } = request;
    if (!apiKey) throw new Error("Brave API key is missing");

    const url = new URL(ENDPOINT);
    url.searchParams.set("q", query);
    // The requested count is capped at 20.
    url.searchParams.set("count", String(Math.min(maxResults, 20)));

    const headers = {
      "Accept": "application/json",
      "Accept-Encoding": "gzip",
      "User-Agent": USER_AGENT,
      "X-Subscription-Token": apiKey,
    };
    const response = await fetch(url.toString(), { headers, signal });

    if (!response.ok) throw new Error(`Brave returned HTTP ${response.status}`);

    const data = await response.json() as { web?: { results?: BraveItem[] } };
    const items = Array.isArray(data.web?.results) ? data.web.results : [];

    const found: SearchResult[] = [];
    items.slice(0, maxResults).forEach((item, index) => {
      const title = stringValue(item.title);
      const link = stringValue(item.url);
      // Entries without a title or URL are dropped; rank keeps the response position.
      if (title.length === 0 || link.length === 0) return;
      found.push({
        title,
        url: link,
        snippet: stringValue(item.description),
        provider: "brave",
        rank: index + 1,
      });
    });
    return found;
  },
};
|
|
50
|
+
|
|
51
|
+
/**
 * Coerce an untrusted JSON field to a trimmed string.
 *
 * @param value - Field taken from a provider response.
 * @returns The trimmed string, or "" when the value is not a string.
 */
function stringValue(value: unknown): string {
  if (typeof value !== "string") return "";
  return value.trim();
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import type { SearchProvider, SearchRequest, SearchResult } from "../types.ts";
|
|
2
|
+
import { stripTags } from "../safety.ts";
|
|
3
|
+
|
|
4
|
+
const ENDPOINT = "https://html.duckduckgo.com/html/";
|
|
5
|
+
const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
|
|
6
|
+
|
|
7
|
+
/** Keyless provider backed by the DuckDuckGo HTML endpoint. */
export const duckDuckGoProvider: SearchProvider = {
  id: "duckduckgo",
  label: "DuckDuckGo HTML",
  requiresKey: false,

  /**
   * Fetch the HTML results page and parse it with parseDuckDuckGoHtml.
   *
   * @param request - Query, result limit and optional abort signal.
   * @returns The parsed results.
   * @throws Error when DuckDuckGo answers with a non-2xx status.
   */
  async search(request: SearchRequest): Promise<SearchResult[]> {
    const queryString = new URLSearchParams({ q: request.query, no_redirect: "1" }).toString();
    const headers = {
      "Accept": "text/html,application/xhtml+xml",
      "User-Agent": USER_AGENT,
    };

    const response = await fetch(`${ENDPOINT}?${queryString}`, {
      headers,
      signal: request.signal,
    });
    if (!response.ok) {
      throw new Error(`DuckDuckGo returned HTTP ${response.status}`);
    }

    const html = await response.text();
    return parseDuckDuckGoHtml(html, request.maxResults);
  },
};
|
|
28
|
+
|
|
29
|
+
/**
 * Extract search results from a DuckDuckGo HTML results page using regular
 * expressions (no DOM parser).
 *
 * The page is cut into blocks that start at a `<div>` whose class contains
 * "result"; a block yields a result only when it contains a `result__a` link
 * with a non-empty URL and title.
 *
 * @param html - Raw HTML of the results page.
 * @param maxResults - Maximum number of results to return.
 * @returns Results in page order, with `rank` starting at 1.
 */
export function parseDuckDuckGoHtml(html: string, maxResults: number): SearchResult[] {
  const blockPattern = /<div[^>]+class="[^"]*result[^"]*"[\s\S]*?(?=<div[^>]+class="[^"]*result[^"]*"|<\/body>|$)/gi;
  const linkPattern = /<a[^>]+class="[^"]*result__a[^"]*"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i;
  const anchorSnippetPattern = /<a[^>]+class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/a>/i;
  const cellSnippetPattern = /<td[^>]+class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/td>/i;

  const collected: SearchResult[] = [];
  for (const [block] of html.matchAll(blockPattern)) {
    if (collected.length >= maxResults) break;

    const link = linkPattern.exec(block);
    if (link === null) continue;

    const url = decodeDuckDuckGoUrl(link[1] ?? "");
    const title = stripTags(link[2] ?? "");
    if (!url || !title) continue;

    // The snippet is looked up as an <a> first, then as a <td>.
    const snippetMatch = anchorSnippetPattern.exec(block) ?? cellSnippetPattern.exec(block);
    const snippet = snippetMatch ? stripTags(snippetMatch[1] ?? "") : undefined;

    collected.push({
      title,
      url,
      snippet,
      provider: "duckduckgo",
      rank: collected.length + 1,
    });
  }

  return collected;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Turn a DuckDuckGo result href into the target URL.
 *
 * Result links may be redirect links that carry the real target in the `uddg`
 * query parameter. When present, that parameter is returned; otherwise the
 * href is resolved against https://duckduckgo.com and returned as an absolute
 * URL.
 *
 * @param url - Raw `href` attribute value from the results page.
 * @returns The target URL, or the cleaned input when it cannot be parsed.
 */
function decodeDuckDuckGoUrl(url: string): string {
  const decoded = stripTags(url);
  try {
    const parsed = new URL(decoded, "https://duckduckgo.com");
    // searchParams.get() already percent-decodes the value. Decoding a second
    // time would corrupt targets that contain encoded characters (%20, %25, ...)
    // and would throw on a bare "%".
    const uddg = parsed.searchParams.get("uddg");
    return uddg ? uddg : parsed.toString();
  } catch {
    return decoded;
  }
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import type { SearchProvider, SearchRequest, SearchResult } from "../types.ts";
|
|
2
|
+
|
|
3
|
+
const ENDPOINT = "https://s.jina.ai/";
|
|
4
|
+
const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
|
|
5
|
+
|
|
6
|
+
/**
 * One entry of the `data` array in a Jina Search JSON response. Every field is
 * typed `unknown` because the payload is untrusted JSON.
 */
interface JinaItem {
  /** Result title; the URL is used as the title when this is missing. */
  title?: unknown;
  /** Result URL. */
  url?: unknown;
  /** Preferred snippet source. */
  content?: unknown;
  /** Snippet source used when `content` is empty. */
  description?: unknown;
}
|
|
12
|
+
|
|
13
|
+
/** Keyless provider backed by the Jina Search endpoint. */
export const jinaProvider: SearchProvider = {
  id: "jina",
  label: "Jina Search",
  requiresKey: false,

  /**
   * Query Jina for JSON results and parse them with parseJinaResponse.
   *
   * @param request - Query, result limit and optional abort signal.
   * @returns The parsed results.
   * @throws Error when Jina answers with a non-2xx status.
   */
  async search(request: SearchRequest): Promise<SearchResult[]> {
    const target = new URL(ENDPOINT);
    target.searchParams.set("q", request.query);
    target.searchParams.set("format", "json");

    const headers = {
      "Accept": "application/json",
      "User-Agent": USER_AGENT,
    };
    const response = await fetch(target.toString(), { headers, signal: request.signal });
    if (!response.ok) {
      throw new Error(`Jina returned HTTP ${response.status}`);
    }

    const payload: unknown = await response.json();
    return parseJinaResponse(payload, request.maxResults);
  },
};
|
|
37
|
+
|
|
38
|
+
/**
 * Convert a Jina Search JSON payload into search results.
 *
 * The payload is untrusted: anything that is not an object with a `data`
 * array yields an empty list, and array entries that are not objects are
 * skipped instead of throwing.
 *
 * @param data - Parsed JSON body of the Jina response.
 * @param maxResults - Number of leading entries to consider.
 * @returns Entries that have a URL (the title falls back to the URL), with the
 *   snippet capped at 700 characters and `rank` set to the 1-based position in
 *   the response.
 */
export function parseJinaResponse(data: unknown, maxResults: number): SearchResult[] {
  // A null or primitive body must not crash the property access.
  const rawItems: unknown =
    typeof data === "object" && data !== null ? (data as { data?: unknown }).data : undefined;
  const items: unknown[] = Array.isArray(rawItems) ? rawItems : [];

  return items
    .slice(0, maxResults)
    .map((item, index) => {
      // Treat null or primitive entries as empty items; the filter below drops them.
      const entry: JinaItem = typeof item === "object" && item !== null ? (item as JinaItem) : {};
      return {
        title: stringValue(entry.title) || stringValue(entry.url),
        url: stringValue(entry.url),
        snippet: (stringValue(entry.content) || stringValue(entry.description)).slice(0, 700),
        provider: "jina",
        rank: index + 1,
      };
    })
    .filter((result) => result.title.length > 0 && result.url.length > 0);
}

/**
 * Coerce an untrusted JSON field to a trimmed string.
 *
 * @param value - Field taken from a provider response.
 * @returns The trimmed string, or "" when the value is not a string.
 */
function stringValue(value: unknown): string {
  return typeof value === "string" ? value.trim() : "";
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { SearchProvider, SearchRequest, SearchResult } from "../types.ts";
|
|
2
|
+
|
|
3
|
+
const BASE_URL = "https://api.marginalia.nu/public/search";
|
|
4
|
+
const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
|
|
5
|
+
|
|
6
|
+
/**
 * One entry of the `results` array in a Marginalia search response. Every
 * field is typed `unknown` because the payload is untrusted JSON.
 */
interface MarginaliaItem {
  /** Result title. */
  title?: unknown;
  /** Result URL. */
  url?: unknown;
  /** Text used as the result snippet. */
  description?: unknown;
}
|
|
11
|
+
|
|
12
|
+
/** Keyless provider backed by the Marginalia public search endpoint. */
export const marginaliaProvider: SearchProvider = {
  id: "marginalia",
  label: "Marginalia Search",
  requiresKey: false,

  /**
   * Query Marginalia and parse the JSON body with parseMarginaliaResponse.
   *
   * @param request - Query, result limit and optional abort signal.
   * @returns The parsed results.
   * @throws Error when Marginalia answers with a non-2xx status.
   */
  async search(request: SearchRequest): Promise<SearchResult[]> {
    // The requested count is capped at 20; the query goes into the URL path.
    const limit = Math.min(request.maxResults, 20);
    const encodedQuery = encodeURIComponent(request.query);
    const endpoint = `${BASE_URL}/${encodedQuery}?index=0&count=${limit}`;

    const headers = {
      "Accept": "application/json",
      "User-Agent": USER_AGENT,
    };
    const response = await fetch(endpoint, { headers, signal: request.signal });
    if (!response.ok) {
      throw new Error(`Marginalia returned HTTP ${response.status}`);
    }

    const payload: unknown = await response.json();
    return parseMarginaliaResponse(payload, request.maxResults);
  },
};
|
|
34
|
+
|
|
35
|
+
/**
 * Convert a Marginalia JSON payload into search results.
 *
 * The payload is untrusted: anything that is not an object with a `results`
 * array yields an empty list, and array entries that are not objects are
 * skipped instead of throwing.
 *
 * @param data - Parsed JSON body of the Marginalia response.
 * @param maxResults - Number of leading entries to consider.
 * @returns Entries that have both a title and a URL, with `rank` set to the
 *   1-based position in the response.
 */
export function parseMarginaliaResponse(data: unknown, maxResults: number): SearchResult[] {
  // A null or primitive body must not crash the property access.
  const rawItems: unknown =
    typeof data === "object" && data !== null ? (data as { results?: unknown }).results : undefined;
  const items: unknown[] = Array.isArray(rawItems) ? rawItems : [];

  return items
    .slice(0, maxResults)
    .map((item, index) => {
      // Treat null or primitive entries as empty items; the filter below drops them.
      const entry: MarginaliaItem = typeof item === "object" && item !== null ? (item as MarginaliaItem) : {};
      return {
        title: stringValue(entry.title),
        url: stringValue(entry.url),
        snippet: stringValue(entry.description),
        provider: "marginalia",
        rank: index + 1,
      };
    })
    .filter((result) => result.title.length > 0 && result.url.length > 0);
}

/**
 * Coerce an untrusted JSON field to a trimmed string.
 *
 * @param value - Field taken from a provider response.
 * @returns The trimmed string, or "" when the value is not a string.
 */
function stringValue(value: unknown): string {
  return typeof value === "string" ? value.trim() : "";
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { ProviderId, SearchProvider } from "../types.ts";
|
|
2
|
+
import { braveProvider } from "./brave.ts";
|
|
3
|
+
import { duckDuckGoProvider } from "./duckduckgo.ts";
|
|
4
|
+
import { jinaProvider } from "./jina.ts";
|
|
5
|
+
import { marginaliaProvider } from "./marginalia.ts";
|
|
6
|
+
|
|
7
|
+
const providers = new Map<string, SearchProvider>([
|
|
8
|
+
[duckDuckGoProvider.id, duckDuckGoProvider],
|
|
9
|
+
[marginaliaProvider.id, marginaliaProvider],
|
|
10
|
+
[jinaProvider.id, jinaProvider],
|
|
11
|
+
[braveProvider.id, braveProvider],
|
|
12
|
+
]);
|
|
13
|
+
|
|
14
|
+
/**
 * Look up a registered provider by id.
 *
 * @param id - Concrete provider id ("auto" is excluded by the type).
 * @returns The provider, or `undefined` when nothing is registered under `id`.
 */
export function getProvider(id: Exclude<ProviderId, "auto">): SearchProvider | undefined {
  const match = providers.get(id);
  return match;
}
|
|
17
|
+
|
|
18
|
+
/**
 * List every registered provider.
 *
 * @returns A new array of providers in registration order.
 */
export function listProviders(): SearchProvider[] {
  return Array.from(providers.values());
}
|
package/src/read.ts
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { clampInt, sanitizeError } from "./safety.ts";
|
|
2
|
+
import { validatePublicHttpUrl } from "./url-safety.ts";
|
|
3
|
+
import { extractReadableText } from "./readability.ts";
|
|
4
|
+
|
|
5
|
+
const MAX_REDIRECTS = 8;
|
|
6
|
+
const MAX_BYTES = 1_000_000;
|
|
7
|
+
const DEFAULT_TIMEOUT_MS = 15_000;
|
|
8
|
+
const USER_AGENT = "pi-web-scout/0.1 (+https://pi.dev)";
|
|
9
|
+
|
|
10
|
+
/** Input for readWebPage. */
export interface WebReadOptions {
  /** URL of the page to read. */
  url: string;
  /** Upper bound on the number of extracted characters returned. */
  maxChars?: number;
  /** Optional signal used to abort the read. */
  signal?: AbortSignal;
}
|
|
15
|
+
|
|
16
|
+
/** Extracted page content together with metadata about the HTTP response. */
export interface WebReadResult {
  /** URL string as requested, before validation and normalisation. */
  requestedUrl: string;
  /** URL the content was finally served from. */
  finalUrl: string;
  /** Page title, when one could be extracted. */
  title?: string;
  /** Value of the `content-type` response header. */
  contentType: string;
  /** HTTP status code of the final response. */
  status: number;
  /** Extracted text, cut at the character limit when `truncated` is true. */
  text: string;
  /** Whether the extracted text exceeded the character limit. */
  truncated: boolean;
  /** Number of body bytes read from the response. */
  bytesRead: number;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Read a public web page and return its extracted text.
 *
 * @param options - Target URL, optional character limit and abort signal.
 * @returns The extracted content and response metadata.
 */
export async function readWebPage(options: WebReadOptions): Promise<WebReadResult> {
  // The limit is normalised via clampInt with fallback 12,000 and bounds 1,000..50,000.
  const maxChars = clampInt(options.maxChars, 12_000, 1_000, 50_000);
  const { url, signal } = options;
  const visited = new Set<string>();
  const result = await fetchWithRedirects(url, maxChars, signal, 0, visited);
  return result;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Fetch one hop of a page read. Redirects are followed manually so that every
 * hop is checked by validatePublicHttpUrl before a request is made.
 *
 * @param url - URL to fetch for this hop (the caller's URL at depth 0).
 * @param maxChars - Maximum number of extracted characters to return.
 * @param signal - Optional caller abort signal, combined with a per-hop timeout.
 * @param depth - Number of redirects followed so far.
 * @param seen - Normalised URLs already visited, used to detect loops.
 * @returns The extracted content; `requestedUrl` is the URL passed to the
 *   outermost call and `finalUrl` the URL that served the content.
 * @throws Error when the URL is blocked, a redirect loop or too many redirects
 *   occur, a redirect has no Location header, or the final status is not 2xx.
 */
async function fetchWithRedirects(
  url: string,
  maxChars: number,
  signal: AbortSignal | undefined,
  depth: number,
  seen: Set<string>,
): Promise<WebReadResult> {
  const validation = validatePublicHttpUrl(url);
  if (!validation.valid || !validation.url) throw new Error(`URL blocked: ${validation.reason ?? "invalid URL"}`);
  const normalized = validation.url.toString();
  if (seen.has(normalized)) throw new Error("redirect loop detected");
  if (depth > MAX_REDIRECTS) throw new Error(`too many redirects (>${MAX_REDIRECTS})`);
  seen.add(normalized);

  // Each hop gets its own timeout; the caller's signal can still abort earlier.
  const timeout = AbortSignal.timeout(DEFAULT_TIMEOUT_MS);
  const combinedSignal = signal ? AbortSignal.any([signal, timeout]) : timeout;

  const response = await fetch(normalized, {
    redirect: "manual",
    signal: combinedSignal,
    headers: {
      "Accept": "text/html,application/xhtml+xml,text/plain,application/json;q=0.8,*/*;q=0.1",
      "Accept-Encoding": "gzip, deflate",
      "User-Agent": USER_AGENT,
    },
  });

  if (response.status >= 300 && response.status < 400) {
    const location = response.headers.get("location");
    // The redirect body is never read; cancel it so the connection is released.
    await response.body?.cancel().catch(() => undefined);
    if (!location) throw new Error(`redirect without location: HTTP ${response.status}`);
    const next = new URL(location, normalized).toString();
    const result = await fetchWithRedirects(next, maxChars, signal, depth + 1, seen);
    // Report the URL this hop was asked for; the outermost call wins, so the
    // caller sees the URL it originally requested rather than the last hop.
    return { ...result, requestedUrl: url };
  }

  if (!response.ok) {
    // Same as above: do not leave an unread error body attached to the connection.
    await response.body?.cancel().catch(() => undefined);
    throw new Error(`fetch failed: HTTP ${response.status}`);
  }

  const contentType = response.headers.get("content-type") ?? "text/plain";
  const { text: raw, bytesRead } = await readLimitedBody(response);
  const extracted = extractReadableText(raw, contentType);
  const truncated = extracted.text.length > maxChars;
  const text = truncated ? `${extracted.text.slice(0, maxChars)}\n\n[... truncated at ${maxChars} chars ...]` : extracted.text;

  return {
    requestedUrl: url,
    finalUrl: response.url || normalized,
    title: extracted.title,
    contentType,
    status: response.status,
    text,
    truncated,
    bytesRead,
  };
}
|
|
84
|
+
|
|
85
|
+
/**
 * Reads the response body as text, stopping once `MAX_BYTES` or more bytes
 * have been consumed.
 *
 * When the response exposes no body stream, `response.text()` is used
 * instead (a failure there yields the sanitized error message as the text)
 * and `bytesRead` is reported as 0.
 *
 * @returns The decoded text, with a truncation marker appended when the byte
 *          limit was reached, and the number of bytes consumed.
 */
async function readLimitedBody(response: Response): Promise<{ text: string; bytesRead: number }> {
  const stream = response.body;
  if (!stream) {
    let fallback: string;
    try {
      fallback = await response.text();
    } catch (error) {
      fallback = sanitizeError(error);
    }
    return { text: fallback, bytesRead: 0 };
  }

  const reader = stream.getReader();
  const decoder = new TextDecoder();
  const pieces: string[] = [];
  let total = 0;

  for (;;) {
    if (total >= MAX_BYTES) break;
    const chunk = await reader.read();
    if (chunk.done) break;
    total += chunk.value.byteLength;
    // stream: true keeps multi-byte sequences that straddle chunks intact.
    pieces.push(decoder.decode(chunk.value, { stream: true }));
  }

  await reader.cancel().catch(() => undefined);
  // Flush whatever the decoder is still buffering.
  pieces.push(decoder.decode());
  if (total >= MAX_BYTES) pieces.push("\n[... response body truncated at 1MB ...]");

  return { text: pieces.join(""), bytesRead: total };
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { decodeHtml, stripTags } from "./safety.ts";
|
|
2
|
+
|
|
3
|
+
/** Result of turning a fetched document into plain text. */
export interface ExtractedPage {
  /** Document title; omitted when the content is not HTML/XML or no title was found. */
  title?: string;
  /** Whitespace-normalised readable text. */
  text: string;
}
|
|
7
|
+
|
|
8
|
+
/**
 * Converts a fetched document into readable plain text.
 *
 * Content whose type does not mention `html` or `xml` is only
 * whitespace-normalised. For markup, non-content elements (scripts, styles,
 * comments, noscript, svg, nav, footer, header) are removed, the main
 * container is selected, block-level tags become line breaks, remaining tags
 * are dropped and entities are decoded.
 *
 * @param content     Raw response text.
 * @param contentType Value of the `content-type` header.
 * @returns The page title (markup only) and the extracted text.
 */
export function extractReadableText(content: string, contentType: string): ExtractedPage {
  if (!/html|xml/i.test(contentType)) {
    return { text: normalizeWhitespace(content) };
  }

  const title = extractTitle(content);

  // `\s*` before `>` also catches closing tags written as `</script >`.
  const withoutBoilerplate = content
    .replace(/<script\b[\s\S]*?<\/script\s*>/gi, " ")
    .replace(/<style\b[\s\S]*?<\/style\s*>/gi, " ")
    // Comments may contain `>` and would otherwise leak through tag stripping.
    .replace(/<!--[\s\S]*?-->/g, " ")
    .replace(/<noscript\b[\s\S]*?<\/noscript\s*>/gi, " ")
    .replace(/<svg\b[\s\S]*?<\/svg\s*>/gi, " ")
    .replace(/<nav\b[\s\S]*?<\/nav\s*>/gi, " ")
    .replace(/<footer\b[\s\S]*?<\/footer\s*>/gi, " ")
    .replace(/<header\b[\s\S]*?<\/header\s*>/gi, " ");

  // Select the container while <article> tags still exist; converting block
  // tags to newlines first would make the article candidate unmatchable.
  const main = pickMainContent(withoutBoilerplate);

  const withLineBreaks = main
    .replace(/<(br|p|div|section|article|li|tr|h[1-6])\b[^>]*>/gi, "\n")
    .replace(/<\/(p|div|section|article|li|tr|h[1-6])\s*>/gi, "\n");

  // Strip tags here instead of via stripTags(): that helper collapses every
  // whitespace run (newlines included) and would erase the paragraph breaks
  // inserted above.
  const text = decodeHtml(withLineBreaks.replace(/<[^>]*>/g, " "));
  return { title, text: normalizeWhitespace(text) };
}
|
|
28
|
+
|
|
29
|
+
/**
 * Returns the text of the first `<title>` element, or `undefined` when there
 * is none or it is empty.
 *
 * Entities are decoded exactly once: tags are removed inline rather than via
 * `stripTags` (which already decodes), so a title such as `&amp;lt;b&amp;gt;`
 * stays `&lt;b&gt;` instead of being decoded a second time into `<b>`.
 */
function extractTitle(html: string): string | undefined {
  // `\b` keeps elements like <titlebar> from matching.
  const match = /<title\b[^>]*>([\s\S]*?)<\/title\s*>/i.exec(html);
  if (!match) return undefined;
  const withoutTags = (match[1] ?? "").replace(/<[^>]*>/g, " ");
  const title = normalizeWhitespace(decodeHtml(withoutTags).replace(/\s+/g, " "));
  return title || undefined;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Picks the most specific content container from the markup.
 *
 * `<main>`, `<article>` and `<body>` are tried in that order; the first one
 * whose tag-stripped text is longer than 200 characters wins. When none
 * qualifies the input is returned unchanged.
 */
function pickMainContent(html: string): string {
  for (const tag of ["main", "article", "body"]) {
    const container = new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)<\\/${tag}>`, "i");
    const inner = container.exec(html)?.[1];
    if (inner && stripTags(inner).length > 200) {
      return inner;
    }
  }
  return html;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Tidies whitespace in extracted text.
 *
 * Carriage returns are dropped, runs of spaces/tabs become one space, every
 * line is trimmed, and consecutive blank lines are reduced to a single blank
 * line. The result has no leading or trailing whitespace.
 */
function normalizeWhitespace(text: string): string {
  const collapsed = text
    .replace(/\r/g, "")
    .replace(/[ \t]+/g, " ")
    .replace(/\n{3,}/g, "\n\n");

  const kept: string[] = [];
  let previous = "";
  for (const rawLine of collapsed.split("\n")) {
    const line = rawLine.trim();
    // Keep a blank line only when it directly follows a non-blank one.
    if (line !== "" || previous !== "") kept.push(line);
    previous = line;
  }

  return kept.join("\n").trim();
}
|
package/src/safety.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Patterns for credential-looking text in error messages. Capture group 1 is
 * the label (header/key name or auth scheme) that is kept in the output.
 */
const SECRET_PATTERNS: RegExp[] = [
  /(authorization|x-api-key|api[-_]?key|token|secret|password)["']?\s*[:=]\s*["']?[^\s"']{8,}/gi,
  // Includes `~`, `+` and `=` so base64-style bearer tokens are redacted whole
  // rather than leaving their tail behind.
  /(bearer|token)\s+[a-z0-9._~+\/=-]{8,}/gi,
];

/**
 * Converts an unknown thrown value into a message that is safe to surface.
 *
 * Redaction runs on the full message and the result is then limited to 500
 * characters. Truncating first could cut a secret below the 8-character
 * minimum the patterns require, leaving its first characters unredacted.
 *
 * @param error Any thrown value.
 * @returns The redacted message, at most 500 characters long.
 */
export function sanitizeError(error: unknown): string {
  const message = error instanceof Error ? error.message : String(error);
  let safe = message;
  for (const pattern of SECRET_PATTERNS) safe = safe.replace(pattern, "$1 [redacted]");
  return safe.slice(0, 500);
}
|
|
12
|
+
|
|
13
|
+
/**
 * Coerces `value` to an integer within `[min, max]`.
 *
 * Only numbers, bigints and non-blank strings are treated as numeric input.
 * Everything else (`undefined`, `null`, `""`, booleans, objects) and any
 * non-finite result yields `fallback`, so a missing value can no longer be
 * read as 0 and clamped to `min`.
 *
 * @param value    Candidate value, typically an optional tool parameter.
 * @param fallback Returned as-is when `value` is not usable.
 * @param min      Inclusive lower bound.
 * @param max      Inclusive upper bound.
 * @returns `Math.floor(value)` clamped to the bounds, or `fallback`.
 */
export function clampInt(value: unknown, fallback: number, min: number, max: number): number {
  let n: number;
  if (typeof value === "number") {
    n = value;
  } else if (typeof value === "bigint" || (typeof value === "string" && value.trim() !== "")) {
    n = Number(value);
  } else {
    return fallback;
  }
  if (!Number.isFinite(n)) return fallback;
  return Math.max(min, Math.min(max, Math.floor(n)));
}
|
|
18
|
+
|
|
19
|
+
/**
 * Produces a key for recognising the same URL returned by several providers.
 *
 * The fragment is dropped and trailing slashes are removed from non-root
 * paths. Scheme and host are already lower-cased by the URL parser; path and
 * query keep their case because they are case-sensitive, and lower-casing
 * them would merge distinct pages (for example video or short-link ids that
 * differ only in case).
 *
 * @param url Result URL as returned by a provider.
 * @returns The normalised URL, or the trimmed input when it cannot be parsed.
 */
export function normalizeUrlForDedupe(url: string): string {
  try {
    const parsed = new URL(url);
    parsed.hash = "";
    if (parsed.pathname.length > 1 && parsed.pathname.endsWith("/")) {
      parsed.pathname = parsed.pathname.replace(/\/+$/, "");
    }
    return parsed.toString();
  } catch {
    return url.trim();
  }
}
|
|
31
|
+
|
|
32
|
+
/** Named character references recognised by `decodeHtml`. */
const HTML_NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

/**
 * Decodes HTML character references in `value`.
 *
 * Handles the named references listed in `HTML_NAMED_ENTITIES` plus decimal
 * (`&#39;`) and hexadecimal (`&#x2F;`) numeric references. The string is
 * scanned in a single pass, so decoded output is never decoded again
 * (`&amp;lt;` becomes `&lt;`, not `<`). Unknown names and numeric references
 * outside the Unicode range are left untouched.
 *
 * @param value Text that may contain character references.
 * @returns The text with recognised references replaced.
 */
export function decodeHtml(value: string): string {
  return value.replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z][a-zA-Z0-9]*);/g, (reference: string, body: string) => {
    if (!body.startsWith("#")) {
      return HTML_NAMED_ENTITIES.get(body) ?? reference;
    }
    const isHex = body[1] === "x" || body[1] === "X";
    const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
    // fromCodePoint throws for values above U+10FFFF; keep such references as-is.
    if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff) return reference;
    return String.fromCodePoint(codePoint);
  });
}
|
|
44
|
+
|
|
45
|
+
/**
 * Reduces markup to a single line of text: each tag becomes a space, entities
 * are decoded with `decodeHtml`, every whitespace run collapses to one space
 * and the result is trimmed.
 */
export function stripTags(html: string): string {
  const withoutTags = html.replace(/<[^>]*>/g, " ");
  const decoded = decodeHtml(withoutTags);
  const singleSpaced = decoded.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
package/src/tool.ts
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { StringEnum } from "@earendil-works/pi-ai";
|
|
3
|
+
import { Type } from "typebox";
|
|
4
|
+
import { getConfigPath, loadConfig } from "./config.ts";
|
|
5
|
+
import { resolveProviderCredential } from "./credentials.ts";
|
|
6
|
+
import { formatProviderErrors, formatSearchResults } from "./format.ts";
|
|
7
|
+
import { getProvider, listProviders } from "./providers/registry.ts";
|
|
8
|
+
import { readWebPage } from "./read.ts";
|
|
9
|
+
import { clampInt, normalizeUrlForDedupe, sanitizeError } from "./safety.ts";
|
|
10
|
+
import type { ProviderId, ProviderRunResult, ResolvedConfig, SearchMode, SearchResult } from "./types.ts";
|
|
11
|
+
|
|
12
|
+
/** Values accepted by the `provider` parameter of the `web_search` tool. */
const PROVIDERS = [
  "auto",
  "duckduckgo",
  "marginalia",
  "jina",
  "brave",
] as const;

/** Values accepted by the `mode` parameter of the `web_search` tool. */
const MODES = [
  "first_success",
  "combine",
] as const;
|
|
14
|
+
|
|
15
|
+
/**
 * Registers the extension's surface on `pi`: the status command plus the
 * `web_search` and `web_read` tools.
 *
 * `web_search` loads the config for the current working directory, refuses to
 * run when the extension or an explicitly requested provider is disabled, and
 * runs the providers either combined (mode `combine` with provider `auto`) or
 * first-success. It throws when no results come back.
 *
 * `web_read` fetches one URL through `readWebPage` and returns a short header
 * followed by the extracted text.
 */
export function registerWebSearchTool(pi: ExtensionAPI): void {
  registerStatusCommand(pi);

  const searchParameters = Type.Object({
    query: Type.String({ description: "Search query" }),
    max_results: Type.Optional(Type.Number({ description: "Number of results, 1-20. Default comes from config or 5." })),
    provider: Type.Optional(StringEnum(PROVIDERS, { description: "Search provider. Use auto unless a provider is requested." })),
    mode: Type.Optional(StringEnum(MODES, { description: "first_success uses fallback order; combine merges enabled providers." })),
  });

  pi.registerTool({
    name: "web_search",
    label: "Web Scout Search",
    description:
      "Search the web using no-key search providers. Defaults to DuckDuckGo HTML and is architected for future keyed providers.",
    promptSnippet: "web_search: search the public web without requiring API keys.",
    promptGuidelines: [
      "Use web_search when current or source-backed information is needed.",
      "Prefer web_search over guessing facts that may have changed recently.",
      "Use web_search with mode=combine when broad coverage matters more than speed.",
    ],
    parameters: searchParameters,
    async execute(_toolCallId, params, signal, _onUpdate, ctx) {
      const config = loadConfig(ctx.cwd);
      if (!config.enabled) {
        throw new Error("pi-web-scout is disabled in .pi/pi-web-scout.json");
      }

      const query = String(params.query ?? "").trim();
      if (!query) {
        throw new Error("query must be non-empty");
      }

      const maxResults = clampInt(params.max_results, config.maxResults, 1, 20);
      const provider = (params.provider ?? config.defaultProvider) as ProviderId;
      const mode = (params.mode ?? "first_success") as SearchMode;
      if (provider !== "auto" && !config.providers[provider].enabled) {
        throw new Error(`Provider ${provider} is disabled in .pi/pi-web-scout.json`);
      }

      // Combining only applies to the automatic chain; an explicitly chosen
      // provider always goes through the first-success path.
      const providerIds = provider === "auto" ? config.fallbackChain : [provider];
      const runner = mode === "combine" && provider === "auto" ? runCombined : runFirstSuccess;
      const runResult = await runner(providerIds, query, maxResults, signal, config);

      if (runResult.results.length === 0) {
        const failedRuns = runResult.runs.filter((r) => r.error);
        const errors = formatProviderErrors(failedRuns);
        throw new Error(errors ? `No web search results. Provider errors: ${errors}` : "No web search results.");
      }

      const runSummaries = runResult.runs.map((run) => ({
        provider: run.provider,
        resultCount: run.results.length,
        error: run.error,
      }));
      const text = formatSearchResults(query, runResult.results, runResult.providerLabel, runResult.runs);

      return {
        content: [{ type: "text", text }],
        details: {
          query,
          provider: runResult.provider,
          providerLabel: runResult.providerLabel,
          mode,
          resultCount: runResult.results.length,
          runs: runSummaries,
          results: runResult.results,
        },
      };
    },
  });

  const readParameters = Type.Object({
    url: Type.String({ description: "Public HTTP(S) URL to read" }),
    max_chars: Type.Optional(Type.Number({ description: "Maximum extracted characters, 1000-50000. Default 12000." })),
  });

  pi.registerTool({
    name: "web_read",
    label: "Web Read",
    description: "Fetch a public HTTP(S) URL and extract readable text with SSRF protection. No browser or JavaScript execution.",
    promptSnippet: "web_read: read and extract text from a specific public URL.",
    promptGuidelines: [
      "Use web_read after web_search when snippets are insufficient and a source needs to be read.",
      "Do not use web_read for localhost, private network, or metadata URLs; those are blocked.",
    ],
    parameters: readParameters,
    async execute(_toolCallId, params, signal) {
      const url = String(params.url ?? "").trim();
      if (!url) {
        throw new Error("url must be non-empty");
      }

      const result = await readWebPage({ url, maxChars: params.max_chars, signal });
      // Header lines followed by the page text, each separated by a newline.
      const text = [
        `# ${result.title || "Web page"}`,
        `URL: ${result.finalUrl}`,
        `Status: HTTP ${result.status}`,
        `Content-Type: ${result.contentType}`,
        result.truncated ? "Truncated: yes" : "Truncated: no",
        result.text,
      ].join("\n");

      return {
        content: [{ type: "text", text }],
        details: result,
      };
    },
  });
}
|
|
110
|
+
|
|
111
|
+
/**
 * Registers the `web-scout-status` command, which shows the resolved
 * configuration and one line per known provider (enabled state, key status,
 * label) as an info notification. Any failure while building the report is
 * shown as a sanitized error notification instead.
 */
function registerStatusCommand(pi: ExtensionAPI): void {
  pi.registerCommand("web-scout-status", {
    description: "Show pi-web-scout configuration and provider status.",
    handler: async (_args, ctx) => {
      try {
        const config = loadConfig(ctx.cwd);
        const summary = [
          "pi-web-scout status",
          `config: ${getConfigPath(ctx.cwd)}`,
          `enabled: ${config.enabled}`,
          `defaultProvider: ${config.defaultProvider}`,
          `fallbackChain: ${config.fallbackChain.join(", ")}`,
          `maxResults: ${config.maxResults}`,
          "",
          "providers:",
        ];

        const providerLines = listProviders().map((provider) => {
          const providerConfig = config.providers[provider.id];
          const credential = resolveProviderCredential(provider, providerConfig);

          let key = "no key";
          if (provider.requiresKey) {
            key = credential.value
              ? `key ${credential.source}`
              : `key missing (${credential.error ?? "not configured"})`;
          }

          const state = providerConfig.enabled ? "enabled" : "disabled";
          return `- ${provider.id}: ${state}, ${key}, ${provider.label}`;
        });

        ctx.ui.notify([...summary, ...providerLines].join("\n"), "info");
      } catch (error) {
        ctx.ui.notify(sanitizeError(error), "error");
      }
    },
  });
}
|
|
145
|
+
|
|
146
|
+
/**
 * Queries providers one after another and returns the first non-empty result
 * set.
 *
 * A provider is skipped, with the reason recorded in `runs`, when it is
 * unknown, disabled in the config, or requires a key that could not be
 * resolved. Each search runs under a 15 second timeout combined with the
 * caller's signal. Once the caller's signal is aborted no further providers
 * are tried.
 *
 * @param providerIds Provider ids in the order they should be tried.
 * @param query       Search query.
 * @param maxResults  Maximum number of results requested from a provider.
 * @param signal      Optional caller abort signal.
 * @param config      Resolved extension configuration.
 * @returns The winning provider with its results, or provider `"none"` with
 *          an empty result list; `runs` records every attempt made.
 */
async function runFirstSuccess(
  providerIds: string[],
  query: string,
  maxResults: number,
  signal: AbortSignal | undefined,
  config: ResolvedConfig,
): Promise<{ provider: string; providerLabel: string; results: SearchResult[]; runs: ProviderRunResult[] }> {
  const runs: ProviderRunResult[] = [];

  for (const providerId of providerIds) {
    // After a cancellation every remaining attempt would fail immediately.
    if (signal?.aborted) break;

    const provider = getProvider(providerId as never);
    if (!provider) {
      runs.push({ provider: providerId, results: [], error: "unknown provider" });
      continue;
    }

    try {
      const providerConfig = config.providers[provider.id];
      // A disabled provider must not be queried just because it is still
      // listed in the fallback chain.
      if (providerConfig?.enabled === false) {
        runs.push({ provider: provider.id, results: [], error: "provider disabled in config" });
        continue;
      }
      const credential = resolveProviderCredential(provider, providerConfig);
      if (provider.requiresKey && !credential.value) {
        runs.push({ provider: provider.id, results: [], error: credential.error ?? "missing API key" });
        continue;
      }
      const results = await withTimeoutSignal(signal, 15_000, (combinedSignal) =>
        provider.search({ query, maxResults, signal: combinedSignal, apiKey: credential.value })
      );
      runs.push({ provider: provider.id, results, keySource: credential.source });
      if (results.length > 0) return { provider: provider.id, providerLabel: provider.label, results, runs };
    } catch (error) {
      runs.push({ provider: provider.id, results: [], error: sanitizeError(error) });
    }
  }

  return { provider: "none", providerLabel: "none", results: [], runs };
}
|
|
181
|
+
|
|
182
|
+
/**
 * Queries all given providers concurrently and merges their results.
 *
 * A provider contributes an empty run, with the reason recorded, when it is
 * unknown, disabled in the config, lacks a required key, or its search
 * throws. Each search runs under a 15 second timeout combined with the
 * caller's signal. The merged list comes from `combineResults` and is cut to
 * `maxResults`.
 *
 * @param providerIds Provider ids to query.
 * @param query       Search query.
 * @param maxResults  Per-provider request size and cap on the merged list.
 * @param signal      Optional caller abort signal.
 * @param config      Resolved extension configuration.
 * @returns Provider `"combined"` with the merged results and one run entry
 *          per provider id.
 */
async function runCombined(
  providerIds: string[],
  query: string,
  maxResults: number,
  signal: AbortSignal | undefined,
  config: ResolvedConfig,
): Promise<{ provider: string; providerLabel: string; results: SearchResult[]; runs: ProviderRunResult[] }> {
  const runs = await Promise.all(providerIds.map(async (providerId): Promise<ProviderRunResult> => {
    const provider = getProvider(providerId as never);
    if (!provider) return { provider: providerId, results: [], error: "unknown provider" };
    try {
      const providerConfig = config.providers[provider.id];
      // Combine mode merges enabled providers only; never query a disabled one.
      if (providerConfig?.enabled === false) {
        return { provider: provider.id, results: [], error: "provider disabled in config" };
      }
      const credential = resolveProviderCredential(provider, providerConfig);
      if (provider.requiresKey && !credential.value) {
        return { provider: provider.id, results: [], error: credential.error ?? "missing API key" };
      }
      const results = await withTimeoutSignal(signal, 15_000, (combinedSignal) =>
        provider.search({ query, maxResults, signal: combinedSignal, apiKey: credential.value })
      );
      return { provider: provider.id, results, keySource: credential.source };
    } catch (error) {
      return { provider: provider.id, results: [], error: sanitizeError(error) };
    }
  }));

  return {
    provider: "combined",
    providerLabel: "combined",
    results: combineResults(runs).slice(0, maxResults),
    runs,
  };
}
|
|
212
|
+
|
|
213
|
+
/**
 * Merges per-provider result lists into one ranked list.
 *
 * Results are grouped by `normalizeUrlForDedupe(url)`. Every occurrence adds
 * `1 / (60 + max(1, rank))` to the group's score, and the occurrence with the
 * longest snippet represents the group. Groups are ordered by score, then by
 * how many providers returned them, and re-ranked from 1. Each output entry
 * carries the first provider that returned it, the sorted provider list and
 * the score rounded to six decimals.
 */
function combineResults(runs: ProviderRunResult[]): SearchResult[] {
  interface MergedEntry {
    result: SearchResult;
    providers: Set<string>;
    score: number;
  }

  const merged = new Map<string, MergedEntry>();

  for (const { provider, results } of runs) {
    for (const candidate of results) {
      const dedupeKey = normalizeUrlForDedupe(candidate.url);
      const weight = 1 / (60 + Math.max(1, candidate.rank));
      const entry = merged.get(dedupeKey);

      if (entry) {
        entry.providers.add(provider);
        entry.score += weight;
        const currentLength = entry.result.snippet?.length ?? 0;
        const candidateLength = candidate.snippet?.length ?? 0;
        if (candidateLength > currentLength) entry.result = candidate;
      } else {
        merged.set(dedupeKey, { result: candidate, providers: new Set([provider]), score: weight });
      }
    }
  }

  const ranked = Array.from(merged.values());
  ranked.sort((left, right) => {
    const byScore = right.score - left.score;
    return byScore || (right.providers.size - left.providers.size);
  });

  return ranked.map((entry, position) => {
    // Sets iterate in insertion order, so this is the first provider seen.
    const [firstProvider] = entry.providers;
    return {
      ...entry.result,
      rank: position + 1,
      provider: firstProvider ?? entry.result.provider,
      providers: Array.from(entry.providers).sort(),
      score: Number(entry.score.toFixed(6)),
    };
  });
}
|
|
240
|
+
|
|
241
|
+
/**
 * Runs `fn` with a signal that aborts after `timeoutMs` milliseconds or when
 * the optional caller `signal` aborts, whichever comes first.
 *
 * @param signal    Optional caller signal to combine with the timeout.
 * @param timeoutMs Timeout in milliseconds.
 * @param fn        Work to run; receives the combined signal.
 * @returns Whatever `fn` resolves to; rejections from `fn` propagate.
 */
async function withTimeoutSignal<T>(
  signal: AbortSignal | undefined,
  timeoutMs: number,
  fn: (signal: AbortSignal) => Promise<T>,
): Promise<T> {
  const deadline = AbortSignal.timeout(timeoutMs);
  if (!signal) {
    return await fn(deadline);
  }
  return await fn(AbortSignal.any([signal, deadline]));
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/** Identifier of a search provider; `"auto"` stands for the configured fallback chain. */
export type ProviderId = "auto" | "duckduckgo" | "marginalia" | "jina" | "brave";

/** How several providers are used: stop at the first with results, or merge all. */
export type SearchMode = "first_success" | "combine";

/** A single search hit. */
export interface SearchResult {
  title: string;
  url: string;
  snippet?: string;
  /** Provider that produced this hit. */
  provider: string;
  /** Position of the hit in its result list. */
  rank: number;
  /** Providers that returned this URL in combine mode. */
  providers?: string[];
  /** Internal combine score, exposed in details for diagnostics. */
  score?: number;
}

/** Input handed to a provider's `search` method. */
export interface SearchRequest {
  query: string;
  maxResults: number;
  signal?: AbortSignal;
  /** Resolved provider API key. Present only for keyed providers. */
  apiKey?: string;
}

/** Record of one provider attempt: its results, or the reason it produced none. */
export interface ProviderRunResult {
  provider: string;
  results: SearchResult[];
  error?: string;
  keySource?: string;
}

/** Contract implemented by every search provider. */
export interface SearchProvider {
  id: Exclude<ProviderId, "auto">;
  label: string;
  /** Whether the provider needs an API key to be queried. */
  requiresKey: boolean;
  defaultKeyEnv?: string;
  search(request: SearchRequest): Promise<SearchResult[]>;
}

/** Per-provider settings as written in the config file. */
export interface ProviderConfig {
  enabled?: boolean;
  /** Environment variable name only. Literal keys and shell commands are intentionally unsupported. */
  apiKeyEnv?: string;
}

/** Per-provider settings after loading, with `enabled` always set. */
export interface ResolvedProviderConfig {
  enabled: boolean;
  apiKeyEnv?: string;
}

/** Config file shape; every field is optional. */
export interface PiWebSearchConfig {
  enabled?: boolean;
  defaultProvider?: ProviderId;
  fallbackChain?: Exclude<ProviderId, "auto">[];
  maxResults?: number;
  providers?: Partial<Record<Exclude<ProviderId, "auto">, ProviderConfig>>;
}

/** Fully populated configuration used at runtime. */
export interface ResolvedConfig {
  enabled: boolean;
  defaultProvider: ProviderId;
  fallbackChain: Exclude<ProviderId, "auto">[];
  maxResults: number;
  providers: Record<Exclude<ProviderId, "auto">, ResolvedProviderConfig>;
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/** Hostnames rejected outright: loopback names/addresses and cloud metadata endpoints. */
const BLOCKED_HOSTNAMES = new Set([
  "localhost",
  "127.0.0.1",
  "0.0.0.0",
  "::1",
  "[::1]",
  "metadata.google.internal",
  "metadata.azure.com",
  "169.254.169.254",
  "100.100.100.200",
]);

/** Outcome of `validatePublicHttpUrl`. */
export interface UrlValidationResult {
  valid: boolean;
  /** Parsed URL; set only when `valid` is true. */
  url?: URL;
  /** Why the URL was rejected; set only when `valid` is false. */
  reason?: string;
}

/**
 * Checks that `input` is an http(s) URL whose host is not a loopback,
 * private, link-local, multicast or metadata address.
 *
 * The check is purely lexical: it inspects the hostname as written and does
 * not resolve DNS names.
 *
 * @param input URL string to validate.
 * @returns `{ valid: true, url }` when accepted, otherwise
 *          `{ valid: false, reason }`.
 */
export function validatePublicHttpUrl(input: string): UrlValidationResult {
  let parsed: URL;
  try {
    parsed = new URL(input);
  } catch {
    return { valid: false, reason: "invalid URL" };
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return { valid: false, reason: `disallowed URL scheme: ${parsed.protocol}` };
  }

  // "localhost." names the same host as "localhost"; drop the root dot so the
  // block list cannot be bypassed with a fully-qualified spelling.
  const hostname = parsed.hostname.toLowerCase().replace(/\.+$/, "");
  if (!hostname) return { valid: false, reason: "invalid URL" };

  // Names under .localhost are reserved for loopback as well.
  if (BLOCKED_HOSTNAMES.has(hostname) || hostname.endsWith(".localhost")) {
    return { valid: false, reason: `blocked hostname: ${hostname}` };
  }
  if (isBlockedIpv4(hostname)) return { valid: false, reason: `blocked IP address: ${hostname}` };
  if (isBlockedIpv6(hostname)) return { valid: false, reason: `blocked IPv6 address: ${hostname}` };
  if (/^\d+$/.test(hostname)) return { valid: false, reason: "numeric hostnames are not allowed" };

  return { valid: true, url: parsed };
}

/**
 * Returns true when `hostname` is a dotted-quad IPv4 literal that is
 * malformed or lies in a non-public range. Non-IPv4 hostnames return false.
 */
function isBlockedIpv4(hostname: string): boolean {
  if (!/^(\d{1,3}\.){3}\d{1,3}$/.test(hostname)) return false;
  const octets = hostname.split(".").map(Number);
  // Looks like an IPv4 literal but has an out-of-range octet: reject it.
  if (octets.some((n) => !Number.isInteger(n) || n < 0 || n > 255)) return true;
  const a = octets[0] ?? 0;
  const b = octets[1] ?? 0;
  return (
    a === 0 || // 0.0.0.0/8
    a === 10 || // 10.0.0.0/8
    a === 127 || // 127.0.0.0/8 loopback
    (a === 169 && b === 254) || // 169.254.0.0/16 link-local
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
    (a === 192 && b === 168) || // 192.168.0.0/16
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 shared address space
    a >= 224 // multicast and reserved
  );
}

/**
 * Returns true when `hostname` is an IPv6 literal (optionally bracketed) in a
 * non-public range. Hostnames without a colon are not IPv6 literals and
 * return false, so domain names that merely start with "fc" or "fd" are no
 * longer rejected.
 */
function isBlockedIpv6(hostname: string): boolean {
  const host = hostname.replace(/^\[/, "").replace(/\]$/, "").toLowerCase();
  if (!host.includes(":")) return false;

  // A leading "::" means the first 16-bit group is zero.
  const firstGroup = host.startsWith("::") ? "0" : host.slice(0, host.indexOf(":"));
  // Malformed literal: reject rather than guess.
  if (!/^[0-9a-f]{1,4}$/.test(firstGroup)) return true;
  const first = parseInt(firstGroup, 16);

  return (
    // 0000::/8 covers unspecified (::), loopback (::1), IPv4-mapped
    // (::ffff:a.b.c.d), IPv4-compatible and the 64:ff9b::/96 translation prefix.
    (first & 0xff00) === 0 ||
    (first & 0xfe00) === 0xfc00 || // fc00::/7 unique local
    (first & 0xffc0) === 0xfe80 || // fe80::/10 link-local
    (first & 0xffc0) === 0xfec0 || // fec0::/10 deprecated site-local
    (first & 0xff00) === 0xff00 // ff00::/8 multicast
  );
}
|