@counterposition/pi-web-search 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +675 -0
- package/README.md +30 -0
- package/extensions/web-search.ts +249 -0
- package/package.json +60 -0
- package/src/config.ts +281 -0
- package/src/format.ts +348 -0
- package/src/page-cache.ts +58 -0
- package/src/pi-ambient.d.ts +30 -0
- package/src/provider-utils.ts +269 -0
- package/src/providers/brave.ts +292 -0
- package/src/providers/exa.ts +227 -0
- package/src/providers/firecrawl.ts +67 -0
- package/src/providers/index.ts +38 -0
- package/src/providers/jina.ts +131 -0
- package/src/providers/serper.ts +193 -0
- package/src/providers/tavily.ts +231 -0
- package/src/types.ts +131 -0
- package/src/url-safety.ts +92 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import net from "node:net";
|
|
2
|
+
|
|
3
|
+
/**
 * Exact host strings that are always refused.
 *
 * Entries are compared verbatim against the hostname being checked, so they
 * are written in lowercase; literal IP addresses are listed in dotted form.
 */
const BLOCKED_HOSTS = new Set<string>()
  .add("localhost")
  .add("metadata.google.internal")
  .add("metadata")
  .add("169.254.169.254")
  .add("169.254.169.250")
  .add("100.100.100.200");
|
|
11
|
+
|
|
12
|
+
/**
 * Validates a caller-supplied URL before it is fetched and returns its
 * serialized form.
 *
 * The check is purely lexical: it inspects the scheme, credentials and
 * hostname of the parsed URL. No DNS lookup is performed here, so a public
 * looking name that resolves to a private address is not detected by this
 * function.
 *
 * @param input - Raw URL text; leading and trailing whitespace is ignored.
 * @returns The URL as serialized by the `URL` parser.
 * @throws Error with an "Invalid URL: …" message when the input is empty,
 *   cannot be parsed, is not http/https, carries embedded credentials, or has
 *   no hostname; with a "Blocked URL: …" message when the host is disallowed.
 */
export function validateFetchUrl(input: string): string {
  const trimmed = input.trim();
  if (!trimmed) {
    throw new Error("Invalid URL: URL is required.");
  }

  let parsed: URL;
  try {
    parsed = new URL(trimmed);
  } catch {
    throw new Error("Invalid URL: malformed URL.");
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error("Invalid URL: only http and https URLs are allowed.");
  }

  if (parsed.username || parsed.password) {
    throw new Error("Invalid URL: embedded credentials are not allowed.");
  }

  const hostname = parsed.hostname
    // IPv6 literals are reported with their surrounding brackets.
    .replace(/^\[|\]$/g, "")
    // The URL parser keeps the DNS root dot ("localhost." stays "localhost."),
    // which would otherwise slip past the exact-match and suffix checks.
    .replace(/\.+$/, "")
    .toLowerCase();
  if (!hostname) {
    throw new Error("Invalid URL: hostname is required.");
  }

  if (isBlockedHostname(hostname)) {
    throw new Error("Blocked URL: target host is not allowed.");
  }

  return parsed.toString();
}
|
|
44
|
+
|
|
45
|
+
/**
 * Reports whether a hostname must be refused.
 *
 * A host is refused when it is listed in `BLOCKED_HOSTS`, when it ends with
 * one of the reserved suffixes below, or when it is an IP literal that the
 * matching per-family check rejects. Any other name is allowed.
 *
 * @param hostname - Hostname to test; compared as given, without further
 *   normalization.
 * @returns `true` when the host must not be contacted.
 */
function isBlockedHostname(hostname: string): boolean {
  if (BLOCKED_HOSTS.has(hostname)) return true;

  const reservedSuffixes = [".local", ".localhost", ".internal", ".home"];
  for (const suffix of reservedSuffixes) {
    if (hostname.endsWith(suffix)) return true;
  }

  // net.isIP yields 4 or 6 for IP literals and 0 for anything else.
  switch (net.isIP(hostname)) {
    case 4:
      return isBlockedIpv4(hostname);
    case 6:
      return isBlockedIpv6(hostname);
    default:
      return false;
  }
}
|
|
62
|
+
|
|
63
|
+
/**
 * Reports whether a dotted-quad IPv4 address lies in a range that must not be
 * fetched.
 *
 * Blocked ranges: 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8, 169.254.0.0/16,
 * 172.16.0.0/12, 192.168.0.0/16, 100.64.0.0/10 and everything from
 * 224.0.0.0 upwards. Input that is not exactly four decimal octets in the
 * range 0-255 is treated as blocked (fail closed).
 *
 * @param hostname - IPv4 address in dotted-decimal form.
 * @returns `true` when the address is blocked or cannot be parsed.
 */
function isBlockedIpv4(hostname: string): boolean {
  const parts = hostname.split(".");
  // parseInt alone would accept "1a" or "-1"; require plain decimal digits.
  if (parts.length !== 4 || !parts.every((part) => /^\d{1,3}$/.test(part))) return true;

  const octets = parts.map((part) => Number.parseInt(part, 10));
  if (octets.some((octet) => octet > 255)) return true;

  const [a = 0, b = 0] = octets;

  if (a === 0 || a === 10 || a === 127) return true;
  if (a === 169 && b === 254) return true;
  if (a === 172 && b >= 16 && b <= 31) return true;
  if (a === 192 && b === 168) return true;
  if (a === 100 && b >= 64 && b <= 127) return true;
  if (a >= 224) return true;

  return false;
}

/**
 * Leading text of IPv6 addresses that are always refused: fc/fd (unique
 * local), fe8-feb (link-local) and ff (multicast).
 */
const BLOCKED_IPV6_PREFIXES = ["fc", "fd", "fe8", "fe9", "fea", "feb", "ff"];

/**
 * Reports whether an IPv6 address must not be fetched.
 *
 * Besides the textual prefix ranges, the address is expanded to its eight
 * 16-bit groups so that the unspecified address, every spelling of loopback,
 * and addresses that embed an IPv4 address (IPv4-mapped `::ffff:a.b.c.d`,
 * IPv4-compatible `::a.b.c.d`, NAT64 `64:ff9b::a.b.c.d`) are judged by the
 * IPv4 rules. Text that cannot be parsed as IPv6 is treated as blocked.
 *
 * @param hostname - IPv6 address without surrounding brackets.
 * @returns `true` when the address is blocked or cannot be parsed.
 */
function isBlockedIpv6(hostname: string): boolean {
  const normalized = hostname.toLowerCase();

  if (BLOCKED_IPV6_PREFIXES.some((prefix) => normalized.startsWith(prefix))) return true;

  const groups = parseIpv6Groups(normalized);
  if (groups === null) return true;

  const [g0 = 0, g1 = 0, g2 = 0, g3 = 0, g4 = 0, g5 = 0, g6 = 0, g7 = 0] = groups;
  const middleZero = g2 === 0 && g3 === 0 && g4 === 0;

  if (g0 === 0 && g1 === 0 && middleZero) {
    // "::" (unspecified) and "::1" (loopback) in any spelling.
    if (g5 === 0 && g6 === 0 && g7 <= 1) return true;
    // IPv4-compatible (::/96) and IPv4-mapped (::ffff:0:0/96) addresses.
    if (g5 === 0 || g5 === 0xffff) return isBlockedIpv4(embeddedIpv4(g6, g7));
  }

  // NAT64 well-known prefix 64:ff9b::/96 carries an IPv4 address in its tail.
  if (g0 === 0x64 && g1 === 0xff9b && middleZero && g5 === 0) {
    return isBlockedIpv4(embeddedIpv4(g6, g7));
  }

  return false;
}

/** Renders the IPv4 address stored in the last two groups of an IPv6 address. */
function embeddedIpv4(high: number, low: number): string {
  return `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`;
}

/** Parses colon-separated hex groups; returns null when any group is malformed. */
function parseHexGroups(text: string): number[] | null {
  if (text === "") return [];
  const values: number[] = [];
  for (const group of text.split(":")) {
    if (!/^[0-9a-f]{1,4}$/i.test(group)) return null;
    values.push(Number.parseInt(group, 16));
  }
  return values;
}

/** Expands an IPv6 address to its eight 16-bit groups, or null when it is not valid. */
function parseIpv6Groups(address: string): number[] | null {
  // Ignore an optional zone identifier such as "%eth0".
  const zoneStart = address.indexOf("%");
  let text = zoneStart === -1 ? address : address.slice(0, zoneStart);

  // Rewrite a trailing dotted quad ("::ffff:192.0.2.1") as two hex groups.
  if (text.includes(".")) {
    const lastColon = text.lastIndexOf(":");
    if (lastColon === -1) return null;
    const quad = text.slice(lastColon + 1).split(".");
    if (quad.length !== 4 || !quad.every((part) => /^\d{1,3}$/.test(part))) return null;
    const bytes = quad.map((part) => Number.parseInt(part, 10));
    if (bytes.some((byte) => byte > 255)) return null;
    const [b0 = 0, b1 = 0, b2 = 0, b3 = 0] = bytes;
    const high = ((b0 << 8) | b1).toString(16);
    const low = ((b2 << 8) | b3).toString(16);
    text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null;

  const head = parseHexGroups(halves[0] ?? "");
  if (head === null) return null;
  if (halves.length === 1) return head.length === 8 ? head : null;

  const tail = parseHexGroups(halves[1] ?? "");
  if (tail === null) return null;

  // "::" stands for at least one all-zero group.
  const missing = 8 - head.length - tail.length;
  if (missing < 1) return null;

  return [...head, ...new Array<number>(missing).fill(0), ...tail];
}
|