@pseolint/core 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -1
- package/dist/ai/triage.d.ts.map +1 -1
- package/dist/ai/triage.js +8 -1
- package/dist/ai/triage.js.map +1 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +495 -130
- package/dist/auditor.js.map +1 -1
- package/dist/backpressure.d.ts +68 -0
- package/dist/backpressure.d.ts.map +1 -0
- package/dist/backpressure.js +81 -0
- package/dist/backpressure.js.map +1 -0
- package/dist/cache.d.ts +73 -0
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +258 -19
- package/dist/cache.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +1 -14
- package/dist/enrich-findings.js.map +1 -1
- package/dist/fetch-observer.d.ts +97 -0
- package/dist/fetch-observer.d.ts.map +1 -0
- package/dist/fetch-observer.js +124 -0
- package/dist/fetch-observer.js.map +1 -0
- package/dist/formatters/console.d.ts +7 -9
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +218 -254
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/html.d.ts +5 -1
- package/dist/formatters/html.d.ts.map +1 -1
- package/dist/formatters/html.js +352 -570
- package/dist/formatters/html.js.map +1 -1
- package/dist/formatters/index.d.ts +4 -1
- package/dist/formatters/index.d.ts.map +1 -1
- package/dist/formatters/index.js +1 -1
- package/dist/formatters/index.js.map +1 -1
- package/dist/formatters/json.d.ts +11 -1
- package/dist/formatters/json.d.ts.map +1 -1
- package/dist/formatters/json.js +5 -1
- package/dist/formatters/json.js.map +1 -1
- package/dist/formatters/markdown.d.ts +7 -1
- package/dist/formatters/markdown.d.ts.map +1 -1
- package/dist/formatters/markdown.js +77 -70
- package/dist/formatters/markdown.js.map +1 -1
- package/dist/index.d.ts +13 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -7
- package/dist/index.js.map +1 -1
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +0 -6
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +1 -0
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +6 -14
- package/dist/rules/scope.js.map +1 -1
- package/dist/rules/tech/robots-sitemap-presence.d.ts +9 -1
- package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -1
- package/dist/rules/tech/robots-sitemap-presence.js +14 -5
- package/dist/rules/tech/robots-sitemap-presence.js.map +1 -1
- package/dist/safe-mode-preset.d.ts +27 -0
- package/dist/safe-mode-preset.d.ts.map +1 -0
- package/dist/safe-mode-preset.js +54 -0
- package/dist/safe-mode-preset.js.map +1 -0
- package/dist/site-classifier.d.ts +83 -0
- package/dist/site-classifier.d.ts.map +1 -0
- package/dist/site-classifier.js +205 -0
- package/dist/site-classifier.js.map +1 -0
- package/dist/ssrf-guard.d.ts +96 -0
- package/dist/ssrf-guard.d.ts.map +1 -0
- package/dist/ssrf-guard.js +268 -0
- package/dist/ssrf-guard.js.map +1 -0
- package/dist/types.d.ts +171 -19
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +2 -1
- package/dist/types.js.map +1 -1
- package/package.json +2 -2
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSRF guard for audit targets.
|
|
3
|
+
*
|
|
4
|
+
* Two layers:
|
|
5
|
+
* 1. `isPrivateOrReservedHost(hostname)` — fast, synchronous string check.
|
|
6
|
+
* Catches literal private IPs ("10.0.0.5"), loopback names ("localhost"),
|
|
7
|
+
* link-local suffixes (".local"), and internal/metadata hostnames.
|
|
8
|
+
* 2. `validateTargetHost(hostname)` — async. Resolves the hostname via DNS
|
|
9
|
+
* and rejects if the resulting address (v4 or v6) falls into a private /
|
|
10
|
+
* reserved / link-local / multicast range. Catches a public hostname
|
|
11
|
+
* whose DNS records point at a private address such as 127.0.0.1.
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* const hostname = new URL(userSuppliedUrl).hostname;
|
|
15
|
+
* await validateTargetHost(hostname); // throws SSRFError on blocked targets
|
|
16
|
+
*
|
|
17
|
+
* Library consumers should call this BEFORE enqueuing a crawl. The audit
|
|
18
|
+
* engine itself wraps its own fetches with this check when `guardSsrf` is
|
|
19
|
+
* enabled in AuditOptions, but defense-in-depth at the API boundary is the
|
|
20
|
+
* primary mitigation.
|
|
21
|
+
*/
|
|
22
|
+
export declare class SSRFError extends Error {
|
|
23
|
+
readonly hostname: string;
|
|
24
|
+
readonly reason: string;
|
|
25
|
+
constructor(hostname: string, reason: string);
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Thrown when a hostname legitimately fails DNS resolution (NXDOMAIN / SERVFAIL
|
|
29
|
+
* / no A / AAAA records). Distinct from `SSRFError`: resolution failure
|
|
30
|
+
* is a "try again later / fix your typo" condition, not an attack. Callers
|
|
31
|
+
* in SaaS contexts should not log these as security events.
|
|
32
|
+
*/
|
|
33
|
+
export declare class DnsResolutionError extends Error {
|
|
34
|
+
readonly hostname: string;
|
|
35
|
+
constructor(hostname: string);
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* IPv4 range predicate — true if the address is private / reserved /
|
|
39
|
+
* link-local / loopback / multicast / broadcast / CGNAT. Expects a valid
|
|
40
|
+
* dotted-quad; caller must ensure that (e.g. via `net.isIP`).
|
|
41
|
+
*/
|
|
42
|
+
export declare function isPrivateIPv4(addr: string): boolean;
|
|
43
|
+
/**
|
|
44
|
+
* IPv6 range predicate — true for loopback, ULA, link-local, unspecified,
|
|
45
|
+
* multicast, and IPv4-mapped addresses (after unwrapping to the v4 check).
|
|
46
|
+
*/
|
|
47
|
+
export declare function isPrivateIPv6(addr: string): boolean;
|
|
48
|
+
/**
|
|
49
|
+
* Decode an integer-packed (`2130706433` = `127.0.0.1`) or hex-encoded
|
|
50
|
+
* (`0x7f000001`) hostname into dotted-quad form. Returns `null` if the
|
|
51
|
+
* input isn't a numeric hostname. Needed because some fetch stacks accept
|
|
52
|
+
* these encodings and resolve them to private IPs, bypassing a naive
|
|
53
|
+
* string-only dotted-quad check.
|
|
54
|
+
*/
|
|
55
|
+
export declare function decodeNumericIPv4(hostname: string): string | null;
|
|
56
|
+
/**
|
|
57
|
+
* Synchronous string-only check. Rejects:
|
|
58
|
+
* - literal private / reserved IP addresses (dotted-quad OR numeric/hex encoding)
|
|
59
|
+
* - exact blocked hostnames (localhost, 0, etc.)
|
|
60
|
+
* - suffix-blocked hostnames (.local, .internal, .arpa, ...)
|
|
61
|
+
*
|
|
62
|
+
* Returns `null` if the host is acceptable, or a human-readable reason
|
|
63
|
+
* string if it should be blocked.
|
|
64
|
+
*/
|
|
65
|
+
export declare function isPrivateOrReservedHost(hostname: string): string | null;
|
|
66
|
+
export interface ValidateTargetHostOptions {
|
|
67
|
+
/** Override the DNS resolver — useful for tests or custom resolvers. */
|
|
68
|
+
resolver?: {
|
|
69
|
+
resolve4: (hostname: string) => Promise<string[]>;
|
|
70
|
+
resolve6: (hostname: string) => Promise<string[]>;
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Full SSRF check: the string check above PLUS a DNS lookup to guarantee the
|
|
75
|
+
* resolved address isn't in a private range. Throws `SSRFError` on failure.
|
|
76
|
+
*
|
|
77
|
+
* Time-of-check-vs-time-of-use: an attacker-controlled DNS server can return
|
|
78
|
+
* a public IP on first lookup and a private IP on the subsequent fetch ("DNS
|
|
79
|
+
* rebinding"). Mitigations: cache the resolved IP and dial to THAT IP (host
|
|
80
|
+
* header preserved), or use a resolver that refuses re-resolution within a
|
|
81
|
+
* TTL window. This function validates; it does not pin. For the audit
|
|
82
|
+
* engine's own fetches, the pinning layer is layered on top via `safeFetch`.
|
|
83
|
+
*/
|
|
84
|
+
export declare function validateTargetHost(hostname: string, options?: ValidateTargetHostOptions): Promise<void>;
|
|
85
|
+
/**
|
|
86
|
+
* Convenience check for "is this URL pointing at localhost or a private
|
|
87
|
+
* network?". Used by the CLI to auto-apply a conservative crawl preset when
|
|
88
|
+
* a developer runs `pseolint http://localhost:3000` — a cache-cold local
|
|
89
|
+
* server can amplify every fetch into a thundering herd of DB queries.
|
|
90
|
+
*
|
|
91
|
+
* Returns false for anything that isn't a parseable URL with a hostname
|
|
92
|
+
* (paths, `file://`, empty strings). Delegates the actual decision to
|
|
93
|
+
* `isPrivateOrReservedHost` so the two stay in sync.
|
|
94
|
+
*/
|
|
95
|
+
export declare function isLocalhostUrl(url: string): boolean;
|
|
96
|
+
//# sourceMappingURL=ssrf-guard.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ssrf-guard.d.ts","sourceRoot":"","sources":["../src/ssrf-guard.ts"],"names":[],"mappings":"AAGA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,qBAAa,SAAU,SAAQ,KAAK;IAClC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;gBAEZ,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;CAM7C;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,SAAQ,KAAK;IAC3C,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;gBAEd,QAAQ,EAAE,MAAM;CAK7B;AAwBD;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAqBnD;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAWnD;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAoBjE;AAED;;;;;;;;GAQG;AACH,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA4BvE;AAED,MAAM,WAAW,yBAAyB;IACxC,wEAAwE;IACxE,QAAQ,CAAC,EAAE;QACT,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;QAClD,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;KACnD,CAAC;CACH;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,IAAI,CAAC,CA+Bf;AAED;;;;;;;;;GASG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAUnD"}
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import { promises as dns } from "node:dns";
|
|
2
|
+
import { isIP } from "node:net";
|
|
3
|
+
/**
|
|
4
|
+
* SSRF guard for audit targets.
|
|
5
|
+
*
|
|
6
|
+
* Two layers:
|
|
7
|
+
* 1. `isPrivateOrReservedHost(hostname)` — fast, synchronous string check.
|
|
8
|
+
* Catches literal private IPs ("10.0.0.5"), loopback names ("localhost"),
|
|
9
|
+
* link-local suffixes (".local"), and internal/metadata hostnames.
|
|
10
|
+
* 2. `validateTargetHost(hostname)` — async. Resolves the hostname via DNS
|
|
11
|
+
* and rejects if the resulting address (v4 or v6) falls into a private /
|
|
12
|
+
* reserved / link-local / multicast range. Catches a public hostname
|
|
13
|
+
* whose DNS records point at a private address such as 127.0.0.1.
|
|
14
|
+
*
|
|
15
|
+
* Usage:
|
|
16
|
+
* const hostname = new URL(userSuppliedUrl).hostname;
|
|
17
|
+
* await validateTargetHost(hostname); // throws SSRFError on blocked targets
|
|
18
|
+
*
|
|
19
|
+
* Library consumers should call this BEFORE enqueuing a crawl. The audit
|
|
20
|
+
* engine itself wraps its own fetches with this check when `guardSsrf` is
|
|
21
|
+
* enabled in AuditOptions, but defense-in-depth at the API boundary is the
|
|
22
|
+
* primary mitigation.
|
|
23
|
+
*/
|
|
24
|
+
/**
 * Error raised when a target host is rejected by the SSRF guard.
 *
 * Exposes the rejected `hostname` and the human-readable `reason` as
 * properties so callers do not have to parse the message.
 */
export class SSRFError extends Error {
    /** Hostname that was rejected. */
    hostname;
    /** Human-readable explanation of the rejection. */
    reason;
    constructor(hostname, reason) {
        super(`Target host "${hostname}" is not permitted: ${reason}`);
        // Assign in the same order as before: name, hostname, reason.
        Object.assign(this, { name: "SSRFError", hostname, reason });
    }
}
|
|
34
|
+
/**
 * Raised when a hostname yields no addresses at all (NXDOMAIN / SERVFAIL /
 * no A or AAAA records). Kept separate from `SSRFError` on purpose: a failed
 * lookup is a "retry later / fix the typo" situation rather than an attack,
 * so SaaS callers should not record it as a security event.
 */
export class DnsResolutionError extends Error {
    /** Hostname whose lookup produced no usable address. */
    hostname;
    constructor(hostname) {
        super(`DNS resolution failed for "${hostname}"`);
        // Assign in the same order as before: name, then hostname.
        Object.assign(this, { name: "DnsResolutionError", hostname });
    }
}
|
|
48
|
+
/** Hostnames rejected on an exact match against the lower-cased input. */
const BLOCKED_HOSTNAME_EXACT = new Set(["localhost", "broadcasthost", "ip6-localhost", "ip6-loopback", "0"]);
/** Hostname suffixes rejected by the string check (compared with `endsWith`). */
const BLOCKED_HOSTNAME_SUFFIXES = [".local", ".localhost", ".internal", ".arpa", ".intranet", ".lan", ".home", ".private", ".corp"];
|
|
66
|
+
/** Matches the dotted IPv4-mapped forms `::ffff:a.b.c.d` and `::ffff:0:a.b.c.d`. */
const IPV4_MAPPED_IPV6 = /^::ffff:(?:0:)?(\d+\.\d+\.\d+\.\d+)$/i;
/**
 * IPv4 range predicate — true if the address is private / reserved /
 * link-local / loopback / multicast / broadcast / CGNAT. Expects a valid
 * dotted-quad; caller must ensure that (e.g. via `net.isIP`). Input that does
 * not split into four integers in 0..255 yields `false`.
 */
export function isPrivateIPv4(addr) {
    const octets = addr.split(".").map((part) => Number(part));
    const isOctet = (value) => Number.isInteger(value) && value >= 0 && value <= 255;
    if (octets.length !== 4 || !octets.every(isOctet)) {
        return false;
    }
    const [first, second, third] = octets;
    switch (first) {
        case 0: // 0.0.0.0/8 — "this network"
        case 10: // 10.0.0.0/8
        case 127: // 127.0.0.0/8 — loopback
            return true;
        case 100: // 100.64.0.0/10 — CGNAT
            return second >= 64 && second <= 127;
        case 169: // 169.254.0.0/16 — link-local + cloud metadata
            return second === 254;
        case 172: // 172.16.0.0/12
            return second >= 16 && second <= 31;
        case 192:
            if (second === 168)
                return true; // 192.168.0.0/16
            // 192.0.0.0/24 — IETF, 192.0.2.0/24 — TEST-NET-1
            return second === 0 && (third === 0 || third === 2);
        case 198:
            if (second === 18 || second === 19)
                return true; // 198.18.0.0/15 — benchmark
            return second === 51 && third === 100; // 198.51.100.0/24 — TEST-NET-2
        case 203: // 203.0.113.0/24 — TEST-NET-3
            return second === 0 && third === 113;
        default:
            // 224.0.0.0/4 — multicast, 240.0.0.0/4 — reserved + broadcast
            return first >= 224;
    }
}
/**
 * Expand a textual IPv6 address into its eight 16-bit groups.
 *
 * Accepts `::` compression, a trailing dotted-quad and an optional `%zone`
 * suffix. Returns `null` when the text is not a well-formed IPv6 address.
 */
function expandIPv6Groups(addr) {
    let text = addr.toLowerCase();
    const zoneAt = text.indexOf("%");
    if (zoneAt !== -1)
        text = text.slice(0, zoneAt);
    // Rewrite a trailing dotted-quad ("…:a.b.c.d") as two hexadecimal groups.
    const lastColon = text.lastIndexOf(":");
    if (lastColon !== -1 && text.includes(".", lastColon)) {
        const quad = text.slice(lastColon + 1).split(".");
        if (quad.length !== 4 || !quad.every((o) => /^\d{1,3}$/.test(o) && Number(o) <= 255)) {
            return null;
        }
        const [o1, o2, o3, o4] = quad.map((o) => Number(o));
        const high = ((o1 << 8) | o2).toString(16);
        const low = ((o3 << 8) | o4).toString(16);
        text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
    }
    const halves = text.split("::");
    if (halves.length > 2)
        return null; // at most one "::" is allowed
    const toGroups = (part) => (part === "" ? [] : part.split(":"));
    const head = toGroups(halves[0]);
    const tail = halves.length === 2 ? toGroups(halves[1]) : [];
    const missing = 8 - head.length - tail.length;
    // "::" must stand for at least one group; without it all eight are required.
    if (halves.length === 2 ? missing < 1 : missing !== 0)
        return null;
    const groups = [...head, ...new Array(missing).fill("0"), ...tail];
    if (!groups.every((g) => /^[0-9a-f]{1,4}$/.test(g)))
        return null;
    return groups.map((g) => parseInt(g, 16));
}
/**
 * IPv6 range predicate — true for loopback, ULA, link-local, unspecified,
 * multicast, and IPv4-mapped addresses (after unwrapping to the v4 check).
 *
 * Besides the compressed spellings (`::`, `::1`, `::ffff:a.b.c.d`) this also
 * recognises equivalent spellings of the same addresses: the hexadecimal
 * mapped form (`::ffff:7f00:1`, which is how the WHATWG URL parser serialises
 * `[::ffff:127.0.0.1]`), uncompressed forms (`0:0:0:0:0:0:0:1`) and addresses
 * carrying a `%zone` suffix. Text that is not an IPv6 address yields `false`
 * unless it starts with one of the reserved prefixes below.
 */
export function isPrivateIPv6(addr) {
    const normalized = addr.toLowerCase();
    if (normalized === "::" || normalized === "::1")
        return true;
    if (/^fe[89ab]/.test(normalized))
        return true; // fe80::/10
    if (/^f[cd]/.test(normalized))
        return true; // fc00::/7 ULA
    if (normalized.startsWith("ff"))
        return true; // ff00::/8 multicast
    // IPv4-mapped IPv6 (::ffff:a.b.c.d or ::ffff:0:a.b.c.d) — unwrap and delegate
    const mapped = normalized.match(IPV4_MAPPED_IPV6);
    if (mapped)
        return isPrivateIPv4(mapped[1]);
    // Structural pass for spellings the textual checks above cannot see.
    const groups = expandIPv6Groups(normalized);
    if (groups === null)
        return false;
    const [, , , , g4, g5, g6, g7] = groups;
    // Unspecified (all zero) or loopback (…:1), however it was written.
    if (groups.slice(0, 7).every((g) => g === 0) && g7 <= 1)
        return true;
    const leadingZeros = groups.slice(0, 4).every((g) => g === 0);
    const wrapsIPv4 = leadingZeros &&
        ((g4 === 0 && g5 === 0xffff) || (g4 === 0xffff && g5 === 0));
    if (wrapsIPv4) {
        // The last 32 bits carry the IPv4 address; rebuild it and delegate.
        return isPrivateIPv4([g6 >> 8, g6 & 0xff, g7 >> 8, g7 & 0xff].join("."));
    }
    return false;
}
|
|
129
|
+
/**
 * Decode a single-number IPv4 hostname into dotted-quad form.
 *
 * Recognised encodings (after lower-casing and trimming):
 *   - decimal:      `2130706433`   -> `127.0.0.1`
 *   - hexadecimal:  `0x7f000001`   -> `127.0.0.1`
 *   - octal:        `017700000001` -> `127.0.0.1` (leading `0`, digits 0-7 only)
 *
 * Returns `null` if the input isn't a numeric hostname or the value does not
 * fit in 32 bits. Needed because some fetch stacks accept these encodings and
 * resolve them to private IPs, bypassing a naive string-only dotted-quad
 * check. A leading-zero string containing `8` or `9` is not valid octal and is
 * read as decimal, so it is still decoded rather than ignored.
 */
export function decodeNumericIPv4(hostname) {
    const text = hostname.toLowerCase().trim();
    let value = null;
    if (/^0[0-7]+$/.test(text)) {
        // Leading zero + octal digits: inet_aton-style parsers read this as base 8.
        value = parseInt(text, 8);
    }
    else if (/^(?:[0-9]+|0x[0-9a-f]+)$/.test(text)) {
        // Plain decimal or 0x-prefixed hex; Number() understands both.
        value = Number(text);
    }
    // Empty or non-numeric input leaves `value` null; oversized values overflow 32 bits.
    if (value === null || !Number.isInteger(value) || value < 0 || value > 0xffffffff) {
        return null;
    }
    return [
        (value >>> 24) & 0xff,
        (value >>> 16) & 0xff,
        (value >>> 8) & 0xff,
        value & 0xff,
    ].join(".");
}
|
|
162
|
+
/**
 * Synchronous string-only check. Rejects:
 *   - literal private / reserved IP addresses (dotted-quad OR numeric/hex encoding)
 *   - exact blocked hostnames (localhost, 0, etc.)
 *   - suffix-blocked hostnames (.local, .internal, .arpa, ...)
 *
 * The hostname is normalised before matching: it is lower-cased, the square
 * brackets that `URL.hostname` keeps around IPv6 literals are removed, and a
 * single trailing root dot ("localhost.") is dropped. Without this, `[::1]`
 * and `localhost.` would slip past every check below.
 *
 * Returns `null` if the host is acceptable, or a human-readable reason
 * string if it should be blocked.
 */
export function isPrivateOrReservedHost(hostname) {
    if (!hostname)
        return "empty hostname";
    const host = hostname
        .toLowerCase()
        .replace(/^\[|\]$/g, "")
        .replace(/\.$/, "");
    // Nothing left once the root dot is stripped (input was just ".").
    if (!host)
        return "empty hostname";
    if (BLOCKED_HOSTNAME_EXACT.has(host)) {
        return `reserved hostname (${host})`;
    }
    const blockedSuffix = BLOCKED_HOSTNAME_SUFFIXES.find((suffix) => host.endsWith(suffix));
    if (blockedSuffix)
        return `reserved TLD / suffix (${blockedSuffix})`;
    // Numeric / hex encoding of IPv4 — decode and test.
    const decoded = decodeNumericIPv4(host);
    if (decoded) {
        if (isPrivateIPv4(decoded))
            return `private / reserved IPv4 (${decoded}, encoded as ${hostname})`;
        // Also reject all numeric hostnames that decode to public IPs — they're a
        // deniability smell. Callers who intentionally audit a literal IP will
        // pass it in dotted-quad form.
        return `ambiguous numeric-encoded IPv4 (${hostname} decodes to ${decoded}); pass dotted-quad form explicitly`;
    }
    // isIP must see the bare literal: it returns 0 for a bracketed address.
    const version = isIP(host); // 4 | 6 | 0
    if (version === 4 && isPrivateIPv4(host))
        return "private / reserved IPv4 range";
    if (version === 6) {
        // Drop any "%zone" suffix before the range check.
        const bare = host.replace(/%.*$/, "");
        if (isPrivateIPv6(bare))
            return "private / reserved IPv6 range";
    }
    return null;
}
|
|
202
|
+
/**
 * Full SSRF check: the string check above PLUS a DNS lookup to guarantee the
 * resolved address isn't in a private range.
 *
 * IPv6 literals may be passed with or without the square brackets that
 * `new URL(...).hostname` returns; brackets are removed before the literal-IP
 * test so a bracketed address is treated as an address, not as a name to
 * resolve.
 *
 * A and AAAA lookups run in parallel. A failure of one family is tolerated as
 * long as the other returns at least one address; every returned address must
 * be public.
 *
 * Time-of-check-vs-time-of-use: an attacker-controlled DNS server can return
 * a public IP on first lookup and a private IP on the subsequent fetch ("DNS
 * rebinding"). Mitigations: cache the resolved IP and dial to THAT IP (host
 * header preserved), or use a resolver that refuses re-resolution within a
 * TTL window. This function validates; it does not pin. For the audit
 * engine's own fetches, the pinning layer is layered on top via `safeFetch`.
 *
 * @param hostname Host to validate (name or IP literal).
 * @param options  Optional `resolver` override with `resolve4` / `resolve6`.
 * @throws SSRFError when the host or any resolved address is blocked.
 * @throws DnsResolutionError when neither lookup returns an address.
 */
export async function validateTargetHost(hostname, options = {}) {
    // Non-string input is passed through so the string check reports it.
    const bare = typeof hostname === "string" ? hostname.replace(/^\[|\]$/g, "") : hostname;
    const stringReason = isPrivateOrReservedHost(bare);
    if (stringReason)
        throw new SSRFError(hostname, stringReason);
    // Literal IPs pass the DNS step trivially (isIP > 0 ⇒ not a name to resolve).
    if (isIP(bare) !== 0)
        return;
    const resolver = options.resolver ?? {
        resolve4: (h) => dns.resolve4(h),
        resolve6: (h) => dns.resolve6(h),
    };
    const [v4, v6] = await Promise.allSettled([
        resolver.resolve4(bare),
        resolver.resolve6(bare),
    ]);
    const addrs = [];
    if (v4.status === "fulfilled")
        for (const a of v4.value)
            addrs.push({ kind: "v4", addr: a });
    if (v6.status === "fulfilled")
        for (const a of v6.value)
            addrs.push({ kind: "v6", addr: a });
    if (addrs.length === 0) {
        throw new DnsResolutionError(hostname);
    }
    // Reject if ANY record is private: the later fetch may pick any of them.
    for (const { kind, addr } of addrs) {
        const isPrivate = kind === "v4" ? isPrivateIPv4(addr) : isPrivateIPv6(addr);
        if (isPrivate) {
            throw new SSRFError(hostname, `resolves to private ${kind} address ${addr}`);
        }
    }
}
|
|
245
|
+
/**
 * Answers "does this URL point at localhost or a private network?". The CLI
 * uses it to auto-apply a conservative crawl preset when a developer runs
 * `pseolint http://localhost:3000` — a cache-cold local server can amplify
 * every fetch into a thundering herd of DB queries.
 *
 * Anything that does not parse as a URL, or parses without a hostname
 * (paths, `file:///…`, empty strings), yields `false`. The decision itself is
 * delegated to `isPrivateOrReservedHost` so the two stay in sync.
 */
export function isLocalhostUrl(url) {
    let hostname;
    try {
        hostname = new URL(url).hostname;
    }
    catch {
        // Not a parseable URL.
        return false;
    }
    if (!hostname)
        return false;
    // URL.hostname keeps the brackets around IPv6 literals; drop them first.
    const unbracketed = hostname.replace(/^\[|\]$/g, "");
    const reason = isPrivateOrReservedHost(unbracketed);
    return reason !== null;
}
|
|
268
|
+
//# sourceMappingURL=ssrf-guard.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ssrf-guard.js","sourceRoot":"","sources":["../src/ssrf-guard.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,GAAG,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAEhC;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,MAAM,OAAO,SAAU,SAAQ,KAAK;IACzB,QAAQ,CAAS;IACjB,MAAM,CAAS;IAExB,YAAY,QAAgB,EAAE,MAAc;QAC1C,KAAK,CAAC,gBAAgB,QAAQ,uBAAuB,MAAM,EAAE,CAAC,CAAC;QAC/D,IAAI,CAAC,IAAI,GAAG,WAAW,CAAC;QACxB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IAClC,QAAQ,CAAS;IAE1B,YAAY,QAAgB;QAC1B,KAAK,CAAC,8BAA8B,QAAQ,GAAG,CAAC,CAAC;QACjD,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAC;QACjC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;CACF;AAED,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,WAAW;IACX,eAAe;IACf,eAAe;IACf,cAAc;IACd,GAAG;CACJ,CAAC,CAAC;AAEH,MAAM,yBAAyB,GAAG;IAChC,QAAQ;IACR,YAAY;IACZ,WAAW;IACX,OAAO;IACP,WAAW;IACX,MAAM;IACN,OAAO;IACP,UAAU;IACV,OAAO;CACR,CAAC;AAEF,MAAM,gBAAgB,GAAG,uCAAuC,CAAC;AAEjE;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC;QACtF,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC;IACrB,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,6BAA6B;IACvD,IAAI,CAAC,KAAK,EAAE;QAAE,OAAO,IAAI,CAAC,CAAC,aAAa;IACxC,IAAI,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,yBAAyB;IACrD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,+CAA+C;IACxF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC,CAAC,gBAAgB;IAClE,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,iBAAiB;IAC1D,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,wBAAwB;IAC3E,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;Q
AAE,OAAO,IAAI,CAAC,CAAC,sBAAsB;IAC/E,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,4BAA4B;IACrF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,4BAA4B;IAClF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,+BAA+B;IAC3F,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,8BAA8B;IACzF,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,0BAA0B;IACjE,IAAI,CAAC,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,qDAAqD;IAChF,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACtC,IAAI,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,KAAK;QAAE,OAAO,IAAI,CAAC;IAC7D,IAAI,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC;QAC5D,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,YAAY;IAC3F,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,eAAe;IAC5F,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,qBAAqB;IACnE,8EAA8E;IAC9E,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5C,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAgB;IAChD,MAAM,CAAC,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IACxC,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,IAAI,CAAC,GAAkB,IAAI,CAAC;IAC5B,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACvB,oEAAoE;QACpE,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACzB,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,IAAI,MAAM,IAAI,UAAU;YAAE,CAAC,GAAG,MAAM,CAAC;IAClF,CAAC;SAAM,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACnC,sBAAsB;QACtB,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACzB,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,IAAI,MAAM,IAAI,UAAU;YAAE,CAAC,GAAG,MAAM,CAAC;IAClF,CAAC;IACD,IAAI,CAAC,KAAK,IAAI;QAAE,OAAO,IAAI,CA
AC;IAC5B,OAAO;QACL,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,IAAI;QACjB,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,IAAI;QACjB,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,IAAI;QAChB,CAAC,GAAG,IAAI;KACT,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACd,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,IAAI,CAAC,QAAQ;QAAE,OAAO,gBAAgB,CAAC;IACvC,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;IAErC,IAAI,sBAAsB,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,sBAAsB,KAAK,GAAG,CAAC;IACxC,CAAC;IACD,KAAK,MAAM,MAAM,IAAI,yBAAyB,EAAE,CAAC;QAC/C,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,0BAA0B,MAAM,GAAG,CAAC;IACzE,CAAC;IAED,oDAAoD;IACpD,MAAM,OAAO,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAC5C,IAAI,OAAO,EAAE,CAAC;QACZ,IAAI,aAAa,CAAC,OAAO,CAAC;YAAE,OAAO,4BAA4B,OAAO,gBAAgB,QAAQ,GAAG,CAAC;QAClG,0EAA0E;QAC1E,uEAAuE;QACvE,+BAA+B;QAC/B,OAAO,mCAAmC,QAAQ,eAAe,OAAO,qCAAqC,CAAC;IAChH,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY;IAC5C,IAAI,OAAO,KAAK,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC;QAAE,OAAO,+BAA+B,CAAC;IACrF,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;QAClB,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAClE,IAAI,aAAa,CAAC,IAAI,CAAC;YAAE,OAAO,+BAA+B,CAAC;IAClE,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAUD;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,UAAqC,EAAE;IAEvC,MAAM,YAAY,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IACvD,IAAI,YAAY;QAAE,MAAM,IAAI,SAAS,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAE9D,8EAA8E;IAC9E,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO;IAEjC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI;QACnC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;QAChC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;KACjC,CAAC;IAEF,MAAM,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;QACxC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAC3B,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;KAC5B,CAAC,CAAC;IAEH,MAAM,KAAK,GAA+C,EAAE,CAAC;IAC7D,IAAI,EAAE,CAAC,MAAM,KAAK,WAAW;QAAE,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;IAC7F,IAAI,EAAE,CAAC,MAAM,KAAK
,WAAW;QAAE,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;IAE7F,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,KAAK,EAAE,CAAC;QACnC,MAAM,SAAS,GAAG,IAAI,KAAK,IAAI,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC5E,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,IAAI,SAAS,CAAC,QAAQ,EAAE,uBAAuB,IAAI,YAAY,IAAI,EAAE,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,QAAQ;QAAE,OAAO,KAAK,CAAC;IACnC,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IACrD,OAAO,uBAAuB,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC;AAChD,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
export type Severity = "info" | "warning" | "error" | "critical";
|
|
2
|
+
/** Verdict ladder — replaces the old numeric `score` field as the user-facing signal. */
|
|
3
|
+
export type Verdict = "ready" | "caution" | "concerning" | "critical";
|
|
4
|
+
/** Letter grade per category. */
|
|
5
|
+
export type Grade = "A" | "B" | "C" | "D" | "F";
|
|
6
|
+
/** Top-level v0.4 schema version. Bumps on every breaking output change. */
|
|
7
|
+
export declare const SCHEMA_VERSION = "2026-04-v0.4";
|
|
2
8
|
/** Options for `normalizeAuditUrl` (HTTP identity). */
|
|
3
9
|
export interface NormalizeUrlOptions {
|
|
4
10
|
/** When true (default), drop `?query` for URL identity. */
|
|
@@ -38,6 +44,12 @@ export interface RuleResult {
|
|
|
38
44
|
fix?: string;
|
|
39
45
|
/** Google documentation URL backing this finding. */
|
|
40
46
|
ref?: string;
|
|
47
|
+
/**
|
|
48
|
+
* Marketing-page deeplink for this rule (v0.4+). Always populated by the
|
|
49
|
+
* auditor — points to https://pseolint.dev/rules/{slug} where slug is the
|
|
50
|
+
* rule-id segment after the namespace prefix.
|
|
51
|
+
*/
|
|
52
|
+
docsUrl?: string;
|
|
41
53
|
/** Primary page this finding refers to, when applicable. */
|
|
42
54
|
pageUrl?: string;
|
|
43
55
|
/** Other URLs involved (e.g. cluster members, related pairs). */
|
|
@@ -51,15 +63,47 @@ export interface RuleResult {
|
|
|
51
63
|
/** Fix effort level assigned by the enrichment pipeline. */
|
|
52
64
|
effort?: FixEffort;
|
|
53
65
|
}
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
66
|
+
/** v0.4 category bucket keys: four weighted categories plus the never-weighted `audit` bucket. */
|
|
67
|
+
export type CategoryKey = "integrity" | "discoverability" | "citation" | "data" | "audit";
|
|
68
|
+
/** Per-category grade + raw issue count. Audit category exists for completeness but is never weighted. */
|
|
69
|
+
export interface CategoryGrade {
|
|
70
|
+
grade: Grade;
|
|
71
|
+
issues: number;
|
|
72
|
+
}
|
|
73
|
+
export type CategoryGrades = Record<CategoryKey, CategoryGrade>;
|
|
74
|
+
/** Issues bucketed by severity — the v0.4 replacement for the flat `findings` array. */
|
|
75
|
+
export interface IssueBuckets {
|
|
76
|
+
/** Severity = error or critical. Must be fixed before shipping. */
|
|
77
|
+
blockers: RuleResult[];
|
|
78
|
+
/** Severity = warning. Should be fixed before scaling. */
|
|
79
|
+
shouldFix: RuleResult[];
|
|
80
|
+
/** Severity = info. Tracked for trend analysis. */
|
|
81
|
+
informational: RuleResult[];
|
|
82
|
+
}
|
|
83
|
+
/** Crawl statistics surfaced under diagnostics. */
|
|
84
|
+
export interface CrawlStats {
|
|
85
|
+
/** Total URLs the crawler considered (sitemap + discovered links). */
|
|
86
|
+
discovered: number;
|
|
87
|
+
/** URLs the crawler successfully fetched and audited. */
|
|
88
|
+
fetched: number;
|
|
89
|
+
/**
|
|
90
|
+
* URLs the crawler considered but excluded from the rule pipeline (non-HTML
|
|
91
|
+
* content-type, dedup, robots-disallow, render budget, etc.).
|
|
92
|
+
*/
|
|
93
|
+
skipped: number;
|
|
94
|
+
}
|
|
95
|
+
/** Engine-internal diagnostics — weight 0, never affects verdict. */
|
|
96
|
+
export interface Diagnostics {
|
|
97
|
+
/** Origin readiness aggregate (median/p95/error ratio). Null when no live fetches occurred. */
|
|
98
|
+
originReadiness: import("./fetch-observer.js").ReadinessReport | null;
|
|
99
|
+
crawlStats: CrawlStats;
|
|
100
|
+
/**
|
|
101
|
+
* Engine-emitted `audit/*` findings (e.g. `audit/duplicate-url`,
|
|
102
|
+
* `audit/skipped-by-robots`). Always severity=info, never affect the
|
|
103
|
+
* verdict, never appear in `summary.issues`. Surfaced here so consumers
|
|
104
|
+
* (telemetry, debug UIs) can still see what was skipped or deduped.
|
|
105
|
+
*/
|
|
106
|
+
auditFindings: RuleResult[];
|
|
63
107
|
}
|
|
64
108
|
/** Options for HTTP caching during audits. */
|
|
65
109
|
export interface CacheOptions {
|
|
@@ -67,6 +111,13 @@ export interface CacheOptions {
|
|
|
67
111
|
dir?: string;
|
|
68
112
|
/** TTL for entries without ETag/Last-Modified validators. Default: 7 days. */
|
|
69
113
|
ttlMs?: number;
|
|
114
|
+
/**
|
|
115
|
+
* Maximum total size of the cache directory in bytes. When exceeded after a
|
|
116
|
+
* run, oldest-mtime entries are evicted until under the cap. Also sweeps
|
|
117
|
+
* leftover `.tmp` files from crashed writes. `<= 0` disables size-based
|
|
118
|
+
* eviction. Default: 209_715_200 (200 MB).
|
|
119
|
+
*/
|
|
120
|
+
maxBytes?: number;
|
|
70
121
|
}
|
|
71
122
|
/** Cache stats reported at end of audit. */
|
|
72
123
|
export interface CacheStats {
|
|
@@ -117,12 +168,34 @@ export interface AiOptions {
|
|
|
117
168
|
} | false;
|
|
118
169
|
}
|
|
119
170
|
export interface AuditSummary {
|
|
120
|
-
|
|
121
|
-
|
|
171
|
+
/** Schema version. v0.4 = "2026-04-v0.4". Wave 2 / 3 consumers branch on this. */
|
|
172
|
+
schemaVersion: typeof SCHEMA_VERSION;
|
|
173
|
+
/** User-facing verdict ladder. */
|
|
174
|
+
verdict: Verdict;
|
|
175
|
+
/**
|
|
176
|
+
* Internal numeric risk score (0–100, low = good). Retained for CI thresholding,
|
|
177
|
+
* trend deltas, and alert-gate diff logic. NEVER displayed to humans.
|
|
178
|
+
*/
|
|
179
|
+
risk: number;
|
|
180
|
+
/** One-liner summarising counts: e.g. "3 ship-blockers, 16 should-fix". */
|
|
181
|
+
headline: string;
|
|
182
|
+
/** Per-category grade + count. */
|
|
183
|
+
categories: CategoryGrades;
|
|
184
|
+
/** Findings bucketed by severity. */
|
|
185
|
+
issues: IssueBuckets;
|
|
186
|
+
/**
|
|
187
|
+
* v0.4 §4.11 — pre-flight site classification. Decides which rules apply
|
|
188
|
+
* based on URL count, template clustering, and framework signal. The
|
|
189
|
+
* `suppressedRules` list is what the rule dispatcher honours. Pass
|
|
190
|
+
* `strict: true` in AuditOptions to keep the classification but force all
|
|
191
|
+
* rules to run anyway.
|
|
192
|
+
*/
|
|
193
|
+
siteClassification: import("./site-classifier.js").SiteClassification;
|
|
194
|
+
/** Engine-internal diagnostics (origin readiness, crawl stats). Weight 0. */
|
|
195
|
+
diagnostics: Diagnostics;
|
|
122
196
|
groupScores?: Record<string, number>;
|
|
123
197
|
groupPageCounts?: Record<string, number>;
|
|
124
198
|
pageCount: number;
|
|
125
|
-
findings: RuleResult[];
|
|
126
199
|
/** True when the enrichment pipeline detects template-generated content. */
|
|
127
200
|
templateDetected?: boolean;
|
|
128
201
|
/** Pre-enrichment finding count, for backward compatibility with CI scripts. */
|
|
@@ -160,12 +233,6 @@ export interface AuditOptions {
|
|
|
160
233
|
uniqueValueMinWords?: number;
|
|
161
234
|
metaUniquenessMinJaccard?: number;
|
|
162
235
|
linkDepthMaxClicks?: number;
|
|
163
|
-
/** Minimum pages in one directory before hub/index coverage is required. */
|
|
164
|
-
hubPagesMinSiblings?: number;
|
|
165
|
-
/** Skip hub/index checks when a directory has more than this many pages (e.g. large blogs). */
|
|
166
|
-
hubPagesMaxSiblings?: number;
|
|
167
|
-
titleOverlapThreshold?: number;
|
|
168
|
-
keywordCollisionMinShared?: number;
|
|
169
236
|
templateCoverageMinPages?: number;
|
|
170
237
|
/** aeo/answer-first: max words in the first paragraph for extractable answer. */
|
|
171
238
|
answerFirstMaxWords?: number;
|
|
@@ -188,7 +255,12 @@ export interface AuditOptions {
|
|
|
188
255
|
timeout?: number;
|
|
189
256
|
/** Audit a random subset of N pages. 0 means all pages (default: 0). */
|
|
190
257
|
sampleSize?: number;
|
|
191
|
-
/**
|
|
258
|
+
/**
|
|
259
|
+
* URL/path glob patterns to exclude from the audit. v0.4: globs match
|
|
260
|
+
* against the URL pathname only (e.g. "/api/foo"), not the full URL.
|
|
261
|
+
* The auditor logs a warning at the end of the audit for any pattern that
|
|
262
|
+
* matched zero discovered URLs (likely-typo signal).
|
|
263
|
+
*/
|
|
192
264
|
ignore?: string[];
|
|
193
265
|
crawlDiscovery?: boolean;
|
|
194
266
|
/**
|
|
@@ -243,7 +315,81 @@ export interface AuditOptions {
|
|
|
243
315
|
ai?: AiOptions;
|
|
244
316
|
/** Local-only telemetry (JSONL) options. When omitted or `enabled: false`, no records are written. */
|
|
245
317
|
telemetry?: TelemetryOptions;
|
|
318
|
+
/**
|
|
319
|
+
* External abort signal. When aborted, in-flight fetches are cancelled and
|
|
320
|
+
* `auditSource` throws an `AbortError`. Host code can use this to kill an
|
|
321
|
+
* audit that exceeded a per-user budget or was cancelled by the user.
|
|
322
|
+
*/
|
|
323
|
+
signal?: AbortSignal;
|
|
324
|
+
/**
|
|
325
|
+
* When true, every crawled URL's hostname is validated with
|
|
326
|
+
* `validateTargetHost` before fetch — resolves the hostname and rejects if
|
|
327
|
+
* any address is in a private / reserved / link-local / loopback / multicast
|
|
328
|
+
* range. Applies to the source URL, sitemap entries, redirect targets, and
|
|
329
|
+
* discovered links. Defends against SSRF / DNS-rebinding when the library is
|
|
330
|
+
* invoked against user-supplied URLs (e.g. from a hosted audit service).
|
|
331
|
+
* Default: false (CLI users auditing localhost / staging sites should not
|
|
332
|
+
* be broken silently).
|
|
333
|
+
*/
|
|
334
|
+
guardSsrf?: boolean;
|
|
335
|
+
/**
|
|
336
|
+
* When true (default), sitemap URLs that match a `Disallow:` rule in the
|
|
337
|
+
* target's robots.txt are skipped at fetch time instead of crawled. Set to
|
|
338
|
+
* false to audit staging / internal sites that Disallow everything.
|
|
339
|
+
*/
|
|
340
|
+
respectRobotsTxt?: boolean;
|
|
341
|
+
/**
|
|
342
|
+
* Preset that flips several safety options at once.
|
|
343
|
+
* "saas" — intended for hosted services auditing user-submitted URLs:
|
|
344
|
+
* guardSsrf=true, respectRobotsTxt=true, tighter maxFetchBytes cap,
|
|
345
|
+
* followRedirects stays true (audits need final URL).
|
|
346
|
+
* "cli" — intended for local CLI / dev use:
|
|
347
|
+
* guardSsrf=false (auditing localhost is OK), respectRobotsTxt=true,
|
|
348
|
+
* default caps.
|
|
349
|
+
* "dev" — tiny crawl budget for localhost probing: concurrency=1,
|
|
350
|
+
* sampleSize=25, maxCrawlDiscovered=50. Designed so a cache-cold
|
|
351
|
+
* `pseolint http://localhost:3000` doesn't thundering-herd a dev DB.
|
|
352
|
+
* Auto-selected on localhost sources unless `autoDevPreset: false`.
|
|
353
|
+
* Individual options on AuditOptions override the preset when set.
|
|
354
|
+
* Default: undefined (no preset applied, except that localhost sources are auto-promoted to "dev" unless `autoDevPreset: false`).
|
|
355
|
+
*/
|
|
356
|
+
safeMode?: SafeMode;
|
|
357
|
+
/**
|
|
358
|
+
* When true (default), audit sources pointing at localhost / private
|
|
359
|
+
* networks are auto-promoted to the `dev` safeMode preset. Set to false
|
|
360
|
+
* to opt out (e.g. `--full` on the CLI). Explicit `safeMode` beats this.
|
|
361
|
+
*/
|
|
362
|
+
autoDevPreset?: boolean;
|
|
363
|
+
/**
|
|
364
|
+
* Hard ceiling on URLs discovered via link-following before sampling.
|
|
365
|
+
* Protects against malicious sites with many self-links that could extend
|
|
366
|
+
* the crawl up to the byte budget. Default: 5000.
|
|
367
|
+
*/
|
|
368
|
+
maxCrawlDiscovered?: number;
|
|
369
|
+
/**
|
|
370
|
+
* When false, 3xx responses are returned as-is (the audit will see the
|
|
371
|
+
* redirect location header and can report it) instead of followed. Useful
|
|
372
|
+
* for security-sensitive audits that must not leave the exact submitted
|
|
373
|
+
* URL. Default: true.
|
|
374
|
+
*/
|
|
375
|
+
followRedirects?: boolean;
|
|
376
|
+
/**
|
|
377
|
+
* When false, disables the in-flight backpressure watchdog that aborts the
|
|
378
|
+
* audit when origin latency / 5xx rate spikes past thresholds during the
|
|
379
|
+
* crawl. On by default; the last line of defence against a cache-cold
|
|
380
|
+
* origin ballooning an audit into an expensive egress event.
|
|
381
|
+
*/
|
|
382
|
+
backpressure?: boolean;
|
|
383
|
+
/**
|
|
384
|
+
* v0.4 §4.11 — when true, the site classifier still runs and `summary.siteClassification`
|
|
385
|
+
* is populated, but `suppressedRules` is forced to `[]` so every rule executes
|
|
386
|
+
* regardless of detected site type. Use this to inspect what the classifier
|
|
387
|
+
* sees on a site that would otherwise have pSEO-only rules suppressed.
|
|
388
|
+
* Default: false.
|
|
389
|
+
*/
|
|
390
|
+
strict?: boolean;
|
|
246
391
|
}
|
|
392
|
+
export type SafeMode = "saas" | "cli" | "dev";
|
|
247
393
|
export type SamplingStrategy = "stratified" | "random";
|
|
248
394
|
/** A single page's source data for data-source comparison. */
|
|
249
395
|
export interface PageDataRecord {
|
|
@@ -267,6 +413,12 @@ export interface HttpMeta {
|
|
|
267
413
|
redirectChain: string[];
|
|
268
414
|
xRobotsTag: string;
|
|
269
415
|
linkHeader: string;
|
|
416
|
+
/**
|
|
417
|
+
* v0.4: lower-cased response headers. Populated for the source URL only
|
|
418
|
+
* (used by the dev-server framework detector). Other crawled pages can
|
|
419
|
+
* leave this undefined to keep the audit memory-bounded.
|
|
420
|
+
*/
|
|
421
|
+
headers?: Record<string, string>;
|
|
270
422
|
}
|
|
271
423
|
export interface ParsedPage {
|
|
272
424
|
url: string;
|