@pseolint/core 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +49 -1
  2. package/dist/ai/triage.d.ts.map +1 -1
  3. package/dist/ai/triage.js +8 -1
  4. package/dist/ai/triage.js.map +1 -1
  5. package/dist/auditor.d.ts.map +1 -1
  6. package/dist/auditor.js +495 -130
  7. package/dist/auditor.js.map +1 -1
  8. package/dist/backpressure.d.ts +68 -0
  9. package/dist/backpressure.d.ts.map +1 -0
  10. package/dist/backpressure.js +81 -0
  11. package/dist/backpressure.js.map +1 -0
  12. package/dist/cache.d.ts +73 -0
  13. package/dist/cache.d.ts.map +1 -1
  14. package/dist/cache.js +258 -19
  15. package/dist/cache.js.map +1 -1
  16. package/dist/enrich-findings.d.ts.map +1 -1
  17. package/dist/enrich-findings.js +1 -14
  18. package/dist/enrich-findings.js.map +1 -1
  19. package/dist/fetch-observer.d.ts +97 -0
  20. package/dist/fetch-observer.d.ts.map +1 -0
  21. package/dist/fetch-observer.js +124 -0
  22. package/dist/fetch-observer.js.map +1 -0
  23. package/dist/formatters/console.d.ts +7 -9
  24. package/dist/formatters/console.d.ts.map +1 -1
  25. package/dist/formatters/console.js +218 -254
  26. package/dist/formatters/console.js.map +1 -1
  27. package/dist/formatters/html.d.ts +5 -1
  28. package/dist/formatters/html.d.ts.map +1 -1
  29. package/dist/formatters/html.js +352 -570
  30. package/dist/formatters/html.js.map +1 -1
  31. package/dist/formatters/index.d.ts +4 -1
  32. package/dist/formatters/index.d.ts.map +1 -1
  33. package/dist/formatters/index.js +1 -1
  34. package/dist/formatters/index.js.map +1 -1
  35. package/dist/formatters/json.d.ts +11 -1
  36. package/dist/formatters/json.d.ts.map +1 -1
  37. package/dist/formatters/json.js +5 -1
  38. package/dist/formatters/json.js.map +1 -1
  39. package/dist/formatters/markdown.d.ts +7 -1
  40. package/dist/formatters/markdown.d.ts.map +1 -1
  41. package/dist/formatters/markdown.js +77 -70
  42. package/dist/formatters/markdown.js.map +1 -1
  43. package/dist/index.d.ts +13 -8
  44. package/dist/index.d.ts.map +1 -1
  45. package/dist/index.js +6 -7
  46. package/dist/index.js.map +1 -1
  47. package/dist/rule-references.d.ts.map +1 -1
  48. package/dist/rule-references.js +0 -6
  49. package/dist/rule-references.js.map +1 -1
  50. package/dist/rules/content/unique-value.d.ts.map +1 -1
  51. package/dist/rules/content/unique-value.js +1 -0
  52. package/dist/rules/content/unique-value.js.map +1 -1
  53. package/dist/rules/scope.d.ts.map +1 -1
  54. package/dist/rules/scope.js +6 -14
  55. package/dist/rules/scope.js.map +1 -1
  56. package/dist/rules/tech/robots-sitemap-presence.d.ts +9 -1
  57. package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -1
  58. package/dist/rules/tech/robots-sitemap-presence.js +14 -5
  59. package/dist/rules/tech/robots-sitemap-presence.js.map +1 -1
  60. package/dist/safe-mode-preset.d.ts +27 -0
  61. package/dist/safe-mode-preset.d.ts.map +1 -0
  62. package/dist/safe-mode-preset.js +54 -0
  63. package/dist/safe-mode-preset.js.map +1 -0
  64. package/dist/site-classifier.d.ts +83 -0
  65. package/dist/site-classifier.d.ts.map +1 -0
  66. package/dist/site-classifier.js +205 -0
  67. package/dist/site-classifier.js.map +1 -0
  68. package/dist/ssrf-guard.d.ts +96 -0
  69. package/dist/ssrf-guard.d.ts.map +1 -0
  70. package/dist/ssrf-guard.js +268 -0
  71. package/dist/ssrf-guard.js.map +1 -0
  72. package/dist/types.d.ts +171 -19
  73. package/dist/types.d.ts.map +1 -1
  74. package/dist/types.js +2 -1
  75. package/dist/types.js.map +1 -1
  76. package/package.json +2 -2
@@ -0,0 +1,96 @@
1
+ /**
2
+ * SSRF guard for audit targets.
3
+ *
4
+ * Two layers:
5
+ * 1. `isPrivateOrReservedHost(hostname)` — fast, synchronous string check.
6
+ * Catches literal private IPs ("10.0.0.5"), loopback names ("localhost"),
7
+ * link-local suffixes (".local"), and internal/metadata hostnames.
8
+ * 2. `validateTargetHost(hostname)` — async. Resolves the hostname via DNS
9
+ * and rejects if the resulting address (v4 or v6) falls into a private /
10
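+ * reserved / link-local / multicast range. Catches public hostnames whose
11
+ * DNS records point at private addresses such as 127.0.0.1; it does not pin the resolved IP, so rebinding between this check and the fetch needs separate handling.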
12
+ *
13
+ * Usage:
14
+ * const hostname = new URL(userSuppliedUrl).hostname;
15
+ * await validateTargetHost(hostname); // throws SSRFError on blocked targets
16
+ *
17
+ * Library consumers should call this BEFORE enqueuing a crawl. The audit
18
+ * engine itself wraps its own fetches with this check when `guardSsrf` is
19
+ * enabled in AuditOptions, but defense-in-depth at the API boundary is the
20
+ * primary mitigation.
21
+ */
22
+ export declare class SSRFError extends Error {
23
+ readonly hostname: string;
24
+ readonly reason: string;
25
+ constructor(hostname: string, reason: string);
26
+ }
27
+ /**
28
+ * Thrown when a hostname legitimately fails DNS resolution (NXDOMAIN / SERVFAIL
29
+ * / no A / AAAA records). Distinct from `SSRFError`: resolution failure
30
+ * is a "try again later / fix your typo" condition, not an attack. Callers
31
+ * in SaaS contexts should not log these as security events.
32
+ */
33
+ export declare class DnsResolutionError extends Error {
34
+ readonly hostname: string;
35
+ constructor(hostname: string);
36
+ }
37
+ /**
38
+ * IPv4 range predicate — true if the address is private / reserved /
39
+ * link-local / loopback / multicast / broadcast / CGNAT. Expects a valid
40
+ * dotted-quad; caller must ensure that (e.g. via `net.isIP`).
41
+ */
42
+ export declare function isPrivateIPv4(addr: string): boolean;
43
+ /**
44
+ * IPv6 range predicate — true for loopback, ULA, link-local, unspecified,
45
+ * multicast, and IPv4-mapped addresses (after unwrapping to the v4 check).
46
+ */
47
+ export declare function isPrivateIPv6(addr: string): boolean;
48
+ /**
49
+ * Decode an integer-packed (`2130706433` = `127.0.0.1`) or hex-encoded
50
+ * (`0x7f000001`) hostname into dotted-quad form. Returns `null` if the
51
+ * input isn't a numeric hostname. Needed because some fetch stacks accept
52
+ * these encodings and resolve them to private IPs, bypassing a naive
53
+ * string-only dotted-quad check.
54
+ */
55
+ export declare function decodeNumericIPv4(hostname: string): string | null;
56
+ /**
57
+ * Synchronous string-only check. Rejects:
58
+ * - literal private / reserved IP addresses (dotted-quad OR numeric/hex encoding)
59
+ * - exact blocked hostnames (localhost, 0, etc.)
60
+ * - suffix-blocked hostnames (.local, .internal, .arpa, ...)
61
+ *
62
+ * Returns `null` if the host is acceptable, or a human-readable reason
63
+ * string if it should be blocked.
64
+ */
65
+ export declare function isPrivateOrReservedHost(hostname: string): string | null;
66
+ export interface ValidateTargetHostOptions {
67
+ /** Override the DNS resolver — useful for tests or custom resolvers. */
68
+ resolver?: {
69
+ resolve4: (hostname: string) => Promise<string[]>;
70
+ resolve6: (hostname: string) => Promise<string[]>;
71
+ };
72
+ }
73
+ /**
74
+ * Full SSRF check: the string check above PLUS a DNS lookup to guarantee the
75
+ * resolved address isn't in a private range. Throws `SSRFError` for blocked targets and `DnsResolutionError` when neither the A nor the AAAA lookup returns an address.
76
+ *
77
+ * Time-of-check-vs-time-of-use: an attacker-controlled DNS server can return
78
+ * a public IP on first lookup and a private IP on the subsequent fetch ("DNS
79
+ * rebinding"). Mitigations: cache the resolved IP and dial to THAT IP (host
80
+ * header preserved), or use a resolver that refuses re-resolution within a
81
+ * TTL window. This function validates; it does not pin. For the audit
82
+ * engine's own fetches, the pinning layer is layered on top via `safeFetch`.
83
+ */
84
+ export declare function validateTargetHost(hostname: string, options?: ValidateTargetHostOptions): Promise<void>;
85
+ /**
86
+ * Convenience check for "is this URL pointing at localhost or a private
87
+ * network?". Used by the CLI to auto-apply a conservative crawl preset when
88
+ * a developer runs `pseolint http://localhost:3000` — a cache-cold local
89
+ * server can amplify every fetch into a thundering herd of DB queries.
90
+ *
91
+ * Returns false for anything that isn't a parseable URL with a hostname
92
+ * (paths, `file://`, empty strings). Delegates the actual decision to
93
+ * `isPrivateOrReservedHost` so the two stay in sync.
94
+ */
95
+ export declare function isLocalhostUrl(url: string): boolean;
96
+ //# sourceMappingURL=ssrf-guard.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ssrf-guard.d.ts","sourceRoot":"","sources":["../src/ssrf-guard.ts"],"names":[],"mappings":"AAGA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,qBAAa,SAAU,SAAQ,KAAK;IAClC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;gBAEZ,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;CAM7C;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,SAAQ,KAAK;IAC3C,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;gBAEd,QAAQ,EAAE,MAAM;CAK7B;AAwBD;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAqBnD;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAWnD;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAoBjE;AAED;;;;;;;;GAQG;AACH,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA4BvE;AAED,MAAM,WAAW,yBAAyB;IACxC,wEAAwE;IACxE,QAAQ,CAAC,EAAE;QACT,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;QAClD,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;KACnD,CAAC;CACH;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,IAAI,CAAC,CA+Bf;AAED;;;;;;;;;GASG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAUnD"}
@@ -0,0 +1,268 @@
1
+ import { promises as dns } from "node:dns";
2
+ import { isIP } from "node:net";
3
+ /**
4
+ * SSRF guard for audit targets.
5
+ *
6
+ * Two layers:
7
+ * 1. `isPrivateOrReservedHost(hostname)` — fast, synchronous string check.
8
+ * Catches literal private IPs ("10.0.0.5"), loopback names ("localhost"),
9
+ * link-local suffixes (".local"), and internal/metadata hostnames.
10
+ * 2. `validateTargetHost(hostname)` — async. Resolves the hostname via DNS
11
+ * and rejects if the resulting address (v4 or v6) falls into a private /
12
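+ * reserved / link-local / multicast range. Catches public hostnames whose
13
+ * DNS records point at private addresses such as 127.0.0.1; it does not pin the resolved IP, so rebinding between this check and the fetch needs separate handling.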
14
+ *
15
+ * Usage:
16
+ * const hostname = new URL(userSuppliedUrl).hostname;
17
+ * await validateTargetHost(hostname); // throws SSRFError on blocked targets
18
+ *
19
+ * Library consumers should call this BEFORE enqueuing a crawl. The audit
20
+ * engine itself wraps its own fetches with this check when `guardSsrf` is
21
+ * enabled in AuditOptions, but defense-in-depth at the API boundary is the
22
+ * primary mitigation.
23
+ */
24
/**
 * Error raised when a target host is refused by the SSRF guard.
 *
 * The offending `hostname` and the human-readable `reason` are exposed as
 * separate fields in addition to being embedded in the message.
 */
export class SSRFError extends Error {
    hostname;
    reason;
    constructor(hostname, reason) {
        const message = `Target host "${hostname}" is not permitted: ${reason}`;
        super(message);
        // Override the default "Error" so logs can tell the guard's errors apart.
        this.name = "SSRFError";
        this.hostname = hostname;
        this.reason = reason;
    }
}
34
/**
 * Raised when a hostname yields no usable DNS answer (NXDOMAIN / SERVFAIL /
 * no A or AAAA records).
 *
 * Kept separate from `SSRFError` on purpose: a failed lookup is a "retry later
 * or fix the typo" situation rather than an attack, so SaaS callers should not
 * record it as a security event.
 */
export class DnsResolutionError extends Error {
    hostname;
    constructor(hostname) {
        const message = `DNS resolution failed for "${hostname}"`;
        super(message);
        this.name = "DnsResolutionError";
        this.hostname = hostname;
    }
}
48
/** Hostnames that are refused when the lowercased name matches exactly. */
const BLOCKED_HOSTNAME_EXACT = new Set(["localhost", "broadcasthost", "ip6-localhost", "ip6-loopback", "0"]);
/** Suffixes that are refused when the lowercased hostname ends with one of them. */
const BLOCKED_HOSTNAME_SUFFIXES = [".local", ".localhost", ".internal", ".arpa", ".intranet", ".lan", ".home", ".private", ".corp"];
66
/** Dotted-quad spelling of an IPv4-mapped (`::ffff:a.b.c.d`) or IPv4-translated (`::ffff:0:a.b.c.d`) address. */
const IPV4_MAPPED_IPV6 = /^::ffff:(?:0:)?(\d+\.\d+\.\d+\.\d+)$/i;
/**
 * IPv4 range predicate — true if the address is private / reserved /
 * link-local / loopback / multicast / broadcast / CGNAT. Expects a valid
 * dotted-quad; caller must ensure that (e.g. via `net.isIP`).
 *
 * Input that does not split into four integers in 0–255 yields `false`.
 */
export function isPrivateIPv4(addr) {
    const parts = addr.split(".").map((p) => Number(p));
    if (parts.length !== 4 || parts.some((p) => !Number.isInteger(p) || p < 0 || p > 255)) {
        return false;
    }
    const [a, b, c] = parts;
    if (a === 0)
        return true; // 0.0.0.0/8 — "this network"
    if (a === 10)
        return true; // 10.0.0.0/8
    if (a === 127)
        return true; // 127.0.0.0/8 — loopback
    if (a === 169 && b === 254)
        return true; // 169.254.0.0/16 — link-local + cloud metadata
    if (a === 172 && b >= 16 && b <= 31)
        return true; // 172.16.0.0/12
    if (a === 192 && b === 168)
        return true; // 192.168.0.0/16
    if (a === 100 && b >= 64 && b <= 127)
        return true; // 100.64.0.0/10 — CGNAT
    if (a === 192 && b === 0 && c === 0)
        return true; // 192.0.0.0/24 — IETF
    if (a === 192 && b === 0 && c === 2)
        return true; // 192.0.2.0/24 — TEST-NET-1
    if (a === 198 && (b === 18 || b === 19))
        return true; // 198.18.0.0/15 — benchmark
    if (a === 198 && b === 51 && c === 100)
        return true; // 198.51.100.0/24 — TEST-NET-2
    if (a === 203 && b === 0 && c === 113)
        return true; // 203.0.113.0/24 — TEST-NET-3
    if (a >= 224 && a <= 239)
        return true; // 224.0.0.0/4 — multicast
    if (a >= 240)
        return true; // 240.0.0.0/4 — reserved + 255.255.255.255 broadcast
    return false;
}
/**
 * Expand a textual IPv6 address into its eight 16-bit groups.
 *
 * Accepts `::` compression, a trailing dotted-quad, and a `%zone` suffix
 * (which is dropped). Returns `null` when the text is not a well-formed
 * IPv6 address.
 */
function parseIPv6Groups(addr) {
    let text = addr.toLowerCase();
    const zoneAt = text.indexOf("%");
    if (zoneAt !== -1)
        text = text.slice(0, zoneAt);
    const lastColon = text.lastIndexOf(":");
    if (lastColon === -1)
        return null;
    // Rewrite a trailing dotted-quad as the two hex groups it stands for.
    const tail = text.slice(lastColon + 1);
    if (tail.includes(".")) {
        const octets = tail.split(".");
        if (octets.length !== 4 || octets.some((o) => !/^\d{1,3}$/.test(o) || Number(o) > 255)) {
            return null;
        }
        const [o1, o2, o3, o4] = octets.map(Number);
        text = `${text.slice(0, lastColon + 1)}${((o1 << 8) | o2).toString(16)}:${((o3 << 8) | o4).toString(16)}`;
    }
    const halves = text.split("::");
    if (halves.length > 2)
        return null;
    const compressed = halves.length === 2;
    const splitGroups = (part) => (part === "" ? [] : part.split(":"));
    const head = splitGroups(halves[0]);
    const rest = compressed ? splitGroups(halves[1]) : [];
    const gap = 8 - head.length - rest.length;
    // "::" must stand for at least one group; without it all eight must be present.
    if (compressed ? gap < 1 : gap !== 0)
        return null;
    const groups = [...head, ...new Array(compressed ? gap : 0).fill("0"), ...rest];
    if (groups.some((g) => !/^[0-9a-f]{1,4}$/.test(g)))
        return null;
    return groups.map((g) => parseInt(g, 16));
}
/**
 * IPv6 range predicate — true for loopback, ULA, link-local, unspecified,
 * multicast, and IPv4-embedding addresses whose embedded IPv4 address is
 * itself private (after unwrapping to the v4 check).
 *
 * The embedded-IPv4 check works on the parsed address, so it also covers the
 * hex spelling that `new URL()` produces (`::ffff:7f00:1`), uncompressed
 * spellings (`0:0:0:0:0:0:0:1`) and zone-suffixed literals (`::1%lo`).
 * Text that is neither matched by the prefix checks nor parseable as IPv6
 * yields `false`.
 */
export function isPrivateIPv6(addr) {
    const normalized = addr.toLowerCase();
    if (normalized === "::" || normalized === "::1")
        return true;
    if (normalized.startsWith("fe8") || normalized.startsWith("fe9") ||
        normalized.startsWith("fea") || normalized.startsWith("feb"))
        return true; // fe80::/10
    if (normalized.startsWith("fc") || normalized.startsWith("fd"))
        return true; // fc00::/7 ULA
    if (normalized.startsWith("ff"))
        return true; // ff00::/8 multicast
    // IPv4-mapped IPv6 (::ffff:a.b.c.d or ::ffff:0:a.b.c.d) — unwrap and delegate
    const mapped = normalized.match(IPV4_MAPPED_IPV6);
    if (mapped)
        return isPrivateIPv4(mapped[1]);
    // Structural pass: catches every other spelling of the addresses above.
    const groups = parseIPv6Groups(normalized);
    if (groups === null)
        return false;
    const [g0, g1, g2, g3, g4, g5, g6, g7] = groups;
    if (g0 !== 0 || g1 !== 0 || g2 !== 0 || g3 !== 0)
        return false;
    const embedsIPv4 = (g4 === 0 && g5 === 0) // ::/96 — unspecified, loopback, IPv4-compatible
        || (g4 === 0 && g5 === 0xffff) // ::ffff:0:0/96 — IPv4-mapped
        || (g4 === 0xffff && g5 === 0); // ::ffff:0:0:0/96 — IPv4-translated
    if (!embedsIPv4)
        return false;
    return isPrivateIPv4(`${g6 >>> 8}.${g6 & 0xff}.${g7 >>> 8}.${g7 & 0xff}`);
}
129
/**
 * Decode an integer-packed (`2130706433` = `127.0.0.1`), hex-encoded
 * (`0x7f000001`) or octal-encoded (`017700000001`) hostname into dotted-quad
 * form. Returns `null` if the input isn't a numeric hostname or does not fit
 * in 32 bits. Needed because some fetch stacks accept these encodings and
 * resolve them to private IPs, bypassing a naive string-only dotted-quad
 * check.
 *
 * A leading `0` followed only by octal digits is read as octal, matching
 * `inet_aton` and the WHATWG URL IPv4 parser. Any other all-digit string is
 * read as decimal.
 */
export function decodeNumericIPv4(hostname) {
    const s = hostname.toLowerCase().trim();
    if (!s)
        return null;
    let n;
    if (/^0[0-7]+$/.test(s)) {
        // Octal — "017700000001".
        n = parseInt(s, 8);
    }
    else if (/^[0-9]+$/.test(s)) {
        // Pure decimal (also catches single-number IPv4 form "2130706433").
        n = Number(s);
    }
    else if (/^0x[0-9a-f]+$/.test(s)) {
        // Hex — "0x7f000001".
        n = Number(s);
    }
    else {
        return null;
    }
    // Values beyond 32 bits cannot be an IPv4 address.
    if (!Number.isInteger(n) || n < 0 || n > 0xffffffff)
        return null;
    return [
        (n >>> 24) & 0xff,
        (n >>> 16) & 0xff,
        (n >>> 8) & 0xff,
        n & 0xff,
    ].join(".");
}
162
/**
 * Synchronous string-only check. Rejects:
 * - literal private / reserved IP addresses (dotted-quad OR numeric/hex encoding)
 * - exact blocked hostnames (localhost, 0, etc.)
 * - suffix-blocked hostnames (.local, .internal, .arpa, ...)
 *
 * Before matching, the hostname is canonicalised: surrounding whitespace is
 * trimmed, it is lowercased, the square brackets that `URL.hostname` puts
 * around IPv6 literals are removed, and one trailing root dot is dropped
 * (`localhost.` is the same host as `localhost`).
 *
 * Returns `null` if the host is acceptable, or a human-readable reason
 * string if it should be blocked.
 */
export function isPrivateOrReservedHost(hostname) {
    if (!hostname)
        return "empty hostname";
    const canonical = hostname
        .trim()
        .toLowerCase()
        .replace(/^\[|\]$/g, "")
        .replace(/\.$/, "");
    if (!canonical)
        return "empty hostname";
    if (BLOCKED_HOSTNAME_EXACT.has(canonical)) {
        return `reserved hostname (${canonical})`;
    }
    for (const suffix of BLOCKED_HOSTNAME_SUFFIXES) {
        if (canonical.endsWith(suffix))
            return `reserved TLD / suffix (${suffix})`;
    }
    // Numeric / hex encoding of IPv4 — decode and test.
    const decoded = decodeNumericIPv4(canonical);
    if (decoded) {
        if (isPrivateIPv4(decoded))
            return `private / reserved IPv4 (${decoded}, encoded as ${hostname})`;
        // Also reject all numeric hostnames that decode to public IPs — they're a
        // deniability smell. Callers who intentionally audit a literal IP will
        // pass it in dotted-quad form.
        return `ambiguous numeric-encoded IPv4 (${hostname} decodes to ${decoded}); pass dotted-quad form explicitly`;
    }
    // `isIP` must see the bare literal: it returns 0 for bracketed IPv6 and for
    // addresses with a trailing dot.
    const version = isIP(canonical); // 4 | 6 | 0
    if (version === 4 && isPrivateIPv4(canonical))
        return "private / reserved IPv4 range";
    if (version === 6) {
        // Drop a "%zone" suffix before the range check.
        const bare = canonical.replace(/%.*$/, "");
        if (isPrivateIPv6(bare))
            return "private / reserved IPv6 range";
    }
    return null;
}
202
/**
 * Full SSRF check: the string check above PLUS a DNS lookup to guarantee the
 * resolved address isn't in a private range.
 *
 * Throws `SSRFError` when the hostname itself or any resolved address is
 * blocked, and `DnsResolutionError` when neither the A nor the AAAA lookup
 * returns an address. Resolves with no value when the target is acceptable.
 *
 * IPv6 literals may be passed with or without the square brackets that
 * `URL.hostname` adds. The brackets are stripped before the literal check, so
 * a bracketed literal is not sent to DNS.
 *
 * Time-of-check-vs-time-of-use: an attacker-controlled DNS server can return
 * a public IP on first lookup and a private IP on the subsequent fetch ("DNS
 * rebinding"). Mitigations: cache the resolved IP and dial to THAT IP (host
 * header preserved), or use a resolver that refuses re-resolution within a
 * TTL window. This function validates; it does not pin. For the audit
 * engine's own fetches, the pinning layer is layered on top via `safeFetch`.
 */
export async function validateTargetHost(hostname, options = {}) {
    // Strip the brackets up front; `net.isIP` returns 0 for "[::1]".
    const target = hostname ? hostname.replace(/^\[|\]$/g, "") : hostname;
    const stringReason = isPrivateOrReservedHost(target);
    if (stringReason)
        throw new SSRFError(hostname, stringReason);
    // Literal IPs pass the DNS step trivially (isIP > 0 ⇒ not a name to resolve).
    if (isIP(target) !== 0)
        return;
    const resolver = options.resolver ?? {
        resolve4: (h) => dns.resolve4(h),
        resolve6: (h) => dns.resolve6(h),
    };
    // Both families are queried. A rejected lookup for one family is ignored as
    // long as the other family returns at least one address.
    const [v4, v6] = await Promise.allSettled([
        resolver.resolve4(target),
        resolver.resolve6(target),
    ]);
    const addrs = [];
    if (v4.status === "fulfilled")
        for (const a of v4.value)
            addrs.push({ kind: "v4", addr: a });
    if (v6.status === "fulfilled")
        for (const a of v6.value)
            addrs.push({ kind: "v6", addr: a });
    if (addrs.length === 0) {
        throw new DnsResolutionError(hostname);
    }
    // One private address among the answers is enough to refuse the host.
    for (const { kind, addr } of addrs) {
        const isPrivate = kind === "v4" ? isPrivateIPv4(addr) : isPrivateIPv6(addr);
        if (isPrivate) {
            throw new SSRFError(hostname, `resolves to private ${kind} address ${addr}`);
        }
    }
}
245
/**
 * Answers "does this URL point at localhost or a private network?".
 *
 * The CLI uses it to switch to a conservative crawl preset when a developer
 * runs `pseolint http://localhost:3000` — a cache-cold local server can turn
 * every fetch into a burst of DB queries.
 *
 * Anything that does not parse as a URL, or parses without a hostname
 * (paths, `file://`, empty strings), yields `false`. The verdict itself comes
 * from `isPrivateOrReservedHost`, so the two cannot drift apart.
 */
export function isLocalhostUrl(url) {
    let hostname;
    try {
        hostname = new URL(url).hostname;
    }
    catch {
        // Not a parseable URL.
        return false;
    }
    if (!hostname)
        return false;
    // URL.hostname wraps IPv6 literals in square brackets; remove them.
    const bareHost = hostname.replace(/^\[|\]$/g, "");
    const blockReason = isPrivateOrReservedHost(bareHost);
    return blockReason !== null;
}
268
+ //# sourceMappingURL=ssrf-guard.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ssrf-guard.js","sourceRoot":"","sources":["../src/ssrf-guard.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,GAAG,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAEhC;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,MAAM,OAAO,SAAU,SAAQ,KAAK;IACzB,QAAQ,CAAS;IACjB,MAAM,CAAS;IAExB,YAAY,QAAgB,EAAE,MAAc;QAC1C,KAAK,CAAC,gBAAgB,QAAQ,uBAAuB,MAAM,EAAE,CAAC,CAAC;QAC/D,IAAI,CAAC,IAAI,GAAG,WAAW,CAAC;QACxB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IAClC,QAAQ,CAAS;IAE1B,YAAY,QAAgB;QAC1B,KAAK,CAAC,8BAA8B,QAAQ,GAAG,CAAC,CAAC;QACjD,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAC;QACjC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;CACF;AAED,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,WAAW;IACX,eAAe;IACf,eAAe;IACf,cAAc;IACd,GAAG;CACJ,CAAC,CAAC;AAEH,MAAM,yBAAyB,GAAG;IAChC,QAAQ;IACR,YAAY;IACZ,WAAW;IACX,OAAO;IACP,WAAW;IACX,MAAM;IACN,OAAO;IACP,UAAU;IACV,OAAO;CACR,CAAC;AAEF,MAAM,gBAAgB,GAAG,uCAAuC,CAAC;AAEjE;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC;QACtF,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC;IACrB,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,6BAA6B;IACvD,IAAI,CAAC,KAAK,EAAE;QAAE,OAAO,IAAI,CAAC,CAAC,aAAa;IACxC,IAAI,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,yBAAyB;IACrD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,+CAA+C;IACxF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC,CAAC,gBAAgB;IAClE,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,iBAAiB;IAC1D,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,wBAAwB;IAC3E,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC
;QAAE,OAAO,IAAI,CAAC,CAAC,sBAAsB;IAC/E,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,4BAA4B;IACrF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,4BAA4B;IAClF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,+BAA+B;IAC3F,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,8BAA8B;IACzF,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,0BAA0B;IACjE,IAAI,CAAC,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,qDAAqD;IAChF,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACtC,IAAI,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,KAAK;QAAE,OAAO,IAAI,CAAC;IAC7D,IAAI,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC;QAC5D,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,YAAY;IAC3F,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,eAAe;IAC5F,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,qBAAqB;IACnE,8EAA8E;IAC9E,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5C,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAgB;IAChD,MAAM,CAAC,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IACxC,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,IAAI,CAAC,GAAkB,IAAI,CAAC;IAC5B,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACvB,oEAAoE;QACpE,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACzB,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,IAAI,MAAM,IAAI,UAAU;YAAE,CAAC,GAAG,MAAM,CAAC;IAClF,CAAC;SAAM,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACnC,sBAAsB;QACtB,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACzB,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,IAAI,MAAM,IAAI,UAAU;YAAE,CAAC,GAAG,MAAM,CAAC;IAClF,CAAC;IACD,IAAI,CAAC,KAAK,IAAI;QAAE,OAAO,IAAI,
CAAC;IAC5B,OAAO;QACL,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,IAAI;QACjB,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,IAAI;QACjB,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,IAAI;QAChB,CAAC,GAAG,IAAI;KACT,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACd,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,IAAI,CAAC,QAAQ;QAAE,OAAO,gBAAgB,CAAC;IACvC,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;IAErC,IAAI,sBAAsB,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,sBAAsB,KAAK,GAAG,CAAC;IACxC,CAAC;IACD,KAAK,MAAM,MAAM,IAAI,yBAAyB,EAAE,CAAC;QAC/C,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,0BAA0B,MAAM,GAAG,CAAC;IACzE,CAAC;IAED,oDAAoD;IACpD,MAAM,OAAO,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAC5C,IAAI,OAAO,EAAE,CAAC;QACZ,IAAI,aAAa,CAAC,OAAO,CAAC;YAAE,OAAO,4BAA4B,OAAO,gBAAgB,QAAQ,GAAG,CAAC;QAClG,0EAA0E;QAC1E,uEAAuE;QACvE,+BAA+B;QAC/B,OAAO,mCAAmC,QAAQ,eAAe,OAAO,qCAAqC,CAAC;IAChH,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY;IAC5C,IAAI,OAAO,KAAK,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC;QAAE,OAAO,+BAA+B,CAAC;IACrF,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;QAClB,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAClE,IAAI,aAAa,CAAC,IAAI,CAAC;YAAE,OAAO,+BAA+B,CAAC;IAClE,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAUD;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,UAAqC,EAAE;IAEvC,MAAM,YAAY,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IACvD,IAAI,YAAY;QAAE,MAAM,IAAI,SAAS,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAE9D,8EAA8E;IAC9E,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO;IAEjC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI;QACnC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;QAChC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;KACjC,CAAC;IAEF,MAAM,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;QACxC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAC3B,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;KAC5B,CAAC,CAAC;IAEH,MAAM,KAAK,GAA+C,EAAE,CAAC;IAC7D,IAAI,EAAE,CAAC,MAAM,KAAK,WAAW;QAAE,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;IAC7F,IAAI,EAAE,CAAC,MAAM,KA
AK,WAAW;QAAE,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;IAE7F,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,KAAK,EAAE,CAAC;QACnC,MAAM,SAAS,GAAG,IAAI,KAAK,IAAI,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC5E,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,IAAI,SAAS,CAAC,QAAQ,EAAE,uBAAuB,IAAI,YAAY,IAAI,EAAE,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,QAAQ;QAAE,OAAO,KAAK,CAAC;IACnC,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IACrD,OAAO,uBAAuB,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC;AAChD,CAAC"}
package/dist/types.d.ts CHANGED
@@ -1,4 +1,10 @@
1
1
  export type Severity = "info" | "warning" | "error" | "critical";
2
+ /** Verdict ladder — replaces the old numeric `score` field as the user-facing signal. */
3
+ export type Verdict = "ready" | "caution" | "concerning" | "critical";
4
+ /** Letter grade per category. */
5
+ export type Grade = "A" | "B" | "C" | "D" | "F";
6
+ /** Top-level v0.4 schema version. Bumps on every breaking output change. */
7
+ export declare const SCHEMA_VERSION = "2026-04-v0.4";
2
8
  /** Options for `normalizeAuditUrl` (HTTP identity). */
3
9
  export interface NormalizeUrlOptions {
4
10
  /** When true (default), drop `?query` for URL identity. */
@@ -38,6 +44,12 @@ export interface RuleResult {
38
44
  fix?: string;
39
45
  /** Google documentation URL backing this finding. */
40
46
  ref?: string;
47
+ /**
48
+ * Marketing-page deeplink for this rule (v0.4+). Always populated by the
49
+ * auditor — points to https://pseolint.dev/rules/{slug} where slug is the
50
+ * rule-id segment after the namespace prefix.
51
+ */
52
+ docsUrl?: string;
41
53
  /** Primary page this finding refers to, when applicable. */
42
54
  pageUrl?: string;
43
55
  /** Other URLs involved (e.g. cluster members, related pairs). */
@@ -51,15 +63,47 @@ export interface RuleResult {
51
63
  /** Fix effort level assigned by the enrichment pipeline. */
52
64
  effort?: FixEffort;
53
65
  }
54
- export interface CategoryScores {
55
- spam: number;
56
- content: number;
57
- aeo: number;
58
- links: number;
59
- tech: number;
60
- data: number;
61
- schema: number;
62
- cannibal: number;
66
+ /** v0.4 four-category bucket keys. */
67
+ export type CategoryKey = "integrity" | "discoverability" | "citation" | "data" | "audit";
68
+ /** Per-category grade + raw issue count. Audit category exists for completeness but is never weighted. */
69
+ export interface CategoryGrade {
70
+ grade: Grade;
71
+ issues: number;
72
+ }
73
+ export type CategoryGrades = Record<CategoryKey, CategoryGrade>;
74
+ /** Issues bucketed by severity — the v0.4 replacement for the flat `findings` array. */
75
+ export interface IssueBuckets {
76
+ /** Severity = error or critical. Must be fixed before shipping. */
77
+ blockers: RuleResult[];
78
+ /** Severity = warning. Should be fixed before scaling. */
79
+ shouldFix: RuleResult[];
80
+ /** Severity = info. Tracked for trend analysis. */
81
+ informational: RuleResult[];
82
+ }
83
+ /** Crawl statistics surfaced under diagnostics. */
84
+ export interface CrawlStats {
85
+ /** Total URLs the crawler considered (sitemap + discovered links). */
86
+ discovered: number;
87
+ /** URLs the crawler successfully fetched and audited. */
88
+ fetched: number;
89
+ /**
90
+ * URLs the crawler fetched but excluded from the rule pipeline (non-HTML
91
+ * content-type, dedup, robots-disallow, render budget, etc.).
92
+ */
93
+ skipped: number;
94
+ }
95
+ /** Engine-internal diagnostics — weight 0, never affects verdict. */
96
+ export interface Diagnostics {
97
+ /** Origin readiness aggregate (median/p95/error ratio). Null when no live fetches occurred. */
98
+ originReadiness: import("./fetch-observer.js").ReadinessReport | null;
99
+ crawlStats: CrawlStats;
100
+ /**
101
+ * Engine-emitted `audit/*` findings (e.g. `audit/duplicate-url`,
102
+ * `audit/skipped-by-robots`). Always severity=info, never affect the
103
+ * verdict, never appear in `summary.issues`. Surfaced here so consumers
104
+ * (telemetry, debug UIs) can still see what was skipped or deduped.
105
+ */
106
+ auditFindings: RuleResult[];
63
107
  }
64
108
  /** Options for HTTP caching during audits. */
65
109
  export interface CacheOptions {
@@ -67,6 +111,13 @@ export interface CacheOptions {
67
111
  dir?: string;
68
112
  /** TTL for entries without ETag/Last-Modified validators. Default: 7 days. */
69
113
  ttlMs?: number;
114
+ /**
115
+ * Maximum total size of the cache directory in bytes. When exceeded after a
116
+ * run, oldest-mtime entries are evicted until under the cap. Also sweeps
117
+ * leftover `.tmp` files from crashed writes. `<= 0` disables size-based
118
+ * eviction. Default: 209_715_200 (200 MB).
119
+ */
120
+ maxBytes?: number;
70
121
  }
71
122
  /** Cache stats reported at end of audit. */
72
123
  export interface CacheStats {
@@ -117,12 +168,34 @@ export interface AiOptions {
117
168
  } | false;
118
169
  }
119
170
  export interface AuditSummary {
120
- score: number;
121
- categoryScores: CategoryScores;
171
+ /** Schema version. v0.4 = "2026-04-v0.4". Wave 2 / 3 consumers branch on this. */
172
+ schemaVersion: typeof SCHEMA_VERSION;
173
+ /** User-facing verdict ladder. */
174
+ verdict: Verdict;
175
+ /**
176
+ * Internal numeric risk score (0–100, low = good). Retained for CI thresholding,
177
+ * trend deltas, and alert-gate diff logic. NEVER displayed to humans.
178
+ */
179
+ risk: number;
180
+ /** One-liner summarising counts: e.g. "3 ship-blockers, 16 should-fix". */
181
+ headline: string;
182
+ /** Per-category grade + count. */
183
+ categories: CategoryGrades;
184
+ /** Findings bucketed by severity. */
185
+ issues: IssueBuckets;
186
+ /**
187
+ * v0.4 §4.11 — pre-flight site classification. Decides which rules apply
188
+ * based on URL count, template clustering, and framework signal. The
189
+ * `suppressedRules` list is what the rule dispatcher honours. Pass
190
+ * `strict: true` in AuditOptions to keep the classification but force all
191
+ * rules to run anyway.
192
+ */
193
+ siteClassification: import("./site-classifier.js").SiteClassification;
194
+ /** Engine-internal diagnostics (origin readiness, crawl stats, engine-emitted audit findings). Weight 0. */
195
+ diagnostics: Diagnostics;
122
196
  groupScores?: Record<string, number>;
123
197
  groupPageCounts?: Record<string, number>;
124
198
  pageCount: number;
125
- findings: RuleResult[];
126
199
  /** True when the enrichment pipeline detects template-generated content. */
127
200
  templateDetected?: boolean;
128
201
  /** Pre-enrichment finding count, for backward compatibility with CI scripts. */
@@ -160,12 +233,6 @@ export interface AuditOptions {
160
233
  uniqueValueMinWords?: number;
161
234
  metaUniquenessMinJaccard?: number;
162
235
  linkDepthMaxClicks?: number;
163
- /** Minimum pages in one directory before hub/index coverage is required. */
164
- hubPagesMinSiblings?: number;
165
- /** Skip hub/index checks when a directory has more than this many pages (e.g. large blogs). */
166
- hubPagesMaxSiblings?: number;
167
- titleOverlapThreshold?: number;
168
- keywordCollisionMinShared?: number;
169
236
  templateCoverageMinPages?: number;
170
237
  /** aeo/answer-first: max words in the first paragraph for extractable answer. */
171
238
  answerFirstMaxWords?: number;
@@ -188,7 +255,12 @@ export interface AuditOptions {
188
255
  timeout?: number;
189
256
  /** Audit a random subset of N pages. 0 means all pages (default: 0). */
190
257
  sampleSize?: number;
191
- /** URL/path glob patterns to exclude from the audit. */
258
+ /**
259
+ * URL/path glob patterns to exclude from the audit. v0.4: globs match
260
+ * against the URL pathname only (e.g. "/api/foo"), not the full URL.
261
+ * The auditor logs a warning at the end of the audit for any pattern that
262
+ * matched zero discovered URLs (likely-typo signal).
263
+ */
192
264
  ignore?: string[];
193
265
  crawlDiscovery?: boolean;
194
266
  /**
@@ -243,7 +315,81 @@ export interface AuditOptions {
243
315
  ai?: AiOptions;
244
316
  /** Local-only telemetry (JSONL) options. When omitted or `enabled: false`, no records are written. */
245
317
  telemetry?: TelemetryOptions;
318
+ /**
319
+ * External abort signal. When aborted, in-flight fetches are cancelled and
320
+ * `auditSource` throws an `AbortError`. Host code can use this to kill an
321
+ * audit that exceeded a per-user budget or was cancelled by the user.
322
+ */
323
+ signal?: AbortSignal;
324
+ /**
325
+ * When true, every crawled URL's hostname is validated with
326
+ * `validateTargetHost` before fetch — resolves the hostname and rejects if
327
+ * any address is in a private / reserved / link-local / loopback / multicast
328
+ * range. Applies to the source URL, sitemap entries, redirect targets, and
329
+ * discovered links. Defends against SSRF / DNS-rebinding when the library is
330
+ * invoked against user-supplied URLs (e.g. from a hosted audit service).
331
+ * Default: false (CLI users auditing localhost / staging sites should not
332
+ * be broken silently).
333
+ */
334
+ guardSsrf?: boolean;
335
+ /**
336
+ * When true (default), sitemap URLs that match a `Disallow:` rule in the
337
+ * target's robots.txt are skipped at fetch time instead of crawled. Set to
338
+ * false to audit staging / internal sites that Disallow everything.
339
+ */
340
+ respectRobotsTxt?: boolean;
341
+ /**
342
+ * Preset that flips several safety options at once.
343
+ * "saas" — intended for hosted services auditing user-submitted URLs:
344
+ * guardSsrf=true, respectRobotsTxt=true, tighter maxFetchBytes cap,
345
+ * followRedirects stays true (audits need final URL).
346
+ * "cli" — intended for local CLI / dev use:
347
+ * guardSsrf=false (auditing localhost is OK), respectRobotsTxt=true,
348
+ * default caps.
349
+ * "dev" — tiny crawl budget for localhost probing: concurrency=1,
350
+ * sampleSize=25, maxCrawlDiscovered=50. Designed so a cache-cold
351
+ * `pseolint http://localhost:3000` doesn't thundering-herd a dev DB.
352
+ * Auto-selected on localhost sources unless `autoDevPreset: false`.
353
+ * Individual options on AuditOptions override the preset when set.
354
+ * Default: undefined (no preset applied and existing opt-in behaviour, except for the automatic "dev" selection on localhost sources noted above).
355
+ */
356
+ safeMode?: SafeMode;
357
+ /**
358
+ * When true (default), audit sources pointing at localhost / private
359
+ * networks are auto-promoted to the `dev` safeMode preset. Set to false
360
+ * to opt out (e.g. `--full` on the CLI). Explicit `safeMode` beats this.
361
+ */
362
+ autoDevPreset?: boolean;
363
+ /**
364
+ * Hard ceiling on URLs discovered via link-following before sampling.
365
+ * Protects against malicious sites with many self-links that could extend
366
+ * the crawl up to the byte budget. Default: 5000.
367
+ */
368
+ maxCrawlDiscovered?: number;
369
+ /**
370
+ * When false, 3xx responses are returned as-is (the audit will see the
371
+ * redirect location header and can report it) instead of being followed. Useful
372
+ * for security-sensitive audits that must not leave the exact submitted
373
+ * URL. Default: true.
374
+ */
375
+ followRedirects?: boolean;
376
+ /**
377
+ * When false, disables the in-flight backpressure watchdog that aborts the
378
+ * audit when origin latency / 5xx rate spikes past thresholds during the
379
+ * crawl. On by default; the last line of defence against a cache-cold
380
+ * origin ballooning an audit into an expensive egress event.
381
+ */
382
+ backpressure?: boolean;
383
+ /**
384
+ * v0.4 §4.11 — when true, the site classifier still runs and `summary.siteClassification`
385
+ * is populated, but `suppressedRules` is forced to `[]` so every rule executes
386
+ * regardless of detected site type. Use this to inspect what the classifier
387
+ * sees on a site that would otherwise have pSEO-only rules suppressed.
388
+ * Default: false.
389
+ */
390
+ strict?: boolean;
246
391
  }
392
+ /** Preset names accepted by `AuditOptions.safeMode`: "saas" (hosted services auditing user-submitted URLs), "cli" (local CLI / dev use), "dev" (tiny crawl budget for localhost probing). */ export type SafeMode = "saas" | "cli" | "dev";
247
393
  export type SamplingStrategy = "stratified" | "random";
248
394
  /** A single page's source data for data-source comparison. */
249
395
  export interface PageDataRecord {
@@ -267,6 +413,12 @@ export interface HttpMeta {
267
413
  redirectChain: string[];
268
414
  xRobotsTag: string;
269
415
  linkHeader: string;
416
+ /**
417
+ * v0.4: lower-cased response headers. Populated for the source URL only
418
+ * (used by the dev-server framework detector). Other crawled pages can
419
+ * leave this undefined to keep the audit memory-bounded.
420
+ */
421
+ headers?: Record<string, string>;
270
422
  }
271
423
  export interface ParsedPage {
272
424
  url: string;