@pseolint/core 0.3.2 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -1
- package/dist/ai/triage.d.ts.map +1 -1
- package/dist/ai/triage.js +8 -1
- package/dist/ai/triage.js.map +1 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +566 -136
- package/dist/auditor.js.map +1 -1
- package/dist/backpressure.d.ts +68 -0
- package/dist/backpressure.d.ts.map +1 -0
- package/dist/backpressure.js +81 -0
- package/dist/backpressure.js.map +1 -0
- package/dist/cache.d.ts +73 -0
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +258 -19
- package/dist/cache.js.map +1 -1
- package/dist/enrich-findings.d.ts.map +1 -1
- package/dist/enrich-findings.js +1 -14
- package/dist/enrich-findings.js.map +1 -1
- package/dist/fetch-observer.d.ts +97 -0
- package/dist/fetch-observer.d.ts.map +1 -0
- package/dist/fetch-observer.js +124 -0
- package/dist/fetch-observer.js.map +1 -0
- package/dist/formatters/console.d.ts +7 -9
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +218 -254
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/html.d.ts +5 -1
- package/dist/formatters/html.d.ts.map +1 -1
- package/dist/formatters/html.js +352 -570
- package/dist/formatters/html.js.map +1 -1
- package/dist/formatters/index.d.ts +4 -1
- package/dist/formatters/index.d.ts.map +1 -1
- package/dist/formatters/index.js +1 -1
- package/dist/formatters/index.js.map +1 -1
- package/dist/formatters/json.d.ts +11 -1
- package/dist/formatters/json.d.ts.map +1 -1
- package/dist/formatters/json.js +5 -1
- package/dist/formatters/json.js.map +1 -1
- package/dist/formatters/markdown.d.ts +7 -1
- package/dist/formatters/markdown.d.ts.map +1 -1
- package/dist/formatters/markdown.js +77 -70
- package/dist/formatters/markdown.js.map +1 -1
- package/dist/index.d.ts +13 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -7
- package/dist/index.js.map +1 -1
- package/dist/page-filter.d.ts +50 -0
- package/dist/page-filter.d.ts.map +1 -0
- package/dist/page-filter.js +86 -0
- package/dist/page-filter.js.map +1 -0
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +0 -6
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/content/unique-value.d.ts.map +1 -1
- package/dist/rules/content/unique-value.js +1 -0
- package/dist/rules/content/unique-value.js.map +1 -1
- package/dist/rules/scope.d.ts.map +1 -1
- package/dist/rules/scope.js +6 -14
- package/dist/rules/scope.js.map +1 -1
- package/dist/rules/tech/robots-sitemap-presence.d.ts +9 -1
- package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -1
- package/dist/rules/tech/robots-sitemap-presence.js +14 -5
- package/dist/rules/tech/robots-sitemap-presence.js.map +1 -1
- package/dist/safe-mode-preset.d.ts +27 -0
- package/dist/safe-mode-preset.d.ts.map +1 -0
- package/dist/safe-mode-preset.js +54 -0
- package/dist/safe-mode-preset.js.map +1 -0
- package/dist/site-classifier.d.ts +83 -0
- package/dist/site-classifier.d.ts.map +1 -0
- package/dist/site-classifier.js +205 -0
- package/dist/site-classifier.js.map +1 -0
- package/dist/ssrf-guard.d.ts +96 -0
- package/dist/ssrf-guard.d.ts.map +1 -0
- package/dist/ssrf-guard.js +268 -0
- package/dist/ssrf-guard.js.map +1 -0
- package/dist/types.d.ts +202 -19
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +2 -1
- package/dist/types.js.map +1 -1
- package/package.json +2 -2
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v0.4 Site Classifier — runs BEFORE rules to gate which rule set applies.
|
|
3
|
+
*
|
|
4
|
+
* Today the engine runs every rule against every audited site regardless of
|
|
5
|
+
* whether the site is actually programmatic-SEO. A 23-page marketing site
|
|
6
|
+
* gets pSEO-targeted findings that don't apply; a 50,000-page directory
|
|
7
|
+
* gets the same audit shape as a small blog. This pre-flight classifier
|
|
8
|
+
* decides which "kind" of site we're auditing and emits a list of ruleIds
|
|
9
|
+
* that the dispatcher will skip when site type is small-marketing/blog.
|
|
10
|
+
*
|
|
11
|
+
* Heuristics (v1, ships in v0.4):
|
|
12
|
+
* - Sitemap URL count: <50 = bias small/blog; ≥1000 = bias programmatic.
|
|
13
|
+
* - URL-pattern clustering: normalize numeric/slug segments; if top-3
|
|
14
|
+
* templates cover ≥60% of URLs → strong programmatic signal.
|
|
15
|
+
* - Framework signal: pass-through from dev-server detection.
|
|
16
|
+
*
|
|
17
|
+
* Deferred to v0.4.1+ (per spec §4.11):
|
|
18
|
+
* - DOM-skeleton hashing across pages.
|
|
19
|
+
* - Per-cluster classification (mixed pSEO + marketing).
|
|
20
|
+
* - Per-page applicability tagging on findings.
|
|
21
|
+
*/
|
|
22
|
+
/** Rule ids that only apply to programmatic-SEO sites; classifySite copies this list into `suppressedRules` when the site type is "small-marketing" or "blog". */
|
|
23
|
+
export const PSEO_ONLY_RULE_IDS = [
|
|
24
|
+
"spam/template-coverage",
|
|
25
|
+
"spam/template-diversity",
|
|
26
|
+
"spam/entity-swap",
|
|
27
|
+
"cannibal/url-pattern",
|
|
28
|
+
];
|
|
29
|
+
/**
 * Normalize a pathname into a hashable template by replacing path segments
 * that look like values with type placeholders.
 *
 * Segment rules, applied in order:
 *   - digits only                                  → ":n"
 *   - lowercase hyphenated words ("los-angeles")   → ":slug"
 *   - 12 or more lowercase letters, no hyphen      → ":slug"
 *   - anything else ("blog", "about", "Docs")      → kept literally, so a
 *     collection root does not collide with its detail-page template
 *
 * Empty segments (leading, trailing or doubled slashes) are discarded, so
 * "/blog/hello-world", "/blog/hello-world/" and "/blog//hello-world" all
 * map to the same template.
 *
 * Examples:
 *   /california/los-angeles/plumbers → /california/:slug/plumbers
 *   /blog/hello-world                → /blog/:slug
 *   /post/12345                      → /post/:n
 *   /chicagoplumbers                 → /:slug
 *   /                                → /
 *
 * @param {string} pathname URL path; a falsy value is treated as "/".
 * @returns {string} Template path, always starting with "/".
 */
export function normalizePathToTemplate(pathname) {
    // Dropping every empty segment handles the missing leading slash, any
    // number of trailing slashes and accidental "//" in one step.
    const segments = (pathname || "/").split("/").filter((seg) => seg !== "");
    if (segments.length === 0) {
        return "/";
    }
    const out = segments.map((seg) => {
        // Pure numeric segment.
        if (/^\d+$/.test(seg)) {
            return ":n";
        }
        // Slug-like: at least two lowercase alphanumeric words joined by
        // hyphens (e.g. "los-angeles", "hello-world").
        if (/^[a-z0-9]+(?:-[a-z0-9]+)+$/.test(seg)) {
            return ":slug";
        }
        // Long letter-only segments are treated as concatenated-word slugs
        // (e.g. "chicagoplumbers"); short single words stay literal.
        if (seg.length >= 12 && /^[a-z]+$/.test(seg)) {
            return ":slug";
        }
        return seg;
    });
    return `/${out.join("/")}`;
}
|
|
74
|
+
/**
 * Reduce a URL to its template path; host, query string and fragment are
 * ignored. Input that `URL` cannot parse is treated as a raw path when it is
 * a non-empty string; anything else yields null.
 */
function urlToTemplate(url) {
    let pathname;
    try {
        ({ pathname } = new URL(url));
    }
    catch {
        // Not an absolute URL — fall back to treating the text as a raw path.
        const usable = typeof url === "string" && url.length > 0;
        if (!usable) {
            return null;
        }
        pathname = url.split("?")[0].split("#")[0];
    }
    return normalizePathToTemplate(pathname);
}
|
|
89
|
+
/**
 * Group URLs by their normalized template and report each template's page
 * count and share. URLs that cannot be turned into a template are ignored
 * and do not count toward the denominator. The result is ordered by count
 * (largest first), ties broken by template name.
 */
export function clusterUrlTemplates(urls) {
    const tally = new Map();
    let classified = 0;
    for (const candidate of urls) {
        const template = urlToTemplate(candidate);
        if (template !== null) {
            tally.set(template, (tally.get(template) ?? 0) + 1);
            classified += 1;
        }
    }
    if (classified === 0) {
        return [];
    }
    const byCountThenName = (left, right) => right.count - left.count || left.template.localeCompare(right.template);
    return [...tally]
        .map(([template, count]) => ({ template, count, ratio: count / classified }))
        .sort(byCountThenName);
}
|
|
110
|
+
/**
 * Classify a site from its URL list + framework signal. Pure function.
 *
 * Contract: callers must pass the FULL discovered URL list (sitemap +
 * crawl), not the post-sample list. The classifier needs the raw size
 * signal to distinguish a 5000-page directory from a 25-page sample of one.
 *
 * Decision table (n = urls.length; shares are relative to the URLs that
 * produced a template):
 *   - n === 0   → "unclear", confidence 0
 *   - n >= 1000 → "programmatic-directory" (0.9) when the top 3 templates
 *                 cover >= 60%, otherwise "ecommerce" (0.6)
 *   - n < 50    → "blog" (0.85) when >= 40% of pages live under /blog;
 *                 else "small-marketing" (0.85), unless one template of
 *                 depth >= 2 covers >= 60%, which stays "unclear" (0.5)
 *   - otherwise → "programmatic-directory" (0.7) when the top 3 templates
 *                 cover >= 60%, else "unclear" (0.5)
 *
 * @param {{ urls?: string[], framework?: string }} input A missing or
 *   non-array `urls` is treated as an empty list; a missing `framework`
 *   as "unknown".
 * @returns {{ type: string, confidence: number, signals: object[], suppressedRules: string[] }}
 *   `suppressedRules` is a copy of PSEO_ONLY_RULE_IDS for "small-marketing"
 *   and "blog" sites, and empty otherwise.
 */
export function classifySite(input) {
    const urls = Array.isArray(input?.urls) ? input.urls : [];
    const framework = input?.framework ?? "unknown";
    const clusters = clusterUrlTemplates(urls);
    const top = clusters[0];
    const signals = [{ kind: "sitemap-url-count", value: urls.length }];
    if (top) {
        signals.push({
            kind: "url-pattern-cluster-coverage",
            topTemplate: top.template,
            pages: top.count,
            ratio: top.ratio,
        });
    }
    signals.push({ kind: "framework-detected", value: framework });
    // Empty list → unclear (no signal to act on).
    if (urls.length === 0) {
        return { type: "unclear", confidence: 0, signals, suppressedRules: [] };
    }
    // Coverage is computed from integer page counts and divided once, so a
    // share that is exactly on a threshold is not lost to float accumulation.
    const pagesIn = (group) => group.reduce((sum, c) => sum + c.count, 0);
    const templatedPages = pagesIn(clusters);
    const coverage = (group) => (templatedPages === 0 ? 0 : pagesIn(group) / templatedPages);
    const top3Ratio = coverage(clusters.slice(0, 3));
    let type = "unclear";
    let confidence = 0;
    if (urls.length >= 1000) {
        if (top3Ratio >= 0.6) {
            type = "programmatic-directory";
            confidence = 0.9;
        }
        else {
            // Lots of pages but no clear template clustering — likely ecommerce
            // (varied product URLs) or a sprawling content site.
            type = "ecommerce";
            confidence = 0.6;
        }
    }
    else if (urls.length < 50) {
        // Small site. Detect blog separately from generic small-marketing.
        //
        // Single-word slugs stay literal during normalization, so blog posts
        // are spread over several templates (/blog/:slug, /blog/changelog,
        // /blog/2024/:slug, ...). The blog share is therefore the sum over
        // every template under /blog, not just the largest one.
        const blogRatio = coverage(clusters.filter((c) => c.template === "/blog" || c.template.startsWith("/blog/")));
        // Hyphenated top-level pages (/about-us, /privacy-policy, ...) all
        // collapse to /:slug, so a dominant one-segment template says nothing
        // on a tiny site. Only a dominant template at least two segments deep
        // counts as a programmatic hint at this scale.
        const topIsDeepCluster = top !== undefined &&
            top.template.split("/").filter(Boolean).length >= 2 &&
            top.ratio >= 0.6;
        if (blogRatio >= 0.4) {
            type = "blog";
            confidence = 0.85;
        }
        else if (!topIsDeepCluster) {
            // Tiny site without a deep dominant template — marketing pages.
            type = "small-marketing";
            confidence = 0.85;
        }
        else {
            // Tiny site WITH a deep dominant template — likely a templated
            // micro-site (rare, but possible). Don't claim it's pSEO from such a
            // tiny sample. Keep `unclear` so all rules run; the rules themselves
            // will decide.
            type = "unclear";
            confidence = 0.5;
        }
    }
    else if (top3Ratio >= 0.6) {
        // Medium tier (50–999 URLs) with clear template clustering.
        type = "programmatic-directory";
        confidence = 0.7;
    }
    else {
        // Medium tier without a clear pattern. Could be a marketing site or
        // mid-sized content. Default to unclear so we don't over-suppress.
        type = "unclear";
        confidence = 0.5;
    }
    const isSmallNonPseo = type === "small-marketing" || type === "blog";
    // Framework-aware nudge: a small Next.js site is almost certainly a
    // marketing or blog site, not a pSEO directory. pSEO is already excluded
    // by the size check, so this only bumps confidence.
    if (framework === "nextjs" && urls.length < 50 && isSmallNonPseo) {
        confidence = Math.min(0.95, confidence + 0.05);
    }
    const suppressedRules = isSmallNonPseo ? [...PSEO_ONLY_RULE_IDS] : [];
    return { type, confidence, signals, suppressedRules };
}
|
|
205
|
+
//# sourceMappingURL=site-classifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"site-classifier.js","sourceRoot":"","sources":["../src/site-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAmCH,oEAAoE;AACpE,MAAM,CAAC,MAAM,kBAAkB,GAAsB;IACnD,wBAAwB;IACxB,yBAAyB;IACzB,kBAAkB;IAClB,sBAAsB;CACvB,CAAC;AAEF;;;;;;;;;GASG;AACH,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,4DAA4D;IAC5D,IAAI,CAAC,GAAG,QAAQ,IAAI,GAAG,CAAC;IACxB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACxD,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,qBAAqB;IAC7D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC;QAC3E,OAAO,GAAG,CAAC;IACb,CAAC;IAED,MAAM,GAAG,GAAa,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACzC,IAAI,GAAG,KAAK,EAAE;YAAE,OAAO,EAAE,CAAC;QAC1B,wBAAwB;QACxB,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QACnC,gEAAgE;QAChE,6DAA6D;QAC7D,kEAAkE;QAClE,iEAAiE;QACjE,oCAAoC;QACpC,IAAI,4BAA4B,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAC3C,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,mEAAmE;QACnE,kEAAkE;QAClE,oCAAoC;QACpC,IAAI,GAAG,CAAC,MAAM,IAAI,EAAE,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAC7C,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;IAEH,OAAO,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC7B,CAAC;AAED,qEAAqE;AACrE,SAAS,aAAa,CAAC,GAAW;IAChC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,OAAO,uBAAuB,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,MAAM,CAAC;QACP,uBAAuB;QACvB,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,OAAO,uBAAuB,CAAC,IAAI,CAAC,CAAC;QACvC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,kFAAkF;AAClF,MAAM,UAAU,mBAAmB,CAAC,IAAc;IAChD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KA
AK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,KAAK,IAAI;YAAE,SAAS;QACzB,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACxC,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IACD,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QACvE,QAAQ;QACR,KAAK;QACL,KAAK,EAAE,KAAK,GAAG,KAAK;KACrB,CAAC,CAAC,CAAC;IACJ,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IAClF,OAAO,OAAO,CAAC;AACjB,CAAC;AASD;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,KAAwB;IACnD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;IACzD,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,IAAI,SAAS,CAAC;IAE/C,MAAM,OAAO,GAA2B,EAAE,CAAC;IAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,mBAAmB,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAEhE,MAAM,QAAQ,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IACxB,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC5E,IAAI,GAAG,EAAE,CAAC;QACR,OAAO,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,8BAA8B;YACpC,WAAW,EAAE,GAAG,CAAC,QAAQ;YACzB,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,KAAK,EAAE,GAAG,CAAC,KAAK;SACjB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,oBAAoB,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;IAE/D,8CAA8C;IAC9C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE,EAAE,CAAC;IAC1E,CAAC;IAED,sBAAsB;IACtB,IAAI,IAAI,GAAa,SAAS,CAAC;IAC/B,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,GAAG,wBAAwB,CAAC;YAChC,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,oEAAoE;YACpE,qDAAqD;YACrD,IAAI,GAAG,WAAW,CAA
C;YACnB,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;IACH,CAAC;SAAM,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QAC5B,mEAAmE;QACnE,EAAE;QACF,qEAAqE;QACrE,mEAAmE;QACnE,qEAAqE;QACrE,qEAAqE;QACrE,sEAAsE;QACtE,MAAM,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CACvC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,QAAQ,KAAK,aAAa,CAC1F,CAAC;QACF,MAAM,gBAAgB,GACpB,GAAG,KAAK,SAAS;YACjB,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;YACnD,GAAG,CAAC,KAAK,IAAI,GAAG,CAAC;QACnB,IAAI,YAAY,IAAI,YAAY,CAAC,KAAK,IAAI,GAAG,EAAE,CAAC;YAC9C,IAAI,GAAG,MAAM,CAAC;YACd,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;aAAM,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC7B,gEAAgE;YAChE,IAAI,GAAG,iBAAiB,CAAC;YACzB,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;aAAM,CAAC;YACN,+DAA+D;YAC/D,qEAAqE;YACrE,qEAAqE;YACrE,eAAe;YACf,IAAI,GAAG,SAAS,CAAC;YACjB,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;IACH,CAAC;SAAM,CAAC;QACN,uDAAuD;QACvD,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,GAAG,wBAAwB,CAAC;YAChC,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,kEAAkE;YAClE,mEAAmE;YACnE,IAAI,GAAG,SAAS,CAAC;YACjB,UAAU,GAAG,GAAG,CAAC;QACnB,CAAC;IACH,CAAC;IAED,oEAAoE;IACpE,uEAAuE;IACvE,gEAAgE;IAChE,IAAI,SAAS,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,IAAI,CAAC,IAAI,KAAK,iBAAiB,IAAI,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;QAClG,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,UAAU,GAAG,IAAI,CAAC,CAAC;IACjD,CAAC;IAED,MAAM,eAAe,GACnB,IAAI,KAAK,iBAAiB,IAAI,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,kBAAkB,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAE/E,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC;AACxD,CAAC"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SSRF guard for audit targets.
|
|
3
|
+
*
|
|
4
|
+
* Two layers:
|
|
5
|
+
* 1. `isPrivateOrReservedHost(hostname)` — fast, synchronous string check.
|
|
6
|
+
* Catches literal private IPs ("10.0.0.5"), loopback names ("localhost"),
|
|
7
|
+
* link-local suffixes (".local"), and internal/metadata hostnames.
|
|
8
|
+
* 2. `validateTargetHost(hostname)` — async. Resolves the hostname via DNS
|
|
9
|
+
* and rejects if the resulting address (v4 or v6) falls into a private /
|
|
10
|
+
* reserved / link-local / multicast range. Catches public hostnames whose DNS
|
|
11
|
+
* records point at private addresses (e.g. 127.0.0.1); it does not pin the IP.
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* const hostname = new URL(userSuppliedUrl).hostname;
|
|
15
|
+
* await validateTargetHost(hostname); // throws SSRFError on blocked targets
|
|
16
|
+
*
|
|
17
|
+
* Library consumers should call this BEFORE enqueuing a crawl. The audit
|
|
18
|
+
* engine itself wraps its own fetches with this check when `guardSsrf` is
|
|
19
|
+
* enabled in AuditOptions, but defense-in-depth at the API boundary is the
|
|
20
|
+
* primary mitigation.
|
|
21
|
+
*/
|
|
22
|
+
/** Error raised when a target hostname is rejected by the SSRF guard. */
export declare class SSRFError extends Error {
|
|
23
|
+
/** The hostname that was rejected. */
readonly hostname: string;
|
|
24
|
+
/** Human-readable explanation of why the host was rejected. */
readonly reason: string;
|
|
25
|
+
constructor(hostname: string, reason: string);
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Thrown when a hostname legitimately fails DNS resolution (NXDOMAIN / SERVFAIL
|
|
29
|
+
* / no A / AAAA records). Distinct from `SSRFError`: resolution failure
|
|
30
|
+
* is a "try again later / fix your typo" condition, not an attack. Callers
|
|
31
|
+
* in SaaS contexts should not log these as security events.
|
|
32
|
+
*/
|
|
33
|
+
export declare class DnsResolutionError extends Error {
|
|
34
|
+
/** The hostname that failed to resolve. */
readonly hostname: string;
|
|
35
|
+
constructor(hostname: string);
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* IPv4 range predicate — true if the address is private / reserved /
|
|
39
|
+
* link-local / loopback / multicast / broadcast / CGNAT. Expects a valid
|
|
40
|
+
* dotted-quad; caller must ensure that (e.g. via `net.isIP`).
* Returns false when the input is not four dot-separated integers in 0-255.
|
|
41
|
+
*/
|
|
42
|
+
export declare function isPrivateIPv4(addr: string): boolean;
|
|
43
|
+
/**
|
|
44
|
+
* IPv6 range predicate — true for loopback, ULA, link-local, unspecified,
|
|
45
|
+
* multicast, and IPv4-mapped addresses (after unwrapping to the v4 check).
* The address text is compared case-insensitively.
|
|
46
|
+
*/
|
|
47
|
+
export declare function isPrivateIPv6(addr: string): boolean;
|
|
48
|
+
/**
|
|
49
|
+
* Decode an integer-packed (`2130706433` = `127.0.0.1`) or hex-encoded
|
|
50
|
+
* (`0x7f000001`) hostname into dotted-quad form. Returns `null` if the
|
|
51
|
+
* input isn't a numeric hostname. Needed because some fetch stacks accept
|
|
52
|
+
* these encodings and resolve them to private IPs, bypassing a naive
|
|
53
|
+
* string-only dotted-quad check.
* The hostname is lowercased and trimmed first; values above 0xffffffff
* yield `null`.
|
|
54
|
+
*/
|
|
55
|
+
export declare function decodeNumericIPv4(hostname: string): string | null;
|
|
56
|
+
/**
|
|
57
|
+
* Synchronous string-only check. Rejects:
|
|
58
|
+
* - literal private / reserved IP addresses (dotted-quad OR numeric/hex encoding)
|
|
59
|
+
* - exact blocked hostnames (localhost, 0, etc.)
|
|
60
|
+
* - suffix-blocked hostnames (.local, .internal, .arpa, ...)
|
|
61
|
+
*
|
|
62
|
+
* Returns `null` if the host is acceptable, or a human-readable reason
|
|
63
|
+
* string if it should be blocked.
* An empty hostname is blocked with the reason "empty hostname".
|
|
64
|
+
*/
|
|
65
|
+
export declare function isPrivateOrReservedHost(hostname: string): string | null;
|
|
66
|
+
/** Options accepted as the second argument of `validateTargetHost`. */
export interface ValidateTargetHostOptions {
|
|
67
|
+
/** Override the DNS resolver — useful for tests or custom resolvers. */
|
|
68
|
+
resolver?: {
|
|
69
|
+
resolve4: (hostname: string) => Promise<string[]>;
|
|
70
|
+
resolve6: (hostname: string) => Promise<string[]>;
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Full SSRF check: the string check above PLUS a DNS lookup to guarantee the
|
|
75
|
+
* resolved address isn't in a private range. Throws `SSRFError` on failure.
|
|
76
|
+
*
|
|
77
|
+
* Time-of-check-vs-time-of-use: an attacker-controlled DNS server can return
|
|
78
|
+
* a public IP on first lookup and a private IP on the subsequent fetch ("DNS
|
|
79
|
+
* rebinding"). Mitigations: cache the resolved IP and dial to THAT IP (host
|
|
80
|
+
* header preserved), or use a resolver that refuses re-resolution within a
|
|
81
|
+
* TTL window. This function validates; it does not pin. For the audit
|
|
82
|
+
* engine's own fetches, the pinning layer is layered on top via `safeFetch`.
*
* @param hostname Hostname to validate (not a full URL).
* @param options Optional settings; see `ValidateTargetHostOptions`.
* @returns A promise that resolves with no value when the host is acceptable.
|
|
83
|
+
*/
|
|
84
|
+
export declare function validateTargetHost(hostname: string, options?: ValidateTargetHostOptions): Promise<void>;
|
|
85
|
+
/**
|
|
86
|
+
* Convenience check for "is this URL pointing at localhost or a private
|
|
87
|
+
* network?". Used by the CLI to auto-apply a conservative crawl preset when
|
|
88
|
+
* a developer runs `pseolint http://localhost:3000` — a cache-cold local
|
|
89
|
+
* server can amplify every fetch into a thundering herd of DB queries.
|
|
90
|
+
*
|
|
91
|
+
* Returns false for anything that isn't a parseable URL with a hostname
|
|
92
|
+
* (paths, `file://`, empty strings). Delegates the actual decision to
|
|
93
|
+
* `isPrivateOrReservedHost` so the two stay in sync.
*
* @param url URL string to inspect.
|
|
94
|
+
*/
|
|
95
|
+
export declare function isLocalhostUrl(url: string): boolean;
|
|
96
|
+
//# sourceMappingURL=ssrf-guard.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ssrf-guard.d.ts","sourceRoot":"","sources":["../src/ssrf-guard.ts"],"names":[],"mappings":"AAGA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,qBAAa,SAAU,SAAQ,KAAK;IAClC,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAC1B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;gBAEZ,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;CAM7C;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,SAAQ,KAAK;IAC3C,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;gBAEd,QAAQ,EAAE,MAAM;CAK7B;AAwBD;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAqBnD;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAWnD;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAoBjE;AAED;;;;;;;;GAQG;AACH,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CA4BvE;AAED,MAAM,WAAW,yBAAyB;IACxC,wEAAwE;IACxE,QAAQ,CAAC,EAAE;QACT,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;QAClD,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;KACnD,CAAC;CACH;AAED;;;;;;;;;;GAUG;AACH,wBAAsB,kBAAkB,CACtC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,yBAA8B,GACtC,OAAO,CAAC,IAAI,CAAC,CA+Bf;AAED;;;;;;;;;GASG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAUnD"}
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import { promises as dns } from "node:dns";
|
|
2
|
+
import { isIP } from "node:net";
|
|
3
|
+
/**
|
|
4
|
+
* SSRF guard for audit targets.
|
|
5
|
+
*
|
|
6
|
+
* Two layers:
|
|
7
|
+
* 1. `isPrivateOrReservedHost(hostname)` — fast, synchronous string check.
|
|
8
|
+
* Catches literal private IPs ("10.0.0.5"), loopback names ("localhost"),
|
|
9
|
+
* link-local suffixes (".local"), and internal/metadata hostnames.
|
|
10
|
+
* 2. `validateTargetHost(hostname)` — async. Resolves the hostname via DNS
|
|
11
|
+
* and rejects if the resulting address (v4 or v6) falls into a private /
|
|
12
|
+
* reserved / link-local / multicast range. Catches public hostnames whose DNS
|
|
13
|
+
* records point at private addresses (e.g. 127.0.0.1); it does not pin the IP.
|
|
14
|
+
*
|
|
15
|
+
* Usage:
|
|
16
|
+
* const hostname = new URL(userSuppliedUrl).hostname;
|
|
17
|
+
* await validateTargetHost(hostname); // throws SSRFError on blocked targets
|
|
18
|
+
*
|
|
19
|
+
* Library consumers should call this BEFORE enqueuing a crawl. The audit
|
|
20
|
+
* engine itself wraps its own fetches with this check when `guardSsrf` is
|
|
21
|
+
* enabled in AuditOptions, but defense-in-depth at the API boundary is the
|
|
22
|
+
* primary mitigation.
|
|
23
|
+
*/
|
|
24
|
+
/**
 * Error raised when a target host is not permitted. Carries the rejected
 * `hostname` and the human-readable `reason` alongside the message.
 */
export class SSRFError extends Error {
    hostname;
    reason;
    name = "SSRFError";
    constructor(hostname, reason) {
        const message = `Target host "${hostname}" is not permitted: ${reason}`;
        super(message);
        Object.assign(this, { hostname, reason });
    }
}
|
|
34
|
+
/**
 * Raised when a hostname cannot be resolved at all (NXDOMAIN, SERVFAIL, or
 * no A / AAAA records). Kept separate from `SSRFError` because a failed
 * lookup is a "retry later / check for typos" situation rather than an
 * attack, so SaaS callers should not log it as a security event.
 */
export class DnsResolutionError extends Error {
    hostname;
    name = "DnsResolutionError";
    constructor(hostname) {
        const message = `DNS resolution failed for "${hostname}"`;
        super(message);
        this.hostname = hostname;
    }
}
|
|
48
|
+
/** Hostnames blocked on exact match; isPrivateOrReservedHost lowercases the hostname before the lookup. */
const BLOCKED_HOSTNAME_EXACT = new Set([
|
|
49
|
+
"localhost",
|
|
50
|
+
"broadcasthost",
|
|
51
|
+
"ip6-localhost",
|
|
52
|
+
"ip6-loopback",
|
|
53
|
+
"0",
|
|
54
|
+
]);
|
|
55
|
+
/** Hostname suffixes blocked by isPrivateOrReservedHost via `endsWith` on the lowercased hostname. */
const BLOCKED_HOSTNAME_SUFFIXES = [
|
|
56
|
+
".local",
|
|
57
|
+
".localhost",
|
|
58
|
+
".internal",
|
|
59
|
+
".arpa",
|
|
60
|
+
".intranet",
|
|
61
|
+
".lan",
|
|
62
|
+
".home",
|
|
63
|
+
".private",
|
|
64
|
+
".corp",
|
|
65
|
+
];
|
|
66
|
+
/** Dotted-quad spelling of an IPv4-mapped / IPv4-translated IPv6 address. */
const IPV4_MAPPED_IPV6 = /^::ffff:(?:0:)?(\d+\.\d+\.\d+\.\d+)$/i;
/**
 * IPv4 range predicate — true if the address is private / reserved /
 * link-local / loopback / multicast / broadcast / CGNAT. Expects a valid
 * dotted-quad; caller must ensure that (e.g. via `net.isIP`).
 *
 * @param {string} addr Dotted-quad IPv4 address.
 * @returns {boolean} true for a non-public address; false for a public one
 *   and for input that is not four dot-separated integers in 0–255.
 */
export function isPrivateIPv4(addr) {
    const parts = addr.split(".").map((p) => Number(p));
    if (parts.length !== 4 || parts.some((p) => !Number.isInteger(p) || p < 0 || p > 255)) {
        return false;
    }
    const [a, b, c] = parts;
    if (a === 0)
        return true; // 0.0.0.0/8 — "this network"
    if (a === 10)
        return true; // 10.0.0.0/8
    if (a === 127)
        return true; // 127.0.0.0/8 — loopback
    if (a === 169 && b === 254)
        return true; // 169.254.0.0/16 — link-local + cloud metadata
    if (a === 172 && b >= 16 && b <= 31)
        return true; // 172.16.0.0/12
    if (a === 192 && b === 168)
        return true; // 192.168.0.0/16
    if (a === 100 && b >= 64 && b <= 127)
        return true; // 100.64.0.0/10 — CGNAT
    if (a === 192 && b === 0 && c === 0)
        return true; // 192.0.0.0/24 — IETF
    if (a === 192 && b === 0 && c === 2)
        return true; // 192.0.2.0/24 — TEST-NET-1
    if (a === 198 && (b === 18 || b === 19))
        return true; // 198.18.0.0/15 — benchmark
    if (a === 198 && b === 51 && c === 100)
        return true; // 198.51.100.0/24 — TEST-NET-2
    if (a === 203 && b === 0 && c === 113)
        return true; // 203.0.113.0/24 — TEST-NET-3
    if (a >= 224 && a <= 239)
        return true; // 224.0.0.0/4 — multicast
    if (a >= 240)
        return true; // 240.0.0.0/4 — reserved + 255.255.255.255 broadcast
    return false;
}
/**
 * Expand lowercase IPv6 text into its eight 16-bit groups. Accepts "::"
 * compression, an embedded dotted-quad tail, surrounding brackets and a
 * "%zone" suffix. Returns null when the text is not a well-formed address.
 */
function parseIPv6Groups(text) {
    let s = text;
    if (s.startsWith("[") && s.endsWith("]")) {
        s = s.slice(1, -1);
    }
    const zoneAt = s.indexOf("%");
    if (zoneAt !== -1) {
        s = s.slice(0, zoneAt);
    }
    if (s.includes(".")) {
        // Rewrite a trailing a.b.c.d as the two hex groups it stands for.
        const cut = s.lastIndexOf(":");
        const quad = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(s.slice(cut + 1));
        if (cut === -1 || quad === null) {
            return null;
        }
        const octets = quad.slice(1).map(Number);
        if (octets.some((o) => o > 255)) {
            return null;
        }
        const hi = ((octets[0] << 8) | octets[1]).toString(16);
        const lo = ((octets[2] << 8) | octets[3]).toString(16);
        s = `${s.slice(0, cut + 1)}${hi}:${lo}`;
    }
    const halves = s.split("::");
    if (halves.length > 2) {
        return null;
    }
    const toFields = (part) => (part === "" ? [] : part.split(":"));
    const head = toFields(halves[0]);
    const tail = halves.length === 2 ? toFields(halves[1]) : [];
    const gap = 8 - head.length - tail.length;
    // "::" must stand for at least one group; without it all 8 must be present.
    if (halves.length === 2 ? gap < 1 : gap !== 0) {
        return null;
    }
    const fields = [...head, ...new Array(gap).fill("0"), ...tail];
    if (!fields.every((f) => /^[0-9a-f]{1,4}$/.test(f))) {
        return null;
    }
    return fields.map((f) => Number.parseInt(f, 16));
}
/**
 * IPv6 range predicate — true for loopback, ULA, link-local, unspecified,
 * multicast, and for addresses that embed a private IPv4 address
 * (IPv4-mapped, IPv4-translated, NAT64 well-known prefix, IPv4-compatible).
 *
 * The address is parsed into its numeric groups, so every spelling of the
 * same address is treated alike: `::ffff:127.0.0.1` and `::ffff:7f00:1`,
 * `::1` and `0:0:0:0:0:0:0:1`, with or without brackets or a zone id. The
 * original textual prefix checks run first and are kept as-is, so nothing
 * that was blocked before becomes allowed.
 *
 * @param {string} addr IPv6 address text (case-insensitive).
 * @returns {boolean} true when the address is not a public unicast target.
 */
export function isPrivateIPv6(addr) {
    const normalized = addr.toLowerCase();
    // Textual fast path, unchanged from the original behaviour.
    if (normalized === "::" || normalized === "::1")
        return true;
    if (/^fe[89ab]/.test(normalized))
        return true; // fe80::/10
    if (/^f[cd]/.test(normalized))
        return true; // fc00::/7 ULA
    if (normalized.startsWith("ff"))
        return true; // ff00::/8 multicast
    const groups = parseIPv6Groups(normalized);
    if (groups === null) {
        // Not well-formed: fall back to the original dotted-quad unwrap so
        // loosely spelled mapped addresses are still caught.
        const mapped = normalized.match(IPV4_MAPPED_IPV6);
        return mapped ? isPrivateIPv4(mapped[1]) : false;
    }
    const [g0, g1, , , g4, g5, g6, g7] = groups;
    const leadingZeros = (count) => groups.slice(0, count).every((g) => g === 0);
    const embeddedV4 = `${g6 >> 8}.${g6 & 0xff}.${g7 >> 8}.${g7 & 0xff}`;
    if (leadingZeros(7) && g7 <= 1)
        return true; // :: and ::1 in any spelling
    if ((g0 & 0xffc0) === 0xfe80)
        return true; // fe80::/10 link-local
    if ((g0 & 0xfe00) === 0xfc00)
        return true; // fc00::/7 ULA
    if ((g0 & 0xff00) === 0xff00)
        return true; // ff00::/8 multicast
    if (leadingZeros(5) && g5 === 0xffff)
        return isPrivateIPv4(embeddedV4); // ::ffff:0:0/96 IPv4-mapped
    if (leadingZeros(4) && g4 === 0xffff && g5 === 0)
        return isPrivateIPv4(embeddedV4); // ::ffff:0:0:0/96 IPv4-translated
    if (g0 === 0x64 && g1 === 0xff9b && groups.slice(2, 6).every((g) => g === 0))
        return isPrivateIPv4(embeddedV4); // 64:ff9b::/96 NAT64
    if (leadingZeros(6))
        return isPrivateIPv4(embeddedV4); // ::a.b.c.d IPv4-compatible (deprecated)
    return false;
}
|
|
129
|
+
/**
 * Decode a single-number IPv4 hostname into dotted-quad form. Recognised
 * spellings, following the classic inet_aton conventions:
 *   - decimal:  `2130706433`   → `127.0.0.1`
 *   - hex:      `0x7f000001`   → `127.0.0.1`
 *   - octal:    `017700000001` → `127.0.0.1` (leading zero, digits 0-7 only)
 *
 * Needed because some fetch stacks accept these encodings and resolve them
 * to private IPs, bypassing a naive string-only dotted-quad check.
 *
 * A leading-zero string that is not valid octal (e.g. `08`) is still read
 * as decimal. The octal value of a digit string is never larger than its
 * decimal value, so every input that decoded before still decodes.
 *
 * @param {string} hostname Hostname to inspect; lowercased and trimmed first.
 * @returns {string|null} Dotted-quad text, or `null` when the input is not a
 *   numeric hostname or its value does not fit in 32 bits.
 */
export function decodeNumericIPv4(hostname) {
    const s = hostname.toLowerCase().trim();
    if (!s)
        return null;
    let n = null;
    if (/^0x[0-9a-f]+$/.test(s)) {
        // Hex — "0x7f000001".
        n = Number(s);
    }
    else if (/^0[0-7]+$/.test(s)) {
        // Octal — "017700000001". Reading this as decimal would overflow
        // 32 bits and let the host slip through undecoded.
        n = Number.parseInt(s, 8);
    }
    else if (/^[0-9]+$/.test(s)) {
        // Pure decimal (single-number IPv4 form "2130706433").
        n = Number(s);
    }
    if (n === null || !Number.isInteger(n) || n < 0 || n > 0xffffffff)
        return null;
    return [
        (n >>> 24) & 0xff,
        (n >>> 16) & 0xff,
        (n >>> 8) & 0xff,
        n & 0xff,
    ].join(".");
}
|
|
162
|
+
/**
 * Synchronous string-only check. Rejects:
 *   - literal private / reserved IP addresses (dotted-quad OR numeric/hex encoding)
 *   - exact blocked hostnames (localhost, 0, etc.)
 *   - suffix-blocked hostnames (.local, .internal, .arpa, ...)
 *
 * The hostname is normalised before matching: a single trailing root dot is
 * dropped ("localhost." is the same name as "localhost"), and IPv6 literals
 * may be passed with URL-style brackets and/or a zone id.
 *
 * @param {string} hostname - Host to classify.
 * @returns {string | null} `null` if the host is acceptable, or a
 *   human-readable reason string if it should be blocked.
 */
export function isPrivateOrReservedHost(hostname) {
    if (!hostname)
        return "empty hostname";
    // A fully-qualified name with a trailing dot must not slip past the
    // exact / suffix blocklists.
    const host = hostname.endsWith(".") ? hostname.slice(0, -1) : hostname;
    const lower = host.toLowerCase();
    if (BLOCKED_HOSTNAME_EXACT.has(lower)) {
        return `reserved hostname (${lower})`;
    }
    for (const suffix of BLOCKED_HOSTNAME_SUFFIXES) {
        if (lower.endsWith(suffix))
            return `reserved TLD / suffix (${suffix})`;
    }
    // Numeric / hex encoding of IPv4 — decode and test.
    const decoded = decodeNumericIPv4(host);
    if (decoded) {
        if (isPrivateIPv4(decoded))
            return `private / reserved IPv4 (${decoded}, encoded as ${hostname})`;
        // Also reject all numeric hostnames that decode to public IPs — they're a
        // deniability smell. Callers who intentionally audit a literal IP will
        // pass it in dotted-quad form.
        return `ambiguous numeric-encoded IPv4 (${hostname} decodes to ${decoded}); pass dotted-quad form explicitly`;
    }
    // Strip brackets and zone id BEFORE classifying: isIP("[::1]") is 0, so
    // testing the raw hostname would let bracketed literals through unchecked.
    const bare = host.replace(/^\[|\]$/g, "").replace(/%.*$/, "");
    const version = isIP(bare); // 4 | 6 | 0
    if (version === 4 && isPrivateIPv4(bare))
        return "private / reserved IPv4 range";
    if (version === 6 && isPrivateIPv6(bare))
        return "private / reserved IPv6 range";
    return null;
}
|
|
202
|
+
/**
 * Full SSRF check: the string check above PLUS a DNS lookup to guarantee the
 * resolved address isn't in a private range. Throws `SSRFError` on failure.
 *
 * IPv6 literals may be passed with or without URL-style brackets; brackets
 * are removed before classification so a bracketed literal is treated as a
 * literal rather than being sent to DNS as a name.
 *
 * Time-of-check-vs-time-of-use: an attacker-controlled DNS server can return
 * a public IP on first lookup and a private IP on the subsequent fetch ("DNS
 * rebinding"). Mitigations: cache the resolved IP and dial to THAT IP (host
 * header preserved), or use a resolver that refuses re-resolution within a
 * TTL window. This function validates; it does not pin. For the audit
 * engine's own fetches, the pinning layer is layered on top via `safeFetch`.
 *
 * @param {string} hostname - Host to validate.
 * @param {{ resolver?: { resolve4: Function, resolve6: Function } }} [options]
 *   Optional resolver override; defaults to the `dns` A / AAAA lookups.
 * @returns {Promise<void>} Resolves when the host is acceptable.
 * @throws {SSRFError} The host, or any address it resolves to, is private / reserved.
 * @throws {DnsResolutionError} Neither the A nor the AAAA lookup produced an address.
 */
export async function validateTargetHost(hostname, options = {}) {
    // `new URL(...).hostname` keeps the brackets on IPv6 literals, but isIP()
    // and DNS both need the bare address.
    const host = hostname.replace(/^\[|\]$/g, "");
    const stringReason = isPrivateOrReservedHost(host);
    if (stringReason)
        throw new SSRFError(hostname, stringReason);
    // Literal IPs pass the DNS step trivially (isIP > 0 ⇒ not a name to resolve).
    if (isIP(host) !== 0)
        return;
    const resolver = options.resolver ?? {
        resolve4: (h) => dns.resolve4(h),
        resolve6: (h) => dns.resolve6(h),
    };
    // allSettled: a host with only A or only AAAA records is still valid, so
    // one family failing must not abort the other.
    const [v4, v6] = await Promise.allSettled([
        resolver.resolve4(host),
        resolver.resolve6(host),
    ]);
    const addrs = [];
    if (v4.status === "fulfilled")
        for (const a of v4.value)
            addrs.push({ kind: "v4", addr: a });
    if (v6.status === "fulfilled")
        for (const a of v6.value)
            addrs.push({ kind: "v6", addr: a });
    if (addrs.length === 0) {
        throw new DnsResolutionError(hostname);
    }
    // Every resolved address must be public — a single private record is
    // enough to reject the host.
    for (const { kind, addr } of addrs) {
        const isPrivate = kind === "v4" ? isPrivateIPv4(addr) : isPrivateIPv6(addr);
        if (isPrivate) {
            throw new SSRFError(hostname, `resolves to private ${kind} address ${addr}`);
        }
    }
}
|
|
245
|
+
/**
 * Answers "does this URL target localhost or a private network?".
 *
 * The CLI uses this to switch to a conservative crawl preset when a developer
 * runs `pseolint http://localhost:3000`, because a cache-cold local server
 * can turn every fetch into a thundering herd of DB queries.
 *
 * Anything that does not parse as a URL with a hostname (bare paths,
 * `file://`, empty strings) yields `false`. The verdict itself comes from
 * `isPrivateOrReservedHost`, so the two cannot drift apart.
 *
 * @param {string} url - Candidate URL string.
 * @returns {boolean} `true` when the URL's host is private / reserved.
 */
export function isLocalhostUrl(url) {
    let hostname;
    try {
        ({ hostname } = new URL(url));
    }
    catch {
        // Not a parseable URL.
        return false;
    }
    if (!hostname) {
        return false;
    }
    // URL keeps the square brackets around IPv6 literals; drop them first.
    const unbracketed = hostname.replace(/^\[|\]$/g, "");
    const blockReason = isPrivateOrReservedHost(unbracketed);
    return blockReason !== null;
}
|
|
268
|
+
//# sourceMappingURL=ssrf-guard.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ssrf-guard.js","sourceRoot":"","sources":["../src/ssrf-guard.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,GAAG,EAAE,MAAM,UAAU,CAAC;AAC3C,OAAO,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAEhC;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,MAAM,OAAO,SAAU,SAAQ,KAAK;IACzB,QAAQ,CAAS;IACjB,MAAM,CAAS;IAExB,YAAY,QAAgB,EAAE,MAAc;QAC1C,KAAK,CAAC,gBAAgB,QAAQ,uBAAuB,MAAM,EAAE,CAAC,CAAC;QAC/D,IAAI,CAAC,IAAI,GAAG,WAAW,CAAC;QACxB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,kBAAmB,SAAQ,KAAK;IAClC,QAAQ,CAAS;IAE1B,YAAY,QAAgB;QAC1B,KAAK,CAAC,8BAA8B,QAAQ,GAAG,CAAC,CAAC;QACjD,IAAI,CAAC,IAAI,GAAG,oBAAoB,CAAC;QACjC,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;CACF;AAED,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,WAAW;IACX,eAAe;IACf,eAAe;IACf,cAAc;IACd,GAAG;CACJ,CAAC,CAAC;AAEH,MAAM,yBAAyB,GAAG;IAChC,QAAQ;IACR,YAAY;IACZ,WAAW;IACX,OAAO;IACP,WAAW;IACX,MAAM;IACN,OAAO;IACP,UAAU;IACV,OAAO;CACR,CAAC;AAEF,MAAM,gBAAgB,GAAG,uCAAuC,CAAC;AAEjE;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC;QACtF,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC;IACrB,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,6BAA6B;IACvD,IAAI,CAAC,KAAK,EAAE;QAAE,OAAO,IAAI,CAAC,CAAC,aAAa;IACxC,IAAI,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,yBAAyB;IACrD,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,+CAA+C;IACxF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC,CAAC,gBAAgB;IAClE,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,iBAAiB;IAC1D,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,wBAAwB;IAC3E,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;Q
AAE,OAAO,IAAI,CAAC,CAAC,sBAAsB;IAC/E,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,4BAA4B;IACrF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,4BAA4B;IAClF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,+BAA+B;IAC3F,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,8BAA8B;IACzF,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,0BAA0B;IACjE,IAAI,CAAC,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC,CAAC,qDAAqD;IAChF,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACtC,IAAI,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,KAAK;QAAE,OAAO,IAAI,CAAC;IAC7D,IAAI,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC;QAC5D,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,YAAY;IAC3F,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,eAAe;IAC5F,IAAI,UAAU,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,qBAAqB;IACnE,8EAA8E;IAC9E,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAClD,IAAI,MAAM;QAAE,OAAO,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5C,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAgB;IAChD,MAAM,CAAC,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IACxC,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,IAAI,CAAC,GAAkB,IAAI,CAAC;IAC5B,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACvB,oEAAoE;QACpE,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACzB,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,IAAI,MAAM,IAAI,UAAU;YAAE,CAAC,GAAG,MAAM,CAAC;IAClF,CAAC;SAAM,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACnC,sBAAsB;QACtB,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACzB,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,MAAM,IAAI,CAAC,IAAI,MAAM,IAAI,UAAU;YAAE,CAAC,GAAG,MAAM,CAAC;IAClF,CAAC;IACD,IAAI,CAAC,KAAK,IAAI;QAAE,OAAO,IAAI,CA
AC;IAC5B,OAAO;QACL,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,IAAI;QACjB,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,IAAI;QACjB,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,IAAI;QAChB,CAAC,GAAG,IAAI;KACT,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACd,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACtD,IAAI,CAAC,QAAQ;QAAE,OAAO,gBAAgB,CAAC;IACvC,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;IAErC,IAAI,sBAAsB,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,sBAAsB,KAAK,GAAG,CAAC;IACxC,CAAC;IACD,KAAK,MAAM,MAAM,IAAI,yBAAyB,EAAE,CAAC;QAC/C,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,OAAO,0BAA0B,MAAM,GAAG,CAAC;IACzE,CAAC;IAED,oDAAoD;IACpD,MAAM,OAAO,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAC5C,IAAI,OAAO,EAAE,CAAC;QACZ,IAAI,aAAa,CAAC,OAAO,CAAC;YAAE,OAAO,4BAA4B,OAAO,gBAAgB,QAAQ,GAAG,CAAC;QAClG,0EAA0E;QAC1E,uEAAuE;QACvE,+BAA+B;QAC/B,OAAO,mCAAmC,QAAQ,eAAe,OAAO,qCAAqC,CAAC;IAChH,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,YAAY;IAC5C,IAAI,OAAO,KAAK,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC;QAAE,OAAO,+BAA+B,CAAC;IACrF,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;QAClB,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAClE,IAAI,aAAa,CAAC,IAAI,CAAC;YAAE,OAAO,+BAA+B,CAAC;IAClE,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAUD;;;;;;;;;;GAUG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,UAAqC,EAAE;IAEvC,MAAM,YAAY,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAC;IACvD,IAAI,YAAY;QAAE,MAAM,IAAI,SAAS,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAE9D,8EAA8E;IAC9E,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO;IAEjC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI;QACnC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;QAChC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;KACjC,CAAC;IAEF,MAAM,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;QACxC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAC3B,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC;KAC5B,CAAC,CAAC;IAEH,MAAM,KAAK,GAA+C,EAAE,CAAC;IAC7D,IAAI,EAAE,CAAC,MAAM,KAAK,WAAW;QAAE,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;IAC7F,IAAI,EAAE,CAAC,MAAM,KAAK
,WAAW;QAAE,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;IAE7F,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED,KAAK,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,KAAK,EAAE,CAAC;QACnC,MAAM,SAAS,GAAG,IAAI,KAAK,IAAI,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QAC5E,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,IAAI,SAAS,CAAC,QAAQ,EAAE,uBAAuB,IAAI,YAAY,IAAI,EAAE,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,QAAQ;QAAE,OAAO,KAAK,CAAC;IACnC,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IACrD,OAAO,uBAAuB,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC;AAChD,CAAC"}
|