rankforge 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +30 -0
- package/package.json +49 -0
- package/src/audit-output-schema.mjs +88 -0
- package/src/audit.mjs +202 -0
- package/src/cli.mjs +508 -0
- package/src/config-schema.mjs +292 -0
- package/src/crawl.mjs +188 -0
- package/src/finding-task.mjs +9 -0
- package/src/html-extract.mjs +226 -0
- package/src/index.mjs +9 -0
- package/src/integrations.mjs +78 -0
- package/src/io-guards.mjs +196 -0
- package/src/performance.mjs +112 -0
- package/src/regex-guards.mjs +52 -0
- package/src/render-parity.mjs +149 -0
- package/src/render.mjs +45 -0
- package/src/repo-audit.mjs +429 -0
- package/src/repo-detect.mjs +87 -0
- package/src/repo-findings.mjs +9 -0
- package/src/repo-manifests.mjs +169 -0
- package/src/repo-process.mjs +298 -0
- package/src/repo-routes.mjs +46 -0
- package/src/report.mjs +898 -0
- package/src/robots.mjs +60 -0
- package/src/rule-depth.mjs +190 -0
- package/src/rule-engine.mjs +360 -0
- package/src/rules.mjs +350 -0
- package/src/site-rule-engine.mjs +177 -0
- package/src/sitemap.mjs +30 -0
- package/src/snapshot.mjs +119 -0
- package/src/source-map.json +28 -0
- package/src/structured-data.mjs +59 -0
- package/src/url-utils.mjs +25 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import { isHttpUrl, resolveUrl, sameOrigin } from "./url-utils.mjs";
|
|
2
|
+
|
|
3
|
+
// Character references this module decodes, keyed by the text between "&" and ";".
const entityText = {
  nbsp: " ",
  quot: '"',
  "#39": "'",
  apos: "'",
  amp: "&",
  lt: "<",
  gt: ">",
};

/**
 * Decode the handful of HTML character references listed in `entityText`.
 *
 * Decoding happens in a single pass so text that was escaped twice (an escaped
 * ampersand followed by `lt;`) is only unescaped once.
 *
 * @param {*} value - Raw text; any falsy value is treated as an empty string.
 * @returns {string} Text with the supported references replaced.
 */
const decodeEntities = (value) =>
  String(value || "").replace(/&(nbsp|quot|#39|apos|amp|lt|gt);/g, (_, name) => entityText[name]);

/**
 * Reduce an HTML fragment to plain text: tags removed, references decoded,
 * whitespace collapsed to single spaces and trimmed.
 *
 * Tags are stripped BEFORE references are decoded; otherwise escaped angle
 * brackets in running text would be decoded and then deleted as if they
 * were markup.
 *
 * @param {*} value - HTML fragment or text; falsy values yield "".
 * @returns {string} Cleaned text.
 */
export const cleanText = (value) =>
  decodeEntities(String(value || "").replace(/<[^>]+>/g, " "))
    .replace(/\s+/g, " ")
    .trim();
|
|
18
|
+
|
|
19
|
+
/**
 * Read one attribute value out of a raw tag string.
 *
 * Handles double-quoted, single-quoted and unquoted values. A quoted value may
 * contain the other quote character. The attribute name must not be the tail
 * of a longer name, so asking for "name" does not match "data-name".
 *
 * @param {string} tag - Raw tag text such as `<meta name="x" content="y">`.
 * @param {string} name - Attribute name, matched case-insensitively.
 * @returns {string|null} The attribute value ("" when empty) or null when absent.
 */
const attr = (tag, name) => {
  // The name is interpolated into a RegExp, so neutralise metacharacters.
  const safeName = String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(
    `(?<![\\w:.-])${safeName}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`,
    "i",
  );
  const match = String(tag ?? "").match(pattern);
  if (!match) return null;
  return match[1] ?? match[2] ?? match[3];
};
|
|
23
|
+
|
|
24
|
+
/**
 * Map every match of `pattern` in `html` through `mapper`.
 *
 * The scan runs on a private global copy of the pattern, so a pattern without
 * the `g` flag or one that can match the empty string cannot loop forever, and
 * the caller's `lastIndex` is neither read nor modified.
 *
 * @param {string} html - Text to scan; null/undefined is treated as "".
 * @param {RegExp} pattern - Pattern to search for.
 * @param {(match: RegExpMatchArray) => *} mapper - Converts a match to a result.
 * @returns {Array} Mapped results in document order.
 */
const allMatches = (html, pattern, mapper) => {
  const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
  const scanner = new RegExp(pattern.source, flags);
  return Array.from(String(html ?? "").matchAll(scanner), (match) => mapper(match));
};
|
|
30
|
+
|
|
31
|
+
// Drop falsy entries, then de-duplicate while keeping first-seen order.
const unique = (values) => Array.from(new Set(values.filter((value) => Boolean(value))));
|
|
32
|
+
|
|
33
|
+
/**
 * Cleaned inner text of the first `<tagName>…</tagName>` element in `html`.
 *
 * @param {string} html - Document markup.
 * @param {string} tagName - Element name; interpolated into the pattern as-is.
 * @returns {string|null} Cleaned text, or null when no such element is found.
 */
const firstTagContent = (html, tagName) => {
  const elementPattern = new RegExp(`<${tagName}\\b[^>]*>([\\s\\S]*?)<\\/${tagName}>`, "i");
  const [, inner] = html.match(elementPattern) ?? [];
  return inner === undefined ? null : cleanText(inner);
};
|
|
38
|
+
|
|
39
|
+
/**
 * Cleaned `content` of the first `<meta>` tag whose `name` equals `name`
 * (case-insensitive).
 *
 * @param {string} html - Document markup.
 * @param {string} name - Value of the `name` attribute to look for.
 * @returns {string|null} Cleaned content ("" if absent on the tag), or null when no tag matches.
 */
const metaByName = (html, name) => {
  for (const candidate of allMatches(html, /<meta\b[^>]*>/gi, ([tag]) => tag)) {
    if (attr(candidate, "name")?.toLowerCase() === name.toLowerCase()) {
      return cleanText(attr(candidate, "content") || "");
    }
  }
  return null;
};
|
|
44
|
+
|
|
45
|
+
/**
 * Cleaned `content` of the first `<meta>` tag whose `property` equals
 * `property` (case-insensitive).
 *
 * @param {string} html - Document markup.
 * @param {string} property - Value of the `property` attribute to look for.
 * @returns {string|null} Cleaned content ("" if absent on the tag), or null when no tag matches.
 */
const metaByProperty = (html, property) => {
  for (const candidate of allMatches(html, /<meta\b[^>]*>/gi, ([tag]) => tag)) {
    if (attr(candidate, "property")?.toLowerCase() === property.toLowerCase()) {
      return cleanText(attr(candidate, "content") || "");
    }
  }
  return null;
};
|
|
50
|
+
|
|
51
|
+
/**
 * Distinct, non-empty cleaned `content` values of every `<meta>` tag that
 * satisfies at least one selector.
 *
 * @param {string} html - Document markup.
 * @param {Array<{key: string, value: string}>} selectors - Attribute name and the
 *   value it must equal (case-insensitive).
 * @returns {string[]} Matching contents in first-seen order.
 */
const metaContents = (html, selectors) => {
  const found = new Set();
  for (const tag of allMatches(html, /<meta\b[^>]*>/gi, (match) => match[0])) {
    const content = cleanText(attr(tag, "content") || "");
    if (!content) continue;
    const selected = selectors.some(
      ({ key, value }) => attr(tag, key)?.toLowerCase() === value.toLowerCase(),
    );
    if (selected) found.add(content);
  }
  return [...found];
};
|
|
61
|
+
|
|
62
|
+
// Raw text of every <link …> tag, in document order.
const linkTags = (html) => allMatches(html, /<link\b[^>]*>/gi, ([tag]) => tag);
|
|
63
|
+
|
|
64
|
+
/**
 * Lower-cased, whitespace-separated tokens of a tag's `rel` attribute.
 *
 * @param {string} tag - Raw tag text.
 * @returns {string[]} Tokens; empty when the attribute is missing or blank.
 */
const relParts = (tag) => {
  const rel = attr(tag, "rel") || "";
  return String(rel)
    .toLowerCase()
    .split(/\s+/)
    .filter((token) => token !== "");
};
|
|
69
|
+
|
|
70
|
+
/**
 * `href` of the first `<link>` whose `rel` tokens include `rel`.
 *
 * @param {string} html - Document markup.
 * @param {string} rel - Relation token to look for (case-insensitive).
 * @param {string|null} baseUrl - When truthy, the href is passed through `resolveUrl`.
 * @returns {string|null} The (possibly resolved) href, or null when no tag or href is found.
 */
const linkByRel = (html, rel, baseUrl) => {
  const matching = linkTags(html).find((candidate) =>
    relParts(candidate).includes(rel.toLowerCase()),
  );
  if (!matching) return null;
  const href = attr(matching, "href");
  if (href && baseUrl) return resolveUrl(href, baseUrl);
  return href;
};
|
|
78
|
+
|
|
79
|
+
/**
 * `href` of the first `<link>` whose `rel` contains "icon", "shortcut" or
 * "apple-touch-icon".
 *
 * @param {string} html - Document markup.
 * @param {string|null} baseUrl - When truthy, the href is passed through `resolveUrl`.
 * @returns {string|null} The (possibly resolved) href, or null when nothing matches.
 */
const favicon = (html, baseUrl) => {
  const iconRels = ["icon", "shortcut", "apple-touch-icon"];
  const iconTag = linkTags(html).find((candidate) => {
    const tokens = relParts(candidate);
    return iconRels.some((token) => tokens.includes(token));
  });
  if (!iconTag) return null;
  const href = attr(iconTag, "href");
  if (href && baseUrl) return resolveUrl(href, baseUrl);
  return href;
};
|
|
87
|
+
|
|
88
|
+
/**
 * Collect `<link rel="alternate" hreflang="…" href="…">` entries.
 *
 * @param {string} html - Document markup.
 * @param {string|null} baseUrl - When truthy, each href is passed through `resolveUrl`.
 * @returns {Array<{hreflang: string, href: string}>} Entries whose final href is truthy.
 */
const hreflangLinks = (html, baseUrl) => {
  const alternates = [];
  for (const tag of linkTags(html)) {
    if (!relParts(tag).includes("alternate")) continue;
    const hreflang = attr(tag, "hreflang");
    const rawHref = attr(tag, "href");
    if (!hreflang || !rawHref) continue;
    const href = baseUrl ? resolveUrl(rawHref, baseUrl) : rawHref;
    if (href) alternates.push({ hreflang, href });
  }
  return alternates;
};
|
|
96
|
+
|
|
97
|
+
/**
 * Extract snippet/preview-related directives from robots-style meta values.
 *
 * Each truthy value is split on commas; trimmed pieces that start with one of
 * the recognised directive names are kept once, in first-seen order.
 *
 * @param {Array<string|null|undefined>} values - Raw directive strings.
 * @returns {string[]} Distinct matching directives, original casing preserved.
 */
const previewDirectives = (values) => {
  const directivePattern = /^(?:nosnippet|noarchive|noimageindex|max-(?:snippet|image-preview|video-preview):)/i;
  const seen = new Set();
  for (const value of values) {
    if (!value) continue;
    for (const piece of String(value).split(",")) {
      const directive = piece.trim();
      if (directivePattern.test(directive)) seen.add(directive);
    }
  }
  return [...seen];
};
|
|
105
|
+
|
|
106
|
+
/**
 * Collect `@type` names from a parsed JSON-LD value.
 *
 * Arrays are flattened; for objects the node's own `@type` (string or array)
 * is taken, followed by the types found under `@graph`. Other nested
 * properties are not visited.
 *
 * @param {*} value - Parsed JSON-LD (object, array, or anything else).
 * @returns {string[]} Type names in traversal order (duplicates kept).
 */
const structuredDataTypes = (value) => {
  if (Array.isArray(value)) return value.flatMap(structuredDataTypes);
  if (!value || typeof value !== "object") return [];

  const declared = value["@type"];
  let own = [];
  if (declared) own = Array.isArray(declared) ? declared.map(String) : [String(declared)];

  const graph = value["@graph"];
  return graph ? [...own, ...structuredDataTypes(graph)] : own;
};
|
|
119
|
+
|
|
120
|
+
/**
 * Count HTTP(S) links by whether they share the origin of `baseUrl`.
 *
 * Links that fail `isHttpUrl` are ignored. Without a `baseUrl` every counted
 * link is treated as external.
 *
 * @param {Array<{href: string}>} links - Extracted links.
 * @param {string|null} baseUrl - Page URL used for the same-origin comparison.
 * @returns {{internal: number, external: number}} Tallies.
 */
const linkCounts = (links, baseUrl) => {
  let internal = 0;
  let external = 0;
  for (const { href } of links) {
    if (!isHttpUrl(href)) continue;
    if (baseUrl && sameOrigin(baseUrl, href)) {
      internal += 1;
    } else {
      external += 1;
    }
  }
  return { internal, external };
};
|
|
129
|
+
|
|
130
|
+
export const extractHtmlEvidence = (html, baseUrl = null) => {
|
|
131
|
+
const title = firstTagContent(html, "title");
|
|
132
|
+
const description = metaByName(html, "description");
|
|
133
|
+
const robots = metaByName(html, "robots");
|
|
134
|
+
const googlebot = metaByName(html, "googlebot");
|
|
135
|
+
const canonical = linkByRel(html, "canonical", baseUrl);
|
|
136
|
+
const siteName = metaByProperty(html, "og:site_name") || metaByName(html, "application-name");
|
|
137
|
+
const alternates = hreflangLinks(html, baseUrl);
|
|
138
|
+
const faviconUrl = favicon(html, baseUrl);
|
|
139
|
+
|
|
140
|
+
const h1 = allMatches(html, /<h1\b[^>]*>([\s\S]*?)<\/h1>/gi, (match) => cleanText(match[1]));
|
|
141
|
+
const headings = allMatches(html, /<h([2-6])\b[^>]*>([\s\S]*?)<\/h\1>/gi, (match) => ({
|
|
142
|
+
level: Number(match[1]),
|
|
143
|
+
text: cleanText(match[2]),
|
|
144
|
+
}));
|
|
145
|
+
|
|
146
|
+
const links = allMatches(html, /<a\b[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi, (match) => ({
|
|
147
|
+
href: baseUrl ? resolveUrl(match[1], baseUrl) : match[1],
|
|
148
|
+
text: cleanText(match[2]),
|
|
149
|
+
})).filter((link) => link.href);
|
|
150
|
+
const countsByLinkType = linkCounts(links, baseUrl);
|
|
151
|
+
|
|
152
|
+
const images = allMatches(html, /<img\b[^>]*>/gi, (match) => {
|
|
153
|
+
const tag = match[0];
|
|
154
|
+
const src = attr(tag, "src");
|
|
155
|
+
return {
|
|
156
|
+
src: src && baseUrl ? resolveUrl(src, baseUrl) : src,
|
|
157
|
+
alt: attr(tag, "alt"),
|
|
158
|
+
};
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
const structuredData = allMatches(
|
|
162
|
+
html,
|
|
163
|
+
/<script\b[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi,
|
|
164
|
+
(match) => {
|
|
165
|
+
const raw = match[1].trim();
|
|
166
|
+
try {
|
|
167
|
+
return { data: JSON.parse(raw) };
|
|
168
|
+
} catch {
|
|
169
|
+
return { parseError: true, rawPreview: cleanText(raw).slice(0, 500) };
|
|
170
|
+
}
|
|
171
|
+
},
|
|
172
|
+
);
|
|
173
|
+
const schemaTypes = unique(structuredData.flatMap((item) => structuredDataTypes(item.data)));
|
|
174
|
+
|
|
175
|
+
const authors = metaContents(html, [
|
|
176
|
+
{ key: "name", value: "author" },
|
|
177
|
+
{ key: "property", value: "article:author" },
|
|
178
|
+
]);
|
|
179
|
+
const dates = unique([
|
|
180
|
+
...metaContents(html, [
|
|
181
|
+
{ key: "name", value: "date" },
|
|
182
|
+
{ key: "property", value: "article:published_time" },
|
|
183
|
+
{ key: "property", value: "article:modified_time" },
|
|
184
|
+
{ key: "name", value: "dc.date" },
|
|
185
|
+
]),
|
|
186
|
+
...allMatches(html, /<time\b[^>]*>/gi, (match) => attr(match[0], "datetime")),
|
|
187
|
+
]);
|
|
188
|
+
|
|
189
|
+
const visibleText = cleanText(
|
|
190
|
+
html
|
|
191
|
+
.replace(/<script[\s\S]*?<\/script>/gi, " ")
|
|
192
|
+
.replace(/<style[\s\S]*?<\/style>/gi, " "),
|
|
193
|
+
);
|
|
194
|
+
|
|
195
|
+
return {
|
|
196
|
+
title,
|
|
197
|
+
description,
|
|
198
|
+
robots,
|
|
199
|
+
canonical,
|
|
200
|
+
favicon: faviconUrl,
|
|
201
|
+
siteName,
|
|
202
|
+
hreflang: alternates,
|
|
203
|
+
previewDirectives: previewDirectives([robots, googlebot]),
|
|
204
|
+
h1,
|
|
205
|
+
headings,
|
|
206
|
+
links,
|
|
207
|
+
images,
|
|
208
|
+
structuredData,
|
|
209
|
+
schemaTypes,
|
|
210
|
+
entitySignals: {
|
|
211
|
+
authors,
|
|
212
|
+
dates,
|
|
213
|
+
},
|
|
214
|
+
counts: {
|
|
215
|
+
h1: h1.length,
|
|
216
|
+
headings: headings.length,
|
|
217
|
+
links: links.length,
|
|
218
|
+
internalLinks: countsByLinkType.internal,
|
|
219
|
+
externalLinks: countsByLinkType.external,
|
|
220
|
+
images: images.length,
|
|
221
|
+
structuredData: structuredData.length,
|
|
222
|
+
visibleTextCharacters: visibleText.length,
|
|
223
|
+
},
|
|
224
|
+
visibleTextPreview: visibleText.slice(0, 1200),
|
|
225
|
+
};
|
|
226
|
+
};
|
package/src/index.mjs
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { readTextFileLimited, resolveLimits } from "./io-guards.mjs";
|
|
2
|
+
import { readLighthouseReport } from "./performance.mjs";
|
|
3
|
+
|
|
4
|
+
/**
 * Split one CSV record into cells.
 *
 * Commas inside double-quoted fields are kept as data, and a doubled quote
 * inside a quoted field (RFC 4180 escaping) produces one literal quote
 * character instead of being dropped.
 *
 * @param {string} line - A single CSV line without its line terminator.
 * @returns {string[]} Cell values with the surrounding quotes removed.
 */
const parseCsvLine = (line) => {
  const cells = [];
  let current = "";
  let quoted = false;
  for (let index = 0; index < line.length; index++) {
    const char = line[index];
    if (char === '"') {
      if (quoted && line[index + 1] === '"') {
        // Escaped quote: emit one quote and skip its partner.
        current += '"';
        index++;
      } else {
        quoted = !quoted;
      }
      continue;
    }
    if (char === "," && !quoted) {
      cells.push(current);
      current = "";
      continue;
    }
    current += char;
  }
  cells.push(current);
  return cells;
};
|
|
24
|
+
|
|
25
|
+
/**
 * Parse a numeric CSV cell, tolerating a percent sign.
 *
 * Blank or missing cells yield null rather than 0 (`Number("")` is 0), so an
 * absent metric is not reported as a measured zero.
 *
 * @param {*} value - Cell content.
 * @returns {number|null} The finite number, or null when blank or not numeric.
 */
const number = (value) => {
  const text = String(value ?? "").replace("%", "").trim();
  if (text === "") return null;
  const parsed = Number(text);
  return Number.isFinite(parsed) ? parsed : null;
};
|
|
29
|
+
|
|
30
|
+
/**
 * Read a Search Console CSV export into normalised rows.
 *
 * The first line supplies the column names (trimmed, lower-cased); each
 * remaining non-empty line becomes one row with `query`, `page`, `clicks`,
 * `impressions`, `ctr` and `position`.
 *
 * @param {string} filePath - Path to the CSV file.
 * @param {{limits?: object, security?: object}} [options] - Forwarded to the guarded file reader.
 * @returns {{type: string, source: string, rows: object[]}} Parsed export.
 */
export const readSearchConsoleCsv = (filePath, options = {}) => {
  const limits = resolveLimits(options.limits);
  const text = readTextFileLimited(filePath, {
    security: options.security,
    allowRestricted: true,
    limits,
    maxBytes: limits.maxIntegrationBytes,
  });
  const lines = text.trim().split(/\r?\n/);
  const headers = parseCsvLine(lines[0]).map((header) => header.trim().toLowerCase());

  const rows = [];
  for (const line of lines.slice(1)) {
    if (!line) continue;
    const cells = parseCsvLine(line);
    const row = Object.fromEntries(headers.map((header, index) => [header, cells[index] ?? ""]));
    rows.push({
      query: row.query || "",
      page: row.page || "",
      clicks: number(row.clicks),
      impressions: number(row.impressions),
      ctr: number(row.ctr),
      position: number(row.position),
    });
  }
  return { type: "search_console_csv", source: filePath, rows };
};
|
|
55
|
+
|
|
56
|
+
/**
 * Read a JSON export that is either an array of rows or an object with a
 * `rows` array.
 *
 * Any other shape (including JSON `null`, scalars, or a non-array `rows`)
 * yields an empty row list, so `rows` is always an array.
 *
 * @param {string} filePath - Path to the JSON file.
 * @param {string} type - Label stored on the result.
 * @param {{limits?: object, security?: object}} [options] - Forwarded to the guarded file reader.
 * @returns {{type: string, source: string, rows: Array}} Parsed export.
 * @throws {SyntaxError} When the file is not valid JSON.
 */
const readJsonRows = (filePath, type, options = {}) => {
  const limits = resolveLimits(options.limits);
  const text = readTextFileLimited(filePath, {
    security: options.security,
    allowRestricted: true,
    limits,
    maxBytes: limits.maxIntegrationBytes,
  });
  const parsed = JSON.parse(text);
  const candidate = Array.isArray(parsed) ? parsed : parsed?.rows;
  return { type, source: filePath, rows: Array.isArray(candidate) ? candidate : [] };
};
|
|
68
|
+
|
|
69
|
+
// Load a SERP export (JSON rows) tagged with the "serp_export" type.
export const readSerpExport = (filePath, options = {}) => {
  return readJsonRows(filePath, "serp_export", options);
};
|
|
70
|
+
|
|
71
|
+
// Load an AI-answer export (JSON rows) tagged with the "ai_answer_export" type.
export const readAiAnswers = (filePath, options = {}) => {
  return readJsonRows(filePath, "ai_answer_export", options);
};
|
|
72
|
+
|
|
73
|
+
/**
 * Load every configured integration file; unconfigured ones are null.
 *
 * @param {{searchConsole?: string, serp?: string, aiAnswers?: string, lighthouse?: string}} [integrations]
 *   File path per integration.
 * @param {object} [options] - Passed unchanged to each reader.
 * @returns {{searchConsole: object|null, serp: object|null, aiAnswers: object|null, lighthouse: object|null}}
 */
export const readIntegrations = (integrations = {}, options = {}) => {
  const { searchConsole, serp, aiAnswers, lighthouse } = integrations;
  // Readers run in the same order as the keys of the returned object.
  return {
    searchConsole: searchConsole ? readSearchConsoleCsv(searchConsole, options) : null,
    serp: serp ? readSerpExport(serp, options) : null,
    aiAnswers: aiAnswers ? readAiAnswers(aiAnswers, options) : null,
    lighthouse: lighthouse ? readLighthouseReport(lighthouse, options) : null,
  };
};
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import dns from "node:dns/promises";
|
|
2
|
+
import fs from "node:fs";
|
|
3
|
+
import net from "node:net";
|
|
4
|
+
|
|
5
|
+
// Default time and size ceilings applied when the caller supplies no override.
export const defaultLimits = {
  timeoutMs: 15000,
  maxHtmlBytes: 2_000_000,
  maxTextBytes: 1_000_000,
  maxFileBytes: 5_000_000,
  maxIntegrationBytes: 5_000_000,
};

/**
 * Merge caller overrides onto `defaultLimits`.
 *
 * Overrides whose value is `undefined` or `null` are ignored. Spreading them
 * in would replace a default with a non-number: a timer with an undefined
 * delay fires immediately, and `size > undefined` is always false, so the
 * guard would be turned off by accident.
 *
 * @param {object|null} [limits] - Partial overrides; unknown keys are kept.
 * @returns {object} A fresh limits object.
 */
export const resolveLimits = (limits = {}) => {
  const resolved = { ...defaultLimits };
  for (const [key, value] of Object.entries(limits || {})) {
    if (value !== undefined && value !== null) resolved[key] = value;
  }
  return resolved;
};
|
|
17
|
+
|
|
18
|
+
// Effective security mode: the configured `mode`, or "local" when none is set.
const securityMode = (security = {}) => (security && security.mode) || "local";
|
|
19
|
+
|
|
20
|
+
/**
 * Whether a hostname is on the fixed list of names that must never be fetched.
 *
 * The name is lower-cased, IPv6 brackets are removed, and trailing dots are
 * dropped so the fully-qualified spelling ("localhost.") is treated the same
 * as the bare name.
 *
 * @param {string} hostname - Host portion of a URL.
 * @returns {boolean} True for localhost names and the GCE metadata host.
 */
const isBlockedHostname = (hostname) => {
  const normalized = String(hostname || "")
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .replace(/\.+$/, "");
  return (
    normalized === "localhost" ||
    normalized.endsWith(".localhost") ||
    normalized === "ip6-localhost" ||
    normalized === "metadata.google.internal"
  );
};
|
|
29
|
+
|
|
30
|
+
/**
 * Whether a dotted-quad IPv4 address lies in a range that is not publicly
 * routable unicast space.
 *
 * Covered: 0/8, 10/8, 100.64/10 (CGNAT), 127/8, 169.254/16 (link-local),
 * 172.16/12, 192.0.0/24 (IETF protocol assignments), 192.168/16,
 * 198.18/15 (benchmarking) and 224/3 (multicast, reserved, broadcast).
 *
 * @param {string} address - Candidate address.
 * @returns {boolean} True when in a listed range; false otherwise, including
 *   for strings that are not four decimal octets in 0-255.
 */
const isPrivateIpv4 = (address) => {
  const octets = String(address).split(".");
  // Digits only: Number() would also accept "", " 1" or "1e1".
  if (octets.length !== 4 || octets.some((octet) => !/^\d{1,3}$/.test(octet))) return false;
  const values = octets.map(Number);
  if (values.some((value) => value > 255)) return false;
  const [a, b, c] = values;
  return (
    a === 0 ||
    a === 10 ||
    a === 127 ||
    (a === 100 && b >= 64 && b <= 127) ||
    (a === 169 && b === 254) ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 0 && c === 0) ||
    (a === 192 && b === 168) ||
    (a === 198 && (b === 18 || b === 19)) ||
    a >= 224
  );
};
|
|
45
|
+
|
|
46
|
+
/**
 * Expand an IPv6 literal into its eight 16-bit groups.
 *
 * Accepts `::` compression, a dotted IPv4 tail and a `%zone` suffix.
 *
 * @param {string} address - IPv6 text.
 * @returns {number[]|null} Eight group values, or null when the text cannot be parsed.
 */
const expandIpv6Groups = (address) => {
  let text = String(address).toLowerCase().split("%")[0];

  // Rewrite a dotted IPv4 tail ("::ffff:10.0.0.1") as two hex groups.
  const tailStart = text.lastIndexOf(":") + 1;
  const tail = text.slice(tailStart);
  if (tail.includes(".")) {
    const octets = tail.split(".");
    const valid =
      octets.length === 4 && octets.every((octet) => /^\d{1,3}$/.test(octet) && Number(octet) <= 255);
    if (!valid) return null;
    const [a, b, c, d] = octets.map(Number);
    text = `${text.slice(0, tailStart)}${((a << 8) | b).toString(16)}:${((c << 8) | d).toString(16)}`;
  }

  const halves = text.split("::");
  if (halves.length > 2) return null;
  const compressed = halves.length === 2;
  const toGroups = (part) => (part === "" ? [] : part.split(":"));
  const head = toGroups(halves[0]);
  const rest = compressed ? toGroups(halves[1]) : [];
  const gap = 8 - head.length - rest.length;
  if (compressed ? gap < 1 : gap !== 0) return null;

  const hextets = [...head, ...Array(compressed ? gap : 0).fill("0"), ...rest];
  if (hextets.some((hextet) => !/^[0-9a-f]{1,4}$/.test(hextet))) return null;
  return hextets.map((hextet) => Number.parseInt(hextet, 16));
};

/**
 * Whether an IPv6 address must be treated as non-public.
 *
 * The address is compared numerically rather than by string prefix, so
 * non-canonical spellings of loopback ("0:0:0:0:0:0:0:1") and the unspecified
 * address ("::") are caught too. Blocked: ::/96 (unspecified, loopback,
 * IPv4-compatible), all IPv4-mapped addresses (::ffff:0:0/96), link-local
 * fe80::/10 and unique-local fc00::/7.
 *
 * @param {string} address - IPv6 literal.
 * @returns {boolean} True when blocked. Text that cannot be parsed also
 *   returns true, so the guard fails closed.
 */
const isPrivateIpv6 = (address) => {
  const groups = expandIpv6Groups(address);
  if (!groups) return true;
  const first = groups[0];
  const leadingZeros = groups.slice(0, 5).every((group) => group === 0);
  return (
    (leadingZeros && (groups[5] === 0 || groups[5] === 0xffff)) ||
    (first & 0xffc0) === 0xfe80 ||
    (first & 0xfe00) === 0xfc00
  );
};
|
|
57
|
+
|
|
58
|
+
/**
 * Dispatch an IP literal to the matching per-family private-range check.
 *
 * @param {string} address - IPv4 or IPv6 literal.
 * @returns {boolean} Result of the family check; false when `net.isIP` does not
 *   recognise the text as an IP address.
 */
const isPrivateAddress = (address) => {
  switch (net.isIP(address)) {
    case 4:
      return isPrivateIpv4(address);
    case 6:
      return isPrivateIpv6(address);
    default:
      return false;
  }
};
|
|
64
|
+
|
|
65
|
+
/**
 * Resolve a hostname to the list of addresses it maps to.
 *
 * IP literals are returned unchanged without a lookup. A failed lookup
 * yields an empty list instead of throwing.
 *
 * @param {string} hostname - Host name or IP literal.
 * @returns {Promise<string[]>} Addresses in the order the resolver returned them.
 */
const resolveAddresses = async (hostname) => {
  if (net.isIP(hostname) !== 0) return [hostname];
  try {
    const records = await dns.lookup(hostname, { all: true, verbatim: true });
    return Array.from(records, ({ address }) => address);
  } catch {
    return [];
  }
};
|
|
75
|
+
|
|
76
|
+
/**
 * In restricted security mode, reject URLs that are not HTTP(S) or that point
 * at a blocked hostname or a private address. Does nothing in other modes.
 *
 * @param {string} target - URL about to be fetched.
 * @param {object} [security] - Security settings; `mode` selects the behaviour.
 * @returns {Promise<void>}
 * @throws {Error} When the target is disallowed.
 * @throws {TypeError} When `target` is not a parseable URL (restricted mode only).
 */
export const assertUrlAllowed = async (target, security = {}) => {
  if (securityMode(security) !== "restricted") return;

  const { protocol, hostname: rawHostname } = new URL(target);
  if (protocol !== "http:" && protocol !== "https:") {
    throw new Error(`Restricted security mode only allows HTTP(S) URLs: ${target}`);
  }

  const hostname = rawHostname.replace(/^\[|\]$/g, "");
  // The DNS lookup only runs when the name itself is not already blocked.
  const blocked =
    isBlockedHostname(hostname) || (await resolveAddresses(hostname)).some(isPrivateAddress);
  if (blocked) {
    throw new Error(`Restricted security mode blocks private network target: ${target}`);
  }
};
|
|
93
|
+
|
|
94
|
+
/**
 * Reject local file access in restricted security mode unless the caller
 * explicitly opts in.
 *
 * @param {string} target - File path, used only in the error message.
 * @param {object} [security] - Security settings; `mode` selects the behaviour.
 * @param {{allowRestricted?: boolean}} [options] - Set `allowRestricted` to permit the read.
 * @throws {Error} When restricted mode forbids the read.
 */
export const assertFileAllowed = (target, security = {}, options = {}) => {
  const restricted = securityMode(security) === "restricted";
  if (!restricted || options.allowRestricted) return;
  throw new Error(`Local file targets are disabled by restricted security mode: ${target}`);
};
|
|
99
|
+
|
|
100
|
+
/**
 * Read a UTF-8 text file after checking that it is a regular file no larger
 * than the byte limit.
 *
 * @param {string} filePath - File to read.
 * @param {{limits?: object, maxBytes?: number, security?: object, allowRestricted?: boolean}} [options]
 *   `maxBytes` overrides `limits.maxFileBytes`; the security check runs only
 *   when `security` is provided.
 * @returns {string} File contents.
 * @throws {Error} When access is disallowed, the path is not a regular file,
 *   or the file exceeds the limit.
 */
export const readTextFileLimited = (filePath, options = {}) => {
  const { maxFileBytes } = resolveLimits(options.limits);
  const maxBytes = options.maxBytes ?? maxFileBytes;
  const { security, allowRestricted } = options;
  if (security) assertFileAllowed(filePath, security, { allowRestricted });

  const info = fs.statSync(filePath);
  if (!info.isFile()) {
    throw new Error(`Only regular files can be read: ${filePath}`);
  }
  if (info.size > maxBytes) {
    throw new Error(`File ${filePath} exceeds ${maxBytes} bytes.`);
  }
  return fs.readFileSync(filePath, "utf8");
};
|
|
114
|
+
|
|
115
|
+
/**
 * `fetch` wrapper that enforces the URL allow-check and a header timeout.
 *
 * Redirects default to "manual", and restricted mode refuses any other redirect
 * policy. A `signal` supplied in `fetchOptions` is honoured: aborting it
 * aborts the request. Only an abort caused by this function's own timer is
 * reported as a timeout.
 *
 * @param {string} target - URL to fetch.
 * @param {{limits?: object, security?: object, fetchOptions?: object}} [options]
 * @returns {Promise<Response>} The response once headers have arrived.
 * @throws {Error} On a disallowed target, a disallowed redirect policy, or timeout.
 */
export const fetchWithGuards = async (target, options = {}) => {
  const limits = resolveLimits(options.limits);
  const redirect = options.fetchOptions?.redirect || "manual";
  if (securityMode(options.security) === "restricted" && redirect !== "manual") {
    throw new Error("Restricted security mode requires manual redirects.");
  }
  await assertUrlAllowed(target, options.security);

  const controller = new AbortController();

  // Chain the caller's signal into ours, since fetch accepts only one signal.
  const callerSignal = options.fetchOptions?.signal;
  const forwardAbort = () => controller.abort(callerSignal.reason);
  if (callerSignal?.aborted) forwardAbort();
  else callerSignal?.addEventListener("abort", forwardAbort, { once: true });

  let timedOut = false;
  const timeout = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, limits.timeoutMs);
  try {
    return await fetch(target, {
      redirect: "manual",
      ...options.fetchOptions,
      signal: controller.signal,
    });
  } catch (error) {
    if (timedOut && error?.name === "AbortError") {
      throw new Error(`Timed out fetching ${target} after ${limits.timeoutMs} ms.`, { cause: error });
    }
    throw error;
  } finally {
    clearTimeout(timeout);
    callerSignal?.removeEventListener("abort", forwardAbort);
  }
};
|
|
140
|
+
|
|
141
|
+
export const readResponseTextLimited = async (response, options = {}) => {
|
|
142
|
+
const limits = resolveLimits(options.limits);
|
|
143
|
+
const maxBytes = options.maxBytes ?? limits.maxTextBytes;
|
|
144
|
+
const label = options.label || response.url || "response";
|
|
145
|
+
const timeoutError = () => new Error(`Timed out reading response body for ${label} after ${limits.timeoutMs} ms.`);
|
|
146
|
+
const contentLength = Number(response.headers.get("content-length"));
|
|
147
|
+
if (Number.isFinite(contentLength) && contentLength > maxBytes) {
|
|
148
|
+
throw new Error(`Response body for ${label} exceeds ${maxBytes} bytes.`);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if (!response.body) {
|
|
152
|
+
let timeout;
|
|
153
|
+
const text = await Promise.race([
|
|
154
|
+
response.text(),
|
|
155
|
+
new Promise((resolve, reject) => {
|
|
156
|
+
timeout = setTimeout(() => reject(timeoutError()), limits.timeoutMs);
|
|
157
|
+
}),
|
|
158
|
+
]).finally(() => clearTimeout(timeout));
|
|
159
|
+
if (Buffer.byteLength(text, "utf8") > maxBytes) {
|
|
160
|
+
throw new Error(`Response body for ${label} exceeds ${maxBytes} bytes.`);
|
|
161
|
+
}
|
|
162
|
+
return text;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
const reader = response.body.getReader();
|
|
166
|
+
const decoder = new TextDecoder();
|
|
167
|
+
let received = 0;
|
|
168
|
+
let text = "";
|
|
169
|
+
let timeout;
|
|
170
|
+
let timedOut = false;
|
|
171
|
+
const timeoutPromise = new Promise((resolve, reject) => {
|
|
172
|
+
timeout = setTimeout(() => {
|
|
173
|
+
timedOut = true;
|
|
174
|
+
reader.cancel().catch(() => {});
|
|
175
|
+
reject(timeoutError());
|
|
176
|
+
}, limits.timeoutMs);
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
try {
|
|
180
|
+
while (true) {
|
|
181
|
+
const { done, value } = await Promise.race([reader.read(), timeoutPromise]);
|
|
182
|
+
if (timedOut) throw timeoutError();
|
|
183
|
+
if (done) break;
|
|
184
|
+
received += value.byteLength;
|
|
185
|
+
if (received > maxBytes) {
|
|
186
|
+
await reader.cancel();
|
|
187
|
+
throw new Error(`Response body for ${label} exceeds ${maxBytes} bytes.`);
|
|
188
|
+
}
|
|
189
|
+
text += decoder.decode(value, { stream: true });
|
|
190
|
+
}
|
|
191
|
+
} finally {
|
|
192
|
+
clearTimeout(timeout);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
return text + decoder.decode();
|
|
196
|
+
};
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { implementationTaskFor } from "./finding-task.mjs";
|
|
2
|
+
import { readTextFileLimited, resolveLimits } from "./io-guards.mjs";
|
|
3
|
+
import { getRule } from "./rules.mjs";
|
|
4
|
+
|
|
5
|
+
/**
 * `numericValue` of one Lighthouse audit, when it is a finite number.
 *
 * @param {object|null|undefined} audits - The report's `audits` map.
 * @param {string} id - Audit identifier.
 * @returns {number|null} The value, or null when missing or not a finite number.
 */
const numericAuditValue = (audits, id) => {
  const audit = audits?.[id];
  const numeric = audit?.numericValue;
  if (!Number.isFinite(numeric)) return null;
  return numeric;
};
|
|
9
|
+
|
|
10
|
+
// Multiply a finite score by 100 and round to an integer; anything else maps to null.
const scoreToPercent = (score) => (Number.isFinite(score) ? Math.round(score * 100) : null);
|
|
14
|
+
|
|
15
|
+
// Best available label for a Lighthouse result: final URL, requested URL, source path, then a fixed fallback.
const lighthouseUrl = (evidence) => {
  if (evidence.finalUrl) return evidence.finalUrl;
  if (evidence.requestedUrl) return evidence.requestedUrl;
  if (evidence.source) return evidence.source;
  return "Lighthouse report";
};
|
|
16
|
+
|
|
17
|
+
// Effort estimate by severity: "M" for P0 and P1, "S" for everything else.
const effortFor = (severity) => (["P0", "P1"].includes(severity) ? "M" : "S");
|
|
18
|
+
|
|
19
|
+
/**
 * Build a finding record for a performance rule.
 *
 * @param {string} ruleId - Identifier passed to `getRule`.
 * @param {object} lighthouse - Imported Lighthouse summary; supplies the affected URL.
 * @param {string[]} evidence - Evidence pointers stored on the finding.
 * @param {string} impact - Impact description stored on the finding.
 * @returns {object} Finding with owner "Engineering" and confidence "high".
 * @throws {Error} When `getRule` returns nothing for `ruleId`.
 */
const createPerformanceFinding = (ruleId, lighthouse, evidence, impact) => {
  const rule = getRule(ruleId);
  if (!rule) throw new Error(`Unknown rule: ${ruleId}`);

  const { id, title, defaultSeverity, dimension, recommendation, sources } = rule;
  const owner = "Engineering";
  const effort = effortFor(defaultSeverity);

  return {
    ruleId: id,
    title,
    severity: defaultSeverity,
    dimension,
    affectedUrls: [lighthouseUrl(lighthouse)],
    evidence,
    impact,
    recommendation,
    implementationTask: implementationTaskFor(rule, owner, effort),
    owner,
    effort,
    confidence: "high",
    sources,
  };
};
|
|
41
|
+
|
|
42
|
+
/**
 * Read a Lighthouse JSON report and keep the fields the audit uses.
 *
 * @param {string} filePath - Path to the Lighthouse JSON file.
 * @param {{limits?: object, security?: object}} [options] - Forwarded to the guarded file reader.
 * @returns {object} Summary with `performanceScore` (0-100 or null) and
 *   `metrics` (`lcpMs`, `cls`, `tbtMs`, each a number or null).
 * @throws {SyntaxError} When the file is not valid JSON.
 */
export const readLighthouseReport = (filePath, options = {}) => {
  const limits = resolveLimits(options.limits);
  const text = readTextFileLimited(filePath, {
    security: options.security,
    allowRestricted: true,
    limits,
    maxBytes: limits.maxIntegrationBytes,
  });
  const report = JSON.parse(text);
  const performanceScore = scoreToPercent(report.categories?.performance?.score);
  const metric = (auditId) => numericAuditValue(report.audits, auditId);

  return {
    type: "lighthouse",
    source: filePath,
    lighthouseVersion: report.lighthouseVersion || null,
    requestedUrl: report.requestedUrl || null,
    finalUrl: report.finalDisplayedUrl || report.finalUrl || null,
    formFactor: report.configSettings?.formFactor || null,
    performanceScore,
    metrics: {
      lcpMs: metric("largest-contentful-paint"),
      cls: metric("cumulative-layout-shift"),
      tbtMs: metric("total-blocking-time"),
    },
  };
};
|
|
69
|
+
|
|
70
|
+
/**
 * Turn an imported Lighthouse summary into performance findings.
 *
 * Findings are emitted in this order, each only when its value is a finite
 * number: performance score below 50, LCP above 2500 ms, CLS above 0.1.
 *
 * @param {object|null|undefined} lighthouse - Result of `readLighthouseReport`.
 * @returns {object[]} Findings; empty when there is no report or nothing fails.
 */
export const evaluatePerformance = (lighthouse) => {
  if (!lighthouse) return [];

  const score = lighthouse.performanceScore;
  const lcpMs = lighthouse.metrics?.lcpMs;
  const cls = lighthouse.metrics?.cls;

  // `impact` is a thunk so the message is only built for checks that fail.
  const checks = [
    {
      failed: Number.isFinite(score) && score < 50,
      ruleId: "performance.lighthouse_poor",
      pointer: "$.integrations.lighthouse.performanceScore",
      impact: () => `Imported Lighthouse performance score is ${score}/100.`,
    },
    {
      failed: Number.isFinite(lcpMs) && lcpMs > 2500,
      ruleId: "performance.lcp_poor",
      pointer: "$.integrations.lighthouse.metrics.lcpMs",
      impact: () => `Largest Contentful Paint is ${Math.round(lcpMs)} ms in the imported Lighthouse report.`,
    },
    {
      failed: Number.isFinite(cls) && cls > 0.1,
      ruleId: "performance.cls_poor",
      pointer: "$.integrations.lighthouse.metrics.cls",
      impact: () => `Cumulative Layout Shift is ${cls} in the imported Lighthouse report.`,
    },
  ];

  return checks
    .filter((check) => check.failed)
    .map((check) => createPerformanceFinding(check.ruleId, lighthouse, [check.pointer], check.impact()));
};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
export const maxRegexPatternLength = 200;
|
|
2
|
+
|
|
3
|
+
const nestedQuantifierPattern =
|
|
4
|
+
/\((?:\?:)?(?:[^()\\]|\\.)*(?:[+*?]|\{\d+,?\d*\})(?:[^()\\]|\\.)*\)(?:[+*?]|\{\d+,?\d*\})/;
|
|
5
|
+
|
|
6
|
+
// A quantified group with no nested parentheses that contains at least one "|".
const quantifiedAlternationPattern = /\((?:\?:)?([^()]*\|[^()]*)\)(?:[+*?]|\{\d+,?\d*\})/g;

/**
 * Detect quantified alternations in which one alternative is a prefix of
 * another, e.g. `(a|ab)+`.
 *
 * Backslash escapes inside each alternative are unwrapped before comparing,
 * and empty alternatives are ignored.
 *
 * @param {string} pattern - Regular-expression source text.
 * @returns {boolean} True when any quantified group has prefix-overlapping alternatives.
 */
const hasOverlappingAlternation = (pattern) => {
  // matchAll iterates over a clone, so the shared pattern's lastIndex stays at 0.
  for (const [, body] of String(pattern).matchAll(quantifiedAlternationPattern)) {
    const alternatives = body
      .split("|")
      .map((item) => item.replace(/\\(.)/g, "$1"))
      .filter(Boolean);
    const overlaps = alternatives.some((left, index) =>
      alternatives.slice(index + 1).some((right) => left.startsWith(right) || right.startsWith(left)),
    );
    if (overlaps) return true;
  }
  return false;
};
|
|
30
|
+
|
|
31
|
+
/**
 * Explain why a regular-expression source is rejected, if it is.
 *
 * Checks run in order: length limit, overlapping alternation, nested
 * quantifiers. The first failing check decides the message.
 *
 * @param {string} pattern - Regular-expression source text.
 * @returns {string|null} Human-readable reason, or null when no check fails.
 */
export const unsafeRegexReason = (pattern) => {
  const checks = [
    [() => pattern.length > maxRegexPatternLength, `pattern is longer than ${maxRegexPatternLength} characters`],
    [() => hasOverlappingAlternation(pattern), "pattern contains overlapping alternation"],
    [() => nestedQuantifierPattern.test(pattern), "pattern contains nested quantifiers"],
  ];
  const failed = checks.find(([fails]) => fails());
  return failed ? failed[1] : null;
};
|
|
43
|
+
|
|
44
|
+
/**
 * Throw when `unsafeRegexReason` reports a problem with `pattern`.
 *
 * @param {string} pattern - Regular-expression source text.
 * @param {string} [key="pattern"] - Name used at the start of the error message.
 * @throws {Error} When the pattern is considered unsafe.
 */
export const assertSafeRegexPattern = (pattern, key = "pattern") => {
  const reason = unsafeRegexReason(pattern);
  if (!reason) return;
  throw new Error(`${key} contains an unsafe regular expression: ${reason}`);
};
|
|
48
|
+
|
|
49
|
+
/**
 * Validate a pattern with `assertSafeRegexPattern`, then compile it without flags.
 *
 * @param {string} pattern - Regular-expression source text.
 * @param {string} [key="pattern"] - Name used in the validation error message.
 * @returns {RegExp} The compiled expression.
 * @throws {Error} When the pattern is considered unsafe.
 * @throws {SyntaxError} When the pattern is not a valid regular expression.
 */
export const compileSafeRegex = (pattern, key = "pattern") => {
  assertSafeRegexPattern(pattern, key);
  const compiled = new RegExp(pattern);
  return compiled;
};
|