@pseolint/core 0.4.0 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auditor.d.ts +12 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +317 -43
- package/dist/auditor.js.map +1 -1
- package/dist/formatters/bucket-findings.d.ts +43 -0
- package/dist/formatters/bucket-findings.d.ts.map +1 -0
- package/dist/formatters/bucket-findings.js +110 -0
- package/dist/formatters/bucket-findings.js.map +1 -0
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +103 -34
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/fixplan.d.ts +13 -0
- package/dist/formatters/fixplan.d.ts.map +1 -0
- package/dist/formatters/fixplan.js +328 -0
- package/dist/formatters/fixplan.js.map +1 -0
- package/dist/formatters/html.d.ts.map +1 -1
- package/dist/formatters/html.js +27 -0
- package/dist/formatters/html.js.map +1 -1
- package/dist/formatters/index.d.ts +2 -0
- package/dist/formatters/index.d.ts.map +1 -1
- package/dist/formatters/index.js +1 -0
- package/dist/formatters/index.js.map +1 -1
- package/dist/formatters/markdown.d.ts.map +1 -1
- package/dist/formatters/markdown.js +77 -7
- package/dist/formatters/markdown.js.map +1 -1
- package/dist/page-filter.d.ts +108 -0
- package/dist/page-filter.d.ts.map +1 -0
- package/dist/page-filter.js +207 -0
- package/dist/page-filter.js.map +1 -0
- package/dist/rules/aeo/answer-first.d.ts.map +1 -1
- package/dist/rules/aeo/answer-first.js +17 -3
- package/dist/rules/aeo/answer-first.js.map +1 -1
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
- package/dist/rules/aeo/citable-facts.js +12 -1
- package/dist/rules/aeo/citable-facts.js.map +1 -1
- package/dist/rules/aeo/content-modularity.d.ts.map +1 -1
- package/dist/rules/aeo/content-modularity.js +3 -0
- package/dist/rules/aeo/content-modularity.js.map +1 -1
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
- package/dist/rules/aeo/crawler-access.js +6 -0
- package/dist/rules/aeo/crawler-access.js.map +1 -1
- package/dist/rules/aeo/faq-coverage.d.ts.map +1 -1
- package/dist/rules/aeo/faq-coverage.js +4 -0
- package/dist/rules/aeo/faq-coverage.js.map +1 -1
- package/dist/rules/aeo/freshness-signals.d.ts.map +1 -1
- package/dist/rules/aeo/freshness-signals.js +9 -2
- package/dist/rules/aeo/freshness-signals.js.map +1 -1
- package/dist/rules/aeo/llms-txt.d.ts.map +1 -1
- package/dist/rules/aeo/llms-txt.js +6 -1
- package/dist/rules/aeo/llms-txt.js.map +1 -1
- package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
- package/dist/rules/aeo/summary-bait.js +5 -2
- package/dist/rules/aeo/summary-bait.js.map +1 -1
- package/dist/rules/content/missing-author.d.ts.map +1 -1
- package/dist/rules/content/missing-author.js +10 -2
- package/dist/rules/content/missing-author.js.map +1 -1
- package/dist/rules/spam/thin-content.d.ts.map +1 -1
- package/dist/rules/spam/thin-content.js +9 -1
- package/dist/rules/spam/thin-content.js.map +1 -1
- package/dist/site-classifier.d.ts +1 -1
- package/dist/site-classifier.d.ts.map +1 -1
- package/dist/site-classifier.js +216 -0
- package/dist/site-classifier.js.map +1 -1
- package/dist/types.d.ts +77 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
package/dist/auditor.d.ts
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
|
-
import type { AuditOptions, AuditSummary } from "./types.js";
|
|
1
|
+
import type { AuditOptions, AuditSummary, RuleResult } from "./types.js";
|
|
2
|
+
import { type SiteClassification } from "./site-classifier.js";
|
|
3
|
+
/**
|
|
4
|
+
* v0.4.3 — apply per-site-type severity + confidence overrides BEFORE any
|
|
5
|
+
* bucketing happens, so blocker/shouldFix counts and category buckets all
|
|
6
|
+
* reflect the user-visible severity, not the rule's native severity.
|
|
7
|
+
*
|
|
8
|
+
* Returns a NEW array of findings (does not mutate the input). Only the
|
|
9
|
+
* `severity` and `confidence` fields are remapped; everything else is
|
|
10
|
+
* preserved by reference.
|
|
11
|
+
*/
|
|
12
|
+
export declare function applyScoringProfileOverrides(findings: RuleResult[], classification: SiteClassification | undefined): RuleResult[];
|
|
2
13
|
export declare function auditSource(source: string, options?: AuditOptions): Promise<AuditSummary>;
|
|
3
14
|
//# sourceMappingURL=auditor.d.ts.map
|
package/dist/auditor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAyDA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,EAWZ,UAAU,EAGX,MAAM,YAAY,CAAC;AAQpB,OAAO,EAAgB,KAAK,kBAAkB,EAAiB,MAAM,sBAAsB,CAAC;AAwhB5F;;;;;;;;GAQG;AACH,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,UAAU,EAAE,EACtB,cAAc,EAAE,kBAAkB,GAAG,SAAS,GAC7C,UAAU,EAAE,CAed;AAquBD,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CA0wB/F"}
|
package/dist/auditor.js
CHANGED
|
@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
|
|
|
2
2
|
import { readdir, readFile, stat } from "node:fs/promises";
|
|
3
3
|
import { extname, join, resolve } from "node:path";
|
|
4
4
|
import { parseHtmlPage } from "./parser.js";
|
|
5
|
+
import { pageSkipReason } from "./page-filter.js";
|
|
5
6
|
import { mergeNormalizeUrlOptions, normalizeAuditUrl } from "./url-normalize.js";
|
|
6
7
|
import { eeatSignalsRule } from "./rules/content/eeat-signals.js";
|
|
7
8
|
import { metaUniquenessRule } from "./rules/content/meta-uniqueness.js";
|
|
@@ -76,17 +77,6 @@ const DEFAULTS = {
|
|
|
76
77
|
modularityMinSelfContainedRatio: 0.7,
|
|
77
78
|
faqMinQuestionHeadings: 2
|
|
78
79
|
};
|
|
79
|
-
/**
|
|
80
|
-
* v0.4 four-category weights. Audit is diagnostic-only (weight 0).
|
|
81
|
-
* See 2026-04-29 v0.4 redesign spec §4.2.
|
|
82
|
-
*/
|
|
83
|
-
const CATEGORY_WEIGHTS = {
|
|
84
|
-
integrity: 0.50, // spam + content + cannibal
|
|
85
|
-
discoverability: 0.20, // links + tech
|
|
86
|
-
citation: 0.25, // aeo + schema
|
|
87
|
-
data: 0.05, // data
|
|
88
|
-
audit: 0, // diagnostics, never weighted
|
|
89
|
-
};
|
|
90
80
|
/**
|
|
91
81
|
* Maps the v0.3 ruleId namespace prefix to the v0.4 four-bucket category.
|
|
92
82
|
* Used by `scoreFromFindings` to bucket findings without changing rule IDs.
|
|
@@ -102,6 +92,138 @@ const CATEGORY_MAP = {
|
|
|
102
92
|
data: "data",
|
|
103
93
|
audit: "audit",
|
|
104
94
|
};
|
|
95
|
+
const SCORING_PROFILES = {
|
|
96
|
+
"small-marketing": {
|
|
97
|
+
categoryWeights: { integrity: 0.30, discoverability: 0.40, citation: 0.20, data: 0.05, audit: 0 },
|
|
98
|
+
severityOverrides: {
|
|
99
|
+
"aeo/citable-facts": "info",
|
|
100
|
+
"aeo/answer-first": "info",
|
|
101
|
+
"aeo/summary-bait": "warning",
|
|
102
|
+
"spam/thin-content": "warning",
|
|
103
|
+
},
|
|
104
|
+
confidenceOverrides: {
|
|
105
|
+
"aeo/citable-facts": "low",
|
|
106
|
+
"aeo/answer-first": "low",
|
|
107
|
+
"aeo/summary-bait": "medium",
|
|
108
|
+
"spam/thin-content": "medium",
|
|
109
|
+
},
|
|
110
|
+
},
|
|
111
|
+
"blog": {
|
|
112
|
+
categoryWeights: { integrity: 0.40, discoverability: 0.25, citation: 0.30, data: 0.05, audit: 0 },
|
|
113
|
+
severityOverrides: {
|
|
114
|
+
"content/missing-author": "error",
|
|
115
|
+
"spam/thin-content": "error",
|
|
116
|
+
},
|
|
117
|
+
confidenceOverrides: {},
|
|
118
|
+
},
|
|
119
|
+
"programmatic-directory": {
|
|
120
|
+
categoryWeights: { integrity: 0.55, discoverability: 0.15, citation: 0.20, data: 0.10, audit: 0 },
|
|
121
|
+
severityOverrides: {},
|
|
122
|
+
confidenceOverrides: {},
|
|
123
|
+
},
|
|
124
|
+
"ecommerce": {
|
|
125
|
+
categoryWeights: { integrity: 0.20, discoverability: 0.40, citation: 0.15, data: 0.25, audit: 0 },
|
|
126
|
+
severityOverrides: {
|
|
127
|
+
"aeo/citable-facts": "info",
|
|
128
|
+
"schema/required-fields": "error",
|
|
129
|
+
},
|
|
130
|
+
confidenceOverrides: {
|
|
131
|
+
"aeo/citable-facts": "low",
|
|
132
|
+
},
|
|
133
|
+
},
|
|
134
|
+
"docs": {
|
|
135
|
+
categoryWeights: { integrity: 0.30, discoverability: 0.30, citation: 0.30, data: 0.10, audit: 0 },
|
|
136
|
+
severityOverrides: {
|
|
137
|
+
"aeo/citable-facts": "info",
|
|
138
|
+
"aeo/answer-first": "warning",
|
|
139
|
+
"content/missing-author": "info",
|
|
140
|
+
},
|
|
141
|
+
confidenceOverrides: {
|
|
142
|
+
"aeo/citable-facts": "low",
|
|
143
|
+
"aeo/answer-first": "low",
|
|
144
|
+
"content/missing-author": "low",
|
|
145
|
+
},
|
|
146
|
+
},
|
|
147
|
+
"unclear": {
|
|
148
|
+
categoryWeights: { integrity: 0.50, discoverability: 0.20, citation: 0.25, data: 0.05, audit: 0 },
|
|
149
|
+
severityOverrides: {},
|
|
150
|
+
confidenceOverrides: {},
|
|
151
|
+
},
|
|
152
|
+
};
|
|
153
|
+
/**
|
|
154
|
+
* Pick the scoring profile for a classification. Falls back to `unclear`
|
|
155
|
+
* (the conservative default) when classifier confidence is below 70%.
|
|
156
|
+
*/
|
|
157
|
+
function profileFor(classification) {
|
|
158
|
+
if (!classification || classification.confidence < 0.7)
|
|
159
|
+
return SCORING_PROFILES.unclear;
|
|
160
|
+
return SCORING_PROFILES[classification.type] ?? SCORING_PROFILES.unclear;
|
|
161
|
+
}
|
|
162
|
+
const RULE_IMPACTS = {
|
|
163
|
+
// SpamBrain — high baseline, count amplifies (cluster matters)
|
|
164
|
+
"spam/near-duplicate": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
|
|
165
|
+
"spam/entity-swap": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
|
|
166
|
+
"spam/doorway-pattern": { baseImpact: 30, perInstance: 0, maxImpact: 30 },
|
|
167
|
+
"spam/template-coverage": { baseImpact: 15, perInstance: 3, maxImpact: 60 },
|
|
168
|
+
"spam/template-diversity": { baseImpact: 12, perInstance: 3, maxImpact: 50 },
|
|
169
|
+
"spam/boilerplate-ratio": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
170
|
+
"spam/thin-content": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
|
|
171
|
+
"spam/publication-velocity": { baseImpact: 8, perInstance: 2, maxImpact: 30 },
|
|
172
|
+
"cannibal/url-pattern": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
173
|
+
// Content
|
|
174
|
+
"content/unique-value": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
175
|
+
"content/meta-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
|
|
176
|
+
"content/missing-author": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
|
|
177
|
+
"content/eeat-signals": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
|
|
178
|
+
// Tech — softened in v0.4.3-rc2 after dogfood showed nextjs.org regressing
|
|
179
|
+
// from ready→caution on tech/canonical-consistency × 4 (legit cross-domain
|
|
180
|
+
// canonicals on a CDN). Per-instance now 1 (was 3).
|
|
181
|
+
"tech/canonical-consistency": { baseImpact: 8, perInstance: 1, maxImpact: 25 },
|
|
182
|
+
"tech/canonical-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
183
|
+
"tech/robots-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
184
|
+
"tech/redirect-chain": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
|
|
185
|
+
"tech/sitemap-completeness": { baseImpact: 8, perInstance: 1, maxImpact: 30 },
|
|
186
|
+
"tech/robots-sitemap-presence": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
|
|
187
|
+
"tech/soft-404": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
|
|
188
|
+
// hreflang — one bad declaration breaks all language pairs, so the COUNT
|
|
189
|
+
// doesn't compound. perInstance: 0 keeps it at the base impact regardless
|
|
190
|
+
// of how many language pairs are affected. Dogfood showed 350 findings on
|
|
191
|
+
// stripe.com from a single missing reciprocal pair — that should not be
|
|
192
|
+
// treated as 350× the impact.
|
|
193
|
+
"tech/hreflang-consistency": { baseImpact: 5, perInstance: 0, maxImpact: 5 },
|
|
194
|
+
// Links
|
|
195
|
+
"links/orphan-pages": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
|
|
196
|
+
"links/dead-ends": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
|
|
197
|
+
"links/cluster-connectivity": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
|
|
198
|
+
"links/link-depth": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
|
|
199
|
+
// AEO — much lower baselines than spam (AEO is opt-in optimization)
|
|
200
|
+
"aeo/citable-facts": { baseImpact: 2, perInstance: 1, maxImpact: 25 },
|
|
201
|
+
"aeo/answer-first": { baseImpact: 3, perInstance: 1, maxImpact: 25 },
|
|
202
|
+
"aeo/summary-bait": { baseImpact: 4, perInstance: 1, maxImpact: 25 },
|
|
203
|
+
"aeo/crawler-access": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
|
|
204
|
+
"aeo/freshness-signals": { baseImpact: 2, perInstance: 1, maxImpact: 20 },
|
|
205
|
+
"aeo/llms-txt": { baseImpact: 4, perInstance: 0, maxImpact: 4 },
|
|
206
|
+
"aeo/faq-coverage": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
|
|
207
|
+
"aeo/content-modularity": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
|
|
208
|
+
// Schema
|
|
209
|
+
"schema/json-ld-valid": { baseImpact: 8, perInstance: 2, maxImpact: 35 },
|
|
210
|
+
"schema/required-fields": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
|
|
211
|
+
"schema/consistency": { baseImpact: 3, perInstance: 1, maxImpact: 15 },
|
|
212
|
+
// Data
|
|
213
|
+
"data/data-binding": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
|
|
214
|
+
};
|
|
215
|
+
const DEFAULT_RULE_IMPACT = { baseImpact: 5, perInstance: 1, maxImpact: 25 };
|
|
216
|
+
/**
|
|
217
|
+
* v0.4.3 — confidence-based discount applied to each finding's impact.
|
|
218
|
+
* Low-confidence findings contribute less to the bucket so they don't
|
|
219
|
+
* inflate the verdict on site types where they false-positive.
|
|
220
|
+
*/
|
|
221
|
+
const CONFIDENCE_MULTIPLIER = {
|
|
222
|
+
high: 1.0,
|
|
223
|
+
medium: 0.6,
|
|
224
|
+
low: 0.3,
|
|
225
|
+
speculative: 0.1,
|
|
226
|
+
};
|
|
105
227
|
/** Slug map for `RuleResult.docsUrl`. Defaults to the rule-id segment after the `/`. */
|
|
106
228
|
const RULE_DOCS_SLUG = {
|
|
107
229
|
// intentionally empty for v0.4 — slug = ruleId.split("/").pop() works for every shipped rule
|
|
@@ -172,7 +294,16 @@ function resolveGroupRules(baseRules, overrides) {
|
|
|
172
294
|
}
|
|
173
295
|
return result;
|
|
174
296
|
}
|
|
175
|
-
function runRulesOnPages(pages,
|
|
297
|
+
function runRulesOnPages(pages,
|
|
298
|
+
/**
|
|
299
|
+
* Full set of parsed pages including those filtered out by `respectNoindex`
|
|
300
|
+
* / `skipDetectedAuth`. Defaults to `pages` for backwards compat. The two
|
|
301
|
+
* noindex-conflict rules (`tech/canonical-noindex-conflict`,
|
|
302
|
+
* `tech/robots-noindex-conflict`) read this list specifically — without it,
|
|
303
|
+
* `respectNoindex: true` would hide noindex'd pages from the very rules
|
|
304
|
+
* designed to flag accidental noindex'ing.
|
|
305
|
+
*/
|
|
306
|
+
noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides, mode = "full") {
|
|
176
307
|
const findings = [];
|
|
177
308
|
const modeOk = (ruleId) => mode !== "diff" || isRuleAllowedInDiff(ruleId);
|
|
178
309
|
const tag = (results) => results.map((r) => {
|
|
@@ -245,10 +376,10 @@ function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls,
|
|
|
245
376
|
findings.push(...tag(canonicalConsistencyRule(pages, knownUrls, normalizeUrlOptions)));
|
|
246
377
|
}
|
|
247
378
|
if (isEnabled("tech/canonical-noindex-conflict") && modeOk("tech/canonical-noindex-conflict")) {
|
|
248
|
-
findings.push(...tag(canonicalNoindexConflictRule(
|
|
379
|
+
findings.push(...tag(canonicalNoindexConflictRule(noindexAwarePages, normalizeUrlOptions)));
|
|
249
380
|
}
|
|
250
381
|
if (isEnabled("tech/robots-noindex-conflict") && modeOk("tech/robots-noindex-conflict")) {
|
|
251
|
-
findings.push(...tag(robotsNoindexConflictRule(
|
|
382
|
+
findings.push(...tag(robotsNoindexConflictRule(noindexAwarePages, inbound)));
|
|
252
383
|
}
|
|
253
384
|
if (isEnabled("tech/redirect-chain") && modeOk("tech/redirect-chain")) {
|
|
254
385
|
findings.push(...tag(redirectChainRule(pages)));
|
|
@@ -257,7 +388,9 @@ function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls,
|
|
|
257
388
|
findings.push(...tag(soft404Rule(pages)));
|
|
258
389
|
}
|
|
259
390
|
if (isEnabled("tech/hreflang-consistency") && modeOk("tech/hreflang-consistency")) {
|
|
260
|
-
|
|
391
|
+
// hreflang declarations on noindex'd pages are still bugs when they're
|
|
392
|
+
// inconsistent — see auditor.test.ts "emits technical SEO findings".
|
|
393
|
+
findings.push(...tag(hreflangConsistencyRule(noindexAwarePages, normalizeUrlOptions)));
|
|
261
394
|
}
|
|
262
395
|
// Schema rules
|
|
263
396
|
if (isEnabled("schema/json-ld-valid") && modeOk("schema/json-ld-valid")) {
|
|
@@ -311,13 +444,47 @@ function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls,
|
|
|
311
444
|
function hashHtml(html) {
|
|
312
445
|
return createHash("sha256").update(html, "utf8").digest("hex");
|
|
313
446
|
}
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
447
|
+
/**
|
|
448
|
+
* v0.4.3 — apply per-site-type severity + confidence overrides BEFORE any
|
|
449
|
+
* bucketing happens, so blocker/shouldFix counts and category buckets all
|
|
450
|
+
* reflect the user-visible severity, not the rule's native severity.
|
|
451
|
+
*
|
|
452
|
+
* Returns a NEW array of findings (does not mutate the input). Only the
|
|
453
|
+
* `severity` and `confidence` fields are remapped; everything else is
|
|
454
|
+
* preserved by reference.
|
|
455
|
+
*/
|
|
456
|
+
export function applyScoringProfileOverrides(findings, classification) {
|
|
457
|
+
const profile = profileFor(classification);
|
|
458
|
+
const sevHas = Object.keys(profile.severityOverrides).length > 0;
|
|
459
|
+
const confHas = Object.keys(profile.confidenceOverrides).length > 0;
|
|
460
|
+
if (!sevHas && !confHas)
|
|
461
|
+
return findings;
|
|
462
|
+
return findings.map((f) => {
|
|
463
|
+
const newSev = profile.severityOverrides[f.ruleId];
|
|
464
|
+
const newConf = profile.confidenceOverrides[f.ruleId];
|
|
465
|
+
if (newSev === undefined && newConf === undefined)
|
|
466
|
+
return f;
|
|
467
|
+
return {
|
|
468
|
+
...f,
|
|
469
|
+
...(newSev !== undefined ? { severity: newSev } : {}),
|
|
470
|
+
...(newConf !== undefined ? { confidence: newConf } : {}),
|
|
471
|
+
};
|
|
472
|
+
});
|
|
473
|
+
}
|
|
474
|
+
/**
|
|
475
|
+
* v0.4.3 — confidence-and-count-aware scoring. Replaces the v0.4 model that
|
|
476
|
+
* counted only severity. Each rule has a `baseImpact + (count - 1) *
|
|
477
|
+
* perInstance` contribution capped by `maxImpact`. The result is multiplied
|
|
478
|
+
* by the finding's `confidence` (default `high` → 1.0). Per-site-type
|
|
479
|
+
* profiles can remap a rule's severity / confidence; this function expects
|
|
480
|
+
* those overrides to ALREADY be applied to the input findings.
|
|
481
|
+
*
|
|
482
|
+
* Bucket math: per-rule impacts sum into the rule's `CATEGORY_MAP` bucket;
|
|
483
|
+
* each bucket is then capped at 100 and weighted by the active scoring
|
|
484
|
+
* profile's `categoryWeights`.
|
|
485
|
+
*/
|
|
486
|
+
function scoreFromFindings(findings, classification) {
|
|
487
|
+
const profile = profileFor(classification);
|
|
321
488
|
// v0.4 four-bucket raw penalties.
|
|
322
489
|
const bucketRaw = {
|
|
323
490
|
integrity: 0,
|
|
@@ -336,18 +503,16 @@ function scoreFromFindings(findings) {
|
|
|
336
503
|
let blockers = 0;
|
|
337
504
|
let shouldFix = 0;
|
|
338
505
|
let informational = 0;
|
|
506
|
+
// Group findings by ruleId so we can apply baseImpact + perInstance.
|
|
507
|
+
// Each group's weighted impact lands in its category bucket.
|
|
508
|
+
const groups = new Map();
|
|
339
509
|
for (const finding of findings) {
|
|
340
510
|
const namespace = finding.ruleId.split("/")[0];
|
|
341
511
|
const bucket = CATEGORY_MAP[namespace];
|
|
342
512
|
if (!bucket)
|
|
343
513
|
continue;
|
|
344
|
-
|
|
345
|
-
// v0.4 buckets.
|
|
346
|
-
bucketRaw[bucket] = Math.min(100, bucketRaw[bucket] + weight);
|
|
347
|
-
if (bucket !== "audit") {
|
|
514
|
+
if (bucket !== "audit")
|
|
348
515
|
bucketIssues[bucket] += 1;
|
|
349
|
-
}
|
|
350
|
-
// Issue-bucket counts (audit/* findings are diagnostic-only and excluded).
|
|
351
516
|
if (bucket === "audit")
|
|
352
517
|
continue;
|
|
353
518
|
if (finding.severity === "critical" || finding.severity === "error")
|
|
@@ -356,11 +521,40 @@ function scoreFromFindings(findings) {
|
|
|
356
521
|
shouldFix += 1;
|
|
357
522
|
else
|
|
358
523
|
informational += 1;
|
|
524
|
+
const arr = groups.get(finding.ruleId) ?? [];
|
|
525
|
+
arr.push(finding);
|
|
526
|
+
groups.set(finding.ruleId, arr);
|
|
359
527
|
}
|
|
360
|
-
const
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
528
|
+
for (const [ruleId, group] of groups) {
|
|
529
|
+
const namespace = ruleId.split("/")[0];
|
|
530
|
+
const bucket = CATEGORY_MAP[namespace];
|
|
531
|
+
if (!bucket || bucket === "audit")
|
|
532
|
+
continue;
|
|
533
|
+
const impactSpec = RULE_IMPACTS[ruleId] ?? DEFAULT_RULE_IMPACT;
|
|
534
|
+
const count = group.length;
|
|
535
|
+
const rawImpact = impactSpec.baseImpact + Math.max(0, count - 1) * impactSpec.perInstance;
|
|
536
|
+
const cap = impactSpec.maxImpact ?? Number.POSITIVE_INFINITY;
|
|
537
|
+
const cappedImpact = Math.min(cap, rawImpact);
|
|
538
|
+
// Confidence multiplier — use the WORST (highest-multiplier) confidence
|
|
539
|
+
// in the group so a rule that fires repeatedly with mixed confidence is
|
|
540
|
+
// not unfairly downweighted to its lowest-confidence instance.
|
|
541
|
+
let bestMultiplier = 0;
|
|
542
|
+
for (const f of group) {
|
|
543
|
+
const conf = f.confidence ?? "high";
|
|
544
|
+
const m = CONFIDENCE_MULTIPLIER[conf];
|
|
545
|
+
if (m > bestMultiplier)
|
|
546
|
+
bestMultiplier = m;
|
|
547
|
+
}
|
|
548
|
+
if (bestMultiplier === 0)
|
|
549
|
+
bestMultiplier = CONFIDENCE_MULTIPLIER.high;
|
|
550
|
+
const weighted = cappedImpact * bestMultiplier;
|
|
551
|
+
bucketRaw[bucket] = Math.min(100, bucketRaw[bucket] + weighted);
|
|
552
|
+
}
|
|
553
|
+
const cw = profile.categoryWeights;
|
|
554
|
+
const weighted = bucketRaw.integrity * cw.integrity +
|
|
555
|
+
bucketRaw.discoverability * cw.discoverability +
|
|
556
|
+
bucketRaw.citation * cw.citation +
|
|
557
|
+
bucketRaw.data * cw.data;
|
|
364
558
|
const risk = Math.round(Math.min(100, weighted));
|
|
365
559
|
const categories = {
|
|
366
560
|
integrity: { grade: gradeForPenalty(bucketRaw.integrity), issues: bucketIssues.integrity },
|
|
@@ -944,6 +1138,11 @@ export async function auditSource(source, options) {
|
|
|
944
1138
|
const concurrency = options?.concurrency ?? preset.concurrency ?? 5;
|
|
945
1139
|
const timeoutMs = options?.timeout ?? 30000;
|
|
946
1140
|
const ignorePatterns = options?.ignore ?? [];
|
|
1141
|
+
const respectNoindex = options?.respectNoindex ?? true;
|
|
1142
|
+
const skipDetectedAuth = options?.skipDetectedAuth ?? false;
|
|
1143
|
+
const skipBoilerplate = options?.skipBoilerplate ?? false;
|
|
1144
|
+
const skipSearchPages = options?.skipSearchPages ?? false;
|
|
1145
|
+
const skipEmptyBody = options?.skipEmptyBody ?? false;
|
|
947
1146
|
const sampleSize = options?.sampleSize ?? preset.sampleSize ?? 0;
|
|
948
1147
|
const externalSignal = options?.signal;
|
|
949
1148
|
const guardSsrf = options?.guardSsrf ?? preset.guardSsrf ?? false;
|
|
@@ -1140,13 +1339,33 @@ export async function auditSource(source, options) {
|
|
|
1140
1339
|
})()
|
|
1141
1340
|
: fisherYatesSample(filtered, sampleSize))
|
|
1142
1341
|
: filtered;
|
|
1143
|
-
const
|
|
1342
|
+
const parsedPagesAll = sampled.map((page) => {
|
|
1144
1343
|
const parsed = parseHtmlPage(page.html, page.url, { normalizeUrl: normalizeUrlOptions });
|
|
1145
1344
|
if (page.httpMeta) {
|
|
1146
1345
|
parsed.httpMeta = page.httpMeta;
|
|
1147
1346
|
}
|
|
1148
1347
|
return parsed;
|
|
1149
1348
|
});
|
|
1349
|
+
// v0.4.1 §page-filter: drop noindex'd pages and (when enabled) heuristically
|
|
1350
|
+
// detected auth pages BEFORE rule evaluation. The site owner's noindex is a
|
|
1351
|
+
// hard signal — they already opted out of SEO indexing, so auditing those
|
|
1352
|
+
// URLs produces only noise. Auth detection is opt-in via skipDetectedAuth
|
|
1353
|
+
// (off for the CLI by default; on for the hosted web form).
|
|
1354
|
+
const skippedByPolicy = [];
|
|
1355
|
+
const parsedPages = parsedPagesAll.filter((p) => {
|
|
1356
|
+
const reason = pageSkipReason(p, {
|
|
1357
|
+
respectNoindex,
|
|
1358
|
+
skipDetectedAuth,
|
|
1359
|
+
skipBoilerplate,
|
|
1360
|
+
skipSearchPages,
|
|
1361
|
+
skipEmptyBody,
|
|
1362
|
+
});
|
|
1363
|
+
if (reason) {
|
|
1364
|
+
skippedByPolicy.push({ url: p.url, reason });
|
|
1365
|
+
return false;
|
|
1366
|
+
}
|
|
1367
|
+
return true;
|
|
1368
|
+
});
|
|
1150
1369
|
const knownUrls = new Set(parsedPages.map((p) => p.url));
|
|
1151
1370
|
const rootUrl = parsedPages.find((p) => /(^|[\\/])index\.html?$/i.test(p.url))?.url ?? parsedPages[0]?.url ?? "";
|
|
1152
1371
|
const adjacency = new Map();
|
|
@@ -1267,10 +1486,13 @@ export async function auditSource(source, options) {
|
|
|
1267
1486
|
continue;
|
|
1268
1487
|
const groupRules = resolveGroupRules(resolvedRules, groupConfig?.overrides);
|
|
1269
1488
|
const enabledCheck = (ruleId) => !suppressedRuleSet.has(ruleId) && isRuleEnabled(ruleId, groupConfig?.rules);
|
|
1270
|
-
const findings = runRulesOnPages(groupPages, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full");
|
|
1489
|
+
const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full");
|
|
1271
1490
|
allFindings.push(...findings);
|
|
1272
1491
|
groupPageCounts[groupName] = groupPages.length;
|
|
1273
|
-
|
|
1492
|
+
// v0.4.3: per-group scoring uses the same site-classification profile so
|
|
1493
|
+
// group-level risk numbers reflect the same severity / confidence remaps
|
|
1494
|
+
// as the headline verdict.
|
|
1495
|
+
const { risk: groupRisk } = scoreFromFindings(applyScoringProfileOverrides(findings, siteClassification), siteClassification);
|
|
1274
1496
|
groupScores[groupName] = groupRisk;
|
|
1275
1497
|
}
|
|
1276
1498
|
throwIfAborted();
|
|
@@ -1280,7 +1502,13 @@ export async function auditSource(source, options) {
|
|
|
1280
1502
|
});
|
|
1281
1503
|
// Populate docsUrl on every finding before they leave the engine.
|
|
1282
1504
|
withDocsUrls(enriched.findings);
|
|
1283
|
-
|
|
1505
|
+
// v0.4.3: apply site-type-aware severity + confidence overrides so blocker
|
|
1506
|
+
// counts, issue buckets, and category bucketing all reflect the user-visible
|
|
1507
|
+
// severity (not the rule's native severity). The remapped findings replace
|
|
1508
|
+
// the enrichment output so every downstream consumer (summary.issues, AI
|
|
1509
|
+
// triage input, telemetry, formatters) sees the corrected severity.
|
|
1510
|
+
enriched.findings = applyScoringProfileOverrides(enriched.findings, siteClassification);
|
|
1511
|
+
const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings, siteClassification);
|
|
1284
1512
|
const auditedPageCount = Object.values(groupPageCounts).reduce((a, b) => a + b, 0);
|
|
1285
1513
|
const issues = bucketIssues(enriched.findings);
|
|
1286
1514
|
const verdict = verdictForRisk(risk);
|
|
@@ -1317,22 +1545,68 @@ export async function auditSource(source, options) {
|
|
|
1317
1545
|
if (cacheConfig) {
|
|
1318
1546
|
summary.cacheStats = cacheStats;
|
|
1319
1547
|
}
|
|
1320
|
-
// v0.4 §4.5: warn when
|
|
1548
|
+
// v0.4 §4.5 / v0.4.1: warn when ignore patterns matched zero discovered URLs.
|
|
1549
|
+
// - Per-pattern warning fires only when `warnUnmatchedIgnore` is true
|
|
1550
|
+
// (set by the CLI when `--ignore` was passed explicitly). Quiet by
|
|
1551
|
+
// default for config-loaded patterns where broad safety lists like
|
|
1552
|
+
// `**/dashboard/**` legitimately don't match small marketing sites.
|
|
1553
|
+
// - When ALL patterns matched zero (strongest typo signal, e.g. user
|
|
1554
|
+
// wrote `*.json` instead of `**/*.json`), emit a single consolidated
|
|
1555
|
+
// warning regardless of source.
|
|
1321
1556
|
if (ignorePatterns.length > 0) {
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1557
|
+
const unmatched = ignorePatterns.filter((pattern) => !deduped.some((p) => globMatchPathname(pattern, p.url)));
|
|
1558
|
+
if (unmatched.length === ignorePatterns.length) {
|
|
1559
|
+
// eslint-disable-next-line no-console
|
|
1560
|
+
console.warn(`[pseolint] none of the ${ignorePatterns.length} ignore pattern${ignorePatterns.length === 1 ? "" : "s"} matched any URLs — check config or --ignore for typos`);
|
|
1561
|
+
}
|
|
1562
|
+
else if (options?.warnUnmatchedIgnore === true) {
|
|
1563
|
+
for (const pattern of unmatched) {
|
|
1325
1564
|
// eslint-disable-next-line no-console
|
|
1326
1565
|
console.warn(`[pseolint] ignore pattern '${pattern}' matched 0 URLs — likely typo`);
|
|
1327
1566
|
}
|
|
1328
1567
|
}
|
|
1329
1568
|
}
|
|
1330
|
-
// Merge state-skipped (unchanged since last run)
|
|
1331
|
-
// robots.txt Disallow'd)
|
|
1332
|
-
|
|
1569
|
+
// Merge state-skipped (unchanged since last run), robots-skipped (target
|
|
1570
|
+
// robots.txt Disallow'd), and policy-skipped (noindex / detected-auth) URLs
|
|
1571
|
+
// so callers have a single audit-skipped surface.
|
|
1572
|
+
const allSkipped = [
|
|
1573
|
+
...skippedUrls,
|
|
1574
|
+
...skippedByRobots,
|
|
1575
|
+
...skippedByPolicy.map((s) => s.url),
|
|
1576
|
+
];
|
|
1333
1577
|
if (allSkipped.length > 0) {
|
|
1334
1578
|
summary.skippedUrls = allSkipped;
|
|
1335
1579
|
}
|
|
1580
|
+
// v0.4.1: surface noindex / auth skips as a discoverable diagnostic so the
|
|
1581
|
+
// user sees what the engine excluded. Catches the accidental-noindex bug:
|
|
1582
|
+
// pages silently dropped from indexing show up as a visible skip line
|
|
1583
|
+
// instead of being absent without explanation.
|
|
1584
|
+
if (skippedByPolicy.length > 0) {
|
|
1585
|
+
const noindexCount = skippedByPolicy.filter((s) => s.reason === "noindex").length;
|
|
1586
|
+
const authCount = skippedByPolicy.filter((s) => s.reason === "auth-detected").length;
|
|
1587
|
+
const boilerplateCount = skippedByPolicy.filter((s) => s.reason === "boilerplate").length;
|
|
1588
|
+
const searchCount = skippedByPolicy.filter((s) => s.reason === "search-result").length;
|
|
1589
|
+
const spaShellCount = skippedByPolicy.filter((s) => s.reason === "spa-shell").length;
|
|
1590
|
+
const sample = skippedByPolicy.slice(0, 5).map((s) => `${s.url} (${s.reason})`).join(", ");
|
|
1591
|
+
const more = skippedByPolicy.length > 5 ? `, +${skippedByPolicy.length - 5} more` : "";
|
|
1592
|
+
const parts = [];
|
|
1593
|
+
if (noindexCount > 0)
|
|
1594
|
+
parts.push(`${noindexCount} marked noindex`);
|
|
1595
|
+
if (authCount > 0)
|
|
1596
|
+
parts.push(`${authCount} detected as auth (login/register/etc)`);
|
|
1597
|
+
if (boilerplateCount > 0)
|
|
1598
|
+
parts.push(`${boilerplateCount} cookie/legal/consent boilerplate`);
|
|
1599
|
+
if (searchCount > 0)
|
|
1600
|
+
parts.push(`${searchCount} search-result page${searchCount === 1 ? "" : "s"}`);
|
|
1601
|
+
if (spaShellCount > 0)
|
|
1602
|
+
parts.push(`${spaShellCount} un-hydrated SPA shell${spaShellCount === 1 ? "" : "s"}`);
|
|
1603
|
+
auditFindings.push({
|
|
1604
|
+
ruleId: "audit/skipped-by-policy",
|
|
1605
|
+
severity: "info",
|
|
1606
|
+
message: `Skipped ${skippedByPolicy.length} page${skippedByPolicy.length === 1 ? "" : "s"} from rule evaluation — ${parts.join(", ")}. First few: ${sample}${more}.`,
|
|
1607
|
+
relatedUrls: skippedByPolicy.map((s) => s.url),
|
|
1608
|
+
});
|
|
1609
|
+
}
|
|
1336
1610
|
// Local flat view of every finding the engine produced, used internally for
|
|
1337
1611
|
// state persistence, regression detection, AI triage input, and telemetry
|
|
1338
1612
|
// counts. NOT exposed on the AuditSummary — consumers must use
|