@pseolint/core 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auditor.d.ts +12 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +240 -31
- package/dist/auditor.js.map +1 -1
- package/dist/formatters/bucket-findings.d.ts +43 -0
- package/dist/formatters/bucket-findings.d.ts.map +1 -0
- package/dist/formatters/bucket-findings.js +110 -0
- package/dist/formatters/bucket-findings.js.map +1 -0
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +103 -34
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/fixplan.d.ts +13 -0
- package/dist/formatters/fixplan.d.ts.map +1 -0
- package/dist/formatters/fixplan.js +328 -0
- package/dist/formatters/fixplan.js.map +1 -0
- package/dist/formatters/html.d.ts.map +1 -1
- package/dist/formatters/html.js +27 -0
- package/dist/formatters/html.js.map +1 -1
- package/dist/formatters/index.d.ts +2 -0
- package/dist/formatters/index.d.ts.map +1 -1
- package/dist/formatters/index.js +1 -0
- package/dist/formatters/index.js.map +1 -1
- package/dist/formatters/markdown.d.ts.map +1 -1
- package/dist/formatters/markdown.js +77 -7
- package/dist/formatters/markdown.js.map +1 -1
- package/dist/page-filter.d.ts +64 -6
- package/dist/page-filter.d.ts.map +1 -1
- package/dist/page-filter.js +124 -3
- package/dist/page-filter.js.map +1 -1
- package/dist/rules/aeo/answer-first.d.ts.map +1 -1
- package/dist/rules/aeo/answer-first.js +17 -3
- package/dist/rules/aeo/answer-first.js.map +1 -1
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
- package/dist/rules/aeo/citable-facts.js +12 -1
- package/dist/rules/aeo/citable-facts.js.map +1 -1
- package/dist/rules/aeo/content-modularity.d.ts.map +1 -1
- package/dist/rules/aeo/content-modularity.js +3 -0
- package/dist/rules/aeo/content-modularity.js.map +1 -1
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
- package/dist/rules/aeo/crawler-access.js +6 -0
- package/dist/rules/aeo/crawler-access.js.map +1 -1
- package/dist/rules/aeo/faq-coverage.d.ts.map +1 -1
- package/dist/rules/aeo/faq-coverage.js +4 -0
- package/dist/rules/aeo/faq-coverage.js.map +1 -1
- package/dist/rules/aeo/freshness-signals.d.ts.map +1 -1
- package/dist/rules/aeo/freshness-signals.js +9 -2
- package/dist/rules/aeo/freshness-signals.js.map +1 -1
- package/dist/rules/aeo/llms-txt.d.ts.map +1 -1
- package/dist/rules/aeo/llms-txt.js +6 -1
- package/dist/rules/aeo/llms-txt.js.map +1 -1
- package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
- package/dist/rules/aeo/summary-bait.js +5 -2
- package/dist/rules/aeo/summary-bait.js.map +1 -1
- package/dist/rules/content/missing-author.d.ts.map +1 -1
- package/dist/rules/content/missing-author.js +10 -2
- package/dist/rules/content/missing-author.js.map +1 -1
- package/dist/rules/spam/thin-content.d.ts.map +1 -1
- package/dist/rules/spam/thin-content.js +9 -1
- package/dist/rules/spam/thin-content.js.map +1 -1
- package/dist/site-classifier.d.ts +1 -1
- package/dist/site-classifier.d.ts.map +1 -1
- package/dist/site-classifier.js +216 -0
- package/dist/site-classifier.js.map +1 -1
- package/dist/types.d.ts +44 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
package/dist/auditor.d.ts
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
|
-
import type { AuditOptions, AuditSummary } from "./types.js";
|
|
1
|
+
import type { AuditOptions, AuditSummary, RuleResult } from "./types.js";
|
|
2
|
+
import { type SiteClassification } from "./site-classifier.js";
|
|
3
|
+
/**
|
|
4
|
+
* v0.4.3 — apply per-site-type severity + confidence overrides BEFORE any
|
|
5
|
+
* bucketing happens, so blocker/shouldFix counts and category buckets all
|
|
6
|
+
* reflect the user-visible severity, not the rule's native severity.
|
|
7
|
+
*
|
|
8
|
+
* Never mutates the input. Returns a NEW array when the active profile has overrides (otherwise the input array itself is returned). Only the
|
|
9
|
+
* `severity` and `confidence` fields are remapped; everything else is
|
|
10
|
+
* preserved by reference.
|
|
11
|
+
*/
|
|
12
|
+
export declare function applyScoringProfileOverrides(findings: RuleResult[], classification: SiteClassification | undefined): RuleResult[];
|
|
2
13
|
export declare function auditSource(source: string, options?: AuditOptions): Promise<AuditSummary>;
|
|
3
14
|
//# sourceMappingURL=auditor.d.ts.map
|
package/dist/auditor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAyDA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,
|
|
1
|
+
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAyDA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,EAWZ,UAAU,EAGX,MAAM,YAAY,CAAC;AAQpB,OAAO,EAAgB,KAAK,kBAAkB,EAAiB,MAAM,sBAAsB,CAAC;AAwhB5F;;;;;;;;GAQG;AACH,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,UAAU,EAAE,EACtB,cAAc,EAAE,kBAAkB,GAAG,SAAS,GAC7C,UAAU,EAAE,CAed;AAquBD,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CA0wB/F"}
|
package/dist/auditor.js
CHANGED
|
@@ -77,17 +77,6 @@ const DEFAULTS = {
|
|
|
77
77
|
modularityMinSelfContainedRatio: 0.7,
|
|
78
78
|
faqMinQuestionHeadings: 2
|
|
79
79
|
};
|
|
80
|
-
/**
|
|
81
|
-
* v0.4 four-category weights. Audit is diagnostic-only (weight 0).
|
|
82
|
-
* See 2026-04-29 v0.4 redesign spec §4.2.
|
|
83
|
-
*/
|
|
84
|
-
const CATEGORY_WEIGHTS = {
|
|
85
|
-
integrity: 0.50, // spam + content + cannibal
|
|
86
|
-
discoverability: 0.20, // links + tech
|
|
87
|
-
citation: 0.25, // aeo + schema
|
|
88
|
-
data: 0.05, // data
|
|
89
|
-
audit: 0, // diagnostics, never weighted
|
|
90
|
-
};
|
|
91
80
|
/**
|
|
92
81
|
* Maps the v0.3 ruleId namespace prefix to the v0.4 four-bucket category.
|
|
93
82
|
* Used by `scoreFromFindings` to bucket findings without changing rule IDs.
|
|
@@ -103,6 +92,138 @@ const CATEGORY_MAP = {
|
|
|
103
92
|
data: "data",
|
|
104
93
|
audit: "audit",
|
|
105
94
|
};
|
|
95
|
+
const SCORING_PROFILES = {
|
|
96
|
+
"small-marketing": {
|
|
97
|
+
categoryWeights: { integrity: 0.30, discoverability: 0.40, citation: 0.20, data: 0.05, audit: 0 },
|
|
98
|
+
severityOverrides: {
|
|
99
|
+
"aeo/citable-facts": "info",
|
|
100
|
+
"aeo/answer-first": "info",
|
|
101
|
+
"aeo/summary-bait": "warning",
|
|
102
|
+
"spam/thin-content": "warning",
|
|
103
|
+
},
|
|
104
|
+
confidenceOverrides: {
|
|
105
|
+
"aeo/citable-facts": "low",
|
|
106
|
+
"aeo/answer-first": "low",
|
|
107
|
+
"aeo/summary-bait": "medium",
|
|
108
|
+
"spam/thin-content": "medium",
|
|
109
|
+
},
|
|
110
|
+
},
|
|
111
|
+
"blog": {
|
|
112
|
+
categoryWeights: { integrity: 0.40, discoverability: 0.25, citation: 0.30, data: 0.05, audit: 0 },
|
|
113
|
+
severityOverrides: {
|
|
114
|
+
"content/missing-author": "error",
|
|
115
|
+
"spam/thin-content": "error",
|
|
116
|
+
},
|
|
117
|
+
confidenceOverrides: {},
|
|
118
|
+
},
|
|
119
|
+
"programmatic-directory": {
|
|
120
|
+
categoryWeights: { integrity: 0.55, discoverability: 0.15, citation: 0.20, data: 0.10, audit: 0 },
|
|
121
|
+
severityOverrides: {},
|
|
122
|
+
confidenceOverrides: {},
|
|
123
|
+
},
|
|
124
|
+
"ecommerce": {
|
|
125
|
+
categoryWeights: { integrity: 0.20, discoverability: 0.40, citation: 0.15, data: 0.25, audit: 0 },
|
|
126
|
+
severityOverrides: {
|
|
127
|
+
"aeo/citable-facts": "info",
|
|
128
|
+
"schema/required-fields": "error",
|
|
129
|
+
},
|
|
130
|
+
confidenceOverrides: {
|
|
131
|
+
"aeo/citable-facts": "low",
|
|
132
|
+
},
|
|
133
|
+
},
|
|
134
|
+
"docs": {
|
|
135
|
+
categoryWeights: { integrity: 0.30, discoverability: 0.30, citation: 0.30, data: 0.10, audit: 0 },
|
|
136
|
+
severityOverrides: {
|
|
137
|
+
"aeo/citable-facts": "info",
|
|
138
|
+
"aeo/answer-first": "warning",
|
|
139
|
+
"content/missing-author": "info",
|
|
140
|
+
},
|
|
141
|
+
confidenceOverrides: {
|
|
142
|
+
"aeo/citable-facts": "low",
|
|
143
|
+
"aeo/answer-first": "low",
|
|
144
|
+
"content/missing-author": "low",
|
|
145
|
+
},
|
|
146
|
+
},
|
|
147
|
+
"unclear": {
|
|
148
|
+
categoryWeights: { integrity: 0.50, discoverability: 0.20, citation: 0.25, data: 0.05, audit: 0 },
|
|
149
|
+
severityOverrides: {},
|
|
150
|
+
confidenceOverrides: {},
|
|
151
|
+
},
|
|
152
|
+
};
|
|
153
|
+
/**
|
|
154
|
+
* Pick the scoring profile for a classification. Falls back to `unclear`
|
|
155
|
+
* (the conservative default) when no classification is given, when classifier confidence is below 0.7 (70%), or when the classified type has no profile.
|
|
156
|
+
*/
|
|
157
|
+
function profileFor(classification) {
|
|
158
|
+
if (!classification || classification.confidence < 0.7)
|
|
159
|
+
return SCORING_PROFILES.unclear;
|
|
160
|
+
return SCORING_PROFILES[classification.type] ?? SCORING_PROFILES.unclear;
|
|
161
|
+
}
|
|
162
|
+
const RULE_IMPACTS = {
|
|
163
|
+
// SpamBrain — high baseline, count amplifies (cluster matters)
|
|
164
|
+
"spam/near-duplicate": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
|
|
165
|
+
"spam/entity-swap": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
|
|
166
|
+
"spam/doorway-pattern": { baseImpact: 30, perInstance: 0, maxImpact: 30 },
|
|
167
|
+
"spam/template-coverage": { baseImpact: 15, perInstance: 3, maxImpact: 60 },
|
|
168
|
+
"spam/template-diversity": { baseImpact: 12, perInstance: 3, maxImpact: 50 },
|
|
169
|
+
"spam/boilerplate-ratio": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
170
|
+
"spam/thin-content": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
|
|
171
|
+
"spam/publication-velocity": { baseImpact: 8, perInstance: 2, maxImpact: 30 },
|
|
172
|
+
"cannibal/url-pattern": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
173
|
+
// Content
|
|
174
|
+
"content/unique-value": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
175
|
+
"content/meta-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
|
|
176
|
+
"content/missing-author": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
|
|
177
|
+
"content/eeat-signals": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
|
|
178
|
+
// Tech — softened in v0.4.3-rc2 after dogfood showed nextjs.org regressing
|
|
179
|
+
// from ready→caution on tech/canonical-consistency × 4 (legit cross-domain
|
|
180
|
+
// canonicals on a CDN). Per-instance now 1 (was 3).
|
|
181
|
+
"tech/canonical-consistency": { baseImpact: 8, perInstance: 1, maxImpact: 25 },
|
|
182
|
+
"tech/canonical-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
183
|
+
"tech/robots-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
|
|
184
|
+
"tech/redirect-chain": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
|
|
185
|
+
"tech/sitemap-completeness": { baseImpact: 8, perInstance: 1, maxImpact: 30 },
|
|
186
|
+
"tech/robots-sitemap-presence": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
|
|
187
|
+
"tech/soft-404": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
|
|
188
|
+
// hreflang — one bad declaration breaks all language pairs, so the COUNT
|
|
189
|
+
// doesn't compound. perInstance: 0 keeps it at the base impact regardless
|
|
190
|
+
// of how many language pairs are affected. Dogfood showed 350 findings on
|
|
191
|
+
// stripe.com from a single missing reciprocal pair — that should not be
|
|
192
|
+
// treated as 350× the impact.
|
|
193
|
+
"tech/hreflang-consistency": { baseImpact: 5, perInstance: 0, maxImpact: 5 },
|
|
194
|
+
// Links
|
|
195
|
+
"links/orphan-pages": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
|
|
196
|
+
"links/dead-ends": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
|
|
197
|
+
"links/cluster-connectivity": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
|
|
198
|
+
"links/link-depth": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
|
|
199
|
+
// AEO — much lower baselines than spam (AEO is opt-in optimization)
|
|
200
|
+
"aeo/citable-facts": { baseImpact: 2, perInstance: 1, maxImpact: 25 },
|
|
201
|
+
"aeo/answer-first": { baseImpact: 3, perInstance: 1, maxImpact: 25 },
|
|
202
|
+
"aeo/summary-bait": { baseImpact: 4, perInstance: 1, maxImpact: 25 },
|
|
203
|
+
"aeo/crawler-access": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
|
|
204
|
+
"aeo/freshness-signals": { baseImpact: 2, perInstance: 1, maxImpact: 20 },
|
|
205
|
+
"aeo/llms-txt": { baseImpact: 4, perInstance: 0, maxImpact: 4 },
|
|
206
|
+
"aeo/faq-coverage": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
|
|
207
|
+
"aeo/content-modularity": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
|
|
208
|
+
// Schema
|
|
209
|
+
"schema/json-ld-valid": { baseImpact: 8, perInstance: 2, maxImpact: 35 },
|
|
210
|
+
"schema/required-fields": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
|
|
211
|
+
"schema/consistency": { baseImpact: 3, perInstance: 1, maxImpact: 15 },
|
|
212
|
+
// Data
|
|
213
|
+
"data/data-binding": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
|
|
214
|
+
};
|
|
215
|
+
const DEFAULT_RULE_IMPACT = { baseImpact: 5, perInstance: 1, maxImpact: 25 };
|
|
216
|
+
/**
|
|
217
|
+
* v0.4.3 — confidence-based discount applied to each rule's grouped impact.
|
|
218
|
+
* Low-confidence findings contribute less to the bucket so they don't
|
|
219
|
+
* inflate the verdict on site types where they false-positive.
|
|
220
|
+
*/
|
|
221
|
+
const CONFIDENCE_MULTIPLIER = {
|
|
222
|
+
high: 1.0,
|
|
223
|
+
medium: 0.6,
|
|
224
|
+
low: 0.3,
|
|
225
|
+
speculative: 0.1,
|
|
226
|
+
};
|
|
106
227
|
/** Slug map for `RuleResult.docsUrl`. Defaults to the rule-id segment after the `/`. */
|
|
107
228
|
const RULE_DOCS_SLUG = {
|
|
108
229
|
// intentionally empty for v0.4 — slug = ruleId.split("/").pop() works for every shipped rule
|
|
@@ -323,13 +444,47 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
|
|
|
323
444
|
function hashHtml(html) {
|
|
324
445
|
return createHash("sha256").update(html, "utf8").digest("hex");
|
|
325
446
|
}
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
447
|
+
/**
|
|
448
|
+
* v0.4.3 — apply per-site-type severity + confidence overrides BEFORE any
|
|
449
|
+
* bucketing happens, so blocker/shouldFix counts and category buckets all
|
|
450
|
+
* reflect the user-visible severity, not the rule's native severity.
|
|
451
|
+
*
|
|
452
|
+
* Never mutates the input. Returns a NEW array when the active profile has overrides (otherwise the input array itself is returned). Only the
|
|
453
|
+
* `severity` and `confidence` fields are remapped; everything else is
|
|
454
|
+
* preserved by reference.
|
|
455
|
+
*/
|
|
456
|
+
export function applyScoringProfileOverrides(findings, classification) {
|
|
457
|
+
const profile = profileFor(classification);
|
|
458
|
+
const sevHas = Object.keys(profile.severityOverrides).length > 0;
|
|
459
|
+
const confHas = Object.keys(profile.confidenceOverrides).length > 0;
|
|
460
|
+
if (!sevHas && !confHas)
|
|
461
|
+
return findings;
|
|
462
|
+
return findings.map((f) => {
|
|
463
|
+
const newSev = profile.severityOverrides[f.ruleId];
|
|
464
|
+
const newConf = profile.confidenceOverrides[f.ruleId];
|
|
465
|
+
if (newSev === undefined && newConf === undefined)
|
|
466
|
+
return f;
|
|
467
|
+
return {
|
|
468
|
+
...f,
|
|
469
|
+
...(newSev !== undefined ? { severity: newSev } : {}),
|
|
470
|
+
...(newConf !== undefined ? { confidence: newConf } : {}),
|
|
471
|
+
};
|
|
472
|
+
});
|
|
473
|
+
}
|
|
474
|
+
/**
|
|
475
|
+
* v0.4.3 — confidence-and-count-aware scoring. Replaces the v0.4 model that
|
|
476
|
+
* counted only severity. Each rule has a `baseImpact + (count - 1) *
|
|
477
|
+
* perInstance` contribution capped by `maxImpact`. The result is multiplied
|
|
478
|
+
* by the multiplier of the highest `confidence` among that rule's findings (default `high` → 1.0). Per-site-type
|
|
479
|
+
* profiles can remap a rule's severity / confidence; this function expects
|
|
480
|
+
* those overrides to ALREADY be applied to the input findings.
|
|
481
|
+
*
|
|
482
|
+
* Bucket math: per-rule impacts sum into the rule's `CATEGORY_MAP` bucket;
|
|
483
|
+
* each bucket is then capped at 100 and weighted by the active scoring
|
|
484
|
+
* profile's `categoryWeights`.
|
|
485
|
+
*/
|
|
486
|
+
function scoreFromFindings(findings, classification) {
|
|
487
|
+
const profile = profileFor(classification);
|
|
333
488
|
// v0.4 four-bucket raw penalties.
|
|
334
489
|
const bucketRaw = {
|
|
335
490
|
integrity: 0,
|
|
@@ -348,18 +503,16 @@ function scoreFromFindings(findings) {
|
|
|
348
503
|
let blockers = 0;
|
|
349
504
|
let shouldFix = 0;
|
|
350
505
|
let informational = 0;
|
|
506
|
+
// Group findings by ruleId so we can apply baseImpact + perInstance.
|
|
507
|
+
// Each group's weighted impact lands in its category bucket.
|
|
508
|
+
const groups = new Map();
|
|
351
509
|
for (const finding of findings) {
|
|
352
510
|
const namespace = finding.ruleId.split("/")[0];
|
|
353
511
|
const bucket = CATEGORY_MAP[namespace];
|
|
354
512
|
if (!bucket)
|
|
355
513
|
continue;
|
|
356
|
-
|
|
357
|
-
// v0.4 buckets.
|
|
358
|
-
bucketRaw[bucket] = Math.min(100, bucketRaw[bucket] + weight);
|
|
359
|
-
if (bucket !== "audit") {
|
|
514
|
+
if (bucket !== "audit")
|
|
360
515
|
bucketIssues[bucket] += 1;
|
|
361
|
-
}
|
|
362
|
-
// Issue-bucket counts (audit/* findings are diagnostic-only and excluded).
|
|
363
516
|
if (bucket === "audit")
|
|
364
517
|
continue;
|
|
365
518
|
if (finding.severity === "critical" || finding.severity === "error")
|
|
@@ -368,11 +521,40 @@ function scoreFromFindings(findings) {
|
|
|
368
521
|
shouldFix += 1;
|
|
369
522
|
else
|
|
370
523
|
informational += 1;
|
|
524
|
+
const arr = groups.get(finding.ruleId) ?? [];
|
|
525
|
+
arr.push(finding);
|
|
526
|
+
groups.set(finding.ruleId, arr);
|
|
371
527
|
}
|
|
372
|
-
const
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
528
|
+
for (const [ruleId, group] of groups) {
|
|
529
|
+
const namespace = ruleId.split("/")[0];
|
|
530
|
+
const bucket = CATEGORY_MAP[namespace];
|
|
531
|
+
if (!bucket || bucket === "audit")
|
|
532
|
+
continue;
|
|
533
|
+
const impactSpec = RULE_IMPACTS[ruleId] ?? DEFAULT_RULE_IMPACT;
|
|
534
|
+
const count = group.length;
|
|
535
|
+
const rawImpact = impactSpec.baseImpact + Math.max(0, count - 1) * impactSpec.perInstance;
|
|
536
|
+
const cap = impactSpec.maxImpact ?? Number.POSITIVE_INFINITY;
|
|
537
|
+
const cappedImpact = Math.min(cap, rawImpact);
|
|
538
|
+
// Confidence multiplier — use the HIGHEST (largest-multiplier) confidence
|
|
539
|
+
// in the group so a rule that fires repeatedly with mixed confidence is
|
|
540
|
+
// not unfairly downweighted to its lowest-confidence instance.
|
|
541
|
+
let bestMultiplier = 0;
|
|
542
|
+
for (const f of group) {
|
|
543
|
+
const conf = f.confidence ?? "high";
|
|
544
|
+
const m = CONFIDENCE_MULTIPLIER[conf];
|
|
545
|
+
if (m > bestMultiplier)
|
|
546
|
+
bestMultiplier = m;
|
|
547
|
+
}
|
|
548
|
+
if (bestMultiplier === 0)
|
|
549
|
+
bestMultiplier = CONFIDENCE_MULTIPLIER.high;
|
|
550
|
+
const weighted = cappedImpact * bestMultiplier;
|
|
551
|
+
bucketRaw[bucket] = Math.min(100, bucketRaw[bucket] + weighted);
|
|
552
|
+
}
|
|
553
|
+
const cw = profile.categoryWeights;
|
|
554
|
+
const weighted = bucketRaw.integrity * cw.integrity +
|
|
555
|
+
bucketRaw.discoverability * cw.discoverability +
|
|
556
|
+
bucketRaw.citation * cw.citation +
|
|
557
|
+
bucketRaw.data * cw.data;
|
|
376
558
|
const risk = Math.round(Math.min(100, weighted));
|
|
377
559
|
const categories = {
|
|
378
560
|
integrity: { grade: gradeForPenalty(bucketRaw.integrity), issues: bucketIssues.integrity },
|
|
@@ -958,6 +1140,9 @@ export async function auditSource(source, options) {
|
|
|
958
1140
|
const ignorePatterns = options?.ignore ?? [];
|
|
959
1141
|
const respectNoindex = options?.respectNoindex ?? true;
|
|
960
1142
|
const skipDetectedAuth = options?.skipDetectedAuth ?? false;
|
|
1143
|
+
const skipBoilerplate = options?.skipBoilerplate ?? false;
|
|
1144
|
+
const skipSearchPages = options?.skipSearchPages ?? false;
|
|
1145
|
+
const skipEmptyBody = options?.skipEmptyBody ?? false;
|
|
961
1146
|
const sampleSize = options?.sampleSize ?? preset.sampleSize ?? 0;
|
|
962
1147
|
const externalSignal = options?.signal;
|
|
963
1148
|
const guardSsrf = options?.guardSsrf ?? preset.guardSsrf ?? false;
|
|
@@ -1168,7 +1353,13 @@ export async function auditSource(source, options) {
|
|
|
1168
1353
|
// (off for the CLI by default; on for the hosted web form).
|
|
1169
1354
|
const skippedByPolicy = [];
|
|
1170
1355
|
const parsedPages = parsedPagesAll.filter((p) => {
|
|
1171
|
-
const reason = pageSkipReason(p, {
|
|
1356
|
+
const reason = pageSkipReason(p, {
|
|
1357
|
+
respectNoindex,
|
|
1358
|
+
skipDetectedAuth,
|
|
1359
|
+
skipBoilerplate,
|
|
1360
|
+
skipSearchPages,
|
|
1361
|
+
skipEmptyBody,
|
|
1362
|
+
});
|
|
1172
1363
|
if (reason) {
|
|
1173
1364
|
skippedByPolicy.push({ url: p.url, reason });
|
|
1174
1365
|
return false;
|
|
@@ -1298,7 +1489,10 @@ export async function auditSource(source, options) {
|
|
|
1298
1489
|
const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full");
|
|
1299
1490
|
allFindings.push(...findings);
|
|
1300
1491
|
groupPageCounts[groupName] = groupPages.length;
|
|
1301
|
-
|
|
1492
|
+
// v0.4.3: per-group scoring uses the same site-classification profile so
|
|
1493
|
+
// group-level risk numbers reflect the same severity / confidence remaps
|
|
1494
|
+
// as the headline verdict.
|
|
1495
|
+
const { risk: groupRisk } = scoreFromFindings(applyScoringProfileOverrides(findings, siteClassification), siteClassification);
|
|
1302
1496
|
groupScores[groupName] = groupRisk;
|
|
1303
1497
|
}
|
|
1304
1498
|
throwIfAborted();
|
|
@@ -1308,7 +1502,13 @@ export async function auditSource(source, options) {
|
|
|
1308
1502
|
});
|
|
1309
1503
|
// Populate docsUrl on every finding before they leave the engine.
|
|
1310
1504
|
withDocsUrls(enriched.findings);
|
|
1311
|
-
|
|
1505
|
+
// v0.4.3: apply site-type-aware severity + confidence overrides so blocker
|
|
1506
|
+
// counts, issue buckets, and category bucketing all reflect the user-visible
|
|
1507
|
+
// severity (not the rule's native severity). The remapped findings replace
|
|
1508
|
+
// the enrichment output so every downstream consumer (summary.issues, AI
|
|
1509
|
+
// triage input, telemetry, formatters) sees the corrected severity.
|
|
1510
|
+
enriched.findings = applyScoringProfileOverrides(enriched.findings, siteClassification);
|
|
1511
|
+
const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings, siteClassification);
|
|
1312
1512
|
const auditedPageCount = Object.values(groupPageCounts).reduce((a, b) => a + b, 0);
|
|
1313
1513
|
const issues = bucketIssues(enriched.findings);
|
|
1314
1514
|
const verdict = verdictForRisk(risk);
|
|
@@ -1384,6 +1584,9 @@ export async function auditSource(source, options) {
|
|
|
1384
1584
|
if (skippedByPolicy.length > 0) {
|
|
1385
1585
|
const noindexCount = skippedByPolicy.filter((s) => s.reason === "noindex").length;
|
|
1386
1586
|
const authCount = skippedByPolicy.filter((s) => s.reason === "auth-detected").length;
|
|
1587
|
+
const boilerplateCount = skippedByPolicy.filter((s) => s.reason === "boilerplate").length;
|
|
1588
|
+
const searchCount = skippedByPolicy.filter((s) => s.reason === "search-result").length;
|
|
1589
|
+
const spaShellCount = skippedByPolicy.filter((s) => s.reason === "spa-shell").length;
|
|
1387
1590
|
const sample = skippedByPolicy.slice(0, 5).map((s) => `${s.url} (${s.reason})`).join(", ");
|
|
1388
1591
|
const more = skippedByPolicy.length > 5 ? `, +${skippedByPolicy.length - 5} more` : "";
|
|
1389
1592
|
const parts = [];
|
|
@@ -1391,6 +1594,12 @@ export async function auditSource(source, options) {
|
|
|
1391
1594
|
parts.push(`${noindexCount} marked noindex`);
|
|
1392
1595
|
if (authCount > 0)
|
|
1393
1596
|
parts.push(`${authCount} detected as auth (login/register/etc)`);
|
|
1597
|
+
if (boilerplateCount > 0)
|
|
1598
|
+
parts.push(`${boilerplateCount} cookie/legal/consent boilerplate`);
|
|
1599
|
+
if (searchCount > 0)
|
|
1600
|
+
parts.push(`${searchCount} search-result page${searchCount === 1 ? "" : "s"}`);
|
|
1601
|
+
if (spaShellCount > 0)
|
|
1602
|
+
parts.push(`${spaShellCount} un-hydrated SPA shell${spaShellCount === 1 ? "" : "s"}`);
|
|
1394
1603
|
auditFindings.push({
|
|
1395
1604
|
ruleId: "audit/skipped-by-policy",
|
|
1396
1605
|
severity: "info",
|