npm - @pseolint/core - Versions diffs - 0.4.0 → 0.4.3 - Mend

@pseolint/core 0.4.0 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

package/dist/auditor.d.ts +12 -1
package/dist/auditor.d.ts.map +1 -1
package/dist/auditor.js +317 -43
package/dist/auditor.js.map +1 -1
package/dist/formatters/bucket-findings.d.ts +43 -0
package/dist/formatters/bucket-findings.d.ts.map +1 -0
package/dist/formatters/bucket-findings.js +110 -0
package/dist/formatters/bucket-findings.js.map +1 -0
package/dist/formatters/console.d.ts.map +1 -1
package/dist/formatters/console.js +103 -34
package/dist/formatters/console.js.map +1 -1
package/dist/formatters/fixplan.d.ts +13 -0
package/dist/formatters/fixplan.d.ts.map +1 -0
package/dist/formatters/fixplan.js +328 -0
package/dist/formatters/fixplan.js.map +1 -0
package/dist/formatters/html.d.ts.map +1 -1
package/dist/formatters/html.js +27 -0
package/dist/formatters/html.js.map +1 -1
package/dist/formatters/index.d.ts +2 -0
package/dist/formatters/index.d.ts.map +1 -1
package/dist/formatters/index.js +1 -0
package/dist/formatters/index.js.map +1 -1
package/dist/formatters/markdown.d.ts.map +1 -1
package/dist/formatters/markdown.js +77 -7
package/dist/formatters/markdown.js.map +1 -1
package/dist/page-filter.d.ts +108 -0
package/dist/page-filter.d.ts.map +1 -0
package/dist/page-filter.js +207 -0
package/dist/page-filter.js.map +1 -0
package/dist/rules/aeo/answer-first.d.ts.map +1 -1
package/dist/rules/aeo/answer-first.js +17 -3
package/dist/rules/aeo/answer-first.js.map +1 -1
package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
package/dist/rules/aeo/citable-facts.js +12 -1
package/dist/rules/aeo/citable-facts.js.map +1 -1
package/dist/rules/aeo/content-modularity.d.ts.map +1 -1
package/dist/rules/aeo/content-modularity.js +3 -0
package/dist/rules/aeo/content-modularity.js.map +1 -1
package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
package/dist/rules/aeo/crawler-access.js +6 -0
package/dist/rules/aeo/crawler-access.js.map +1 -1
package/dist/rules/aeo/faq-coverage.d.ts.map +1 -1
package/dist/rules/aeo/faq-coverage.js +4 -0
package/dist/rules/aeo/faq-coverage.js.map +1 -1
package/dist/rules/aeo/freshness-signals.d.ts.map +1 -1
package/dist/rules/aeo/freshness-signals.js +9 -2
package/dist/rules/aeo/freshness-signals.js.map +1 -1
package/dist/rules/aeo/llms-txt.d.ts.map +1 -1
package/dist/rules/aeo/llms-txt.js +6 -1
package/dist/rules/aeo/llms-txt.js.map +1 -1
package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
package/dist/rules/aeo/summary-bait.js +5 -2
package/dist/rules/aeo/summary-bait.js.map +1 -1
package/dist/rules/content/missing-author.d.ts.map +1 -1
package/dist/rules/content/missing-author.js +10 -2
package/dist/rules/content/missing-author.js.map +1 -1
package/dist/rules/spam/thin-content.d.ts.map +1 -1
package/dist/rules/spam/thin-content.js +9 -1
package/dist/rules/spam/thin-content.js.map +1 -1
package/dist/site-classifier.d.ts +1 -1
package/dist/site-classifier.d.ts.map +1 -1
package/dist/site-classifier.js +216 -0
package/dist/site-classifier.js.map +1 -1
package/dist/types.d.ts +77 -2
package/dist/types.d.ts.map +1 -1
package/dist/types.js.map +1 -1
package/package.json +1 -1

package/dist/auditor.d.ts CHANGED Viewed

@@ -1,3 +1,14 @@
-import type { AuditOptions, AuditSummary } from "./types.js";
+import type { AuditOptions, AuditSummary, RuleResult } from "./types.js";
+import { type SiteClassification } from "./site-classifier.js";
+/**
+ * v0.4.3 — apply per-site-type severity + confidence overrides BEFORE any
+ * bucketing happens, so blocker/shouldFix counts and category buckets all
+ * reflect the user-visible severity, not the rule's native severity.
+ *
+ * Never mutates the input. Returns the input array itself when the active
+ * profile has no overrides; otherwise a new array in which only remapped
+ * findings are shallow copies with `severity` / `confidence` replaced.
+ */
+export declare function applyScoringProfileOverrides(findings: RuleResult[], classification: SiteClassification | undefined): RuleResult[];
 export declare function auditSource(source: string, options?: AuditOptions): Promise<AuditSummary>;
 //# sourceMappingURL=auditor.d.ts.map

package/dist/auditor.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"~~AAwDA~~,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,~~EAab~~,MAAM,YAAY,CAAC;~~AAoiCpB~~,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,~~CAmrB~~/F"}
1	+ {"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AAyDA,OAAO,KAAK,EACV,YAAY,EACZ,YAAY,EAWZ,UAAU,EAGX,MAAM,YAAY,CAAC;AAQpB,OAAO,EAAgB,KAAK,kBAAkB,EAAiB,MAAM,sBAAsB,CAAC;AAwhB5F;;;;;;;;GAQG;AACH,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,UAAU,EAAE,EACtB,cAAc,EAAE,kBAAkB,GAAG,SAAS,GAC7C,UAAU,EAAE,CAed;AAquBD,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CA0wB/F"}

package/dist/auditor.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
 import { readdir, readFile, stat } from "node:fs/promises";
 import { extname, join, resolve } from "node:path";
 import { parseHtmlPage } from "./parser.js";
+import { pageSkipReason } from "./page-filter.js";
 import { mergeNormalizeUrlOptions, normalizeAuditUrl } from "./url-normalize.js";
 import { eeatSignalsRule } from "./rules/content/eeat-signals.js";
 import { metaUniquenessRule } from "./rules/content/meta-uniqueness.js";
@@ -76,17 +77,6 @@ const DEFAULTS = {
     modularityMinSelfContainedRatio: 0.7,
     faqMinQuestionHeadings: 2
 };
-/**
- * v0.4 four-category weights. Audit is diagnostic-only (weight 0).
- * See 2026-04-29 v0.4 redesign spec §4.2.
- */
-const CATEGORY_WEIGHTS = {
-    integrity: 0.50, // spam + content + cannibal
-    discoverability: 0.20, // links + tech
-    citation: 0.25, // aeo + schema
-    data: 0.05, // data
-    audit: 0, // diagnostics, never weighted
-};
 /**
  * Maps the v0.3 ruleId namespace prefix to the v0.4 four-bucket category.
  * Used by `scoreFromFindings` to bucket findings without changing rule IDs.
@@ -102,6 +92,138 @@ const CATEGORY_MAP = {
     data: "data",
     audit: "audit",
 };
+const SCORING_PROFILES = {
+    "small-marketing": {
+        categoryWeights: { integrity: 0.30, discoverability: 0.40, citation: 0.20, data: 0.05, audit: 0 },
+        severityOverrides: {
+            "aeo/citable-facts": "info",
+            "aeo/answer-first": "info",
+            "aeo/summary-bait": "warning",
+            "spam/thin-content": "warning",
+        },
+        confidenceOverrides: {
+            "aeo/citable-facts": "low",
+            "aeo/answer-first": "low",
+            "aeo/summary-bait": "medium",
+            "spam/thin-content": "medium",
+        },
+    },
+    "blog": {
+        categoryWeights: { integrity: 0.40, discoverability: 0.25, citation: 0.30, data: 0.05, audit: 0 },
+        severityOverrides: {
+            "content/missing-author": "error",
+            "spam/thin-content": "error",
+        },
+        confidenceOverrides: {},
+    },
+    "programmatic-directory": {
+        categoryWeights: { integrity: 0.55, discoverability: 0.15, citation: 0.20, data: 0.10, audit: 0 },
+        severityOverrides: {},
+        confidenceOverrides: {},
+    },
+    "ecommerce": {
+        categoryWeights: { integrity: 0.20, discoverability: 0.40, citation: 0.15, data: 0.25, audit: 0 },
+        severityOverrides: {
+            "aeo/citable-facts": "info",
+            "schema/required-fields": "error",
+        },
+        confidenceOverrides: {
+            "aeo/citable-facts": "low",
+        },
+    },
+    "docs": {
+        categoryWeights: { integrity: 0.30, discoverability: 0.30, citation: 0.30, data: 0.10, audit: 0 },
+        severityOverrides: {
+            "aeo/citable-facts": "info",
+            "aeo/answer-first": "warning",
+            "content/missing-author": "info",
+        },
+        confidenceOverrides: {
+            "aeo/citable-facts": "low",
+            "aeo/answer-first": "low",
+            "content/missing-author": "low",
+        },
+    },
+    "unclear": {
+        categoryWeights: { integrity: 0.50, discoverability: 0.20, citation: 0.25, data: 0.05, audit: 0 },
+        severityOverrides: {},
+        confidenceOverrides: {},
+    },
+};
+/**
+ * Pick the scoring profile for a classification. Falls back to `unclear` (the
+ * conservative default) when it is missing, below 0.7 confidence, or unmapped.
+ */
+function profileFor(classification) {
+    if (!classification || classification.confidence < 0.7)
+        return SCORING_PROFILES.unclear;
+    return SCORING_PROFILES[classification.type] ?? SCORING_PROFILES.unclear;
+}
+const RULE_IMPACTS = {
+    // SpamBrain — high baseline, count amplifies (cluster matters)
+    "spam/near-duplicate": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
+    "spam/entity-swap": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
+    "spam/doorway-pattern": { baseImpact: 30, perInstance: 0, maxImpact: 30 },
+    "spam/template-coverage": { baseImpact: 15, perInstance: 3, maxImpact: 60 },
+    "spam/template-diversity": { baseImpact: 12, perInstance: 3, maxImpact: 50 },
+    "spam/boilerplate-ratio": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    "spam/thin-content": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
+    "spam/publication-velocity": { baseImpact: 8, perInstance: 2, maxImpact: 30 },
+    "cannibal/url-pattern": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    // Content
+    "content/unique-value": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    "content/meta-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
+    "content/missing-author": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
+    "content/eeat-signals": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
+    // Tech — softened in v0.4.3-rc2 after dogfood showed nextjs.org regressing
+    // from ready→caution on tech/canonical-consistency × 4 (legit cross-domain
+    // canonicals on a CDN). Per-instance now 1 (was 3).
+    "tech/canonical-consistency": { baseImpact: 8, perInstance: 1, maxImpact: 25 },
+    "tech/canonical-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    "tech/robots-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    "tech/redirect-chain": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
+    "tech/sitemap-completeness": { baseImpact: 8, perInstance: 1, maxImpact: 30 },
+    "tech/robots-sitemap-presence": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
+    "tech/soft-404": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
+    // hreflang — one bad declaration breaks all language pairs, so the COUNT
+    // doesn't compound. perInstance: 0 keeps it at the base impact regardless
+    // of how many language pairs are affected. Dogfood showed 350 findings on
+    // stripe.com from a single missing reciprocal pair — that should not be
+    // treated as 350× the impact.
+    "tech/hreflang-consistency": { baseImpact: 5, perInstance: 0, maxImpact: 5 },
+    // Links
+    "links/orphan-pages": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
+    "links/dead-ends": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
+    "links/cluster-connectivity": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
+    "links/link-depth": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
+    // AEO — much lower baselines than spam (AEO is opt-in optimization)
+    "aeo/citable-facts": { baseImpact: 2, perInstance: 1, maxImpact: 25 },
+    "aeo/answer-first": { baseImpact: 3, perInstance: 1, maxImpact: 25 },
+    "aeo/summary-bait": { baseImpact: 4, perInstance: 1, maxImpact: 25 },
+    "aeo/crawler-access": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
+    "aeo/freshness-signals": { baseImpact: 2, perInstance: 1, maxImpact: 20 },
+    "aeo/llms-txt": { baseImpact: 4, perInstance: 0, maxImpact: 4 },
+    "aeo/faq-coverage": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
+    "aeo/content-modularity": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
+    // Schema
+    "schema/json-ld-valid": { baseImpact: 8, perInstance: 2, maxImpact: 35 },
+    "schema/required-fields": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
+    "schema/consistency": { baseImpact: 3, perInstance: 1, maxImpact: 15 },
+    // Data
+    "data/data-binding": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
+};
+const DEFAULT_RULE_IMPACT = { baseImpact: 5, perInstance: 1, maxImpact: 25 };
+/**
+ * v0.4.3 — confidence-based discount applied to each finding's impact.
+ * Low-confidence findings contribute less to the bucket so they don't
+ * inflate the verdict on site types where they false-positive.
+ */
+const CONFIDENCE_MULTIPLIER = {
+    high: 1.0,
+    medium: 0.6,
+    low: 0.3,
+    speculative: 0.1,
+};
 /** Slug map for `RuleResult.docsUrl`. Defaults to the rule-id segment after the `/`. */
 const RULE_DOCS_SLUG = {
 // intentionally empty for v0.4 — slug = ruleId.split("/").pop() works for every shipped rule
@@ -172,7 +294,16 @@ function resolveGroupRules(baseRules, overrides) {
     }
     return result;
 }
-function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides, mode = "full") {
+function runRulesOnPages(pages,
+/**
+ * Full set of parsed pages, including those dropped by the page-skip policy
+ * (`respectNoindex`, `skipDetectedAuth`, ...). Required: the signature has no
+ * default. Read by `tech/canonical-noindex-conflict`,
+ * `tech/robots-noindex-conflict` and `tech/hreflang-consistency` — without it,
+ * `respectNoindex: true` would hide noindex'd pages from the very rules
+ * designed to flag accidental noindex'ing.
+ */
+noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides, mode = "full") {
     const findings = [];
     const modeOk = (ruleId) => mode !== "diff" || isRuleAllowedInDiff(ruleId);
     const tag = (results) => results.map((r) => {
@@ -245,10 +376,10 @@ function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls,
         findings.push(...tag(canonicalConsistencyRule(pages, knownUrls, normalizeUrlOptions)));
     }
     if (isEnabled("tech/canonical-noindex-conflict") && modeOk("tech/canonical-noindex-conflict")) {
-        findings.push(...tag(canonicalNoindexConflictRule(pages, normalizeUrlOptions)));
+        findings.push(...tag(canonicalNoindexConflictRule(noindexAwarePages, normalizeUrlOptions)));
     }
     if (isEnabled("tech/robots-noindex-conflict") && modeOk("tech/robots-noindex-conflict")) {
-        findings.push(...tag(robotsNoindexConflictRule(pages, inbound)));
+        findings.push(...tag(robotsNoindexConflictRule(noindexAwarePages, inbound)));
     }
     if (isEnabled("tech/redirect-chain") && modeOk("tech/redirect-chain")) {
         findings.push(...tag(redirectChainRule(pages)));
@@ -257,7 +388,9 @@ function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls,
         findings.push(...tag(soft404Rule(pages)));
     }
     if (isEnabled("tech/hreflang-consistency") && modeOk("tech/hreflang-consistency")) {
-        findings.push(...tag(hreflangConsistencyRule(pages, normalizeUrlOptions)));
+        // hreflang declarations on noindex'd pages are still bugs when they're
+        // inconsistent — see auditor.test.ts "emits technical SEO findings".
+        findings.push(...tag(hreflangConsistencyRule(noindexAwarePages, normalizeUrlOptions)));
     }
     // Schema rules
     if (isEnabled("schema/json-ld-valid") && modeOk("schema/json-ld-valid")) {
@@ -311,13 +444,47 @@ function runRulesOnPages(pages, resolvedRules, isEnabled, groupName, knownUrls,
 function hashHtml(html) {
     return createHash("sha256").update(html, "utf8").digest("hex");
 }
-const SEVERITY_WEIGHTS = {
-    critical: 40,
-    error: 25,
-    warning: 12,
-    info: 5,
-};
-function scoreFromFindings(findings) {
+/**
+ * v0.4.3 — apply per-site-type severity + confidence overrides BEFORE any
+ * bucketing happens, so blocker/shouldFix counts and category buckets all
+ * reflect the user-visible severity, not the rule's native severity.
+ *
+ * Never mutates the input. Returns the input array itself when the active
+ * profile has no overrides; otherwise a new array in which only remapped
+ * findings are shallow copies with `severity` / `confidence` replaced.
+ */
+export function applyScoringProfileOverrides(findings, classification) {
+    const profile = profileFor(classification);
+    const sevHas = Object.keys(profile.severityOverrides).length > 0;
+    const confHas = Object.keys(profile.confidenceOverrides).length > 0;
+    if (!sevHas && !confHas)
+        return findings;
+    return findings.map((f) => {
+        const newSev = profile.severityOverrides[f.ruleId];
+        const newConf = profile.confidenceOverrides[f.ruleId];
+        if (newSev === undefined && newConf === undefined)
+            return f;
+        return {
+            ...f,
+            ...(newSev !== undefined ? { severity: newSev } : {}),
+            ...(newConf !== undefined ? { confidence: newConf } : {}),
+        };
+    });
+}
+/**
+ * v0.4.3 — confidence-and-count-aware scoring. Replaces the v0.4 model that
+ * counted only severity. Each rule has a `baseImpact + (count - 1) *
+ * perInstance` contribution capped by `maxImpact`. The result is multiplied
+ * by the multiplier of the highest `confidence` among that rule's findings
+ * (default `high` → 1.0). Profiles can remap a rule's severity / confidence;
+ * this function expects those overrides ALREADY applied to the input findings.
+ *
+ * Bucket math: per-rule impacts sum into the rule's `CATEGORY_MAP` bucket;
+ * each bucket is then capped at 100 and weighted by the active scoring
+ * profile's `categoryWeights`.
+ */
+function scoreFromFindings(findings, classification) {
+    const profile = profileFor(classification);
     // v0.4 four-bucket raw penalties.
     const bucketRaw = {
         integrity: 0,
@@ -336,18 +503,16 @@ function scoreFromFindings(findings) {
     let blockers = 0;
     let shouldFix = 0;
     let informational = 0;
+    // Group findings by ruleId so we can apply baseImpact + perInstance.
+    // Each group's weighted impact lands in its category bucket.
+    const groups = new Map();
     for (const finding of findings) {
         const namespace = finding.ruleId.split("/")[0];
         const bucket = CATEGORY_MAP[namespace];
         if (!bucket)
             continue;
-        const weight = SEVERITY_WEIGHTS[finding.severity];
-        // v0.4 buckets.
-        bucketRaw[bucket] = Math.min(100, bucketRaw[bucket] + weight);
-        if (bucket !== "audit") {
+        if (bucket !== "audit")
             bucketIssues[bucket] += 1;
-        }
-        // Issue-bucket counts (audit/* findings are diagnostic-only and excluded).
         if (bucket === "audit")
             continue;
         if (finding.severity === "critical" || finding.severity === "error")
@@ -356,11 +521,40 @@ function scoreFromFindings(findings) {
             shouldFix += 1;
         else
             informational += 1;
+        const arr = groups.get(finding.ruleId) ?? [];
+        arr.push(finding);
+        groups.set(finding.ruleId, arr);
     }
-    const weighted = bucketRaw.integrity * CATEGORY_WEIGHTS.integrity +
-        bucketRaw.discoverability * CATEGORY_WEIGHTS.discoverability +
-        bucketRaw.citation * CATEGORY_WEIGHTS.citation +
-        bucketRaw.data * CATEGORY_WEIGHTS.data;
+    for (const [ruleId, group] of groups) {
+        const namespace = ruleId.split("/")[0];
+        const bucket = CATEGORY_MAP[namespace];
+        if (!bucket || bucket === "audit")
+            continue;
+        const impactSpec = RULE_IMPACTS[ruleId] ?? DEFAULT_RULE_IMPACT;
+        const count = group.length;
+        const rawImpact = impactSpec.baseImpact + Math.max(0, count - 1) * impactSpec.perInstance;
+        const cap = impactSpec.maxImpact ?? Number.POSITIVE_INFINITY;
+        const cappedImpact = Math.min(cap, rawImpact);
+        // Confidence multiplier — use the HIGHEST confidence in the group (the
+        // largest multiplier) so a rule that fires repeatedly with mixed
+        // confidence is not downweighted to its lowest-confidence instance.
+        let bestMultiplier = 0;
+        for (const f of group) {
+            const conf = f.confidence ?? "high";
+            const m = CONFIDENCE_MULTIPLIER[conf];
+            if (m > bestMultiplier)
+                bestMultiplier = m;
+        }
+        if (bestMultiplier === 0)
+            bestMultiplier = CONFIDENCE_MULTIPLIER.high;
+        const weighted = cappedImpact * bestMultiplier;
+        bucketRaw[bucket] = Math.min(100, bucketRaw[bucket] + weighted);
+    }
+    const cw = profile.categoryWeights;
+    const weighted = bucketRaw.integrity * cw.integrity +
+        bucketRaw.discoverability * cw.discoverability +
+        bucketRaw.citation * cw.citation +
+        bucketRaw.data * cw.data;
     const risk = Math.round(Math.min(100, weighted));
     const categories = {
         integrity: { grade: gradeForPenalty(bucketRaw.integrity), issues: bucketIssues.integrity },
@@ -944,6 +1138,11 @@ export async function auditSource(source, options) {
     const concurrency = options?.concurrency ?? preset.concurrency ?? 5;
     const timeoutMs = options?.timeout ?? 30000;
     const ignorePatterns = options?.ignore ?? [];
+    const respectNoindex = options?.respectNoindex ?? true;
+    const skipDetectedAuth = options?.skipDetectedAuth ?? false;
+    const skipBoilerplate = options?.skipBoilerplate ?? false;
+    const skipSearchPages = options?.skipSearchPages ?? false;
+    const skipEmptyBody = options?.skipEmptyBody ?? false;
     const sampleSize = options?.sampleSize ?? preset.sampleSize ?? 0;
     const externalSignal = options?.signal;
     const guardSsrf = options?.guardSsrf ?? preset.guardSsrf ?? false;
@@ -1140,13 +1339,33 @@ export async function auditSource(source, options) {
             })()
             : fisherYatesSample(filtered, sampleSize))
         : filtered;
-    const parsedPages = sampled.map((page) => {
+    const parsedPagesAll = sampled.map((page) => {
         const parsed = parseHtmlPage(page.html, page.url, { normalizeUrl: normalizeUrlOptions });
         if (page.httpMeta) {
             parsed.httpMeta = page.httpMeta;
         }
         return parsed;
     });
+    // v0.4.1 §page-filter: drop noindex'd pages and (when enabled) detected auth,
+    // boilerplate, search-result and empty-body pages BEFORE rule evaluation. A
+    // site owner's noindex is a hard signal — they already opted out of indexing,
+    // so auditing those URLs is only noise. The other skips are opt-in, e.g.
+    // skipDetectedAuth (off for the CLI by default; on for the hosted web form).
+    const skippedByPolicy = [];
+    const parsedPages = parsedPagesAll.filter((p) => {
+        const reason = pageSkipReason(p, {
+            respectNoindex,
+            skipDetectedAuth,
+            skipBoilerplate,
+            skipSearchPages,
+            skipEmptyBody,
+        });
+        if (reason) {
+            skippedByPolicy.push({ url: p.url, reason });
+            return false;
+        }
+        return true;
+    });
     const knownUrls = new Set(parsedPages.map((p) => p.url));
     const rootUrl = parsedPages.find((p) => /(^|[\\/])index\.html?$/i.test(p.url))?.url ?? parsedPages[0]?.url ?? "";
     const adjacency = new Map();
@@ -1267,10 +1486,13 @@ export async function auditSource(source, options) {
             continue;
         const groupRules = resolveGroupRules(resolvedRules, groupConfig?.overrides);
         const enabledCheck = (ruleId) => !suppressedRuleSet.has(ruleId) && isRuleEnabled(ruleId, groupConfig?.rules);
-        const findings = runRulesOnPages(groupPages, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full");
+        const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full");
         allFindings.push(...findings);
         groupPageCounts[groupName] = groupPages.length;
-        const { risk: groupRisk } = scoreFromFindings(findings);
+        // v0.4.3: per-group scoring uses the same site-classification profile so
+        // group-level risk numbers reflect the same severity / confidence remaps
+        // as the headline verdict.
+        const { risk: groupRisk } = scoreFromFindings(applyScoringProfileOverrides(findings, siteClassification), siteClassification);
         groupScores[groupName] = groupRisk;
     }
     throwIfAborted();
@@ -1280,7 +1502,13 @@ export async function auditSource(source, options) {
     });
     // Populate docsUrl on every finding before they leave the engine.
     withDocsUrls(enriched.findings);
-    const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings);
+    // v0.4.3: apply site-type-aware severity + confidence overrides so blocker
+    // counts, issue buckets, and category bucketing all reflect the user-visible
+    // severity (not the rule's native severity). The remapped findings replace
+    // the enrichment output so every downstream consumer (summary.issues, AI
+    // triage input, telemetry, formatters) sees the corrected severity.
+    enriched.findings = applyScoringProfileOverrides(enriched.findings, siteClassification);
+    const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings, siteClassification);
     const auditedPageCount = Object.values(groupPageCounts).reduce((a, b) => a + b, 0);
     const issues = bucketIssues(enriched.findings);
     const verdict = verdictForRisk(risk);
@@ -1317,22 +1545,68 @@ export async function auditSource(source, options) {
     if (cacheConfig) {
         summary.cacheStats = cacheStats;
     }
-    // v0.4 §4.5: warn when an `ignore` pattern matched zero discovered URLs.
+    // v0.4 §4.5 / v0.4.1: warn when ignore patterns matched zero discovered URLs.
+    //   - Per-pattern warning fires only when `warnUnmatchedIgnore` is true
+    //     (set by the CLI when `--ignore` was passed explicitly). Quiet by
+    //     default for config-loaded patterns where broad safety lists like
+    //     `**/dashboard/**` legitimately don't match small marketing sites.
+    //   - When ALL patterns matched zero (strongest typo signal, e.g. user
+    //     wrote `*.json` instead of `**/*.json`), emit a single consolidated
+    //     warning regardless of source.
     if (ignorePatterns.length > 0) {
-        for (const pattern of ignorePatterns) {
-            const matched = deduped.some((p) => globMatchPathname(pattern, p.url));
-            if (!matched) {
+        const unmatched = ignorePatterns.filter((pattern) => !deduped.some((p) => globMatchPathname(pattern, p.url)));
+        if (unmatched.length === ignorePatterns.length) {
+            // eslint-disable-next-line no-console
+            console.warn(`[pseolint] none of the ${ignorePatterns.length} ignore pattern${ignorePatterns.length === 1 ? "" : "s"} matched any URLs — check config or --ignore for typos`);
+        }
+        else if (options?.warnUnmatchedIgnore === true) {
+            for (const pattern of unmatched) {
                 // eslint-disable-next-line no-console
                 console.warn(`[pseolint] ignore pattern '${pattern}' matched 0 URLs — likely typo`);
             }
         }
     }
-    // Merge state-skipped (unchanged since last run) and robots-skipped (target
-    // robots.txt Disallow'd) URLs so callers have a single audit-skipped surface.
-    const allSkipped = [...skippedUrls, ...skippedByRobots];
+    // Merge state-skipped (unchanged since last run), robots-skipped (target
+    // robots.txt Disallow'd), and policy-skipped (noindex / detected-auth) URLs
+    // so callers have a single audit-skipped surface.
+    const allSkipped = [
+        ...skippedUrls,
+        ...skippedByRobots,
+        ...skippedByPolicy.map((s) => s.url),
+    ];
     if (allSkipped.length > 0) {
         summary.skippedUrls = allSkipped;
     }
+    // v0.4.1: surface noindex / auth skips as a discoverable diagnostic so the
+    // user sees what the engine excluded. Catches the accidental-noindex bug:
+    // pages silently dropped from indexing show up as a visible skip line
+    // instead of being absent without explanation.
+    if (skippedByPolicy.length > 0) {
+        const noindexCount = skippedByPolicy.filter((s) => s.reason === "noindex").length;
+        const authCount = skippedByPolicy.filter((s) => s.reason === "auth-detected").length;
+        const boilerplateCount = skippedByPolicy.filter((s) => s.reason === "boilerplate").length;
+        const searchCount = skippedByPolicy.filter((s) => s.reason === "search-result").length;
+        const spaShellCount = skippedByPolicy.filter((s) => s.reason === "spa-shell").length;
+        const sample = skippedByPolicy.slice(0, 5).map((s) => `${s.url} (${s.reason})`).join(", ");
+        const more = skippedByPolicy.length > 5 ? `, +${skippedByPolicy.length - 5} more` : "";
+        const parts = [];
+        if (noindexCount > 0)
+            parts.push(`${noindexCount} marked noindex`);
+        if (authCount > 0)
+            parts.push(`${authCount} detected as auth (login/register/etc)`);
+        if (boilerplateCount > 0)
+            parts.push(`${boilerplateCount} cookie/legal/consent boilerplate`);
+        if (searchCount > 0)
+            parts.push(`${searchCount} search-result page${searchCount === 1 ? "" : "s"}`);
+        if (spaShellCount > 0)
+            parts.push(`${spaShellCount} un-hydrated SPA shell${spaShellCount === 1 ? "" : "s"}`);
+        auditFindings.push({
+            ruleId: "audit/skipped-by-policy",
+            severity: "info",
+            message: `Skipped ${skippedByPolicy.length} page${skippedByPolicy.length === 1 ? "" : "s"} from rule evaluation — ${parts.join(", ")}. First few: ${sample}${more}.`,
+            relatedUrls: skippedByPolicy.map((s) => s.url),
+        });
+    }
     // Local flat view of every finding the engine produced, used internally for
     // state persistence, regression detection, AI triage input, and telemetry
     // counts. NOT exposed on the AuditSummary — consumers must use