npm - @pseolint/core - Versions diffs - 0.4.1 → 0.5.3 - Mend

@pseolint/core 0.4.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (321) hide show

package/README.md +264 -169
package/dist/ai/manifest/diff.d.ts +78 -0
package/dist/ai/manifest/diff.d.ts.map +1 -0
package/dist/ai/manifest/diff.js +139 -0
package/dist/ai/manifest/diff.js.map +1 -0
package/dist/ai/manifest/index.d.ts +18 -0
package/dist/ai/manifest/index.d.ts.map +1 -0
package/dist/ai/manifest/index.js +15 -0
package/dist/ai/manifest/index.js.map +1 -0
package/dist/ai/manifest/validate-manifest.d.ts +37 -0
package/dist/ai/manifest/validate-manifest.d.ts.map +1 -0
package/dist/ai/manifest/validate-manifest.js +67 -0
package/dist/ai/manifest/validate-manifest.js.map +1 -0
package/dist/ai/manifest/validators/domain-patches.d.ts +15 -0
package/dist/ai/manifest/validators/domain-patches.d.ts.map +1 -0
package/dist/ai/manifest/validators/domain-patches.js +110 -0
package/dist/ai/manifest/validators/domain-patches.js.map +1 -0
package/dist/ai/manifest/validators/index.d.ts +5 -0
package/dist/ai/manifest/validators/index.d.ts.map +1 -0
package/dist/ai/manifest/validators/index.js +4 -0
package/dist/ai/manifest/validators/index.js.map +1 -0
package/dist/ai/manifest/validators/page-changes.d.ts +36 -0
package/dist/ai/manifest/validators/page-changes.d.ts.map +1 -0
package/dist/ai/manifest/validators/page-changes.js +221 -0
package/dist/ai/manifest/validators/page-changes.js.map +1 -0
package/dist/ai/manifest/validators/types.d.ts +17 -0
package/dist/ai/manifest/validators/types.d.ts.map +1 -0
package/dist/ai/manifest/validators/types.js +5 -0
package/dist/ai/manifest/validators/types.js.map +1 -0
package/dist/ai/orchestrate.d.ts +74 -0
package/dist/ai/orchestrate.d.ts.map +1 -0
package/dist/ai/orchestrate.js +54 -0
package/dist/ai/orchestrate.js.map +1 -0
package/dist/ai/orchestrator/budget.d.ts +57 -0
package/dist/ai/orchestrator/budget.d.ts.map +1 -0
package/dist/ai/orchestrator/budget.js +114 -0
package/dist/ai/orchestrator/budget.js.map +1 -0
package/dist/ai/orchestrator/finish-tool.d.ts +568 -0
package/dist/ai/orchestrator/finish-tool.d.ts.map +1 -0
package/dist/ai/orchestrator/finish-tool.js +114 -0
package/dist/ai/orchestrator/finish-tool.js.map +1 -0
package/dist/ai/orchestrator/index.d.ts +25 -0
package/dist/ai/orchestrator/index.d.ts.map +1 -0
package/dist/ai/orchestrator/index.js +21 -0
package/dist/ai/orchestrator/index.js.map +1 -0
package/dist/ai/orchestrator/log.d.ts +24 -0
package/dist/ai/orchestrator/log.d.ts.map +1 -0
package/dist/ai/orchestrator/log.js +48 -0
package/dist/ai/orchestrator/log.js.map +1 -0
package/dist/ai/orchestrator/page-cache.d.ts +64 -0
package/dist/ai/orchestrator/page-cache.d.ts.map +1 -0
package/dist/ai/orchestrator/page-cache.js +127 -0
package/dist/ai/orchestrator/page-cache.js.map +1 -0
package/dist/ai/orchestrator/prompt.d.ts +16 -0
package/dist/ai/orchestrator/prompt.d.ts.map +1 -0
package/dist/ai/orchestrator/prompt.js +52 -0
package/dist/ai/orchestrator/prompt.js.map +1 -0
package/dist/ai/orchestrator/runner.d.ts +65 -0
package/dist/ai/orchestrator/runner.d.ts.map +1 -0
package/dist/ai/orchestrator/runner.js +223 -0
package/dist/ai/orchestrator/runner.js.map +1 -0
package/dist/ai/orchestrator/session.d.ts +44 -0
package/dist/ai/orchestrator/session.d.ts.map +1 -0
package/dist/ai/orchestrator/session.js +64 -0
package/dist/ai/orchestrator/session.js.map +1 -0
package/dist/ai/orchestrator/types.d.ts +99 -0
package/dist/ai/orchestrator/types.d.ts.map +1 -0
package/dist/ai/orchestrator/types.js +8 -0
package/dist/ai/orchestrator/types.js.map +1 -0
package/dist/ai/probes/cache.d.ts +12 -0
package/dist/ai/probes/cache.d.ts.map +1 -0
package/dist/ai/probes/cache.js +46 -0
package/dist/ai/probes/cache.js.map +1 -0
package/dist/ai/tools/ask-ai-engine.d.ts +77 -0
package/dist/ai/tools/ask-ai-engine.d.ts.map +1 -0
package/dist/ai/tools/ask-ai-engine.js +253 -0
package/dist/ai/tools/ask-ai-engine.js.map +1 -0
package/dist/ai/tools/check-domain-crawler-access.d.ts +71 -0
package/dist/ai/tools/check-domain-crawler-access.d.ts.map +1 -0
package/dist/ai/tools/check-domain-crawler-access.js +76 -0
package/dist/ai/tools/check-domain-crawler-access.js.map +1 -0
package/dist/ai/tools/check-domain-llms-txt.d.ts +70 -0
package/dist/ai/tools/check-domain-llms-txt.d.ts.map +1 -0
package/dist/ai/tools/check-domain-llms-txt.js +75 -0
package/dist/ai/tools/check-domain-llms-txt.js.map +1 -0
package/dist/ai/tools/check-indexability.d.ts +58 -0
package/dist/ai/tools/check-indexability.d.ts.map +1 -0
package/dist/ai/tools/check-indexability.js +64 -0
package/dist/ai/tools/check-indexability.js.map +1 -0
package/dist/ai/tools/check-robots.d.ts +68 -0
package/dist/ai/tools/check-robots.d.ts.map +1 -0
package/dist/ai/tools/check-robots.js +90 -0
package/dist/ai/tools/check-robots.js.map +1 -0
package/dist/ai/tools/check-rule-answer-first.d.ts +54 -0
package/dist/ai/tools/check-rule-answer-first.d.ts.map +1 -0
package/dist/ai/tools/check-rule-answer-first.js +50 -0
package/dist/ai/tools/check-rule-answer-first.js.map +1 -0
package/dist/ai/tools/check-rule-canonical-consistency.d.ts +66 -0
package/dist/ai/tools/check-rule-canonical-consistency.d.ts.map +1 -0
package/dist/ai/tools/check-rule-canonical-consistency.js +51 -0
package/dist/ai/tools/check-rule-canonical-consistency.js.map +1 -0
package/dist/ai/tools/check-rule-citable-facts.d.ts +58 -0
package/dist/ai/tools/check-rule-citable-facts.d.ts.map +1 -0
package/dist/ai/tools/check-rule-citable-facts.js +41 -0
package/dist/ai/tools/check-rule-citable-facts.js.map +1 -0
package/dist/ai/tools/check-rule-content-modularity.d.ts +58 -0
package/dist/ai/tools/check-rule-content-modularity.d.ts.map +1 -0
package/dist/ai/tools/check-rule-content-modularity.js +45 -0
package/dist/ai/tools/check-rule-content-modularity.js.map +1 -0
package/dist/ai/tools/check-rule-faq-coverage.d.ts +54 -0
package/dist/ai/tools/check-rule-faq-coverage.d.ts.map +1 -0
package/dist/ai/tools/check-rule-faq-coverage.js +39 -0
package/dist/ai/tools/check-rule-faq-coverage.js.map +1 -0
package/dist/ai/tools/check-rule-freshness-signals.d.ts +54 -0
package/dist/ai/tools/check-rule-freshness-signals.d.ts.map +1 -0
package/dist/ai/tools/check-rule-freshness-signals.js +45 -0
package/dist/ai/tools/check-rule-freshness-signals.js.map +1 -0
package/dist/ai/tools/check-rule-json-ld-valid.d.ts +54 -0
package/dist/ai/tools/check-rule-json-ld-valid.d.ts.map +1 -0
package/dist/ai/tools/check-rule-json-ld-valid.js +44 -0
package/dist/ai/tools/check-rule-json-ld-valid.js.map +1 -0
package/dist/ai/tools/check-rule-missing-author.d.ts +54 -0
package/dist/ai/tools/check-rule-missing-author.d.ts.map +1 -0
package/dist/ai/tools/check-rule-missing-author.js +45 -0
package/dist/ai/tools/check-rule-missing-author.js.map +1 -0
package/dist/ai/tools/check-rule-near-duplicate.d.ts +82 -0
package/dist/ai/tools/check-rule-near-duplicate.d.ts.map +1 -0
package/dist/ai/tools/check-rule-near-duplicate.js +63 -0
package/dist/ai/tools/check-rule-near-duplicate.js.map +1 -0
package/dist/ai/tools/check-rule-required-fields.d.ts +50 -0
package/dist/ai/tools/check-rule-required-fields.d.ts.map +1 -0
package/dist/ai/tools/check-rule-required-fields.js +38 -0
package/dist/ai/tools/check-rule-required-fields.js.map +1 -0
package/dist/ai/tools/check-rule-schema-consistency.d.ts +54 -0
package/dist/ai/tools/check-rule-schema-consistency.d.ts.map +1 -0
package/dist/ai/tools/check-rule-schema-consistency.js +44 -0
package/dist/ai/tools/check-rule-schema-consistency.js.map +1 -0
package/dist/ai/tools/check-rule-summary-bait.d.ts +54 -0
package/dist/ai/tools/check-rule-summary-bait.d.ts.map +1 -0
package/dist/ai/tools/check-rule-summary-bait.js +39 -0
package/dist/ai/tools/check-rule-summary-bait.js.map +1 -0
package/dist/ai/tools/check-rule-thin-content.d.ts +66 -0
package/dist/ai/tools/check-rule-thin-content.d.ts.map +1 -0
package/dist/ai/tools/check-rule-thin-content.js +58 -0
package/dist/ai/tools/check-rule-thin-content.js.map +1 -0
package/dist/ai/tools/detect-templates.d.ts +60 -0
package/dist/ai/tools/detect-templates.d.ts.map +1 -0
package/dist/ai/tools/detect-templates.js +43 -0
package/dist/ai/tools/detect-templates.js.map +1 -0
package/dist/ai/tools/fetch-page.d.ts +70 -0
package/dist/ai/tools/fetch-page.d.ts.map +1 -0
package/dist/ai/tools/fetch-page.js +93 -0
package/dist/ai/tools/fetch-page.js.map +1 -0
package/dist/ai/tools/fetch-sitemap.d.ts +60 -0
package/dist/ai/tools/fetch-sitemap.d.ts.map +1 -0
package/dist/ai/tools/fetch-sitemap.js +116 -0
package/dist/ai/tools/fetch-sitemap.js.map +1 -0
package/dist/ai/tools/index.d.ts +1555 -0
package/dist/ai/tools/index.d.ts.map +1 -0
package/dist/ai/tools/index.js +119 -0
package/dist/ai/tools/index.js.map +1 -0
package/dist/ai/tools/parse-page.d.ts +94 -0
package/dist/ai/tools/parse-page.d.ts.map +1 -0
package/dist/ai/tools/parse-page.js +108 -0
package/dist/ai/tools/parse-page.js.map +1 -0
package/dist/ai/tools/query-serp.d.ts +113 -0
package/dist/ai/tools/query-serp.d.ts.map +1 -0
package/dist/ai/tools/query-serp.js +131 -0
package/dist/ai/tools/query-serp.js.map +1 -0
package/dist/ai/tools/sample-template.d.ts +67 -0
package/dist/ai/tools/sample-template.d.ts.map +1 -0
package/dist/ai/tools/sample-template.js +75 -0
package/dist/ai/tools/sample-template.js.map +1 -0
package/dist/ai/tools/types.d.ts +73 -0
package/dist/ai/tools/types.d.ts.map +1 -0
package/dist/ai/tools/types.js +64 -0
package/dist/ai/tools/types.js.map +1 -0
package/dist/ai/tools/validate-jsonld.d.ts +62 -0
package/dist/ai/tools/validate-jsonld.d.ts.map +1 -0
package/dist/ai/tools/validate-jsonld.js +84 -0
package/dist/ai/tools/validate-jsonld.js.map +1 -0
package/dist/auditor.d.ts +16 -1
package/dist/auditor.d.ts.map +1 -1
package/dist/auditor.js +862 -88
package/dist/auditor.js.map +1 -1
package/dist/backpressure.d.ts.map +1 -1
package/dist/backpressure.js +10 -3
package/dist/backpressure.js.map +1 -1
package/dist/enrich-findings.d.ts.map +1 -1
package/dist/enrich-findings.js +15 -1
package/dist/enrich-findings.js.map +1 -1
package/dist/formatters/bucket-findings.d.ts +43 -0
package/dist/formatters/bucket-findings.d.ts.map +1 -0
package/dist/formatters/bucket-findings.js +110 -0
package/dist/formatters/bucket-findings.js.map +1 -0
package/dist/formatters/console.d.ts.map +1 -1
package/dist/formatters/console.js +116 -34
package/dist/formatters/console.js.map +1 -1
package/dist/formatters/fixplan.d.ts +13 -0
package/dist/formatters/fixplan.d.ts.map +1 -0
package/dist/formatters/fixplan.js +328 -0
package/dist/formatters/fixplan.js.map +1 -0
package/dist/formatters/html.d.ts.map +1 -1
package/dist/formatters/html.js +27 -0
package/dist/formatters/html.js.map +1 -1
package/dist/formatters/index.d.ts +2 -0
package/dist/formatters/index.d.ts.map +1 -1
package/dist/formatters/index.js +1 -0
package/dist/formatters/index.js.map +1 -1
package/dist/formatters/markdown.d.ts.map +1 -1
package/dist/formatters/markdown.js +97 -9
package/dist/formatters/markdown.js.map +1 -1
package/dist/index.d.ts +12 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +8 -0
package/dist/index.js.map +1 -1
package/dist/page-filter.d.ts +64 -6
package/dist/page-filter.d.ts.map +1 -1
package/dist/page-filter.js +124 -3
package/dist/page-filter.js.map +1 -1
package/dist/rule-references.d.ts.map +1 -1
package/dist/rule-references.js +5 -0
package/dist/rule-references.js.map +1 -1
package/dist/rules/aeo/answer-first.d.ts.map +1 -1
package/dist/rules/aeo/answer-first.js +17 -3
package/dist/rules/aeo/answer-first.js.map +1 -1
package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
package/dist/rules/aeo/citable-facts.js +12 -1
package/dist/rules/aeo/citable-facts.js.map +1 -1
package/dist/rules/aeo/content-modularity.d.ts.map +1 -1
package/dist/rules/aeo/content-modularity.js +3 -0
package/dist/rules/aeo/content-modularity.js.map +1 -1
package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
package/dist/rules/aeo/crawler-access.js +6 -0
package/dist/rules/aeo/crawler-access.js.map +1 -1
package/dist/rules/aeo/faq-coverage.d.ts.map +1 -1
package/dist/rules/aeo/faq-coverage.js +4 -0
package/dist/rules/aeo/faq-coverage.js.map +1 -1
package/dist/rules/aeo/freshness-signals.d.ts.map +1 -1
package/dist/rules/aeo/freshness-signals.js +9 -2
package/dist/rules/aeo/freshness-signals.js.map +1 -1
package/dist/rules/aeo/llms-txt.d.ts.map +1 -1
package/dist/rules/aeo/llms-txt.js +6 -1
package/dist/rules/aeo/llms-txt.js.map +1 -1
package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
package/dist/rules/aeo/summary-bait.js +5 -2
package/dist/rules/aeo/summary-bait.js.map +1 -1
package/dist/rules/content/heading-structure.d.ts +21 -0
package/dist/rules/content/heading-structure.d.ts.map +1 -0
package/dist/rules/content/heading-structure.js +56 -0
package/dist/rules/content/heading-structure.js.map +1 -0
package/dist/rules/content/image-alt-text.d.ts +18 -0
package/dist/rules/content/image-alt-text.d.ts.map +1 -0
package/dist/rules/content/image-alt-text.js +77 -0
package/dist/rules/content/image-alt-text.js.map +1 -0
package/dist/rules/content/missing-author.d.ts.map +1 -1
package/dist/rules/content/missing-author.js +10 -2
package/dist/rules/content/missing-author.js.map +1 -1
package/dist/rules/content/title-uniqueness.d.ts +18 -0
package/dist/rules/content/title-uniqueness.d.ts.map +1 -0
package/dist/rules/content/title-uniqueness.js +70 -0
package/dist/rules/content/title-uniqueness.js.map +1 -0
package/dist/rules/links/host-section-divergence.d.ts +3 -0
package/dist/rules/links/host-section-divergence.d.ts.map +1 -0
package/dist/rules/links/host-section-divergence.js +158 -0
package/dist/rules/links/host-section-divergence.js.map +1 -0
package/dist/rules/links/link-depth.d.ts +12 -1
package/dist/rules/links/link-depth.d.ts.map +1 -1
package/dist/rules/links/link-depth.js +25 -12
package/dist/rules/links/link-depth.js.map +1 -1
package/dist/rules/scope.d.ts.map +1 -1
package/dist/rules/scope.js +5 -0
package/dist/rules/scope.js.map +1 -1
package/dist/rules/spam/doorway-pattern.d.ts.map +1 -1
package/dist/rules/spam/doorway-pattern.js +27 -4
package/dist/rules/spam/doorway-pattern.js.map +1 -1
package/dist/rules/spam/publication-velocity.d.ts +1 -1
package/dist/rules/spam/publication-velocity.d.ts.map +1 -1
package/dist/rules/spam/publication-velocity.js +9 -4
package/dist/rules/spam/publication-velocity.js.map +1 -1
package/dist/rules/spam/template-coverage.js +1 -1
package/dist/rules/spam/template-coverage.js.map +1 -1
package/dist/rules/spam/template-diversity.js +1 -1
package/dist/rules/spam/template-diversity.js.map +1 -1
package/dist/rules/spam/thin-content.d.ts.map +1 -1
package/dist/rules/spam/thin-content.js +9 -1
package/dist/rules/spam/thin-content.js.map +1 -1
package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -1
package/dist/rules/tech/hreflang-consistency.js +33 -4
package/dist/rules/tech/hreflang-consistency.js.map +1 -1
package/dist/rules/tech/og-completeness.d.ts +11 -0
package/dist/rules/tech/og-completeness.d.ts.map +1 -1
package/dist/rules/tech/og-completeness.js +22 -23
package/dist/rules/tech/og-completeness.js.map +1 -1
package/dist/ruleset-version.d.ts +8 -0
package/dist/ruleset-version.d.ts.map +1 -0
package/dist/ruleset-version.js +8 -0
package/dist/ruleset-version.js.map +1 -0
package/dist/scrape-strategy.d.ts +42 -0
package/dist/scrape-strategy.d.ts.map +1 -0
package/dist/scrape-strategy.js +101 -0
package/dist/scrape-strategy.js.map +1 -0
package/dist/site-classifier.d.ts +1 -1
package/dist/site-classifier.d.ts.map +1 -1
package/dist/site-classifier.js +217 -0
package/dist/site-classifier.js.map +1 -1
package/dist/state.d.ts +36 -1
package/dist/state.d.ts.map +1 -1
package/dist/state.js +3 -1
package/dist/state.js.map +1 -1
package/dist/stratified-sample.d.ts +9 -1
package/dist/stratified-sample.d.ts.map +1 -1
package/dist/stratified-sample.js +23 -6
package/dist/stratified-sample.js.map +1 -1
package/dist/types.d.ts +179 -2
package/dist/types.d.ts.map +1 -1
package/dist/types.js.map +1 -1
package/dist/url-normalize.d.ts.map +1 -1
package/dist/url-normalize.js +13 -1
package/dist/url-normalize.js.map +1 -1
package/package.json +90 -90

package/dist/auditor.js CHANGED Viewed

@@ -18,7 +18,12 @@ import { thinContentRule } from "./rules/spam/thin-content.js";
 import { deadEndsRule } from "./rules/links/dead-ends.js";
 import { linkDepthRule } from "./rules/links/link-depth.js";
 import { clusterConnectivityRule } from "./rules/links/cluster-connectivity.js";
+import { hostSectionDivergenceRule } from "./rules/links/host-section-divergence.js";
 import { orphanPagesRule } from "./rules/links/orphan-pages.js";
+import { ogCompletenessRule } from "./rules/tech/og-completeness.js";
+import { titleUniquenessRule } from "./rules/content/title-uniqueness.js";
+import { headingStructureRule } from "./rules/content/heading-structure.js";
+import { imageAltTextRule } from "./rules/content/image-alt-text.js";
 import { canonicalConsistencyRule } from "./rules/tech/canonical-consistency.js";
 import { canonicalNoindexConflictRule } from "./rules/tech/canonical-noindex-conflict.js";
 import { hreflangConsistencyRule } from "./rules/tech/hreflang-consistency.js";
@@ -55,14 +60,17 @@ import { SSRFError, validateTargetHost } from "./ssrf-guard.js";
 import { SAFE_MODE_PRESETS, resolveSafeModeKey } from "./safe-mode-preset.js";
 import { FetchObserver, computeReadiness, detectDevServer } from "./fetch-observer.js";
 import { BackpressureMonitor, OriginDegradedError } from "./backpressure.js";
-import { stratifiedSample } from "./stratified-sample.js";
+import { stratifiedSample, mulberry32 } from "./stratified-sample.js";
 import { classifySite } from "./site-classifier.js";
 import { readState, writeState, computeContentHash, STATE_SCHEMA_VERSION, } from "./state.js";
+import { CORE_RULESET_VERSION } from "./ruleset-version.js";
+import { planScrapeStrategy, DEFAULT_AGE_FLOOR_DAYS } from "./scrape-strategy.js";
 const DEFAULTS = {
     nearDuplicateThreshold: 0.85,
     entitySwapThreshold: 0.95,
     thinContentMinWords: 300,
     publicationVelocityMaxPerDay: 100,
+    publicationVelocityMaxPerDayCorpusFraction: 0.10,
     boilerplateMaxRatio: 0.7,
     templateDiversityMinUniqueRatio: 0.35,
     uniqueValueMinWords: 100,
@@ -77,17 +85,6 @@ const DEFAULTS = {
     modularityMinSelfContainedRatio: 0.7,
     faqMinQuestionHeadings: 2
 };
-/**
- * v0.4 four-category weights. Audit is diagnostic-only (weight 0).
- * See 2026-04-29 v0.4 redesign spec §4.2.
- */
-const CATEGORY_WEIGHTS = {
-    integrity: 0.50, // spam + content + cannibal
-    discoverability: 0.20, // links + tech
-    citation: 0.25, // aeo + schema
-    data: 0.05, // data
-    audit: 0, // diagnostics, never weighted
-};
 /**
  * Maps the v0.3 ruleId namespace prefix to the v0.4 four-bucket category.
  * Used by `scoreFromFindings` to bucket findings without changing rule IDs.
@@ -103,6 +100,331 @@ const CATEGORY_MAP = {
     data: "data",
     audit: "audit",
 };
+const SCORING_PROFILES = {
+    "small-marketing": {
+        categoryWeights: { integrity: 0.30, discoverability: 0.40, citation: 0.20, data: 0.05, audit: 0 },
+        severityOverrides: {
+            "aeo/citable-facts": "info",
+            "aeo/answer-first": "info",
+            "aeo/summary-bait": "warning",
+            // 2026-05-03 calibration round 5: Segment integrations had 24 thin
+            // pages (200-300 words is correct for a catalog record). The capped
+            // thin-content impact of 40 pushed integrity to its 100 cap, i.e. a
+            // 30-point contribution at small-marketing weight, which alone tripped
+            // 'concerning'. Demoting to info keeps the signal visible without
+            // tanking the verdict on catalog-shape sites mis-classified as
+            // small-marketing. Real marketing sites (linear.app etc) don't
+            // normally have many sub-300-word pages so this won't hide quality
+            // issues there.
+            "spam/thin-content": "info",
+            "aeo/freshness-signals": "info",
+            "content/missing-author": "info",
+            // 2026-05-03 calibration round 3: Segment integrations classified as
+            // small-marketing@0.88 and tripped doorway-pattern 300× critical
+            // (catalog records are thin + entity-swap by design — not actually a
+            // doorway funnel). The classifier mistakes catalog directories as
+            // small-marketing; this demotion absorbs that mis-classification
+            // without weakening detection on actual small-marketing sites
+            // (linear.app, supabase.com — none of which produce entity-swap pairs).
+            "spam/doorway-pattern": "warning",
+            // 2026-05-03 calibration round 4: spam/boilerplate-ratio fired ERROR
+            // on Segment's integration directory (24 pages, 60%+ shared template
+            // chrome). On a marketing-template site the rule is correct — repeated
+            // "About us" / "Pricing" copy across pages IS a quality issue. On a
+            // catalog mis-classified to small-marketing, the shared chrome IS the
+            // template — by design. Demote to warning here; real marketing sites
+            // (linear.app, supabase.com) won't trip it because their corpus is
+            // page-diverse, but catalog-shape pages classified as small-marketing
+            // (Segment, Wise) won't tank the verdict.
+            "spam/boilerplate-ratio": "warning",
+            // 2026-05-03 v0.5.2 round 10: og-completeness, heading-structure,
+            // image-alt-text were added as new rules and tipped Segment from
+            // concerning → critical because catalog/template-driven sites
+            // commonly have shared OG defaults, weird H1 patterns (multiple H1s
+            // for repeated nav cards), and unlabelled logo grids. These are
+            // real findings on isolated sites but typical for catalog shape;
+            // demote to info here so the signal stays visible without driving
+            // the verdict.
+            "tech/og-completeness": "info",
+            "content/heading-structure": "info",
+            "content/image-alt-text": "info",
+        },
+        confidenceOverrides: {
+            "aeo/citable-facts": "low",
+            "aeo/answer-first": "low",
+            "aeo/summary-bait": "medium",
+            "spam/thin-content": "low",
+            "aeo/freshness-signals": "low",
+            "content/missing-author": "low",
+            "spam/doorway-pattern": "medium",
+            "spam/boilerplate-ratio": "medium",
+            "tech/og-completeness": "low",
+            "content/heading-structure": "low",
+            "content/image-alt-text": "low",
+        },
+    },
+    "blog": {
+        categoryWeights: { integrity: 0.40, discoverability: 0.25, citation: 0.30, data: 0.05, audit: 0 },
+        severityOverrides: {
+            "content/missing-author": "error",
+            "spam/thin-content": "error",
+        },
+        confidenceOverrides: {},
+    },
+    "programmatic-directory": {
+        categoryWeights: { integrity: 0.55, discoverability: 0.15, citation: 0.20, data: 0.10, audit: 0 },
+        // Symmetry argument: every other profile has severity overrides for the
+        // rules that mis-fit its shape (`docs` demotes AEO + author rules,
+        // `ecommerce` demotes `aeo/citable-facts`, `small-marketing` demotes several
+        // rules). `programmatic-directory` is the site type *most* structurally
+        // different from the "page = article" assumptions the AEO and EEAT rules
+        // are calibrated against — yet was the only profile with no overrides.
+        //
+        // Pre-calibration adjustment: demote (never escalate) the rules that
+        // first-principles analysis predicts will false-positive on catalog-
+        // shaped sites (Zapier integrations, G2 categories, Wise currency pairs,
+        // etc.). A reputable-pSEO calibration corpus + runner has been added
+        // (scripts/calibration-reputable-pseo.ts); these overrides will be
+        // tightened or loosened based on actual fire-rates measured against
+        // sites that demonstrably win in production. See
+        // docs/superpowers/specs/2026-05-03-calibration-against-reputable-pseo.md.
+        severityOverrides: {
+            // Catalog pages are tables, not prose. AEO rules calibrated on
+            // editorial content over-fire here.
+            "aeo/citable-facts": "info",
+            "aeo/answer-first": "info",
+            "aeo/content-modularity": "info",
+            // 2026-05-03 calibration: freshness-signals fired on every page of
+            // every reputable pSEO site. Catalog freshness is expressed via the
+            // data (live currency rates, current job listings, current pricing),
+            // not via visible "last updated" stamps. Demote.
+            "aeo/freshness-signals": "info",
+            // Authorship lives at the platform level (operator's about page),
+            // not on every catalog record. Following the rule's "add a byline"
+            // fix on a Zillow listing would actively make the page worse.
+            "content/missing-author": "info",
+            "content/eeat-signals": "info",
+            // Template uniformity is correct for catalogs by design. Keep the
+            // signal but cap at warning — never error.
+            "spam/template-diversity": "warning",
+            // 2026-05-03 v0.5.2 round 10: same catalog logic as small-marketing.
+            "tech/og-completeness": "info",
+            "content/heading-structure": "info",
+            "content/image-alt-text": "info",
+            // 2026-05-03 calibration round 2: catalogs are near-duplicate by
+            // design. spam/near-duplicate fires CRITICAL on every catalog pair.
+            // Demote to warning — keeps the signal visible without dominating
+            // the score.
+            "spam/near-duplicate": "warning",
+            // 2026-05-03 calibration round 5: catalog records are by-design
+            // shorter than the 300-word default. Demote to info on programmatic-
+            // directory; the data IS the content.
+            "spam/thin-content": "info",
+            // 2026-05-03 calibration round 2: doorway-pattern fires CRITICAL on
+            // every (thin + entity-swap) pair. On Segment integrations, integration
+            // pages are thin (200-300 words is the right amount for a directory
+            // record) and entity-swap (slack/google-sheets, slack/airtable, …) by
+            // design. The composite signal is genuinely true but the *intent*
+            // (doorway funnel) doesn't match the reality (catalog record).
+            // Demoting to warning preserves the signal without tanking the score.
+            "spam/doorway-pattern": "warning",
+            // 2026-05-03 calibration round 4: catalog pages share template chrome
+            // by design — same as `spam/template-diversity`, this signal is
+            // structurally true on programmatic-directories.
+            "spam/boilerplate-ratio": "warning",
+        },
+        confidenceOverrides: {
+            "aeo/citable-facts": "low",
+            "aeo/answer-first": "low",
+            "aeo/content-modularity": "low",
+            "aeo/freshness-signals": "low",
+            "content/missing-author": "low",
+            "content/eeat-signals": "low",
+            "spam/template-diversity": "medium",
+            "spam/near-duplicate": "medium",
+            "spam/doorway-pattern": "medium",
+            "spam/boilerplate-ratio": "medium",
+            "spam/thin-content": "low",
+            "tech/og-completeness": "low",
+            "content/heading-structure": "low",
+            "content/image-alt-text": "low",
+        },
+    },
+    "ecommerce": {
+        categoryWeights: { integrity: 0.20, discoverability: 0.40, citation: 0.15, data: 0.25, audit: 0 },
+        severityOverrides: {
+            "aeo/citable-facts": "info",
+            "schema/required-fields": "error",
+        },
+        confidenceOverrides: {
+            "aeo/citable-facts": "low",
+        },
+    },
+    "docs": {
+        categoryWeights: { integrity: 0.30, discoverability: 0.30, citation: 0.30, data: 0.10, audit: 0 },
+        severityOverrides: {
+            "aeo/citable-facts": "info",
+            "aeo/answer-first": "warning",
+            "content/missing-author": "info",
+        },
+        confidenceOverrides: {
+            "aeo/citable-facts": "low",
+            "aeo/answer-first": "low",
+            "content/missing-author": "low",
+        },
+    },
+    "unclear": {
+        categoryWeights: { integrity: 0.50, discoverability: 0.20, citation: 0.25, data: 0.05, audit: 0 },
+        // 2026-05-03 calibration round 2: the original "stay strict when unsure"
+        // intent meant that 4 of 5 reputable pSEO sites that classified as
+        // unclear (Zapier integrations, Typeform templates, Jasper templates,
+        // Numbeo cost-of-living) failed their verdict ceiling. The dominant
+        // driver was always `aeo/citable-facts` at full error severity — but
+        // catalog/template-gallery pages don't have prose, so the rule fires
+        // for a STRUCTURAL reason (page is a table, not a paragraph), not a
+        // QUALITY reason. Demoting the structurally-incompatible rules to
+        // info on `unclear` is conservative:
+        //   - if site is genuinely editorial and got mis-classified, signals
+        //     still surface (just info, not error) — author can act on them.
+        //   - if site is catalog and got mis-classified to unclear, verdict
+        //     no longer falsely tanks.
+        // At this round the spam signals (near-dup, doorway, thin) kept their
+        // severity; later calibration rounds demote them in the overrides below.
+        severityOverrides: {
+            "aeo/citable-facts": "info",
+            "aeo/answer-first": "info",
+            "aeo/content-modularity": "info",
+            "aeo/freshness-signals": "info",
+            "content/missing-author": "info",
+            "content/eeat-signals": "info",
+            // 2026-05-03 calibration round 3: Airbyte classified as unclear@0.5
+            // and scored concerning despite all info-severity findings in the
+            // top 5. The 8 critical "blockers" came from spam/near-duplicate,
+            // spam/entity-swap, spam/doorway-pattern firing 1-2× each on its
+            // connectors directory — invisible per-rule but cumulatively pushing
+            // the score over 'caution'. On unclear sites we cannot tell whether
+            // these triple-fires represent a real doorway or a catalog; the
+            // calibration corpus shows reputable catalogs hitting them more
+            // often than real doorways do. Demote to warning — keeps the signal
+            // visible (it appears in shouldFix bucket, with full message) without
+            // tanking the verdict on a structurally-ambiguous site.
+            "spam/near-duplicate": "warning",
+            "spam/entity-swap": "warning",
+            "spam/doorway-pattern": "warning",
+            // 2026-05-03 calibration round 4: same boilerplate logic on unclear —
+            // we can't tell whether the site is a marketing site (boilerplate IS
+            // a quality issue) or a catalog (it isn't), so demote conservatively.
+            "spam/boilerplate-ratio": "warning",
+            // 2026-05-03 calibration round 5: same thin-content logic on unclear.
+            // Catalog-shape sites that classify as unclear (Zapier, Typeform,
+            // Jasper) had thin-content firing at error on the 5-15% of pages
+            // shorter than the 300-word default. Demote to info — surfaces the
+            // signal without driving the verdict on a structurally-ambiguous site.
+            "spam/thin-content": "info",
+            // 2026-05-03 v0.5.2 round 10: same demotions as programmatic-
+            // directory profile — these tipped Webflow/Zapier/Numbeo/Airbyte
+            // back into concerning territory because they classify as unclear
+            // and the new rules aren't yet calibrated for catalog shape.
+            "tech/og-completeness": "info",
+            "content/heading-structure": "info",
+            "content/image-alt-text": "info",
+        },
+        confidenceOverrides: {
+            "aeo/citable-facts": "low",
+            "aeo/answer-first": "low",
+            "aeo/content-modularity": "low",
+            "aeo/freshness-signals": "low",
+            "content/missing-author": "low",
+            "content/eeat-signals": "low",
+            "spam/near-duplicate": "medium",
+            "spam/entity-swap": "medium",
+            "spam/doorway-pattern": "medium",
+            "spam/boilerplate-ratio": "medium",
+            "spam/thin-content": "low",
+            "tech/og-completeness": "low",
+            "content/heading-structure": "low",
+            "content/image-alt-text": "low",
+        },
+    },
+};
+/**
+ * Pick the scoring profile for a classification.
+ *
+ * Falls back to `SCORING_PROFILES.unclear` (the conservative default) in
+ * three cases: no classification was supplied, classifier confidence is
+ * below 0.7 (70%), or `classification.type` has no entry in
+ * `SCORING_PROFILES`.
+ *
+ * NOTE(review): a `confidence` that is missing or NaN is not treated as
+ * low — the `<` comparison is false, so the `type` lookup still runs.
+ *
+ * @param classification - classifier output; only `confidence` and `type`
+ *   are read here. May be null/undefined.
+ * @returns the selected entry of `SCORING_PROFILES`.
+ */
+function profileFor(classification) {
+    if (!classification || classification.confidence < 0.7)
+        return SCORING_PROFILES.unclear;
+    return SCORING_PROFILES[classification.type] ?? SCORING_PROFILES.unclear;
+}
+const RULE_IMPACTS = { // ruleId → impact spec; scoreFromFindings computes min(maxImpact, baseImpact + (count - 1) * perInstance) per rule, then applies the confidence multiplier
+    // SpamBrain — high baseline, count amplifies (cluster matters)
+    "spam/near-duplicate": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
+    "spam/entity-swap": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
+    "spam/doorway-pattern": { baseImpact: 30, perInstance: 0, maxImpact: 30 },
+    "spam/template-coverage": { baseImpact: 15, perInstance: 3, maxImpact: 60 },
+    "spam/template-diversity": { baseImpact: 12, perInstance: 3, maxImpact: 50 },
+    "spam/boilerplate-ratio": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    "spam/thin-content": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
+    "spam/publication-velocity": { baseImpact: 8, perInstance: 2, maxImpact: 30 },
+    "cannibal/url-pattern": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    // Content
+    "content/unique-value": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    "content/meta-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
+    "content/missing-author": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
+    "content/eeat-signals": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
+    // 2026-05-03 v0.5.2 blind-spot fixes
+    "content/title-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 25 }, // 2026-05-03 round 11: title is high-impact but the original 50-cap was disproportionate to other content rules and tipped Typeform into critical on a 6-finding cluster. Keep the rule at native error severity (duplicate titles ARE real bugs); just don't let one rule dominate the integrity bucket.
+    "content/heading-structure": { baseImpact: 5, perInstance: 1, maxImpact: 20 },
+    "content/image-alt-text": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
+    // Tech — softened in v0.4.3-rc2 after dogfood showed nextjs.org regressing
+    // from ready→caution on tech/canonical-consistency × 4 (legit cross-domain
+    // canonicals on a CDN). Per-instance now 1 (was 3).
+    "tech/canonical-consistency": { baseImpact: 8, perInstance: 1, maxImpact: 25 },
+    "tech/canonical-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    "tech/robots-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
+    "tech/redirect-chain": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
+    "tech/sitemap-completeness": { baseImpact: 8, perInstance: 1, maxImpact: 30 },
+    "tech/robots-sitemap-presence": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
+    "tech/soft-404": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
+    // hreflang — one bad declaration breaks all language pairs, so the COUNT
+    // doesn't compound. perInstance: 0 keeps it at the base impact regardless
+    // of how many language pairs are affected. Dogfood showed 350 findings on
+    // stripe.com from a single missing reciprocal pair — that should not be
+    // treated as 350× the impact.
+    "tech/hreflang-consistency": { baseImpact: 5, perInstance: 0, maxImpact: 5 },
+    "tech/og-completeness": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
+    // Links
+    "links/orphan-pages": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
+    "links/dead-ends": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
+    "links/cluster-connectivity": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
+    "links/link-depth": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
+    // AEO — much lower baselines than spam (AEO is opt-in optimization)
+    "aeo/citable-facts": { baseImpact: 2, perInstance: 1, maxImpact: 25 },
+    "aeo/answer-first": { baseImpact: 3, perInstance: 1, maxImpact: 25 },
+    "aeo/summary-bait": { baseImpact: 4, perInstance: 1, maxImpact: 25 },
+    "aeo/crawler-access": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
+    "aeo/freshness-signals": { baseImpact: 2, perInstance: 1, maxImpact: 20 },
+    "aeo/llms-txt": { baseImpact: 4, perInstance: 0, maxImpact: 4 },
+    "aeo/faq-coverage": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
+    "aeo/content-modularity": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
+    // Schema
+    "schema/json-ld-valid": { baseImpact: 8, perInstance: 2, maxImpact: 35 },
+    "schema/required-fields": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
+    "schema/consistency": { baseImpact: 3, perInstance: 1, maxImpact: 15 },
+    // Data
+    "data/data-binding": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
+};
+const DEFAULT_RULE_IMPACT = { baseImpact: 5, perInstance: 1, maxImpact: 25 }; // fallback spec used by scoreFromFindings for any ruleId with no RULE_IMPACTS entry
+/**
+ * v0.4.3 — confidence-based discount applied to each finding's impact.
+ * Low-confidence findings contribute less to the bucket so they don't
+ * inflate the verdict on site types where they false-positive.
+ *
+ * Keys are the confidence labels a finding may carry; values are the
+ * factor a rule's capped impact is multiplied by. `scoreFromFindings`
+ * treats a finding with no `confidence` as `high`, and applies the
+ * largest multiplier found among a rule's findings to the whole group.
+ */
+const CONFIDENCE_MULTIPLIER = {
+    high: 1.0,
+    medium: 0.6,
+    low: 0.3,
+    speculative: 0.1,
+};
 /** Slug map for `RuleResult.docsUrl`. Defaults to the rule-id segment after the `/`. */
 const RULE_DOCS_SLUG = {
 // intentionally empty for v0.4 — slug = ruleId.split("/").pop() works for every shipped rule
@@ -121,6 +443,39 @@ function verdictForRisk(risk) {
         return "concerning";
     return "critical";
 }
+/**
+ * 2026-05-03 v0.5.2 — apply the bring-your-own-authority shift to the
+ * verdict ladder. The raw `risk` number is unchanged; only the user-
+ * facing verdict mapping shifts.
+ *
+ *   `authorityScore >= 80` (established brand)  → shift ONE TIER LENIENT
+ *   `authorityScore <= 30` (newer/lower)        → shift ONE TIER STRICT
+ *   strictly between 30 and 80                   → no shift
+ *   undefined, not a finite number, or outside
+ *   the 0..100 range                             → no shift (score ignored)
+ *
+ * "One tier lenient" means: critical → concerning, concerning → caution,
+ * caution → ready, ready → ready (clamped). "One tier strict" is the
+ * inverse direction: ready → caution, caution → concerning,
+ * concerning → critical, critical → critical.
+ *
+ * A `verdict` that is not on `VERDICT_LADDER` is returned unchanged.
+ *
+ * @param verdict - current verdict string.
+ * @param authorityScore - optional score, expected in 0..100.
+ * @returns the shifted (or unchanged) verdict.
+ */
+const VERDICT_LADDER = ["ready", "caution", "concerning", "critical"]; // ordered most lenient → most strict; index arithmetic below relies on this order
+function shiftVerdictForAuthority(verdict, authorityScore) {
+    if (authorityScore === undefined)
+        return verdict;
+    if (!Number.isFinite(authorityScore))
+        return verdict;
+    if (authorityScore < 0 || authorityScore > 100)
+        return verdict;
+    const idx = VERDICT_LADDER.indexOf(verdict);
+    if (idx < 0)
+        return verdict;
+    if (authorityScore >= 80) {
+        return VERDICT_LADDER[Math.max(0, idx - 1)]; // clamp at the lenient end ("ready")
+    }
+    if (authorityScore <= 30) {
+        return VERDICT_LADDER[Math.min(VERDICT_LADDER.length - 1, idx + 1)]; // clamp at the strict end ("critical")
+    }
+    return verdict;
+}
 function gradeForPenalty(penalty) {
     if (penalty <= 20)
         return "A";
@@ -182,7 +537,15 @@ function runRulesOnPages(pages,
  * `respectNoindex: true` would hide noindex'd pages from the very rules
  * designed to flag accidental noindex'ing.
  */
-noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides, mode = "full") {
+noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides, mode = "full",
+/**
+ * 2026-05-03 calibration credibility fix: signals that the audit is
+ * running on a sampled subset of the discovered URLs. Rules whose
+ * outputs depend on a complete link graph (`links/unreachable-from-
+ * root`) skip their checks when this is true to avoid sampling-
+ * artifact false positives.
+ */
+sampled = false) {
     const findings = [];
     const modeOk = (ruleId) => mode !== "diff" || isRuleAllowedInDiff(ruleId);
     const tag = (results) => results.map((r) => {
@@ -211,7 +574,7 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
         findings.push(...tag(doorwayPatternRule(nearDuplicate.pairs, entitySwap.pairs, thinContent.thinContentUrls, pages)));
     }
     if (isEnabled("spam/publication-velocity") && modeOk("spam/publication-velocity")) {
-        findings.push(...tag(publicationVelocityRule(pages, resolvedRules.publicationVelocityMaxPerDay)));
+        findings.push(...tag(publicationVelocityRule(pages, resolvedRules.publicationVelocityMaxPerDay, resolvedRules.publicationVelocityMaxPerDayCorpusFraction)));
     }
     if (isEnabled("spam/boilerplate-ratio") && modeOk("spam/boilerplate-ratio")) {
         findings.push(...tag(boilerplateRatioRule(pages, resolvedRules.boilerplateMaxRatio)));
@@ -235,6 +598,17 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
     if (isEnabled("content/eeat-signals") && modeOk("content/eeat-signals")) {
         findings.push(...tag(eeatSignalsRule(pages)));
     }
+    // 2026-05-03 v0.5.2 blind-spot fixes — title uniqueness + heading
+    // structure + image alt-text were tier-1 gaps in the blind-spot audit.
+    if (isEnabled("content/title-uniqueness") && modeOk("content/title-uniqueness")) {
+        findings.push(...tag(titleUniquenessRule(pages)));
+    }
+    if (isEnabled("content/heading-structure") && modeOk("content/heading-structure")) {
+        findings.push(...tag(headingStructureRule(pages)));
+    }
+    if (isEnabled("content/image-alt-text") && modeOk("content/image-alt-text")) {
+        findings.push(...tag(imageAltTextRule(pages)));
+    }
     // Link rules — use the global link graph
     if (isEnabled("links/orphan-pages") && modeOk("links/orphan-pages")) {
         findings.push(...tag(orphanPagesRule(pages, inbound, rootUrl)));
@@ -244,12 +618,15 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
     }
     if (isEnabled("links/link-depth") && modeOk("links/link-depth")) {
         if (rootUrl) {
-            findings.push(...tag(linkDepthRule(pages, adjacency, rootUrl, resolvedRules.linkDepthMaxClicks, inbound)));
+            findings.push(...tag(linkDepthRule(pages, adjacency, rootUrl, resolvedRules.linkDepthMaxClicks, inbound, sampled)));
         }
     }
     if (isEnabled("links/cluster-connectivity") && modeOk("links/cluster-connectivity")) {
         findings.push(...tag(clusterConnectivityRule(pages, knownUrls)));
     }
+    if (isEnabled("links/host-section-divergence") && modeOk("links/host-section-divergence")) {
+        findings.push(...tag(hostSectionDivergenceRule(pages, adjacency)));
+    }
     // Tech rules
     if (isEnabled("tech/canonical-consistency") && modeOk("tech/canonical-consistency")) {
         findings.push(...tag(canonicalConsistencyRule(pages, knownUrls, normalizeUrlOptions)));
@@ -271,6 +648,11 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
         // inconsistent — see auditor.test.ts "emits technical SEO findings".
         findings.push(...tag(hreflangConsistencyRule(noindexAwarePages, normalizeUrlOptions)));
     }
+    // 2026-05-03 v0.5.2 blind-spot fix: og-completeness was referenced in
+    // the v0.4.x README without ever shipping. Now it does.
+    if (isEnabled("tech/og-completeness") && modeOk("tech/og-completeness")) {
+        findings.push(...tag(ogCompletenessRule(pages)));
+    }
     // Schema rules
     if (isEnabled("schema/json-ld-valid") && modeOk("schema/json-ld-valid")) {
         findings.push(...tag(jsonLdValidRule(pages)));
@@ -323,13 +705,67 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
 /** Returns the hex-encoded SHA-256 digest of `html`, hashed as UTF-8 text. */
 function hashHtml(html) {
     return createHash("sha256").update(html, "utf8").digest("hex");
 }
-const SEVERITY_WEIGHTS = {
-    critical: 40,
-    error: 25,
-    warning: 12,
-    info: 5,
-};
-function scoreFromFindings(findings) {
+/**
+ * v0.4.3 — apply per-site-type severity + confidence overrides BEFORE any
+ * bucketing happens, so blocker/shouldFix counts and category buckets all
+ * reflect the user-visible severity, not the rule's native severity.
+ *
+ * Does not mutate the input. When the selected profile defines at least
+ * one override, a NEW array is returned: findings whose `ruleId` has an
+ * override are shallow-copied with `severity` and/or `confidence`
+ * replaced, and every other finding is passed through by reference. When
+ * the profile defines no overrides at all, the input array itself is
+ * returned.
+ *
+ * @param findings - array of findings; `ruleId` selects the override.
+ * @param classification - passed to `profileFor` to choose the profile.
+ * @returns the findings with the profile's overrides applied.
+ */
+export function applyScoringProfileOverrides(findings, classification) {
+    const profile = profileFor(classification);
+    const sevHas = Object.keys(profile.severityOverrides).length > 0;
+    const confHas = Object.keys(profile.confidenceOverrides).length > 0;
+    if (!sevHas && !confHas)
+        return findings;
+    return findings.map((f) => {
+        const newSev = profile.severityOverrides[f.ruleId];
+        const newConf = profile.confidenceOverrides[f.ruleId];
+        if (newSev === undefined && newConf === undefined)
+            return f;
+        return {
+            ...f,
+            ...(newSev !== undefined ? { severity: newSev } : {}),
+            ...(newConf !== undefined ? { confidence: newConf } : {}),
+        };
+    });
+}
+/**
+ * 2026-05-03 credibility: list of rule IDs that fired on this audit AND
+ * have a severity override in the active profile. Distinct from
+ * `profile.severityOverrides`, which is the static set of remappings
+ * defined per profile — this is the subset whose rules appear in
+ * `findings`. Surfaced via `summary.appliedSeverityDemotions` so
+ * formatters can show the user "engine demoted X rules because <site
+ * type> profile" rather than hiding the mechanism.
+ *
+ * NOTE(review): membership is decided by the override table alone. The
+ * finding's current severity is never compared with the override, so a
+ * rule whose severity already equals the override (or whose override is
+ * not a demotion) is still listed — confirm that is intended.
+ *
+ * @param findings - array of findings; only `ruleId` is read.
+ * @param classification - passed to `profileFor` to choose the profile.
+ * @returns de-duplicated rule IDs, sorted with the default string sort.
+ */
+function computeAppliedDemotions(findings, classification) {
+    const profile = profileFor(classification);
+    if (Object.keys(profile.severityOverrides).length === 0)
+        return [];
+    const applied = new Set();
+    for (const f of findings) {
+        if (profile.severityOverrides[f.ruleId] !== undefined) {
+            applied.add(f.ruleId);
+        }
+    }
+    return Array.from(applied).sort();
+}
+/**
+ * v0.4.3 — confidence-and-count-aware scoring. Replaces the v0.4 model that
+ * counted only severity. Each rule has a `baseImpact + (count - 1) *
+ * perInstance` contribution capped by `maxImpact`. The result is multiplied
+ * by the finding's `confidence` (default `high` → 1.0). Per-site-type
+ * profiles can remap a rule's severity / confidence; this function expects
+ * those overrides to ALREADY be applied to the input findings.
+ *
+ * Bucket math: per-rule impacts sum into the rule's `CATEGORY_MAP` bucket;
+ * each bucket is then capped at 100 and weighted by the active scoring
+ * profile's `categoryWeights`.
+ */
+function scoreFromFindings(findings, classification) {
+    const profile = profileFor(classification);
     // v0.4 four-bucket raw penalties.
     const bucketRaw = {
         integrity: 0,
@@ -348,18 +784,16 @@ function scoreFromFindings(findings) {
     let blockers = 0;
     let shouldFix = 0;
     let informational = 0;
+    // Group findings by ruleId so we can apply baseImpact + perInstance.
+    // Each group's weighted impact lands in its category bucket.
+    const groups = new Map();
     for (const finding of findings) {
         const namespace = finding.ruleId.split("/")[0];
         const bucket = CATEGORY_MAP[namespace];
         if (!bucket)
             continue;
-        const weight = SEVERITY_WEIGHTS[finding.severity];
-        // v0.4 buckets.
-        bucketRaw[bucket] = Math.min(100, bucketRaw[bucket] + weight);
-        if (bucket !== "audit") {
+        if (bucket !== "audit")
             bucketIssues[bucket] += 1;
-        }
-        // Issue-bucket counts (audit/* findings are diagnostic-only and excluded).
         if (bucket === "audit")
             continue;
         if (finding.severity === "critical" || finding.severity === "error")
@@ -368,11 +802,73 @@ function scoreFromFindings(findings) {
             shouldFix += 1;
         else
             informational += 1;
+        const arr = groups.get(finding.ruleId) ?? [];
+        arr.push(finding);
+        groups.set(finding.ruleId, arr);
+    }
+    // 2026-05-03 calibration credibility fix: track info-severity vs
+    // non-info contributions to each bucket separately so a flood of info
+    // findings can't fill the bucket cap and tank the verdict on its own.
+    // Round 7 surfaced this on Airbyte and round 8 on Zapier — both had
+    // ALL info-severity findings in their top drivers yet scored
+    // `concerning` because cumulative info impact filled the citation
+    // bucket past its 100 cap. Now: info contribution per bucket caps at
+    // 50; warning+ contribution caps at 100; final bucket = sum, capped
+    // at 100. A site with no real warning/error findings can score at
+    // most ~12.5 risk from info accumulation at typical 0.25 citation
+    // weight — which keeps verdict aligned with the visible severity in
+    // the report.
+    const bucketInfoOnly = {
+        integrity: 0, discoverability: 0, citation: 0, data: 0, audit: 0,
+    };
+    const bucketNonInfo = {
+        integrity: 0, discoverability: 0, citation: 0, data: 0, audit: 0,
+    };
+    for (const [ruleId, group] of groups) {
+        const namespace = ruleId.split("/")[0];
+        const bucket = CATEGORY_MAP[namespace];
+        if (!bucket || bucket === "audit")
+            continue;
+        const impactSpec = RULE_IMPACTS[ruleId] ?? DEFAULT_RULE_IMPACT;
+        const count = group.length;
+        const rawImpact = impactSpec.baseImpact + Math.max(0, count - 1) * impactSpec.perInstance;
+        const cap = impactSpec.maxImpact ?? Number.POSITIVE_INFINITY;
+        const cappedImpact = Math.min(cap, rawImpact);
+        // Confidence multiplier — use the HIGHEST confidence (largest
+        // multiplier) in the group so a rule that fires repeatedly with mixed
+        // confidence is not downweighted to its lowest-confidence instance.
+        let bestMultiplier = 0;
+        for (const f of group) {
+            const conf = f.confidence ?? "high";
+            const m = CONFIDENCE_MULTIPLIER[conf];
+            if (m > bestMultiplier)
+                bestMultiplier = m;
+        }
+        if (bestMultiplier === 0)
+            bestMultiplier = CONFIDENCE_MULTIPLIER.high;
+        const weighted = cappedImpact * bestMultiplier;
+        // Bucket the rule's contribution by the highest severity in the group.
+        // Mixed-severity groups (e.g. error + info) count toward non-info — once
+        // a rule has any non-info finding, its count contribution is treated as
+        // a real-issue signal, not info accumulation.
+        const isInfoOnly = group.every((f) => f.severity === "info");
+        if (isInfoOnly) {
+            bucketInfoOnly[bucket] += weighted;
+        }
+        else {
+            bucketNonInfo[bucket] += weighted;
+        }
     }
-    const weighted = bucketRaw.integrity * CATEGORY_WEIGHTS.integrity +
-        bucketRaw.discoverability * CATEGORY_WEIGHTS.discoverability +
-        bucketRaw.citation * CATEGORY_WEIGHTS.citation +
-        bucketRaw.data * CATEGORY_WEIGHTS.data;
+    for (const key of ["integrity", "discoverability", "citation", "data"]) {
+        const info = Math.min(50, bucketInfoOnly[key]);
+        const nonInfo = Math.min(100, bucketNonInfo[key]);
+        bucketRaw[key] = Math.min(100, info + nonInfo);
+    }
+    const cw = profile.categoryWeights;
+    const weighted = bucketRaw.integrity * cw.integrity +
+        bucketRaw.discoverability * cw.discoverability +
+        bucketRaw.citation * cw.citation +
+        bucketRaw.data * cw.data;
     const risk = Math.round(Math.min(100, weighted));
     const categories = {
         integrity: { grade: gradeForPenalty(bucketRaw.integrity), issues: bucketIssues.integrity },
@@ -534,6 +1030,25 @@ function parseSitemapUrls(xml) {
     const matches = Array.from(xml.matchAll(/<loc>\s*([^<\s]+)\s*<\/loc>/gi));
     return matches.map((match) => match[1]).filter(Boolean);
 }
+export function parseSitemapUrlsWithLastmod(xml) { // extracts { url, lastmod } entries from sitemap XML text, in document order
+    const out = [];
+    // Match both <url>...</url> blocks (in <urlset>) and <sitemap>...</sitemap>
+    // blocks (in <sitemapindex>). Both carry <loc> + optional <lastmod>.
+    const blocks = xml.matchAll(/<(url|sitemap)\b[^>]*>([\s\S]*?)<\/\1>/gi);
+    for (const block of blocks) {
+        const inner = block[2] ?? "";
+        const locMatch = inner.match(/<loc\b[^>]*>([\s\S]*?)<\/loc>/i);
+        if (!locMatch)
+            continue; // a block with no <loc> is dropped
+        const url = locMatch[1].trim(); // NOTE(review): trimmed only — XML entities (e.g. &amp;) and CDATA wrappers are not decoded here
+        if (!url)
+            continue; // an empty <loc> is dropped
+        const lastmodMatch = inner.match(/<lastmod\b[^>]*>([\s\S]*?)<\/lastmod>/i);
+        const lastmod = lastmodMatch ? lastmodMatch[1].trim() : undefined; // raw trimmed text, not parsed as a date; undefined when the tag is absent
+        out.push({ url, lastmod });
+    }
+    return out;
+}
 function looksLikeSitemap(text) {
     const lowered = text.toLowerCase();
     return lowered.includes("<urlset") || lowered.includes("<sitemapindex");
@@ -602,22 +1117,32 @@ function shouldIgnore(url, patterns) {
     }
     return false;
 }
-function fisherYatesSample(items, n) {
+function fisherYatesSample(items, n, random = Math.random) { // picks n items at random from `items`; `random` replaces the hard-coded Math.random
     const arr = [...items]; // work on a copy so `items` is not mutated
     for (let i = arr.length - 1; i > 0 && arr.length - i <= n; i -= 1) { // partial Fisher–Yates: only the last n slots are shuffled
-        const j = Math.floor(Math.random() * (i + 1));
+        const j = Math.floor(random() * (i + 1)); // j falls in [0, i], assuming random() returns a value in [0, 1) like Math.random
         [arr[i], arr[j]] = [arr[j], arr[i]];
     }
     return arr.slice(arr.length - n); // NOTE(review): when n > items.length the start index is negative and slice counts from the end, so fewer than items.length items can come back
 }
 async function collectUrlsFromSitemap(sitemapText, sitemapUrl, visited, timeoutMs, cache, stats, signal, validateHop) {
     visited.add(sitemapUrl);
-    const locs = parseSitemapUrls(sitemapText);
+    const entries = parseSitemapUrlsWithLastmod(sitemapText);
     if (!isSitemapIndex(sitemapText)) {
-        return locs;
+        const urls = [];
+        const lastmodByUrl = new Map();
+        for (const entry of entries) {
+            urls.push(entry.url);
+            if (entry.lastmod !== undefined) {
+                lastmodByUrl.set(entry.url, entry.lastmod);
+            }
+        }
+        return { urls, lastmodByUrl };
     }
     const allUrls = [];
-    for (const childUrl of locs) {
+    const allLastmodByUrl = new Map();
+    for (const entry of entries) {
+        const childUrl = entry.url;
         if (signal?.aborted)
             throw signal.reason ?? new Error("aborted");
         if (visited.has(childUrl))
@@ -628,10 +1153,13 @@ async function collectUrlsFromSitemap(sitemapText, sitemapUrl, visited, timeoutM
         const childLike = child.contentType.includes("xml") || looksLikeSitemap(child.text);
         if (!childLike)
             continue;
-        const childUrls = await collectUrlsFromSitemap(child.text, childUrl, visited, timeoutMs, cache, stats, signal, validateHop);
+        const { urls: childUrls, lastmodByUrl: childLastmodByUrl } = await collectUrlsFromSitemap(child.text, childUrl, visited, timeoutMs, cache, stats, signal, validateHop);
         allUrls.push(...childUrls);
+        for (const [u, lm] of childLastmodByUrl) {
+            allLastmodByUrl.set(u, lm);
+        }
     }
-    return allUrls;
+    return { urls: allUrls, lastmodByUrl: allLastmodByUrl };
 }
 async function fetchRobotsMeta(origin, timeoutMs, cache, stats, signal, validateHop) {
     if (!origin)
@@ -664,7 +1192,7 @@ function isDisallowedByRobots(urlPath, patterns) {
 /** True once `b.used` has reached `b.cap`; a cap of 0 or less never counts as exceeded. */
 function budgetExceeded(b) {
     return b.cap > 0 && b.used >= b.cap;
 }
-async function loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscovery, discoveryBudget, cache, stats, fillBudgetViaLinkDiscovery = false, byteBudget = { used: 0, cap: 0 }, signal, guardSsrf = false, respectRobotsTxt = true, skippedByRobots = [], followRedirects = true, maxCrawlDiscovered = 5000) {
+async function loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscovery, discoveryBudget, cache, stats, fillBudgetViaLinkDiscovery = false, byteBudget = { used: 0, cap: 0 }, signal, guardSsrf = false, respectRobotsTxt = true, skippedByRobots = [], followRedirects = true, maxCrawlDiscovered = 5000, monitoringContext = null) {
     // Memoized SSRF validator. When guardSsrf is on, every URL fetched by the
     // audit (source, sitemap entries, redirects, discovered links) goes through
     // this. DNS is hit once per unique hostname per audit — a 4k-page audit on
@@ -724,11 +1252,33 @@ async function loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscover
         const isXml = (contentType.includes("xml") || looksLikeSitemap(text)) && sourceStatus !== -1;
         if (isXml) {
             const visited = new Set();
-            const allSitemapUrls = await collectUrlsFromSitemap(text, source, visited, timeoutMs, cache, stats, signal, validateHop);
+            const { urls: allSitemapUrls, lastmodByUrl: sitemapLastmodByUrl } = await collectUrlsFromSitemap(text, source, visited, timeoutMs, cache, stats, signal, validateHop);
             // If we have a budget, sample from sitemap URLs before fetching
-            const urlsToFetch = discoveryBudget > 0 && allSitemapUrls.length > discoveryBudget
+            const sampledUrls = discoveryBudget > 0 && allSitemapUrls.length > discoveryBudget
                 ? fisherYatesSample(allSitemapUrls, discoveryBudget)
                 : allSitemapUrls;
+            // v0.5: change-driven monitoring. Apply the decision matrix BEFORE
+            // fetching bodies. URLs in plan.skip are not network-touched at all —
+            // their findings will be carried forward from prior state by the caller.
+            // This is the whole point of monitoring mode: rule eval is microseconds,
+            // the fetch is seconds; move the skip decision upstream of the fetch.
+            let scrapePlan;
+            let urlsToFetch;
+            if (monitoringContext) {
+                scrapePlan = planScrapeStrategy({
+                    candidateUrls: sampledUrls,
+                    priorState: monitoringContext.priorState,
+                    sitemapLastmodByUrl,
+                    currentRulesetVersion: monitoringContext.currentRulesetVersion,
+                    ageFloorDays: monitoringContext.ageFloorDays,
+                    now: monitoringContext.now,
+                    forceRefetchUrls: monitoringContext.forceRefetchUrls,
+                });
+                urlsToFetch = Array.from(scrapePlan.refetch.keys());
+            }
+            else {
+                urlsToFetch = sampledUrls;
+            }
             const pages = [];
             // Fetch robots.txt once for the origin — reused for Crawl-Delay pacing and Disallow checks.
             const sourceOrigin = (() => { try {
@@ -835,7 +1385,7 @@ async function loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscover
                     });
                 }
             }
-            return { pages, sitemapUrls: new Set(allSitemapUrls), discoveredUrlCount: allSitemapUrls.length };
+            return { pages, sitemapUrls: new Set(allSitemapUrls), sitemapLastmodByUrl, discoveredUrlCount: allSitemapUrls.length, scrapePlan };
         }
         if (contentType.includes("html") || looksLikeHtml(text)) {
             const initialPage = { url: source, html: text };
@@ -958,6 +1508,9 @@ export async function auditSource(source, options) {
     const ignorePatterns = options?.ignore ?? [];
     const respectNoindex = options?.respectNoindex ?? true;
     const skipDetectedAuth = options?.skipDetectedAuth ?? false;
+    const skipBoilerplate = options?.skipBoilerplate ?? false;
+    const skipSearchPages = options?.skipSearchPages ?? false;
+    const skipEmptyBody = options?.skipEmptyBody ?? false;
     const sampleSize = options?.sampleSize ?? preset.sampleSize ?? 0;
     const externalSignal = options?.signal;
     const guardSsrf = options?.guardSsrf ?? preset.guardSsrf ?? false;
@@ -973,12 +1526,26 @@ export async function auditSource(source, options) {
     let backpressureError = null;
     const signal = composeSignals(externalSignal, backpressureAbort.signal);
     const observer = new FetchObserver();
+    // 2026-05-03 calibration: the prior (3s p95 cap, 2× baseline multiplier)
+    // gate aborted 4 of 12 reputable-pSEO audits on what was normal load
+    // variance — Zapier at p95=576ms (2.4× a 236ms baseline), Webflow at
+    // p95=1808ms (2.2× 833ms), Airbyte at p95=1288ms (3.4× 380ms). For real
+    // production CDNs these spikes are noise, not degradation. Raise the
+    // gate so it still catches truly broken origins (sustained 4× slowdown
+    // OR p95 above 8s) without tripping on normal audit-induced load.
     const monitor = backpressureEnabled
         ? new BackpressureMonitor({
             warmupSize: 10,
-            absoluteP95Ms: 3000,
-            baselineMultiplier: 2,
-            errorRatioThreshold: 0.1,
+            absoluteP95Ms: 8000,
+            baselineMultiplier: 4,
+            // 2026-05-03 production fix: 0.1 (10%) was tripping pseolint.dev
+            // audits on real production sites that legitimately return ~10% 5xx
+            // (transient errors, async page renderers warming up, sites in
+            // canary). Combined with the `>=` comparison bug (also fixed),
+            // this aborted every web-app audit. 0.15 keeps the gate honest —
+            // a sustained 15%+ 5xx rate is a real problem, not noise — while
+            // letting transient errors not bring down the whole audit.
+            errorRatioThreshold: 0.15,
         })
         : null;
     // v0.4: framework gets set on the first observation that carries headers
@@ -1010,6 +1577,8 @@ export async function auditSource(source, options) {
         entitySwapThreshold: options?.rules?.entitySwapThreshold ?? DEFAULTS.entitySwapThreshold,
         thinContentMinWords: options?.rules?.thinContentMinWords ?? DEFAULTS.thinContentMinWords,
         publicationVelocityMaxPerDay: options?.rules?.publicationVelocityMaxPerDay ?? DEFAULTS.publicationVelocityMaxPerDay,
+        publicationVelocityMaxPerDayCorpusFraction: options?.rules?.publicationVelocityMaxPerDayCorpusFraction
+            ?? DEFAULTS.publicationVelocityMaxPerDayCorpusFraction,
         boilerplateMaxRatio: options?.rules?.boilerplateMaxRatio ?? DEFAULTS.boilerplateMaxRatio,
         templateDiversityMinUniqueRatio: options?.rules?.templateDiversityMinUniqueRatio ?? DEFAULTS.templateDiversityMinUniqueRatio,
         uniqueValueMinWords: options?.rules?.uniqueValueMinWords ?? DEFAULTS.uniqueValueMinWords,
@@ -1051,7 +1620,65 @@ export async function auditSource(source, options) {
     const fetchByteBudget = { used: 0, cap: maxFetchBytes };
     // v0.4 §4.7: detectedFramework is set in onObservation above, side-effect
     // of the normal source URL fetch. No separate probe needed.
-    const { pages: loadedPagesRaw, sitemapUrls: sitemapUrlSet, discoveredUrlCount } = await loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscovery, discoveryBudget, cacheConfig, cacheStats, fillBudgetViaLinkDiscovery, fetchByteBudget, signal, guardSsrf, respectRobotsTxt, skippedByRobots, followRedirects, maxCrawlDiscovered);
+    // v0.5: read prior state BEFORE loadPagesFromSource so the change-driven
+    // monitoring decision matrix can run pre-fetch and tell loadPagesFromSource
+    // which URLs to actually fetch. Reading state is cheap; doing it here also
+    // means we know `priorState` once for both the monitoring path and the
+    // post-audit state-write path further down.
+    let priorState = null;
+    const skippedUrls = [];
+    const currentRenderMode = options?.render ? "rendered" : "static";
+    if (options?.state?.path || options?.state?.since || options?.state?.exitOnRegression || options?.state?.mode) {
+        const statePath = options.state?.path ?? ".pseolint/state.json";
+        priorState = await readState(statePath);
+        if (priorState && priorState.renderMode !== currentRenderMode) {
+            console.error(`warning: prior state renderMode=${priorState.renderMode} differs from current ${currentRenderMode}. Performing full re-audit.`);
+            priorState = null;
+        }
+    }
+    // Effective monitoring mode:
+    //   - explicit `state.mode` wins ("monitoring" or "fresh")
+    //   - else if `--since` is passed and prior state exists → "monitoring" (back-compat alias)
+    //   - else if prior state exists → "monitoring" (auto, v0.5 default)
+    //   - else → "fresh" (no prior state available)
+    const explicitMode = options?.state?.mode;
+    const effectiveMode = explicitMode ??
+        (priorState ? "monitoring" : "fresh");
+    // Build the monitoring context only for HTTP sources in monitoring mode with
+    // prior state. Single-page HTML and filesystem sources skip this — they are
+    // exempted from the strategy (a single-page audit has nothing to plan; local
+    // reads are cheap so re-reading every file beats branch complexity).
+    const isHttpSource = /^https?:\/\//i.test(source);
+    // If the user asked for monitoring against a filesystem source, surface that
+    // we're ignoring the request. Silent bypass leads to "why is my state file
+    // not being used?" debugging. Only log when the user actively chose
+    // monitoring (explicit --mode or --since) — auto-monitoring on prior state
+    // existence is implicit and shouldn't warn.
+    if (!isHttpSource && effectiveMode === "monitoring" && (options?.state?.mode === "monitoring" || options?.state?.since)) {
+        console.error("warning: monitoring mode requested but source is a local file/directory; reading every HTML file (the matrix only applies to HTTP sources).");
+    }
+    const monitoringContext = effectiveMode === "monitoring" && priorState && isHttpSource
+        ? {
+            priorState,
+            currentRulesetVersion: CORE_RULESET_VERSION,
+            ageFloorDays: options?.state?.ageFloorDays ?? DEFAULT_AGE_FLOOR_DAYS,
+            now: new Date(),
+            forceRefetchUrls: options?.force?.urls,
+        }
+        : null;
+    if (!priorState && options?.state?.since) {
+        console.error("no prior state found — performing full baseline audit");
+    }
+    const { pages: loadedPagesRaw, sitemapUrls: sitemapUrlSet, sitemapLastmodByUrl, discoveredUrlCount, scrapePlan } = await loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscovery, discoveryBudget, cacheConfig, cacheStats, fillBudgetViaLinkDiscovery, fetchByteBudget, signal, guardSsrf, respectRobotsTxt, skippedByRobots, followRedirects, maxCrawlDiscovered, monitoringContext);
+    // The scrapePlan tells us which URLs were skipped pre-fetch under monitoring
+    // mode. Surface them in skippedUrls so they show up under summary.skippedUrls
+    // (kept for back-compat with --since consumers); T7 will carry their prior
+    // findings forward and T8 will surface the full plan in summary.scrapePlan.
+    if (scrapePlan) {
+        for (const url of scrapePlan.skip.keys()) {
+            skippedUrls.push(url);
+        }
+    }
     throwIfAborted();
     const loadedPages = [...loadedPagesRaw];
     // v0.4 §4.7: content-type-aware crawling. Filter out fetched URLs whose
@@ -1080,34 +1707,11 @@ export async function auditSource(source, options) {
     if (discoveredUrlCount && discoveredUrlCount > loadedPages.length) {
         console.error(`Discovered ${discoveredUrlCount} pages, fetched ${loadedPages.length} for audit. Use --sample-size 0 for full crawl.`);
     }
-    // State read + delta filtering
-    let priorState = null;
-    const skippedUrls = [];
-    if (options?.state?.since || options?.state?.exitOnRegression) {
-        const statePath = options.state.path ?? ".pseolint/state.json";
-        priorState = await readState(statePath);
-        const currentRenderMode = options.render ? "rendered" : "static";
-        if (priorState && priorState.renderMode !== currentRenderMode) {
-            console.error(`warning: prior state renderMode=${priorState.renderMode} differs from current ${currentRenderMode}. Performing full re-audit.`);
-            priorState = null;
-        }
-        if (priorState && options.state.since) {
-            const kept = [];
-            for (const p of loadedPages) {
-                const prior = priorState.urls[p.url];
-                if (prior && prior.contentHash === computeContentHash(p.html)) {
-                    skippedUrls.push(p.url);
-                }
-                else {
-                    kept.push(p);
-                }
-            }
-            loadedPages.splice(0, loadedPages.length, ...kept);
-        }
-        else if (!priorState && options.state.since) {
-            console.error("no prior state found — performing full baseline audit");
-        }
-    }
+    // v0.5: prior state was loaded BEFORE loadPagesFromSource so the change-
+    // driven monitoring decision matrix could run pre-fetch. URLs the matrix
+    // marked as "skip" were never fetched and are recorded in skippedUrls
+    // above. The old post-fetch contentHash skip is gone — the decision now
+    // happens upstream of the network round-trip.
     let robotsTxtContent = "";
     if (/^https?:\/\//i.test(source)) {
         try {
@@ -1145,14 +1749,22 @@ export async function auditSource(source, options) {
         ? deduped.filter((page) => !shouldIgnore(page.url, ignorePatterns))
         : deduped;
     const strategy = options?.samplingStrategy ?? "stratified";
-    const sampled = sampleSize > 0 && sampleSize < filtered.length
+    // 2026-05-03 calibration credibility fix: when sampleSeed is set, use a
+    // deterministic PRNG so repeated audits pick the same pages and the
+    // verdict is reproducible. Without a seed, fall back to Math.random
+    // (legacy behavior, kept for backward compatibility).
+    const samplingRandom = options?.sampleSeed !== undefined
+        ? mulberry32(options.sampleSeed)
+        : Math.random;
+    const isSampledAudit = sampleSize > 0 && sampleSize < filtered.length;
+    const sampled = isSampledAudit
         ? (strategy === "stratified"
             ? (() => {
                 const urlsMap = new Map(filtered.map(p => [p.url, p]));
-                const sampledUrls = stratifiedSample(filtered.map(p => p.url), sampleSize);
+                const sampledUrls = stratifiedSample(filtered.map(p => p.url), sampleSize, samplingRandom);
                 return sampledUrls.map(u => urlsMap.get(u));
             })()
-            : fisherYatesSample(filtered, sampleSize))
+            : fisherYatesSample(filtered, sampleSize, samplingRandom))
         : filtered;
     const parsedPagesAll = sampled.map((page) => {
         const parsed = parseHtmlPage(page.html, page.url, { normalizeUrl: normalizeUrlOptions });
@@ -1168,7 +1780,13 @@ export async function auditSource(source, options) {
     // (off for the CLI by default; on for the hosted web form).
     const skippedByPolicy = [];
     const parsedPages = parsedPagesAll.filter((p) => {
-        const reason = pageSkipReason(p, { respectNoindex, skipDetectedAuth });
+        const reason = pageSkipReason(p, {
+            respectNoindex,
+            skipDetectedAuth,
+            skipBoilerplate,
+            skipSearchPages,
+            skipEmptyBody,
+        });
         if (reason) {
             skippedByPolicy.push({ url: p.url, reason });
             return false;
@@ -1295,10 +1913,13 @@ export async function auditSource(source, options) {
             continue;
         const groupRules = resolveGroupRules(resolvedRules, groupConfig?.overrides);
         const enabledCheck = (ruleId) => !suppressedRuleSet.has(ruleId) && isRuleEnabled(ruleId, groupConfig?.rules);
-        const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full");
+        const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full", isSampledAudit);
         allFindings.push(...findings);
         groupPageCounts[groupName] = groupPages.length;
-        const { risk: groupRisk } = scoreFromFindings(findings);
+        // v0.4.3: per-group scoring uses the same site-classification profile so
+        // group-level risk numbers reflect the same severity / confidence remaps
+        // as the headline verdict.
+        const { risk: groupRisk } = scoreFromFindings(applyScoringProfileOverrides(findings, siteClassification), siteClassification);
         groupScores[groupName] = groupRisk;
     }
     throwIfAborted();
@@ -1308,10 +1929,61 @@ export async function auditSource(source, options) {
     });
     // Populate docsUrl on every finding before they leave the engine.
     withDocsUrls(enriched.findings);
-    const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings);
+    // v0.4.3: apply site-type-aware severity + confidence overrides so blocker
+    // counts, issue buckets, and category bucketing all reflect the user-visible
+    // severity (not the rule's native severity). The remapped findings replace
+    // the enrichment output so every downstream consumer (summary.issues, AI
+    // triage input, telemetry, formatters) sees the corrected severity.
+    enriched.findings = applyScoringProfileOverrides(enriched.findings, siteClassification);
+    // v0.5: change-driven monitoring carry-forward. URLs that the pre-fetch
+    // strategy marked as "skip" were never fetched this run, so no rule produced
+    // findings for them. Restore their findings from prior state, marked with
+    // `carriedForward: true` and `lastVerifiedAt` so consumers can reason about
+    // staleness. Inject after enrichment + overrides — these findings already
+    // went through both in their original run; re-running enrichment would
+    // strip their template / cluster assignments because parsedPages doesn't
+    // contain the skipped pages.
+    if (priorState && skippedUrls.length > 0) {
+        for (const url of skippedUrls) {
+            const prior = priorState.urls[url];
+            if (!prior || prior.findings.length === 0)
+                continue;
+            for (const f of prior.findings) {
+                const carried = {
+                    ruleId: f.ruleId,
+                    severity: f.severity,
+                    message: f.message,
+                    confidence: f.confidence,
+                    carriedForward: true,
+                    lastVerifiedAt: prior.fetchedAt,
+                    // State stores `url` but the engine type uses `pageUrl` — map back.
+                    pageUrl: typeof f.url === "string" ? f.url : url,
+                };
+                // Optional fields are preserved opportunistically when present in state.
+                if (typeof f.fix === "string")
+                    carried.fix = f.fix;
+                if (typeof f.ref === "string")
+                    carried.ref = f.ref;
+                if (typeof f.docsUrl === "string")
+                    carried.docsUrl = f.docsUrl;
+                if (Array.isArray(f.relatedUrls))
+                    carried.relatedUrls = f.relatedUrls;
+                if (typeof f.group === "string")
+                    carried.group = f.group;
+                if (typeof f.similarity === "number")
+                    carried.similarity = f.similarity;
+                if (f.context !== undefined)
+                    carried.context = f.context;
+                if (f.effort !== undefined)
+                    carried.effort = f.effort;
+                enriched.findings.push(carried);
+            }
+        }
+    }
+    const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings, siteClassification);
     const auditedPageCount = Object.values(groupPageCounts).reduce((a, b) => a + b, 0);
     const issues = bucketIssues(enriched.findings);
-    const verdict = verdictForRisk(risk);
+    const verdict = shiftVerdictForAuthority(verdictForRisk(risk), options?.authorityScore);
     const headline = buildHeadline(bucketCounts);
     // audit/* findings are diagnostic-only and never appear in summary.issues.
     // Surface them under diagnostics so consumers (telemetry, debug UIs) can
@@ -1323,6 +1995,7 @@ export async function auditSource(source, options) {
         fetched: parsedPages.length,
         skipped: skippedByContentType.length + skippedByRobots.length + skippedUrls.length,
     };
+    const appliedSeverityDemotions = computeAppliedDemotions(enriched.findings, siteClassification);
     const summary = {
         schemaVersion: SCHEMA_VERSION,
         verdict,
@@ -1331,6 +2004,7 @@ export async function auditSource(source, options) {
         categories,
         issues,
         siteClassification,
+        appliedSeverityDemotions: appliedSeverityDemotions.length > 0 ? appliedSeverityDemotions : undefined,
         diagnostics: {
             originReadiness: readinessReport,
             crawlStats,
@@ -1377,6 +2051,31 @@ export async function auditSource(source, options) {
     if (allSkipped.length > 0) {
         summary.skippedUrls = allSkipped;
     }
+    // v0.5+: surface the change-driven monitoring summary when this run was a
+    // monitoring run (had prior state and didn't force --mode=fresh). Filesystem
+    // sources don't get a scrapePlan because they bypass the matrix.
+    if (effectiveMode === "monitoring" && priorState && scrapePlan) {
+        const reasonCounts = {};
+        for (const reason of scrapePlan.refetch.values()) {
+            reasonCounts[reason] = (reasonCounts[reason] ?? 0) + 1;
+        }
+        for (const reason of scrapePlan.skip.values()) {
+            reasonCounts[reason] = (reasonCounts[reason] ?? 0) + 1;
+        }
+        // `fetched` is the number of URLs whose bodies actually came back —
+        // robots-disallowed, byte-budget-exceeded, content-type-filtered, and 4xx
+        // URLs the matrix INTENDED to refetch may have dropped out before we got
+        // here. `intended` (= scrapePlan.refetch.size) is exposed too so callers
+        // can spot the gap (e.g. "intended 200, fetched 187, 13 URLs dropped").
+        summary.scrapePlan = {
+            fetched: loadedPages.length,
+            intended: scrapePlan.refetch.size,
+            carriedForward: scrapePlan.skip.size,
+            reasonCounts,
+            rulesetVersion: CORE_RULESET_VERSION,
+            lastFullAuditAt: priorState.lastFullAuditAt ?? priorState.lastRun ?? null,
+        };
+    }
     // v0.4.1: surface noindex / auth skips as a discoverable diagnostic so the
     // user sees what the engine excluded. Catches the accidental-noindex bug:
     // pages silently dropped from indexing show up as a visible skip line
@@ -1384,6 +2083,9 @@ export async function auditSource(source, options) {
     if (skippedByPolicy.length > 0) {
         const noindexCount = skippedByPolicy.filter((s) => s.reason === "noindex").length;
         const authCount = skippedByPolicy.filter((s) => s.reason === "auth-detected").length;
+        const boilerplateCount = skippedByPolicy.filter((s) => s.reason === "boilerplate").length;
+        const searchCount = skippedByPolicy.filter((s) => s.reason === "search-result").length;
+        const spaShellCount = skippedByPolicy.filter((s) => s.reason === "spa-shell").length;
         const sample = skippedByPolicy.slice(0, 5).map((s) => `${s.url} (${s.reason})`).join(", ");
         const more = skippedByPolicy.length > 5 ? `, +${skippedByPolicy.length - 5} more` : "";
         const parts = [];
@@ -1391,6 +2093,12 @@ export async function auditSource(source, options) {
             parts.push(`${noindexCount} marked noindex`);
         if (authCount > 0)
             parts.push(`${authCount} detected as auth (login/register/etc)`);
+        if (boilerplateCount > 0)
+            parts.push(`${boilerplateCount} cookie/legal/consent boilerplate`);
+        if (searchCount > 0)
+            parts.push(`${searchCount} search-result page${searchCount === 1 ? "" : "s"}`);
+        if (spaShellCount > 0)
+            parts.push(`${spaShellCount} un-hydrated SPA shell${spaShellCount === 1 ? "" : "s"}`);
         auditFindings.push({
             ruleId: "audit/skipped-by-policy",
             severity: "info",
@@ -1410,6 +2118,13 @@ export async function auditSource(source, options) {
         for (const f of enrichedFindings) {
             if (!f.pageUrl)
                 continue;
+            // Carried-forward findings are not "current" — we did not re-verify them
+            // this run. Including them would mask a genuine regression on a skipped
+            // URL: prior set has rule X carried-forward, current set also has X
+            // (carried-forward), comparison says "no new rule", we miss the case
+            // where the page actually started failing rule Y too.
+            if (f.carriedForward)
+                continue;
             const set = currentFindings.get(f.pageUrl) ?? new Set();
             set.add(f.ruleId);
             currentFindings.set(f.pageUrl, set);
@@ -1435,6 +2150,12 @@ export async function auditSource(source, options) {
         const renderMode = options.render ? "rendered" : "static";
         const urls = {};
         const findingsByUrl = new Map();
+        // v0.5+: persist full finding records per URL so future monitoring runs
+        // can carry them forward when the URL is skipped pre-fetch. Carried-
+        // forward findings (carriedForward=true) are NOT re-persisted under the
+        // fetched URL — they belong to the prior entry that's preserved verbatim
+        // for skipped URLs above.
+        const fullFindingsByUrl = new Map();
         for (const f of enrichedFindings) {
             if (!f.pageUrl)
                 continue;
@@ -1442,9 +2163,16 @@ export async function auditSource(source, options) {
             if (!list.includes(f.ruleId))
                 list.push(f.ruleId);
             findingsByUrl.set(f.pageUrl, list);
+            if (!f.carriedForward) {
+                const records = fullFindingsByUrl.get(f.pageUrl) ?? [];
+                records.push(f);
+                fullFindingsByUrl.set(f.pageUrl, records);
+            }
         }
-        // Preserve prior entries for URLs skipped by --since (they didn't change).
-        // Without this, delta runs would lose state for unchanged URLs.
+        // Preserve prior entries for URLs the monitoring matrix skipped (we never
+        // fetched them this run; their fetchedAt MUST NOT advance or the age floor
+        // never trips). Skipped URLs include those in scrapePlan.skip plus any
+        // robots-skipped URLs from prior runs that are still in priorState.
         if (priorState && skippedUrls.length > 0) {
             for (const url of skippedUrls) {
                 const prior = priorState.urls[url];
@@ -1452,19 +2180,65 @@ export async function auditSource(source, options) {
                     urls[url] = prior;
             }
         }
+        const nowIso = new Date().toISOString();
         for (const p of loadedPages) {
-            urls[p.url] = {
+            const priorEntry = priorState?.urls[p.url];
+            const responseHeaders = p.httpMeta?.headers;
+            const lastModifiedHeader = responseHeaders?.["last-modified"];
+            const etagHeader = responseHeaders?.["etag"];
+            const sitemapLastmodForUrl = sitemapLastmodByUrl?.get(p.url);
+            const entry = {
                 contentHash: computeContentHash(p.html),
-                fetchedAt: new Date().toISOString(),
+                fetchedAt: nowIso,
                 status: p.httpMeta?.statusCode ?? 200,
                 findingIds: findingsByUrl.get(p.url) ?? [],
+                findings: (fullFindingsByUrl.get(p.url) ?? []).map((f) => ({
+                    id: `${f.ruleId}::${p.url}`,
+                    ruleId: f.ruleId,
+                    severity: f.severity,
+                    confidence: f.confidence ?? "high",
+                    message: f.message,
+                    ...(f.fix !== undefined ? { fix: f.fix } : {}),
+                    ...(f.ref !== undefined ? { ref: f.ref } : {}),
+                    ...(f.docsUrl !== undefined ? { docsUrl: f.docsUrl } : {}),
+                    ...(f.pageUrl !== undefined ? { url: f.pageUrl } : {}),
+                    ...(f.relatedUrls !== undefined ? { relatedUrls: f.relatedUrls } : {}),
+                    ...(f.group !== undefined ? { group: f.group } : {}),
+                    ...(f.similarity !== undefined ? { similarity: f.similarity } : {}),
+                    ...(f.context !== undefined ? { context: f.context } : {}),
+                    ...(f.effort !== undefined ? { effort: f.effort } : {}),
+                })),
+                rulesetVersion: CORE_RULESET_VERSION,
             };
+            if (lastModifiedHeader)
+                entry.lastModified = lastModifiedHeader;
+            else if (priorEntry?.lastModified)
+                entry.lastModified = priorEntry.lastModified;
+            if (etagHeader)
+                entry.etag = etagHeader;
+            else if (priorEntry?.etag)
+                entry.etag = priorEntry.etag;
+            if (sitemapLastmodForUrl)
+                entry.sitemapLastmodAtAudit = sitemapLastmodForUrl;
+            else if (priorEntry?.sitemapLastmodAtAudit)
+                entry.sitemapLastmodAtAudit = priorEntry.sitemapLastmodAtAudit;
+            urls[p.url] = entry;
         }
+        // `lastFullAuditAt` advances only when this run actually re-fetched every
+        // candidate URL. In monitoring mode (matrix skipped some URLs), preserve
+        // the prior baseline timestamp so callers can reason about staleness.
+        // In fresh mode (every candidate URL was fetched), bump to now.
+        const isMonitoringRun = effectiveMode === "monitoring" && priorState !== null;
+        const lastFullAuditAt = isMonitoringRun
+            ? (priorState?.lastFullAuditAt ?? priorState?.lastRun ?? nowIso)
+            : nowIso;
         const newState = {
             version: STATE_SCHEMA_VERSION,
-            lastRun: new Date().toISOString(),
+            lastRun: nowIso,
+            lastFullAuditAt,
             source,
             renderMode,
+            rulesetVersion: CORE_RULESET_VERSION,
             urls,
             summary: {
                 score: summary.risk,