@pseolint/core 0.4.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321) hide show
  1. package/README.md +264 -169
  2. package/dist/ai/manifest/diff.d.ts +78 -0
  3. package/dist/ai/manifest/diff.d.ts.map +1 -0
  4. package/dist/ai/manifest/diff.js +139 -0
  5. package/dist/ai/manifest/diff.js.map +1 -0
  6. package/dist/ai/manifest/index.d.ts +18 -0
  7. package/dist/ai/manifest/index.d.ts.map +1 -0
  8. package/dist/ai/manifest/index.js +15 -0
  9. package/dist/ai/manifest/index.js.map +1 -0
  10. package/dist/ai/manifest/validate-manifest.d.ts +37 -0
  11. package/dist/ai/manifest/validate-manifest.d.ts.map +1 -0
  12. package/dist/ai/manifest/validate-manifest.js +67 -0
  13. package/dist/ai/manifest/validate-manifest.js.map +1 -0
  14. package/dist/ai/manifest/validators/domain-patches.d.ts +15 -0
  15. package/dist/ai/manifest/validators/domain-patches.d.ts.map +1 -0
  16. package/dist/ai/manifest/validators/domain-patches.js +110 -0
  17. package/dist/ai/manifest/validators/domain-patches.js.map +1 -0
  18. package/dist/ai/manifest/validators/index.d.ts +5 -0
  19. package/dist/ai/manifest/validators/index.d.ts.map +1 -0
  20. package/dist/ai/manifest/validators/index.js +4 -0
  21. package/dist/ai/manifest/validators/index.js.map +1 -0
  22. package/dist/ai/manifest/validators/page-changes.d.ts +36 -0
  23. package/dist/ai/manifest/validators/page-changes.d.ts.map +1 -0
  24. package/dist/ai/manifest/validators/page-changes.js +221 -0
  25. package/dist/ai/manifest/validators/page-changes.js.map +1 -0
  26. package/dist/ai/manifest/validators/types.d.ts +17 -0
  27. package/dist/ai/manifest/validators/types.d.ts.map +1 -0
  28. package/dist/ai/manifest/validators/types.js +5 -0
  29. package/dist/ai/manifest/validators/types.js.map +1 -0
  30. package/dist/ai/orchestrate.d.ts +74 -0
  31. package/dist/ai/orchestrate.d.ts.map +1 -0
  32. package/dist/ai/orchestrate.js +54 -0
  33. package/dist/ai/orchestrate.js.map +1 -0
  34. package/dist/ai/orchestrator/budget.d.ts +57 -0
  35. package/dist/ai/orchestrator/budget.d.ts.map +1 -0
  36. package/dist/ai/orchestrator/budget.js +114 -0
  37. package/dist/ai/orchestrator/budget.js.map +1 -0
  38. package/dist/ai/orchestrator/finish-tool.d.ts +568 -0
  39. package/dist/ai/orchestrator/finish-tool.d.ts.map +1 -0
  40. package/dist/ai/orchestrator/finish-tool.js +114 -0
  41. package/dist/ai/orchestrator/finish-tool.js.map +1 -0
  42. package/dist/ai/orchestrator/index.d.ts +25 -0
  43. package/dist/ai/orchestrator/index.d.ts.map +1 -0
  44. package/dist/ai/orchestrator/index.js +21 -0
  45. package/dist/ai/orchestrator/index.js.map +1 -0
  46. package/dist/ai/orchestrator/log.d.ts +24 -0
  47. package/dist/ai/orchestrator/log.d.ts.map +1 -0
  48. package/dist/ai/orchestrator/log.js +48 -0
  49. package/dist/ai/orchestrator/log.js.map +1 -0
  50. package/dist/ai/orchestrator/page-cache.d.ts +64 -0
  51. package/dist/ai/orchestrator/page-cache.d.ts.map +1 -0
  52. package/dist/ai/orchestrator/page-cache.js +127 -0
  53. package/dist/ai/orchestrator/page-cache.js.map +1 -0
  54. package/dist/ai/orchestrator/prompt.d.ts +16 -0
  55. package/dist/ai/orchestrator/prompt.d.ts.map +1 -0
  56. package/dist/ai/orchestrator/prompt.js +52 -0
  57. package/dist/ai/orchestrator/prompt.js.map +1 -0
  58. package/dist/ai/orchestrator/runner.d.ts +65 -0
  59. package/dist/ai/orchestrator/runner.d.ts.map +1 -0
  60. package/dist/ai/orchestrator/runner.js +223 -0
  61. package/dist/ai/orchestrator/runner.js.map +1 -0
  62. package/dist/ai/orchestrator/session.d.ts +44 -0
  63. package/dist/ai/orchestrator/session.d.ts.map +1 -0
  64. package/dist/ai/orchestrator/session.js +64 -0
  65. package/dist/ai/orchestrator/session.js.map +1 -0
  66. package/dist/ai/orchestrator/types.d.ts +99 -0
  67. package/dist/ai/orchestrator/types.d.ts.map +1 -0
  68. package/dist/ai/orchestrator/types.js +8 -0
  69. package/dist/ai/orchestrator/types.js.map +1 -0
  70. package/dist/ai/probes/cache.d.ts +12 -0
  71. package/dist/ai/probes/cache.d.ts.map +1 -0
  72. package/dist/ai/probes/cache.js +46 -0
  73. package/dist/ai/probes/cache.js.map +1 -0
  74. package/dist/ai/tools/ask-ai-engine.d.ts +77 -0
  75. package/dist/ai/tools/ask-ai-engine.d.ts.map +1 -0
  76. package/dist/ai/tools/ask-ai-engine.js +253 -0
  77. package/dist/ai/tools/ask-ai-engine.js.map +1 -0
  78. package/dist/ai/tools/check-domain-crawler-access.d.ts +71 -0
  79. package/dist/ai/tools/check-domain-crawler-access.d.ts.map +1 -0
  80. package/dist/ai/tools/check-domain-crawler-access.js +76 -0
  81. package/dist/ai/tools/check-domain-crawler-access.js.map +1 -0
  82. package/dist/ai/tools/check-domain-llms-txt.d.ts +70 -0
  83. package/dist/ai/tools/check-domain-llms-txt.d.ts.map +1 -0
  84. package/dist/ai/tools/check-domain-llms-txt.js +75 -0
  85. package/dist/ai/tools/check-domain-llms-txt.js.map +1 -0
  86. package/dist/ai/tools/check-indexability.d.ts +58 -0
  87. package/dist/ai/tools/check-indexability.d.ts.map +1 -0
  88. package/dist/ai/tools/check-indexability.js +64 -0
  89. package/dist/ai/tools/check-indexability.js.map +1 -0
  90. package/dist/ai/tools/check-robots.d.ts +68 -0
  91. package/dist/ai/tools/check-robots.d.ts.map +1 -0
  92. package/dist/ai/tools/check-robots.js +90 -0
  93. package/dist/ai/tools/check-robots.js.map +1 -0
  94. package/dist/ai/tools/check-rule-answer-first.d.ts +54 -0
  95. package/dist/ai/tools/check-rule-answer-first.d.ts.map +1 -0
  96. package/dist/ai/tools/check-rule-answer-first.js +50 -0
  97. package/dist/ai/tools/check-rule-answer-first.js.map +1 -0
  98. package/dist/ai/tools/check-rule-canonical-consistency.d.ts +66 -0
  99. package/dist/ai/tools/check-rule-canonical-consistency.d.ts.map +1 -0
  100. package/dist/ai/tools/check-rule-canonical-consistency.js +51 -0
  101. package/dist/ai/tools/check-rule-canonical-consistency.js.map +1 -0
  102. package/dist/ai/tools/check-rule-citable-facts.d.ts +58 -0
  103. package/dist/ai/tools/check-rule-citable-facts.d.ts.map +1 -0
  104. package/dist/ai/tools/check-rule-citable-facts.js +41 -0
  105. package/dist/ai/tools/check-rule-citable-facts.js.map +1 -0
  106. package/dist/ai/tools/check-rule-content-modularity.d.ts +58 -0
  107. package/dist/ai/tools/check-rule-content-modularity.d.ts.map +1 -0
  108. package/dist/ai/tools/check-rule-content-modularity.js +45 -0
  109. package/dist/ai/tools/check-rule-content-modularity.js.map +1 -0
  110. package/dist/ai/tools/check-rule-faq-coverage.d.ts +54 -0
  111. package/dist/ai/tools/check-rule-faq-coverage.d.ts.map +1 -0
  112. package/dist/ai/tools/check-rule-faq-coverage.js +39 -0
  113. package/dist/ai/tools/check-rule-faq-coverage.js.map +1 -0
  114. package/dist/ai/tools/check-rule-freshness-signals.d.ts +54 -0
  115. package/dist/ai/tools/check-rule-freshness-signals.d.ts.map +1 -0
  116. package/dist/ai/tools/check-rule-freshness-signals.js +45 -0
  117. package/dist/ai/tools/check-rule-freshness-signals.js.map +1 -0
  118. package/dist/ai/tools/check-rule-json-ld-valid.d.ts +54 -0
  119. package/dist/ai/tools/check-rule-json-ld-valid.d.ts.map +1 -0
  120. package/dist/ai/tools/check-rule-json-ld-valid.js +44 -0
  121. package/dist/ai/tools/check-rule-json-ld-valid.js.map +1 -0
  122. package/dist/ai/tools/check-rule-missing-author.d.ts +54 -0
  123. package/dist/ai/tools/check-rule-missing-author.d.ts.map +1 -0
  124. package/dist/ai/tools/check-rule-missing-author.js +45 -0
  125. package/dist/ai/tools/check-rule-missing-author.js.map +1 -0
  126. package/dist/ai/tools/check-rule-near-duplicate.d.ts +82 -0
  127. package/dist/ai/tools/check-rule-near-duplicate.d.ts.map +1 -0
  128. package/dist/ai/tools/check-rule-near-duplicate.js +63 -0
  129. package/dist/ai/tools/check-rule-near-duplicate.js.map +1 -0
  130. package/dist/ai/tools/check-rule-required-fields.d.ts +50 -0
  131. package/dist/ai/tools/check-rule-required-fields.d.ts.map +1 -0
  132. package/dist/ai/tools/check-rule-required-fields.js +38 -0
  133. package/dist/ai/tools/check-rule-required-fields.js.map +1 -0
  134. package/dist/ai/tools/check-rule-schema-consistency.d.ts +54 -0
  135. package/dist/ai/tools/check-rule-schema-consistency.d.ts.map +1 -0
  136. package/dist/ai/tools/check-rule-schema-consistency.js +44 -0
  137. package/dist/ai/tools/check-rule-schema-consistency.js.map +1 -0
  138. package/dist/ai/tools/check-rule-summary-bait.d.ts +54 -0
  139. package/dist/ai/tools/check-rule-summary-bait.d.ts.map +1 -0
  140. package/dist/ai/tools/check-rule-summary-bait.js +39 -0
  141. package/dist/ai/tools/check-rule-summary-bait.js.map +1 -0
  142. package/dist/ai/tools/check-rule-thin-content.d.ts +66 -0
  143. package/dist/ai/tools/check-rule-thin-content.d.ts.map +1 -0
  144. package/dist/ai/tools/check-rule-thin-content.js +58 -0
  145. package/dist/ai/tools/check-rule-thin-content.js.map +1 -0
  146. package/dist/ai/tools/detect-templates.d.ts +60 -0
  147. package/dist/ai/tools/detect-templates.d.ts.map +1 -0
  148. package/dist/ai/tools/detect-templates.js +43 -0
  149. package/dist/ai/tools/detect-templates.js.map +1 -0
  150. package/dist/ai/tools/fetch-page.d.ts +70 -0
  151. package/dist/ai/tools/fetch-page.d.ts.map +1 -0
  152. package/dist/ai/tools/fetch-page.js +93 -0
  153. package/dist/ai/tools/fetch-page.js.map +1 -0
  154. package/dist/ai/tools/fetch-sitemap.d.ts +60 -0
  155. package/dist/ai/tools/fetch-sitemap.d.ts.map +1 -0
  156. package/dist/ai/tools/fetch-sitemap.js +116 -0
  157. package/dist/ai/tools/fetch-sitemap.js.map +1 -0
  158. package/dist/ai/tools/index.d.ts +1555 -0
  159. package/dist/ai/tools/index.d.ts.map +1 -0
  160. package/dist/ai/tools/index.js +119 -0
  161. package/dist/ai/tools/index.js.map +1 -0
  162. package/dist/ai/tools/parse-page.d.ts +94 -0
  163. package/dist/ai/tools/parse-page.d.ts.map +1 -0
  164. package/dist/ai/tools/parse-page.js +108 -0
  165. package/dist/ai/tools/parse-page.js.map +1 -0
  166. package/dist/ai/tools/query-serp.d.ts +113 -0
  167. package/dist/ai/tools/query-serp.d.ts.map +1 -0
  168. package/dist/ai/tools/query-serp.js +131 -0
  169. package/dist/ai/tools/query-serp.js.map +1 -0
  170. package/dist/ai/tools/sample-template.d.ts +67 -0
  171. package/dist/ai/tools/sample-template.d.ts.map +1 -0
  172. package/dist/ai/tools/sample-template.js +75 -0
  173. package/dist/ai/tools/sample-template.js.map +1 -0
  174. package/dist/ai/tools/types.d.ts +73 -0
  175. package/dist/ai/tools/types.d.ts.map +1 -0
  176. package/dist/ai/tools/types.js +64 -0
  177. package/dist/ai/tools/types.js.map +1 -0
  178. package/dist/ai/tools/validate-jsonld.d.ts +62 -0
  179. package/dist/ai/tools/validate-jsonld.d.ts.map +1 -0
  180. package/dist/ai/tools/validate-jsonld.js +84 -0
  181. package/dist/ai/tools/validate-jsonld.js.map +1 -0
  182. package/dist/auditor.d.ts +16 -1
  183. package/dist/auditor.d.ts.map +1 -1
  184. package/dist/auditor.js +862 -88
  185. package/dist/auditor.js.map +1 -1
  186. package/dist/backpressure.d.ts.map +1 -1
  187. package/dist/backpressure.js +10 -3
  188. package/dist/backpressure.js.map +1 -1
  189. package/dist/enrich-findings.d.ts.map +1 -1
  190. package/dist/enrich-findings.js +15 -1
  191. package/dist/enrich-findings.js.map +1 -1
  192. package/dist/formatters/bucket-findings.d.ts +43 -0
  193. package/dist/formatters/bucket-findings.d.ts.map +1 -0
  194. package/dist/formatters/bucket-findings.js +110 -0
  195. package/dist/formatters/bucket-findings.js.map +1 -0
  196. package/dist/formatters/console.d.ts.map +1 -1
  197. package/dist/formatters/console.js +116 -34
  198. package/dist/formatters/console.js.map +1 -1
  199. package/dist/formatters/fixplan.d.ts +13 -0
  200. package/dist/formatters/fixplan.d.ts.map +1 -0
  201. package/dist/formatters/fixplan.js +328 -0
  202. package/dist/formatters/fixplan.js.map +1 -0
  203. package/dist/formatters/html.d.ts.map +1 -1
  204. package/dist/formatters/html.js +27 -0
  205. package/dist/formatters/html.js.map +1 -1
  206. package/dist/formatters/index.d.ts +2 -0
  207. package/dist/formatters/index.d.ts.map +1 -1
  208. package/dist/formatters/index.js +1 -0
  209. package/dist/formatters/index.js.map +1 -1
  210. package/dist/formatters/markdown.d.ts.map +1 -1
  211. package/dist/formatters/markdown.js +97 -9
  212. package/dist/formatters/markdown.js.map +1 -1
  213. package/dist/index.d.ts +12 -1
  214. package/dist/index.d.ts.map +1 -1
  215. package/dist/index.js +8 -0
  216. package/dist/index.js.map +1 -1
  217. package/dist/page-filter.d.ts +64 -6
  218. package/dist/page-filter.d.ts.map +1 -1
  219. package/dist/page-filter.js +124 -3
  220. package/dist/page-filter.js.map +1 -1
  221. package/dist/rule-references.d.ts.map +1 -1
  222. package/dist/rule-references.js +5 -0
  223. package/dist/rule-references.js.map +1 -1
  224. package/dist/rules/aeo/answer-first.d.ts.map +1 -1
  225. package/dist/rules/aeo/answer-first.js +17 -3
  226. package/dist/rules/aeo/answer-first.js.map +1 -1
  227. package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
  228. package/dist/rules/aeo/citable-facts.js +12 -1
  229. package/dist/rules/aeo/citable-facts.js.map +1 -1
  230. package/dist/rules/aeo/content-modularity.d.ts.map +1 -1
  231. package/dist/rules/aeo/content-modularity.js +3 -0
  232. package/dist/rules/aeo/content-modularity.js.map +1 -1
  233. package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
  234. package/dist/rules/aeo/crawler-access.js +6 -0
  235. package/dist/rules/aeo/crawler-access.js.map +1 -1
  236. package/dist/rules/aeo/faq-coverage.d.ts.map +1 -1
  237. package/dist/rules/aeo/faq-coverage.js +4 -0
  238. package/dist/rules/aeo/faq-coverage.js.map +1 -1
  239. package/dist/rules/aeo/freshness-signals.d.ts.map +1 -1
  240. package/dist/rules/aeo/freshness-signals.js +9 -2
  241. package/dist/rules/aeo/freshness-signals.js.map +1 -1
  242. package/dist/rules/aeo/llms-txt.d.ts.map +1 -1
  243. package/dist/rules/aeo/llms-txt.js +6 -1
  244. package/dist/rules/aeo/llms-txt.js.map +1 -1
  245. package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
  246. package/dist/rules/aeo/summary-bait.js +5 -2
  247. package/dist/rules/aeo/summary-bait.js.map +1 -1
  248. package/dist/rules/content/heading-structure.d.ts +21 -0
  249. package/dist/rules/content/heading-structure.d.ts.map +1 -0
  250. package/dist/rules/content/heading-structure.js +56 -0
  251. package/dist/rules/content/heading-structure.js.map +1 -0
  252. package/dist/rules/content/image-alt-text.d.ts +18 -0
  253. package/dist/rules/content/image-alt-text.d.ts.map +1 -0
  254. package/dist/rules/content/image-alt-text.js +77 -0
  255. package/dist/rules/content/image-alt-text.js.map +1 -0
  256. package/dist/rules/content/missing-author.d.ts.map +1 -1
  257. package/dist/rules/content/missing-author.js +10 -2
  258. package/dist/rules/content/missing-author.js.map +1 -1
  259. package/dist/rules/content/title-uniqueness.d.ts +18 -0
  260. package/dist/rules/content/title-uniqueness.d.ts.map +1 -0
  261. package/dist/rules/content/title-uniqueness.js +70 -0
  262. package/dist/rules/content/title-uniqueness.js.map +1 -0
  263. package/dist/rules/links/host-section-divergence.d.ts +3 -0
  264. package/dist/rules/links/host-section-divergence.d.ts.map +1 -0
  265. package/dist/rules/links/host-section-divergence.js +158 -0
  266. package/dist/rules/links/host-section-divergence.js.map +1 -0
  267. package/dist/rules/links/link-depth.d.ts +12 -1
  268. package/dist/rules/links/link-depth.d.ts.map +1 -1
  269. package/dist/rules/links/link-depth.js +25 -12
  270. package/dist/rules/links/link-depth.js.map +1 -1
  271. package/dist/rules/scope.d.ts.map +1 -1
  272. package/dist/rules/scope.js +5 -0
  273. package/dist/rules/scope.js.map +1 -1
  274. package/dist/rules/spam/doorway-pattern.d.ts.map +1 -1
  275. package/dist/rules/spam/doorway-pattern.js +27 -4
  276. package/dist/rules/spam/doorway-pattern.js.map +1 -1
  277. package/dist/rules/spam/publication-velocity.d.ts +1 -1
  278. package/dist/rules/spam/publication-velocity.d.ts.map +1 -1
  279. package/dist/rules/spam/publication-velocity.js +9 -4
  280. package/dist/rules/spam/publication-velocity.js.map +1 -1
  281. package/dist/rules/spam/template-coverage.js +1 -1
  282. package/dist/rules/spam/template-coverage.js.map +1 -1
  283. package/dist/rules/spam/template-diversity.js +1 -1
  284. package/dist/rules/spam/template-diversity.js.map +1 -1
  285. package/dist/rules/spam/thin-content.d.ts.map +1 -1
  286. package/dist/rules/spam/thin-content.js +9 -1
  287. package/dist/rules/spam/thin-content.js.map +1 -1
  288. package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -1
  289. package/dist/rules/tech/hreflang-consistency.js +33 -4
  290. package/dist/rules/tech/hreflang-consistency.js.map +1 -1
  291. package/dist/rules/tech/og-completeness.d.ts +11 -0
  292. package/dist/rules/tech/og-completeness.d.ts.map +1 -1
  293. package/dist/rules/tech/og-completeness.js +22 -23
  294. package/dist/rules/tech/og-completeness.js.map +1 -1
  295. package/dist/ruleset-version.d.ts +8 -0
  296. package/dist/ruleset-version.d.ts.map +1 -0
  297. package/dist/ruleset-version.js +8 -0
  298. package/dist/ruleset-version.js.map +1 -0
  299. package/dist/scrape-strategy.d.ts +42 -0
  300. package/dist/scrape-strategy.d.ts.map +1 -0
  301. package/dist/scrape-strategy.js +101 -0
  302. package/dist/scrape-strategy.js.map +1 -0
  303. package/dist/site-classifier.d.ts +1 -1
  304. package/dist/site-classifier.d.ts.map +1 -1
  305. package/dist/site-classifier.js +217 -0
  306. package/dist/site-classifier.js.map +1 -1
  307. package/dist/state.d.ts +36 -1
  308. package/dist/state.d.ts.map +1 -1
  309. package/dist/state.js +3 -1
  310. package/dist/state.js.map +1 -1
  311. package/dist/stratified-sample.d.ts +9 -1
  312. package/dist/stratified-sample.d.ts.map +1 -1
  313. package/dist/stratified-sample.js +23 -6
  314. package/dist/stratified-sample.js.map +1 -1
  315. package/dist/types.d.ts +179 -2
  316. package/dist/types.d.ts.map +1 -1
  317. package/dist/types.js.map +1 -1
  318. package/dist/url-normalize.d.ts.map +1 -1
  319. package/dist/url-normalize.js +13 -1
  320. package/dist/url-normalize.js.map +1 -1
  321. package/package.json +90 -90
package/dist/auditor.js CHANGED
@@ -18,7 +18,12 @@ import { thinContentRule } from "./rules/spam/thin-content.js";
18
18
  import { deadEndsRule } from "./rules/links/dead-ends.js";
19
19
  import { linkDepthRule } from "./rules/links/link-depth.js";
20
20
  import { clusterConnectivityRule } from "./rules/links/cluster-connectivity.js";
21
+ import { hostSectionDivergenceRule } from "./rules/links/host-section-divergence.js";
21
22
  import { orphanPagesRule } from "./rules/links/orphan-pages.js";
23
+ import { ogCompletenessRule } from "./rules/tech/og-completeness.js";
24
+ import { titleUniquenessRule } from "./rules/content/title-uniqueness.js";
25
+ import { headingStructureRule } from "./rules/content/heading-structure.js";
26
+ import { imageAltTextRule } from "./rules/content/image-alt-text.js";
22
27
  import { canonicalConsistencyRule } from "./rules/tech/canonical-consistency.js";
23
28
  import { canonicalNoindexConflictRule } from "./rules/tech/canonical-noindex-conflict.js";
24
29
  import { hreflangConsistencyRule } from "./rules/tech/hreflang-consistency.js";
@@ -55,14 +60,17 @@ import { SSRFError, validateTargetHost } from "./ssrf-guard.js";
55
60
  import { SAFE_MODE_PRESETS, resolveSafeModeKey } from "./safe-mode-preset.js";
56
61
  import { FetchObserver, computeReadiness, detectDevServer } from "./fetch-observer.js";
57
62
  import { BackpressureMonitor, OriginDegradedError } from "./backpressure.js";
58
- import { stratifiedSample } from "./stratified-sample.js";
63
+ import { stratifiedSample, mulberry32 } from "./stratified-sample.js";
59
64
  import { classifySite } from "./site-classifier.js";
60
65
  import { readState, writeState, computeContentHash, STATE_SCHEMA_VERSION, } from "./state.js";
66
+ import { CORE_RULESET_VERSION } from "./ruleset-version.js";
67
+ import { planScrapeStrategy, DEFAULT_AGE_FLOOR_DAYS } from "./scrape-strategy.js";
61
68
  const DEFAULTS = {
62
69
  nearDuplicateThreshold: 0.85,
63
70
  entitySwapThreshold: 0.95,
64
71
  thinContentMinWords: 300,
65
72
  publicationVelocityMaxPerDay: 100,
73
+ publicationVelocityMaxPerDayCorpusFraction: 0.10,
66
74
  boilerplateMaxRatio: 0.7,
67
75
  templateDiversityMinUniqueRatio: 0.35,
68
76
  uniqueValueMinWords: 100,
@@ -77,17 +85,6 @@ const DEFAULTS = {
77
85
  modularityMinSelfContainedRatio: 0.7,
78
86
  faqMinQuestionHeadings: 2
79
87
  };
80
- /**
81
- * v0.4 four-category weights. Audit is diagnostic-only (weight 0).
82
- * See 2026-04-29 v0.4 redesign spec §4.2.
83
- */
84
- const CATEGORY_WEIGHTS = {
85
- integrity: 0.50, // spam + content + cannibal
86
- discoverability: 0.20, // links + tech
87
- citation: 0.25, // aeo + schema
88
- data: 0.05, // data
89
- audit: 0, // diagnostics, never weighted
90
- };
91
88
  /**
92
89
  * Maps the v0.3 ruleId namespace prefix to the v0.4 four-bucket category.
93
90
  * Used by `scoreFromFindings` to bucket findings without changing rule IDs.
@@ -103,6 +100,331 @@ const CATEGORY_MAP = {
103
100
  data: "data",
104
101
  audit: "audit",
105
102
  };
103
+ const SCORING_PROFILES = {
104
+ "small-marketing": {
105
+ categoryWeights: { integrity: 0.30, discoverability: 0.40, citation: 0.20, data: 0.05, audit: 0 },
106
+ severityOverrides: {
107
+ "aeo/citable-facts": "info",
108
+ "aeo/answer-first": "info",
109
+ "aeo/summary-bait": "warning",
110
+ // 2026-05-03 calibration round 5: Segment integrations had 24 thin
111
+ // pages (200-300 words is correct for a catalog record). thin-content
112
+ // contributing capped 40 impact pushed integrity to its 100 cap → 30
113
+ // contribution at small-marketing weight, which alone tripped
114
+ // 'concerning'. Demoting to info keeps the signal visible without
115
+ // tanking the verdict on catalog-shape sites mis-classified as
116
+ // small-marketing. Real marketing sites (linear.app etc) don't
117
+ // normally have many sub-300-word pages so this won't hide quality
118
+ // issues there.
119
+ "spam/thin-content": "info",
120
+ "aeo/freshness-signals": "info",
121
+ "content/missing-author": "info",
122
+ // 2026-05-03 calibration round 3: Segment integrations classified as
123
+ // small-marketing@0.88 and tripped doorway-pattern 300× critical
124
+ // (catalog records are thin + entity-swap by design — not actually a
125
+ // doorway funnel). The classifier mistakes catalog directories as
126
+ // small-marketing; this demotion absorbs that mis-classification
127
+ // without weakening detection on actual small-marketing sites
128
+ // (linear.app, supabase.com — none of which produce entity-swap pairs).
129
+ "spam/doorway-pattern": "warning",
130
+ // 2026-05-03 calibration round 4: spam/boilerplate-ratio fired ERROR
131
+ // on Segment's integration directory (24 pages, 60%+ shared template
132
+ // chrome). On a marketing-template site the rule is correct — repeated
133
+ // "About us" / "Pricing" copy across pages IS a quality issue. On a
134
+ // catalog mis-classified to small-marketing, the shared chrome IS the
135
+ // template — by design. Demote to warning here; real marketing sites
136
+ // (linear.app, supabase.com) won't trip it because their corpus is
137
+ // page-diverse, but catalog-shape pages classified as small-marketing
138
+ // (Segment, Wise) won't tank the verdict.
139
+ "spam/boilerplate-ratio": "warning",
140
+ // 2026-05-03 v0.5.2 round 10: og-completeness, heading-structure,
141
+ // image-alt-text were added as new rules and tipped Segment from
142
+ // concerning → critical because catalog/template-driven sites
143
+ // commonly have shared OG defaults, weird H1 patterns (multiple H1s
144
+ // for repeated nav cards), and unlabelled logo grids. These are
145
+ // real findings on isolated sites but typical for catalog shape;
146
+ // demote to info here so the signal stays visible without driving
147
+ // the verdict.
148
+ "tech/og-completeness": "info",
149
+ "content/heading-structure": "info",
150
+ "content/image-alt-text": "info",
151
+ },
152
+ confidenceOverrides: {
153
+ "aeo/citable-facts": "low",
154
+ "aeo/answer-first": "low",
155
+ "aeo/summary-bait": "medium",
156
+ "spam/thin-content": "low",
157
+ "aeo/freshness-signals": "low",
158
+ "content/missing-author": "low",
159
+ "spam/doorway-pattern": "medium",
160
+ "spam/boilerplate-ratio": "medium",
161
+ "tech/og-completeness": "low",
162
+ "content/heading-structure": "low",
163
+ "content/image-alt-text": "low",
164
+ },
165
+ },
166
+ "blog": {
167
+ categoryWeights: { integrity: 0.40, discoverability: 0.25, citation: 0.30, data: 0.05, audit: 0 },
168
+ severityOverrides: {
169
+ "content/missing-author": "error",
170
+ "spam/thin-content": "error",
171
+ },
172
+ confidenceOverrides: {},
173
+ },
174
+ "programmatic-directory": {
175
+ categoryWeights: { integrity: 0.55, discoverability: 0.15, citation: 0.20, data: 0.10, audit: 0 },
176
+ // Symmetry argument: every other profile has severity overrides for the
177
+ // rules that mis-fit its shape (`docs` demotes AEO + author rules,
178
+ // `ecommerce` demotes `aeo/citable-facts`, `small-marketing` demotes several
179
+ // rules). `programmatic-directory` is the site type *most* structurally
180
+ // different from the "page = article" assumptions the AEO and EEAT rules
181
+ // are calibrated against — yet was the only profile with no overrides.
182
+ //
183
+ // Pre-calibration adjustment: demote (never escalate) the rules that
184
+ // first-principles analysis predicts will false-positive on catalog-
185
+ // shaped sites (Zapier integrations, G2 categories, Wise currency pairs,
186
+ // etc.). A reputable-pSEO calibration corpus + runner has been added
187
+ // (scripts/calibration-reputable-pseo.ts); these overrides will be
188
+ // tightened or loosened based on actual fire-rates measured against
189
+ // sites that demonstrably win in production. See
190
+ // docs/superpowers/specs/2026-05-03-calibration-against-reputable-pseo.md.
191
+ severityOverrides: {
192
+ // Catalog pages are tables, not prose. AEO rules calibrated on
193
+ // editorial content over-fire here.
194
+ "aeo/citable-facts": "info",
195
+ "aeo/answer-first": "info",
196
+ "aeo/content-modularity": "info",
197
+ // 2026-05-03 calibration: freshness-signals fired on every page of
198
+ // every reputable pSEO site. Catalog freshness is expressed via the
199
+ // data (live currency rates, current job listings, current pricing),
200
+ // not via visible "last updated" stamps. Demote.
201
+ "aeo/freshness-signals": "info",
202
+ // Authorship lives at the platform level (operator's about page),
203
+ // not on every catalog record. Following the rule's "add a byline"
204
+ // fix on a Zillow listing would actively make the page worse.
205
+ "content/missing-author": "info",
206
+ "content/eeat-signals": "info",
207
+ // Template uniformity is correct for catalogs by design. Keep the
208
+ // signal but cap at warning — never error.
209
+ "spam/template-diversity": "warning",
210
+ // 2026-05-03 v0.5.2 round 10: same catalog logic as small-marketing.
211
+ "tech/og-completeness": "info",
212
+ "content/heading-structure": "info",
213
+ "content/image-alt-text": "info",
214
+ // 2026-05-03 calibration round 2: catalogs are near-duplicate by
215
+ // design. spam/near-duplicate fires CRITICAL on every catalog pair.
216
+ // Demote to warning — keeps the signal visible without dominating
217
+ // the score.
218
+ "spam/near-duplicate": "warning",
219
+ // 2026-05-03 calibration round 5: catalog records are by-design
220
+ // shorter than the 300-word default. Demote to info on programmatic-
221
+ // directory; the data IS the content.
222
+ "spam/thin-content": "info",
223
+ // 2026-05-03 calibration round 2: doorway-pattern fires CRITICAL on
224
+ // every (thin + entity-swap) pair. On Segment integrations, integration
225
+ // pages are thin (200-300 words is the right amount for a directory
226
+ // record) and entity-swap (slack/google-sheets, slack/airtable, …) by
227
+ // design. The composite signal is genuinely true but the *intent*
228
+ // (doorway funnel) doesn't match the reality (catalog record).
229
+ // Demoting to warning preserves the signal without tanking the score.
230
+ "spam/doorway-pattern": "warning",
231
+ // 2026-05-03 calibration round 4: catalog pages share template chrome
232
+ // by design — same as `spam/template-diversity`, this signal is
233
+ // structurally true on programmatic-directories.
234
+ "spam/boilerplate-ratio": "warning",
235
+ },
236
+ confidenceOverrides: {
237
+ "aeo/citable-facts": "low",
238
+ "aeo/answer-first": "low",
239
+ "aeo/content-modularity": "low",
240
+ "aeo/freshness-signals": "low",
241
+ "content/missing-author": "low",
242
+ "content/eeat-signals": "low",
243
+ "spam/template-diversity": "medium",
244
+ "spam/near-duplicate": "medium",
245
+ "spam/doorway-pattern": "medium",
246
+ "spam/boilerplate-ratio": "medium",
247
+ "spam/thin-content": "low",
248
+ "tech/og-completeness": "low",
249
+ "content/heading-structure": "low",
250
+ "content/image-alt-text": "low",
251
+ },
252
+ },
253
+ "ecommerce": {
254
+ categoryWeights: { integrity: 0.20, discoverability: 0.40, citation: 0.15, data: 0.25, audit: 0 },
255
+ severityOverrides: {
256
+ "aeo/citable-facts": "info",
257
+ "schema/required-fields": "error",
258
+ },
259
+ confidenceOverrides: {
260
+ "aeo/citable-facts": "low",
261
+ },
262
+ },
263
+ "docs": {
264
+ categoryWeights: { integrity: 0.30, discoverability: 0.30, citation: 0.30, data: 0.10, audit: 0 },
265
+ severityOverrides: {
266
+ "aeo/citable-facts": "info",
267
+ "aeo/answer-first": "warning",
268
+ "content/missing-author": "info",
269
+ },
270
+ confidenceOverrides: {
271
+ "aeo/citable-facts": "low",
272
+ "aeo/answer-first": "low",
273
+ "content/missing-author": "low",
274
+ },
275
+ },
276
+ "unclear": {
277
+ categoryWeights: { integrity: 0.50, discoverability: 0.20, citation: 0.25, data: 0.05, audit: 0 },
278
+ // 2026-05-03 calibration round 2: the original "stay strict when unsure"
279
+ // intent meant that 4 of 5 reputable pSEO sites that classified as
280
+ // unclear (Zapier integrations, Typeform templates, Jasper templates,
281
+ // Numbeo cost-of-living) failed their verdict ceiling. The dominant
282
+ // driver was always `aeo/citable-facts` at full error severity — but
283
+ // catalog/template-gallery pages don't have prose, so the rule fires
284
+ // for a STRUCTURAL reason (page is a table, not a paragraph), not a
285
+ // QUALITY reason. Demoting the structurally-incompatible rules to
286
+ // info on `unclear` is conservative:
287
+ // - if site is genuinely editorial and got mis-classified, signals
288
+ // still surface (just info, not error) — author can act on them.
289
+ // - if site is catalog and got mis-classified to unclear, verdict
290
+ // no longer falsely tanks.
291
+ // Round 2 left the spam signals (near-dup, doorway, thin) at full severity; rounds 3-5 below demote those too.
292
+ severityOverrides: {
293
+ "aeo/citable-facts": "info",
294
+ "aeo/answer-first": "info",
295
+ "aeo/content-modularity": "info",
296
+ "aeo/freshness-signals": "info",
297
+ "content/missing-author": "info",
298
+ "content/eeat-signals": "info",
299
+ // 2026-05-03 calibration round 3: Airbyte classified as unclear@0.5
300
+ // and scored concerning despite all info-severity findings in the
301
+ // top 5. The 8 critical "blockers" came from spam/near-duplicate,
302
+ // spam/entity-swap, spam/doorway-pattern firing 1-2× each on its
303
+ // connectors directory — invisible per-rule but cumulatively pushing
304
+ // the score over 'caution'. On unclear sites we cannot tell whether
305
+ // these triple-fires represent a real doorway or a catalog; the
306
+ // calibration corpus shows reputable catalogs hitting them more
307
+ // often than real doorways do. Demote to warning — keeps the signal
308
+ // visible (it appears in shouldFix bucket, with full message) without
309
+ // tanking the verdict on a structurally-ambiguous site.
310
+ "spam/near-duplicate": "warning",
311
+ "spam/entity-swap": "warning",
312
+ "spam/doorway-pattern": "warning",
313
+ // 2026-05-03 calibration round 4: same boilerplate logic on unclear —
314
+ // we can't tell whether the site is a marketing site (boilerplate IS
315
+ // a quality issue) or a catalog (it isn't), so demote conservatively.
316
+ "spam/boilerplate-ratio": "warning",
317
+ // 2026-05-03 calibration round 5: same thin-content logic on unclear.
318
+ // Catalog-shape sites that classify as unclear (Zapier, Typeform,
319
+ // Jasper) had thin-content firing at error on the 5-15% of pages
320
+ // shorter than the 300-word default. Demote to info — surfaces the
321
+ // signal without driving the verdict on a structurally-ambiguous site.
322
+ "spam/thin-content": "info",
323
+ // 2026-05-03 v0.5.2 round 10: same demotions as programmatic-
324
+ // directory profile — these tipped Webflow/Zapier/Numbeo/Airbyte
325
+ // back into concerning territory because they classify as unclear
326
+ // and the new rules aren't yet calibrated for catalog shape.
327
+ "tech/og-completeness": "info",
328
+ "content/heading-structure": "info",
329
+ "content/image-alt-text": "info",
330
+ },
331
+ confidenceOverrides: {
332
+ "aeo/citable-facts": "low",
333
+ "aeo/answer-first": "low",
334
+ "aeo/content-modularity": "low",
335
+ "aeo/freshness-signals": "low",
336
+ "content/missing-author": "low",
337
+ "content/eeat-signals": "low",
338
+ "spam/near-duplicate": "medium",
339
+ "spam/entity-swap": "medium",
340
+ "spam/doorway-pattern": "medium",
341
+ "spam/boilerplate-ratio": "medium",
342
+ "spam/thin-content": "low",
343
+ "tech/og-completeness": "low",
344
+ "content/heading-structure": "low",
345
+ "content/image-alt-text": "low",
346
+ },
347
+ },
348
+ };
349
/**
 * Pick the scoring profile for a classification.
 *
 * Falls back to `SCORING_PROFILES.unclear` (the conservative default) when:
 *   - no classification is supplied,
 *   - classifier confidence is below 0.7, or
 *   - `classification.type` is not an own key of `SCORING_PROFILES`.
 *
 * @param {{ type: string, confidence: number } | null | undefined} classification
 * @returns {object} The matching `SCORING_PROFILES` entry, or the `unclear` entry.
 */
function profileFor(classification) {
    if (!classification || classification.confidence < 0.7)
        return SCORING_PROFILES.unclear;
    const { type } = classification;
    // Own-property lookup only. A bare `SCORING_PROFILES[type]` would resolve
    // inherited keys such as "constructor" or "toString" to Object.prototype
    // members; those are not profiles and would make callers throw when they
    // read `profile.severityOverrides`.
    const isKnownProfile = Object.prototype.hasOwnProperty.call(SCORING_PROFILES, type);
    if (!isKnownProfile)
        return SCORING_PROFILES.unclear;
    return SCORING_PROFILES[type] ?? SCORING_PROFILES.unclear;
}
358
+ const RULE_IMPACTS = {
359
+ // SpamBrain — high baseline, count amplifies (cluster matters)
360
+ "spam/near-duplicate": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
361
+ "spam/entity-swap": { baseImpact: 25, perInstance: 5, maxImpact: 80 },
362
+ "spam/doorway-pattern": { baseImpact: 30, perInstance: 0, maxImpact: 30 },
363
+ "spam/template-coverage": { baseImpact: 15, perInstance: 3, maxImpact: 60 },
364
+ "spam/template-diversity": { baseImpact: 12, perInstance: 3, maxImpact: 50 },
365
+ "spam/boilerplate-ratio": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
366
+ "spam/thin-content": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
367
+ "spam/publication-velocity": { baseImpact: 8, perInstance: 2, maxImpact: 30 },
368
+ "cannibal/url-pattern": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
369
+ // Content
370
+ "content/unique-value": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
371
+ "content/meta-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 40 },
372
+ "content/missing-author": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
373
+ "content/eeat-signals": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
374
+ // 2026-05-03 v0.5.2 blind-spot fixes
375
+ "content/title-uniqueness": { baseImpact: 8, perInstance: 2, maxImpact: 25 }, // 2026-05-03 round 11: title is high-impact but the original 50-cap was disproportionate to other content rules and tipped Typeform into critical on a 6-finding cluster. Keep the rule at native error severity (duplicate titles ARE real bugs); just don't let one rule dominate the integrity bucket.
376
+ "content/heading-structure": { baseImpact: 5, perInstance: 1, maxImpact: 20 },
377
+ "content/image-alt-text": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
378
+ // Tech — softened in v0.4.3-rc2 after dogfood showed nextjs.org regressing
379
+ // from ready→caution on tech/canonical-consistency × 4 (legit cross-domain
380
+ // canonicals on a CDN). Per-instance now 1 (was 3).
381
+ "tech/canonical-consistency": { baseImpact: 8, perInstance: 1, maxImpact: 25 },
382
+ "tech/canonical-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
383
+ "tech/robots-noindex-conflict": { baseImpact: 10, perInstance: 2, maxImpact: 40 },
384
+ "tech/redirect-chain": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
385
+ "tech/sitemap-completeness": { baseImpact: 8, perInstance: 1, maxImpact: 30 },
386
+ "tech/robots-sitemap-presence": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
387
+ "tech/soft-404": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
388
+ // hreflang — one bad declaration breaks all language pairs, so the COUNT
389
+ // doesn't compound. perInstance: 0 keeps it at the base impact regardless
390
+ // of how many language pairs are affected. Dogfood showed 350 findings on
391
+ // stripe.com from a single missing reciprocal pair — that should not be
392
+ // treated as 350× the impact.
393
+ "tech/hreflang-consistency": { baseImpact: 5, perInstance: 0, maxImpact: 5 },
394
+ "tech/og-completeness": { baseImpact: 4, perInstance: 1, maxImpact: 20 },
395
+ // Links
396
+ "links/orphan-pages": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
397
+ "links/dead-ends": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
398
+ "links/cluster-connectivity": { baseImpact: 5, perInstance: 1, maxImpact: 25 },
399
+ "links/link-depth": { baseImpact: 3, perInstance: 1, maxImpact: 20 },
400
+ // AEO — much lower baselines than spam (AEO is opt-in optimization)
401
+ "aeo/citable-facts": { baseImpact: 2, perInstance: 1, maxImpact: 25 },
402
+ "aeo/answer-first": { baseImpact: 3, perInstance: 1, maxImpact: 25 },
403
+ "aeo/summary-bait": { baseImpact: 4, perInstance: 1, maxImpact: 25 },
404
+ "aeo/crawler-access": { baseImpact: 8, perInstance: 0, maxImpact: 8 },
405
+ "aeo/freshness-signals": { baseImpact: 2, perInstance: 1, maxImpact: 20 },
406
+ "aeo/llms-txt": { baseImpact: 4, perInstance: 0, maxImpact: 4 },
407
+ "aeo/faq-coverage": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
408
+ "aeo/content-modularity": { baseImpact: 2, perInstance: 1, maxImpact: 15 },
409
+ // Schema
410
+ "schema/json-ld-valid": { baseImpact: 8, perInstance: 2, maxImpact: 35 },
411
+ "schema/required-fields": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
412
+ "schema/consistency": { baseImpact: 3, perInstance: 1, maxImpact: 15 },
413
+ // Data
414
+ "data/data-binding": { baseImpact: 6, perInstance: 1, maxImpact: 30 },
415
+ };
416
+ const DEFAULT_RULE_IMPACT = { baseImpact: 5, perInstance: 1, maxImpact: 25 };
417
+ /**
418
+ * v0.4.3 — confidence-based discount applied to each finding's impact.
419
+ * Low-confidence findings contribute less to the bucket so they don't
420
+ * inflate the verdict on site types where they false-positive.
421
+ */
422
+ const CONFIDENCE_MULTIPLIER = {
423
+ high: 1.0,
424
+ medium: 0.6,
425
+ low: 0.3,
426
+ speculative: 0.1,
427
+ };
106
428
  /** Slug map for `RuleResult.docsUrl`. Defaults to the rule-id segment after the `/`. */
107
429
  const RULE_DOCS_SLUG = {
108
430
  // intentionally empty for v0.4 — slug = ruleId.split("/").pop() works for every shipped rule
@@ -121,6 +443,39 @@ function verdictForRisk(risk) {
121
443
  return "concerning";
122
444
  return "critical";
123
445
  }
446
/**
 * Verdict tiers ordered from most lenient (index 0) to most severe.
 */
const VERDICT_LADDER = ["ready", "caution", "concerning", "critical"];
/**
 * 2026-05-03 v0.5.2 — bring-your-own-authority adjustment of the verdict.
 * Only the verdict label moves; this function never touches the numeric
 * risk score.
 *
 *   authorityScore >= 80  → one tier more lenient (toward "ready")
 *   authorityScore <= 30  → one tier stricter (toward "critical")
 *   anything in between   → unchanged
 *
 * Shifts are clamped at both ends of the ladder, so "ready" stays "ready"
 * when shifted lenient and "critical" stays "critical" when shifted strict.
 *
 * The verdict is returned untouched when `authorityScore` is undefined,
 * not a finite number, outside 0..100, or when `verdict` is not one of the
 * ladder entries.
 *
 * @param {string} verdict Current verdict label.
 * @param {number} [authorityScore] Authority score on a 0..100 scale.
 * @returns {string} The (possibly shifted) verdict label.
 */
function shiftVerdictForAuthority(verdict, authorityScore) {
    const scoreIsUsable = Number.isFinite(authorityScore) &&
        authorityScore >= 0 &&
        authorityScore <= 100;
    if (!scoreIsUsable) {
        return verdict;
    }
    const position = VERDICT_LADDER.indexOf(verdict);
    if (position === -1) {
        return verdict;
    }
    // -1 moves toward "ready", +1 toward "critical", 0 leaves the tier alone.
    let step = 0;
    if (authorityScore >= 80) {
        step = -1;
    }
    else if (authorityScore <= 30) {
        step = 1;
    }
    const lastIndex = VERDICT_LADDER.length - 1;
    const target = Math.min(lastIndex, Math.max(0, position + step));
    return VERDICT_LADDER[target];
}
124
479
  function gradeForPenalty(penalty) {
125
480
  if (penalty <= 20)
126
481
  return "A";
@@ -182,7 +537,15 @@ function runRulesOnPages(pages,
182
537
  * `respectNoindex: true` would hide noindex'd pages from the very rules
183
538
  * designed to flag accidental noindex'ing.
184
539
  */
185
- noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides, mode = "full") {
540
+ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, entityPatterns, overrides, mode = "full",
541
+ /**
542
+ * 2026-05-03 calibration credibility fix: signals that the audit is
543
+ * running on a sampled subset of the discovered URLs. Rules whose
544
+ * outputs depend on a complete link graph (`links/unreachable-from-
545
+ * root`) skip their checks when this is true to avoid sampling-
546
+ * artifact false positives.
547
+ */
548
+ sampled = false) {
186
549
  const findings = [];
187
550
  const modeOk = (ruleId) => mode !== "diff" || isRuleAllowedInDiff(ruleId);
188
551
  const tag = (results) => results.map((r) => {
@@ -211,7 +574,7 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
211
574
  findings.push(...tag(doorwayPatternRule(nearDuplicate.pairs, entitySwap.pairs, thinContent.thinContentUrls, pages)));
212
575
  }
213
576
  if (isEnabled("spam/publication-velocity") && modeOk("spam/publication-velocity")) {
214
- findings.push(...tag(publicationVelocityRule(pages, resolvedRules.publicationVelocityMaxPerDay)));
577
+ findings.push(...tag(publicationVelocityRule(pages, resolvedRules.publicationVelocityMaxPerDay, resolvedRules.publicationVelocityMaxPerDayCorpusFraction)));
215
578
  }
216
579
  if (isEnabled("spam/boilerplate-ratio") && modeOk("spam/boilerplate-ratio")) {
217
580
  findings.push(...tag(boilerplateRatioRule(pages, resolvedRules.boilerplateMaxRatio)));
@@ -235,6 +598,17 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
235
598
  if (isEnabled("content/eeat-signals") && modeOk("content/eeat-signals")) {
236
599
  findings.push(...tag(eeatSignalsRule(pages)));
237
600
  }
601
+ // 2026-05-03 v0.5.2 blind-spot fixes — title uniqueness + heading
602
+ // structure + image alt-text were tier-1 gaps in the blind-spot audit.
603
+ if (isEnabled("content/title-uniqueness") && modeOk("content/title-uniqueness")) {
604
+ findings.push(...tag(titleUniquenessRule(pages)));
605
+ }
606
+ if (isEnabled("content/heading-structure") && modeOk("content/heading-structure")) {
607
+ findings.push(...tag(headingStructureRule(pages)));
608
+ }
609
+ if (isEnabled("content/image-alt-text") && modeOk("content/image-alt-text")) {
610
+ findings.push(...tag(imageAltTextRule(pages)));
611
+ }
238
612
  // Link rules — use the global link graph
239
613
  if (isEnabled("links/orphan-pages") && modeOk("links/orphan-pages")) {
240
614
  findings.push(...tag(orphanPagesRule(pages, inbound, rootUrl)));
@@ -244,12 +618,15 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
244
618
  }
245
619
  if (isEnabled("links/link-depth") && modeOk("links/link-depth")) {
246
620
  if (rootUrl) {
247
- findings.push(...tag(linkDepthRule(pages, adjacency, rootUrl, resolvedRules.linkDepthMaxClicks, inbound)));
621
+ findings.push(...tag(linkDepthRule(pages, adjacency, rootUrl, resolvedRules.linkDepthMaxClicks, inbound, sampled)));
248
622
  }
249
623
  }
250
624
  if (isEnabled("links/cluster-connectivity") && modeOk("links/cluster-connectivity")) {
251
625
  findings.push(...tag(clusterConnectivityRule(pages, knownUrls)));
252
626
  }
627
+ if (isEnabled("links/host-section-divergence") && modeOk("links/host-section-divergence")) {
628
+ findings.push(...tag(hostSectionDivergenceRule(pages, adjacency)));
629
+ }
253
630
  // Tech rules
254
631
  if (isEnabled("tech/canonical-consistency") && modeOk("tech/canonical-consistency")) {
255
632
  findings.push(...tag(canonicalConsistencyRule(pages, knownUrls, normalizeUrlOptions)));
@@ -271,6 +648,11 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
271
648
  // inconsistent — see auditor.test.ts "emits technical SEO findings".
272
649
  findings.push(...tag(hreflangConsistencyRule(noindexAwarePages, normalizeUrlOptions)));
273
650
  }
651
+ // 2026-05-03 v0.5.2 blind-spot fix: og-completeness was referenced in
652
+ // the v0.4.x README without ever shipping. Now it does.
653
+ if (isEnabled("tech/og-completeness") && modeOk("tech/og-completeness")) {
654
+ findings.push(...tag(ogCompletenessRule(pages)));
655
+ }
274
656
  // Schema rules
275
657
  if (isEnabled("schema/json-ld-valid") && modeOk("schema/json-ld-valid")) {
276
658
  findings.push(...tag(jsonLdValidRule(pages)));
@@ -323,13 +705,67 @@ noindexAwarePages, resolvedRules, isEnabled, groupName, knownUrls, adjacency, in
323
705
  function hashHtml(html) {
324
706
  return createHash("sha256").update(html, "utf8").digest("hex");
325
707
  }
326
- const SEVERITY_WEIGHTS = {
327
- critical: 40,
328
- error: 25,
329
- warning: 12,
330
- info: 5,
331
- };
332
- function scoreFromFindings(findings) {
708
/**
 * v0.4.3 — remap finding severity and confidence according to the scoring
 * profile selected for the site classification. Intended to run BEFORE any
 * bucketing so that blocker/shouldFix counts and category buckets reflect
 * the user-visible severity rather than the rule's native severity.
 *
 * The input is never mutated. When the profile defines no overrides at all
 * the input array itself is returned; otherwise a new array is returned in
 * which findings without an override are kept by reference and findings
 * with an override are shallow copies with `severity` and/or `confidence`
 * replaced.
 *
 * @param {Array<object>} findings Findings carrying at least a `ruleId`.
 * @param {object | null | undefined} classification Passed to `profileFor`.
 * @returns {Array<object>} Findings with profile overrides applied.
 */
export function applyScoringProfileOverrides(findings, classification) {
    const { severityOverrides, confidenceOverrides } = profileFor(classification);
    const profileIsNeutral = Object.keys(severityOverrides).length === 0 &&
        Object.keys(confidenceOverrides).length === 0;
    if (profileIsNeutral) {
        return findings;
    }
    return findings.map((finding) => {
        const severity = severityOverrides[finding.ruleId];
        const confidence = confidenceOverrides[finding.ruleId];
        if (severity === undefined && confidence === undefined) {
            return finding;
        }
        const remapped = { ...finding };
        if (severity !== undefined) {
            remapped.severity = severity;
        }
        if (confidence !== undefined) {
            remapped.confidence = confidence;
        }
        return remapped;
    });
}
735
/**
 * 2026-05-03 credibility: report which of the active profile's severity
 * overrides were actually exercised by this audit. The profile's
 * `severityOverrides` is the static list of possible remaps; the result
 * here is the subset whose rule id appears on at least one finding.
 * Surfaced via `summary.appliedSeverityDemotions` so formatters can tell
 * the user which rules the engine remapped for the detected site type
 * instead of hiding the mechanism.
 *
 * @param {Array<object>} findings Findings carrying at least a `ruleId`.
 * @param {object | null | undefined} classification Passed to `profileFor`.
 * @returns {string[]} Distinct rule ids, sorted ascending.
 */
function computeAppliedDemotions(findings, classification) {
    const { severityOverrides } = profileFor(classification);
    if (Object.keys(severityOverrides).length === 0) {
        return [];
    }
    const overriddenIds = findings
        .map((finding) => finding.ruleId)
        .filter((ruleId) => severityOverrides[ruleId] !== undefined);
    // Set collapses repeats of the same rule; sort gives stable output.
    return [...new Set(overriddenIds)].sort();
}
755
+ /**
756
+ * v0.4.3 — confidence-and-count-aware scoring. Replaces the v0.4 model that
757
+ * counted only severity. Each rule has a `baseImpact + (count - 1) *
758
+ * perInstance` contribution capped by `maxImpact`. The result is multiplied
759
+ * by the finding's `confidence` (default `high` → 1.0). Per-site-type
760
+ * profiles can remap a rule's severity / confidence; this function expects
761
+ * those overrides to ALREADY be applied to the input findings.
762
+ *
763
+ * Bucket math: per-rule impacts sum into the rule's `CATEGORY_MAP` bucket;
764
+ * each bucket is then capped at 100 and weighted by the active scoring
765
+ * profile's `categoryWeights`.
766
+ */
767
+ function scoreFromFindings(findings, classification) {
768
+ const profile = profileFor(classification);
333
769
  // v0.4 four-bucket raw penalties.
334
770
  const bucketRaw = {
335
771
  integrity: 0,
@@ -348,18 +784,16 @@ function scoreFromFindings(findings) {
348
784
  let blockers = 0;
349
785
  let shouldFix = 0;
350
786
  let informational = 0;
787
+ // Group findings by ruleId so we can apply baseImpact + perInstance.
788
+ // Each group's weighted impact lands in its category bucket.
789
+ const groups = new Map();
351
790
  for (const finding of findings) {
352
791
  const namespace = finding.ruleId.split("/")[0];
353
792
  const bucket = CATEGORY_MAP[namespace];
354
793
  if (!bucket)
355
794
  continue;
356
- const weight = SEVERITY_WEIGHTS[finding.severity];
357
- // v0.4 buckets.
358
- bucketRaw[bucket] = Math.min(100, bucketRaw[bucket] + weight);
359
- if (bucket !== "audit") {
795
+ if (bucket !== "audit")
360
796
  bucketIssues[bucket] += 1;
361
- }
362
- // Issue-bucket counts (audit/* findings are diagnostic-only and excluded).
363
797
  if (bucket === "audit")
364
798
  continue;
365
799
  if (finding.severity === "critical" || finding.severity === "error")
@@ -368,11 +802,73 @@ function scoreFromFindings(findings) {
368
802
  shouldFix += 1;
369
803
  else
370
804
  informational += 1;
805
+ const arr = groups.get(finding.ruleId) ?? [];
806
+ arr.push(finding);
807
+ groups.set(finding.ruleId, arr);
808
+ }
809
+ // 2026-05-03 calibration credibility fix: track info-severity vs
810
+ // non-info contributions to each bucket separately so a flood of info
811
+ // findings can't fill the bucket cap and tank the verdict on its own.
812
+ // Round 7 surfaced this on Airbyte and round 8 on Zapier — both had
813
+ // ALL info-severity findings in their top drivers yet scored
814
+ // `concerning` because cumulative info impact filled the citation
815
+ // bucket past its 100 cap. Now: info contribution per bucket caps at
816
+ // 50; warning+ contribution caps at 100; final bucket = sum, capped
817
+ // at 100. A site with no real warning/error findings can score at
818
+ // most ~12.5 risk from info accumulation at typical 0.25 citation
819
+ // weight — which keeps verdict aligned with the visible severity in
820
+ // the report.
821
+ const bucketInfoOnly = {
822
+ integrity: 0, discoverability: 0, citation: 0, data: 0, audit: 0,
823
+ };
824
+ const bucketNonInfo = {
825
+ integrity: 0, discoverability: 0, citation: 0, data: 0, audit: 0,
826
+ };
827
+ for (const [ruleId, group] of groups) {
828
+ const namespace = ruleId.split("/")[0];
829
+ const bucket = CATEGORY_MAP[namespace];
830
+ if (!bucket || bucket === "audit")
831
+ continue;
832
+ const impactSpec = RULE_IMPACTS[ruleId] ?? DEFAULT_RULE_IMPACT;
833
+ const count = group.length;
834
+ const rawImpact = impactSpec.baseImpact + Math.max(0, count - 1) * impactSpec.perInstance;
835
+ const cap = impactSpec.maxImpact ?? Number.POSITIVE_INFINITY;
836
+ const cappedImpact = Math.min(cap, rawImpact);
837
+ // Confidence multiplier — use the WORST (highest-multiplier) confidence
838
+ // in the group so a rule that fires repeatedly with mixed confidence is
839
+ // not unfairly downweighted to its lowest-confidence instance.
840
+ let bestMultiplier = 0;
841
+ for (const f of group) {
842
+ const conf = f.confidence ?? "high";
843
+ const m = CONFIDENCE_MULTIPLIER[conf];
844
+ if (m > bestMultiplier)
845
+ bestMultiplier = m;
846
+ }
847
+ if (bestMultiplier === 0)
848
+ bestMultiplier = CONFIDENCE_MULTIPLIER.high;
849
+ const weighted = cappedImpact * bestMultiplier;
850
+ // Bucket the rule's contribution by the highest severity in the group.
851
+ // Mixed-severity groups (e.g. error + info) count toward non-info — once
852
+ // a rule has any non-info finding, its count contribution is treated as
853
+ // a real-issue signal, not info accumulation.
854
+ const isInfoOnly = group.every((f) => f.severity === "info");
855
+ if (isInfoOnly) {
856
+ bucketInfoOnly[bucket] += weighted;
857
+ }
858
+ else {
859
+ bucketNonInfo[bucket] += weighted;
860
+ }
371
861
  }
372
- const weighted = bucketRaw.integrity * CATEGORY_WEIGHTS.integrity +
373
- bucketRaw.discoverability * CATEGORY_WEIGHTS.discoverability +
374
- bucketRaw.citation * CATEGORY_WEIGHTS.citation +
375
- bucketRaw.data * CATEGORY_WEIGHTS.data;
862
+ for (const key of ["integrity", "discoverability", "citation", "data"]) {
863
+ const info = Math.min(50, bucketInfoOnly[key]);
864
+ const nonInfo = Math.min(100, bucketNonInfo[key]);
865
+ bucketRaw[key] = Math.min(100, info + nonInfo);
866
+ }
867
+ const cw = profile.categoryWeights;
868
+ const weighted = bucketRaw.integrity * cw.integrity +
869
+ bucketRaw.discoverability * cw.discoverability +
870
+ bucketRaw.citation * cw.citation +
871
+ bucketRaw.data * cw.data;
376
872
  const risk = Math.round(Math.min(100, weighted));
377
873
  const categories = {
378
874
  integrity: { grade: gradeForPenalty(bucketRaw.integrity), issues: bucketIssues.integrity },
@@ -534,6 +1030,25 @@ function parseSitemapUrls(xml) {
534
1030
  const matches = Array.from(xml.matchAll(/<loc>\s*([^<\s]+)\s*<\/loc>/gi));
535
1031
  return matches.map((match) => match[1]).filter(Boolean);
536
1032
  }
1033
/**
 * Extract `{ url, lastmod }` entries from sitemap XML.
 *
 * Handles both `<url>...</url>` blocks (inside `<urlset>`) and
 * `<sitemap>...</sitemap>` blocks (inside `<sitemapindex>`); both carry a
 * `<loc>` and an optional `<lastmod>`. Blocks without a `<loc>`, or whose
 * `<loc>` is empty after trimming, are skipped.
 *
 * Element text is normalised before it is returned:
 *   - surrounding whitespace is trimmed;
 *   - a `<![CDATA[ ... ]]>` wrapper is removed (its content is literal);
 *   - otherwise the five predefined XML entities and numeric character
 *     references are decoded, so `&amp;` in a URL becomes `&`.
 * A missing or empty `<lastmod>` is reported as `undefined`.
 *
 * @param {string} xml Raw sitemap or sitemap-index document.
 * @returns {Array<{ url: string, lastmod: string | undefined }>} Entries in
 *   document order.
 */
export function parseSitemapUrlsWithLastmod(xml) {
    const namedEntities = { amp: "&", lt: "<", gt: ">", quot: "\"", apos: "'" };
    // Single-pass decode so "&amp;lt;" becomes "&lt;" rather than "<".
    const decodeEntities = (text) => text.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g, (whole, body) => {
        if (body[0] !== "#") {
            return namedEntities[body];
        }
        const codePoint = body[1] === "x"
            ? Number.parseInt(body.slice(2), 16)
            : Number.parseInt(body.slice(1), 10);
        // Leave out-of-range references untouched instead of throwing.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    });
    const readText = (raw) => {
        const trimmed = raw.trim();
        const cdata = trimmed.match(/^<!\[CDATA\[([\s\S]*?)\]\]>$/);
        return cdata ? cdata[1].trim() : decodeEntities(trimmed);
    };
    const out = [];
    const blocks = xml.matchAll(/<(url|sitemap)\b[^>]*>([\s\S]*?)<\/\1>/gi);
    for (const block of blocks) {
        const inner = block[2] ?? "";
        const locMatch = inner.match(/<loc\b[^>]*>([\s\S]*?)<\/loc>/i);
        if (!locMatch)
            continue;
        const url = readText(locMatch[1]);
        if (!url)
            continue;
        const lastmodMatch = inner.match(/<lastmod\b[^>]*>([\s\S]*?)<\/lastmod>/i);
        const lastmodText = lastmodMatch ? readText(lastmodMatch[1]) : "";
        // An empty <lastmod/> carries no information; treat it as absent.
        const lastmod = lastmodText === "" ? undefined : lastmodText;
        out.push({ url, lastmod });
    }
    return out;
}
537
1052
  function looksLikeSitemap(text) {
538
1053
  const lowered = text.toLowerCase();
539
1054
  return lowered.includes("<urlset") || lowered.includes("<sitemapindex");
@@ -602,22 +1117,32 @@ function shouldIgnore(url, patterns) {
602
1117
  }
603
1118
  return false;
604
1119
  }
605
/**
 * Draw a random sample of up to `n` items using a partial Fisher–Yates
 * shuffle. The input is copied first and never mutated.
 *
 * The requested size is floored and clamped to `0..items.length`, so asking
 * for more items than exist returns all of them (shuffled) and a zero or
 * negative request returns an empty array.
 *
 * @param {Iterable<*>} items Source items.
 * @param {number} n Desired sample size.
 * @param {() => number} [random=Math.random] Source of numbers in [0, 1);
 *   inject a deterministic function for reproducible sampling.
 * @returns {Array<*>} The sampled items.
 */
function fisherYatesSample(items, n, random = Math.random) {
    const arr = [...items];
    // Without the clamp, `n > arr.length` would make the slice start index
    // negative, and `slice(-k)` returns only the last k elements — i.e. a
    // sample SMALLER than the input instead of the whole input.
    const size = Math.min(arr.length, Math.max(0, Math.floor(n)));
    // Only the last `size` slots need to be randomised.
    for (let i = arr.length - 1; i > 0 && arr.length - i <= size; i -= 1) {
        const j = Math.floor(random() * (i + 1));
        [arr[i], arr[j]] = [arr[j], arr[i]];
    }
    return arr.slice(arr.length - size);
}
613
1128
  async function collectUrlsFromSitemap(sitemapText, sitemapUrl, visited, timeoutMs, cache, stats, signal, validateHop) {
614
1129
  visited.add(sitemapUrl);
615
- const locs = parseSitemapUrls(sitemapText);
1130
+ const entries = parseSitemapUrlsWithLastmod(sitemapText);
616
1131
  if (!isSitemapIndex(sitemapText)) {
617
- return locs;
1132
+ const urls = [];
1133
+ const lastmodByUrl = new Map();
1134
+ for (const entry of entries) {
1135
+ urls.push(entry.url);
1136
+ if (entry.lastmod !== undefined) {
1137
+ lastmodByUrl.set(entry.url, entry.lastmod);
1138
+ }
1139
+ }
1140
+ return { urls, lastmodByUrl };
618
1141
  }
619
1142
  const allUrls = [];
620
- for (const childUrl of locs) {
1143
+ const allLastmodByUrl = new Map();
1144
+ for (const entry of entries) {
1145
+ const childUrl = entry.url;
621
1146
  if (signal?.aborted)
622
1147
  throw signal.reason ?? new Error("aborted");
623
1148
  if (visited.has(childUrl))
@@ -628,10 +1153,13 @@ async function collectUrlsFromSitemap(sitemapText, sitemapUrl, visited, timeoutM
628
1153
  const childLike = child.contentType.includes("xml") || looksLikeSitemap(child.text);
629
1154
  if (!childLike)
630
1155
  continue;
631
- const childUrls = await collectUrlsFromSitemap(child.text, childUrl, visited, timeoutMs, cache, stats, signal, validateHop);
1156
+ const { urls: childUrls, lastmodByUrl: childLastmodByUrl } = await collectUrlsFromSitemap(child.text, childUrl, visited, timeoutMs, cache, stats, signal, validateHop);
632
1157
  allUrls.push(...childUrls);
1158
+ for (const [u, lm] of childLastmodByUrl) {
1159
+ allLastmodByUrl.set(u, lm);
1160
+ }
633
1161
  }
634
- return allUrls;
1162
+ return { urls: allUrls, lastmodByUrl: allLastmodByUrl };
635
1163
  }
636
1164
  async function fetchRobotsMeta(origin, timeoutMs, cache, stats, signal, validateHop) {
637
1165
  if (!origin)
@@ -664,7 +1192,7 @@ function isDisallowedByRobots(urlPath, patterns) {
664
1192
  function budgetExceeded(b) {
665
1193
  return b.cap > 0 && b.used >= b.cap;
666
1194
  }
667
- async function loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscovery, discoveryBudget, cache, stats, fillBudgetViaLinkDiscovery = false, byteBudget = { used: 0, cap: 0 }, signal, guardSsrf = false, respectRobotsTxt = true, skippedByRobots = [], followRedirects = true, maxCrawlDiscovered = 5000) {
1195
+ async function loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscovery, discoveryBudget, cache, stats, fillBudgetViaLinkDiscovery = false, byteBudget = { used: 0, cap: 0 }, signal, guardSsrf = false, respectRobotsTxt = true, skippedByRobots = [], followRedirects = true, maxCrawlDiscovered = 5000, monitoringContext = null) {
668
1196
  // Memoized SSRF validator. When guardSsrf is on, every URL fetched by the
669
1197
  // audit (source, sitemap entries, redirects, discovered links) goes through
670
1198
  // this. DNS is hit once per unique hostname per audit — a 4k-page audit on
@@ -724,11 +1252,33 @@ async function loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscover
724
1252
  const isXml = (contentType.includes("xml") || looksLikeSitemap(text)) && sourceStatus !== -1;
725
1253
  if (isXml) {
726
1254
  const visited = new Set();
727
- const allSitemapUrls = await collectUrlsFromSitemap(text, source, visited, timeoutMs, cache, stats, signal, validateHop);
1255
+ const { urls: allSitemapUrls, lastmodByUrl: sitemapLastmodByUrl } = await collectUrlsFromSitemap(text, source, visited, timeoutMs, cache, stats, signal, validateHop);
728
1256
  // If we have a budget, sample from sitemap URLs before fetching
729
- const urlsToFetch = discoveryBudget > 0 && allSitemapUrls.length > discoveryBudget
1257
+ const sampledUrls = discoveryBudget > 0 && allSitemapUrls.length > discoveryBudget
730
1258
  ? fisherYatesSample(allSitemapUrls, discoveryBudget)
731
1259
  : allSitemapUrls;
1260
+ // v0.5: change-driven monitoring. Apply the decision matrix BEFORE
1261
+ // fetching bodies. URLs in plan.skip are not network-touched at all —
1262
+ // their findings will be carried forward from prior state by the caller.
1263
+ // This is the whole point of monitoring mode: rule eval is microseconds,
1264
+ // the fetch is seconds; move the skip decision upstream of the fetch.
1265
+ let scrapePlan;
1266
+ let urlsToFetch;
1267
+ if (monitoringContext) {
1268
+ scrapePlan = planScrapeStrategy({
1269
+ candidateUrls: sampledUrls,
1270
+ priorState: monitoringContext.priorState,
1271
+ sitemapLastmodByUrl,
1272
+ currentRulesetVersion: monitoringContext.currentRulesetVersion,
1273
+ ageFloorDays: monitoringContext.ageFloorDays,
1274
+ now: monitoringContext.now,
1275
+ forceRefetchUrls: monitoringContext.forceRefetchUrls,
1276
+ });
1277
+ urlsToFetch = Array.from(scrapePlan.refetch.keys());
1278
+ }
1279
+ else {
1280
+ urlsToFetch = sampledUrls;
1281
+ }
732
1282
  const pages = [];
733
1283
  // Fetch robots.txt once for the origin — reused for Crawl-Delay pacing and Disallow checks.
734
1284
  const sourceOrigin = (() => { try {
@@ -835,7 +1385,7 @@ async function loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscover
835
1385
  });
836
1386
  }
837
1387
  }
838
- return { pages, sitemapUrls: new Set(allSitemapUrls), discoveredUrlCount: allSitemapUrls.length };
1388
+ return { pages, sitemapUrls: new Set(allSitemapUrls), sitemapLastmodByUrl, discoveredUrlCount: allSitemapUrls.length, scrapePlan };
839
1389
  }
840
1390
  if (contentType.includes("html") || looksLikeHtml(text)) {
841
1391
  const initialPage = { url: source, html: text };
@@ -958,6 +1508,9 @@ export async function auditSource(source, options) {
958
1508
  const ignorePatterns = options?.ignore ?? [];
959
1509
  const respectNoindex = options?.respectNoindex ?? true;
960
1510
  const skipDetectedAuth = options?.skipDetectedAuth ?? false;
1511
+ const skipBoilerplate = options?.skipBoilerplate ?? false;
1512
+ const skipSearchPages = options?.skipSearchPages ?? false;
1513
+ const skipEmptyBody = options?.skipEmptyBody ?? false;
961
1514
  const sampleSize = options?.sampleSize ?? preset.sampleSize ?? 0;
962
1515
  const externalSignal = options?.signal;
963
1516
  const guardSsrf = options?.guardSsrf ?? preset.guardSsrf ?? false;
@@ -973,12 +1526,26 @@ export async function auditSource(source, options) {
973
1526
  let backpressureError = null;
974
1527
  const signal = composeSignals(externalSignal, backpressureAbort.signal);
975
1528
  const observer = new FetchObserver();
1529
+ // 2026-05-03 calibration: the prior (3s p95 cap, 2× baseline multiplier)
1530
+ // gate aborted 4 of 12 reputable-pSEO audits on what was normal load
1531
+ // variance — Zapier at p95=576ms (2.4× a 236ms baseline), Webflow at
1532
+ // p95=1808ms (2.2× 833ms), Airbyte at p95=1288ms (3.4× 380ms). For real
1533
+ // production CDNs these spikes are noise, not degradation. Raise the
1534
+ // gate so it still catches truly broken origins (sustained 4× slowdown
1535
+ // OR p95 above 8s) without tripping on normal audit-induced load.
976
1536
  const monitor = backpressureEnabled
977
1537
  ? new BackpressureMonitor({
978
1538
  warmupSize: 10,
979
- absoluteP95Ms: 3000,
980
- baselineMultiplier: 2,
981
- errorRatioThreshold: 0.1,
1539
+ absoluteP95Ms: 8000,
1540
+ baselineMultiplier: 4,
1541
+ // 2026-05-03 production fix: 0.1 (10%) was tripping pseolint.dev
1542
+ // audits on real production sites that legitimately return ~10% 5xx
1543
+ // (transient errors, async page renderers warming up, sites in
1544
+ // canary). Combined with the `>=` comparison bug (also fixed),
1545
+ // this aborted every web-app audit. 0.15 keeps the gate honest —
1546
+ // a sustained 15%+ 5xx rate is a real problem, not noise — while
1547
+ // letting transient errors not bring down the whole audit.
1548
+ errorRatioThreshold: 0.15,
982
1549
  })
983
1550
  : null;
984
1551
  // v0.4: framework gets set on the first observation that carries headers
@@ -1010,6 +1577,8 @@ export async function auditSource(source, options) {
1010
1577
  entitySwapThreshold: options?.rules?.entitySwapThreshold ?? DEFAULTS.entitySwapThreshold,
1011
1578
  thinContentMinWords: options?.rules?.thinContentMinWords ?? DEFAULTS.thinContentMinWords,
1012
1579
  publicationVelocityMaxPerDay: options?.rules?.publicationVelocityMaxPerDay ?? DEFAULTS.publicationVelocityMaxPerDay,
1580
+ publicationVelocityMaxPerDayCorpusFraction: options?.rules?.publicationVelocityMaxPerDayCorpusFraction
1581
+ ?? DEFAULTS.publicationVelocityMaxPerDayCorpusFraction,
1013
1582
  boilerplateMaxRatio: options?.rules?.boilerplateMaxRatio ?? DEFAULTS.boilerplateMaxRatio,
1014
1583
  templateDiversityMinUniqueRatio: options?.rules?.templateDiversityMinUniqueRatio ?? DEFAULTS.templateDiversityMinUniqueRatio,
1015
1584
  uniqueValueMinWords: options?.rules?.uniqueValueMinWords ?? DEFAULTS.uniqueValueMinWords,
@@ -1051,7 +1620,65 @@ export async function auditSource(source, options) {
1051
1620
  const fetchByteBudget = { used: 0, cap: maxFetchBytes };
1052
1621
  // v0.4 §4.7: detectedFramework is set in onObservation above, side-effect
1053
1622
  // of the normal source URL fetch. No separate probe needed.
1054
- const { pages: loadedPagesRaw, sitemapUrls: sitemapUrlSet, discoveredUrlCount } = await loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscovery, discoveryBudget, cacheConfig, cacheStats, fillBudgetViaLinkDiscovery, fetchByteBudget, signal, guardSsrf, respectRobotsTxt, skippedByRobots, followRedirects, maxCrawlDiscovered);
1623
+ // v0.5: read prior state BEFORE loadPagesFromSource so the change-driven
1624
+ // monitoring decision matrix can run pre-fetch and tell loadPagesFromSource
1625
+ // which URLs to actually fetch. Reading state is cheap; doing it here also
1626
+ // means we know `priorState` once for both the monitoring path and the
1627
+ // post-audit state-write path further down.
1628
+ let priorState = null;
1629
+ const skippedUrls = [];
1630
+ const currentRenderMode = options?.render ? "rendered" : "static";
1631
+ if (options?.state?.path || options?.state?.since || options?.state?.exitOnRegression || options?.state?.mode) {
1632
+ const statePath = options.state?.path ?? ".pseolint/state.json";
1633
+ priorState = await readState(statePath);
1634
+ if (priorState && priorState.renderMode !== currentRenderMode) {
1635
+ console.error(`warning: prior state renderMode=${priorState.renderMode} differs from current ${currentRenderMode}. Performing full re-audit.`);
1636
+ priorState = null;
1637
+ }
1638
+ }
1639
+ // Effective monitoring mode:
1640
+ // - explicit `state.mode` wins ("monitoring" or "fresh")
1641
+ // - else if `--since` is passed and prior state exists → "monitoring" (back-compat alias)
1642
+ // - else if prior state exists → "monitoring" (auto, v0.5 default)
1643
+ // - else → "fresh" (no prior state available)
1644
+ const explicitMode = options?.state?.mode;
1645
+ const effectiveMode = explicitMode ??
1646
+ (priorState ? "monitoring" : "fresh");
1647
+ // Build the monitoring context only for HTTP sources in monitoring mode with
1648
+ // prior state. Single-page HTML and filesystem sources skip this — they are
1649
+ // exempted from the strategy (a single-page audit has nothing to plan; local
1650
+ // reads are cheap so re-reading every file beats branch complexity).
1651
+ const isHttpSource = /^https?:\/\//i.test(source);
1652
+ // If the user asked for monitoring against a filesystem source, surface that
1653
+ // we're ignoring the request. Silent bypass leads to "why is my state file
1654
+ // not being used?" debugging. Only log when the user actively chose
1655
+ // monitoring (explicit --mode or --since) — auto-monitoring on prior state
1656
+ // existence is implicit and shouldn't warn.
1657
+ if (!isHttpSource && effectiveMode === "monitoring" && (options?.state?.mode === "monitoring" || options?.state?.since)) {
1658
+ console.error("warning: monitoring mode requested but source is a local file/directory; reading every HTML file (the matrix only applies to HTTP sources).");
1659
+ }
1660
+ const monitoringContext = effectiveMode === "monitoring" && priorState && isHttpSource
1661
+ ? {
1662
+ priorState,
1663
+ currentRulesetVersion: CORE_RULESET_VERSION,
1664
+ ageFloorDays: options?.state?.ageFloorDays ?? DEFAULT_AGE_FLOOR_DAYS,
1665
+ now: new Date(),
1666
+ forceRefetchUrls: options?.force?.urls,
1667
+ }
1668
+ : null;
1669
+ if (!priorState && options?.state?.since) {
1670
+ console.error("no prior state found — performing full baseline audit");
1671
+ }
1672
+ const { pages: loadedPagesRaw, sitemapUrls: sitemapUrlSet, sitemapLastmodByUrl, discoveredUrlCount, scrapePlan } = await loadPagesFromSource(source, concurrency, timeoutMs, crawlDiscovery, discoveryBudget, cacheConfig, cacheStats, fillBudgetViaLinkDiscovery, fetchByteBudget, signal, guardSsrf, respectRobotsTxt, skippedByRobots, followRedirects, maxCrawlDiscovered, monitoringContext);
1673
+ // The scrapePlan tells us which URLs were skipped pre-fetch under monitoring
1674
+ // mode. Surface them in skippedUrls so they show up under summary.skippedUrls
1675
+ // (kept for back-compat with --since consumers); their prior findings are
1676
+ // carried forward below (T7) and the full plan is surfaced in summary.scrapePlan (T8).
1677
+ if (scrapePlan) {
1678
+ for (const url of scrapePlan.skip.keys()) {
1679
+ skippedUrls.push(url);
1680
+ }
1681
+ }
1055
1682
  throwIfAborted();
1056
1683
  const loadedPages = [...loadedPagesRaw];
1057
1684
  // v0.4 §4.7: content-type-aware crawling. Filter out fetched URLs whose
@@ -1080,34 +1707,11 @@ export async function auditSource(source, options) {
1080
1707
  if (discoveredUrlCount && discoveredUrlCount > loadedPages.length) {
1081
1708
  console.error(`Discovered ${discoveredUrlCount} pages, fetched ${loadedPages.length} for audit. Use --sample-size 0 for full crawl.`);
1082
1709
  }
1083
- // State read + delta filtering
1084
- let priorState = null;
1085
- const skippedUrls = [];
1086
- if (options?.state?.since || options?.state?.exitOnRegression) {
1087
- const statePath = options.state.path ?? ".pseolint/state.json";
1088
- priorState = await readState(statePath);
1089
- const currentRenderMode = options.render ? "rendered" : "static";
1090
- if (priorState && priorState.renderMode !== currentRenderMode) {
1091
- console.error(`warning: prior state renderMode=${priorState.renderMode} differs from current ${currentRenderMode}. Performing full re-audit.`);
1092
- priorState = null;
1093
- }
1094
- if (priorState && options.state.since) {
1095
- const kept = [];
1096
- for (const p of loadedPages) {
1097
- const prior = priorState.urls[p.url];
1098
- if (prior && prior.contentHash === computeContentHash(p.html)) {
1099
- skippedUrls.push(p.url);
1100
- }
1101
- else {
1102
- kept.push(p);
1103
- }
1104
- }
1105
- loadedPages.splice(0, loadedPages.length, ...kept);
1106
- }
1107
- else if (!priorState && options.state.since) {
1108
- console.error("no prior state found — performing full baseline audit");
1109
- }
1110
- }
1710
+ // v0.5: prior state was loaded BEFORE loadPagesFromSource so the change-
1711
+ // driven monitoring decision matrix could run pre-fetch. URLs the matrix
1712
+ // marked as "skip" were never fetched and are recorded in skippedUrls
1713
+ // above. The old post-fetch contentHash skip is gone — the decision now
1714
+ // happens upstream of the network round-trip.
1111
1715
  let robotsTxtContent = "";
1112
1716
  if (/^https?:\/\//i.test(source)) {
1113
1717
  try {
@@ -1145,14 +1749,22 @@ export async function auditSource(source, options) {
1145
1749
  ? deduped.filter((page) => !shouldIgnore(page.url, ignorePatterns))
1146
1750
  : deduped;
1147
1751
  const strategy = options?.samplingStrategy ?? "stratified";
1148
- const sampled = sampleSize > 0 && sampleSize < filtered.length
1752
+ // 2026-05-03 calibration credibility fix: when sampleSeed is set, use a
1753
+ // deterministic PRNG so repeated audits pick the same pages and the
1754
+ // verdict is reproducible. Without a seed, fall back to Math.random
1755
+ // (legacy behavior, kept for backward compatibility).
1756
+ const samplingRandom = options?.sampleSeed !== undefined
1757
+ ? mulberry32(options.sampleSeed)
1758
+ : Math.random;
1759
+ const isSampledAudit = sampleSize > 0 && sampleSize < filtered.length;
1760
+ const sampled = isSampledAudit
1149
1761
  ? (strategy === "stratified"
1150
1762
  ? (() => {
1151
1763
  const urlsMap = new Map(filtered.map(p => [p.url, p]));
1152
- const sampledUrls = stratifiedSample(filtered.map(p => p.url), sampleSize);
1764
+ const sampledUrls = stratifiedSample(filtered.map(p => p.url), sampleSize, samplingRandom);
1153
1765
  return sampledUrls.map(u => urlsMap.get(u));
1154
1766
  })()
1155
- : fisherYatesSample(filtered, sampleSize))
1767
+ : fisherYatesSample(filtered, sampleSize, samplingRandom))
1156
1768
  : filtered;
1157
1769
  const parsedPagesAll = sampled.map((page) => {
1158
1770
  const parsed = parseHtmlPage(page.html, page.url, { normalizeUrl: normalizeUrlOptions });
@@ -1168,7 +1780,13 @@ export async function auditSource(source, options) {
1168
1780
  // (off for the CLI by default; on for the hosted web form).
1169
1781
  const skippedByPolicy = [];
1170
1782
  const parsedPages = parsedPagesAll.filter((p) => {
1171
- const reason = pageSkipReason(p, { respectNoindex, skipDetectedAuth });
1783
+ const reason = pageSkipReason(p, {
1784
+ respectNoindex,
1785
+ skipDetectedAuth,
1786
+ skipBoilerplate,
1787
+ skipSearchPages,
1788
+ skipEmptyBody,
1789
+ });
1172
1790
  if (reason) {
1173
1791
  skippedByPolicy.push({ url: p.url, reason });
1174
1792
  return false;
@@ -1295,10 +1913,13 @@ export async function auditSource(source, options) {
1295
1913
  continue;
1296
1914
  const groupRules = resolveGroupRules(resolvedRules, groupConfig?.overrides);
1297
1915
  const enabledCheck = (ruleId) => !suppressedRuleSet.has(ruleId) && isRuleEnabled(ruleId, groupConfig?.rules);
1298
- const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full");
1916
+ const findings = runRulesOnPages(groupPages, parsedPagesAll, groupRules, enabledCheck, groupName, knownUrls, adjacency, inbound, rootUrl, normalizeUrlOptions, source, DEFAULT_ENTITY_PATTERNS, groupConfig?.overrides, options?.mode ?? "full", isSampledAudit);
1299
1917
  allFindings.push(...findings);
1300
1918
  groupPageCounts[groupName] = groupPages.length;
1301
- const { risk: groupRisk } = scoreFromFindings(findings);
1919
+ // v0.4.3: per-group scoring uses the same site-classification profile so
1920
+ // group-level risk numbers reflect the same severity / confidence remaps
1921
+ // as the headline verdict.
1922
+ const { risk: groupRisk } = scoreFromFindings(applyScoringProfileOverrides(findings, siteClassification), siteClassification);
1302
1923
  groupScores[groupName] = groupRisk;
1303
1924
  }
1304
1925
  throwIfAborted();
@@ -1308,10 +1929,61 @@ export async function auditSource(source, options) {
1308
1929
  });
1309
1930
  // Populate docsUrl on every finding before they leave the engine.
1310
1931
  withDocsUrls(enriched.findings);
1311
- const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings);
1932
+ // v0.4.3: apply site-type-aware severity + confidence overrides so blocker
1933
+ // counts, issue buckets, and category bucketing all reflect the user-visible
1934
+ // severity (not the rule's native severity). The remapped findings replace
1935
+ // the enrichment output so every downstream consumer (summary.issues, AI
1936
+ // triage input, telemetry, formatters) sees the corrected severity.
1937
+ enriched.findings = applyScoringProfileOverrides(enriched.findings, siteClassification);
1938
+ // v0.5: change-driven monitoring carry-forward. URLs that the pre-fetch
1939
+ // strategy marked as "skip" were never fetched this run, so no rule produced
1940
+ // findings for them. Restore their findings from prior state, marked with
1941
+ // `carriedForward: true` and `lastVerifiedAt` so consumers can reason about
1942
+ // staleness. Inject after enrichment + overrides — these findings already
1943
+ // went through both in their original run; re-running enrichment would
1944
+ // strip their template / cluster assignments because parsedPages doesn't
1945
+ // contain the skipped pages.
1946
+ if (priorState && skippedUrls.length > 0) {
1947
+ for (const url of skippedUrls) {
1948
+ const prior = priorState.urls[url];
1949
+ if (!prior || prior.findings.length === 0)
1950
+ continue;
1951
+ for (const f of prior.findings) {
1952
+ const carried = {
1953
+ ruleId: f.ruleId,
1954
+ severity: f.severity,
1955
+ message: f.message,
1956
+ confidence: f.confidence,
1957
+ carriedForward: true,
1958
+ lastVerifiedAt: prior.fetchedAt,
1959
+ // State stores `url` but the engine type uses `pageUrl` — map back.
1960
+ pageUrl: typeof f.url === "string" ? f.url : url,
1961
+ };
1962
+ // Optional fields are preserved opportunistically when present in state.
1963
+ if (typeof f.fix === "string")
1964
+ carried.fix = f.fix;
1965
+ if (typeof f.ref === "string")
1966
+ carried.ref = f.ref;
1967
+ if (typeof f.docsUrl === "string")
1968
+ carried.docsUrl = f.docsUrl;
1969
+ if (Array.isArray(f.relatedUrls))
1970
+ carried.relatedUrls = f.relatedUrls;
1971
+ if (typeof f.group === "string")
1972
+ carried.group = f.group;
1973
+ if (typeof f.similarity === "number")
1974
+ carried.similarity = f.similarity;
1975
+ if (f.context !== undefined)
1976
+ carried.context = f.context;
1977
+ if (f.effort !== undefined)
1978
+ carried.effort = f.effort;
1979
+ enriched.findings.push(carried);
1980
+ }
1981
+ }
1982
+ }
1983
+ const { risk, categories, bucketCounts } = scoreFromFindings(enriched.findings, siteClassification);
1312
1984
  const auditedPageCount = Object.values(groupPageCounts).reduce((a, b) => a + b, 0);
1313
1985
  const issues = bucketIssues(enriched.findings);
1314
- const verdict = verdictForRisk(risk);
1986
+ const verdict = shiftVerdictForAuthority(verdictForRisk(risk), options?.authorityScore);
1315
1987
  const headline = buildHeadline(bucketCounts);
1316
1988
  // audit/* findings are diagnostic-only and never appear in summary.issues.
1317
1989
  // Surface them under diagnostics so consumers (telemetry, debug UIs) can
@@ -1323,6 +1995,7 @@ export async function auditSource(source, options) {
1323
1995
  fetched: parsedPages.length,
1324
1996
  skipped: skippedByContentType.length + skippedByRobots.length + skippedUrls.length,
1325
1997
  };
1998
+ const appliedSeverityDemotions = computeAppliedDemotions(enriched.findings, siteClassification);
1326
1999
  const summary = {
1327
2000
  schemaVersion: SCHEMA_VERSION,
1328
2001
  verdict,
@@ -1331,6 +2004,7 @@ export async function auditSource(source, options) {
1331
2004
  categories,
1332
2005
  issues,
1333
2006
  siteClassification,
2007
+ appliedSeverityDemotions: appliedSeverityDemotions.length > 0 ? appliedSeverityDemotions : undefined,
1334
2008
  diagnostics: {
1335
2009
  originReadiness: readinessReport,
1336
2010
  crawlStats,
@@ -1377,6 +2051,31 @@ export async function auditSource(source, options) {
1377
2051
  if (allSkipped.length > 0) {
1378
2052
  summary.skippedUrls = allSkipped;
1379
2053
  }
2054
+ // v0.5+: surface the change-driven monitoring summary when this run was a
2055
+ // monitoring run (had prior state and didn't force --mode=fresh). Filesystem
2056
+ // sources don't get a scrapePlan because they bypass the matrix.
2057
+ if (effectiveMode === "monitoring" && priorState && scrapePlan) {
2058
+ const reasonCounts = {};
2059
+ for (const reason of scrapePlan.refetch.values()) {
2060
+ reasonCounts[reason] = (reasonCounts[reason] ?? 0) + 1;
2061
+ }
2062
+ for (const reason of scrapePlan.skip.values()) {
2063
+ reasonCounts[reason] = (reasonCounts[reason] ?? 0) + 1;
2064
+ }
2065
+ // `fetched` is the number of URLs whose bodies actually came back —
2066
+ // robots-disallowed, byte-budget-exceeded, content-type-filtered, and 4xx
2067
+ // URLs the matrix INTENDED to refetch may have dropped out before we got
2068
+ // here. `intended` (= scrapePlan.refetch.size) is exposed too so callers
2069
+ // can spot the gap (e.g. "intended 200, fetched 187, 13 URLs dropped").
2070
+ summary.scrapePlan = {
2071
+ fetched: loadedPages.length,
2072
+ intended: scrapePlan.refetch.size,
2073
+ carriedForward: scrapePlan.skip.size,
2074
+ reasonCounts,
2075
+ rulesetVersion: CORE_RULESET_VERSION,
2076
+ lastFullAuditAt: priorState.lastFullAuditAt ?? priorState.lastRun ?? null,
2077
+ };
2078
+ }
1380
2079
  // v0.4.1: surface noindex / auth skips as a discoverable diagnostic so the
1381
2080
  // user sees what the engine excluded. Catches the accidental-noindex bug:
1382
2081
  // pages silently dropped from indexing show up as a visible skip line
@@ -1384,6 +2083,9 @@ export async function auditSource(source, options) {
1384
2083
  if (skippedByPolicy.length > 0) {
1385
2084
  const noindexCount = skippedByPolicy.filter((s) => s.reason === "noindex").length;
1386
2085
  const authCount = skippedByPolicy.filter((s) => s.reason === "auth-detected").length;
2086
+ const boilerplateCount = skippedByPolicy.filter((s) => s.reason === "boilerplate").length;
2087
+ const searchCount = skippedByPolicy.filter((s) => s.reason === "search-result").length;
2088
+ const spaShellCount = skippedByPolicy.filter((s) => s.reason === "spa-shell").length;
1387
2089
  const sample = skippedByPolicy.slice(0, 5).map((s) => `${s.url} (${s.reason})`).join(", ");
1388
2090
  const more = skippedByPolicy.length > 5 ? `, +${skippedByPolicy.length - 5} more` : "";
1389
2091
  const parts = [];
@@ -1391,6 +2093,12 @@ export async function auditSource(source, options) {
1391
2093
  parts.push(`${noindexCount} marked noindex`);
1392
2094
  if (authCount > 0)
1393
2095
  parts.push(`${authCount} detected as auth (login/register/etc)`);
2096
+ if (boilerplateCount > 0)
2097
+ parts.push(`${boilerplateCount} cookie/legal/consent boilerplate`);
2098
+ if (searchCount > 0)
2099
+ parts.push(`${searchCount} search-result page${searchCount === 1 ? "" : "s"}`);
2100
+ if (spaShellCount > 0)
2101
+ parts.push(`${spaShellCount} un-hydrated SPA shell${spaShellCount === 1 ? "" : "s"}`);
1394
2102
  auditFindings.push({
1395
2103
  ruleId: "audit/skipped-by-policy",
1396
2104
  severity: "info",
@@ -1410,6 +2118,13 @@ export async function auditSource(source, options) {
1410
2118
  for (const f of enrichedFindings) {
1411
2119
  if (!f.pageUrl)
1412
2120
  continue;
2121
+ // Carried-forward findings are not "current" — we did not re-verify them
2122
+ // this run. Including them would mask a genuine regression on a skipped
2123
+ // URL: prior set has rule X carried-forward, current set also has X
2124
+ // (carried-forward), comparison says "no new rule", we miss the case
2125
+ // where the page actually started failing rule Y too.
2126
+ if (f.carriedForward)
2127
+ continue;
1413
2128
  const set = currentFindings.get(f.pageUrl) ?? new Set();
1414
2129
  set.add(f.ruleId);
1415
2130
  currentFindings.set(f.pageUrl, set);
@@ -1435,6 +2150,12 @@ export async function auditSource(source, options) {
1435
2150
  const renderMode = options.render ? "rendered" : "static";
1436
2151
  const urls = {};
1437
2152
  const findingsByUrl = new Map();
2153
+ // v0.5+: persist full finding records per URL so future monitoring runs
2154
+ // can carry them forward when the URL is skipped pre-fetch. Carried-
2155
+ // forward findings (carriedForward=true) are NOT re-persisted under the
2156
+ // fetched URL — they belong to the prior entry that's preserved verbatim
2157
+ // for skipped URLs above.
2158
+ const fullFindingsByUrl = new Map();
1438
2159
  for (const f of enrichedFindings) {
1439
2160
  if (!f.pageUrl)
1440
2161
  continue;
@@ -1442,9 +2163,16 @@ export async function auditSource(source, options) {
1442
2163
  if (!list.includes(f.ruleId))
1443
2164
  list.push(f.ruleId);
1444
2165
  findingsByUrl.set(f.pageUrl, list);
2166
+ if (!f.carriedForward) {
2167
+ const records = fullFindingsByUrl.get(f.pageUrl) ?? [];
2168
+ records.push(f);
2169
+ fullFindingsByUrl.set(f.pageUrl, records);
2170
+ }
1445
2171
  }
1446
- // Preserve prior entries for URLs skipped by --since (they didn't change).
1447
- // Without this, delta runs would lose state for unchanged URLs.
2172
+ // Preserve prior entries for URLs the monitoring matrix skipped (we never
2173
+ // fetched them this run; their fetchedAt MUST NOT advance or the age floor
2174
+ // never trips). Skipped URLs include those in scrapePlan.skip plus any
2175
+ // robots-skipped URLs from prior runs that are still in priorState.
1448
2176
  if (priorState && skippedUrls.length > 0) {
1449
2177
  for (const url of skippedUrls) {
1450
2178
  const prior = priorState.urls[url];
@@ -1452,19 +2180,65 @@ export async function auditSource(source, options) {
1452
2180
  urls[url] = prior;
1453
2181
  }
1454
2182
  }
2183
+ const nowIso = new Date().toISOString();
1455
2184
  for (const p of loadedPages) {
1456
- urls[p.url] = {
2185
+ const priorEntry = priorState?.urls[p.url];
2186
+ const responseHeaders = p.httpMeta?.headers;
2187
+ const lastModifiedHeader = responseHeaders?.["last-modified"];
2188
+ const etagHeader = responseHeaders?.["etag"];
2189
+ const sitemapLastmodForUrl = sitemapLastmodByUrl?.get(p.url);
2190
+ const entry = {
1457
2191
  contentHash: computeContentHash(p.html),
1458
- fetchedAt: new Date().toISOString(),
2192
+ fetchedAt: nowIso,
1459
2193
  status: p.httpMeta?.statusCode ?? 200,
1460
2194
  findingIds: findingsByUrl.get(p.url) ?? [],
2195
+ findings: (fullFindingsByUrl.get(p.url) ?? []).map((f) => ({
2196
+ id: `${f.ruleId}::${p.url}`,
2197
+ ruleId: f.ruleId,
2198
+ severity: f.severity,
2199
+ confidence: f.confidence ?? "high",
2200
+ message: f.message,
2201
+ ...(f.fix !== undefined ? { fix: f.fix } : {}),
2202
+ ...(f.ref !== undefined ? { ref: f.ref } : {}),
2203
+ ...(f.docsUrl !== undefined ? { docsUrl: f.docsUrl } : {}),
2204
+ ...(f.pageUrl !== undefined ? { url: f.pageUrl } : {}),
2205
+ ...(f.relatedUrls !== undefined ? { relatedUrls: f.relatedUrls } : {}),
2206
+ ...(f.group !== undefined ? { group: f.group } : {}),
2207
+ ...(f.similarity !== undefined ? { similarity: f.similarity } : {}),
2208
+ ...(f.context !== undefined ? { context: f.context } : {}),
2209
+ ...(f.effort !== undefined ? { effort: f.effort } : {}),
2210
+ })),
2211
+ rulesetVersion: CORE_RULESET_VERSION,
1461
2212
  };
2213
+ if (lastModifiedHeader)
2214
+ entry.lastModified = lastModifiedHeader;
2215
+ else if (priorEntry?.lastModified)
2216
+ entry.lastModified = priorEntry.lastModified;
2217
+ if (etagHeader)
2218
+ entry.etag = etagHeader;
2219
+ else if (priorEntry?.etag)
2220
+ entry.etag = priorEntry.etag;
2221
+ if (sitemapLastmodForUrl)
2222
+ entry.sitemapLastmodAtAudit = sitemapLastmodForUrl;
2223
+ else if (priorEntry?.sitemapLastmodAtAudit)
2224
+ entry.sitemapLastmodAtAudit = priorEntry.sitemapLastmodAtAudit;
2225
+ urls[p.url] = entry;
1462
2226
  }
2227
+ // `lastFullAuditAt` advances only when this run actually re-fetched every
2228
+ // candidate URL. In monitoring mode (matrix skipped some URLs), preserve
2229
+ // the prior baseline timestamp so callers can reason about staleness.
2230
+ // In fresh mode (every candidate URL was fetched), bump to now.
2231
+ const isMonitoringRun = effectiveMode === "monitoring" && priorState !== null;
2232
+ const lastFullAuditAt = isMonitoringRun
2233
+ ? (priorState?.lastFullAuditAt ?? priorState?.lastRun ?? nowIso)
2234
+ : nowIso;
1463
2235
  const newState = {
1464
2236
  version: STATE_SCHEMA_VERSION,
1465
- lastRun: new Date().toISOString(),
2237
+ lastRun: nowIso,
2238
+ lastFullAuditAt,
1466
2239
  source,
1467
2240
  renderMode,
2241
+ rulesetVersion: CORE_RULESET_VERSION,
1468
2242
  urls,
1469
2243
  summary: {
1470
2244
  score: summary.risk,