@pseolint/core 0.6.6 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +3 -3
  2. package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
  3. package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
  4. package/dist/algorithms/authority/commoncrawl.js +17 -0
  5. package/dist/algorithms/authority/commoncrawl.js.map +1 -0
  6. package/dist/algorithms/authority/openpagerank.d.ts +19 -0
  7. package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
  8. package/dist/algorithms/authority/openpagerank.js +42 -0
  9. package/dist/algorithms/authority/openpagerank.js.map +1 -0
  10. package/dist/algorithms/authority/provider.d.ts +16 -0
  11. package/dist/algorithms/authority/provider.d.ts.map +1 -0
  12. package/dist/algorithms/authority/provider.js +24 -0
  13. package/dist/algorithms/authority/provider.js.map +1 -0
  14. package/dist/algorithms/auto-entity-mask.d.ts +19 -0
  15. package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
  16. package/dist/algorithms/auto-entity-mask.js +102 -0
  17. package/dist/algorithms/auto-entity-mask.js.map +1 -0
  18. package/dist/algorithms/example-regions.d.ts +22 -0
  19. package/dist/algorithms/example-regions.d.ts.map +1 -0
  20. package/dist/algorithms/example-regions.js +32 -0
  21. package/dist/algorithms/example-regions.js.map +1 -0
  22. package/dist/algorithms/fact-extraction.d.ts +46 -0
  23. package/dist/algorithms/fact-extraction.d.ts.map +1 -0
  24. package/dist/algorithms/fact-extraction.js +223 -0
  25. package/dist/algorithms/fact-extraction.js.map +1 -0
  26. package/dist/auditor.d.ts.map +1 -1
  27. package/dist/auditor.js +55 -9
  28. package/dist/auditor.js.map +1 -1
  29. package/dist/enrich-findings.d.ts.map +1 -1
  30. package/dist/enrich-findings.js +9 -8
  31. package/dist/enrich-findings.js.map +1 -1
  32. package/dist/index.d.ts +11 -0
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +9 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/origin-preflight.d.ts +89 -0
  37. package/dist/origin-preflight.d.ts.map +1 -0
  38. package/dist/origin-preflight.js +93 -0
  39. package/dist/origin-preflight.js.map +1 -0
  40. package/dist/rule-references.d.ts.map +1 -1
  41. package/dist/rule-references.js +1 -0
  42. package/dist/rule-references.js.map +1 -1
  43. package/dist/rules/aeo/citable-facts.d.ts.map +1 -1
  44. package/dist/rules/aeo/citable-facts.js +4 -33
  45. package/dist/rules/aeo/citable-facts.js.map +1 -1
  46. package/dist/rules/aeo/crawler-access.d.ts +14 -0
  47. package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
  48. package/dist/rules/aeo/crawler-access.js +96 -15
  49. package/dist/rules/aeo/crawler-access.js.map +1 -1
  50. package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
  51. package/dist/rules/aeo/summary-bait.js +4 -3
  52. package/dist/rules/aeo/summary-bait.js.map +1 -1
  53. package/dist/rules/content/citation-coverage.d.ts +11 -0
  54. package/dist/rules/content/citation-coverage.d.ts.map +1 -0
  55. package/dist/rules/content/citation-coverage.js +43 -0
  56. package/dist/rules/content/citation-coverage.js.map +1 -0
  57. package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
  58. package/dist/rules/content/common-phrase-reuse.js +7 -2
  59. package/dist/rules/content/common-phrase-reuse.js.map +1 -1
  60. package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
  61. package/dist/rules/content/regurgitated-content.js +11 -2
  62. package/dist/rules/content/regurgitated-content.js.map +1 -1
  63. package/dist/rules/content/translation-no-op.d.ts.map +1 -1
  64. package/dist/rules/content/translation-no-op.js +5 -1
  65. package/dist/rules/content/translation-no-op.js.map +1 -1
  66. package/dist/rules/content/unique-value.d.ts +15 -1
  67. package/dist/rules/content/unique-value.d.ts.map +1 -1
  68. package/dist/rules/content/unique-value.js +46 -39
  69. package/dist/rules/content/unique-value.js.map +1 -1
  70. package/dist/rules/content/value-add.d.ts.map +1 -1
  71. package/dist/rules/content/value-add.js +3 -1
  72. package/dist/rules/content/value-add.js.map +1 -1
  73. package/dist/rules/links/cluster-connectivity.d.ts +7 -1
  74. package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
  75. package/dist/rules/links/cluster-connectivity.js +8 -2
  76. package/dist/rules/links/cluster-connectivity.js.map +1 -1
  77. package/dist/rules/links/orphan-pages.d.ts +8 -1
  78. package/dist/rules/links/orphan-pages.d.ts.map +1 -1
  79. package/dist/rules/links/orphan-pages.js +10 -1
  80. package/dist/rules/links/orphan-pages.js.map +1 -1
  81. package/dist/rules/schema/consistency.d.ts.map +1 -1
  82. package/dist/rules/schema/consistency.js +33 -21
  83. package/dist/rules/schema/consistency.js.map +1 -1
  84. package/dist/rules/scope.d.ts.map +1 -1
  85. package/dist/rules/scope.js +1 -0
  86. package/dist/rules/scope.js.map +1 -1
  87. package/dist/rules/spam/entity-swap.d.ts.map +1 -1
  88. package/dist/rules/spam/entity-swap.js +51 -9
  89. package/dist/rules/spam/entity-swap.js.map +1 -1
  90. package/dist/rules/spam/thin-content.d.ts.map +1 -1
  91. package/dist/rules/spam/thin-content.js +5 -1
  92. package/dist/rules/spam/thin-content.js.map +1 -1
  93. package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
  94. package/dist/rules/tech/canonical-consistency.js +144 -28
  95. package/dist/rules/tech/canonical-consistency.js.map +1 -1
  96. package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
  97. package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
  98. package/dist/rules/tech/sitemap-completeness.js +21 -5
  99. package/dist/rules/tech/sitemap-completeness.js.map +1 -1
  100. package/dist/rules/tech/soft-404.d.ts +11 -0
  101. package/dist/rules/tech/soft-404.d.ts.map +1 -1
  102. package/dist/rules/tech/soft-404.js +47 -5
  103. package/dist/rules/tech/soft-404.js.map +1 -1
  104. package/dist/site-classifier.d.ts.map +1 -1
  105. package/dist/site-classifier.js +1 -0
  106. package/dist/site-classifier.js.map +1 -1
  107. package/dist/template-detection.d.ts +1 -0
  108. package/dist/template-detection.d.ts.map +1 -1
  109. package/dist/template-detection.js +1 -1
  110. package/dist/template-detection.js.map +1 -1
  111. package/dist/types.d.ts +22 -1
  112. package/dist/types.d.ts.map +1 -1
  113. package/package.json +17 -1
@@ -1 +1 @@
1
- {"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAEA,MAAM,OAAO,GAAG,mBAAmB,CAAC;AAEpC,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF,SAAS,mBAAmB,CAAC,IAAgB;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,KAAK,IAAI,CAAC,CAAC;IACrE,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACpF,IAAI,IAAI,CAAC,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACnC,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAAE,KAAK,IAAI,CAAC,CAAC;IAClE,OAAO,KAAK,CAAC;AACf,CAAC;AAYD,SAAS,cAAc,CAAC,IAAgB,EAAE,WAAyB;IACjE,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,wEAAwE;IACxE,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IAC9F,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEhD,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC;IACxF,IAAI,SAAiB,CAAC;IACtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,IAAI,gBAAgB,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnD,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,mBAAmB,CAAC,CAAC;IAChF,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,MAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnF,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC5C,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnB,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QAC1B,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;IAED,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,2BAA2B;QAC7C,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CACvE,CAAC;IACF,MAAM,WAAW,GAAG,kBAAkB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,mFAAmF;IACnF,MAAM,cAAc,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,6BAA6B,CAAC,CAAC;IAC5F,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/C,gFAAgF;IAChF,wEAAwE;IACxE,0EAA0E;IAC1E,6EAA6E;IAC7E,yEAAyE;IACzE,qEAAqE;IACrE,uEAAuE;IACvE,MAAM,sBAAsB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IACrG,MAAM,mBAAmB,GAAG,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/D,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC;AAChG,CAAC;AAED,SAAS,SAAS,CAAC,OAAgB;IACjC,MAAM,MAAM,GAAG;QACb,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,SAAS;QACjB,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,IAAI;QACZ,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,mBAAmB;KAC5B,CAAC;IACF,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa;IACrC,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,UAAU,CAAC;IACnC,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB,EAAE,KAAa,EAAE,OAAgB;IACrE,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtD,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAA8B,CAAC;IACrE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC;QACjC,IAAI,GAAG,GAAG,GAAG;YAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC;IAC1F,OAAO,CACL,GAAG,IAAI,CAAC,GAAG,qBAAqB,GAAG,CAAC,KAAK,CAAC,kBAAkB;QAC5D,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI;QACnF,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI;QACzG,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,2BAA2B,GAAG,CAAC,OAAO,CAAC,mBAAmB,CAAC,KAAK;QACzG,kBAAkB,UAAU,iEAAiE,CAC9F,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,YAAY,CAAC,KAAmB,EAAE,QAAsB;IACtE,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACjC,IAAI,KAAK,IAAI,GAAG;YAAE,SAAS;QAE3B,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,gBAAgB,CAAC,KAAK,CAAC;YACjC,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC;YAC3C,GAAG,EAAE,qMAAqM;YAC1M,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
1
+ {"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAE/E,MAAM,OAAO,GAAG,mBAAmB,CAAC;AAEpC,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF,SAAS,mBAAmB,CAAC,IAAgB;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,KAAK,IAAI,CAAC,CAAC;IACrE,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACpF,IAAI,IAAI,CAAC,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACnC,IACE,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjD,wBAAwB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC;QACtD,KAAK,IAAI,CAAC,CAAC;IACb,OAAO,KAAK,CAAC;AACf,CAAC;AAYD,SAAS,cAAc,CAAC,IAAgB,EAAE,WAAyB;IACjE,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,wEAAwE;IACxE,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IAC9F,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEhD,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC;IACxF,IAAI,SAAiB,CAAC;IACtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,IAAI,gBAAgB,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnD,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,mBAAmB,CAAC,CAAC;IAChF,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,MAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnF,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IAC5C,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnB,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QAC1B,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;IAED,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,2BAA2B;QAC7C,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CACvE,CAAC;IACF,MAAM,WAAW,GAAG,kBAAkB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,mFAAmF;IACnF,MAAM,cAAc,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,6BAA6B,CAAC,CAAC;IAC5F,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/C,gFAAgF;IAChF,wEAAwE;IACxE,0EAA0E;IAC1E,6EAA6E;IAC7E,yEAAyE;IACzE,qEAAqE;IACrE,uEAAuE;IACvE,MAAM,sBAAsB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IACrG,MAAM,mBAAmB,GAAG,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/D,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC;AAChG,CAAC;AAED,SAAS,SAAS,CAAC,OAAgB;IACjC,MAAM,MAAM,GAAG;QACb,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,SAAS;QACjB,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,IAAI;QACZ,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,mBAAmB;KAC5B,CAAC;IACF,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa;IACrC,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,UAAU,CAAC;IACnC,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB,EAAE,KAAa,EAAE,OAAgB;IACrE,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtD,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAA8B,CAAC;IACrE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC;QACjC,IAAI,GAAG,GAAG,GAAG;YAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC;IAC1F,OAAO,CACL,GAAG,IAAI,CAAC,GAAG,qBAAqB,GAAG,CAAC,KAAK,CAAC,kBAAkB;QAC5D,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI;QACnF,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI;QACzG,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,2BAA2B,GAAG,CAAC,OAAO,CAAC,mBAAmB,CAAC,KAAK;QACzG,kBAAkB,UAAU,iEAAiE,CAC9F,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,YAAY,CAAC,KAAmB,EAAE,QAAsB;IACtE,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACjC,IAAI,KAAK,IAAI,GAAG;YAAE,SAAS;QAE3B,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,gBAAgB,CAAC,KAAK,CAAC;YACjC,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC;YAC3C,GAAG,EAAE,qMAAqM;YAC1M,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -3,5 +3,11 @@ import type { ParsedPage, RuleResult } from "../../types.js";
3
3
  * Flags clusters (same parent directory) with 2+ pages that are siloed: no outbound
4
4
  * internal crawl link to another cluster and no inbound from another cluster.
5
5
  */
6
- export declare function clusterConnectivityRule(pages: ParsedPage[], knownUrls: Set<string>): RuleResult[];
6
+ export declare function clusterConnectivityRule(pages: ParsedPage[], knownUrls: Set<string>,
7
+ /**
8
+ * 2026-06-16 calibration FP fix: cross-cluster links routinely target pages
9
+ * that were not fetched on a sampled crawl, so a "siloed cluster" verdict is
10
+ * unreliable. Only run on a full crawl.
11
+ */
12
+ sampled?: boolean): RuleResult[];
7
13
  //# sourceMappingURL=cluster-connectivity.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"cluster-connectivity.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyB7D;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,GACrB,UAAU,EAAE,CA0Dd"}
1
+ {"version":3,"file":"cluster-connectivity.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyB7D;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC;AACtB;;;;GAIG;AACH,OAAO,UAAQ,GACd,UAAU,EAAE,CA0Dd"}
@@ -19,8 +19,14 @@ function hasCrossClusterInbound(clusterDir, urlsInCluster, pages, knownUrls) {
19
19
  * Flags clusters (same parent directory) with 2+ pages that are siloed: no outbound
20
20
  * internal crawl link to another cluster and no inbound from another cluster.
21
21
  */
22
- export function clusterConnectivityRule(pages, knownUrls) {
23
- if (pages.length < 2) {
22
+ export function clusterConnectivityRule(pages, knownUrls,
23
+ /**
24
+ * 2026-06-16 calibration FP fix: cross-cluster links routinely target pages
25
+ * that were not fetched on a sampled crawl, so a "siloed cluster" verdict is
26
+ * unreliable. Only run on a full crawl.
27
+ */
28
+ sampled = false) {
29
+ if (sampled || pages.length < 2) {
24
30
  return [];
25
31
  }
26
32
  const clusterPages = new Map();
@@ -1 +1 @@
1
- {"version":3,"file":"cluster-connectivity.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,SAAS,sBAAsB,CAC7B,UAAkB,EAClB,aAA0B,EAC1B,KAAmB,EACnB,SAAsB;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,SAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,EAAuB,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAU,CAAC;QACvD,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACf,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;QACxD,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,uBAAuB,GAAG,KAAK,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS;YACX,CAAC;YACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,SAAS;gBACX,CAAC;gBACD,MAAM,aAAa,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBAC7C,IAAI,aAAa,KAAK,UAAU,EAAE,CAAC;oBACjC,uBAAuB,GAAG,IAAI,CAAC;oBAC/B,MAAM;gBACR,CAAC;YACH,CAAC;YACD,IAAI,uBAAuB,EAAE,CAAC;gBAC5B,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,sBAAsB,CAAC,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;QAE9E,IAAI,CAAC,uBAAuB,IAAI,CAAC,UAAU,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,WAAW,UAAU,KAAK,IAAI,CAAC,IAAI,uDAAuD;gBACnG,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,GAAG,EAAE,kGAAkG;aACxG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"cluster-connectivity.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,SAAS,sBAAsB,CAC7B,UAAkB,EAClB,aAA0B,EAC1B,KAAmB,EACnB,SAAsB;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,SAAsB;AACtB;;;;GAIG;AACH,OAAO,GAAG,KAAK;IAEf,IAAI,OAAO,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,EAAuB,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAU,CAAC;QACvD,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACf,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;QACxD,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,uBAAuB,GAAG,KAAK,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS;YACX,CAAC;YACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,SAAS;gBACX,CAAC;gBACD,MAAM,aAAa,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBAC7C,IAAI,aAAa,KAAK,UAAU,EAAE,CAAC;oBACjC,uBAAuB,GAAG,IAAI,CAAC;oBAC/B,MAAM;gBACR,CAAC;YACH,CAAC;YACD,IAAI,uBAAuB,EAAE,CAAC;gBAC5B,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,sBAAsB,CAAC,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;QAE9E,IAAI,CAAC,uBAAuB,IAAI,CAAC,UAAU,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,WAAW,UAAU,KAAK,IAAI,CAAC,IAAI,uDAAuD;gBACnG,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,GAAG,EAAE,kGAAkG;aACxG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1,3 +1,10 @@
1
1
  import type { ParsedPage, RuleResult } from "../../types.js";
2
- export declare function orphanPagesRule(pages: ParsedPage[], inboundLinks: Map<string, number>, rootUrl?: string): RuleResult[];
2
+ export declare function orphanPagesRule(pages: ParsedPage[], inboundLinks: Map<string, number>, rootUrl?: string,
3
+ /**
4
+ * 2026-06-16 calibration FP fix: on a sampled crawl the page that links to a
5
+ * given URL is often simply not in the fetched subset, so "0 inbound in this
6
+ * crawl" is not evidence of a real orphan. Orphan detection is only reliable
7
+ * on a full crawl — skip it when sampled rather than flag healthy pages.
8
+ */
9
+ sampled?: boolean): RuleResult[];
3
10
  //# sourceMappingURL=orphan-pages.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM,GACf,UAAU,EAAE,CAmBd"}
1
+ {"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM;AAChB;;;;;GAKG;AACH,OAAO,UAAQ,GACd,UAAU,EAAE,CAqBd"}
@@ -1,4 +1,13 @@
1
- export function orphanPagesRule(pages, inboundLinks, rootUrl) {
1
+ export function orphanPagesRule(pages, inboundLinks, rootUrl,
2
+ /**
3
+ * 2026-06-16 calibration FP fix: on a sampled crawl the page that links to a
4
+ * given URL is often simply not in the fetched subset, so "0 inbound in this
5
+ * crawl" is not evidence of a real orphan. Orphan detection is only reliable
6
+ * on a full crawl — skip it when sampled rather than flag healthy pages.
7
+ */
8
+ sampled = false) {
9
+ if (sampled)
10
+ return [];
2
11
  const findings = [];
3
12
  for (const page of pages) {
4
13
  if (rootUrl && page.url === rootUrl) {
@@ -1 +1 @@
1
- {"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,IAAI,IAAI,CAAC,GAAG,KAAK,OAAO,EAAE,CAAC;YACpC,SAAS;QACX,CAAC;QACD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,oBAAoB;gBAC5B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;gBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,8FAA8F;aACpG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;AAChB;;;;;GAKG;AACH,OAAO,GAAG,KAAK;IAEf,IAAI,OAAO;QAAE,OAAO,EAAE,CAAC;IAEvB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,IAAI,IAAI,CAAC,GAAG,KAAK,OAAO,EAAE,CAAC;YACpC,SAAS;QACX,CAAC;QACD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,oBAAoB;gBAC5B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;gBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,8FAA8F;aACpG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAiDvE"}
1
+ {"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA6DvE"}
@@ -1,6 +1,10 @@
1
1
  export function schemaConsistencyRule(pages) {
2
2
  const findings = [];
3
- const typesByPage = new Map();
3
+ // Group pages by structureSignature so we only compare @type within template clusters.
4
+ // A normal site legitimately mixes types across templates (WebSite on home, Article on
5
+ // blog, Product on listings). Variance is only a problem when pages that share the same
6
+ // template (same structureSignature) use different @type values.
7
+ const clustersBySignature = new Map();
4
8
  for (const page of pages) {
5
9
  const types = new Set();
6
10
  for (const entry of page.jsonLd) {
@@ -15,30 +19,38 @@ export function schemaConsistencyRule(pages) {
15
19
  types.add(obj["@type"]);
16
20
  }
17
21
  }
18
- if (types.size > 0) {
19
- typesByPage.set(page.url, types);
22
+ if (types.size === 0) {
23
+ continue;
20
24
  }
21
- }
22
- if (typesByPage.size < 2) {
23
- return findings;
24
- }
25
- const allTypes = new Set();
26
- for (const types of typesByPage.values()) {
27
- for (const t of types) {
28
- allTypes.add(t);
25
+ const sig = page.structureSignature;
26
+ if (!clustersBySignature.has(sig)) {
27
+ clustersBySignature.set(sig, []);
29
28
  }
29
+ clustersBySignature.get(sig).push({ url: page.url, types });
30
30
  }
31
- if (allTypes.size <= 1) {
32
- return findings;
31
+ // Within each cluster of ≥2 pages, check whether all pages use the same @type set.
32
+ for (const members of clustersBySignature.values()) {
33
+ if (members.length < 2) {
34
+ continue;
35
+ }
36
+ const allTypesInCluster = new Set();
37
+ for (const { types } of members) {
38
+ for (const t of types) {
39
+ allTypesInCluster.add(t);
40
+ }
41
+ }
42
+ if (allTypesInCluster.size <= 1) {
43
+ continue;
44
+ }
45
+ const typeList = Array.from(allTypesInCluster).sort().join(", ");
46
+ findings.push({
47
+ ruleId: "schema/consistency",
48
+ severity: "info",
49
+ message: `Template pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
50
+ relatedUrls: members.map((m) => m.url),
51
+ fix: `Use a consistent @type across all pages that share the same template structure.`
52
+ });
33
53
  }
34
- const typeList = Array.from(allTypes).sort().join(", ");
35
- findings.push({
36
- ruleId: "schema/consistency",
37
- severity: "info",
38
- message: `Pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
39
- relatedUrls: Array.from(typesByPage.keys()),
40
- fix: `Use a consistent @type across all template pages, or separate pages into groups with different schema types.`
41
- });
42
54
  return findings;
43
55
  }
44
56
  //# sourceMappingURL=consistency.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAuB,CAAC;IAEnD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YACnB,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC;QACzC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;QACvB,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxD,QAAQ,CAAC,IAAI,CAAC;QACZ,MAAM,EAAE,oBAAoB;QAC5B,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE,iCAAiC,QAAQ,6DAA6D;QAC/G,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QAC3C,GAAG,EAAE,8GAA8G;KACpH,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,uFAAuF;IACvF,wFAAwF;IACxF,iEAAiE;IACjE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAsD,CAAC;IAE1F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,kBAAkB,CAAC;QACpC,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YAClC,mBAAmB,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACnC,CAAC;QACD,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,CAAC;IAED,mFAAmF;IACnF,KAAK,MAAM,OAAO,IAAI,mBAAmB,CAAC,MAAM,EAAE,EAAE,CAAC;QACnD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAU,CAAC;QAC5C,KAAK,MAAM,EAAE,KAAK,EAAE,IAAI,OAAO,EAAE,CAAC;YAChC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;gBACtB,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,IAAI,iBAAiB,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,oBAAoB;YAC5B,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,0CAA0C,QAAQ,6DAA6D;YACxH,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACtC,GAAG,EAAE,iFAAiF;SACvF,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"scope.d.ts","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC;AAE1C,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAkEhD,CAAC;AAEF,4GAA4G;AAC5G,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAE3D"}
1
+ {"version":3,"file":"scope.d.ts","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,QAAQ,CAAC;AAE1C,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAmEhD,CAAC;AAEF,4GAA4G;AAC5G,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAE3D"}
@@ -16,6 +16,7 @@ export const RULE_SCOPE = {
16
16
  "content/title-uniqueness": "corpus",
17
17
  "content/heading-structure": "page",
18
18
  "content/image-alt-text": "page",
19
+ "content/citation-coverage": "page",
19
20
  // links
20
21
  "links/orphan-pages": "corpus",
21
22
  "links/dead-ends": "corpus",
@@ -1 +1 @@
1
- {"version":3,"file":"scope.js","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,MAAM,UAAU,GAA8B;IACnD,OAAO;IACP,qBAAqB,EAAE,QAAQ;IAC/B,kBAAkB,EAAE,QAAQ;IAC5B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,QAAQ;IAClC,yBAAyB,EAAE,QAAQ;IACnC,2BAA2B,EAAE,QAAQ;IACrC,sBAAsB,EAAE,QAAQ;IAChC,wBAAwB,EAAE,QAAQ;IAElC,UAAU;IACV,sBAAsB,EAAE,QAAQ;IAChC,yBAAyB,EAAE,QAAQ;IACnC,wBAAwB,EAAE,MAAM;IAChC,sBAAsB,EAAE,MAAM;IAC9B,0BAA0B,EAAE,QAAQ;IACpC,2BAA2B,EAAE,MAAM;IACnC,wBAAwB,EAAE,MAAM;IAEhC,QAAQ;IACR,oBAAoB,EAAE,QAAQ;IAC9B,iBAAiB,EAAE,QAAQ;IAC3B,4BAA4B,EAAE,QAAQ;IACtC,kBAAkB,EAAE,QAAQ;IAC5B,6BAA6B,EAAE,QAAQ;IACvC,+BAA+B,EAAE,QAAQ;IAEzC,OAAO;IACP,4BAA4B,EAAE,QAAQ;IACtC,iCAAiC,EAAE,MAAM;IACzC,8BAA8B,EAAE,QAAQ;IACxC,2BAA2B,EAAE,QAAQ;IACrC,qBAAqB,EAAE,MAAM;IAC7B,eAAe,EAAE,MAAM;IACvB,2BAA2B,EAAE,QAAQ;IACrC,wBAAwB,EAAE,QAAQ;IAClC,8BAA8B,EAAE,QAAQ;IACxC,sBAAsB,EAAE,MAAM;IAE9B,SAAS;IACT,sBAAsB,EAAE,MAAM;IAC9B,wBAAwB,EAAE,MAAM;IAChC,oBAAoB,EAAE,QAAQ;IAE9B,kEAAkE;IAClE,kEAAkE;IAClE,uCAAuC;IACvC,sBAAsB,EAAE,QAAQ;IAEhC,eAAe;IACf,sBAAsB,EAAE,MAAM;IAC9B,6BAA6B,EAAE,QAAQ;IAEvC,iBAAiB;IACjB,qBAAqB,EAAE,QAAQ;IAE/B,oCAAoC;IACpC,cAAc,EAAE,QAAQ;IACxB,oBAAoB,EAAE,QAAQ;IAC9B,uBAAuB,EAAE,MAAM;IAC/B,kBAAkB,EAAE,MAAM;IAC1B,kBAAkB,EAAE,MAAM;IAC1B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,MAAM;IAChC,kBAAkB,EAAE,MAAM;CAC3B,CAAC;AAEF,4GAA4G;AAC5G,MAAM,UAAU,mBAAmB,CAAC,MAAc;IAChD,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,KAAK,MAAM,CAAC;AACrD,CAAC"}
1
+ {"version":3,"file":"scope.js","sourceRoot":"","sources":["../../src/rules/scope.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,MAAM,UAAU,GAA8B;IACnD,OAAO;IACP,qBAAqB,EAAE,QAAQ;IAC/B,kBAAkB,EAAE,QAAQ;IAC5B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,QAAQ;IAClC,yBAAyB,EAAE,QAAQ;IACnC,2BAA2B,EAAE,QAAQ;IACrC,sBAAsB,EAAE,QAAQ;IAChC,wBAAwB,EAAE,QAAQ;IAElC,UAAU;IACV,sBAAsB,EAAE,QAAQ;IAChC,yBAAyB,EAAE,QAAQ;IACnC,wBAAwB,EAAE,MAAM;IAChC,sBAAsB,EAAE,MAAM;IAC9B,0BAA0B,EAAE,QAAQ;IACpC,2BAA2B,EAAE,MAAM;IACnC,wBAAwB,EAAE,MAAM;IAChC,2BAA2B,EAAE,MAAM;IAEnC,QAAQ;IACR,oBAAoB,EAAE,QAAQ;IAC9B,iBAAiB,EAAE,QAAQ;IAC3B,4BAA4B,EAAE,QAAQ;IACtC,kBAAkB,EAAE,QAAQ;IAC5B,6BAA6B,EAAE,QAAQ;IACvC,+BAA+B,EAAE,QAAQ;IAEzC,OAAO;IACP,4BAA4B,EAAE,QAAQ;IACtC,iCAAiC,EAAE,MAAM;IACzC,8BAA8B,EAAE,QAAQ;IACxC,2BAA2B,EAAE,QAAQ;IACrC,qBAAqB,EAAE,MAAM;IAC7B,eAAe,EAAE,MAAM;IACvB,2BAA2B,EAAE,QAAQ;IACrC,wBAAwB,EAAE,QAAQ;IAClC,8BAA8B,EAAE,QAAQ;IACxC,sBAAsB,EAAE,MAAM;IAE9B,SAAS;IACT,sBAAsB,EAAE,MAAM;IAC9B,wBAAwB,EAAE,MAAM;IAChC,oBAAoB,EAAE,QAAQ;IAE9B,kEAAkE;IAClE,kEAAkE;IAClE,uCAAuC;IACvC,sBAAsB,EAAE,QAAQ;IAEhC,eAAe;IACf,sBAAsB,EAAE,MAAM;IAC9B,6BAA6B,EAAE,QAAQ;IAEvC,iBAAiB;IACjB,qBAAqB,EAAE,QAAQ;IAE/B,oCAAoC;IACpC,cAAc,EAAE,QAAQ;IACxB,oBAAoB,EAAE,QAAQ;IAC9B,uBAAuB,EAAE,MAAM;IAC/B,kBAAkB,EAAE,MAAM;IAC1B,kBAAkB,EAAE,MAAM;IAC1B,mBAAmB,EAAE,MAAM;IAC3B,wBAAwB,EAAE,MAAM;IAChC,kBAAkB,EAAE,MAAM;CAC3B,CAAC;AAEF,4GAA4G;AAC5G,MAAM,UAAU,mBAAmB,CAAC,MAAc;IAChD,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,KAAK,MAAM,CAAC;AACrD,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAErD,wBAAgB,cAAc,CAC5B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,SAAS,EAAE,MAAM,GAChB;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,CAwBhD"}
1
+ {"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAsBrD,wBAAgB,cAAc,CAC5B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,SAAS,EAAE,MAAM,GAChB;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,CAgDhD"}
@@ -1,23 +1,65 @@
1
1
  import { maskEntities } from "../../algorithms/entity-mask.js";
2
2
  import { hammingDistance, simHashFromText, similarityFromDistance } from "../../algorithms/simhash.js";
3
+ /**
4
+ * Compute masking coverage: fraction of pages where at least one entity token
5
+ * was replaced by a placeholder. A page "benefited" from masking when its
6
+ * masked text differs from the original.
7
+ *
8
+ * ponytail: threshold is <20% of pages masked → low coverage (weak entity signal).
9
+ * Zero patterns supplied is a degenerate case and always yields low coverage.
10
+ */
11
+ function maskingCoverage(pages, patterns) {
12
+ if (patterns.length === 0 || pages.length === 0)
13
+ return 0;
14
+ let touched = 0;
15
+ for (const page of pages) {
16
+ const masked = maskEntities(page.contentText, patterns);
17
+ if (masked !== page.contentText)
18
+ touched += 1;
19
+ }
20
+ return touched / pages.length;
21
+ }
22
+ const LOW_COVERAGE_THRESHOLD = 0.2; // ponytail: <20% pages masked → low-confidence signal
3
23
  export function entitySwapRule(pages, patterns, threshold) {
4
24
  const findings = [];
5
25
  const pairs = [];
6
26
  const hashes = pages.map((page) => simHashFromText(maskEntities(page.contentText, patterns)));
27
+ const coverage = maskingCoverage(pages, patterns);
28
+ const isLowCoverage = coverage < LOW_COVERAGE_THRESHOLD;
7
29
  for (let i = 0; i < pages.length; i += 1) {
8
30
  for (let j = i + 1; j < pages.length; j += 1) {
9
31
  const similarity = similarityFromDistance(hammingDistance(hashes[i], hashes[j]));
10
32
  if (similarity >= threshold) {
11
33
  pairs.push({ leftUrl: pages[i].url, rightUrl: pages[j].url, similarity });
12
- findings.push({
13
- ruleId: "spam/entity-swap",
14
- severity: "critical",
15
- message: `${pages[i].url} and ${pages[j].url} look structurally identical after entity masking.`,
16
- pageUrl: pages[i].url,
17
- relatedUrls: [pages[j].url],
18
- similarity,
19
- fix: "These pages are identical after masking entity names. Add entity-specific content: local regulations, statistics, fees, or requirements unique to each entity."
20
- });
34
+ if (isLowCoverage) {
35
+ // Weak/absent entity patterns mean masking barely changed the text;
36
+ // this finding overlaps a plain near-duplicate signal, not a confirmed
37
+ // entity-swap. Downgrade to warning with low confidence.
38
+ findings.push({
39
+ ruleId: "spam/entity-swap",
40
+ severity: "warning",
41
+ confidence: "low",
42
+ message: `${pages[i].url} and ${pages[j].url} are near-identical, but entity masking ` +
43
+ `coverage is too low to confirm an entity-swap pattern (masking touched ` +
44
+ `${Math.round(coverage * 100)}% of pages). ` +
45
+ `Provide entity patterns or treat this as a near-duplicate finding instead.`,
46
+ pageUrl: pages[i].url,
47
+ relatedUrls: [pages[j].url],
48
+ similarity,
49
+ fix: "Supply entity patterns (city names, states, product names) so the rule can confirm whether these pages are entity-swapped templates. If no entity patterns apply, address as near-duplicate spam instead."
50
+ });
51
+ }
52
+ else {
53
+ findings.push({
54
+ ruleId: "spam/entity-swap",
55
+ severity: "critical",
56
+ message: `${pages[i].url} and ${pages[j].url} look structurally identical after entity masking.`,
57
+ pageUrl: pages[i].url,
58
+ relatedUrls: [pages[j].url],
59
+ similarity,
60
+ fix: "These pages are identical after masking entity names. Add entity-specific content: local regulations, statistics, fees, or requirements unique to each entity."
61
+ });
62
+ }
21
63
  }
22
64
  }
23
65
  }
@@ -1 +1 @@
1
- {"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBAC1E,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,kBAAkB;oBAC1B,QAAQ,EAAE,UAAU;oBACpB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,oDAAoD;oBAChG,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;oBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC3B,UAAU;oBACV,GAAG,EAAE,gKAAgK;iBACtK,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7B,CAAC"}
1
+ {"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG;;;;;;;GAOG;AACH,SAAS,eAAe,CAAC,KAAmB,EAAE,QAA6B;IACzE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1D,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;QACxD,IAAI,MAAM,KAAK,IAAI,CAAC,WAAW;YAAE,OAAO,IAAI,CAAC,CAAC;IAChD,CAAC;IACD,OAAO,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC;AAChC,CAAC;AAED,MAAM,sBAAsB,GAAG,GAAG,CAAC,CAAC,sDAAsD;AAE1F,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,MAAM,QAAQ,GAAG,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IAClD,MAAM,aAAa,GAAG,QAAQ,GAAG,sBAAsB,CAAC;IAExD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBAE1E,IAAI,aAAa,EAAE,CAAC;oBAClB,oEAAoE;oBACpE,uEAAuE;oBACvE,0DAA0D;oBAC1D,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,kBAAkB;wBAC1B,QAAQ,EAAE,SAAS;wBACnB,UAAU,EAAE,KAAK;wBACjB,OAAO,EACL,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,0CAA0C;4BAC7E,yEAAyE;4BACzE,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,eAAe;4BAC5C,4EAA4E;wBAC9E,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;wBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;wBAC3B,UAAU;wBACV,GAAG,EAAE,2MAA2M;qBACjN,CAAC,CAAC;gBACL,CAAC;qBAAM,CAAC;oBACN,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,kBAAkB;wBAC1B,QAAQ,EAAE,UAAU;wBACpB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,oDAAoD;wBAChG,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;wBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;wBAC3B,UAAU;wBACV,GAAG,EAAE,gKAAgK;qBACtK,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7B,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"thin-content.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAMzE,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,MAAM,GACf;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;CAAE,CA8B1D"}
1
+ {"version":3,"file":"thin-content.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAc,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAMzE,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,MAAM,GACf;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;CAAE,CAkC1D"}
@@ -19,7 +19,11 @@ export function thinContentRule(pages, minWords) {
19
19
  : "";
20
20
  findings.push({
21
21
  ruleId: "spam/thin-content",
22
- severity: "error",
22
+ // High confidence (far below the floor) is an error; the medium band — which
23
+ // the rule itself flags as "could legitimately be a short page" — is a
24
+ // warning, not a ship-blocker. The page still joins thinContentUrls either
25
+ // way so spam/doorway-pattern can stack on it.
26
+ severity: confidence === "high" ? "error" : "warning",
23
27
  confidence,
24
28
  message: `${page.url} has thin content (${words} words).${shortPageNote}`,
25
29
  fix: `Add at least ${minWords - words} more words of substantive content relevant to this page's specific topic.`
@@ -1 +1 @@
1
- {"version":3,"file":"thin-content.js","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAEA,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,QAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,IAAI,KAAK,IAAI,QAAQ,EAAE,CAAC;YACtB,SAAS;QACX,CAAC;QAED,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,qBAAqB;QACrB,4DAA4D;QAC5D,8EAA8E;QAC9E,MAAM,UAAU,GAAe,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxE,MAAM,aAAa,GACjB,UAAU,KAAK,QAAQ;YACrB,CAAC,CAAC,6IAA6I;YAC/I,CAAC,CAAC,EAAE,CAAC;QAET,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,QAAQ,EAAE,OAAO;YACjB,UAAU;YACV,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,sBAAsB,KAAK,WAAW,aAAa,EAAE;YACzE,GAAG,EAAE,gBAAgB,QAAQ,GAAG,KAAK,4EAA4E;SAClH,CAAC,CAAC;IACL,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,CAAC;AACvC,CAAC"}
1
+ {"version":3,"file":"thin-content.js","sourceRoot":"","sources":["../../../src/rules/spam/thin-content.ts"],"names":[],"mappings":"AAEA,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,QAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,IAAI,KAAK,IAAI,QAAQ,EAAE,CAAC;YACtB,SAAS;QACX,CAAC;QAED,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC9B,qBAAqB;QACrB,4DAA4D;QAC5D,8EAA8E;QAC9E,MAAM,UAAU,GAAe,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QACxE,MAAM,aAAa,GACjB,UAAU,KAAK,QAAQ;YACrB,CAAC,CAAC,6IAA6I;YAC/I,CAAC,CAAC,EAAE,CAAC;QAET,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,mBAAmB;YAC3B,6EAA6E;YAC7E,uEAAuE;YACvE,2EAA2E;YAC3E,+CAA+C;YAC/C,QAAQ,EAAE,UAAU,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;YACrD,UAAU;YACV,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,sBAAsB,KAAK,WAAW,aAAa,EAAE;YACzE,GAAG,EAAE,gBAAgB,QAAQ,GAAG,KAAK,4EAA4E;SAClH,CAAC,CAAC;IACL,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,CAAC;AACvC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"canonical-consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGlF,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,mBAAmB,GACjC,MAAM,GAAG,IAAI,CAef;AAED,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,EACtB,aAAa,EAAE,mBAAmB,GACjC,UAAU,EAAE,CA+Dd"}
1
+ {"version":3,"file":"canonical-consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAGlF,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,mBAAmB,GACjC,MAAM,GAAG,IAAI,CAef;AAWD,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,EACtB,aAAa,EAAE,mBAAmB,GACjC,UAAU,EAAE,CA+Kd"}
@@ -16,8 +16,22 @@ export function resolveCanonicalUrl(canonical, pageUrl, normalizeOpts) {
16
16
  }
17
17
  return normalizeAuditUrl(resolve(dirname(pageUrl), raw), normalizeOpts);
18
18
  }
19
+ /** Extract the hostname from a URL string, or null if unparseable. */
20
+ function extractHost(url) {
21
+ try {
22
+ return new URL(url).hostname;
23
+ }
24
+ catch {
25
+ return null;
26
+ }
27
+ }
19
28
  export function canonicalConsistencyRule(pages, knownUrls, normalizeOpts) {
29
+ // ── Pass 1: collect out-of-scope findings per page ──────────────────────────
30
+ // We separate "out-of-scope" (canonical host ≠ page host, and not in knownUrls)
31
+ // from other findings so we can decide whether to collapse them.
20
32
  const findings = [];
33
+ // Map from canonical-target-host → array of (pageUrl, canonicalUrl) that had that host
34
+ const outOfScopeByTargetHost = new Map();
21
35
  for (const page of pages) {
22
36
  if (!page.canonical) {
23
37
  findings.push({
@@ -40,35 +54,137 @@ export function canonicalConsistencyRule(pages, knownUrls, normalizeOpts) {
40
54
  });
41
55
  continue;
42
56
  }
43
- if (canonicalUrl === page.url)
57
+ if (canonicalUrl === page.url) {
58
+ // Self-canonical: still check HTTP header conflict
59
+ if (page.httpMeta?.linkHeader) {
60
+ const linkCanonicalMatch = page.httpMeta.linkHeader.match(/<([^>]+)>;\s*rel="canonical"/i);
61
+ if (linkCanonicalMatch) {
62
+ const httpCanonical = normalizeAuditUrl(linkCanonicalMatch[1], normalizeOpts);
63
+ const htmlCanonical = resolveCanonicalUrl(page.canonical, page.url, normalizeOpts);
64
+ if (httpCanonical && htmlCanonical && httpCanonical !== htmlCanonical) {
65
+ findings.push({
66
+ ruleId: "tech/canonical-consistency",
67
+ severity: "error",
68
+ message: `${page.url} has conflicting canonical URLs: HTML says ${htmlCanonical}, HTTP Link header says ${httpCanonical}.`,
69
+ pageUrl: page.url,
70
+ relatedUrls: [htmlCanonical, httpCanonical],
71
+ fix: "Ensure the HTML <link rel='canonical'> and HTTP Link header agree on the canonical URL."
72
+ });
73
+ }
74
+ }
75
+ }
44
76
  continue;
45
- findings.push({
46
- ruleId: "tech/canonical-consistency",
47
- severity: knownUrls.has(canonicalUrl) ? "warning" : "info",
48
- message: knownUrls.has(canonicalUrl)
49
- ? `${page.url} canonicalizes to another crawled page (${canonicalUrl}).`
50
- : `${page.url} canonicalizes outside the crawl scope (${canonicalUrl}).`,
51
- pageUrl: page.url,
52
- relatedUrls: [canonicalUrl],
53
- fix: "Verify this canonical target is intentional."
54
- });
55
- // Check HTTP Link header for canonical
56
- if (page.httpMeta?.linkHeader) {
57
- const linkCanonicalMatch = page.httpMeta.linkHeader.match(/<([^>]+)>;\s*rel="canonical"/i);
58
- if (linkCanonicalMatch) {
59
- const httpCanonical = normalizeAuditUrl(linkCanonicalMatch[1], normalizeOpts);
60
- const htmlCanonical = page.canonical
61
- ? resolveCanonicalUrl(page.canonical, page.url, normalizeOpts)
62
- : null;
63
- if (httpCanonical && htmlCanonical && httpCanonical !== htmlCanonical) {
64
- findings.push({
65
- ruleId: "tech/canonical-consistency",
66
- severity: "error",
67
- message: `${page.url} has conflicting canonical URLs: HTML says ${htmlCanonical}, HTTP Link header says ${httpCanonical}.`,
68
- pageUrl: page.url,
69
- relatedUrls: [htmlCanonical, httpCanonical],
70
- fix: "Ensure the HTML <link rel='canonical'> and HTTP Link header agree on the canonical URL."
71
- });
77
+ }
78
+ // canonical differs from page.url
79
+ const pageHost = extractHost(page.url);
80
+ const canonicalHost = extractHost(canonicalUrl);
81
+ const isOutOfScope = !knownUrls.has(canonicalUrl);
82
+ const isCrossHost = pageHost !== null && canonicalHost !== null && canonicalHost !== pageHost;
83
+ if (isOutOfScope && isCrossHost) {
84
+ // Candidate for collapsing — defer into the bucket keyed by target host
85
+ const bucket = outOfScopeByTargetHost.get(canonicalHost) ?? [];
86
+ bucket.push({ pageUrl: page.url, canonicalUrl });
87
+ outOfScopeByTargetHost.set(canonicalHost, bucket);
88
+ }
89
+ else {
90
+ // Either within-scope (warning) or same-host out-of-scope — emit per-page
91
+ findings.push({
92
+ ruleId: "tech/canonical-consistency",
93
+ severity: knownUrls.has(canonicalUrl) ? "warning" : "info",
94
+ message: knownUrls.has(canonicalUrl)
95
+ ? `${page.url} canonicalizes to another crawled page (${canonicalUrl}).`
96
+ : `${page.url} canonicalizes outside the crawl scope (${canonicalUrl}).`,
97
+ pageUrl: page.url,
98
+ relatedUrls: [canonicalUrl],
99
+ fix: "Verify this canonical target is intentional."
100
+ });
101
+ // HTTP header conflict check (only when we haven't already decided to collapse)
102
+ if (page.httpMeta?.linkHeader) {
103
+ const linkCanonicalMatch = page.httpMeta.linkHeader.match(/<([^>]+)>;\s*rel="canonical"/i);
104
+ if (linkCanonicalMatch) {
105
+ const httpCanonical = normalizeAuditUrl(linkCanonicalMatch[1], normalizeOpts);
106
+ const htmlCanonical = resolveCanonicalUrl(page.canonical, page.url, normalizeOpts);
107
+ if (httpCanonical && htmlCanonical && httpCanonical !== htmlCanonical) {
108
+ findings.push({
109
+ ruleId: "tech/canonical-consistency",
110
+ severity: "error",
111
+ message: `${page.url} has conflicting canonical URLs: HTML says ${htmlCanonical}, HTTP Link header says ${httpCanonical}.`,
112
+ pageUrl: page.url,
113
+ relatedUrls: [htmlCanonical, httpCanonical],
114
+ fix: "Ensure the HTML <link rel='canonical'> and HTTP Link header agree on the canonical URL."
115
+ });
116
+ }
117
+ }
118
+ }
119
+ }
120
+ }
121
+ // ── Pass 2: collapse uniform out-of-scope buckets ───────────────────────────
122
+ // Strategy: if ALL pages point to the SAME alternate host (one bucket with all
123
+ // out-of-scope cross-host pages), emit ONE site-level info. If multiple target
124
+ // hosts exist (inconsistent), keep per-page findings.
125
+ const buckets = [...outOfScopeByTargetHost.entries()];
126
+ if (buckets.length === 0) {
127
+ // Nothing to collapse
128
+ }
129
+ else if (buckets.length === 1) {
130
+ // Every cross-host out-of-scope canonical goes to the same alternate host → collapse
131
+ const [targetHost, entries] = buckets[0];
132
+ const count = entries.length;
133
+ // Infer the crawled host from the first page in the bucket
134
+ const crawledHost = extractHost(entries[0].pageUrl) ?? "the crawled host";
135
+ // If there is only ONE page total pointing to the alternate host,
136
+ // still emit a per-page finding (no "site-level" implication with a single page).
137
+ if (count === 1) {
138
+ const { pageUrl, canonicalUrl } = entries[0];
139
+ findings.push({
140
+ ruleId: "tech/canonical-consistency",
141
+ severity: "info",
142
+ message: `${pageUrl} canonicalizes outside the crawl scope (${canonicalUrl}).`,
143
+ pageUrl,
144
+ relatedUrls: [canonicalUrl],
145
+ fix: "Verify this canonical target is intentional."
146
+ });
147
+ }
148
+ else {
149
+ findings.push({
150
+ ruleId: "tech/canonical-consistency",
151
+ severity: "info",
152
+ message: `${count} pages canonicalize to ${targetHost}, outside the crawled host ${crawledHost} — expected if you crawled a staging/preview origin.`,
153
+ relatedUrls: entries.map((e) => e.canonicalUrl).slice(0, 10),
154
+ fix: "If this site is live at the canonical host, the canonicals are correct. If not, verify the canonical URLs."
155
+ });
156
+ }
157
+ }
158
+ else {
159
+ // Multiple target hosts — inconsistent cross-host canonicals → per-page findings
160
+ for (const [, entries] of buckets) {
161
+ for (const { pageUrl, canonicalUrl } of entries) {
162
+ findings.push({
163
+ ruleId: "tech/canonical-consistency",
164
+ severity: "info",
165
+ message: `${pageUrl} canonicalizes outside the crawl scope (${canonicalUrl}).`,
166
+ pageUrl,
167
+ relatedUrls: [canonicalUrl],
168
+ fix: "Verify this canonical target is intentional."
169
+ });
170
+ // HTTP header conflict check for deferred pages
171
+ const pageDef = pages.find((p) => p.url === pageUrl);
172
+ if (pageDef?.httpMeta?.linkHeader) {
173
+ const linkCanonicalMatch = pageDef.httpMeta.linkHeader.match(/<([^>]+)>;\s*rel="canonical"/i);
174
+ if (linkCanonicalMatch) {
175
+ const httpCanonical = normalizeAuditUrl(linkCanonicalMatch[1], normalizeOpts);
176
+ const htmlCanonical = resolveCanonicalUrl(pageDef.canonical, pageDef.url, normalizeOpts);
177
+ if (httpCanonical && htmlCanonical && httpCanonical !== htmlCanonical) {
178
+ findings.push({
179
+ ruleId: "tech/canonical-consistency",
180
+ severity: "error",
181
+ message: `${pageUrl} has conflicting canonical URLs: HTML says ${htmlCanonical}, HTTP Link header says ${httpCanonical}.`,
182
+ pageUrl,
183
+ relatedUrls: [htmlCanonical, httpCanonical],
184
+ fix: "Ensure the HTML <link rel='canonical'> and HTTP Link header agree on the canonical URL."
185
+ });
186
+ }
187
+ }
72
188
  }
73
189
  }
74
190
  }
@@ -1 +1 @@
1
- {"version":3,"file":"canonical-consistency.js","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAE7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAE3D,MAAM,UAAU,mBAAmB,CACjC,SAAiB,EACjB,OAAe,EACf,aAAkC;IAElC,MAAM,GAAG,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IAC7B,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,IAAI,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,iBAAiB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;IAE5E,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,OAAO,iBAAiB,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QACtE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,GAAG,CAAC,EAAE,aAAa,CAAC,CAAC;AAC1E,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,KAAmB,EACnB,SAAsB,EACtB,aAAkC;IAElC,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8BAA8B;gBAClD,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,mCAAmC,IAAI,CAAC,GAAG,qBAAqB;aACtE,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,YAAY,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QAClF,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,kCAAkC,IAAI,CAAC,SAAS,GAAG;gBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,+BAA+B;aACrC,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG;YAAE,SAAS;QAExC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,4BAA4B;YACpC,QAAQ,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM;YAC1D,OAAO,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC;gBAClC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,2CAA2C,YAAY,IAAI;gBACxE,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,2CAA2C,YAAY,IAAI;YAC1E,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,WAAW,EAAE,CAAC,YAAY,CAAC;YAC3B,GAAG,EAAE,8CAA8C;SACpD,CAAC,CAAC;QAEH,uCAAuC;QACvC,IAAI,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;YAC9B,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;YAC3F,IAAI,kBAAkB,EAAE,CAAC;gBACvB,MAAM,aAAa,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;gBAC9E,MAAM,aAAa,GAAG,IAAI,CAAC,SAAS;oBAClC,CAAC,CAAC,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC;oBAC9D,CAAC,CAAC,IAAI,CAAC;gBACT,IAAI,aAAa,IAAI,aAAa,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;oBACtE,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,4BAA4B;wBACpC,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8CAA8C,aAAa,2BAA2B,aAAa,GAAG;wBAC1H,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,WAAW,EAAE,CAAC,aAAa,EAAE,aAAa,CAAC;wBAC3C,GAAG,EAAE,yFAAyF;qBAC/F,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"canonical-consistency.js","sourceRoot":"","sources":["../../../src/rules/tech/canonical-consistency.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAE7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAE3D,MAAM,UAAU,mBAAmB,CACjC,SAAiB,EACjB,OAAe,EACf,aAAkC;IAElC,MAAM,GAAG,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IAC7B,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IAEtB,IAAI,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,iBAAiB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;IAE5E,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,OAAO,iBAAiB,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QACtE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,GAAG,CAAC,EAAE,aAAa,CAAC,CAAC;AAC1E,CAAC;AAED,sEAAsE;AACtE,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,KAAmB,EACnB,SAAsB,EACtB,aAAkC;IAElC,+EAA+E;IAC/E,gFAAgF;IAChF,iEAAiE;IAEjE,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,uFAAuF;IACvF,MAAM,sBAAsB,GAAG,IAAI,GAAG,EAA4D,CAAC;IAEnG,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8BAA8B;gBAClD,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,mCAAmC,IAAI,CAAC,GAAG,qBAAqB;aACtE,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,YAAY,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;QAClF,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,kCAAkC,IAAI,CAAC,SAAS,GAAG;gBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,+BAA+B;aACrC,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,IAAI,YAAY,KAAK,IAAI,CAAC,GAAG,EAAE,CAAC;YAC9B,mDAAmD;YACnD,IAAI,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;gBAC9B,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;gBAC3F,IAAI,kBAAkB,EAAE,CAAC;oBACvB,MAAM,aAAa,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;oBAC9E,MAAM,aAAa,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;oBACnF,IAAI,aAAa,IAAI,aAAa,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;wBACtE,QAAQ,CAAC,IAAI,CAAC;4BACZ,MAAM,EAAE,4BAA4B;4BACpC,QAAQ,EAAE,OAAO;4BACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8CAA8C,aAAa,2BAA2B,aAAa,GAAG;4BAC1H,OAAO,EAAE,IAAI,CAAC,GAAG;4BACjB,WAAW,EAAE,CAAC,aAAa,EAAE,aAAa,CAAC;4BAC3C,GAAG,EAAE,yFAAyF;yBAC/F,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;YACD,SAAS;QACX,CAAC;QAED,kCAAkC;QAClC,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvC,MAAM,aAAa,GAAG,WAAW,CAAC,YAAY,CAAC,CAAC;QAChD,MAAM,YAAY,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QAClD,MAAM,WAAW,GAAG,QAAQ,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,IAAI,aAAa,KAAK,QAAQ,CAAC;QAE9F,IAAI,YAAY,IAAI,WAAW,EAAE,CAAC;YAChC,wEAAwE;YACxE,MAAM,MAAM,GAAG,sBAAsB,CAAC,GAAG,CAAC,aAAc,CAAC,IAAI,EAAE,CAAC;YAChE,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE,YAAY,EAAE,CAAC,CAAC;YACjD,sBAAsB,CAAC,GAAG,CAAC,aAAc,EAAE,MAAM,CAAC,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,0EAA0E;YAC1E,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM;gBAC1D,OAAO,EAAE,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC;oBAClC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,2CAA2C,YAAY,IAAI;oBACxE,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,2CAA2C,YAAY,IAAI;gBAC1E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,WAAW,EAAE,CAAC,YAAY,CAAC;gBAC3B,GAAG,EAAE,8CAA8C;aACpD,CAAC,CAAC;YAEH,gFAAgF;YAChF,IAAI,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,CAAC;gBAC9B,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;gBAC3F,IAAI,kBAAkB,EAAE,CAAC;oBACvB,MAAM,aAAa,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;oBAC9E,MAAM,aAAa,GAAG,mBAAmB,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;oBACnF,IAAI,aAAa,IAAI,aAAa,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;wBACtE,QAAQ,CAAC,IAAI,CAAC;4BACZ,MAAM,EAAE,4BAA4B;4BACpC,QAAQ,EAAE,OAAO;4BACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8CAA8C,aAAa,2BAA2B,aAAa,GAAG;4BAC1H,OAAO,EAAE,IAAI,CAAC,GAAG;4BACjB,WAAW,EAAE,CAAC,aAAa,EAAE,aAAa,CAAC;4BAC3C,GAAG,EAAE,yFAAyF;yBAC/F,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,+EAA+E;IAC/E,+EAA+E;IAC/E,+EAA+E;IAC/E,sDAAsD;IAEtD,MAAM,OAAO,GAAG,CAAC,GAAG,sBAAsB,CAAC,OAAO,EAAE,CAAC,CAAC;IAEtD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,sBAAsB;IACxB,CAAC;SAAM,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,qFAAqF;QACrF,MAAM,CAAC,UAAU,EAAE,OAAO,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC;QAC7B,2DAA2D;QAC3D,MAAM,WAAW,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,kBAAkB,CAAC;QAE1E,kEAAkE;QAClE,kFAAkF;QAClF,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;YAChB,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC7C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,GAAG,OAAO,2CAA2C,YAAY,IAAI;gBAC9E,OAAO;gBACP,WAAW,EAAE,CAAC,YAAY,CAAC;gBAC3B,GAAG,EAAE,8CAA8C;aACpD,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,GAAG,KAAK,0BAA0B,UAAU,8BAA8B,WAAW,sDAAsD;gBACpJ,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;gBAC5D,GAAG,EAAE,4GAA4G;aAClH,CAAC,CAAC;QACL,CAAC;IACH,CAAC;SAAM,CAAC;QACN,iFAAiF;QACjF,KAAK,MAAM,CAAC,EAAE,OAAO,CAAC,IAAI,OAAO,EAAE,CAAC;YAClC,KAAK,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,IAAI,OAAO,EAAE,CAAC;gBAChD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,4BAA4B;oBACpC,QAAQ,EAAE,MAAM;oBAChB,OAAO,EAAE,GAAG,OAAO,2CAA2C,YAAY,IAAI;oBAC9E,OAAO;oBACP,WAAW,EAAE,CAAC,YAAY,CAAC;oBAC3B,GAAG,EAAE,8CAA8C;iBACpD,CAAC,CAAC;gBAEH,gDAAgD;gBAChD,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,OAAO,CAAC,CAAC;gBACrD,IAAI,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;oBAClC,MAAM,kBAAkB,GAAG,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;oBAC9F,IAAI,kBAAkB,EAAE,CAAC;wBACvB,MAAM,aAAa,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;wBAC9E,MAAM,aAAa,GAAG,mBAAmB,CAAC,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,GAAG,EAAE,aAAa,CAAC,CAAC;wBACzF,IAAI,aAAa,IAAI,aAAa,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;4BACtE,QAAQ,CAAC,IAAI,CAAC;gCACZ,MAAM,EAAE,4BAA4B;gCACpC,QAAQ,EAAE,OAAO;gCACjB,OAAO,EAAE,GAAG,OAAO,8CAA8C,aAAa,2BAA2B,aAAa,GAAG;gCACzH,OAAO;gCACP,WAAW,EAAE,CAAC,aAAa,EAAE,aAAa,CAAC;gCAC3C,GAAG,EAAE,yFAAyF;6BAC/F,CAAC,CAAC;wBACL,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1,3 +1,15 @@
1
- import type { ParsedPage, RuleResult } from "../../types.js";
2
- export declare function sitemapCompletenessRule(pages: ParsedPage[], sitemapUrls: Set<string>): RuleResult[];
1
+ import type { NormalizeUrlOptions, ParsedPage, RuleResult } from "../../types.js";
2
+ export interface SitemapCompletenessOptions {
3
+ /**
4
+ * True when the audit ran on a sampled or link-discovery crawl.
5
+ * On sampled crawls it is normal to find pages not listed in the sitemap
6
+ * (they were discovered via links, not sitemap), so the aggregate
7
+ * "missing from sitemap" finding is demoted to `warning`.
8
+ * Wire this from the auditor's `isSampledAudit` flag.
9
+ */
10
+ sampled?: boolean;
11
+ /** URL normalization options that match what the auditor used when building page.url. */
12
+ normalizeUrlOptions?: NormalizeUrlOptions;
13
+ }
14
+ export declare function sitemapCompletenessRule(pages: ParsedPage[], sitemapUrls: Set<string>, options?: SitemapCompletenessOptions): RuleResult[];
3
15
  //# sourceMappingURL=sitemap-completeness.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"sitemap-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/sitemap-completeness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,GACvB,UAAU,EAAE,CA0Dd"}
1
+ {"version":3,"file":"sitemap-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/sitemap-completeness.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAElF,MAAM,WAAW,0BAA0B;IACzC;;;;;;OAMG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,yFAAyF;IACzF,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;CAC3C;AAED,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EACxB,OAAO,CAAC,EAAE,0BAA0B,GACnC,UAAU,EAAE,CA6Ed"}