@pseolint/core 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/dist/algorithms/authority/commoncrawl.d.ts +13 -0
  2. package/dist/algorithms/authority/commoncrawl.d.ts.map +1 -0
  3. package/dist/algorithms/authority/commoncrawl.js +17 -0
  4. package/dist/algorithms/authority/commoncrawl.js.map +1 -0
  5. package/dist/algorithms/authority/openpagerank.d.ts +19 -0
  6. package/dist/algorithms/authority/openpagerank.d.ts.map +1 -0
  7. package/dist/algorithms/authority/openpagerank.js +42 -0
  8. package/dist/algorithms/authority/openpagerank.js.map +1 -0
  9. package/dist/algorithms/authority/provider.d.ts +16 -0
  10. package/dist/algorithms/authority/provider.d.ts.map +1 -0
  11. package/dist/algorithms/authority/provider.js +24 -0
  12. package/dist/algorithms/authority/provider.js.map +1 -0
  13. package/dist/algorithms/auto-entity-mask.d.ts +19 -0
  14. package/dist/algorithms/auto-entity-mask.d.ts.map +1 -0
  15. package/dist/algorithms/auto-entity-mask.js +102 -0
  16. package/dist/algorithms/auto-entity-mask.js.map +1 -0
  17. package/dist/algorithms/example-regions.d.ts +22 -0
  18. package/dist/algorithms/example-regions.d.ts.map +1 -0
  19. package/dist/algorithms/example-regions.js +32 -0
  20. package/dist/algorithms/example-regions.js.map +1 -0
  21. package/dist/algorithms/fact-extraction.d.ts.map +1 -1
  22. package/dist/algorithms/fact-extraction.js +6 -0
  23. package/dist/algorithms/fact-extraction.js.map +1 -1
  24. package/dist/auditor.d.ts.map +1 -1
  25. package/dist/auditor.js +39 -9
  26. package/dist/auditor.js.map +1 -1
  27. package/dist/enrich-findings.d.ts.map +1 -1
  28. package/dist/enrich-findings.js +9 -8
  29. package/dist/enrich-findings.js.map +1 -1
  30. package/dist/index.d.ts +7 -0
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +6 -0
  33. package/dist/index.js.map +1 -1
  34. package/dist/rules/aeo/crawler-access.d.ts +14 -0
  35. package/dist/rules/aeo/crawler-access.d.ts.map +1 -1
  36. package/dist/rules/aeo/crawler-access.js +96 -15
  37. package/dist/rules/aeo/crawler-access.js.map +1 -1
  38. package/dist/rules/aeo/summary-bait.d.ts.map +1 -1
  39. package/dist/rules/aeo/summary-bait.js +4 -3
  40. package/dist/rules/aeo/summary-bait.js.map +1 -1
  41. package/dist/rules/content/common-phrase-reuse.d.ts.map +1 -1
  42. package/dist/rules/content/common-phrase-reuse.js +7 -2
  43. package/dist/rules/content/common-phrase-reuse.js.map +1 -1
  44. package/dist/rules/content/eeat-signals.d.ts +13 -0
  45. package/dist/rules/content/eeat-signals.d.ts.map +1 -1
  46. package/dist/rules/content/eeat-signals.js +36 -4
  47. package/dist/rules/content/eeat-signals.js.map +1 -1
  48. package/dist/rules/content/regurgitated-content.d.ts.map +1 -1
  49. package/dist/rules/content/regurgitated-content.js +11 -2
  50. package/dist/rules/content/regurgitated-content.js.map +1 -1
  51. package/dist/rules/content/translation-no-op.d.ts.map +1 -1
  52. package/dist/rules/content/translation-no-op.js +5 -1
  53. package/dist/rules/content/translation-no-op.js.map +1 -1
  54. package/dist/rules/content/unique-value.d.ts +15 -1
  55. package/dist/rules/content/unique-value.d.ts.map +1 -1
  56. package/dist/rules/content/unique-value.js +46 -39
  57. package/dist/rules/content/unique-value.js.map +1 -1
  58. package/dist/rules/content/value-add.d.ts +8 -2
  59. package/dist/rules/content/value-add.d.ts.map +1 -1
  60. package/dist/rules/content/value-add.js +39 -48
  61. package/dist/rules/content/value-add.js.map +1 -1
  62. package/dist/rules/content/wikipedia-paraphrase.d.ts +12 -7
  63. package/dist/rules/content/wikipedia-paraphrase.d.ts.map +1 -1
  64. package/dist/rules/content/wikipedia-paraphrase.js +52 -13
  65. package/dist/rules/content/wikipedia-paraphrase.js.map +1 -1
  66. package/dist/rules/links/cluster-connectivity.d.ts +7 -1
  67. package/dist/rules/links/cluster-connectivity.d.ts.map +1 -1
  68. package/dist/rules/links/cluster-connectivity.js +8 -2
  69. package/dist/rules/links/cluster-connectivity.js.map +1 -1
  70. package/dist/rules/links/orphan-pages.d.ts +8 -1
  71. package/dist/rules/links/orphan-pages.d.ts.map +1 -1
  72. package/dist/rules/links/orphan-pages.js +10 -1
  73. package/dist/rules/links/orphan-pages.js.map +1 -1
  74. package/dist/rules/schema/consistency.d.ts.map +1 -1
  75. package/dist/rules/schema/consistency.js +37 -21
  76. package/dist/rules/schema/consistency.js.map +1 -1
  77. package/dist/rules/schema/json-ld-valid.d.ts.map +1 -1
  78. package/dist/rules/schema/json-ld-valid.js +8 -1
  79. package/dist/rules/schema/json-ld-valid.js.map +1 -1
  80. package/dist/rules/schema/required-fields.d.ts.map +1 -1
  81. package/dist/rules/schema/required-fields.js +47 -1
  82. package/dist/rules/schema/required-fields.js.map +1 -1
  83. package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -1
  84. package/dist/rules/spam/boilerplate-ratio.js +36 -22
  85. package/dist/rules/spam/boilerplate-ratio.js.map +1 -1
  86. package/dist/rules/spam/entity-swap.d.ts.map +1 -1
  87. package/dist/rules/spam/entity-swap.js +51 -9
  88. package/dist/rules/spam/entity-swap.js.map +1 -1
  89. package/dist/rules/spam/template-diversity.d.ts.map +1 -1
  90. package/dist/rules/spam/template-diversity.js +37 -2
  91. package/dist/rules/spam/template-diversity.js.map +1 -1
  92. package/dist/rules/spam/thin-content.d.ts.map +1 -1
  93. package/dist/rules/spam/thin-content.js +5 -1
  94. package/dist/rules/spam/thin-content.js.map +1 -1
  95. package/dist/rules/tech/canonical-consistency.d.ts.map +1 -1
  96. package/dist/rules/tech/canonical-consistency.js +144 -28
  97. package/dist/rules/tech/canonical-consistency.js.map +1 -1
  98. package/dist/rules/tech/og-completeness.d.ts +8 -3
  99. package/dist/rules/tech/og-completeness.d.ts.map +1 -1
  100. package/dist/rules/tech/og-completeness.js +15 -7
  101. package/dist/rules/tech/og-completeness.js.map +1 -1
  102. package/dist/rules/tech/sitemap-completeness.d.ts +14 -2
  103. package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -1
  104. package/dist/rules/tech/sitemap-completeness.js +21 -5
  105. package/dist/rules/tech/sitemap-completeness.js.map +1 -1
  106. package/dist/rules/tech/soft-404.d.ts +11 -0
  107. package/dist/rules/tech/soft-404.d.ts.map +1 -1
  108. package/dist/rules/tech/soft-404.js +47 -5
  109. package/dist/rules/tech/soft-404.js.map +1 -1
  110. package/dist/template-detection.d.ts +1 -0
  111. package/dist/template-detection.d.ts.map +1 -1
  112. package/dist/template-detection.js +1 -1
  113. package/dist/template-detection.js.map +1 -1
  114. package/dist/types.d.ts +16 -1
  115. package/dist/types.d.ts.map +1 -1
  116. package/package.json +109 -93
@@ -1 +1 @@
1
- {"version":3,"file":"regurgitated-content.js","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAGnC,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAE/C;;;;;;;GAOG;AAEH,+DAA+D;AAC/D,MAAM,qBAAqB,GAAG,oBAAoB,CAAC;AAEnD,MAAM,gBAAgB,GAAG;IACvB,uBAAuB;IACvB,2BAA2B;IAC3B,0CAA0C;IAC1C,oCAAoC;CACrC,CAAC;AAEF,MAAM,iBAAiB,GAAG,8CAA8C,CAAC;AACzE,MAAM,iBAAiB,GAAG,iDAAiD,CAAC;AAE5E,MAAM,gBAAgB,GACpB,6FAA6F,CAAC;AAEhG,+EAA+E;AAC/E,kEAAkE;AAClE,MAAM,cAAc,GAAG,sDAAsD,CAAC;AAE9E,MAAM,0BAA0B,GAAG,CAAC,CAAC;AACrC,MAAM,0BAA0B,GAAG,GAAG,CAAC;AACvC,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAO5B,SAAS,sBAAsB,CAAC,CAAqB;IACnD,MAAM,KAAK,GAAG,2BAA2B,CAAC;IAC1C,IAAI,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IACxE,MAAM,SAAS,GAAG,CAAC,CAAC,6CAA6C,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC9E,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,yBAAyB,CAAC,CAAqB;IACtD,MAAM,KAAK,GAAG,uCAAuC,CAAC;IACtD,MAAM,IAAI,GAAG,CAAC,CAAC,UAAU,CAAC;SACvB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC,CAAC;IACnB,IAAI,IAAI,CAAC,MAAM,GAAG,0BAA0B;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC7E,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC;IACxD,OAAO,EAAE,KAAK,EAAE,WAAW,GAAG,IAAI,CAAC,MAAM,IAAI,0BAA0B,EAAE,KAAK,EAAE,CAAC;AACnF,CAAC;AAED,SAAS,oBAAoB,CAAC,CAAqB,EAAE,IAAY;IAC/D,MAAM,KAAK,GAAG,kCAAkC,CAAC;IACjD,IAAI,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IAChE,MAAM,cAAc,GAClB,CAAC,CAAC,aAAa,CAAC;SACb,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,OAAO,E
AAE,KAAK,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC;AAC1C,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,OAAO,EAAE,KAAK,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC;AAChF,CAAC;AAED,SAAS,eAAe,CAAC,IAAgB;IACvC,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAChF,IAAI,IAAI,CAAC,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,CAAC,IAAI,CAAC,CAAC;IACjE,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,wBAAwB,CAAC,CAAqB,EAAE,IAAY;IACnE,MAAM,KAAK,GAAG,2DAA2D,CAAC;IAC1E,IAAI,IAAI,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC;SACzC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;SACpD,MAAM,CAAC;IACV,OAAO,EAAE,KAAK,EAAE,KAAK,IAAI,iBAAiB,EAAE,KAAK,EAAE,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,OAAO,GAAmB;YAC9B,sBAAsB,CAAC,CAAC,CAAC;YACzB,yBAAyB,CAAC,CAAC,CAAC;YAC5B,oBAAoB,CAAC,CAAC,EAAE,IAAI,CAAC;YAC7B,gBAAgB,CAAC,IAAI,CAAC;YACtB,wBAAwB,CAAC,CAAC,EAAE,IAAI,CAAC;SAClC,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC7C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAE/B,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxD,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,KAAK,KAAK,CAAC,MAAM,2CAA2C,UAAU,GAAG;YAC7F,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EAAE,8TAA8T;YACnU,UAAU,EAAE,aAAa;SAC1B,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,
CAAC"}
1
+ {"version":3,"file":"regurgitated-content.js","sourceRoot":"","sources":["../../../src/rules/content/regurgitated-content.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,OAAO,EAAE,uBAAuB,EAAE,MAAM,qCAAqC,CAAC;AAE9E,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAE/C;;;;;;;GAOG;AAEH,+DAA+D;AAC/D,MAAM,qBAAqB,GAAG,oBAAoB,CAAC;AAEnD,MAAM,gBAAgB,GAAG;IACvB,uBAAuB;IACvB,2BAA2B;IAC3B,0CAA0C;IAC1C,oCAAoC;CACrC,CAAC;AAEF,MAAM,iBAAiB,GAAG,8CAA8C,CAAC;AACzE,MAAM,iBAAiB,GAAG,iDAAiD,CAAC;AAE5E,MAAM,gBAAgB,GACpB,6FAA6F,CAAC;AAEhG,+EAA+E;AAC/E,kEAAkE;AAClE,MAAM,cAAc,GAAG,sDAAsD,CAAC;AAE9E,MAAM,0BAA0B,GAAG,CAAC,CAAC;AACrC,MAAM,0BAA0B,GAAG,GAAG,CAAC;AACvC,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAO5B,SAAS,sBAAsB,CAAC,CAAqB;IACnD,MAAM,KAAK,GAAG,2BAA2B,CAAC;IAC1C,IAAI,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IACxE,MAAM,SAAS,GAAG,CAAC,CAAC,6CAA6C,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAC9E,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,eAAe,CAAC,GAAW;IAClC,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,yBAAyB,CAAC,CAAqB;IACtD,MAAM,KAAK,GAAG,uCAAuC,CAAC;IACtD,MAAM,IAAI,GAAG,CAAC,CAAC,UAAU,CAAC;SACvB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC,CAAC;IACnB,IAAI,IAAI,CAAC,MAAM,GAAG,0BAA0B;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC7E,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC;IACxD,OAAO,EAAE,KAAK,EAAE,WAAW,GAAG,IAAI,CAAC,MAAM,IAAI,0BAA0B,EAAE,KAAK,EAAE,CAAC;AACnF,CAAC;AAED,SAAS,oBAAoB,CAAC,CAAqB,EAAE,IAAY;IAC/D,MAAM,KAAK,GAAG,kCAAkC,CAAC;IACjD,IAAI,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IAChE,MAAM,cAAc,GAClB,CAAC,CAAC,aAAa,CAAC;SACb,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;SACvC,GAAG,EAAE;SACL,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,iBAAiB,CA
AC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,KAAK,EAAE,CAAC;AAC1C,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IACpC,OAAO,EAAE,KAAK,EAAE,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC;AAChF,CAAC;AAED,SAAS,eAAe,CAAC,IAAgB;IACvC,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAChF,IAAI,IAAI,CAAC,aAAa;QAAE,CAAC,IAAI,CAAC,CAAC;IAC/B,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,CAAC,IAAI,CAAC,CAAC;IACjE,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,wBAAwB,CAAC,CAAqB,EAAE,IAAY;IACnE,MAAM,KAAK,GAAG,2DAA2D,CAAC;IAC1E,IAAI,IAAI,IAAI,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,CAAC,CAAC,2BAA2B,CAAC;SACzC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;SACpD,MAAM,CAAC;IACV,OAAO,EAAE,KAAK,EAAE,KAAK,IAAI,iBAAiB,EAAE,KAAK,EAAE,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAC7B,IAAI,CAAC,IAAI;YAAE,SAAS;QAEpB,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,6EAA6E;QAC7E,4EAA4E;QAC5E,4EAA4E;QAC5E,2EAA2E;QAC3E,6EAA6E;QAC7E,qDAAqD;QACrD,CAAC,CAAC,uBAAuB,CAAC,CAAC,MAAM,EAAE,CAAC;QACpC,MAAM,WAAW,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,OAAO,GAAmB;YAC9B,sBAAsB,CAAC,CAAC,CAAC;YACzB,yBAAyB,CAAC,CAAC,CAAC;YAC5B,oBAAoB,CAAC,CAAC,EAAE,WAAW,CAAC;YACpC,gBAAgB,CAAC,WAAW,CAAC;YAC7B,wBAAwB,CAAC,CAAC,EAAE,IAAI,CAAC;SAClC,CAAC;QAEF,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAC7C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAE/B,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxD,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YAC
f,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,KAAK,KAAK,CAAC,MAAM,2CAA2C,UAAU,GAAG;YAC7F,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EAAE,8TAA8T;YACnU,UAAU,EAAE,aAAa;SAC1B,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"translation-no-op.d.ts","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAoC7D;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgErE"}
1
+ {"version":3,"file":"translation-no-op.d.ts","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAoC7D;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoErE"}
@@ -88,7 +88,11 @@ export function translationNoOpRule(pages) {
88
88
  : `${(minSim * 100).toFixed(0)}%--${(maxSim * 100).toFixed(0)}%`;
89
89
  findings.push({
90
90
  ruleId: "content/translation-no-op",
91
- severity: "error",
91
+ // Warning, not error: an untranslated locale variant is a real duplicate-
92
+ // content gap but a should-fix, not a ship-blocker — and multilingual sites
93
+ // can legitimately share some body text (disclaimers, spec tables).
94
+ severity: "warning",
95
+ confidence: "medium",
92
96
  message: `${members.length} locale variants of "${basePath}" share identical content ` +
93
97
  `(similarity ${simLabel}). Translate the body or consolidate to the canonical version.`,
94
98
  pageUrl: urls[0],
@@ -1 +1 @@
1
- {"version":3,"file":"translation-no-op.js","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAGvG,MAAM,gBAAgB,GAAG,kCAAkC,CAAC;AAC5D,MAAM,oBAAoB,GAAG,IAAI,CAAC;AAClC;;;;;;;GAOG;AACH,MAAM,+BAA+B,GAAG,EAAE,CAAC;AAE3C;;;GAGG;AACH,SAAS,iBAAiB,CAAC,QAAgB;IACzC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1C,IAAI,CAAC,CAAC;QAAE,OAAO,QAAQ,CAAC;IACxB,oCAAoC;IACpC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC9D,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAmB;IACrD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,4CAA4C;IAC5C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAuD,CAAC;IAE9E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,CAAC,CAAC;YAAE,SAAS,CAAC,2BAA2B;QAC7C,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACzC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEjC,uEAAuE;QACvE,uEAAuE;QACvE,wEAAwE;QACxE,kEAAkE;QAClE,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,+BAA+B,CAChG,CAAC;QACF,IAAI,WAAW;YAAE,SAAS;QAE1B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;QACvE,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,QAAQ,GAAG,KAAK,CAA
C;QAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/C,MAAM,GAAG,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1E,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,IAAI,oBAAoB;oBAAE,QAAQ,GAAG,IAAI,CAAC;YACnD,CAAC;QACH,CAAC;QAED,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,QAAQ,GACZ,MAAM,KAAK,MAAM;YACf,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;YACjC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QAErE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,2BAA2B;YACnC,QAAQ,EAAE,OAAO;YACjB,OAAO,EACL,GAAG,OAAO,CAAC,MAAM,wBAAwB,QAAQ,4BAA4B;gBAC7E,eAAe,QAAQ,gEAAgE;YACzF,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;YAChB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAC7B,GAAG,EAAE,qKAAqK;SAC3K,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"translation-no-op.js","sourceRoot":"","sources":["../../../src/rules/content/translation-no-op.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAGvG,MAAM,gBAAgB,GAAG,kCAAkC,CAAC;AAC5D,MAAM,oBAAoB,GAAG,IAAI,CAAC;AAClC;;;;;;;GAOG;AACH,MAAM,+BAA+B,GAAG,EAAE,CAAC;AAE3C;;;GAGG;AACH,SAAS,iBAAiB,CAAC,QAAgB;IACzC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC1C,IAAI,CAAC,CAAC;QAAE,OAAO,QAAQ,CAAC;IACxB,oCAAoC;IACpC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC9D,CAAC;AAED;;GAEG;AACH,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,CAAC;QACH,OAAO,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,GAAG,CAAC;IACb,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAmB;IACrD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,4CAA4C;IAC5C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAuD,CAAC;IAE9E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvC,MAAM,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1C,IAAI,CAAC,CAAC;YAAE,SAAS,CAAC,2BAA2B;QAC7C,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,MAAM,CAAC,QAAQ,EAAE,OAAO,CAAC,IAAI,MAAM,EAAE,CAAC;QACzC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEjC,uEAAuE;QACvE,uEAAuE;QACvE,wEAAwE;QACxE,kEAAkE;QAClE,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,+BAA+B,CAChG,CAAC;QACF,IAAI,WAAW;YAAE,SAAS;QAE1B,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;QACvE,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,QAAQ,GAAG,KAAK,CAA
C;QAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/C,MAAM,GAAG,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1E,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,GAAG,MAAM;oBAAE,MAAM,GAAG,GAAG,CAAC;gBAC/B,IAAI,GAAG,IAAI,oBAAoB;oBAAE,QAAQ,GAAG,IAAI,CAAC;YACnD,CAAC;QACH,CAAC;QAED,IAAI,CAAC,QAAQ;YAAE,SAAS;QAExB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5C,MAAM,QAAQ,GACZ,MAAM,KAAK,MAAM;YACf,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;YACjC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QAErE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,2BAA2B;YACnC,0EAA0E;YAC1E,4EAA4E;YAC5E,oEAAoE;YACpE,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,QAAQ;YACpB,OAAO,EACL,GAAG,OAAO,CAAC,MAAM,wBAAwB,QAAQ,4BAA4B;gBAC7E,eAAe,QAAQ,gEAAgE;YACzF,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;YAChB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAC7B,GAAG,EAAE,qKAAqK;SAC3K,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1,3 +1,17 @@
1
1
  import type { ParsedPage, RuleResult } from "../../types.js";
2
- export declare function uniqueValueRule(pages: ParsedPage[], minUniqueWords: number): RuleResult[];
2
+ export interface UniqueValueThresholds {
3
+ /** Unique-content density below this fires (info). Default 0.20. */
4
+ passBelow: number;
5
+ /** Density below this escalates to error. Default 0.12. */
6
+ errorBelow: number;
7
+ }
8
+ /**
9
+ * Originality as a corpus-relative DENSITY, not an absolute count. Each distinct
10
+ * token is weighted by normalized IDF (ln(N/df)/ln(N)) — 1 if page-exclusive, ~0
11
+ * if on every page — and averaged over the page's distinct tokens. A near-
12
+ * duplicate / boilerplate page scores low regardless of corpus size or length; a
13
+ * large original page stays high. Continuous, so it doesn't shuffle at the margin.
14
+ * Volume is spam/thin-content's job; exact twins are spam/near-duplicate's.
15
+ */
16
+ export declare function uniqueValueRule(pages: ParsedPage[], thresholds: UniqueValueThresholds): RuleResult[];
3
17
  //# sourceMappingURL=unique-value.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"unique-value.d.ts","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAe7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,cAAc,EAAE,MAAM,GAAG,UAAU,EAAE,CAuCzF"}
1
+ {"version":3,"file":"unique-value.d.ts","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,WAAW,qBAAqB;IACpC,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;CACpB;AAYD;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,UAAU,EAAE,qBAAqB,GAChC,UAAU,EAAE,CAwCd"}
@@ -1,51 +1,58 @@
1
1
  function tokenize(text) {
2
- // Strip leading/trailing punctuation so "word", "word." and "(word)" count as
3
- // the SAME token. Without this, surrounding punctuation spuriously inflated
4
- // the "unique" count (a word that's shared but happens to carry a trailing
5
- // comma on one page looked unique) — false precision in the shared/unique
6
- // split this rule now surfaces.
2
+ // Lowercase, split on whitespace, strip edge punctuation so "word", "word."
3
+ // and "(word)" are one token.
7
4
  return text
8
5
  .toLowerCase()
9
6
  .split(/\s+/)
10
7
  .map((t) => t.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ""))
11
8
  .filter(Boolean);
12
9
  }
13
- export function uniqueValueRule(pages, minUniqueWords) {
14
- const frequencies = new Map();
15
- const pageTokens = pages.map((page) => tokenize(page.contentText));
16
- for (const tokens of pageTokens) {
17
- for (const token of new Set(tokens)) {
18
- frequencies.set(token, (frequencies.get(token) ?? 0) + 1);
19
- }
10
+ /**
11
+ * Originality as a corpus-relative DENSITY, not an absolute count. Each distinct
12
+ * token is weighted by normalized IDF (ln(N/df)/ln(N)) — 1 if page-exclusive, ~0
13
+ * if on every page — and averaged over the page's distinct tokens. A near-
14
+ * duplicate / boilerplate page scores low regardless of corpus size or length; a
15
+ * large original page stays high. Continuous, so it doesn't shuffle at the margin.
16
+ * Volume is spam/thin-content's job; exact twins are spam/near-duplicate's.
17
+ */
18
+ export function uniqueValueRule(pages, thresholds) {
19
+ const { passBelow, errorBelow } = thresholds;
20
+ const N = pages.length;
21
+ const lnN = Math.log(N);
22
+ if (N <= 1 || lnN === 0)
23
+ return []; // can't measure rarity against a single page
24
+ const df = new Map();
25
+ const pageDistinct = pages.map((p) => new Set(tokenize(p.contentText)));
26
+ for (const distinct of pageDistinct) {
27
+ for (const t of distinct)
28
+ df.set(t, (df.get(t) ?? 0) + 1);
20
29
  }
21
30
  const findings = [];
22
- pages.forEach((page, idx) => {
23
- const distinct = new Set(pageTokens[idx]);
24
- let uniqueCount = 0;
25
- let sharedCount = 0;
26
- for (const token of distinct) {
27
- if ((frequencies.get(token) ?? 0) === 1)
28
- uniqueCount += 1;
29
- else
30
- sharedCount += 1;
31
- }
32
- if (uniqueCount < minUniqueWords) {
33
- const needed = minUniqueWords - uniqueCount;
34
- findings.push({
35
- ruleId: "content/unique-value",
36
- severity: "error",
37
- // Surface the shared-vs-unique split so the author can see that most of
38
- // the page's words already appear elsewhere (the "name the overlap"
39
- // signal) — not just a bare unique-word count.
40
- message: `${page.url} has only ${uniqueCount} page-unique words (min ${minUniqueWords}); ${sharedCount} of its ${distinct.size} distinct words also appear on other pages.`,
41
- pageUrl: page.url,
42
- // Axis-aware guidance: the #1 trap on pSEO sites is adding real, useful,
43
- // but per-axis-SHARED data (a role's regulations repeated across that
44
- // role's documents; a state's statutes across its pages) which doesn't
45
- // count. Spell that out so authors don't burn effort on it.
46
- fix: `Add ~${needed} more words that appear on NO other page. Content repeated across pages on the same entity axis — boilerplate, shared legal/spec blocks, or per-axis data (e.g. a role's regulations across that role's documents, a state's statutes across its pages) — does NOT count toward uniqueness, even when it's useful. Only page-specific text (a unique lead, this record's distinct facts, page-specific examples) moves this metric.`
47
- });
48
- }
31
+ pages.forEach((page, i) => {
32
+ const distinct = pageDistinct[i];
33
+ if (distinct.size === 0)
34
+ return; // empty page → thin-content handles it
35
+ let mass = 0;
36
+ for (const t of distinct)
37
+ mass += Math.log(N / (df.get(t) ?? 1)) / lnN;
38
+ const density = mass / distinct.size;
39
+ if (density >= passBelow)
40
+ return;
41
+ const severity = density < errorBelow ? "error" : "info";
42
+ const pct = (density * 100).toFixed(1);
43
+ findings.push({
44
+ ruleId: "content/unique-value",
45
+ severity,
46
+ message: `${page.url} has low unique-content density ${density.toFixed(3)} ` +
47
+ `(${pct}% of its ${distinct.size} distinct words are page-distinctive; floor ${passBelow.toFixed(2)}). ` +
48
+ `Most of its vocabulary also appears on other pages.`,
49
+ pageUrl: page.url,
50
+ fix: `Raise originality density: add page-specific text — a distinct lead, this ` +
51
+ `record's own facts, page-specific examples. Content repeated across pages on ` +
52
+ `the same axis (boilerplate, shared legal/spec blocks, per-axis data like a ` +
53
+ `role's regulations across that role's documents) is common vocabulary and ` +
54
+ `does NOT raise density, even when it is useful.`,
55
+ });
49
56
  });
50
57
  return findings;
51
58
  }
@@ -1 +1 @@
1
- {"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAEA,SAAS,QAAQ,CAAC,IAAY;IAC5B,8EAA8E;IAC9E,4EAA4E;IAC5E,2EAA2E;IAC3E,0EAA0E;IAC1E,gCAAgC;IAChC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;SAC9D,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAmB,EAAE,cAAsB;IACzE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAEnE,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACpC,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE;QAC1B,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;QAC1C,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;YAC7B,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC;gBAAE,WAAW,IAAI,CAAC,CAAC;;gBACrD,WAAW,IAAI,CAAC,CAAC;QACxB,CAAC;QACD,IAAI,WAAW,GAAG,cAAc,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,cAAc,GAAG,WAAW,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,sBAAsB;gBAC9B,QAAQ,EAAE,OAAO;gBACjB,wEAAwE;gBACxE,oEAAoE;gBACpE,+CAA+C;gBAC/C,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,aAAa,WAAW,2BAA2B,cAAc,MAAM,WAAW,WAAW,QAAQ,CAAC,IAAI,6CAA6C;gBAC3K,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,yEAAyE;gBACzE,sEAAsE;gBACtE,uEAAuE;gBACvE,4DAA4D;gBAC5D,GAAG,EAAE,QAAQ,MAAM,qaAAqa;aACzb,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AASA,SAAS,QAAQ,CAAC,IAAY;IAC5B,4EAA4E;IAC5E,8BAA8B;IAC9B,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,KAAK,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,mCAAmC,EAAE,EAAE,CAAC,CAAC;SAC9D,MAAM,CAAC,OAAO,CAAC,CAAC;AACrB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,UAAiC;IAEjC,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,GAAG,UAAU,CAAC;IAC7C,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,CAAC,6CAA6C;IAEjF,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,MAAM,YAAY,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;IACxE,KAAK,MAAM,QAAQ,IAAI,YAAY,EAAE,CAAC;QACpC,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACxB,MAAM,QAAQ,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QACjC,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC;YAAE,OAAO,CAAC,uCAAuC;QACxE,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;QACvE,MAAM,OAAO,GAAG,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QACrC,IAAI,OAAO,IAAI,SAAS;YAAE,OAAO;QAEjC,MAAM,QAAQ,GAAG,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC;QACzD,MAAM,GAAG,GAAG,CAAC,OAAO,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACvC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,sBAAsB;YAC9B,QAAQ;YACR,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,mCAAmC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;gBACnE,IAAI,GAAG,YAAY,QAAQ,CAAC,IAAI,+CAA+C,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBACxG,qDAAqD;YACvD,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,GAAG,EACD,4EAA4E;gBAC5E,+EAA+E;gBAC/E,6EAA6E;gBAC7E,4EAA4E;gBAC5E,iDAAiD;SACpD,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;
IACH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -6,8 +6,14 @@ import type { ParsedPage, RuleResult } from "../../types.js";
6
6
  * Aggregates 7 per-page signal scores (originality, freshness, facts,
7
7
  * E-E-A-T, translation, cliché-reuse, wikipedia-paraphrase) into a
8
8
  * single 0-1 quality score. Each signal weighted equally at 1/7 ≈ 14.3%.
9
- * Fires ONE critical/error finding per page when score < 0.5
10
- * (critical < 0.3, error otherwise).
9
+ *
10
+ * E-E-A-T sub-score is a continuous fraction (categoriesPresent/4), not
11
+ * a 3-step value. Reuses countSignalCategories from eeat-signals to avoid
12
+ * logic drift between the two rules.
13
+ *
14
+ * Fires ONE finding per page when score < 0.5:
15
+ * - warning (score ∈ [0.35, 0.5)) — borderline, low confidence
16
+ * - error (score < 0.35) — clearly low value-add
11
17
  */
12
18
  export declare function valueAddRule(pages: ParsedPage[], findings: RuleResult[]): RuleResult[];
13
19
  //# sourceMappingURL=value-add.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;AAuIvE;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAmBtF"}
1
+ {"version":3,"file":"value-add.d.ts","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAY,MAAM,gBAAgB,CAAC;AA0HvE;;;;;;;;;;;;;;;GAeG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoBtF"}
@@ -1,26 +1,6 @@
1
1
  import { hasAuthoritativeCitation } from "../../algorithms/fact-extraction.js";
2
+ import { countSignalCategories } from "./eeat-signals.js";
2
3
  const RULE_ID = "content/value-add";
3
- const EEAT_HTML_PATTERNS = [
4
- /last\s+updated/i,
5
- /last\s+modified/i,
6
- /reviewed\s+by/i,
7
- /\bsources:/i,
8
- /\breferences:/i,
9
- ];
10
- function countEeatCategories(page) {
11
- let count = 0;
12
- if (page.resolvedHrefs.some((h) => /\/about\b/i.test(h)))
13
- count += 1;
14
- const { metaAuthor, schemaAuthor, bylineElement, relAuthorLink } = page.authorSignals;
15
- if (metaAuthor !== "" || schemaAuthor || bylineElement || relAuthorLink)
16
- count += 1;
17
- if (page.publishedDate)
18
- count += 1;
19
- if (EEAT_HTML_PATTERNS.some((p) => p.test(page.html)) ||
20
- hasAuthoritativeCitation(page.resolvedHrefs, page.url))
21
- count += 1;
22
- return count;
23
- }
24
4
  function computeSignals(page, allFindings) {
25
5
  const pageFindings = allFindings.filter((f) => f.pageUrl === page.url);
26
6
  // Originality: 1.0 if regurgitated-content doesn't fire, 0.0 if it does
@@ -50,18 +30,14 @@ function computeSignals(page, allFindings) {
50
30
  else {
51
31
  facts = 0.0;
52
32
  }
53
- // E-E-A-T: based on signal count
54
- const eeatCount = countEeatCategories(page);
55
- let eeat;
56
- if (eeatCount >= 4) {
57
- eeat = 1.0;
58
- }
59
- else if (eeatCount >= 2) {
60
- eeat = 0.5;
61
- }
62
- else {
63
- eeat = 0.0;
64
- }
33
+ // E-E-A-T: continuous fraction of 4 categories present.
34
+ // Reuses countSignalCategories from eeat-signals (no duplicate logic).
35
+ // Also grants the "sources" credit for authoritative outbound citations.
36
+ const eeatCount = countSignalCategories(page);
37
+ const hasCitation = hasAuthoritativeCitation(page.resolvedHrefs, page.url);
38
+ // Add one credit for an authoritative citation, capped at 4 total. NOTE: the cap only prevents exceeding 4; it does not check whether the sources category was already counted.
39
+ const effectiveEeatCount = Math.min(4, eeatCount + (hasCitation && eeatCount < 4 ? 1 : 0));
40
+ const eeat = effectiveEeatCount / 4;
65
41
  // Translation: 1.0 unless translation-no-op lists this page
66
42
  const hasTranslationNoOp = allFindings.some((f) => f.ruleId === "content/translation-no-op" &&
67
43
  (f.pageUrl === page.url || (f.relatedUrls ?? []).includes(page.url)));
@@ -69,13 +45,7 @@ function computeSignals(page, allFindings) {
69
45
  // Cliché reuse (signal 6): 1.0 if common-phrase-reuse doesn't fire, 0.0 if it does
70
46
  const hasClicheReuse = pageFindings.some((f) => f.ruleId === "content/common-phrase-reuse");
71
47
  const clicheReuse = hasClicheReuse ? 0.0 : 1.0;
72
- // Wikipedia paraphrase (signal 7, v0.5.14): 1.0 if wikipedia-paraphrase doesn't
73
- // fire on this page, 0.0 if it does. The rule fires at warning/low when
74
- // page text overlaps ≥40% with the bundled trigram corpus — a real signal
75
- // for "content lifted from Wikipedia," orthogonal to the other 6 originality
76
- // proxies. Adding it shifts each signal's weight from 1/6 (16.7%) to 1/7
77
- // (14.3%) — boundary cases at score=0.30 and score=0.50 may shift by
78
- // ±0.024 per signal, which is below the granularity of severity bands.
48
+ // Wikipedia paraphrase (signal 7): 1.0 if wikipedia-paraphrase doesn't fire, 0.0 if it does
79
49
  const hasWikipediaParaphrase = pageFindings.some((f) => f.ruleId === "content/wikipedia-paraphrase");
80
50
  const wikipediaParaphrase = hasWikipediaParaphrase ? 0.0 : 1.0;
81
51
  return { originality, freshness, facts, eeat, translation, clicheReuse, wikipediaParaphrase };
@@ -92,10 +62,24 @@ function meanScore(signals) {
92
62
  ];
93
63
  return values.reduce((a, b) => a + b, 0) / values.length;
94
64
  }
95
- function severityForScore(score) {
96
- if (score < 0.3)
97
- return "critical";
98
- return "error";
65
+ /**
66
+ * Two-band severity for the composite score:
67
+ * - score in [0.35, 0.5) → "warning" (borderline: page is weak but not egregiously thin)
68
+ * - score < 0.35 → "error" (clearly low value-add)
69
+ *
70
+ * Confidence scales with distance from the fire threshold:
71
+ * - score < 0.2 → "high"
72
+ * - score in [0.2, 0.35) → "medium"
73
+ * - score in [0.35, 0.5) → "low" (borderline warning)
74
+ */
75
+ function severityAndConfidence(score) {
76
+ if (score >= 0.35) {
77
+ return { severity: "warning", confidence: "low" };
78
+ }
79
+ if (score < 0.2) {
80
+ return { severity: "error", confidence: "high" };
81
+ }
82
+ return { severity: "error", confidence: "medium" };
99
83
  }
100
84
  function buildMessage(page, score, signals) {
101
85
  const pct = (v) => `${(v * 100).toFixed(0)}%`;
@@ -119,8 +103,14 @@ function buildMessage(page, score, signals) {
119
103
  * Aggregates 7 per-page signal scores (originality, freshness, facts,
120
104
  * E-E-A-T, translation, cliché-reuse, wikipedia-paraphrase) into a
121
105
  * single 0-1 quality score. Each signal weighted equally at 1/7 ≈ 14.3%.
122
- * Fires ONE critical/error finding per page when score < 0.5
123
- * (critical < 0.3, error otherwise).
106
+ *
107
+ * E-E-A-T sub-score is a continuous fraction (categoriesPresent/4), not
108
+ * a 3-step value. Reuses countSignalCategories from eeat-signals to avoid
109
+ * logic drift between the two rules.
110
+ *
111
+ * Fires ONE finding per page when score < 0.5:
112
+ * - warning (score ∈ [0.35, 0.5)) — borderline, low confidence
113
+ * - error (score < 0.35) — clearly low value-add
124
114
  */
125
115
  export function valueAddRule(pages, findings) {
126
116
  const results = [];
@@ -129,10 +119,11 @@ export function valueAddRule(pages, findings) {
129
119
  const score = meanScore(signals);
130
120
  if (score >= 0.5)
131
121
  continue;
122
+ const { severity, confidence } = severityAndConfidence(score);
132
123
  results.push({
133
124
  ruleId: RULE_ID,
134
- severity: severityForScore(score),
135
- confidence: "medium",
125
+ severity,
126
+ confidence,
136
127
  message: buildMessage(page, score, signals),
137
128
  fix: "Add proprietary content (original analysis, primary-source data, expert commentary, original imagery) to lift the value-add score above 0.5. Score is a composite — improve any underweight signal.",
138
129
  pageUrl: page.url,
@@ -1 +1 @@
1
- {"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAE/E,MAAM,OAAO,GAAG,mBAAmB,CAAC;AAEpC,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF,SAAS,mBAAmB,CAAC,IAAgB;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAAE,KAAK,IAAI,CAAC,CAAC;IACrE,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACpF,IAAI,IAAI,CAAC,aAAa;QAAE,KAAK,IAAI,CAAC,CAAC;IACnC,IACE,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjD,wBAAwB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC;QACtD,KAAK,IAAI,CAAC,CAAC;IACb,OAAO,KAAK,CAAC;AACf,CAAC;AAYD,SAAS,cAAc,CAAC,IAAgB,EAAE,WAAyB;IACjE,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,wEAAwE;IACxE,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IAC9F,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEhD,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC;IACxF,IAAI,SAAiB,CAAC;IACtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,IAAI,gBAAgB,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnD,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,mBAAmB,CAAC,CAAC;IAChF,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,MAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnF,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;IAED,iCAAiC;IACjC,MAAM,SAAS,GAAG,mBAAmB,CAAC,IAAI
,CAAC,CAAC;IAC5C,IAAI,IAAY,CAAC;IACjB,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnB,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QAC1B,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,GAAG,CAAC;IACb,CAAC;IAED,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,2BAA2B;QAC7C,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CACvE,CAAC;IACF,MAAM,WAAW,GAAG,kBAAkB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,mFAAmF;IACnF,MAAM,cAAc,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,6BAA6B,CAAC,CAAC;IAC5F,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/C,gFAAgF;IAChF,wEAAwE;IACxE,0EAA0E;IAC1E,6EAA6E;IAC7E,yEAAyE;IACzE,qEAAqE;IACrE,uEAAuE;IACvE,MAAM,sBAAsB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IACrG,MAAM,mBAAmB,GAAG,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/D,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC;AAChG,CAAC;AAED,SAAS,SAAS,CAAC,OAAgB;IACjC,MAAM,MAAM,GAAG;QACb,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,SAAS;QACjB,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,IAAI;QACZ,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,mBAAmB;KAC5B,CAAC;IACF,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa;IACrC,IAAI,KAAK,GAAG,GAAG;QAAE,OAAO,UAAU,CAAC;IACnC,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB,EAAE,KAAa,EAAE,OAAgB;IACrE,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtD,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAA8B,CAAC;IACrE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC;QACjC,IAAI,GAAG,GAAG,GAAG;YAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CA
AC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC;IAC1F,OAAO,CACL,GAAG,IAAI,CAAC,GAAG,qBAAqB,GAAG,CAAC,KAAK,CAAC,kBAAkB;QAC5D,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI;QACnF,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI;QACzG,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,2BAA2B,GAAG,CAAC,OAAO,CAAC,mBAAmB,CAAC,KAAK;QACzG,kBAAkB,UAAU,iEAAiE,CAC9F,CAAC;AACJ,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,YAAY,CAAC,KAAmB,EAAE,QAAsB;IACtE,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACjC,IAAI,KAAK,IAAI,GAAG;YAAE,SAAS;QAE3B,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,gBAAgB,CAAC,KAAK,CAAC;YACjC,UAAU,EAAE,QAAQ;YACpB,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC;YAC3C,GAAG,EAAE,qMAAqM;YAC1M,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
1
+ {"version":3,"file":"value-add.js","sourceRoot":"","sources":["../../../src/rules/content/value-add.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAC/E,OAAO,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAE1D,MAAM,OAAO,GAAG,mBAAmB,CAAC;AAYpC,SAAS,cAAc,CAAC,IAAgB,EAAE,WAAyB;IACjE,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IAEvE,wEAAwE;IACxE,MAAM,eAAe,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IAC9F,MAAM,WAAW,GAAG,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEhD,qDAAqD;IACrD,MAAM,gBAAgB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,uBAAuB,CAAC,CAAC;IACxF,IAAI,SAAiB,CAAC;IACtB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,IAAI,gBAAgB,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnD,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;SAAM,CAAC;QACN,SAAS,GAAG,GAAG,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,mBAAmB,CAAC,CAAC;IAChF,IAAI,KAAa,CAAC;IAClB,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,MAAM,IAAI,YAAY,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnF,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;SAAM,CAAC;QACN,KAAK,GAAG,GAAG,CAAC;IACd,CAAC;IAED,wDAAwD;IACxD,uEAAuE;IACvE,yEAAyE;IACzE,MAAM,SAAS,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC;IAC9C,MAAM,WAAW,GAAG,wBAAwB,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3E,0GAA0G;IAC1G,MAAM,kBAAkB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,WAAW,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3F,MAAM,IAAI,GAAG,kBAAkB,GAAG,CAAC,CAAC;IAEpC,4DAA4D;IAC5D,MAAM,kBAAkB,GAAG,WAAW,CAAC,IAAI,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,2BAA2B;QAC7C,CAAC,CAAC,CAAC,OAAO,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CACvE,CAAC;IACF,MAAM,WAAW,GAAG,kBAAkB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAEnD,mFAAmF;IACnF,MAAM,cAA
c,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,6BAA6B,CAAC,CAAC;IAC5F,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/C,4FAA4F;IAC5F,MAAM,sBAAsB,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,8BAA8B,CAAC,CAAC;IACrG,MAAM,mBAAmB,GAAG,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAE/D,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC;AAChG,CAAC;AAED,SAAS,SAAS,CAAC,OAAgB;IACjC,MAAM,MAAM,GAAG;QACb,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,SAAS;QACjB,OAAO,CAAC,KAAK;QACb,OAAO,CAAC,IAAI;QACZ,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,WAAW;QACnB,OAAO,CAAC,mBAAmB;KAC5B,CAAC;IACF,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;AAC3D,CAAC;AAED;;;;;;;;;GASG;AACH,SAAS,qBAAqB,CAAC,KAAa;IAC1C,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;QAClB,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;IACpD,CAAC;IACD,IAAI,KAAK,GAAG,GAAG,EAAE,CAAC;QAChB,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC;IACnD,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;AACrD,CAAC;AAED,SAAS,YAAY,CAAC,IAAgB,EAAE,KAAa,EAAE,OAAgB;IACrE,MAAM,GAAG,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;IACtD,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAA8B,CAAC;IACrE,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,OAAO,EAAE,CAAC;QACjC,IAAI,GAAG,GAAG,GAAG;YAAE,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC;IAC1F,OAAO,CACL,GAAG,IAAI,CAAC,GAAG,qBAAqB,GAAG,CAAC,KAAK,CAAC,kBAAkB;QAC5D,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,gBAAgB,GAAG,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI;QACnF,UAAU,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI;QACzG,iBAAiB,GAAG,CAAC,OAAO,CAAC,WAAW,CAAC,2BAA2B,GAAG,CAAC,OAAO,CAAC,mBAAmB,CAAC,KAAK;QACzG,kBAAkB,UAA
U,iEAAiE,CAC9F,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,UAAU,YAAY,CAAC,KAAmB,EAAE,QAAsB;IACtE,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACjC,IAAI,KAAK,IAAI,GAAG;YAAE,SAAS;QAE3B,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAC9D,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,OAAO;YACf,QAAQ;YACR,UAAU;YACV,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC;YAC3C,GAAG,EAAE,qMAAqM;YAC1M,OAAO,EAAE,IAAI,CAAC,GAAG;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -1,15 +1,20 @@
1
1
  import type { ParsedPage, RuleResult } from "../../types.js";
2
2
  /**
3
- * content/wikipedia-paraphrase — standalone originality signal (v0.5.14).
3
+ * content/wikipedia-paraphrase — advisory originality signal (v0.5.14+).
4
4
  *
5
- * Detects pages whose contentText has high trigram overlap with the curated
6
- * Wikipedia reference corpus. High overlap indicates paraphrased or verbatim
7
- * encyclopedic content that adds no proprietary value.
5
+ * Detects pages whose contentText has unusually high trigram overlap with the
6
+ * bundled Wikipedia reference corpus. This is a weak, advisory signal only:
7
+ * trigram overlap cannot distinguish actual paraphrase from legitimate topical
8
+ * proximity (e.g. a legal-template page naturally shares many encyclopedic
9
+ * trigrams with Wikipedia articles on the same topic).
8
10
  *
9
- * Composite integration into content/value-add is deferred to v0.5.15 to
10
- * avoid test-math recalibration in this release.
11
+ * Two guards reduce false positives:
12
+ * 1. Minimum-length guard: pages below MIN_TRIGRAM_COUNT trigrams (~200
13
+ * words) are skipped entirely — bloom noise alone dominates on short pages.
14
+ * 2. Raised threshold: THRESHOLD = 0.55, well above the bloom noise floor
15
+ * (~5%) and typical topical-proximity baseline.
11
16
  *
12
- * Fires: one warning/low-confidence finding per qualifying page (rate >= 0.4).
17
+ * Fires: one warning/low-confidence finding per qualifying page (rate >= 0.55).
13
18
  */
14
19
  export declare function wikipediaParaphraseRule(pages: ParsedPage[]): RuleResult[];
15
20
  //# sourceMappingURL=wikipedia-paraphrase.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"wikipedia-paraphrase.d.ts","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAM7D;;;;;;;;;;;GAWG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBzE"}
1
+ {"version":3,"file":"wikipedia-paraphrase.d.ts","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAkC7D;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgCzE"}
@@ -1,23 +1,61 @@
1
1
  import { wikipediaParaphraseRate } from "../../algorithms/wikipedia-paraphrase.js";
2
2
  const RULE_ID = "content/wikipedia-paraphrase";
3
- const THRESHOLD = 0.4;
4
3
  /**
5
- * content/wikipedia-paraphrase — standalone originality signal (v0.5.14).
4
+ * ponytail: MIN_TRIGRAM_COUNT = 200
6
5
  *
7
- * Detects pages whose contentText has high trigram overlap with the curated
8
- * Wikipedia reference corpus. High overlap indicates paraphrased or verbatim
9
- * encyclopedic content that adds no proprietary value.
6
+ * The bloom filter has a ~5% per-query false-positive rate. On a page with
7
+ * N trigrams the expected bloom-noise hit count is 0.05 * N. For a short page
8
+ * (~48 trigrams) that alone produces ~2.4 expected FP hits; with a threshold
9
+ * of 40% (19/48) the noise alone can exceed the threshold on short pages.
10
10
  *
11
- * Composite integration into content/value-add is deferred to v0.5.15 to
12
- * avoid test-math recalibration in this release.
11
+ * Setting a floor of 200 trigrams (~202 words) means bloom noise contributes
12
+ * an expected 10 / 200 = 5% of trigrams, far below the raised THRESHOLD, so noise
13
+ * cannot trigger the rule on its own.
14
+ */
15
+ const MIN_TRIGRAM_COUNT = 200;
16
+ /**
17
+ * ponytail: THRESHOLD = 0.55
18
+ *
19
+ * Raised from 0.40 to 0.55 to account for the bloom filter's ~5% per-query
20
+ * FP rate and the "topic overlap" effect: legal/medical/geography pSEO pages
21
+ * share many encyclopedic trigrams ("the united states", "in the state of")
22
+ * purely through topical proximity, not paraphrase. A 55% overlap is
23
+ * substantially above both the noise floor (~5%) and the expected topic-
24
+ * overlap baseline, making the signal meaningfully indicative of genuine
25
+ * encyclopedic reuse. At this level the rule remains advisory (confidence:
26
+ * "low") because trigram overlap cannot distinguish paraphrase from topic
27
+ * proximity — it is a weak signal, not a verdict.
28
+ */
29
+ const THRESHOLD = 0.55;
30
+ /**
31
+ * content/wikipedia-paraphrase — advisory originality signal (v0.5.14+).
32
+ *
33
+ * Detects pages whose contentText has unusually high trigram overlap with the
34
+ * bundled Wikipedia reference corpus. This is a weak, advisory signal only:
35
+ * trigram overlap cannot distinguish actual paraphrase from legitimate topical
36
+ * proximity (e.g. a legal-template page naturally shares many encyclopedic
37
+ * trigrams with Wikipedia articles on the same topic).
13
38
  *
14
- * Fires: one warning/low-confidence finding per qualifying page (rate >= 0.4).
39
+ * Two guards reduce false positives:
40
+ * 1. Minimum-length guard: pages below MIN_TRIGRAM_COUNT trigrams (~200
41
+ * words) are skipped entirely — bloom noise alone dominates on short pages.
42
+ * 2. Raised threshold: THRESHOLD = 0.55, well above the bloom noise floor
43
+ * (~5%) and typical topical-proximity baseline.
44
+ *
45
+ * Fires: one warning/low-confidence finding per qualifying page (rate >= 0.55).
15
46
  */
16
47
  export function wikipediaParaphraseRule(pages) {
17
48
  const findings = [];
18
49
  for (const page of pages) {
19
50
  if (!page.contentText || page.contentText.trim().length === 0)
20
51
  continue;
52
+ // Estimate trigram count without re-implementing extractTrigrams: count
53
+ // whitespace-separated tokens then subtract 2 (trigrams = tokens - 2).
54
+ // This is a cheap proxy; the algorithm file does the accurate extraction.
55
+ const tokenCount = page.contentText.trim().split(/\s+/).length;
56
+ const estimatedTrigrams = Math.max(0, tokenCount - 2);
57
+ if (estimatedTrigrams < MIN_TRIGRAM_COUNT)
58
+ continue;
21
59
  const rate = wikipediaParaphraseRate(page.contentText);
22
60
  if (rate < THRESHOLD)
23
61
  continue;
@@ -27,11 +65,12 @@ export function wikipediaParaphraseRule(pages) {
27
65
  severity: "warning",
28
66
  confidence: "low",
29
67
  pageUrl: page.url,
30
- message: `${page.url} contains content with high trigram overlap (${pct}%) against the Wikipedia ` +
31
- `reference corpus. May indicate paraphrased or copy-pasted Wikipedia content.`,
32
- fix: "Replace borrowed encyclopedic phrasing with original analysis specific to this page's " +
33
- "subject. Even if attributed, high paraphrase rates correlate with low value-add by " +
34
- "SpamBrain's helpful-content metric.",
68
+ message: `${page.url} has high trigram overlap (${pct}%) with the bundled Wikipedia ` +
69
+ `reference corpus. This is an advisory signal trigram overlap can reflect ` +
70
+ `topical proximity as well as copied content and cannot distinguish the two.`,
71
+ fix: "Review for borrowed encyclopedic phrasing and replace with original analysis " +
72
+ "specific to this page's subject. Even if attributed, high paraphrase rates " +
73
+ "correlate with low value-add by SpamBrain's helpful-content metric.",
35
74
  });
36
75
  }
37
76
  return findings;
@@ -1 +1 @@
1
- {"version":3,"file":"wikipedia-paraphrase.js","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0CAA0C,CAAC;AAEnF,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAC/C,MAAM,SAAS,GAAG,GAAG,CAAC;AAEtB;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QACxE,MAAM,IAAI,GAAG,uBAAuB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvD,IAAI,IAAI,GAAG,SAAS;YAAE,SAAS;QAC/B,MAAM,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,KAAK;YACjB,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,gDAAgD,GAAG,2BAA2B;gBACzF,8EAA8E;YAChF,GAAG,EACD,wFAAwF;gBACxF,qFAAqF;gBACrF,qCAAqC;SACxC,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"wikipedia-paraphrase.js","sourceRoot":"","sources":["../../../src/rules/content/wikipedia-paraphrase.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,0CAA0C,CAAC;AAEnF,MAAM,OAAO,GAAG,8BAA8B,CAAC;AAE/C;;;;;;;;;;;GAWG;AACH,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAE9B;;;;;;;;;;;;GAYG;AACH,MAAM,SAAS,GAAG,IAAI,CAAC;AAEvB;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,uBAAuB,CAAC,KAAmB;IACzD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAExE,wEAAwE;QACxE,uEAAuE;QACvE,0EAA0E;QAC1E,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QAC/D,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,GAAG,CAAC,CAAC,CAAC;QACtD,IAAI,iBAAiB,GAAG,iBAAiB;YAAE,SAAS;QAEpD,MAAM,IAAI,GAAG,uBAAuB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACvD,IAAI,IAAI,GAAG,SAAS;YAAE,SAAS;QAE/B,MAAM,GAAG,GAAG,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,OAAO;YACf,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,KAAK;YACjB,OAAO,EAAE,IAAI,CAAC,GAAG;YACjB,OAAO,EACL,GAAG,IAAI,CAAC,GAAG,8BAA8B,GAAG,gCAAgC;gBAC5E,6EAA6E;gBAC7E,6EAA6E;YAC/E,GAAG,EACD,+EAA+E;gBAC/E,6EAA6E;gBAC7E,qEAAqE;SACxE,CAAC,CAAC;IACL,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -3,5 +3,11 @@ import type { ParsedPage, RuleResult } from "../../types.js";
3
3
  * Flags clusters (same parent directory) with 2+ pages that are siloed: no outbound
4
4
  * internal crawl link to another cluster and no inbound from another cluster.
5
5
  */
6
- export declare function clusterConnectivityRule(pages: ParsedPage[], knownUrls: Set<string>): RuleResult[];
6
+ export declare function clusterConnectivityRule(pages: ParsedPage[], knownUrls: Set<string>,
7
+ /**
8
+ * 2026-06-16 calibration FP fix: cross-cluster links routinely target pages
9
+ * that were not fetched on a sampled crawl, so a "siloed cluster" verdict is
10
+ * unreliable. Only run on a full crawl.
11
+ */
12
+ sampled?: boolean): RuleResult[];
7
13
  //# sourceMappingURL=cluster-connectivity.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"cluster-connectivity.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyB7D;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,GACrB,UAAU,EAAE,CA0Dd"}
1
+ {"version":3,"file":"cluster-connectivity.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyB7D;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC;AACtB;;;;GAIG;AACH,OAAO,UAAQ,GACd,UAAU,EAAE,CA0Dd"}
@@ -19,8 +19,14 @@ function hasCrossClusterInbound(clusterDir, urlsInCluster, pages, knownUrls) {
19
19
  * Flags clusters (same parent directory) with 2+ pages that are siloed: no outbound
20
20
  * internal crawl link to another cluster and no inbound from another cluster.
21
21
  */
22
- export function clusterConnectivityRule(pages, knownUrls) {
23
- if (pages.length < 2) {
22
+ export function clusterConnectivityRule(pages, knownUrls,
23
+ /**
24
+ * 2026-06-16 calibration FP fix: cross-cluster links routinely target pages
25
+ * that were not fetched on a sampled crawl, so a "siloed cluster" verdict is
26
+ * unreliable. Only run on a full crawl.
27
+ */
28
+ sampled = false) {
29
+ if (sampled || pages.length < 2) {
24
30
  return [];
25
31
  }
26
32
  const clusterPages = new Map();
@@ -1 +1 @@
1
- {"version":3,"file":"cluster-connectivity.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,SAAS,sBAAsB,CAC7B,UAAkB,EAClB,aAA0B,EAC1B,KAAmB,EACnB,SAAsB;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,SAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,EAAuB,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAU,CAAC;QACvD,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACf,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;QACxD,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,uBAAuB,GAAG,KAAK,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS;YACX,CAAC;YACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,SAAS;gBACX,CAAC;gBACD,MAAM,aAAa,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBAC7C,IAAI,aAAa,KAAK,UAAU,EAAE,CAAC;oBACjC,uBAAuB,GAAG,IAAI,CAAC;oBAC/B,MAAM;gBACR,CAAC;YACH,CAAC;YACD,IAAI,uBAAuB,EAAE,CAAC;gBAC5B,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,sBAAsB,CAAC,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;QAE9E,IAAI,CAAC,uBAAuB,IAAI,CAAC,UAAU,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,C
AAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,WAAW,UAAU,KAAK,IAAI,CAAC,IAAI,uDAAuD;gBACnG,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,GAAG,EAAE,kGAAkG;aACxG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
1
+ {"version":3,"file":"cluster-connectivity.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,SAAS,sBAAsB,CAC7B,UAAkB,EAClB,aAA0B,EAC1B,KAAmB,EACnB,SAAsB;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,SAAsB;AACtB;;;;GAIG;AACH,OAAO,GAAG,KAAK;IAEf,IAAI,OAAO,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,EAAuB,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAU,CAAC;QACvD,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACf,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;QACxD,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,uBAAuB,GAAG,KAAK,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS;YACX,CAAC;YACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,SAAS;gBACX,CAAC;gBACD,MAAM,aAAa,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBAC7C,IAAI,aAAa,KAAK,UAAU,EAAE,CAAC;oBACjC,uBAAuB,GAAG,IAAI,CAAC;oBAC/B,MAAM;gBACR,CAAC;YACH,CAAC;YACD,IAAI,uBAAuB,EAAE,CAAC;gBAC5B,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,sBAAsB,CAAC,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;QAE9E,IAAI,CAAC,uBAAuB,IAAI
,CAAC,UAAU,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,WAAW,UAAU,KAAK,IAAI,CAAC,IAAI,uDAAuD;gBACnG,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,GAAG,EAAE,kGAAkG;aACxG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -1,3 +1,10 @@
1
1
  import type { ParsedPage, RuleResult } from "../../types.js";
2
- export declare function orphanPagesRule(pages: ParsedPage[], inboundLinks: Map<string, number>, rootUrl?: string): RuleResult[];
2
+ export declare function orphanPagesRule(pages: ParsedPage[], inboundLinks: Map<string, number>, rootUrl?: string,
3
+ /**
4
+ * 2026-06-16 calibration FP fix: on a sampled crawl the page that links to a
5
+ * given URL is often simply not in the fetched subset, so "0 inbound in this
6
+ * crawl" is not evidence of a real orphan. Orphan detection is only reliable
7
+ * on a full crawl — skip it when sampled rather than flag healthy pages.
8
+ */
9
+ sampled?: boolean): RuleResult[];
3
10
  //# sourceMappingURL=orphan-pages.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM,GACf,UAAU,EAAE,CAmBd"}
1
+ {"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM;AAChB;;;;;GAKG;AACH,OAAO,UAAQ,GACd,UAAU,EAAE,CAqBd"}
@@ -1,4 +1,13 @@
1
- export function orphanPagesRule(pages, inboundLinks, rootUrl) {
1
+ export function orphanPagesRule(pages, inboundLinks, rootUrl,
2
+ /**
3
+ * 2026-06-16 calibration FP fix: on a sampled crawl the page that links to a
4
+ * given URL is often simply not in the fetched subset, so "0 inbound in this
5
+ * crawl" is not evidence of a real orphan. Orphan detection is only reliable
6
+ * on a full crawl — skip it when sampled rather than flag healthy pages.
7
+ */
8
+ sampled = false) {
9
+ if (sampled)
10
+ return [];
2
11
  const findings = [];
3
12
  for (const page of pages) {
4
13
  if (rootUrl && page.url === rootUrl) {