@pseolint/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. package/LICENSE +21 -0
  2. package/README.md +53 -0
  3. package/dist/algorithms/entity-mask.d.ts +3 -0
  4. package/dist/algorithms/entity-mask.d.ts.map +1 -0
  5. package/dist/algorithms/entity-mask.js +8 -0
  6. package/dist/algorithms/entity-mask.js.map +1 -0
  7. package/dist/algorithms/entity-mask.test.d.ts +2 -0
  8. package/dist/algorithms/entity-mask.test.d.ts.map +1 -0
  9. package/dist/algorithms/entity-mask.test.js +23 -0
  10. package/dist/algorithms/entity-mask.test.js.map +1 -0
  11. package/dist/algorithms/simhash.d.ts +4 -0
  12. package/dist/algorithms/simhash.d.ts.map +1 -0
  13. package/dist/algorithms/simhash.js +64 -0
  14. package/dist/algorithms/simhash.js.map +1 -0
  15. package/dist/algorithms/simhash.test.d.ts +2 -0
  16. package/dist/algorithms/simhash.test.d.ts.map +1 -0
  17. package/dist/algorithms/simhash.test.js +23 -0
  18. package/dist/algorithms/simhash.test.js.map +1 -0
  19. package/dist/algorithms/tf-idf.d.ts +8 -0
  20. package/dist/algorithms/tf-idf.d.ts.map +1 -0
  21. package/dist/algorithms/tf-idf.js +55 -0
  22. package/dist/algorithms/tf-idf.js.map +1 -0
  23. package/dist/auditor.d.ts +3 -0
  24. package/dist/auditor.d.ts.map +1 -0
  25. package/dist/auditor.js +730 -0
  26. package/dist/auditor.js.map +1 -0
  27. package/dist/auditor.test.d.ts +2 -0
  28. package/dist/auditor.test.d.ts.map +1 -0
  29. package/dist/auditor.test.js +134 -0
  30. package/dist/auditor.test.js.map +1 -0
  31. package/dist/enrich-findings.d.ts +9 -0
  32. package/dist/enrich-findings.d.ts.map +1 -0
  33. package/dist/enrich-findings.js +436 -0
  34. package/dist/enrich-findings.js.map +1 -0
  35. package/dist/formatters/console.d.ts +6 -0
  36. package/dist/formatters/console.d.ts.map +1 -0
  37. package/dist/formatters/console.js +237 -0
  38. package/dist/formatters/console.js.map +1 -0
  39. package/dist/formatters/html.d.ts +3 -0
  40. package/dist/formatters/html.d.ts.map +1 -0
  41. package/dist/formatters/html.js +170 -0
  42. package/dist/formatters/html.js.map +1 -0
  43. package/dist/formatters/index.d.ts +6 -0
  44. package/dist/formatters/index.d.ts.map +1 -0
  45. package/dist/formatters/index.js +5 -0
  46. package/dist/formatters/index.js.map +1 -0
  47. package/dist/formatters/json.d.ts +3 -0
  48. package/dist/formatters/json.d.ts.map +1 -0
  49. package/dist/formatters/json.js +4 -0
  50. package/dist/formatters/json.js.map +1 -0
  51. package/dist/formatters/markdown.d.ts +3 -0
  52. package/dist/formatters/markdown.d.ts.map +1 -0
  53. package/dist/formatters/markdown.js +93 -0
  54. package/dist/formatters/markdown.js.map +1 -0
  55. package/dist/index.d.ts +45 -0
  56. package/dist/index.d.ts.map +1 -0
  57. package/dist/index.js +45 -0
  58. package/dist/index.js.map +1 -0
  59. package/dist/page-classifier.d.ts +4 -0
  60. package/dist/page-classifier.d.ts.map +1 -0
  61. package/dist/page-classifier.js +133 -0
  62. package/dist/page-classifier.js.map +1 -0
  63. package/dist/parser.d.ts +3 -0
  64. package/dist/parser.d.ts.map +1 -0
  65. package/dist/parser.js +131 -0
  66. package/dist/parser.js.map +1 -0
  67. package/dist/parser.test.d.ts +2 -0
  68. package/dist/parser.test.d.ts.map +1 -0
  69. package/dist/parser.test.js +37 -0
  70. package/dist/parser.test.js.map +1 -0
  71. package/dist/renderer.d.ts +15 -0
  72. package/dist/renderer.d.ts.map +1 -0
  73. package/dist/renderer.js +124 -0
  74. package/dist/renderer.js.map +1 -0
  75. package/dist/rule-references.d.ts +2 -0
  76. package/dist/rule-references.d.ts.map +1 -0
  77. package/dist/rule-references.js +35 -0
  78. package/dist/rule-references.js.map +1 -0
  79. package/dist/rules/cannibal/keyword-collision.d.ts +3 -0
  80. package/dist/rules/cannibal/keyword-collision.d.ts.map +1 -0
  81. package/dist/rules/cannibal/keyword-collision.js +25 -0
  82. package/dist/rules/cannibal/keyword-collision.js.map +1 -0
  83. package/dist/rules/cannibal/title-overlap.d.ts +3 -0
  84. package/dist/rules/cannibal/title-overlap.d.ts.map +1 -0
  85. package/dist/rules/cannibal/title-overlap.js +43 -0
  86. package/dist/rules/cannibal/title-overlap.js.map +1 -0
  87. package/dist/rules/cannibal/url-pattern.d.ts +3 -0
  88. package/dist/rules/cannibal/url-pattern.d.ts.map +1 -0
  89. package/dist/rules/cannibal/url-pattern.js +48 -0
  90. package/dist/rules/cannibal/url-pattern.js.map +1 -0
  91. package/dist/rules/content/eeat-signals.d.ts +3 -0
  92. package/dist/rules/content/eeat-signals.d.ts.map +1 -0
  93. package/dist/rules/content/eeat-signals.js +46 -0
  94. package/dist/rules/content/eeat-signals.js.map +1 -0
  95. package/dist/rules/content/heading-uniqueness.d.ts +3 -0
  96. package/dist/rules/content/heading-uniqueness.d.ts.map +1 -0
  97. package/dist/rules/content/heading-uniqueness.js +56 -0
  98. package/dist/rules/content/heading-uniqueness.js.map +1 -0
  99. package/dist/rules/content/meta-uniqueness.d.ts +3 -0
  100. package/dist/rules/content/meta-uniqueness.d.ts.map +1 -0
  101. package/dist/rules/content/meta-uniqueness.js +28 -0
  102. package/dist/rules/content/meta-uniqueness.js.map +1 -0
  103. package/dist/rules/content/missing-author.d.ts +3 -0
  104. package/dist/rules/content/missing-author.d.ts.map +1 -0
  105. package/dist/rules/content/missing-author.js +26 -0
  106. package/dist/rules/content/missing-author.js.map +1 -0
  107. package/dist/rules/content/unique-value.d.ts +3 -0
  108. package/dist/rules/content/unique-value.d.ts.map +1 -0
  109. package/dist/rules/content/unique-value.js +26 -0
  110. package/dist/rules/content/unique-value.js.map +1 -0
  111. package/dist/rules/links/cluster-connectivity.d.ts +7 -0
  112. package/dist/rules/links/cluster-connectivity.d.ts.map +1 -0
  113. package/dist/rules/links/cluster-connectivity.js +73 -0
  114. package/dist/rules/links/cluster-connectivity.js.map +1 -0
  115. package/dist/rules/links/cluster-key.d.ts +3 -0
  116. package/dist/rules/links/cluster-key.d.ts.map +1 -0
  117. package/dist/rules/links/cluster-key.js +22 -0
  118. package/dist/rules/links/cluster-key.js.map +1 -0
  119. package/dist/rules/links/dead-ends.d.ts +3 -0
  120. package/dist/rules/links/dead-ends.d.ts.map +1 -0
  121. package/dist/rules/links/dead-ends.js +13 -0
  122. package/dist/rules/links/dead-ends.js.map +1 -0
  123. package/dist/rules/links/hub-pages.d.ts +7 -0
  124. package/dist/rules/links/hub-pages.d.ts.map +1 -0
  125. package/dist/rules/links/hub-pages.js +73 -0
  126. package/dist/rules/links/hub-pages.js.map +1 -0
  127. package/dist/rules/links/link-depth.d.ts +3 -0
  128. package/dist/rules/links/link-depth.d.ts.map +1 -0
  129. package/dist/rules/links/link-depth.js +46 -0
  130. package/dist/rules/links/link-depth.js.map +1 -0
  131. package/dist/rules/links/orphan-pages.d.ts +3 -0
  132. package/dist/rules/links/orphan-pages.d.ts.map +1 -0
  133. package/dist/rules/links/orphan-pages.js +19 -0
  134. package/dist/rules/links/orphan-pages.js.map +1 -0
  135. package/dist/rules/schema/consistency.d.ts +3 -0
  136. package/dist/rules/schema/consistency.d.ts.map +1 -0
  137. package/dist/rules/schema/consistency.js +44 -0
  138. package/dist/rules/schema/consistency.js.map +1 -0
  139. package/dist/rules/schema/json-ld-valid.d.ts +3 -0
  140. package/dist/rules/schema/json-ld-valid.d.ts.map +1 -0
  141. package/dist/rules/schema/json-ld-valid.js +47 -0
  142. package/dist/rules/schema/json-ld-valid.js.map +1 -0
  143. package/dist/rules/schema/required-fields.d.ts +3 -0
  144. package/dist/rules/schema/required-fields.d.ts.map +1 -0
  145. package/dist/rules/schema/required-fields.js +60 -0
  146. package/dist/rules/schema/required-fields.js.map +1 -0
  147. package/dist/rules/spam/boilerplate-ratio.d.ts +3 -0
  148. package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -0
  149. package/dist/rules/spam/boilerplate-ratio.js +50 -0
  150. package/dist/rules/spam/boilerplate-ratio.js.map +1 -0
  151. package/dist/rules/spam/doorway-pattern.d.ts +4 -0
  152. package/dist/rules/spam/doorway-pattern.d.ts.map +1 -0
  153. package/dist/rules/spam/doorway-pattern.js +47 -0
  154. package/dist/rules/spam/doorway-pattern.js.map +1 -0
  155. package/dist/rules/spam/entity-swap.d.ts +7 -0
  156. package/dist/rules/spam/entity-swap.d.ts.map +1 -0
  157. package/dist/rules/spam/entity-swap.js +26 -0
  158. package/dist/rules/spam/entity-swap.js.map +1 -0
  159. package/dist/rules/spam/near-duplicate.d.ts +11 -0
  160. package/dist/rules/spam/near-duplicate.d.ts.map +1 -0
  161. package/dist/rules/spam/near-duplicate.js +25 -0
  162. package/dist/rules/spam/near-duplicate.js.map +1 -0
  163. package/dist/rules/spam/publication-velocity.d.ts +3 -0
  164. package/dist/rules/spam/publication-velocity.d.ts.map +1 -0
  165. package/dist/rules/spam/publication-velocity.js +25 -0
  166. package/dist/rules/spam/publication-velocity.js.map +1 -0
  167. package/dist/rules/spam/template-coverage.d.ts +3 -0
  168. package/dist/rules/spam/template-coverage.d.ts.map +1 -0
  169. package/dist/rules/spam/template-coverage.js +87 -0
  170. package/dist/rules/spam/template-coverage.js.map +1 -0
  171. package/dist/rules/spam/template-diversity.d.ts +3 -0
  172. package/dist/rules/spam/template-diversity.d.ts.map +1 -0
  173. package/dist/rules/spam/template-diversity.js +19 -0
  174. package/dist/rules/spam/template-diversity.js.map +1 -0
  175. package/dist/rules/spam/thin-content.d.ts +6 -0
  176. package/dist/rules/spam/thin-content.d.ts.map +1 -0
  177. package/dist/rules/spam/thin-content.js +22 -0
  178. package/dist/rules/spam/thin-content.js.map +1 -0
  179. package/dist/rules/tech/canonical-consistency.d.ts +4 -0
  180. package/dist/rules/tech/canonical-consistency.d.ts.map +1 -0
  181. package/dist/rules/tech/canonical-consistency.js +78 -0
  182. package/dist/rules/tech/canonical-consistency.js.map +1 -0
  183. package/dist/rules/tech/canonical-noindex-conflict.d.ts +3 -0
  184. package/dist/rules/tech/canonical-noindex-conflict.d.ts.map +1 -0
  185. package/dist/rules/tech/canonical-noindex-conflict.js +27 -0
  186. package/dist/rules/tech/canonical-noindex-conflict.js.map +1 -0
  187. package/dist/rules/tech/hreflang-consistency.d.ts +3 -0
  188. package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -0
  189. package/dist/rules/tech/hreflang-consistency.js +99 -0
  190. package/dist/rules/tech/hreflang-consistency.js.map +1 -0
  191. package/dist/rules/tech/og-completeness.d.ts +3 -0
  192. package/dist/rules/tech/og-completeness.d.ts.map +1 -0
  193. package/dist/rules/tech/og-completeness.js +35 -0
  194. package/dist/rules/tech/og-completeness.js.map +1 -0
  195. package/dist/rules/tech/redirect-chain.d.ts +3 -0
  196. package/dist/rules/tech/redirect-chain.d.ts.map +1 -0
  197. package/dist/rules/tech/redirect-chain.js +20 -0
  198. package/dist/rules/tech/redirect-chain.js.map +1 -0
  199. package/dist/rules/tech/robots-noindex-conflict.d.ts +3 -0
  200. package/dist/rules/tech/robots-noindex-conflict.d.ts.map +1 -0
  201. package/dist/rules/tech/robots-noindex-conflict.js +30 -0
  202. package/dist/rules/tech/robots-noindex-conflict.js.map +1 -0
  203. package/dist/rules/tech/robots-sitemap-presence.d.ts +3 -0
  204. package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -0
  205. package/dist/rules/tech/robots-sitemap-presence.js +61 -0
  206. package/dist/rules/tech/robots-sitemap-presence.js.map +1 -0
  207. package/dist/rules/tech/sitemap-completeness.d.ts +3 -0
  208. package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -0
  209. package/dist/rules/tech/sitemap-completeness.js +40 -0
  210. package/dist/rules/tech/sitemap-completeness.js.map +1 -0
  211. package/dist/rules/tech/soft-404.d.ts +3 -0
  212. package/dist/rules/tech/soft-404.d.ts.map +1 -0
  213. package/dist/rules/tech/soft-404.js +24 -0
  214. package/dist/rules/tech/soft-404.js.map +1 -0
  215. package/dist/types.d.ts +170 -0
  216. package/dist/types.d.ts.map +1 -0
  217. package/dist/types.js +2 -0
  218. package/dist/types.js.map +1 -0
  219. package/dist/url-normalize.d.ts +10 -0
  220. package/dist/url-normalize.d.ts.map +1 -0
  221. package/dist/url-normalize.js +52 -0
  222. package/dist/url-normalize.js.map +1 -0
  223. package/package.json +46 -0
@@ -0,0 +1,73 @@
1
+ import { clusterKeyForUrl } from "./cluster-key.js";
2
/** File names that count as the index page of a directory. */
const INDEX_NAMES = ["index.html", "index.htm"];
/**
 * Builds the candidate index-page locations for a cluster directory.
 *
 * @param clusterDir Directory (URL or file-system path) that identifies the cluster.
 * @param pageUrl Any page of the cluster; only used to decide between URL and
 *   file-system handling and, for paths, which separator to use.
 * @returns One candidate per entry of `INDEX_NAMES`, or an empty array when
 *   `pageUrl` is an http(s) URL but `clusterDir` cannot be parsed as a URL.
 */
function indexUrlsForCluster(clusterDir, pageUrl) {
    if (/^https?:\/\//i.test(pageUrl)) {
        try {
            const base = new URL(clusterDir);
            // Relative resolution replaces the last path segment of a base that
            // does not end in "/", which would place the index one level too
            // high ("/blog" -> "/index.html"). Treat the cluster as a directory,
            // exactly like the file-system branch below does.
            if (!base.pathname.endsWith("/")) {
                base.pathname += "/";
            }
            return INDEX_NAMES.map((name) => new URL(name, base).href);
        }
        catch {
            // Unparseable cluster key: no candidates, so no index can be matched.
            return [];
        }
    }
    // File-system path: keep the separator style used by the page itself.
    const sep = pageUrl.includes("\\") ? "\\" : "/";
    const dir = clusterDir.replace(/[/\\]+$/, "");
    return INDEX_NAMES.map((name) => `${dir}${sep}${name}`);
}
17
/**
 * Checks that every medium-sized directory cluster has a hub or index page.
 *
 * Pages are grouped by `clusterKeyForUrl(page.url)`. Clusters with fewer than
 * `minSiblings` pages are ignored. Clusters with more than `maxSiblings` pages
 * are not analysed and produce an informational "skipped" finding instead.
 * For the remaining clusters a hub exists when either an index candidate from
 * `indexUrlsForCluster` is in `knownUrls`, or one member links to every other
 * member of the cluster.
 *
 * @param pages Parsed pages; `url` and `resolvedHrefs` are read.
 * @param knownUrls Collection with a `has(url)` method listing known URLs.
 * @param minSiblings Smallest cluster size that is checked.
 * @param maxSiblings Largest cluster size that is checked.
 * @returns Findings in cluster first-seen order.
 */
export function hubPagesRule(pages, knownUrls, minSiblings, maxSiblings) {
    const findings = [];
    if (pages.length === 0) {
        return findings;
    }
    // Bucket pages per cluster; Map keeps the order in which clusters first appear.
    const clusters = new Map();
    for (const page of pages) {
        const clusterId = clusterKeyForUrl(page.url);
        const members = clusters.get(clusterId);
        if (members === undefined) {
            clusters.set(clusterId, [page]);
        }
        else {
            members.push(page);
        }
    }
    clusters.forEach((members, clusterDir) => {
        const size = members.length;
        if (size < minSiblings) {
            return;
        }
        if (size > maxSiblings) {
            findings.push({
                ruleId: "links/hub-pages-skipped",
                severity: "info",
                message: `Hub/index check skipped for cluster ${clusterDir} (${size} pages > max ${maxSiblings}).`,
                relatedUrls: members.map((member) => member.url).sort(),
                fix: "Create an index or hub page for this directory that links to all child pages."
            });
            return;
        }
        const siblingUrls = new Set(members.map((member) => member.url));
        const indexExists = indexUrlsForCluster(clusterDir, members[0].url)
            .some((candidate) => knownUrls.has(candidate));
        // A page is a hub when its known, in-cluster links plus itself cover the cluster.
        const isHub = (page) => {
            const covered = new Set(page.resolvedHrefs.filter((href) => siblingUrls.has(href) && knownUrls.has(href)));
            covered.add(page.url);
            return [...siblingUrls].every((sibling) => covered.has(sibling));
        };
        if (indexExists || members.some(isHub)) {
            return;
        }
        findings.push({
            ruleId: "links/hub-pages",
            severity: "warning",
            message: `No hub/index page detected for cluster ${clusterDir} (${size} pages).`,
            relatedUrls: [...siblingUrls].sort(),
            fix: "Create an index or hub page for this directory that links to all child pages."
        });
    });
    return findings;
}
73
+ //# sourceMappingURL=hub-pages.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hub-pages.js","sourceRoot":"","sources":["../../../src/rules/links/hub-pages.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,MAAM,WAAW,GAAG,CAAC,YAAY,EAAE,WAAW,CAAU,CAAC;AAEzD,SAAS,mBAAmB,CAAC,UAAkB,EAAE,OAAe;IAC9D,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;YACjC,OAAO,WAAW,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;QAC7D,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;IAChD,MAAM,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAC5C,OAAO,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC;AAClD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAC1B,KAAmB,EACnB,SAAsB,EACtB,WAAmB,EACnB,WAAmB;IAEnB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,EAAwB,CAAC;IAClD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACb,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,SAAS,CAAC,OAAO,EAAE,EAAE,CAAC;QACtD,IAAI,KAAK,CAAC,MAAM,GAAG,WAAW,EAAE,CAAC;YAC/B,SAAS;QACX,CAAC;QAED,IAAI,KAAK,CAAC,MAAM,GAAG,WAAW,EAAE,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,yBAAyB;gBACjC,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,uCAAuC,UAAU,KAAK,KAAK,CAAC,MAAM,gBAAgB,WAAW,IAAI;gBAC1G,WAAW,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE;gBAC3C,GAAG,EAAE,+EAA+E;aACrF,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACrD,MAAM,eAAe,GAAG,mBAAmB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;
QACtE,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAE/D,MAAM,kBAAkB,GAAG,CAAC,IAAgB,EAAW,EAAE;YACvD,MAAM,MAAM,GAAG,IAAI,GAAG,CACpB,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CACzE,CAAC;YACF,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrB,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;gBAC5B,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnB,OAAO,KAAK,CAAC;gBACf,CAAC;YACH,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC;QAEF,MAAM,MAAM,GAAG,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC;QAEpE,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,iBAAiB;gBACzB,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,0CAA0C,UAAU,KAAK,KAAK,CAAC,MAAM,UAAU;gBACxF,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE;gBAC3C,GAAG,EAAE,+EAA+E;aACrF,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
/**
 * Link-depth rule: walks `adjacency` breadth-first from `rootUrl` and reports
 * pages that have inbound links but are not reached from the root, plus pages
 * whose click depth from the root is greater than `maxDepth`.
 */
+ export declare function linkDepthRule(pages: ParsedPage[], adjacency: Map<string, Set<string>>, rootUrl: string, maxDepth: number, inbound: Map<string, number>): RuleResult[];
3
+ //# sourceMappingURL=link-depth.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"link-depth.d.ts","sourceRoot":"","sources":["../../../src/rules/links/link-depth.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,aAAa,CAC3B,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,EACnC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAC3B,UAAU,EAAE,CAgDd"}
@@ -0,0 +1,46 @@
1
+ export function linkDepthRule(pages, adjacency, rootUrl, maxDepth, inbound) {
2
+ const queue = [{ url: rootUrl, depth: 0 }];
3
+ const visited = new Map([[rootUrl, 0]]);
4
+ while (queue.length > 0) {
5
+ const current = queue.shift();
6
+ if (!current) {
7
+ continue;
8
+ }
9
+ const neighbors = adjacency.get(current.url) ?? new Set();
10
+ for (const next of neighbors) {
11
+ const existing = visited.get(next);
12
+ const candidate = current.depth + 1;
13
+ if (existing !== undefined && existing <= candidate) {
14
+ continue;
15
+ }
16
+ visited.set(next, candidate);
17
+ queue.push({ url: next, depth: candidate });
18
+ }
19
+ }
20
+ const unreachable = pages
21
+ .filter((page) => page.url !== rootUrl)
22
+ .filter((page) => (inbound.get(page.url) ?? 0) > 0)
23
+ .filter((page) => visited.get(page.url) === undefined)
24
+ .map((page) => ({
25
+ ruleId: "links/unreachable-from-root",
26
+ severity: "warning",
27
+ message: `${page.url} is not reachable from the crawl root via internal links (but has inbound links).`,
28
+ pageUrl: page.url,
29
+ fix: "This page is unreachable from the site root. Add a navigation path to it."
30
+ }));
31
+ const deep = pages
32
+ .filter((page) => page.url !== rootUrl)
33
+ .filter((page) => {
34
+ const d = visited.get(page.url);
35
+ return d !== undefined && d > maxDepth;
36
+ })
37
+ .map((page) => ({
38
+ ruleId: "links/link-depth",
39
+ severity: "info",
40
+ message: `${page.url} is deeper than ${maxDepth} clicks from root.`,
41
+ pageUrl: page.url,
42
+ fix: "Reduce click depth by linking from a higher-level page."
43
+ }));
44
+ return [...unreachable, ...deep];
45
+ }
46
+ //# sourceMappingURL=link-depth.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"link-depth.js","sourceRoot":"","sources":["../../../src/rules/links/link-depth.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,aAAa,CAC3B,KAAmB,EACnB,SAAmC,EACnC,OAAe,EACf,QAAgB,EAChB,OAA4B;IAE5B,MAAM,KAAK,GAA0C,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;IAClF,MAAM,OAAO,GAAG,IAAI,GAAG,CAAiB,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAExD,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;QAC9B,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QACD,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAU,CAAC;QAClE,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC;YACpC,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACpD,SAAS;YACX,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,KAAK;SACtB,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,KAAK,OAAO,CAAC;SACtC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;SAClD,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,SAAS,CAAC;SACrD,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACd,MAAM,EAAE,6BAAsC;QAC9C,QAAQ,EAAE,SAAkB;QAC5B,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mFAAmF;QACvG,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,2EAA2E;KACjF,CAAC,CAAC,CAAC;IAEN,MAAM,IAAI,GAAG,KAAK;SACf,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,KAAK,OAAO,CAAC;SACtC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACf,MAAM,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAChC,OAAO,CAAC,KAAK,SAAS,IAAI,CAAC,GAAG,QAAQ,CAAC;IACzC,CAAC,CAAC;SACD,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACd,MAAM,EAAE,kBAAkB;QAC1B,QAAQ,EAAE,MAAe;QACzB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mBAAmB,QAAQ,oBAAoB;QACnE,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,yDAAyD;KAC/D,CAAC,CAAC,CAAC;IAEN,OAAO,CAAC,GAAG,WAAW,
EAAE,GAAG,IAAI,CAAC,CAAC;AACnC,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
/**
 * Orphan-page rule: reports every page whose inbound-link count is zero or
 * missing from `inboundLinks`. When `rootUrl` is given, that page is exempt.
 */
+ export declare function orphanPagesRule(pages: ParsedPage[], inboundLinks: Map<string, number>, rootUrl?: string): RuleResult[];
3
+ //# sourceMappingURL=orphan-pages.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM,GACf,UAAU,EAAE,CAmBd"}
@@ -0,0 +1,19 @@
1
+ export function orphanPagesRule(pages, inboundLinks, rootUrl) {
2
+ const findings = [];
3
+ for (const page of pages) {
4
+ if (rootUrl && page.url === rootUrl) {
5
+ continue;
6
+ }
7
+ if ((inboundLinks.get(page.url) ?? 0) === 0) {
8
+ findings.push({
9
+ ruleId: "links/orphan-pages",
10
+ severity: "error",
11
+ message: `${page.url} has no inbound links from other pages in this crawl.`,
12
+ pageUrl: page.url,
13
+ fix: "Link to this page from a relevant hub or index page, and include it in your site navigation."
14
+ });
15
+ }
16
+ }
17
+ return findings;
18
+ }
19
+ //# sourceMappingURL=orphan-pages.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,IAAI,IAAI,CAAC,GAAG,KAAK,OAAO,EAAE,CAAC;YACpC,SAAS;QACX,CAAC;QACD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,oBAAoB;gBAC5B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;gBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,8FAA8F;aACpG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
/**
 * Schema-consistency rule: emits a single informational finding when at least
 * two pages declare a JSON-LD `@type` and more than one distinct type is in use.
 */
+ export declare function schemaConsistencyRule(pages: ParsedPage[]): RuleResult[];
3
+ //# sourceMappingURL=consistency.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAiDvE"}
@@ -0,0 +1,44 @@
1
/**
 * Flags crawls whose pages use different JSON-LD `@type` values.
 *
 * Each page's types are collected from its `jsonLd` entries. Non-object
 * entries and entries marked `__parseError: true` are ignored. `@type` may be
 * a single string or, as JSON-LD permits, an array of strings; blank and
 * non-string values are skipped. Nothing is reported unless at least two
 * pages declare a type and more than one distinct type exists overall.
 *
 * @param pages Parsed pages; `url` and `jsonLd` are read.
 * @returns Empty, or one `schema/consistency` info finding listing the types
 *   (sorted) and the URLs of all pages that declare a type.
 */
export function schemaConsistencyRule(pages) {
    const findings = [];
    const typesByPage = new Map();
    for (const page of pages) {
        const types = new Set();
        for (const entry of page.jsonLd) {
            if (typeof entry !== "object" || entry === null) {
                continue;
            }
            if ("__parseError" in entry && entry.__parseError === true) {
                continue;
            }
            // Normalise the single-string form to a list so both shapes of
            // @type go through the same check.
            const declared = entry["@type"];
            const candidates = Array.isArray(declared) ? declared : [declared];
            for (const candidate of candidates) {
                if (typeof candidate === "string" && candidate.trim() !== "") {
                    types.add(candidate);
                }
            }
        }
        if (types.size > 0) {
            typesByPage.set(page.url, types);
        }
    }
    // A single typed page cannot be inconsistent with anything.
    if (typesByPage.size < 2) {
        return findings;
    }
    const allTypes = new Set();
    for (const types of typesByPage.values()) {
        for (const type of types) {
            allTypes.add(type);
        }
    }
    if (allTypes.size <= 1) {
        return findings;
    }
    const typeList = Array.from(allTypes).sort().join(", ");
    findings.push({
        ruleId: "schema/consistency",
        severity: "info",
        message: `Pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
        relatedUrls: Array.from(typesByPage.keys()),
        fix: `Use a consistent @type across all template pages, or separate pages into groups with different schema types.`
    });
    return findings;
}
44
+ //# sourceMappingURL=consistency.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAuB,CAAC;IAEnD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YACnB,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC;QACzC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;QACvB,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxD,QAAQ,CAAC,IAAI,CAAC;QACZ,MAAM,EAAE,oBAAoB;QAC5B,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE,iCAAiC,QAAQ,6DAA6D;QAC/G,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QAC3C,GAAG,EAAE,8GAA8G;KACpH,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
/**
 * JSON-LD validity rule: reports blocks that failed to parse, blocks without
 * an `@context`, and blocks whose `@type` is present but not valid.
 */
+ export declare function jsonLdValidRule(pages: ParsedPage[]): RuleResult[];
3
+ //# sourceMappingURL=json-ld-valid.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAqDjE"}
@@ -0,0 +1,47 @@
1
/**
 * Tells whether a JSON-LD `@type` value is well-formed: either a non-blank
 * string or a non-empty array made up only of non-blank strings.
 */
function isValidSchemaType(value) {
    const isNonBlank = (item) => typeof item === "string" && item.trim() !== "";
    if (Array.isArray(value)) {
        return value.length > 0 && value.every(isNonBlank);
    }
    return isNonBlank(value);
}
/**
 * Validates the basic structure of every JSON-LD block on every page.
 *
 * Per block, in order:
 *  - an entry marked `__parseError: true` yields a "malformed JSON-LD" error
 *    and is not inspected further;
 *  - non-object entries are ignored;
 *  - a missing or falsy `@context` yields an error;
 *  - a present but invalid `@type` yields an error. JSON-LD allows a node to
 *    carry several types as an array, so arrays of type names are accepted.
 *
 * @param pages Parsed pages; `url` and `jsonLd` are read.
 * @returns `schema/json-ld-valid` error findings in page and block order.
 */
export function jsonLdValidRule(pages) {
    const findings = [];
    for (const page of pages) {
        for (const entry of page.jsonLd) {
            if (typeof entry !== "object" || entry === null) {
                continue;
            }
            if ("__parseError" in entry && entry.__parseError === true) {
                findings.push({
                    ruleId: "schema/json-ld-valid",
                    severity: "error",
                    message: `${page.url} contains malformed JSON-LD that could not be parsed.`,
                    pageUrl: page.url,
                    fix: `Fix the JSON syntax in the <script type="application/ld+json"> block. Validate it at https://validator.schema.org/.`
                });
                continue;
            }
            if (!entry["@context"]) {
                findings.push({
                    ruleId: "schema/json-ld-valid",
                    severity: "error",
                    message: `${page.url} has a JSON-LD block missing the required @context property.`,
                    pageUrl: page.url,
                    fix: `Add "@context": "https://schema.org" to the JSON-LD block.`
                });
            }
            // An absent @type is not an error here; only a malformed one is.
            const typeValue = entry["@type"];
            if (typeValue !== undefined && !isValidSchemaType(typeValue)) {
                findings.push({
                    ruleId: "schema/json-ld-valid",
                    severity: "error",
                    message: `${page.url} has a JSON-LD block with an invalid @type value.`,
                    pageUrl: page.url,
                    fix: `Set @type to a valid Schema.org type like "Article", "Product", or "FAQPage".`
                });
            }
        }
    }
    return findings;
}
47
+ //# sourceMappingURL=json-ld-valid.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;oBAC7D,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,sBAAsB;wBAC9B,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mDAAmD;wBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,GAAG,EAAE,+EAA+E;qBACrF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
/**
 * Required-fields rule: for JSON-LD blocks typed `Article`, `Product` or
 * `FAQPage`, reports the required properties that are missing or empty.
 */
+ export declare function requiredFieldsRule(pages: ParsedPage[]): RuleResult[];
3
+ //# sourceMappingURL=required-fields.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAqB7D,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoDpE"}
@@ -0,0 +1,60 @@
1
/**
 * Fields that must be filled in for each supported schema.org `@type`.
 * Types that are not listed here are not checked at all.
 *
 * Kept in a Map instead of an object literal: `@type` comes from page content,
 * and a value such as "constructor" or "__proto__" must not resolve to a
 * member inherited from Object.prototype.
 */
const REQUIRED_FIELDS = new Map([
    ["Article", ["headline", "author", "datePublished"]],
    ["Product", ["name"]],
    ["FAQPage", ["mainEntity"]]
]);

/** True when a JSON-LD value counts as filled in: anything except undefined, null and "". */
function isPresent(value) {
    return value !== undefined && value !== null && value !== "";
}

/**
 * Reports whether a Product node carries a price, either directly on the node
 * or on its `offers`.
 *
 * `offers` may be a single offer object or an array of offer objects; one
 * offer with a filled-in `price` is enough.
 *
 * @param obj Parsed JSON-LD object.
 * @returns true when a filled-in `price` was found.
 */
function hasPrice(obj) {
    if (isPresent(obj.price)) {
        return true;
    }
    // Normalise to a list so single-offer and multi-offer markup share one path.
    const offers = Array.isArray(obj.offers) ? obj.offers : [obj.offers];
    return offers.some((offer) => typeof offer === "object" && offer !== null && isPresent(offer.price));
}

/**
 * Checks every JSON-LD entry of every page against REQUIRED_FIELDS.
 *
 * Entries are skipped when they are not objects, are flagged with
 * `__parseError === true`, have no string `@type`, or have a type that is not
 * listed in REQUIRED_FIELDS. Product entries additionally need a price
 * (see hasPrice).
 *
 * @param pages Parsed pages; each page's `url` and `jsonLd` are read.
 * @returns One "schema/required-fields" warning per entry with missing fields.
 */
export function requiredFieldsRule(pages) {
    const findings = [];
    for (const page of pages) {
        for (const entry of page.jsonLd) {
            if (typeof entry !== "object" || entry === null) {
                continue;
            }
            const obj = entry;
            if ("__parseError" in obj && obj.__parseError === true) {
                continue;
            }
            const schemaType = typeof obj["@type"] === "string" ? obj["@type"] : null;
            if (!schemaType) {
                continue;
            }
            // Map lookup: unknown types (including Object.prototype member names) yield undefined.
            const required = REQUIRED_FIELDS.get(schemaType);
            if (!required) {
                continue;
            }
            const missing = required.filter((field) => !isPresent(obj[field]));
            if (schemaType === "Product" && !hasPrice(obj)) {
                missing.push("price");
            }
            if (missing.length > 0) {
                const missingList = missing.join(", ");
                findings.push({
                    ruleId: "schema/required-fields",
                    severity: "warning",
                    message: `${page.url} has a ${schemaType} schema missing required fields: ${missingList}.`,
                    pageUrl: page.url,
                    fix: `Add the missing fields to your ${schemaType} schema: ${missingList}.`
                });
            }
        }
    }
    return findings;
}
60
+ //# sourceMappingURL=required-fields.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC;oBACzE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;YAED,IAAI,UAAU,KAAK,SAAS,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,wBAAwB;oBAChC,QAAQ,EAAE,SAAS;oBACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,UAAU,UAAU,oCAAoC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;oBACjG,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,
EAAE,kCAAkC,UAAU,YAAY,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;iBACnF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
/**
 * Flags pages whose text is dominated by blocks shared across the page set.
 *
 * @param pages Parsed pages to compare; fewer than two pages yields no findings.
 * @param maxRatio Share of a page's words (0..1 scale, as compared in the
 *   implementation) that may come from shared blocks before the page is flagged.
 * @returns One "spam/boilerplate-ratio" finding per page above `maxRatio`.
 */
+ export declare function boilerplateRatioRule(pages: ParsedPage[], maxRatio: number): RuleResult[];
3
+ //# sourceMappingURL=boilerplate-ratio.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAS7D,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,MAAM,GAAG,UAAU,EAAE,CAoDxF"}
@@ -0,0 +1,50 @@
1
/**
 * Splits page text into comparable blocks: the text is cut after sentence
 * punctuation followed by whitespace and at line breaks, each piece is trimmed
 * and lower-cased, and pieces of 20 characters or fewer are dropped.
 *
 * @param contentText Plain text of one page.
 * @returns The normalised blocks, in document order (duplicates retained).
 */
function extractTextBlocks(contentText) {
    return contentText
        .split(/[.!?]\s+|\n+/)
        .map((block) => block.trim().toLowerCase())
        .filter((block) => block.length > 20);
}

/** Sums the whitespace-separated word counts of the given blocks. */
function countWords(blocks) {
    return blocks.reduce((sum, block) => sum + block.split(/\s+/).length, 0);
}

/**
 * Flags pages whose content is mostly template text shared with other pages.
 *
 * A block belongs to the shared "skeleton" when it occurs on at least
 * `max(2, floor(pages.length * 0.8) + 1)` pages. For each page the ratio of
 * skeleton words to all block words is computed, and pages whose ratio is
 * strictly greater than `maxRatio` are reported.
 *
 * @param pages Parsed pages; each page's `url` and `contentText` are read.
 * @param maxRatio Highest tolerated boilerplate ratio (compared against a 0..1 ratio).
 * @returns One "spam/boilerplate-ratio" error per offending page; an empty
 *   array when fewer than two pages are given or no block is shared widely enough.
 */
export function boilerplateRatioRule(pages, maxRatio) {
    if (pages.length < 2) {
        return [];
    }
    const pageBlocks = pages.map((page) => extractTextBlocks(page.contentText));

    // Count on how many pages each block appears (once per page, hence the Set).
    const blockFrequency = new Map();
    for (const blocks of pageBlocks) {
        for (const block of new Set(blocks)) {
            blockFrequency.set(block, (blockFrequency.get(block) ?? 0) + 1);
        }
    }

    const skeletonCutoff = Math.max(2, Math.floor(pages.length * 0.8) + 1);
    const skeleton = new Set();
    for (const [block, count] of blockFrequency) {
        if (count >= skeletonCutoff) {
            skeleton.add(block);
        }
    }
    if (skeleton.size === 0) {
        return [];
    }

    const findings = [];
    pages.forEach((page, index) => {
        const blocks = pageBlocks[index];
        const totalWords = countWords(blocks);
        // Pages without any usable block have nothing to measure.
        if (totalWords === 0) {
            return;
        }
        const boilerplateWords = countWords(blocks.filter((block) => skeleton.has(block)));
        const ratio = boilerplateWords / totalWords;
        if (ratio > maxRatio) {
            const ratioPct = (ratio * 100).toFixed(1);
            const maxPct = (maxRatio * 100).toFixed(1);
            findings.push({
                ruleId: "spam/boilerplate-ratio",
                severity: "error",
                message: `${page.url} has boilerplate ratio ${ratioPct}% (max ${maxPct}%).`,
                // Identify the offending page in a structured field, like the other per-page rules do.
                pageUrl: page.url,
                fix: `${ratioPct}% of this page's content is shared template text. Reduce repeated boilerplate sections or add unique content blocks—introductions, case studies, or page-specific data—to bring the ratio below ${maxPct}%.`
            });
        }
    });
    return findings;
}
50
+ //# sourceMappingURL=boilerplate-ratio.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IACvE,MAAM,QAAQ,GAAG,IAAI,GAAG,CACtB,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,CAAC;SACjC,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,KAAK,IAAI,cAAc,CAAC;SAC9C,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAC3B,CAAC;IAEF,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO;QACT,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC7E,MAAM,gBAAgB,GAAG,MAAM;aAC5B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;aAC9B,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAEtD,IAAI,UAAU,KAAK,CAAC;YAAE,OAAO;QAE7B,MAAM,KAAK,GAAG,gBAAgB,GAAG,UAAU,CAAC;QAC5C,IAAI,KAAK
,GAAG,QAAQ,EAAE,CAAC;YACrB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,wBAAwB;gBAChC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,0BAA0B,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBAChH,GAAG,EAAE,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mMAAmM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;aACnQ,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,4 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
+ import type { PairMatch } from "./near-duplicate.js";
3
/**
 * Reports page pairs that match several spam signals at once.
 *
 * @param nearDuplicatePairs Pairs matched by the near-duplicate rule.
 * @param entitySwapPairs Pairs matched by the entity-swap rule; only pairs
 *   present in both lists are considered.
 * @param thinContentUrls URLs treated as thin content; adds a signal when
 *   either page of a pair is in the set.
 * @param pages Optional parsed pages, looked up by URL to compare
 *   `structureSignature` and `metaDescription` of the two pages.
 * @returns One "spam/doorway-pattern" finding per pair with at least three signals.
 */
+ export declare function doorwayPatternRule(nearDuplicatePairs: PairMatch[], entitySwapPairs: PairMatch[], thinContentUrls: Set<string>, pages?: ParsedPage[]): RuleResult[];
4
+ //# sourceMappingURL=doorway-pattern.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"doorway-pattern.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/doorway-pattern.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC7D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAMrD,wBAAgB,kBAAkB,CAChC,kBAAkB,EAAE,SAAS,EAAE,EAC/B,eAAe,EAAE,SAAS,EAAE,EAC5B,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,EAC5B,KAAK,CAAC,EAAE,UAAU,EAAE,GACnB,UAAU,EAAE,CAoDd"}
@@ -0,0 +1,47 @@
1
/**
 * Builds an order-independent key for a pair of URLs: the smaller value comes
 * first, separated from the larger one by a NUL character.
 */
function pairKey(left, right) {
    if (left < right) {
        return `${left}\0${right}`;
    }
    return `${right}\0${left}`;
}
4
/**
 * Escalates page pairs that trip several spam signals at once.
 *
 * Only pairs present in both `nearDuplicatePairs` and `entitySwapPairs` are
 * considered, so every candidate starts with two signals. Further signals:
 * "thin-content" (either URL is in `thinContentUrls`), "identical-structure"
 * and "identical-meta" (both need the two pages to be found in `pages`).
 * A finding is emitted when a pair collects at least three signals.
 *
 * @param nearDuplicatePairs Pairs matched by the near-duplicate rule.
 * @param entitySwapPairs Pairs matched by the entity-swap rule.
 * @param thinContentUrls URLs considered thin content.
 * @param pages Optional pages used for the structure and meta comparisons.
 * @returns One critical "spam/doorway-pattern" finding per qualifying pair.
 */
export function doorwayPatternRule(nearDuplicatePairs, entitySwapPairs, thinContentUrls, pages) {
    // Order-independent keys of every pair the entity-swap rule matched.
    const swappedKeys = new Set();
    for (const swapped of entitySwapPairs) {
        swappedKeys.add(pairKey(swapped.leftUrl, swapped.rightUrl));
    }

    // URL -> page lookup; stays empty when `pages` is not supplied.
    const pagesByUrl = new Map();
    for (const page of pages || []) {
        pagesByUrl.set(page.url, page);
    }

    const findings = [];
    for (const candidate of nearDuplicatePairs) {
        if (!swappedKeys.has(pairKey(candidate.leftUrl, candidate.rightUrl))) {
            continue;
        }

        // Report the two URLs in a stable (sorted) order.
        const alreadyOrdered = candidate.leftUrl < candidate.rightUrl;
        const left = alreadyOrdered ? candidate.leftUrl : candidate.rightUrl;
        const right = alreadyOrdered ? candidate.rightUrl : candidate.leftUrl;

        const signals = ["near-duplicate", "entity-swap"];
        if (thinContentUrls.has(left) || thinContentUrls.has(right)) {
            signals.push("thin-content");
        }

        const leftPage = pagesByUrl.get(left);
        const rightPage = pagesByUrl.get(right);
        if (leftPage && rightPage) {
            // NOTE(review): two pages that both lack a structureSignature also compare
            // equal here — confirm the field is always populated.
            if (leftPage.structureSignature === rightPage.structureSignature) {
                signals.push("identical-structure");
            }
            // Empty or missing descriptions never count as identical.
            if (leftPage.metaDescription && rightPage.metaDescription &&
                leftPage.metaDescription === rightPage.metaDescription) {
                signals.push("identical-meta");
            }
        }

        if (signals.length >= 3) {
            findings.push({
                ruleId: "spam/doorway-pattern",
                severity: "critical",
                message: `${left} and ${right} match doorway-pattern signals (${signals.join(" + ")}).`,
                relatedUrls: [left, right],
                fix: "This page matches multiple spam signals. Prioritize adding unique, substantive content and differentiating the page structure."
            });
        }
    }
    return findings;
}
47
+ //# sourceMappingURL=doorway-pattern.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"doorway-pattern.js","sourceRoot":"","sources":["../../../src/rules/spam/doorway-pattern.ts"],"names":[],"mappings":"AAGA,SAAS,OAAO,CAAC,IAAY,EAAE,KAAa;IAC1C,OAAO,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,KAAK,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,IAAI,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,kBAA+B,EAC/B,eAA4B,EAC5B,eAA4B,EAC5B,KAAoB;IAEpB,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAC/F,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAsB,CAAC;IAC9C,IAAI,KAAK,EAAE,CAAC;QACV,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,kBAAkB,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACjD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,SAAS;QACX,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;QACzE,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;QAC1E,MAAM,OAAO,GAAa,CAAC,gBAAgB,EAAE,aAAa,CAAC,CAAC;QAE5D,MAAM,MAAM,GAAG,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QACvE,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC/B,CAAC;QAED,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAErC,IAAI,QAAQ,IAAI,SAAS,IAAI,QAAQ,CAAC,kBAAkB,KAAK,SAAS,CAAC,kBAAkB,EAAE,CAAC;YAC1F,OAAO,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,QAAQ,IAAI,SAAS,IAAI,QAAQ,CAAC,eAAe,IAAI,SAAS,CAAC,eAAe;YAC9E,QAAQ,CAAC,eAAe,KAAK,SAAS,CAAC,eAAe,EAAE,CAAC;YAC3D,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QACjC,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,sBAAsB;YAC9B,QAAQ,EAAE,UAAU;YACpB,OAAO,EAAE,GAAG,IAAI,QAAQ,KAAK,mCAAmC,OAAO,CAAC,IAAI,C
AAC,KAAK,CAAC,IAAI;YACvF,WAAW,EAAE,CAAC,IAAI,EAAE,KAAK,CAAC;YAC1B,GAAG,EAAE,gIAAgI;SACtI,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { EntityMaskPattern, ParsedPage, RuleResult } from "../../types.js";
2
+ import type { PairMatch } from "./near-duplicate.js";
3
/**
 * Compares every pair of pages after entity masking and reports pairs whose
 * similarity reaches `threshold`.
 *
 * @param pages Parsed pages to compare pairwise.
 * @param patterns Entity-mask patterns applied to each page's text before hashing.
 * @param threshold Minimum similarity (inclusive) for a pair to be reported.
 * @returns `findings` ("spam/entity-swap" results) and the matching `pairs`.
 */
+ export declare function entitySwapRule(pages: ParsedPage[], patterns: EntityMaskPattern[], threshold: number): {
4
+ findings: RuleResult[];
5
+ pairs: PairMatch[];
6
+ };
7
+ //# sourceMappingURL=entity-swap.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAErD,wBAAgB,cAAc,CAC5B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,SAAS,EAAE,MAAM,GAChB;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,CAwBhD"}
@@ -0,0 +1,26 @@
1
+ import { maskEntities } from "../../algorithms/entity-mask.js";
2
+ import { hammingDistance, simHashFromText, similarityFromDistance } from "../../algorithms/simhash.js";
3
/**
 * Detects pages that only differ by the entities they mention.
 *
 * Each page's text is passed through `maskEntities` with the given patterns
 * and hashed with `simHashFromText`; every unordered pair of pages is then
 * compared, and pairs whose similarity is at least `threshold` are reported.
 *
 * @param pages Parsed pages; `url` and `contentText` are read.
 * @param patterns Entity-mask patterns handed to `maskEntities`.
 * @param threshold Minimum similarity (inclusive) for a match.
 * @returns `findings` with one critical "spam/entity-swap" entry per matching
 *   pair, and `pairs` with the corresponding URL pairs and similarities.
 */
export function entitySwapRule(pages, patterns, threshold) {
    const maskedHashes = pages.map((page) => simHashFromText(maskEntities(page.contentText, patterns)));
    const findings = [];
    const pairs = [];
    pages.forEach((leftPage, leftIndex) => {
        // Only look at later pages so each unordered pair is compared once.
        for (let rightIndex = leftIndex + 1; rightIndex < pages.length; rightIndex += 1) {
            const rightPage = pages[rightIndex];
            const distance = hammingDistance(maskedHashes[leftIndex], maskedHashes[rightIndex]);
            const similarity = similarityFromDistance(distance);
            if (similarity < threshold) {
                continue;
            }
            pairs.push({ leftUrl: leftPage.url, rightUrl: rightPage.url, similarity });
            findings.push({
                ruleId: "spam/entity-swap",
                severity: "critical",
                message: `${leftPage.url} and ${rightPage.url} look structurally identical after entity masking.`,
                pageUrl: leftPage.url,
                relatedUrls: [rightPage.url],
                similarity,
                fix: "These pages are identical after masking entity names. Add entity-specific content: local regulations, statistics, fees, or requirements unique to each entity."
            });
        }
    });
    return { findings, pairs };
}
26
+ //# sourceMappingURL=entity-swap.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBAC1E,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,kBAAkB;oBAC1B,QAAQ,EAAE,UAAU;oBACpB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,oDAAoD;oBAChG,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;oBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC3B,UAAU;oBACV,GAAG,EAAE,gKAAgK;iBACtK,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7B,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
/**
 * Two pages that a pairwise rule matched, with the similarity score that was
 * compared against the rule's threshold.
 */
+ export interface PairMatch {
3
+ leftUrl: string;
4
+ rightUrl: string;
5
+ similarity: number;
6
+ }
7
/**
 * Compares every pair of pages by the hash of their text and reports pairs
 * whose similarity reaches `threshold`.
 *
 * @param pages Parsed pages to compare pairwise.
 * @param threshold Minimum similarity (inclusive) for a pair to be reported.
 * @returns `findings` ("spam/near-duplicate" results) and the matching `pairs`.
 */
+ export declare function nearDuplicateRule(pages: ParsedPage[], threshold: number): {
8
+ findings: RuleResult[];
9
+ pairs: PairMatch[];
10
+ };
11
+ //# sourceMappingURL=near-duplicate.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"near-duplicate.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/near-duplicate.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,iBAAiB,CAC/B,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,MAAM,GAChB;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,CAwBhD"}
@@ -0,0 +1,25 @@
1
+ import { hammingDistance, simHashFromText, similarityFromDistance } from "../../algorithms/simhash.js";
2
/**
 * Detects pages whose text is nearly the same.
 *
 * Every page's `contentText` is hashed with `simHashFromText`; each unordered
 * pair of pages is compared, and pairs whose similarity is at least
 * `threshold` are reported.
 *
 * @param pages Parsed pages; `url` and `contentText` are read.
 * @param threshold Minimum similarity (inclusive) for a match.
 * @returns `findings` with one critical "spam/near-duplicate" entry per
 *   matching pair, and `pairs` with the corresponding URL pairs and similarities.
 */
export function nearDuplicateRule(pages, threshold) {
    const contentHashes = pages.map((page) => simHashFromText(page.contentText));
    const findings = [];
    const pairs = [];
    for (let first = 0; first < pages.length; first += 1) {
        const firstUrl = pages[first].url;
        // Start after `first` so each unordered pair is visited exactly once.
        for (let second = first + 1; second < pages.length; second += 1) {
            const distance = hammingDistance(contentHashes[first], contentHashes[second]);
            const similarity = similarityFromDistance(distance);
            if (similarity < threshold) {
                continue;
            }
            const secondUrl = pages[second].url;
            pairs.push({ leftUrl: firstUrl, rightUrl: secondUrl, similarity });
            findings.push({
                ruleId: "spam/near-duplicate",
                severity: "critical",
                message: `${firstUrl} and ${secondUrl} are near-duplicates (${(similarity * 100).toFixed(1)}% similar).`,
                pageUrl: firstUrl,
                relatedUrls: [secondUrl],
                similarity,
                fix: "Differentiate these pages with unique content. Add page-specific details, data, examples, or analysis that the other page doesn't have."
            });
        }
    }
    return { findings, pairs };
}
25
+ //# sourceMappingURL=near-duplicate.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"near-duplicate.js","sourceRoot":"","sources":["../../../src/rules/spam/near-duplicate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AASvG,MAAM,UAAU,iBAAiB,CAC/B,KAAmB,EACnB,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAEtE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBAC1E,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,qBAAqB;oBAC7B,QAAQ,EAAE,UAAU;oBACpB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,yBAAyB,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,aAAa;oBAC/G,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;oBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC3B,UAAU;oBACV,GAAG,EAAE,yIAAyI;iBAC/I,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7B,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ParsedPage, RuleResult } from "../../types.js";
2
/**
 * Declaration of the publication-velocity rule; the implementation is not
 * part of this declaration file.
 *
 * NOTE(review): `maxPerDay` presumably caps how many pages may share one
 * publication day before findings are raised — confirm against
 * publication-velocity.js.
 */
+ export declare function publicationVelocityRule(pages: ParsedPage[], maxPerDay: number): RuleResult[];
3
+ //# sourceMappingURL=publication-velocity.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"publication-velocity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/publication-velocity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,UAAU,EAAE,CAyB5F"}