@pseolint/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +53 -0
- package/dist/algorithms/entity-mask.d.ts +3 -0
- package/dist/algorithms/entity-mask.d.ts.map +1 -0
- package/dist/algorithms/entity-mask.js +8 -0
- package/dist/algorithms/entity-mask.js.map +1 -0
- package/dist/algorithms/entity-mask.test.d.ts +2 -0
- package/dist/algorithms/entity-mask.test.d.ts.map +1 -0
- package/dist/algorithms/entity-mask.test.js +23 -0
- package/dist/algorithms/entity-mask.test.js.map +1 -0
- package/dist/algorithms/simhash.d.ts +4 -0
- package/dist/algorithms/simhash.d.ts.map +1 -0
- package/dist/algorithms/simhash.js +64 -0
- package/dist/algorithms/simhash.js.map +1 -0
- package/dist/algorithms/simhash.test.d.ts +2 -0
- package/dist/algorithms/simhash.test.d.ts.map +1 -0
- package/dist/algorithms/simhash.test.js +23 -0
- package/dist/algorithms/simhash.test.js.map +1 -0
- package/dist/algorithms/tf-idf.d.ts +8 -0
- package/dist/algorithms/tf-idf.d.ts.map +1 -0
- package/dist/algorithms/tf-idf.js +55 -0
- package/dist/algorithms/tf-idf.js.map +1 -0
- package/dist/auditor.d.ts +3 -0
- package/dist/auditor.d.ts.map +1 -0
- package/dist/auditor.js +730 -0
- package/dist/auditor.js.map +1 -0
- package/dist/auditor.test.d.ts +2 -0
- package/dist/auditor.test.d.ts.map +1 -0
- package/dist/auditor.test.js +134 -0
- package/dist/auditor.test.js.map +1 -0
- package/dist/enrich-findings.d.ts +9 -0
- package/dist/enrich-findings.d.ts.map +1 -0
- package/dist/enrich-findings.js +436 -0
- package/dist/enrich-findings.js.map +1 -0
- package/dist/formatters/console.d.ts +6 -0
- package/dist/formatters/console.d.ts.map +1 -0
- package/dist/formatters/console.js +237 -0
- package/dist/formatters/console.js.map +1 -0
- package/dist/formatters/html.d.ts +3 -0
- package/dist/formatters/html.d.ts.map +1 -0
- package/dist/formatters/html.js +170 -0
- package/dist/formatters/html.js.map +1 -0
- package/dist/formatters/index.d.ts +6 -0
- package/dist/formatters/index.d.ts.map +1 -0
- package/dist/formatters/index.js +5 -0
- package/dist/formatters/index.js.map +1 -0
- package/dist/formatters/json.d.ts +3 -0
- package/dist/formatters/json.d.ts.map +1 -0
- package/dist/formatters/json.js +4 -0
- package/dist/formatters/json.js.map +1 -0
- package/dist/formatters/markdown.d.ts +3 -0
- package/dist/formatters/markdown.d.ts.map +1 -0
- package/dist/formatters/markdown.js +93 -0
- package/dist/formatters/markdown.js.map +1 -0
- package/dist/index.d.ts +45 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/index.js.map +1 -0
- package/dist/page-classifier.d.ts +4 -0
- package/dist/page-classifier.d.ts.map +1 -0
- package/dist/page-classifier.js +133 -0
- package/dist/page-classifier.js.map +1 -0
- package/dist/parser.d.ts +3 -0
- package/dist/parser.d.ts.map +1 -0
- package/dist/parser.js +131 -0
- package/dist/parser.js.map +1 -0
- package/dist/parser.test.d.ts +2 -0
- package/dist/parser.test.d.ts.map +1 -0
- package/dist/parser.test.js +37 -0
- package/dist/parser.test.js.map +1 -0
- package/dist/renderer.d.ts +15 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +124 -0
- package/dist/renderer.js.map +1 -0
- package/dist/rule-references.d.ts +2 -0
- package/dist/rule-references.d.ts.map +1 -0
- package/dist/rule-references.js +35 -0
- package/dist/rule-references.js.map +1 -0
- package/dist/rules/cannibal/keyword-collision.d.ts +3 -0
- package/dist/rules/cannibal/keyword-collision.d.ts.map +1 -0
- package/dist/rules/cannibal/keyword-collision.js +25 -0
- package/dist/rules/cannibal/keyword-collision.js.map +1 -0
- package/dist/rules/cannibal/title-overlap.d.ts +3 -0
- package/dist/rules/cannibal/title-overlap.d.ts.map +1 -0
- package/dist/rules/cannibal/title-overlap.js +43 -0
- package/dist/rules/cannibal/title-overlap.js.map +1 -0
- package/dist/rules/cannibal/url-pattern.d.ts +3 -0
- package/dist/rules/cannibal/url-pattern.d.ts.map +1 -0
- package/dist/rules/cannibal/url-pattern.js +48 -0
- package/dist/rules/cannibal/url-pattern.js.map +1 -0
- package/dist/rules/content/eeat-signals.d.ts +3 -0
- package/dist/rules/content/eeat-signals.d.ts.map +1 -0
- package/dist/rules/content/eeat-signals.js +46 -0
- package/dist/rules/content/eeat-signals.js.map +1 -0
- package/dist/rules/content/heading-uniqueness.d.ts +3 -0
- package/dist/rules/content/heading-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/heading-uniqueness.js +56 -0
- package/dist/rules/content/heading-uniqueness.js.map +1 -0
- package/dist/rules/content/meta-uniqueness.d.ts +3 -0
- package/dist/rules/content/meta-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/meta-uniqueness.js +28 -0
- package/dist/rules/content/meta-uniqueness.js.map +1 -0
- package/dist/rules/content/missing-author.d.ts +3 -0
- package/dist/rules/content/missing-author.d.ts.map +1 -0
- package/dist/rules/content/missing-author.js +26 -0
- package/dist/rules/content/missing-author.js.map +1 -0
- package/dist/rules/content/unique-value.d.ts +3 -0
- package/dist/rules/content/unique-value.d.ts.map +1 -0
- package/dist/rules/content/unique-value.js +26 -0
- package/dist/rules/content/unique-value.js.map +1 -0
- package/dist/rules/links/cluster-connectivity.d.ts +7 -0
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -0
- package/dist/rules/links/cluster-connectivity.js +73 -0
- package/dist/rules/links/cluster-connectivity.js.map +1 -0
- package/dist/rules/links/cluster-key.d.ts +3 -0
- package/dist/rules/links/cluster-key.d.ts.map +1 -0
- package/dist/rules/links/cluster-key.js +22 -0
- package/dist/rules/links/cluster-key.js.map +1 -0
- package/dist/rules/links/dead-ends.d.ts +3 -0
- package/dist/rules/links/dead-ends.d.ts.map +1 -0
- package/dist/rules/links/dead-ends.js +13 -0
- package/dist/rules/links/dead-ends.js.map +1 -0
- package/dist/rules/links/hub-pages.d.ts +7 -0
- package/dist/rules/links/hub-pages.d.ts.map +1 -0
- package/dist/rules/links/hub-pages.js +73 -0
- package/dist/rules/links/hub-pages.js.map +1 -0
- package/dist/rules/links/link-depth.d.ts +3 -0
- package/dist/rules/links/link-depth.d.ts.map +1 -0
- package/dist/rules/links/link-depth.js +46 -0
- package/dist/rules/links/link-depth.js.map +1 -0
- package/dist/rules/links/orphan-pages.d.ts +3 -0
- package/dist/rules/links/orphan-pages.d.ts.map +1 -0
- package/dist/rules/links/orphan-pages.js +19 -0
- package/dist/rules/links/orphan-pages.js.map +1 -0
- package/dist/rules/schema/consistency.d.ts +3 -0
- package/dist/rules/schema/consistency.d.ts.map +1 -0
- package/dist/rules/schema/consistency.js +44 -0
- package/dist/rules/schema/consistency.js.map +1 -0
- package/dist/rules/schema/json-ld-valid.d.ts +3 -0
- package/dist/rules/schema/json-ld-valid.d.ts.map +1 -0
- package/dist/rules/schema/json-ld-valid.js +47 -0
- package/dist/rules/schema/json-ld-valid.js.map +1 -0
- package/dist/rules/schema/required-fields.d.ts +3 -0
- package/dist/rules/schema/required-fields.d.ts.map +1 -0
- package/dist/rules/schema/required-fields.js +60 -0
- package/dist/rules/schema/required-fields.js.map +1 -0
- package/dist/rules/spam/boilerplate-ratio.d.ts +3 -0
- package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -0
- package/dist/rules/spam/boilerplate-ratio.js +50 -0
- package/dist/rules/spam/boilerplate-ratio.js.map +1 -0
- package/dist/rules/spam/doorway-pattern.d.ts +4 -0
- package/dist/rules/spam/doorway-pattern.d.ts.map +1 -0
- package/dist/rules/spam/doorway-pattern.js +47 -0
- package/dist/rules/spam/doorway-pattern.js.map +1 -0
- package/dist/rules/spam/entity-swap.d.ts +7 -0
- package/dist/rules/spam/entity-swap.d.ts.map +1 -0
- package/dist/rules/spam/entity-swap.js +26 -0
- package/dist/rules/spam/entity-swap.js.map +1 -0
- package/dist/rules/spam/near-duplicate.d.ts +11 -0
- package/dist/rules/spam/near-duplicate.d.ts.map +1 -0
- package/dist/rules/spam/near-duplicate.js +25 -0
- package/dist/rules/spam/near-duplicate.js.map +1 -0
- package/dist/rules/spam/publication-velocity.d.ts +3 -0
- package/dist/rules/spam/publication-velocity.d.ts.map +1 -0
- package/dist/rules/spam/publication-velocity.js +25 -0
- package/dist/rules/spam/publication-velocity.js.map +1 -0
- package/dist/rules/spam/template-coverage.d.ts +3 -0
- package/dist/rules/spam/template-coverage.d.ts.map +1 -0
- package/dist/rules/spam/template-coverage.js +87 -0
- package/dist/rules/spam/template-coverage.js.map +1 -0
- package/dist/rules/spam/template-diversity.d.ts +3 -0
- package/dist/rules/spam/template-diversity.d.ts.map +1 -0
- package/dist/rules/spam/template-diversity.js +19 -0
- package/dist/rules/spam/template-diversity.js.map +1 -0
- package/dist/rules/spam/thin-content.d.ts +6 -0
- package/dist/rules/spam/thin-content.d.ts.map +1 -0
- package/dist/rules/spam/thin-content.js +22 -0
- package/dist/rules/spam/thin-content.js.map +1 -0
- package/dist/rules/tech/canonical-consistency.d.ts +4 -0
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -0
- package/dist/rules/tech/canonical-consistency.js +78 -0
- package/dist/rules/tech/canonical-consistency.js.map +1 -0
- package/dist/rules/tech/canonical-noindex-conflict.d.ts +3 -0
- package/dist/rules/tech/canonical-noindex-conflict.d.ts.map +1 -0
- package/dist/rules/tech/canonical-noindex-conflict.js +27 -0
- package/dist/rules/tech/canonical-noindex-conflict.js.map +1 -0
- package/dist/rules/tech/hreflang-consistency.d.ts +3 -0
- package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -0
- package/dist/rules/tech/hreflang-consistency.js +99 -0
- package/dist/rules/tech/hreflang-consistency.js.map +1 -0
- package/dist/rules/tech/og-completeness.d.ts +3 -0
- package/dist/rules/tech/og-completeness.d.ts.map +1 -0
- package/dist/rules/tech/og-completeness.js +35 -0
- package/dist/rules/tech/og-completeness.js.map +1 -0
- package/dist/rules/tech/redirect-chain.d.ts +3 -0
- package/dist/rules/tech/redirect-chain.d.ts.map +1 -0
- package/dist/rules/tech/redirect-chain.js +20 -0
- package/dist/rules/tech/redirect-chain.js.map +1 -0
- package/dist/rules/tech/robots-noindex-conflict.d.ts +3 -0
- package/dist/rules/tech/robots-noindex-conflict.d.ts.map +1 -0
- package/dist/rules/tech/robots-noindex-conflict.js +30 -0
- package/dist/rules/tech/robots-noindex-conflict.js.map +1 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts +3 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -0
- package/dist/rules/tech/robots-sitemap-presence.js +61 -0
- package/dist/rules/tech/robots-sitemap-presence.js.map +1 -0
- package/dist/rules/tech/sitemap-completeness.d.ts +3 -0
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -0
- package/dist/rules/tech/sitemap-completeness.js +40 -0
- package/dist/rules/tech/sitemap-completeness.js.map +1 -0
- package/dist/rules/tech/soft-404.d.ts +3 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -0
- package/dist/rules/tech/soft-404.js +24 -0
- package/dist/rules/tech/soft-404.js.map +1 -0
- package/dist/types.d.ts +170 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/url-normalize.d.ts +10 -0
- package/dist/url-normalize.d.ts.map +1 -0
- package/dist/url-normalize.js +52 -0
- package/dist/url-normalize.js.map +1 -0
- package/package.json +46 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { clusterKeyForUrl } from "./cluster-key.js";
|
|
2
|
+
const INDEX_NAMES = ["index.html", "index.htm"];
/**
 * Builds the candidate index-page locations for a cluster directory.
 *
 * @param {string} clusterDir - Directory that identifies the cluster: an
 *   absolute URL when `pageUrl` is an http(s) URL, otherwise a path string.
 * @param {string} pageUrl - Location of any page in the cluster. It is only
 *   inspected to choose URL vs. path handling and to pick the path separator.
 * @returns {string[]} One candidate per entry of `INDEX_NAMES`, or an empty
 *   array when `clusterDir` cannot be parsed as a URL in the http(s) case.
 */
function indexUrlsForCluster(clusterDir, pageUrl) {
    if (/^https?:\/\//i.test(pageUrl)) {
        try {
            const base = new URL(clusterDir);
            // Relative resolution replaces the last path segment of the base
            // unless the base ends in "/". Treat clusterDir as a directory in
            // both spellings, the same way the path branch below does.
            if (!base.pathname.endsWith("/")) {
                base.pathname = `${base.pathname}/`;
            }
            return INDEX_NAMES.map((name) => new URL(name, base).href);
        }
        catch {
            // An unparsable cluster key simply has no index candidates.
            return [];
        }
    }
    // Non-URL input: keep whichever separator the page path itself uses.
    const sep = pageUrl.includes("\\") ? "\\" : "/";
    const d = clusterDir.replace(/[/\\]+$/, "");
    return INDEX_NAMES.map((n) => `${d}${sep}${n}`);
}
|
|
17
|
+
/**
 * Hub/index coverage check per URL cluster.
 *
 * Pages are grouped by `clusterKeyForUrl(page.url)`. Clusters smaller than
 * `minSiblings` are ignored. Clusters larger than `maxSiblings` only produce an
 * informational "skipped" finding. Every remaining cluster must either have a
 * known index URL or contain a page linking to all of its siblings; otherwise
 * a warning is reported.
 *
 * @param pages - Parsed pages; `url` and `resolvedHrefs` are read.
 * @param knownUrls - Set of URLs known to the crawl.
 * @param minSiblings - Minimum cluster size that is checked.
 * @param maxSiblings - Maximum cluster size that is checked.
 * @returns Findings in cluster insertion order.
 */
export function hubPagesRule(pages, knownUrls, minSiblings, maxSiblings) {
    const results = [];
    if (pages.length === 0) {
        return results;
    }
    // Bucket pages by cluster key, preserving first-seen order of the keys.
    const clusters = new Map();
    for (const page of pages) {
        const clusterId = clusterKeyForUrl(page.url);
        const bucket = clusters.get(clusterId) ?? [];
        bucket.push(page);
        clusters.set(clusterId, bucket);
    }
    // True when `candidate` links (via known URLs) to every sibling but itself.
    const linksToEverySibling = (candidate, siblingUrls) => {
        const reached = new Set([candidate.url]);
        for (const href of candidate.resolvedHrefs.filter((u) => knownUrls.has(u) && siblingUrls.has(u))) {
            reached.add(href);
        }
        return Array.from(siblingUrls).every((sibling) => reached.has(sibling));
    };
    for (const [clusterDir, members] of clusters) {
        const size = members.length;
        if (size < minSiblings) {
            continue;
        }
        if (size > maxSiblings) {
            const memberUrls = members.map((member) => member.url);
            memberUrls.sort();
            results.push({
                ruleId: "links/hub-pages-skipped",
                severity: "info",
                message: `Hub/index check skipped for cluster ${clusterDir} (${size} pages > max ${maxSiblings}).`,
                relatedUrls: memberUrls,
                fix: "Create an index or hub page for this directory that links to all child pages."
            });
            continue;
        }
        const siblingUrls = new Set(members.map((member) => member.url));
        const indexKnown = indexUrlsForCluster(clusterDir, members[0].url)
            .some((candidate) => knownUrls.has(candidate));
        if (indexKnown || members.some((member) => linksToEverySibling(member, siblingUrls))) {
            continue;
        }
        results.push({
            ruleId: "links/hub-pages",
            severity: "warning",
            message: `No hub/index page detected for cluster ${clusterDir} (${size} pages).`,
            relatedUrls: [...siblingUrls].sort(),
            fix: "Create an index or hub page for this directory that links to all child pages."
        });
    }
    return results;
}
|
|
73
|
+
//# sourceMappingURL=hub-pages.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hub-pages.js","sourceRoot":"","sources":["../../../src/rules/links/hub-pages.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,MAAM,WAAW,GAAG,CAAC,YAAY,EAAE,WAAW,CAAU,CAAC;AAEzD,SAAS,mBAAmB,CAAC,UAAkB,EAAE,OAAe;IAC9D,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;YACjC,OAAO,WAAW,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC;QAC7D,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IACD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;IAChD,MAAM,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAC5C,OAAO,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC;AAClD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAC1B,KAAmB,EACnB,SAAsB,EACtB,WAAmB,EACnB,WAAmB;IAEnB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,EAAwB,CAAC;IAClD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACb,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,UAAU,EAAE,KAAK,CAAC,IAAI,SAAS,CAAC,OAAO,EAAE,EAAE,CAAC;QACtD,IAAI,KAAK,CAAC,MAAM,GAAG,WAAW,EAAE,CAAC;YAC/B,SAAS;QACX,CAAC;QAED,IAAI,KAAK,CAAC,MAAM,GAAG,WAAW,EAAE,CAAC;YAC/B,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,yBAAyB;gBACjC,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,uCAAuC,UAAU,KAAK,KAAK,CAAC,MAAM,gBAAgB,WAAW,IAAI;gBAC1G,WAAW,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE;gBAC3C,GAAG,EAAE,+EAA+E;aACrF,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACrD,MAAM,eAAe,GAAG,mBAAmB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QA
CtE,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAE/D,MAAM,kBAAkB,GAAG,CAAC,IAAgB,EAAW,EAAE;YACvD,MAAM,MAAM,GAAG,IAAI,GAAG,CACpB,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CACzE,CAAC;YACF,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrB,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;gBAC5B,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnB,OAAO,KAAK,CAAC;gBACf,CAAC;YACH,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC;QAEF,MAAM,MAAM,GAAG,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC;QAEpE,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,iBAAiB;gBACzB,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,0CAA0C,UAAU,KAAK,KAAK,CAAC,MAAM,UAAU;gBACxF,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE;gBAC3C,GAAG,EAAE,+EAA+E;aACrF,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
/**
 * Reports pages that cannot be reached from the crawl root and pages that are
 * too many clicks away from it.
 *
 * Depths are computed by a breadth-first walk over `adjacency` starting at
 * `rootUrl` (depth 0).
 *
 * @param pages - Pages to evaluate; the page whose `url` equals `rootUrl` is never reported.
 * @param adjacency - Outgoing links per URL.
 * @param rootUrl - URL the walk starts from.
 * @param maxDepth - Pages reached at a depth greater than this produce an
 *   info finding with rule id `links/link-depth`.
 * @param inbound - Inbound link count per URL. An unreached page is reported
 *   (warning, rule id `links/unreachable-from-root`) only when its count is above zero.
 * @returns All unreachable findings followed by all depth findings.
 */
export declare function linkDepthRule(pages: ParsedPage[], adjacency: Map<string, Set<string>>, rootUrl: string, maxDepth: number, inbound: Map<string, number>): RuleResult[];
|
|
3
|
+
//# sourceMappingURL=link-depth.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"link-depth.d.ts","sourceRoot":"","sources":["../../../src/rules/links/link-depth.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,aAAa,CAC3B,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC,EACnC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,GAC3B,UAAU,EAAE,CAgDd"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
 * Reports pages that cannot be reached from the crawl root and pages that are
 * too many clicks away from it.
 *
 * @param pages - Pages to evaluate; only `url` is read. The root page itself is never reported.
 * @param adjacency - Map from a URL to the set of URLs it links to.
 * @param rootUrl - URL the breadth-first walk starts from (depth 0).
 * @param maxDepth - Pages reached at a depth greater than this get a `links/link-depth` info finding.
 * @param inbound - Map from URL to inbound link count. Unreached pages are
 *   only reported when their count is above zero.
 * @returns All `links/unreachable-from-root` findings followed by all
 *   `links/link-depth` findings, each group in `pages` order.
 */
export function linkDepthRule(pages, adjacency, rootUrl, maxDepth, inbound) {
    const depthByUrl = new Map([[rootUrl, 0]]);
    // Breadth-first walk. The queue is read through a cursor instead of
    // Array.prototype.shift(), which re-indexes the array on every call and
    // makes large crawls quadratic.
    const queue = [rootUrl];
    for (let head = 0; head < queue.length; head += 1) {
        const url = queue[head];
        const neighbors = adjacency.get(url);
        if (neighbors == null) {
            continue;
        }
        const nextDepth = depthByUrl.get(url) + 1;
        for (const next of neighbors) {
            // FIFO order means the first depth recorded is already the shortest.
            if (depthByUrl.has(next)) {
                continue;
            }
            depthByUrl.set(next, nextDepth);
            queue.push(next);
        }
    }
    const nonRootPages = pages.filter((page) => page.url !== rootUrl);
    const unreachable = nonRootPages
        .filter((page) => (inbound.get(page.url) ?? 0) > 0 && !depthByUrl.has(page.url))
        .map((page) => ({
        ruleId: "links/unreachable-from-root",
        severity: "warning",
        message: `${page.url} is not reachable from the crawl root via internal links (but has inbound links).`,
        pageUrl: page.url,
        fix: "This page is unreachable from the site root. Add a navigation path to it."
    }));
    const deep = nonRootPages
        .filter((page) => {
        const depth = depthByUrl.get(page.url);
        return depth !== undefined && depth > maxDepth;
    })
        .map((page) => ({
        ruleId: "links/link-depth",
        severity: "info",
        message: `${page.url} is deeper than ${maxDepth} clicks from root.`,
        pageUrl: page.url,
        fix: "Reduce click depth by linking from a higher-level page."
    }));
    return [...unreachable, ...deep];
}
|
|
46
|
+
//# sourceMappingURL=link-depth.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"link-depth.js","sourceRoot":"","sources":["../../../src/rules/links/link-depth.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,aAAa,CAC3B,KAAmB,EACnB,SAAmC,EACnC,OAAe,EACf,QAAgB,EAChB,OAA4B;IAE5B,MAAM,KAAK,GAA0C,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;IAClF,MAAM,OAAO,GAAG,IAAI,GAAG,CAAiB,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAExD,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;QAC9B,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,SAAS;QACX,CAAC;QACD,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAU,CAAC;QAClE,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,SAAS,GAAG,OAAO,CAAC,KAAK,GAAG,CAAC,CAAC;YACpC,IAAI,QAAQ,KAAK,SAAS,IAAI,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACpD,SAAS;YACX,CAAC;YACD,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC7B,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,MAAM,WAAW,GAAG,KAAK;SACtB,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,KAAK,OAAO,CAAC;SACtC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;SAClD,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,SAAS,CAAC;SACrD,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACd,MAAM,EAAE,6BAAsC;QAC9C,QAAQ,EAAE,SAAkB;QAC5B,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mFAAmF;QACvG,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,2EAA2E;KACjF,CAAC,CAAC,CAAC;IAEN,MAAM,IAAI,GAAG,KAAK;SACf,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,KAAK,OAAO,CAAC;SACtC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACf,MAAM,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAChC,OAAO,CAAC,KAAK,SAAS,IAAI,CAAC,GAAG,QAAQ,CAAC;IACzC,CAAC,CAAC;SACD,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACd,MAAM,EAAE,kBAAkB;QAC1B,QAAQ,EAAE,MAAe;QACzB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mBAAmB,QAAQ,oBAAoB;QACnE,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,yDAAyD;KAC/D,CAAC,CAAC,CAAC;IAEN,OAAO,CAAC,GAAG,WAAW,EA
AE,GAAG,IAAI,CAAC,CAAC;AACnC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"orphan-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAC7B,KAAK,EAAE,UAAU,EAAE,EACnB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EACjC,OAAO,CAAC,EAAE,MAAM,GACf,UAAU,EAAE,CAmBd"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Flags every page that has no inbound links recorded for it.
 *
 * @param pages - Pages to evaluate; only `url` is read.
 * @param inboundLinks - Map from URL to inbound link count; a missing entry counts as zero.
 * @param rootUrl - Optional crawl root; when given, the page with this URL is exempt.
 * @returns One `links/orphan-pages` error finding per orphan, in `pages` order.
 */
export function orphanPagesRule(pages, inboundLinks, rootUrl) {
    const isExemptRoot = (candidate) => Boolean(rootUrl) && candidate.url === rootUrl;
    const inboundCount = (candidate) => inboundLinks.get(candidate.url) ?? 0;
    return pages
        .filter((candidate) => !isExemptRoot(candidate) && inboundCount(candidate) === 0)
        .map((orphan) => ({
        ruleId: "links/orphan-pages",
        severity: "error",
        message: `${orphan.url} has no inbound links from other pages in this crawl.`,
        pageUrl: orphan.url,
        fix: "Link to this page from a relevant hub or index page, and include it in your site navigation."
    }));
}
|
|
19
|
+
//# sourceMappingURL=orphan-pages.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"orphan-pages.js","sourceRoot":"","sources":["../../../src/rules/links/orphan-pages.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAC7B,KAAmB,EACnB,YAAiC,EACjC,OAAgB;IAEhB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,IAAI,IAAI,CAAC,GAAG,KAAK,OAAO,EAAE,CAAC;YACpC,SAAS;QACX,CAAC;QACD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,oBAAoB;gBAC5B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;gBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,8FAA8F;aACpG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"consistency.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,qBAAqB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAiDvE"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
 * Reports when pages declare different sets of JSON-LD `@type` values.
 *
 * Only string `@type` values that are not blank are collected. Entries that
 * are not objects, or that carry `__parseError === true`, are ignored. Pages
 * without any collected type do not take part in the comparison.
 *
 * A finding is produced only when at least two pages have types and their
 * type sets differ. Pages that all declare the same combination (for example
 * Article + BreadcrumbList on every page) are consistent and are not flagged.
 *
 * @param pages - Parsed pages; `url` and `jsonLd` are read.
 * @returns An empty array, or a single `schema/consistency` info finding.
 */
export function schemaConsistencyRule(pages) {
    const typesByPage = new Map();
    for (const page of pages) {
        const types = new Set();
        for (const entry of page.jsonLd) {
            if (typeof entry !== "object" || entry === null) {
                continue;
            }
            if ("__parseError" in entry && entry.__parseError === true) {
                continue;
            }
            const type = entry["@type"];
            if (typeof type === "string" && type.trim() !== "") {
                types.add(type);
            }
        }
        if (types.size > 0) {
            typesByPage.set(page.url, types);
        }
    }
    if (typesByPage.size < 2) {
        return [];
    }
    const allTypes = new Set();
    // One order-independent signature per page; more than one distinct
    // signature means the pages really disagree about their schema types.
    const signatures = new Set();
    for (const types of typesByPage.values()) {
        const sorted = Array.from(types).sort();
        signatures.add(JSON.stringify(sorted));
        for (const t of sorted) {
            allTypes.add(t);
        }
    }
    if (signatures.size <= 1) {
        return [];
    }
    const typeList = Array.from(allTypes).sort().join(", ");
    return [{
            ruleId: "schema/consistency",
            severity: "info",
            message: `Pages use mixed schema types (${typeList}). Consider using a consistent @type across template pages.`,
            relatedUrls: Array.from(typesByPage.keys()),
            fix: `Use a consistent @type across all template pages, or separate pages into groups with different schema types.`
        }];
}
|
|
44
|
+
//# sourceMappingURL=consistency.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"consistency.js","sourceRoot":"","sources":["../../../src/rules/schema/consistency.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,qBAAqB,CAAC,KAAmB;IACvD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAuB,CAAC;IAEnD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YACD,MAAM,GAAG,GAAG,KAAgC,CAAC;YAC7C,IAAI,cAAc,IAAI,GAAG,IAAI,GAAG,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;gBACvD,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACnE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,IAAI,KAAK,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YACnB,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IAED,IAAI,WAAW,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IACnC,KAAK,MAAM,KAAK,IAAI,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC;QACzC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;QACvB,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxD,QAAQ,CAAC,IAAI,CAAC;QACZ,MAAM,EAAE,oBAAoB;QAC5B,QAAQ,EAAE,MAAM;QAChB,OAAO,EAAE,iCAAiC,QAAQ,6DAA6D;QAC/G,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;QAC3C,GAAG,EAAE,8GAA8G;KACpH,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-ld-valid.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAqDjE"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Structural validation of each page's JSON-LD entries.
 *
 * For every object entry in `page.jsonLd`:
 * - an entry with `__parseError === true` yields a "malformed JSON-LD" error
 *   and is not inspected further;
 * - a missing/falsy `@context` yields an error;
 * - a present `@type` must be a non-blank string, or a non-empty array of
 *   non-blank strings (JSON-LD allows a node to have several types); anything
 *   else yields an error.
 * Entries that are not objects are ignored.
 *
 * @param pages - Parsed pages; `url` and `jsonLd` are read.
 * @returns `schema/json-ld-valid` error findings in encounter order.
 */
export function jsonLdValidRule(pages) {
    const findings = [];
    const isTypeName = (value) => typeof value === "string" && value.trim() !== "";
    for (const page of pages) {
        for (const entry of page.jsonLd) {
            if (typeof entry !== "object" || entry === null) {
                continue;
            }
            if ("__parseError" in entry && entry.__parseError === true) {
                findings.push({
                    ruleId: "schema/json-ld-valid",
                    severity: "error",
                    message: `${page.url} contains malformed JSON-LD that could not be parsed.`,
                    pageUrl: page.url,
                    fix: `Fix the JSON syntax in the <script type="application/ld+json"> block. Validate it at https://validator.schema.org/.`
                });
                continue;
            }
            if (!entry["@context"]) {
                findings.push({
                    ruleId: "schema/json-ld-valid",
                    severity: "error",
                    message: `${page.url} has a JSON-LD block missing the required @context property.`,
                    pageUrl: page.url,
                    fix: `Add "@context": "https://schema.org" to the JSON-LD block.`
                });
            }
            const typeValue = entry["@type"];
            if (typeValue === undefined) {
                continue;
            }
            // "@type": ["Article", "NewsArticle"] is valid JSON-LD and must
            // not be reported as an invalid type.
            const typeIsValid = Array.isArray(typeValue)
                ? typeValue.length > 0 && typeValue.every(isTypeName)
                : isTypeName(typeValue);
            if (!typeIsValid) {
                findings.push({
                    ruleId: "schema/json-ld-valid",
                    severity: "error",
                    message: `${page.url} has a JSON-LD block with an invalid @type value.`,
                    pageUrl: page.url,
                    fix: `Set @type to a valid Schema.org type like "Article", "Product", or "FAQPage".`
                });
            }
        }
    }
    return findings;
}
|
|
47
|
+
//# sourceMappingURL=json-ld-valid.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-ld-valid.js","sourceRoot":"","sources":["../../../src/rules/schema/json-ld-valid.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IACE,OAAO,KAAK,KAAK,QAAQ;gBACzB,KAAK,KAAK,IAAI;gBACd,cAAc,IAAI,KAAK;gBACtB,KAAiC,CAAC,YAAY,KAAK,IAAI,EACxD,CAAC;gBACD,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;oBAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,qHAAqH;iBAC3H,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YAED,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;gBACrB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,OAAO;oBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D;oBAClF,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EAAE,4DAA4D;iBAClE,CAAC,CAAC;YACL,CAAC;YAED,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC/B,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC/B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;oBAC7D,QAAQ,CAAC,IAAI,CAAC;wBACZ,MAAM,EAAE,sBAAsB;wBAC9B,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,mDAAmD;wBACvE,OAAO,EAAE,IAAI,CAAC,GAAG;wBACjB,GAAG,EAAE,+EAA+E;qBACrF,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"required-fields.d.ts","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAqB7D,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAoDpE"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
const REQUIRED_FIELDS = {
|
|
2
|
+
Article: ["headline", "author", "datePublished"],
|
|
3
|
+
Product: ["name"],
|
|
4
|
+
FAQPage: ["mainEntity"]
|
|
5
|
+
};
|
|
6
|
+
/**
 * Reports whether a JSON-LD object carries a usable price.
 *
 * A price counts as present when it is neither `undefined`, `null`, nor the
 * empty string (so a numeric `0` is accepted). The price may sit directly on
 * the object or on its `offers` value. `offers` may be a single offer object
 * or an array of offers; with an array, one priced offer is enough.
 *
 * @param {Record<string, unknown>} obj - A parsed JSON-LD entry.
 * @returns {boolean} `true` when a price is found, otherwise `false`.
 */
function hasPrice(obj) {
    const isPresent = (value) => value !== undefined && value !== null && value !== "";
    if (isPresent(obj.price)) {
        return true;
    }
    // Normalise to a list so a lone offer and an array of offers share one path.
    // Reading `.price` off the array itself would always be undefined.
    const offers = Array.isArray(obj.offers) ? obj.offers : [obj.offers];
    return offers.some((offer) => typeof offer === "object" && offer !== null && isPresent(offer.price));
}
|
|
18
|
+
/**
 * Flags JSON-LD entries that are missing fields required for their schema type.
 *
 * For every entry in each page's `jsonLd` list the rule skips non-objects,
 * entries marked with `__parseError === true`, entries whose `@type` is not a
 * non-empty string, and types that have no entry in REQUIRED_FIELDS. A field
 * counts as missing when it is `undefined`, `null` or the empty string.
 * `Product` entries additionally need a price, as decided by `hasPrice`.
 *
 * @param {Array<{url: string, jsonLd: Iterable<unknown>}>} pages - Parsed pages to inspect.
 * @returns {Array<object>} One "schema/required-fields" warning per offending entry.
 */
export function requiredFieldsRule(pages) {
    const isBlank = (value) => value === undefined || value === null || value === "";
    const findings = [];
    for (const page of pages) {
        for (const entry of page.jsonLd) {
            if (typeof entry !== "object" || entry === null) {
                continue;
            }
            // Parse failures are not this rule's concern.
            if (entry.__parseError === true) {
                continue;
            }
            const schemaType = entry["@type"];
            if (typeof schemaType !== "string" || schemaType === "") {
                continue;
            }
            // `@type` comes from page markup. Only accept keys the table owns, so
            // values such as "constructor" or "__proto__" cannot resolve to an
            // inherited, non-iterable Object.prototype member and crash the loop.
            if (!Object.prototype.hasOwnProperty.call(REQUIRED_FIELDS, schemaType)) {
                continue;
            }
            const missing = REQUIRED_FIELDS[schemaType].filter((field) => isBlank(entry[field]));
            if (schemaType === "Product" && !hasPrice(entry)) {
                missing.push("price");
            }
            if (missing.length === 0) {
                continue;
            }
            const fieldList = missing.join(", ");
            findings.push({
                ruleId: "schema/required-fields",
                severity: "warning",
                message: `${page.url} has a ${schemaType} schema missing required fields: ${fieldList}.`,
                pageUrl: page.url,
                fix: `Add the missing fields to your ${schemaType} schema: ${fieldList}.`
            });
        }
    }
    return findings;
}
|
|
60
|
+
//# sourceMappingURL=required-fields.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"required-fields.js","sourceRoot":"","sources":["../../../src/rules/schema/required-fields.ts"],"names":[],"mappings":"AAEA,MAAM,eAAe,GAA6B;IAChD,OAAO,EAAE,CAAC,UAAU,EAAE,QAAQ,EAAE,eAAe,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,YAAY,CAAC;CACxB,CAAC;AAEF,SAAS,QAAQ,CAAC,GAA4B;IAC5C,IAAI,GAAG,CAAC,KAAK,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;QACtE,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,GAAG,CAAC,MAAiC,CAAC;QACrD,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,KAAK,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE,CAAC;YAC/E,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAmB;IACpD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBAChD,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,KAAgC,CAAC;YAE7C,IACE,cAAc,IAAI,GAAG;gBACpB,GAA+B,CAAC,YAAY,KAAK,IAAI,EACtD,CAAC;gBACD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC1E,IAAI,CAAC,UAAU,EAAE,CAAC;gBAChB,SAAS;YACX,CAAC;YAED,MAAM,QAAQ,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS;YACX,CAAC;YAED,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;gBAC7B,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC;oBACzE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACtB,CAAC;YACH,CAAC;YAED,IAAI,UAAU,KAAK,SAAS,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC/C,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;YAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,wBAAwB;oBAChC,QAAQ,EAAE,SAAS;oBACnB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,UAAU,UAAU,oCAAoC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;oBACjG,OAAO,EAAE,IAAI,CAAC,GAAG;oBACjB,GAAG,EA
AE,kCAAkC,UAAU,YAAY,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;iBACnF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"boilerplate-ratio.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAS7D,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,QAAQ,EAAE,MAAM,GAAG,UAAU,EAAE,CAoDxF"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
 * Breaks page text into normalised comparison blocks.
 *
 * The text is cut after sentence punctuation (`.`, `!`, `?`) followed by
 * whitespace, and at runs of newlines. Each piece is trimmed and lower-cased;
 * pieces of 20 characters or fewer are dropped.
 *
 * @param {string} contentText - Text content of one page.
 * @returns {string[]} The surviving blocks, in document order.
 */
function extractTextBlocks(contentText) {
    const blocks = [];
    for (const fragment of contentText.split(/[.!?]\s+|\n+/)) {
        const normalized = fragment.trim().toLowerCase();
        if (normalized.length > 20) {
            blocks.push(normalized);
        }
    }
    return blocks;
}
|
|
7
|
+
/**
 * Flags pages whose text is dominated by template ("skeleton") blocks shared
 * across the page set.
 *
 * A block belongs to the skeleton when it occurs on at least
 * `max(2, floor(0.8 * pages.length) + 1)` pages, i.e. on more than 80% of
 * them and never fewer than two. A page's ratio is the number of words in its
 * skeleton blocks divided by the number of words in all of its blocks; pages
 * whose ratio exceeds `maxRatio` are reported.
 *
 * @param {Array<{url: string, contentText: string}>} pages - Parsed pages to compare.
 * @param {number} maxRatio - Highest tolerated ratio, as a fraction (formatted as a percentage in messages).
 * @returns {Array<object>} One "spam/boilerplate-ratio" error per offending page;
 *   empty when fewer than two pages are given or no block qualifies as skeleton.
 */
export function boilerplateRatioRule(pages, maxRatio) {
    if (pages.length < 2) {
        return [];
    }
    const pageBlocks = pages.map((page) => extractTextBlocks(page.contentText));
    // Count on how many pages each block occurs (once per page at most).
    const blockFrequency = new Map();
    for (const blocks of pageBlocks) {
        for (const block of new Set(blocks)) {
            blockFrequency.set(block, (blockFrequency.get(block) ?? 0) + 1);
        }
    }
    const skeletonCutoff = Math.max(2, Math.floor(pages.length * 0.8) + 1);
    const skeleton = new Set();
    for (const [block, count] of blockFrequency) {
        if (count >= skeletonCutoff) {
            skeleton.add(block);
        }
    }
    if (skeleton.size === 0) {
        return [];
    }
    const findings = [];
    pages.forEach((page, index) => {
        // Single pass: each block is split into words once and feeds both totals.
        let totalWords = 0;
        let boilerplateWords = 0;
        for (const block of pageBlocks[index]) {
            const wordCount = block.split(/\s+/).length;
            totalWords += wordCount;
            if (skeleton.has(block)) {
                boilerplateWords += wordCount;
            }
        }
        if (totalWords === 0) {
            return;
        }
        const ratio = boilerplateWords / totalWords;
        if (ratio > maxRatio) {
            const ratioPct = (ratio * 100).toFixed(1);
            const maxPct = (maxRatio * 100).toFixed(1);
            findings.push({
                ruleId: "spam/boilerplate-ratio",
                severity: "error",
                message: `${page.url} has boilerplate ratio ${ratioPct}% (max ${maxPct}%).`,
                // Attribute the finding to its page in a structured field too.
                pageUrl: page.url,
                fix: `${ratioPct}% of this page's content is shared template text. Reduce repeated boilerplate sections or add unique content blocks—introductions, case studies, or page-specific data—to bring the ratio below ${maxPct}%.`
            });
        }
    });
    return findings;
}
|
|
50
|
+
//# sourceMappingURL=boilerplate-ratio.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"boilerplate-ratio.js","sourceRoot":"","sources":["../../../src/rules/spam/boilerplate-ratio.ts"],"names":[],"mappings":"AAEA,SAAS,iBAAiB,CAAC,WAAmB;IAC5C,OAAO,WAAW;SACf,KAAK,CAAC,cAAc,CAAC;SACrB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAAmB,EAAE,QAAgB;IACxE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAE5E,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IACvE,MAAM,QAAQ,GAAG,IAAI,GAAG,CACtB,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,CAAC;SACjC,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,KAAK,IAAI,cAAc,CAAC;SAC9C,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAC3B,CAAC;IAEF,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAC5B,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO;QACT,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC7E,MAAM,gBAAgB,GAAG,MAAM;aAC5B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;aAC9B,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAEtD,IAAI,UAAU,KAAK,CAAC;YAAE,OAAO;QAE7B,MAAM,KAAK,GAAG,gBAAgB,GAAG,UAAU,CAAC;QAC5C,IAAI,KAAK,G
AAG,QAAQ,EAAE,CAAC;YACrB,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,wBAAwB;gBAChC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,0BAA0B,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;gBAChH,GAAG,EAAE,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mMAAmM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;aACnQ,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
import type { PairMatch } from "./near-duplicate.js";
|
|
3
|
+
/**
 * Combines pair-level spam signals into "spam/doorway-pattern" findings.
 *
 * Only near-duplicate pairs that also appear in `entitySwapPairs` (in either
 * URL order) are considered. Extra signals are added when either URL is in
 * `thinContentUrls`, and — when `pages` is supplied — when both pages share a
 * structure signature or a non-empty meta description. A finding is emitted
 * once a pair accumulates at least three signals.
 *
 * @param nearDuplicatePairs Pairs reported by the near-duplicate rule.
 * @param entitySwapPairs Pairs reported by the entity-swap rule.
 * @param thinContentUrls URLs already flagged as thin content.
 * @param pages Optional parsed pages, looked up by `url` for the structure/meta signals.
 * @returns One critical finding per qualifying pair.
 */
export declare function doorwayPatternRule(nearDuplicatePairs: PairMatch[], entitySwapPairs: PairMatch[], thinContentUrls: Set<string>, pages?: ParsedPage[]): RuleResult[];
|
|
4
|
+
//# sourceMappingURL=doorway-pattern.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"doorway-pattern.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/doorway-pattern.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC7D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAMrD,wBAAgB,kBAAkB,CAChC,kBAAkB,EAAE,SAAS,EAAE,EAC/B,eAAe,EAAE,SAAS,EAAE,EAC5B,eAAe,EAAE,GAAG,CAAC,MAAM,CAAC,EAC5B,KAAK,CAAC,EAAE,UAAU,EAAE,GACnB,UAAU,EAAE,CAoDd"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Builds an order-independent key for a pair of URLs.
 *
 * The smaller value (by `<`) always comes first and the two are joined with a
 * NUL character, so `(a, b)` and `(b, a)` produce the same key.
 *
 * @param {string} left - One URL of the pair.
 * @param {string} right - The other URL of the pair.
 * @returns {string} The canonical key for the pair.
 */
function pairKey(left, right) {
    const [first, second] = left < right ? [left, right] : [right, left];
    return `${first}\0${second}`;
}
|
|
4
|
+
/**
 * Combines pair-level spam signals into "spam/doorway-pattern" findings.
 *
 * A near-duplicate pair is considered only if the same pair (in either URL
 * order) is also an entity-swap pair. Those two signals are then extended with:
 *  - "thin-content" when either URL is in `thinContentUrls`;
 *  - "identical-structure" when both pages are known and carry the same
 *    structure signature;
 *  - "identical-meta" when both pages are known and share a non-empty meta
 *    description.
 * A finding is emitted once a pair has at least three signals.
 *
 * @param {Array<{leftUrl: string, rightUrl: string}>} nearDuplicatePairs - Pairs from the near-duplicate rule.
 * @param {Array<{leftUrl: string, rightUrl: string}>} entitySwapPairs - Pairs from the entity-swap rule.
 * @param {Set<string>} thinContentUrls - URLs already flagged as thin content.
 * @param {Array<object>} [pages] - Optional parsed pages, looked up by `url`.
 * @returns {Array<object>} One critical finding per qualifying pair; URLs are
 *   reported in ascending order.
 */
export function doorwayPatternRule(nearDuplicatePairs, entitySwapPairs, thinContentUrls, pages) {
    const entitySwapKeys = new Set(entitySwapPairs.map((pair) => pairKey(pair.leftUrl, pair.rightUrl)));
    const pageByUrl = new Map();
    if (pages) {
        for (const page of pages) {
            pageByUrl.set(page.url, page);
        }
    }
    const findings = [];
    for (const pair of nearDuplicatePairs) {
        if (!entitySwapKeys.has(pairKey(pair.leftUrl, pair.rightUrl))) {
            continue;
        }
        // Report URLs in a stable order regardless of how the pair was stored.
        const [left, right] = pair.leftUrl < pair.rightUrl
            ? [pair.leftUrl, pair.rightUrl]
            : [pair.rightUrl, pair.leftUrl];
        const signals = ["near-duplicate", "entity-swap"];
        if (thinContentUrls.has(left) || thinContentUrls.has(right)) {
            signals.push("thin-content");
        }
        const leftPage = pageByUrl.get(left);
        const rightPage = pageByUrl.get(right);
        if (leftPage && rightPage) {
            const signature = leftPage.structureSignature;
            // A missing signature on both sides must not count as a match:
            // `undefined === undefined` says nothing about the page structure.
            if (signature !== undefined && signature !== null && signature === rightPage.structureSignature) {
                signals.push("identical-structure");
            }
            if (leftPage.metaDescription && leftPage.metaDescription === rightPage.metaDescription) {
                signals.push("identical-meta");
            }
        }
        if (signals.length < 3) {
            continue;
        }
        findings.push({
            ruleId: "spam/doorway-pattern",
            severity: "critical",
            message: `${left} and ${right} match doorway-pattern signals (${signals.join(" + ")}).`,
            relatedUrls: [left, right],
            fix: "This page matches multiple spam signals. Prioritize adding unique, substantive content and differentiating the page structure."
        });
    }
    return findings;
}
|
|
47
|
+
//# sourceMappingURL=doorway-pattern.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"doorway-pattern.js","sourceRoot":"","sources":["../../../src/rules/spam/doorway-pattern.ts"],"names":[],"mappings":"AAGA,SAAS,OAAO,CAAC,IAAY,EAAE,KAAa;IAC1C,OAAO,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,KAAK,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,KAAK,KAAK,IAAI,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,kBAA+B,EAC/B,eAA4B,EAC5B,eAA4B,EAC5B,KAAoB;IAEpB,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAC/F,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAsB,CAAC;IAC9C,IAAI,KAAK,EAAE,CAAC;QACV,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,kBAAkB,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACjD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,SAAS;QACX,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;QACzE,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;QAC1E,MAAM,OAAO,GAAa,CAAC,gBAAgB,EAAE,aAAa,CAAC,CAAC;QAE5D,MAAM,MAAM,GAAG,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QACvE,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC/B,CAAC;QAED,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAErC,IAAI,QAAQ,IAAI,SAAS,IAAI,QAAQ,CAAC,kBAAkB,KAAK,SAAS,CAAC,kBAAkB,EAAE,CAAC;YAC1F,OAAO,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,QAAQ,IAAI,SAAS,IAAI,QAAQ,CAAC,eAAe,IAAI,SAAS,CAAC,eAAe;YAC9E,QAAQ,CAAC,eAAe,KAAK,SAAS,CAAC,eAAe,EAAE,CAAC;YAC3D,OAAO,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QACjC,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,sBAAsB;YAC9B,QAAQ,EAAE,UAAU;YACpB,OAAO,EAAE,GAAG,IAAI,QAAQ,KAAK,mCAAmC,OAAO,CAAC,IAAI,CAA
C,KAAK,CAAC,IAAI;YACvF,WAAW,EAAE,CAAC,IAAI,EAAE,KAAK,CAAC;YAC1B,GAAG,EAAE,gIAAgI;SACtI,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { EntityMaskPattern, ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
import type { PairMatch } from "./near-duplicate.js";
|
|
3
|
+
/**
 * Detects page pairs that look alike once entity names are masked out.
 *
 * Each page's `contentText` is masked with `patterns`, hashed with SimHash,
 * and every pair of pages is compared; pairs whose similarity is at least
 * `threshold` are reported.
 *
 * @param pages Parsed pages to compare pairwise.
 * @param patterns Entity mask patterns applied to the text before hashing.
 * @param threshold Minimum similarity (inclusive) for a pair to be reported.
 * @returns `findings` holds one critical "spam/entity-swap" result per matching
 *   pair; `pairs` holds the same matches as raw URL pairs.
 */
export declare function entitySwapRule(pages: ParsedPage[], patterns: EntityMaskPattern[], threshold: number): {
|
|
4
|
+
findings: RuleResult[];
|
|
5
|
+
pairs: PairMatch[];
|
|
6
|
+
};
|
|
7
|
+
//# sourceMappingURL=entity-swap.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"entity-swap.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAChF,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAErD,wBAAgB,cAAc,CAC5B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,SAAS,EAAE,MAAM,GAChB;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,CAwBhD"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { maskEntities } from "../../algorithms/entity-mask.js";
|
|
2
|
+
import { hammingDistance, simHashFromText, similarityFromDistance } from "../../algorithms/simhash.js";
|
|
3
|
+
/**
 * Detects page pairs that look alike once entity names are masked out.
 *
 * Every page's text is masked with `patterns` and hashed up front; each
 * unordered pair of pages is then compared once, with the earlier page as the
 * "left" side. A pair is reported when its similarity is at least `threshold`.
 *
 * @param {Array<{url: string, contentText: string}>} pages - Parsed pages to compare.
 * @param {Array<object>} patterns - Entity mask patterns handed to `maskEntities`.
 * @param {number} threshold - Minimum similarity (inclusive) for a match.
 * @returns {{findings: Array<object>, pairs: Array<object>}} Critical
 *   "spam/entity-swap" findings plus the matching URL pairs.
 */
export function entitySwapRule(pages, patterns, threshold) {
    const maskedHashes = pages.map((page) => {
        const maskedText = maskEntities(page.contentText, patterns);
        return simHashFromText(maskedText);
    });
    const pairs = [];
    const findings = [];
    for (const [leftIndex, leftPage] of pages.entries()) {
        for (let rightIndex = leftIndex + 1; rightIndex < pages.length; rightIndex += 1) {
            const distance = hammingDistance(maskedHashes[leftIndex], maskedHashes[rightIndex]);
            const similarity = similarityFromDistance(distance);
            if (!(similarity >= threshold)) {
                continue;
            }
            const rightPage = pages[rightIndex];
            pairs.push({ leftUrl: leftPage.url, rightUrl: rightPage.url, similarity });
            findings.push({
                ruleId: "spam/entity-swap",
                severity: "critical",
                message: `${leftPage.url} and ${rightPage.url} look structurally identical after entity masking.`,
                pageUrl: leftPage.url,
                relatedUrls: [rightPage.url],
                similarity,
                fix: "These pages are identical after masking entity names. Add entity-specific content: local regulations, statistics, fees, or requirements unique to each entity."
            });
        }
    }
    return { findings, pairs };
}
|
|
26
|
+
//# sourceMappingURL=entity-swap.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"entity-swap.js","sourceRoot":"","sources":["../../../src/rules/spam/entity-swap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAC/D,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AAIvG,MAAM,UAAU,cAAc,CAC5B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAE9F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBAC1E,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,kBAAkB;oBAC1B,QAAQ,EAAE,UAAU;oBACpB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,oDAAoD;oBAChG,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;oBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC3B,UAAU;oBACV,GAAG,EAAE,gKAAgK;iBACtK,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
/**
 * A pair of page URLs that a pairwise rule judged similar.
 *
 * `leftUrl` is the page that came earlier in the input list and `rightUrl` the
 * later one. `similarity` is the score the pair received, as produced by
 * `similarityFromDistance`.
 * NOTE(review): the rules format `similarity * 100` as a percentage, so a
 * 0..1 range is presumed — confirm against the simhash helper.
 */
export interface PairMatch {
|
|
3
|
+
leftUrl: string;
|
|
4
|
+
rightUrl: string;
|
|
5
|
+
similarity: number;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Detects near-duplicate pages by comparing SimHash fingerprints of their text.
 *
 * Every pair of pages is compared; pairs whose similarity is at least
 * `threshold` are reported.
 *
 * @param pages Parsed pages to compare pairwise.
 * @param threshold Minimum similarity (inclusive) for a pair to be reported.
 * @returns `findings` holds one critical "spam/near-duplicate" result per
 *   matching pair; `pairs` holds the same matches as raw URL pairs.
 */
export declare function nearDuplicateRule(pages: ParsedPage[], threshold: number): {
|
|
8
|
+
findings: RuleResult[];
|
|
9
|
+
pairs: PairMatch[];
|
|
10
|
+
};
|
|
11
|
+
//# sourceMappingURL=near-duplicate.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"near-duplicate.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/near-duplicate.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,iBAAiB,CAC/B,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,MAAM,GAChB;IAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;IAAC,KAAK,EAAE,SAAS,EAAE,CAAA;CAAE,CAwBhD"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { hammingDistance, simHashFromText, similarityFromDistance } from "../../algorithms/simhash.js";
|
|
2
|
+
/**
 * Detects near-duplicate pages by comparing fingerprints of their text.
 *
 * Every page's `contentText` is hashed up front; each unordered pair of pages
 * is then compared once, with the earlier page as the "left" side. A pair is
 * reported when its similarity is at least `threshold`.
 *
 * @param {Array<{url: string, contentText: string}>} pages - Parsed pages to compare.
 * @param {number} threshold - Minimum similarity (inclusive) for a match.
 * @returns {{findings: Array<object>, pairs: Array<object>}} Critical
 *   "spam/near-duplicate" findings plus the matching URL pairs.
 */
export function nearDuplicateRule(pages, threshold) {
    const fingerprints = pages.map((page) => simHashFromText(page.contentText));
    const pairs = [];
    const findings = [];
    for (const [leftIndex, leftPage] of pages.entries()) {
        for (let rightIndex = leftIndex + 1; rightIndex < pages.length; rightIndex += 1) {
            const distance = hammingDistance(fingerprints[leftIndex], fingerprints[rightIndex]);
            const similarity = similarityFromDistance(distance);
            if (!(similarity >= threshold)) {
                continue;
            }
            const rightPage = pages[rightIndex];
            pairs.push({ leftUrl: leftPage.url, rightUrl: rightPage.url, similarity });
            findings.push({
                ruleId: "spam/near-duplicate",
                severity: "critical",
                message: `${leftPage.url} and ${rightPage.url} are near-duplicates (${(similarity * 100).toFixed(1)}% similar).`,
                pageUrl: leftPage.url,
                relatedUrls: [rightPage.url],
                similarity,
                fix: "Differentiate these pages with unique content. Add page-specific details, data, examples, or analysis that the other page doesn't have."
            });
        }
    }
    return { findings, pairs };
}
|
|
25
|
+
//# sourceMappingURL=near-duplicate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"near-duplicate.js","sourceRoot":"","sources":["../../../src/rules/spam/near-duplicate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,sBAAsB,EAAE,MAAM,6BAA6B,CAAC;AASvG,MAAM,UAAU,iBAAiB,CAC/B,KAAmB,EACnB,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAEtE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,sBAAsB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,UAAU,IAAI,SAAS,EAAE,CAAC;gBAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,UAAU,EAAE,CAAC,CAAC;gBAC1E,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,qBAAqB;oBAC7B,QAAQ,EAAE,UAAU;oBACpB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,yBAAyB,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,aAAa;oBAC/G,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;oBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC3B,UAAU;oBACV,GAAG,EAAE,yIAAyI;iBAC/I,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"publication-velocity.d.ts","sourceRoot":"","sources":["../../../src/rules/spam/publication-velocity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,UAAU,EAAE,CAyB5F"}
|