@pseolint/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +53 -0
- package/dist/algorithms/entity-mask.d.ts +3 -0
- package/dist/algorithms/entity-mask.d.ts.map +1 -0
- package/dist/algorithms/entity-mask.js +8 -0
- package/dist/algorithms/entity-mask.js.map +1 -0
- package/dist/algorithms/entity-mask.test.d.ts +2 -0
- package/dist/algorithms/entity-mask.test.d.ts.map +1 -0
- package/dist/algorithms/entity-mask.test.js +23 -0
- package/dist/algorithms/entity-mask.test.js.map +1 -0
- package/dist/algorithms/simhash.d.ts +4 -0
- package/dist/algorithms/simhash.d.ts.map +1 -0
- package/dist/algorithms/simhash.js +64 -0
- package/dist/algorithms/simhash.js.map +1 -0
- package/dist/algorithms/simhash.test.d.ts +2 -0
- package/dist/algorithms/simhash.test.d.ts.map +1 -0
- package/dist/algorithms/simhash.test.js +23 -0
- package/dist/algorithms/simhash.test.js.map +1 -0
- package/dist/algorithms/tf-idf.d.ts +8 -0
- package/dist/algorithms/tf-idf.d.ts.map +1 -0
- package/dist/algorithms/tf-idf.js +55 -0
- package/dist/algorithms/tf-idf.js.map +1 -0
- package/dist/auditor.d.ts +3 -0
- package/dist/auditor.d.ts.map +1 -0
- package/dist/auditor.js +730 -0
- package/dist/auditor.js.map +1 -0
- package/dist/auditor.test.d.ts +2 -0
- package/dist/auditor.test.d.ts.map +1 -0
- package/dist/auditor.test.js +134 -0
- package/dist/auditor.test.js.map +1 -0
- package/dist/enrich-findings.d.ts +9 -0
- package/dist/enrich-findings.d.ts.map +1 -0
- package/dist/enrich-findings.js +436 -0
- package/dist/enrich-findings.js.map +1 -0
- package/dist/formatters/console.d.ts +6 -0
- package/dist/formatters/console.d.ts.map +1 -0
- package/dist/formatters/console.js +237 -0
- package/dist/formatters/console.js.map +1 -0
- package/dist/formatters/html.d.ts +3 -0
- package/dist/formatters/html.d.ts.map +1 -0
- package/dist/formatters/html.js +170 -0
- package/dist/formatters/html.js.map +1 -0
- package/dist/formatters/index.d.ts +6 -0
- package/dist/formatters/index.d.ts.map +1 -0
- package/dist/formatters/index.js +5 -0
- package/dist/formatters/index.js.map +1 -0
- package/dist/formatters/json.d.ts +3 -0
- package/dist/formatters/json.d.ts.map +1 -0
- package/dist/formatters/json.js +4 -0
- package/dist/formatters/json.js.map +1 -0
- package/dist/formatters/markdown.d.ts +3 -0
- package/dist/formatters/markdown.d.ts.map +1 -0
- package/dist/formatters/markdown.js +93 -0
- package/dist/formatters/markdown.js.map +1 -0
- package/dist/index.d.ts +45 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/index.js.map +1 -0
- package/dist/page-classifier.d.ts +4 -0
- package/dist/page-classifier.d.ts.map +1 -0
- package/dist/page-classifier.js +133 -0
- package/dist/page-classifier.js.map +1 -0
- package/dist/parser.d.ts +3 -0
- package/dist/parser.d.ts.map +1 -0
- package/dist/parser.js +131 -0
- package/dist/parser.js.map +1 -0
- package/dist/parser.test.d.ts +2 -0
- package/dist/parser.test.d.ts.map +1 -0
- package/dist/parser.test.js +37 -0
- package/dist/parser.test.js.map +1 -0
- package/dist/renderer.d.ts +15 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +124 -0
- package/dist/renderer.js.map +1 -0
- package/dist/rule-references.d.ts +2 -0
- package/dist/rule-references.d.ts.map +1 -0
- package/dist/rule-references.js +35 -0
- package/dist/rule-references.js.map +1 -0
- package/dist/rules/cannibal/keyword-collision.d.ts +3 -0
- package/dist/rules/cannibal/keyword-collision.d.ts.map +1 -0
- package/dist/rules/cannibal/keyword-collision.js +25 -0
- package/dist/rules/cannibal/keyword-collision.js.map +1 -0
- package/dist/rules/cannibal/title-overlap.d.ts +3 -0
- package/dist/rules/cannibal/title-overlap.d.ts.map +1 -0
- package/dist/rules/cannibal/title-overlap.js +43 -0
- package/dist/rules/cannibal/title-overlap.js.map +1 -0
- package/dist/rules/cannibal/url-pattern.d.ts +3 -0
- package/dist/rules/cannibal/url-pattern.d.ts.map +1 -0
- package/dist/rules/cannibal/url-pattern.js +48 -0
- package/dist/rules/cannibal/url-pattern.js.map +1 -0
- package/dist/rules/content/eeat-signals.d.ts +3 -0
- package/dist/rules/content/eeat-signals.d.ts.map +1 -0
- package/dist/rules/content/eeat-signals.js +46 -0
- package/dist/rules/content/eeat-signals.js.map +1 -0
- package/dist/rules/content/heading-uniqueness.d.ts +3 -0
- package/dist/rules/content/heading-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/heading-uniqueness.js +56 -0
- package/dist/rules/content/heading-uniqueness.js.map +1 -0
- package/dist/rules/content/meta-uniqueness.d.ts +3 -0
- package/dist/rules/content/meta-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/meta-uniqueness.js +28 -0
- package/dist/rules/content/meta-uniqueness.js.map +1 -0
- package/dist/rules/content/missing-author.d.ts +3 -0
- package/dist/rules/content/missing-author.d.ts.map +1 -0
- package/dist/rules/content/missing-author.js +26 -0
- package/dist/rules/content/missing-author.js.map +1 -0
- package/dist/rules/content/unique-value.d.ts +3 -0
- package/dist/rules/content/unique-value.d.ts.map +1 -0
- package/dist/rules/content/unique-value.js +26 -0
- package/dist/rules/content/unique-value.js.map +1 -0
- package/dist/rules/links/cluster-connectivity.d.ts +7 -0
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -0
- package/dist/rules/links/cluster-connectivity.js +73 -0
- package/dist/rules/links/cluster-connectivity.js.map +1 -0
- package/dist/rules/links/cluster-key.d.ts +3 -0
- package/dist/rules/links/cluster-key.d.ts.map +1 -0
- package/dist/rules/links/cluster-key.js +22 -0
- package/dist/rules/links/cluster-key.js.map +1 -0
- package/dist/rules/links/dead-ends.d.ts +3 -0
- package/dist/rules/links/dead-ends.d.ts.map +1 -0
- package/dist/rules/links/dead-ends.js +13 -0
- package/dist/rules/links/dead-ends.js.map +1 -0
- package/dist/rules/links/hub-pages.d.ts +7 -0
- package/dist/rules/links/hub-pages.d.ts.map +1 -0
- package/dist/rules/links/hub-pages.js +73 -0
- package/dist/rules/links/hub-pages.js.map +1 -0
- package/dist/rules/links/link-depth.d.ts +3 -0
- package/dist/rules/links/link-depth.d.ts.map +1 -0
- package/dist/rules/links/link-depth.js +46 -0
- package/dist/rules/links/link-depth.js.map +1 -0
- package/dist/rules/links/orphan-pages.d.ts +3 -0
- package/dist/rules/links/orphan-pages.d.ts.map +1 -0
- package/dist/rules/links/orphan-pages.js +19 -0
- package/dist/rules/links/orphan-pages.js.map +1 -0
- package/dist/rules/schema/consistency.d.ts +3 -0
- package/dist/rules/schema/consistency.d.ts.map +1 -0
- package/dist/rules/schema/consistency.js +44 -0
- package/dist/rules/schema/consistency.js.map +1 -0
- package/dist/rules/schema/json-ld-valid.d.ts +3 -0
- package/dist/rules/schema/json-ld-valid.d.ts.map +1 -0
- package/dist/rules/schema/json-ld-valid.js +47 -0
- package/dist/rules/schema/json-ld-valid.js.map +1 -0
- package/dist/rules/schema/required-fields.d.ts +3 -0
- package/dist/rules/schema/required-fields.d.ts.map +1 -0
- package/dist/rules/schema/required-fields.js +60 -0
- package/dist/rules/schema/required-fields.js.map +1 -0
- package/dist/rules/spam/boilerplate-ratio.d.ts +3 -0
- package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -0
- package/dist/rules/spam/boilerplate-ratio.js +50 -0
- package/dist/rules/spam/boilerplate-ratio.js.map +1 -0
- package/dist/rules/spam/doorway-pattern.d.ts +4 -0
- package/dist/rules/spam/doorway-pattern.d.ts.map +1 -0
- package/dist/rules/spam/doorway-pattern.js +47 -0
- package/dist/rules/spam/doorway-pattern.js.map +1 -0
- package/dist/rules/spam/entity-swap.d.ts +7 -0
- package/dist/rules/spam/entity-swap.d.ts.map +1 -0
- package/dist/rules/spam/entity-swap.js +26 -0
- package/dist/rules/spam/entity-swap.js.map +1 -0
- package/dist/rules/spam/near-duplicate.d.ts +11 -0
- package/dist/rules/spam/near-duplicate.d.ts.map +1 -0
- package/dist/rules/spam/near-duplicate.js +25 -0
- package/dist/rules/spam/near-duplicate.js.map +1 -0
- package/dist/rules/spam/publication-velocity.d.ts +3 -0
- package/dist/rules/spam/publication-velocity.d.ts.map +1 -0
- package/dist/rules/spam/publication-velocity.js +25 -0
- package/dist/rules/spam/publication-velocity.js.map +1 -0
- package/dist/rules/spam/template-coverage.d.ts +3 -0
- package/dist/rules/spam/template-coverage.d.ts.map +1 -0
- package/dist/rules/spam/template-coverage.js +87 -0
- package/dist/rules/spam/template-coverage.js.map +1 -0
- package/dist/rules/spam/template-diversity.d.ts +3 -0
- package/dist/rules/spam/template-diversity.d.ts.map +1 -0
- package/dist/rules/spam/template-diversity.js +19 -0
- package/dist/rules/spam/template-diversity.js.map +1 -0
- package/dist/rules/spam/thin-content.d.ts +6 -0
- package/dist/rules/spam/thin-content.d.ts.map +1 -0
- package/dist/rules/spam/thin-content.js +22 -0
- package/dist/rules/spam/thin-content.js.map +1 -0
- package/dist/rules/tech/canonical-consistency.d.ts +4 -0
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -0
- package/dist/rules/tech/canonical-consistency.js +78 -0
- package/dist/rules/tech/canonical-consistency.js.map +1 -0
- package/dist/rules/tech/canonical-noindex-conflict.d.ts +3 -0
- package/dist/rules/tech/canonical-noindex-conflict.d.ts.map +1 -0
- package/dist/rules/tech/canonical-noindex-conflict.js +27 -0
- package/dist/rules/tech/canonical-noindex-conflict.js.map +1 -0
- package/dist/rules/tech/hreflang-consistency.d.ts +3 -0
- package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -0
- package/dist/rules/tech/hreflang-consistency.js +99 -0
- package/dist/rules/tech/hreflang-consistency.js.map +1 -0
- package/dist/rules/tech/og-completeness.d.ts +3 -0
- package/dist/rules/tech/og-completeness.d.ts.map +1 -0
- package/dist/rules/tech/og-completeness.js +35 -0
- package/dist/rules/tech/og-completeness.js.map +1 -0
- package/dist/rules/tech/redirect-chain.d.ts +3 -0
- package/dist/rules/tech/redirect-chain.d.ts.map +1 -0
- package/dist/rules/tech/redirect-chain.js +20 -0
- package/dist/rules/tech/redirect-chain.js.map +1 -0
- package/dist/rules/tech/robots-noindex-conflict.d.ts +3 -0
- package/dist/rules/tech/robots-noindex-conflict.d.ts.map +1 -0
- package/dist/rules/tech/robots-noindex-conflict.js +30 -0
- package/dist/rules/tech/robots-noindex-conflict.js.map +1 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts +3 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -0
- package/dist/rules/tech/robots-sitemap-presence.js +61 -0
- package/dist/rules/tech/robots-sitemap-presence.js.map +1 -0
- package/dist/rules/tech/sitemap-completeness.d.ts +3 -0
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -0
- package/dist/rules/tech/sitemap-completeness.js +40 -0
- package/dist/rules/tech/sitemap-completeness.js.map +1 -0
- package/dist/rules/tech/soft-404.d.ts +3 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -0
- package/dist/rules/tech/soft-404.js +24 -0
- package/dist/rules/tech/soft-404.js.map +1 -0
- package/dist/types.d.ts +170 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/url-normalize.d.ts +10 -0
- package/dist/url-normalize.d.ts.map +1 -0
- package/dist/url-normalize.js +52 -0
- package/dist/url-normalize.js.map +1 -0
- package/package.json +46 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { buildCorpus, extractKeywords } from "../../algorithms/tf-idf.js";
|
|
2
|
+
/**
 * Flags pairs of pages whose extracted keyword lists overlap heavily.
 *
 * A corpus is built from every page's `contentText`, a keyword list is
 * extracted per page, and each unordered pair of pages that shares at least
 * `minShared` keywords yields one warning finding.
 *
 * @param pages - Pages exposing `url` and `contentText`.
 * @param minShared - Minimum number of shared keywords that triggers a finding.
 * @returns One finding per colliding pair, in (i, j) iteration order.
 */
export function keywordCollisionRule(pages, minShared = 6) {
    // Passed to extractKeywords and used as the similarity denominator;
    // presumably the maximum number of keywords returned per page.
    const topKeywordCount = 10;
    const findings = [];
    const corpus = buildCorpus(pages.map((page) => page.contentText));
    const keywordsPerPage = pages.map((page) => extractKeywords(page.contentText, corpus, topKeywordCount));
    // Build each page's lookup set once instead of once per compared pair.
    const keywordSets = keywordsPerPage.map((keywords) => new Set(keywords));
    for (let i = 0; i < pages.length; i += 1) {
        const setA = keywordSets[i];
        for (let j = i + 1; j < pages.length; j += 1) {
            const shared = keywordsPerPage[j].filter((kw) => setA.has(kw));
            if (shared.length >= minShared) {
                findings.push({
                    ruleId: "cannibal/keyword-collision",
                    severity: "warning",
                    message: `${pages[i].url} and ${pages[j].url} share ${shared.length} of their top ${topKeywordCount} keywords: ${shared.join(", ")}.`,
                    pageUrl: pages[i].url,
                    relatedUrls: [pages[j].url],
                    similarity: shared.length / topKeywordCount,
                    fix: `These pages target the same keywords. Consolidate them into one page or differentiate their content focus.`
                });
            }
        }
    }
    return findings;
}
|
|
25
|
+
//# sourceMappingURL=keyword-collision.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"keyword-collision.js","sourceRoot":"","sources":["../../../src/rules/cannibal/keyword-collision.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAG1E,MAAM,UAAU,oBAAoB,CAClC,KAAmB,EACnB,YAAoB,CAAC;IAErB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAClE,MAAM,eAAe,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,WAAW,EAAE,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC;IAE3F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;YACzC,MAAM,MAAM,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;YAC/D,IAAI,MAAM,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;gBAC/B,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,4BAA4B;oBACpC,QAAQ,EAAE,SAAS;oBACnB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,UAAU,MAAM,CAAC,MAAM,8BAA8B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;oBACrH,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;oBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC3B,UAAU,EAAE,MAAM,CAAC,MAAM,GAAG,EAAE;oBAC9B,GAAG,EAAE,4GAA4G;iBAClH,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"title-overlap.d.ts","sourceRoot":"","sources":["../../../src/rules/cannibal/title-overlap.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AA0BhF,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,SAAS,EAAE,MAAM,GAChB,UAAU,EAAE,CAsBd"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { maskEntities } from "../../algorithms/entity-mask.js";
|
|
2
|
+
/**
 * Splits text into a set of distinct lowercase word tokens.
 *
 * The text is NFC-normalised so precomposed and decomposed spellings of the
 * same character produce the same token. Runs of characters that are not
 * letters, combining marks, digits or whitespace are replaced by a space
 * before splitting on whitespace.
 *
 * @param text - Raw text to tokenize.
 * @returns Set of non-empty tokens.
 */
function tokenize(text) {
    const words = text
        .normalize("NFC")
        .toLowerCase()
        // \p{M} keeps combining marks attached to their base letter; without it
        // decomposed accents and mark-based scripts are split mid-word.
        .replace(/[^\p{L}\p{M}\p{N}\s]+/gu, " ")
        .split(/\s+/)
        .filter(Boolean);
    return new Set(words);
}
|
|
9
|
+
/**
 * Computes the Jaccard similarity |a ∩ b| / |a ∪ b| of two sets.
 *
 * @param a - First set.
 * @param b - Second set.
 * @returns A value in [0, 1]; 0 when both sets are empty.
 */
function jaccardSimilarity(a, b) {
    // Walk the smaller set and probe the larger one.
    const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a];
    let shared = 0;
    for (const token of smaller) {
        if (larger.has(token)) {
            shared += 1;
        }
    }
    const union = a.size + b.size - shared;
    // union is 0 only when both sets are empty; avoid 0 / 0.
    return union === 0 ? 0 : shared / union;
}
|
|
22
|
+
/**
 * Flags pairs of pages whose entity-masked titles are too similar.
 *
 * Every title is masked with `patterns`, tokenized, and compared pairwise by
 * Jaccard similarity; a pair scoring strictly above `threshold` yields one
 * warning finding.
 *
 * @param pages - Pages exposing `url` and `title`.
 * @param patterns - Entity patterns forwarded to `maskEntities`.
 * @param threshold - Similarity a pair must exceed to be reported.
 * @returns One finding per overlapping pair, in (first, second) iteration order.
 */
export function titleOverlapRule(pages, patterns, threshold) {
    const tokenSets = pages.map((page) => {
        const maskedTitle = maskEntities(page.title, patterns);
        return tokenize(maskedTitle);
    });
    const findings = [];
    for (let first = 0; first < pages.length; first += 1) {
        for (let second = first + 1; second < pages.length; second += 1) {
            const similarity = jaccardSimilarity(tokenSets[first], tokenSets[second]);
            if (!(similarity > threshold)) {
                continue;
            }
            findings.push({
                ruleId: "cannibal/title-overlap",
                severity: "warning",
                message: `${pages[first].url} and ${pages[second].url} have overlapping titles after entity masking (${(similarity * 100).toFixed(1)}% Jaccard similarity).`,
                pageUrl: pages[first].url,
                relatedUrls: [pages[second].url],
                similarity,
                fix: `Differentiate page titles by including unique, page-specific keywords or angles.`
            });
        }
    }
    return findings;
}
|
|
43
|
+
//# sourceMappingURL=title-overlap.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"title-overlap.js","sourceRoot":"","sources":["../../../src/rules/cannibal/title-overlap.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAG/D,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,GAAG,CACZ,IAAI;SACD,WAAW,EAAE;SACb,OAAO,CAAC,oBAAoB,EAAE,GAAG,CAAC;SAClC,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,OAAO,CAAC,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,iBAAiB,CAAC,CAAc,EAAE,CAAc;IACvD,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,MAAM,KAAK,IAAI,CAAC,EAAE,CAAC;QACtB,IAAI,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YACjB,YAAY,IAAI,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IACD,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,YAAY,CAAC;IAC7C,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,KAAK,CAAC;AAChD,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,KAAmB,EACnB,QAA6B,EAC7B,SAAiB;IAEjB,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,YAAY,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;IAEvF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7C,MAAM,UAAU,GAAG,iBAAiB,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;YACvE,IAAI,UAAU,GAAG,SAAS,EAAE,CAAC;gBAC3B,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,wBAAwB;oBAChC,QAAQ,EAAE,SAAS;oBACnB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,kDAAkD,CAAC,UAAU,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,wBAAwB;oBACnJ,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;oBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC3B,UAAU;oBACV,GAAG,EAAE,kFAAkF;iBACxF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url-pattern.d.ts","sourceRoot":"","sources":["../../../src/rules/cannibal/url-pattern.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAqB7D,wBAAgB,cAAc,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAgChE"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Splits a URL's path into its parent directory and the sorted hyphen-separated
 * tokens of its last segment.
 *
 * Trailing slashes are ignored. Absolute URLs are parsed with `URL`; anything
 * `URL` rejects is treated as a bare path.
 *
 * @param url - Absolute URL or path.
 * @returns `{ directory, tokens }`, where `directory` is the path before the
 *   last slash ("" when there is none) and `tokens` is sorted lexicographically.
 */
function lastSegment(url) {
    let pathname;
    try {
        pathname = new URL(url).pathname;
    }
    catch {
        // Not an absolute URL: use it as a path, but drop any ?query or #fragment
        // so it matches what URL.pathname would have returned.
        pathname = url.split(/[?#]/, 1)[0];
    }
    const stripped = pathname.replace(/\/+$/, "");
    const lastSlash = stripped.lastIndexOf("/");
    const directory = lastSlash >= 0 ? stripped.slice(0, lastSlash) : "";
    const segment = lastSlash >= 0 ? stripped.slice(lastSlash + 1) : stripped;
    return {
        directory,
        // Sorting makes token order irrelevant when segments are compared.
        tokens: segment.split("-").filter(Boolean).sort()
    };
}
|
|
18
|
+
/**
 * Flags pairs of pages in the same directory whose last URL segment consists
 * of the same hyphen-separated tokens.
 *
 * Pages with an empty directory or no tokens are never reported, and two
 * entries with the same `url` string are skipped.
 *
 * NOTE(review): tokens are compared after sorting, so two distinct URL strings
 * whose last segments are identical (same order) are reported with the same
 * "different order" wording — confirm whether that is intended.
 *
 * @param pages - Pages exposing `url`.
 * @returns One info finding per matching pair, in (i, j) iteration order.
 */
export function urlPatternRule(pages) {
    const findings = [];
    // Parse each URL and build its comparison key once, not once per pair.
    const parsed = pages.map((page) => {
        const { directory, tokens } = lastSegment(page.url);
        return { url: page.url, directory, tokens, key: tokens.join("-") };
    });
    for (let i = 0; i < parsed.length; i += 1) {
        const a = parsed[i];
        // These conditions depend only on `a`, so check them outside the inner loop.
        if (a.directory === "" || a.tokens.length === 0) {
            continue;
        }
        for (let j = i + 1; j < parsed.length; j += 1) {
            const b = parsed[j];
            if (a.directory !== b.directory || b.tokens.length === 0 || a.url === b.url) {
                continue;
            }
            if (a.key === b.key) {
                findings.push({
                    ruleId: "cannibal/url-pattern",
                    severity: "info",
                    message: `${pages[i].url} and ${pages[j].url} have the same URL tokens in different order, creating ambiguous intent overlap.`,
                    pageUrl: pages[i].url,
                    relatedUrls: [pages[j].url],
                    fix: "These URLs have the same tokens in different order. Consolidate to one canonical URL pattern."
                });
            }
        }
    }
    return findings;
}
|
|
48
|
+
//# sourceMappingURL=url-pattern.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url-pattern.js","sourceRoot":"","sources":["../../../src/rules/cannibal/url-pattern.ts"],"names":[],"mappings":"AAEA,SAAS,WAAW,CAAC,GAAW;IAC9B,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,QAAQ,GAAG,GAAG,CAAC;IACjB,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,SAAS,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACrE,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE1E,OAAO;QACL,SAAS;QACT,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE;KAClD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAmB;IAChD,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAElF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9C,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACpB,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YAEpB,IAAI,CAAC,CAAC,SAAS,KAAK,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,SAAS,KAAK,EAAE,EAAE,CAAC;gBACtD,SAAS;YACX,CAAC;YACD,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACnD,SAAS;YACX,CAAC;YACD,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,GAAG,EAAE,CAAC;gBACpB,SAAS;YACX,CAAC;YACD,IAAI,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9C,QAAQ,CAAC,IAAI,CAAC;oBACZ,MAAM,EAAE,sBAAsB;oBAC9B,QAAQ,EAAE,MAAM;oBAChB,OAAO,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,kFAAkF;oBAC9H,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG;oBACrB,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;oBAC3B,GAAG,EAAE,+FAA+
F;iBACrG,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eeat-signals.d.ts","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAiC7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBjE"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// Case-insensitive markers searched for in a page's raw HTML; matching any one
// of them counts as a single trust-signal category.
const EEAT_HTML_PATTERNS = [
    // Freshness markers.
    /last\s+updated/i, /last\s+modified/i,
    // Editorial review marker.
    /reviewed\s+by/i,
    // Citation section labels.
    /\bsources:/i, /\breferences:/i
];
|
|
8
|
+
/**
 * Counts how many of the four trust-signal categories a page exhibits:
 * a link to an "/about" path, any author signal, a published date, and
 * an HTML marker matching `EEAT_HTML_PATTERNS`.
 *
 * @param page - Page exposing `resolvedHrefs`, `authorSignals`,
 *   `publishedDate` and `html`.
 * @returns Number of categories present, from 0 to 4.
 */
function countSignalCategories(page) {
    let count = 0;
    const aboutPath = /\/about\b/i;
    if (page.resolvedHrefs.some((href) => aboutPath.test(href))) {
        count += 1;
    }
    const { metaAuthor, schemaAuthor, bylineElement, relAuthorLink } = page.authorSignals;
    // A missing (undefined/null) or whitespace-only meta author is not a signal;
    // a plain `!== ""` comparison would count those as present.
    const hasMetaAuthor = typeof metaAuthor === "string"
        ? metaAuthor.trim() !== ""
        : Boolean(metaAuthor);
    if (hasMetaAuthor || schemaAuthor || bylineElement || relAuthorLink) {
        count += 1;
    }
    if (page.publishedDate) {
        count += 1;
    }
    if (EEAT_HTML_PATTERNS.some((pattern) => pattern.test(page.html))) {
        count += 1;
    }
    return count;
}
|
|
25
|
+
/**
 * Reports pages that show fewer than 2 of the 4 trust-signal categories.
 *
 * When every page is lacking and there are more than 3 pages, a single
 * site-wide finding listing all URLs (sorted) is returned; otherwise one
 * finding per lacking page is returned.
 *
 * @param pages - Pages accepted by `countSignalCategories`, exposing `url`.
 * @returns Info-severity findings; empty when no page is lacking.
 */
export function eeatSignalsRule(pages) {
    const weakPages = pages.filter((candidate) => countSignalCategories(candidate) < 2);
    if (weakPages.length === 0) {
        return [];
    }
    const isSiteWide = pages.length > 3 && weakPages.length === pages.length;
    if (!isSiteWide) {
        return weakPages.map((weak) => {
            return {
                ruleId: "content/eeat-signals",
                severity: "info",
                message: `${weak.url} has fewer than 2 out of 4 E-E-A-T signal categories.`,
                pageUrl: weak.url,
                fix: `Add trust signals: author info, publication dates, about page links, sources, or "last updated" markers.`
            };
        });
    }
    const sortedUrls = weakPages.map((weak) => weak.url);
    sortedUrls.sort();
    const siteWideFinding = {
        ruleId: "content/eeat-signals",
        severity: "info",
        message: `All ${weakPages.length} pages have fewer than 2 out of 4 E-E-A-T signal categories. Site-wide trust signals are missing.`,
        fix: `Add trust signals site-wide: author info, publication dates, about page links, sources, or "last updated" markers.`,
        relatedUrls: sortedUrls
    };
    return [siteWideFinding];
}
|
|
46
|
+
//# sourceMappingURL=eeat-signals.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eeat-signals.js","sourceRoot":"","sources":["../../../src/rules/content/eeat-signals.ts"],"names":[],"mappings":"AAEA,MAAM,kBAAkB,GAAG;IACzB,iBAAiB;IACjB,kBAAkB;IAClB,gBAAgB;IAChB,aAAa;IACb,gBAAgB;CACjB,CAAC;AAEF,SAAS,qBAAqB,CAAC,IAAgB;IAC7C,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,IAAI,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;QAC/D,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,IAAI,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa,EAAE,CAAC;QACxE,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QACvB,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;QAClE,KAAK,IAAI,CAAC,CAAC;IACb,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAmB;IACjD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,qBAAqB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAExE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,IAAI,OAAO,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxD,OAAO,CAAC;gBACN,MAAM,EAAE,sBAAsB;gBAC9B,QAAQ,EAAE,MAAM;gBAChB,OAAO,EAAE,OAAO,OAAO,CAAC,MAAM,mGAAmG;gBACjI,GAAG,EAAE,oHAAoH;gBACzH,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE;aAC9C,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC5B,MAAM,EAAE,sBAAsB;QAC9B,QAAQ,EAAE,MAAe;QACzB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,uDAAuD;QAC3E,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,0GAA0G;KAChH,CAAC,CAAC,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"heading-uniqueness.d.ts","sourceRoot":"","sources":["../../../src/rules/content/heading-uniqueness.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAMhF,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,GAC5B,UAAU,EAAE,CAmDd"}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { maskEntities } from "../../algorithms/entity-mask.js";
|
|
2
|
+
/**
 * Normalises a heading for grouping: masks entities, then lowercases and trims.
 *
 * Masking runs on the original-case text, matching how the title-overlap and
 * meta-uniqueness rules call `maskEntities`; lowercasing first would stop any
 * case-sensitive pattern from matching.
 * NOTE(review): assumes `patterns` are written against original-case text —
 * confirm against `maskEntities`.
 *
 * @param value - Raw heading text.
 * @param patterns - Entity patterns forwarded to `maskEntities`.
 * @returns Lowercased, trimmed, entity-masked heading.
 */
function norm(value, patterns) {
    return maskEntities(value, patterns).toLowerCase().trim();
}
|
|
5
|
+
/**
 * Reports groups of pages that share the same normalised H1 list or the same
 * normalised H2 list.
 *
 * A page's headings of one level are normalised with `norm` and joined with
 * " | " to form the group key; pages with no headings at that level (empty
 * key) are ignored. H1 findings come first, then H2 findings, each in
 * first-seen key order.
 *
 * @param pages - Pages exposing `url` and `headings.h1` / `headings.h2` arrays.
 * @param patterns - Entity patterns forwarded to `norm`.
 * @returns One warning finding per key shared by two or more pages.
 */
export function headingUniquenessRule(pages, patterns) {
    // Adds the page URL to the group for its joined, normalised heading list.
    const addToGroup = (groups, headings, url) => {
        const key = headings.map((h) => norm(h, patterns)).join(" | ");
        if (!key) {
            return;
        }
        const group = groups.get(key);
        if (group) {
            group.push(url);
        }
        else {
            groups.set(key, [url]);
        }
    };
    // Map keys are already unique, so each group yields at most one finding.
    const duplicateFindings = (groups, label) => {
        const findings = [];
        for (const urls of groups.values()) {
            if (urls.length < 2) {
                continue;
            }
            findings.push({
                ruleId: "content/heading-uniqueness",
                severity: "warning",
                message: `${urls.length} pages share identical normalized ${label}.`,
                relatedUrls: urls.sort(),
                fix: "Write unique headings for each page that reflect its specific content."
            });
        }
        return findings;
    };
    const h1Groups = new Map();
    const h2Groups = new Map();
    for (const page of pages) {
        addToGroup(h1Groups, page.headings.h1, page.url);
        addToGroup(h2Groups, page.headings.h2, page.url);
    }
    return [
        ...duplicateFindings(h1Groups, "H1 heading"),
        ...duplicateFindings(h2Groups, "H2 headings")
    ];
}
|
|
56
|
+
//# sourceMappingURL=heading-uniqueness.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"heading-uniqueness.js","sourceRoot":"","sources":["../../../src/rules/content/heading-uniqueness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAG/D,SAAS,IAAI,CAAC,KAAa,EAAE,QAA6B;IACxD,OAAO,YAAY,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,EAAE,QAAQ,CAAC,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,qBAAqB,CACnC,KAAmB,EACnB,QAA6B;IAE7B,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC7C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAoB,CAAC;IAE7C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzE,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzE,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACxC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrB,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC7B,CAAC;QACD,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;YACxC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrB,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,KAAK,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,IAAI,QAAQ,EAAE,CAAC;QACvC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAC9B,MAAM,GAAG,GAAG,OAAO,OAAO,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACjB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,4BAA4B;YACpC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,+CAA+C;YACtE,WAAW,EAAE,IAAI,CAAC,IAAI,EAAE;YACxB,GAAG,EAAE,wEAAwE;SAC9E,CAAC,CAAC;IACL,CAAC;IAED,KAAK,MAAM,CAAC,OAAO,EAAE,IAAI,CAAC,IAAI,QAAQ,EAAE,CAAC;QACvC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAC9B,MAAM,GAAG,GAAG,OAAO,OAAO,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACjB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,4BAA4B;YA
CpC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,gDAAgD;YACvE,WAAW,EAAE,IAAI,CAAC,IAAI,EAAE;YACxB,GAAG,EAAE,wEAAwE;SAC9E,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"meta-uniqueness.d.ts","sourceRoot":"","sources":["../../../src/rules/content/meta-uniqueness.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEhF,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,UAAU,EAAE,EACnB,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,uBAAuB,EAAE,MAAM,GAC9B,UAAU,EAAE,CA0Bd"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { maskEntities } from "../../algorithms/entity-mask.js";
|
|
2
|
+
/**
 * Reports groups of pages whose meta descriptions are identical once entities
 * are masked and the result is lowercased and trimmed.
 *
 * Pages with no meta description, or whose masked description is empty, are
 * ignored.
 *
 * @param pages - Pages exposing `url` and `metaDescription`.
 * @param patterns - Entity patterns forwarded to `maskEntities`.
 * @param _minJaccardForCollision - Accepted but not used by this implementation.
 * @returns One error finding per template shared by two or more pages, in
 *   first-seen template order, with the URLs sorted.
 */
export function metaUniquenessRule(pages, patterns, _minJaccardForCollision) {
    const urlsByTemplate = new Map();
    for (const page of pages) {
        const description = page.metaDescription;
        const template = description
            ? maskEntities(description, patterns).toLowerCase().trim()
            : "";
        if (template === "") {
            continue;
        }
        const bucket = urlsByTemplate.get(template);
        if (bucket === undefined || bucket === null) {
            urlsByTemplate.set(template, [page.url]);
        }
        else {
            bucket.push(page.url);
        }
    }
    return Array.from(urlsByTemplate.values())
        .filter((urls) => urls.length >= 2)
        .map((urls) => ({
            ruleId: "content/meta-uniqueness",
            severity: "error",
            message: `${urls.length} pages share the same meta description template after entity masking.`,
            relatedUrls: urls.sort(),
            fix: "Write a unique meta description for each page that highlights what makes it specifically different."
        }));
}
|
|
28
|
+
//# sourceMappingURL=meta-uniqueness.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"meta-uniqueness.js","sourceRoot":"","sources":["../../../src/rules/content/meta-uniqueness.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iCAAiC,CAAC;AAG/D,MAAM,UAAU,kBAAkB,CAChC,KAAmB,EACnB,QAA6B,EAC7B,uBAA+B;IAE/B,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoB,CAAC;IAE3C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,eAAe;YAAE,SAAS;QACpC,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,eAAe,EAAE,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QACjF,IAAI,CAAC,MAAM;YAAE,SAAS;QACtB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACvC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrB,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IAC5B,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,EAAE,IAAI,CAAC,IAAI,MAAM,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAC9B,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,yBAAyB;YACjC,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,uEAAuE;YAC9F,WAAW,EAAE,IAAI,CAAC,IAAI,EAAE;YACxB,GAAG,EAAE,qGAAqG;SAC3G,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"missing-author.d.ts","sourceRoot":"","sources":["../../../src/rules/content/missing-author.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAO7D,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsBnE"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
function hasAuthorSignal(page) {
|
|
2
|
+
const { metaAuthor, schemaAuthor, bylineElement, relAuthorLink } = page.authorSignals;
|
|
3
|
+
return metaAuthor !== "" || schemaAuthor || bylineElement || relAuthorLink;
|
|
4
|
+
}
|
|
5
|
+
export function missingAuthorRule(pages) {
|
|
6
|
+
const missing = pages.filter((page) => !hasAuthorSignal(page));
|
|
7
|
+
if (missing.length === 0)
|
|
8
|
+
return [];
|
|
9
|
+
if (missing.length === pages.length && pages.length > 3) {
|
|
10
|
+
return [{
|
|
11
|
+
ruleId: "content/missing-author",
|
|
12
|
+
severity: "warning",
|
|
13
|
+
message: `All ${missing.length} pages have no author signals (meta author, schema author, byline, or rel="author" link). This is a site-wide E-E-A-T risk.`,
|
|
14
|
+
fix: `Add author attribution site-wide: <meta name="author" content="Name">, a visible byline, or author data in your JSON-LD schema.`,
|
|
15
|
+
relatedUrls: missing.map((p) => p.url).sort()
|
|
16
|
+
}];
|
|
17
|
+
}
|
|
18
|
+
return missing.map((page) => ({
|
|
19
|
+
ruleId: "content/missing-author",
|
|
20
|
+
severity: "warning",
|
|
21
|
+
message: `${page.url} has no author signals. This is an E-E-A-T risk.`,
|
|
22
|
+
pageUrl: page.url,
|
|
23
|
+
fix: `Add author attribution: <meta name="author" content="Name">, a visible byline, or author data in your JSON-LD schema.`
|
|
24
|
+
}));
|
|
25
|
+
}
|
|
26
|
+
//# sourceMappingURL=missing-author.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"missing-author.js","sourceRoot":"","sources":["../../../src/rules/content/missing-author.ts"],"names":[],"mappings":"AAEA,SAAS,eAAe,CAAC,IAAgB;IACvC,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,aAAa,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC;IACtF,OAAO,UAAU,KAAK,EAAE,IAAI,YAAY,IAAI,aAAa,IAAI,aAAa,CAAC;AAC7E,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,KAAmB;IACnD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC;IAE/D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,IAAI,OAAO,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxD,OAAO,CAAC;gBACN,MAAM,EAAE,wBAAwB;gBAChC,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,OAAO,OAAO,CAAC,MAAM,6HAA6H;gBAC3J,GAAG,EAAE,iIAAiI;gBACtI,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE;aAC9C,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC5B,MAAM,EAAE,wBAAwB;QAChC,QAAQ,EAAE,SAAkB;QAC5B,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,kDAAkD;QACtE,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,uHAAuH;KAC7H,CAAC,CAAC,CAAC;AACN,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"unique-value.d.ts","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAM7D,wBAAgB,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE,cAAc,EAAE,MAAM,GAAG,UAAU,EAAE,CA0BzF"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
function tokenize(text) {
|
|
2
|
+
return text.toLowerCase().split(/\s+/).filter(Boolean);
|
|
3
|
+
}
|
|
4
|
+
export function uniqueValueRule(pages, minUniqueWords) {
|
|
5
|
+
const frequencies = new Map();
|
|
6
|
+
const pageTokens = pages.map((page) => tokenize(page.contentText));
|
|
7
|
+
for (const tokens of pageTokens) {
|
|
8
|
+
for (const token of new Set(tokens)) {
|
|
9
|
+
frequencies.set(token, (frequencies.get(token) ?? 0) + 1);
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
const findings = [];
|
|
13
|
+
pages.forEach((page, idx) => {
|
|
14
|
+
const uniqueCount = new Set(pageTokens[idx].filter((token) => (frequencies.get(token) ?? 0) === 1)).size;
|
|
15
|
+
if (uniqueCount < minUniqueWords) {
|
|
16
|
+
findings.push({
|
|
17
|
+
ruleId: "content/unique-value",
|
|
18
|
+
severity: "error",
|
|
19
|
+
message: `${page.url} has only ${uniqueCount} unique words (min ${minUniqueWords}).`,
|
|
20
|
+
fix: `Add ${minUniqueWords - uniqueCount} more words of content not found on any other page.`
|
|
21
|
+
});
|
|
22
|
+
}
|
|
23
|
+
});
|
|
24
|
+
return findings;
|
|
25
|
+
}
|
|
26
|
+
//# sourceMappingURL=unique-value.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"unique-value.js","sourceRoot":"","sources":["../../../src/rules/content/unique-value.ts"],"names":[],"mappings":"AAEA,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAmB,EAAE,cAAsB;IACzE,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;IAEnE,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,KAAK,MAAM,KAAK,IAAI,IAAI,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACpC,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5D,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAClC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE;QAC1B,MAAM,WAAW,GAAG,IAAI,GAAG,CACzB,UAAU,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CACvE,CAAC,IAAI,CAAC;QACP,IAAI,WAAW,GAAG,cAAc,EAAE,CAAC;YACjC,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,sBAAsB;gBAC9B,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,aAAa,WAAW,sBAAsB,cAAc,IAAI;gBACpF,GAAG,EAAE,OAAO,cAAc,GAAG,WAAW,qDAAqD;aAC9F,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Flags clusters (same parent directory) with 2+ pages that are siloed: no outbound
|
|
4
|
+
* internal crawl link to another cluster and no inbound from another cluster.
|
|
5
|
+
*/
|
|
6
|
+
export declare function clusterConnectivityRule(pages: ParsedPage[], knownUrls: Set<string>): RuleResult[];
|
|
7
|
+
//# sourceMappingURL=cluster-connectivity.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cluster-connectivity.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAyB7D;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,GACrB,UAAU,EAAE,CA0Dd"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { clusterKeyForUrl } from "./cluster-key.js";
|
|
2
|
+
function hasCrossClusterInbound(clusterDir, urlsInCluster, pages, knownUrls) {
|
|
3
|
+
for (const page of pages) {
|
|
4
|
+
if (urlsInCluster.has(page.url)) {
|
|
5
|
+
continue;
|
|
6
|
+
}
|
|
7
|
+
for (const link of page.resolvedHrefs) {
|
|
8
|
+
if (!knownUrls.has(link)) {
|
|
9
|
+
continue;
|
|
10
|
+
}
|
|
11
|
+
if (urlsInCluster.has(link)) {
|
|
12
|
+
return true;
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
return false;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Reports siloed clusters. Pages are grouped by `clusterKeyForUrl(page.url)`;
 * a group of two or more pages is flagged when none of its pages links to a
 * known URL in a different cluster and no page outside the group links into it.
 *
 * Nothing is reported when there are fewer than two pages or fewer than two
 * clusters overall.
 *
 * @param pages Pages to inspect; `url` and `resolvedHrefs` are read.
 * @param knownUrls URLs that count as valid link targets.
 * @returns One "warning" finding per isolated cluster, in first-seen order.
 */
export function clusterConnectivityRule(pages, knownUrls) {
    if (pages.length < 2) {
        return [];
    }
    const membersByCluster = new Map();
    for (const page of pages) {
        const key = clusterKeyForUrl(page.url);
        let members = membersByCluster.get(key);
        if (members === undefined) {
            members = new Set();
            membersByCluster.set(key, members);
        }
        members.add(page.url);
    }
    if (membersByCluster.size < 2) {
        return [];
    }
    const findings = [];
    membersByCluster.forEach((members, clusterDir) => {
        if (members.size < 2) {
            return;
        }
        // Does any member link to a known page that lives in another cluster?
        const linksOut = pages.some((page) => members.has(page.url) &&
            page.resolvedHrefs.some((href) => knownUrls.has(href) && clusterKeyForUrl(href) !== clusterDir));
        const linksIn = hasCrossClusterInbound(clusterDir, members, pages, knownUrls);
        if (linksOut || linksIn) {
            return;
        }
        findings.push({
            ruleId: "links/cluster-connectivity",
            severity: "warning",
            message: `Cluster ${clusterDir} (${members.size} pages) has no crawl links to or from other clusters.`,
            relatedUrls: [...members].sort(),
            fix: "Add cross-links between this cluster and other sections of your site to build topical authority."
        });
    });
    return findings;
}
|
|
73
|
+
//# sourceMappingURL=cluster-connectivity.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cluster-connectivity.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-connectivity.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,SAAS,sBAAsB,CAC7B,UAAkB,EAClB,aAA0B,EAC1B,KAAmB,EACnB,SAAsB;IAEtB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAChC,SAAS;QACX,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzB,SAAS;YACX,CAAC;YACD,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC5B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,SAAsB;IAEtB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,EAAuB,CAAC;IACpD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAU,CAAC;QACvD,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACf,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,YAAY,CAAC,OAAO,EAAE,EAAE,CAAC;QACxD,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YAClB,SAAS;QACX,CAAC;QAED,IAAI,uBAAuB,GAAG,KAAK,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS;YACX,CAAC;YACD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;gBACtC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,SAAS;gBACX,CAAC;gBACD,MAAM,aAAa,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBAC7C,IAAI,aAAa,KAAK,UAAU,EAAE,CAAC;oBACjC,uBAAuB,GAAG,IAAI,CAAC;oBAC/B,MAAM;gBACR,CAAC;YACH,CAAC;YACD,IAAI,uBAAuB,EAAE,CAAC;gBAC5B,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAG,sBAAsB,CAAC,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;QAE9E,IAAI,CAAC,uBAAuB,IAAI,CAAC,UAAU,EAAE,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAA
C;gBACZ,MAAM,EAAE,4BAA4B;gBACpC,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,WAAW,UAAU,KAAK,IAAI,CAAC,IAAI,uDAAuD;gBACnG,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;gBACpC,GAAG,EAAE,kGAAkG;aACxG,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cluster-key.d.ts","sourceRoot":"","sources":["../../../src/rules/links/cluster-key.ts"],"names":[],"mappings":"AAEA,oEAAoE;AACpE,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAiBxD"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { dirname } from "node:path";
|
|
2
|
+
/**
 * Directory-style cluster key: same parent path = same cluster.
 *
 * For `http(s)` URLs the key is the origin plus the parent directory of the
 * path (one trailing slash on the path is ignored first), always ending in
 * "/". If such a string cannot be parsed as a URL it is returned unchanged.
 *
 * Any other input is treated as a file path: the key is its `dirname` with a
 * trailing separator. The separator matches the one already used in the
 * directory ("\" if it contains a backslash, "/" otherwise), so POSIX-style
 * paths no longer end up with a stray Windows backslash (e.g. "/site/blog\").
 *
 * @param pageUrl Absolute URL or file path of a page.
 * @returns The cluster key shared by all pages in the same directory.
 */
export function clusterKeyForUrl(pageUrl) {
    if (/^https?:\/\//i.test(pageUrl)) {
        try {
            const u = new URL(pageUrl);
            let pathname = u.pathname;
            if (pathname.length > 1 && pathname.endsWith("/")) {
                pathname = pathname.slice(0, -1);
            }
            // Drop the last path segment; an empty remainder means the site root.
            const path = pathname.replace(/\/[^/]+$/, "") || "/";
            const dir = path.endsWith("/") ? path : `${path}/`;
            return `${u.origin}${dir}`;
        }
        catch {
            // Unparseable URL: fall back to the raw string as its own key.
            return pageUrl;
        }
    }
    const dir = dirname(pageUrl);
    if (dir.endsWith("\\") || dir.endsWith("/")) {
        return dir;
    }
    const separator = dir.includes("\\") ? "\\" : "/";
    return `${dir}${separator}`;
}
|
|
22
|
+
//# sourceMappingURL=cluster-key.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cluster-key.js","sourceRoot":"","sources":["../../../src/rules/links/cluster-key.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,oEAAoE;AACpE,MAAM,UAAU,gBAAgB,CAAC,OAAe;IAC9C,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;YAC3B,IAAI,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;YAC1B,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAClD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,CAAC;YACD,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,IAAI,GAAG,CAAC;YACrD,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC;YACnD,OAAO,GAAG,CAAC,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QAC7B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,OAAO,CAAC;QACjB,CAAC;IACH,CAAC;IACD,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAC7B,OAAO,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC;AACpE,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dead-ends.d.ts","sourceRoot":"","sources":["../../../src/rules/links/dead-ends.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,YAAY,CAC1B,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,EACtB,OAAO,CAAC,EAAE,MAAM,GACf,UAAU,EAAE,CAcd"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
export function deadEndsRule(pages, knownUrls, rootUrl) {
|
|
2
|
+
return pages
|
|
3
|
+
.filter((page) => !rootUrl || page.url !== rootUrl)
|
|
4
|
+
.filter((page) => page.resolvedHrefs.filter((link) => knownUrls.has(link) && link !== page.url).length === 0)
|
|
5
|
+
.map((page) => ({
|
|
6
|
+
ruleId: "links/dead-ends",
|
|
7
|
+
severity: "warning",
|
|
8
|
+
message: `${page.url} has no outbound links to other pages in this crawl.`,
|
|
9
|
+
pageUrl: page.url,
|
|
10
|
+
fix: "Add outbound links to related pages on your site to help users and crawlers discover more content."
|
|
11
|
+
}));
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=dead-ends.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dead-ends.js","sourceRoot":"","sources":["../../../src/rules/links/dead-ends.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,YAAY,CAC1B,KAAmB,EACnB,SAAsB,EACtB,OAAgB;IAEhB,OAAO,KAAK;SACT,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,OAAO,IAAI,IAAI,CAAC,GAAG,KAAK,OAAO,CAAC;SAClD,MAAM,CACL,CAAC,IAAI,EAAE,EAAE,CACP,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,KAAK,CAAC,CAC7F;SACA,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACd,MAAM,EAAE,iBAAiB;QACzB,QAAQ,EAAE,SAAkB;QAC5B,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,sDAAsD;QAC1E,OAAO,EAAE,IAAI,CAAC,GAAG;QACjB,GAAG,EAAE,oGAAoG;KAC1G,CAAC,CAAC,CAAC;AACR,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { ParsedPage, RuleResult } from "../../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Hub/index coverage for medium-sized directories, plus info when a cluster is skipped
|
|
4
|
+
* because it exceeds `maxSiblings`.
|
|
5
|
+
*/
|
|
6
|
+
export declare function hubPagesRule(pages: ParsedPage[], knownUrls: Set<string>, minSiblings: number, maxSiblings: number): RuleResult[];
|
|
7
|
+
//# sourceMappingURL=hub-pages.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hub-pages.d.ts","sourceRoot":"","sources":["../../../src/rules/links/hub-pages.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAmB7D;;;GAGG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,EACtB,WAAW,EAAE,MAAM,EACnB,WAAW,EAAE,MAAM,GAClB,UAAU,EAAE,CA8Dd"}
|