@pseolint/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +53 -0
- package/dist/algorithms/entity-mask.d.ts +3 -0
- package/dist/algorithms/entity-mask.d.ts.map +1 -0
- package/dist/algorithms/entity-mask.js +8 -0
- package/dist/algorithms/entity-mask.js.map +1 -0
- package/dist/algorithms/entity-mask.test.d.ts +2 -0
- package/dist/algorithms/entity-mask.test.d.ts.map +1 -0
- package/dist/algorithms/entity-mask.test.js +23 -0
- package/dist/algorithms/entity-mask.test.js.map +1 -0
- package/dist/algorithms/simhash.d.ts +4 -0
- package/dist/algorithms/simhash.d.ts.map +1 -0
- package/dist/algorithms/simhash.js +64 -0
- package/dist/algorithms/simhash.js.map +1 -0
- package/dist/algorithms/simhash.test.d.ts +2 -0
- package/dist/algorithms/simhash.test.d.ts.map +1 -0
- package/dist/algorithms/simhash.test.js +23 -0
- package/dist/algorithms/simhash.test.js.map +1 -0
- package/dist/algorithms/tf-idf.d.ts +8 -0
- package/dist/algorithms/tf-idf.d.ts.map +1 -0
- package/dist/algorithms/tf-idf.js +55 -0
- package/dist/algorithms/tf-idf.js.map +1 -0
- package/dist/auditor.d.ts +3 -0
- package/dist/auditor.d.ts.map +1 -0
- package/dist/auditor.js +730 -0
- package/dist/auditor.js.map +1 -0
- package/dist/auditor.test.d.ts +2 -0
- package/dist/auditor.test.d.ts.map +1 -0
- package/dist/auditor.test.js +134 -0
- package/dist/auditor.test.js.map +1 -0
- package/dist/enrich-findings.d.ts +9 -0
- package/dist/enrich-findings.d.ts.map +1 -0
- package/dist/enrich-findings.js +436 -0
- package/dist/enrich-findings.js.map +1 -0
- package/dist/formatters/console.d.ts +6 -0
- package/dist/formatters/console.d.ts.map +1 -0
- package/dist/formatters/console.js +237 -0
- package/dist/formatters/console.js.map +1 -0
- package/dist/formatters/html.d.ts +3 -0
- package/dist/formatters/html.d.ts.map +1 -0
- package/dist/formatters/html.js +170 -0
- package/dist/formatters/html.js.map +1 -0
- package/dist/formatters/index.d.ts +6 -0
- package/dist/formatters/index.d.ts.map +1 -0
- package/dist/formatters/index.js +5 -0
- package/dist/formatters/index.js.map +1 -0
- package/dist/formatters/json.d.ts +3 -0
- package/dist/formatters/json.d.ts.map +1 -0
- package/dist/formatters/json.js +4 -0
- package/dist/formatters/json.js.map +1 -0
- package/dist/formatters/markdown.d.ts +3 -0
- package/dist/formatters/markdown.d.ts.map +1 -0
- package/dist/formatters/markdown.js +93 -0
- package/dist/formatters/markdown.js.map +1 -0
- package/dist/index.d.ts +45 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/index.js.map +1 -0
- package/dist/page-classifier.d.ts +4 -0
- package/dist/page-classifier.d.ts.map +1 -0
- package/dist/page-classifier.js +133 -0
- package/dist/page-classifier.js.map +1 -0
- package/dist/parser.d.ts +3 -0
- package/dist/parser.d.ts.map +1 -0
- package/dist/parser.js +131 -0
- package/dist/parser.js.map +1 -0
- package/dist/parser.test.d.ts +2 -0
- package/dist/parser.test.d.ts.map +1 -0
- package/dist/parser.test.js +37 -0
- package/dist/parser.test.js.map +1 -0
- package/dist/renderer.d.ts +15 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +124 -0
- package/dist/renderer.js.map +1 -0
- package/dist/rule-references.d.ts +2 -0
- package/dist/rule-references.d.ts.map +1 -0
- package/dist/rule-references.js +35 -0
- package/dist/rule-references.js.map +1 -0
- package/dist/rules/cannibal/keyword-collision.d.ts +3 -0
- package/dist/rules/cannibal/keyword-collision.d.ts.map +1 -0
- package/dist/rules/cannibal/keyword-collision.js +25 -0
- package/dist/rules/cannibal/keyword-collision.js.map +1 -0
- package/dist/rules/cannibal/title-overlap.d.ts +3 -0
- package/dist/rules/cannibal/title-overlap.d.ts.map +1 -0
- package/dist/rules/cannibal/title-overlap.js +43 -0
- package/dist/rules/cannibal/title-overlap.js.map +1 -0
- package/dist/rules/cannibal/url-pattern.d.ts +3 -0
- package/dist/rules/cannibal/url-pattern.d.ts.map +1 -0
- package/dist/rules/cannibal/url-pattern.js +48 -0
- package/dist/rules/cannibal/url-pattern.js.map +1 -0
- package/dist/rules/content/eeat-signals.d.ts +3 -0
- package/dist/rules/content/eeat-signals.d.ts.map +1 -0
- package/dist/rules/content/eeat-signals.js +46 -0
- package/dist/rules/content/eeat-signals.js.map +1 -0
- package/dist/rules/content/heading-uniqueness.d.ts +3 -0
- package/dist/rules/content/heading-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/heading-uniqueness.js +56 -0
- package/dist/rules/content/heading-uniqueness.js.map +1 -0
- package/dist/rules/content/meta-uniqueness.d.ts +3 -0
- package/dist/rules/content/meta-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/meta-uniqueness.js +28 -0
- package/dist/rules/content/meta-uniqueness.js.map +1 -0
- package/dist/rules/content/missing-author.d.ts +3 -0
- package/dist/rules/content/missing-author.d.ts.map +1 -0
- package/dist/rules/content/missing-author.js +26 -0
- package/dist/rules/content/missing-author.js.map +1 -0
- package/dist/rules/content/unique-value.d.ts +3 -0
- package/dist/rules/content/unique-value.d.ts.map +1 -0
- package/dist/rules/content/unique-value.js +26 -0
- package/dist/rules/content/unique-value.js.map +1 -0
- package/dist/rules/links/cluster-connectivity.d.ts +7 -0
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -0
- package/dist/rules/links/cluster-connectivity.js +73 -0
- package/dist/rules/links/cluster-connectivity.js.map +1 -0
- package/dist/rules/links/cluster-key.d.ts +3 -0
- package/dist/rules/links/cluster-key.d.ts.map +1 -0
- package/dist/rules/links/cluster-key.js +22 -0
- package/dist/rules/links/cluster-key.js.map +1 -0
- package/dist/rules/links/dead-ends.d.ts +3 -0
- package/dist/rules/links/dead-ends.d.ts.map +1 -0
- package/dist/rules/links/dead-ends.js +13 -0
- package/dist/rules/links/dead-ends.js.map +1 -0
- package/dist/rules/links/hub-pages.d.ts +7 -0
- package/dist/rules/links/hub-pages.d.ts.map +1 -0
- package/dist/rules/links/hub-pages.js +73 -0
- package/dist/rules/links/hub-pages.js.map +1 -0
- package/dist/rules/links/link-depth.d.ts +3 -0
- package/dist/rules/links/link-depth.d.ts.map +1 -0
- package/dist/rules/links/link-depth.js +46 -0
- package/dist/rules/links/link-depth.js.map +1 -0
- package/dist/rules/links/orphan-pages.d.ts +3 -0
- package/dist/rules/links/orphan-pages.d.ts.map +1 -0
- package/dist/rules/links/orphan-pages.js +19 -0
- package/dist/rules/links/orphan-pages.js.map +1 -0
- package/dist/rules/schema/consistency.d.ts +3 -0
- package/dist/rules/schema/consistency.d.ts.map +1 -0
- package/dist/rules/schema/consistency.js +44 -0
- package/dist/rules/schema/consistency.js.map +1 -0
- package/dist/rules/schema/json-ld-valid.d.ts +3 -0
- package/dist/rules/schema/json-ld-valid.d.ts.map +1 -0
- package/dist/rules/schema/json-ld-valid.js +47 -0
- package/dist/rules/schema/json-ld-valid.js.map +1 -0
- package/dist/rules/schema/required-fields.d.ts +3 -0
- package/dist/rules/schema/required-fields.d.ts.map +1 -0
- package/dist/rules/schema/required-fields.js +60 -0
- package/dist/rules/schema/required-fields.js.map +1 -0
- package/dist/rules/spam/boilerplate-ratio.d.ts +3 -0
- package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -0
- package/dist/rules/spam/boilerplate-ratio.js +50 -0
- package/dist/rules/spam/boilerplate-ratio.js.map +1 -0
- package/dist/rules/spam/doorway-pattern.d.ts +4 -0
- package/dist/rules/spam/doorway-pattern.d.ts.map +1 -0
- package/dist/rules/spam/doorway-pattern.js +47 -0
- package/dist/rules/spam/doorway-pattern.js.map +1 -0
- package/dist/rules/spam/entity-swap.d.ts +7 -0
- package/dist/rules/spam/entity-swap.d.ts.map +1 -0
- package/dist/rules/spam/entity-swap.js +26 -0
- package/dist/rules/spam/entity-swap.js.map +1 -0
- package/dist/rules/spam/near-duplicate.d.ts +11 -0
- package/dist/rules/spam/near-duplicate.d.ts.map +1 -0
- package/dist/rules/spam/near-duplicate.js +25 -0
- package/dist/rules/spam/near-duplicate.js.map +1 -0
- package/dist/rules/spam/publication-velocity.d.ts +3 -0
- package/dist/rules/spam/publication-velocity.d.ts.map +1 -0
- package/dist/rules/spam/publication-velocity.js +25 -0
- package/dist/rules/spam/publication-velocity.js.map +1 -0
- package/dist/rules/spam/template-coverage.d.ts +3 -0
- package/dist/rules/spam/template-coverage.d.ts.map +1 -0
- package/dist/rules/spam/template-coverage.js +87 -0
- package/dist/rules/spam/template-coverage.js.map +1 -0
- package/dist/rules/spam/template-diversity.d.ts +3 -0
- package/dist/rules/spam/template-diversity.d.ts.map +1 -0
- package/dist/rules/spam/template-diversity.js +19 -0
- package/dist/rules/spam/template-diversity.js.map +1 -0
- package/dist/rules/spam/thin-content.d.ts +6 -0
- package/dist/rules/spam/thin-content.d.ts.map +1 -0
- package/dist/rules/spam/thin-content.js +22 -0
- package/dist/rules/spam/thin-content.js.map +1 -0
- package/dist/rules/tech/canonical-consistency.d.ts +4 -0
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -0
- package/dist/rules/tech/canonical-consistency.js +78 -0
- package/dist/rules/tech/canonical-consistency.js.map +1 -0
- package/dist/rules/tech/canonical-noindex-conflict.d.ts +3 -0
- package/dist/rules/tech/canonical-noindex-conflict.d.ts.map +1 -0
- package/dist/rules/tech/canonical-noindex-conflict.js +27 -0
- package/dist/rules/tech/canonical-noindex-conflict.js.map +1 -0
- package/dist/rules/tech/hreflang-consistency.d.ts +3 -0
- package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -0
- package/dist/rules/tech/hreflang-consistency.js +99 -0
- package/dist/rules/tech/hreflang-consistency.js.map +1 -0
- package/dist/rules/tech/og-completeness.d.ts +3 -0
- package/dist/rules/tech/og-completeness.d.ts.map +1 -0
- package/dist/rules/tech/og-completeness.js +35 -0
- package/dist/rules/tech/og-completeness.js.map +1 -0
- package/dist/rules/tech/redirect-chain.d.ts +3 -0
- package/dist/rules/tech/redirect-chain.d.ts.map +1 -0
- package/dist/rules/tech/redirect-chain.js +20 -0
- package/dist/rules/tech/redirect-chain.js.map +1 -0
- package/dist/rules/tech/robots-noindex-conflict.d.ts +3 -0
- package/dist/rules/tech/robots-noindex-conflict.d.ts.map +1 -0
- package/dist/rules/tech/robots-noindex-conflict.js +30 -0
- package/dist/rules/tech/robots-noindex-conflict.js.map +1 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts +3 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -0
- package/dist/rules/tech/robots-sitemap-presence.js +61 -0
- package/dist/rules/tech/robots-sitemap-presence.js.map +1 -0
- package/dist/rules/tech/sitemap-completeness.d.ts +3 -0
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -0
- package/dist/rules/tech/sitemap-completeness.js +40 -0
- package/dist/rules/tech/sitemap-completeness.js.map +1 -0
- package/dist/rules/tech/soft-404.d.ts +3 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -0
- package/dist/rules/tech/soft-404.js +24 -0
- package/dist/rules/tech/soft-404.js.map +1 -0
- package/dist/types.d.ts +170 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/url-normalize.d.ts +10 -0
- package/dist/url-normalize.d.ts.map +1 -0
- package/dist/url-normalize.js +52 -0
- package/dist/url-normalize.js.map +1 -0
- package/package.json +46 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
 * Fetch a URL and return its body as text, or `null` when it cannot be retrieved.
 *
 * This is a best-effort probe: a network failure, a timeout, or a response
 * whose `ok` flag is false all collapse to `null`.
 *
 * @param {string} url - Absolute URL to request.
 * @param {number} [timeoutMs=30000] - Abort the request after this many milliseconds.
 * @returns {Promise<string | null>} The response body, or `null` on any failure.
 */
async function fetchTextIfOk(url, timeoutMs = 30000) {
    try {
        // Without an abort signal a server that never answers would stall the caller indefinitely.
        const response = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) });
        if (!response.ok) {
            return null;
        }
        return await response.text();
    }
    catch {
        // Deliberate best-effort: connection errors and timeouts both mean "not available".
        return null;
    }
}
|
|
13
|
+
/**
 * Probe a remote site for `/robots.txt` and `/sitemap.xml` and report what is missing.
 *
 * Checks, in order:
 *  1. `robots.txt` can be fetched (warning, then stop, if not);
 *  2. it contains at least one `Sitemap:` line (info if not);
 *  3. `/sitemap.xml` can be fetched (warning, then stop, if not);
 *  4. the sitemap body contains `<urlset` or `<sitemapindex` (warning if not).
 *
 * @param {string} source - Audit source; anything that is not an http(s) URL yields no findings.
 * @returns {Promise<Array<{ruleId: string, severity: string, message: string, fix: string}>>}
 *   Findings in the order the checks ran.
 * @throws {TypeError} If `source` starts with `http(s)://` but cannot be parsed by `URL`.
 */
export async function robotsSitemapPresenceRule(source) {
    // Only remote sources have an origin to probe.
    if (!/^https?:\/\//i.test(source)) {
        return [];
    }
    const { origin } = new URL(source);
    const robotsUrl = `${origin}/robots.txt`;
    // NOTE(review): the sitemap is always probed at this fixed path, even when
    // robots.txt declares a different location.
    const sitemapUrl = `${origin}/sitemap.xml`;
    const findings = [];

    const robotsText = await fetchTextIfOk(robotsUrl);
    // The fetch helper reports failure as null. An empty string is a successfully
    // fetched (empty) robots.txt and must not be reported as unreachable.
    if (robotsText === null) {
        findings.push({
            ruleId: "tech/robots-sitemap-presence",
            severity: "warning",
            message: `Could not fetch ${robotsUrl}.`,
            fix: `Create a robots.txt file at ${robotsUrl} and include a Sitemap directive pointing to your sitemap.`
        });
        return findings;
    }
    // A single match is enough, so the regex needs no stateful `g` flag.
    if (!/^\s*sitemap\s*:/im.test(robotsText)) {
        findings.push({
            ruleId: "tech/robots-sitemap-presence",
            severity: "info",
            message: `${robotsUrl} does not declare a Sitemap directive.`,
            fix: `Add a Sitemap directive to ${robotsUrl}, e.g.: Sitemap: ${sitemapUrl}`
        });
    }

    const sitemapText = await fetchTextIfOk(sitemapUrl);
    // Same distinction as above: an empty body was fetched, it just is not a sitemap.
    if (sitemapText === null) {
        findings.push({
            ruleId: "tech/robots-sitemap-presence",
            severity: "warning",
            message: `Could not fetch ${sitemapUrl}.`,
            fix: `Create an XML sitemap at ${sitemapUrl} and reference it in ${robotsUrl}.`
        });
        return findings;
    }
    // Cheap shape check: look for either sitemap root tag, case-insensitively.
    const lowered = sitemapText.toLowerCase();
    if (!lowered.includes("<urlset") && !lowered.includes("<sitemapindex")) {
        findings.push({
            ruleId: "tech/robots-sitemap-presence",
            severity: "warning",
            message: `${sitemapUrl} was fetched but does not look like sitemap XML.`,
            fix: `Ensure ${sitemapUrl} is valid sitemap XML containing a <urlset> or <sitemapindex> root element.`
        });
    }
    return findings;
}
|
|
61
|
+
//# sourceMappingURL=robots-sitemap-presence.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"robots-sitemap-presence.js","sourceRoot":"","sources":["../../../src/rules/tech/robots-sitemap-presence.ts"],"names":[],"mappings":"AAEA,KAAK,UAAU,aAAa,CAAC,GAAW;IACtC,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,MAAc;IAC5D,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;QAClC,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;IAChC,MAAM,SAAS,GAAG,GAAG,MAAM,aAAa,CAAC;IACzC,MAAM,UAAU,GAAG,GAAG,MAAM,cAAc,CAAC;IAC3C,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,UAAU,GAAG,MAAM,aAAa,CAAC,SAAS,CAAC,CAAC;IAClD,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,mBAAmB,SAAS,GAAG;YACxC,GAAG,EAAE,+BAA+B,SAAS,4DAA4D;SAC1G,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;QAC3C,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,MAAM;YAChB,OAAO,EAAE,GAAG,SAAS,wCAAwC;YAC7D,GAAG,EAAE,8BAA8B,SAAS,oBAAoB,UAAU,EAAE;SAC7E,CAAC,CAAC;IACL,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,aAAa,CAAC,UAAU,CAAC,CAAC;IACpD,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,mBAAmB,UAAU,GAAG;YACzC,GAAG,EAAE,4BAA4B,UAAU,wBAAwB,SAAS,GAAG;SAChF,CAAC,CAAC;QACH,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IAC1C,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;QACvE,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,8BAA8B;YACtC,QAAQ,EAAE,SAAS;YACnB,OAAO,EAAE,GAAG,UAAU,kDAAkD;YACxE,GAAG,EAAE,UAAU,UAAU,6EAA6E;SACvG,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sitemap-completeness.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/sitemap-completeness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,UAAU,EAAE,EACnB,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,GACvB,UAAU,EAAE,CA0Cd"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
 * Cross-check audited pages against the set of URLs listed in the sitemap.
 *
 * Produces:
 *  - one aggregated "error" finding listing pages that are not in the sitemap
 *    (pages that answered with HTTP >= 400 are left out, because advising to
 *    add an error page to the sitemap would be wrong);
 *  - one "error" finding per sitemap URL whose page answered with HTTP >= 400;
 *  - one "warning" finding per sitemap URL that redirects to a different final URL.
 *
 * @param {Array<object>} pages - Parsed pages; reads `url` and the optional `httpMeta`
 *   (`statusCode`, `redirectChain`, `finalUrl`).
 * @param {Set<string>} sitemapUrls - URLs listed in the sitemap. When empty, the rule is skipped.
 * @returns {Array<object>} Findings; empty when nothing is wrong or no sitemap data is available.
 */
export function sitemapCompletenessRule(pages, sitemapUrls) {
    // No sitemap data to compare against: nothing can be concluded.
    if (sitemapUrls.size === 0) {
        return [];
    }
    const findings = [];
    // Pages without HTTP metadata are not treated as errors.
    const isHttpError = (page) => Boolean(page.httpMeta) && page.httpMeta.statusCode >= 400;
    const missingFromSitemap = pages.filter((page) => !sitemapUrls.has(page.url) && !isHttpError(page));
    if (missingFromSitemap.length > 0) {
        findings.push({
            ruleId: "tech/sitemap-completeness",
            severity: "error",
            message: `${missingFromSitemap.length} crawlable page(s) not in sitemap.`,
            fix: "Add these pages to your sitemap.xml to ensure Google discovers them.",
            relatedUrls: missingFromSitemap.map((page) => page.url).sort()
        });
    }
    for (const page of pages) {
        // The per-URL checks only apply to sitemap entries that have HTTP metadata.
        if (!page.httpMeta || !sitemapUrls.has(page.url)) {
            continue;
        }
        if (page.httpMeta.statusCode >= 400) {
            findings.push({
                ruleId: "tech/sitemap-completeness",
                severity: "error",
                message: `Sitemap URL ${page.url} returns HTTP ${page.httpMeta.statusCode}.`,
                pageUrl: page.url,
                fix: "Remove this URL from sitemap.xml or fix the page to return HTTP 200."
            });
        }
        if (page.httpMeta.redirectChain.length > 0 && page.httpMeta.finalUrl !== page.url) {
            findings.push({
                ruleId: "tech/sitemap-completeness",
                severity: "warning",
                message: `Sitemap URL ${page.url} redirects to ${page.httpMeta.finalUrl}.`,
                pageUrl: page.url,
                relatedUrls: [page.httpMeta.finalUrl],
                fix: `Update sitemap.xml to use the final URL: ${page.httpMeta.finalUrl}`
            });
        }
    }
    return findings;
}
|
|
40
|
+
//# sourceMappingURL=sitemap-completeness.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sitemap-completeness.js","sourceRoot":"","sources":["../../../src/rules/tech/sitemap-completeness.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,uBAAuB,CACrC,KAAmB,EACnB,WAAwB;IAExB,IAAI,WAAW,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEtC,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,MAAM,kBAAkB,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACxE,IAAI,kBAAkB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClC,QAAQ,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,2BAA2B;YACnC,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,GAAG,kBAAkB,CAAC,MAAM,oCAAoC;YACzE,GAAG,EAAE,sEAAsE;YAC3E,WAAW,EAAE,kBAAkB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE;SACzD,CAAC,CAAC;IACL,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,SAAS;QAE3D,IAAI,IAAI,CAAC,QAAQ,CAAC,UAAU,IAAI,GAAG,EAAE,CAAC;YACpC,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,2BAA2B;gBACnC,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,eAAe,IAAI,CAAC,GAAG,iBAAiB,IAAI,CAAC,QAAQ,CAAC,UAAU,GAAG;gBAC5E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,sEAAsE;aAC5E,CAAC,CAAC;QACL,CAAC;QAED,IAAI,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,KAAK,IAAI,CAAC,GAAG,EAAE,CAAC;YAClF,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,2BAA2B;gBACnC,QAAQ,EAAE,SAAS;gBACnB,OAAO,EAAE,eAAe,IAAI,CAAC,GAAG,iBAAiB,IAAI,CAAC,QAAQ,CAAC,QAAQ,GAAG;gBAC1E,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,WAAW,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBACrC,GAAG,EAAE,4CAA4C,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE;aAC1E,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"soft-404.d.ts","sourceRoot":"","sources":["../../../src/rules/tech/soft-404.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAI7D,wBAAgB,WAAW,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CAsB7D"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// Phrases looked for in a page title to recognise an error page.
const SOFT_404_PATTERNS = /\b(not\s*found|404|page\s*missing|does\s*not\s*exist|no\s*longer\s*available)\b/i;

/**
 * Flag pages that answer HTTP 200 but look like an error page.
 *
 * A page is reported when all of the following hold: it has HTTP metadata,
 * its status code is exactly 200, its text content has fewer than 50
 * whitespace-separated words, and its title matches `SOFT_404_PATTERNS`.
 *
 * @param {Array<object>} pages - Parsed pages; reads `url`, `title`, `contentText`, `httpMeta`.
 * @returns {Array<object>} One "error" finding per suspected soft 404, in input order.
 */
export function soft404Rule(pages) {
    const countWords = (text) => text.split(/\s+/).filter(Boolean).length;
    return pages.flatMap((page) => {
        const meta = page.httpMeta;
        if (!meta || meta.statusCode !== 200) {
            return [];
        }
        const wordCount = countWords(page.contentText);
        // Pages with 50 or more words are never reported, whatever the title says.
        if (wordCount >= 50 || !SOFT_404_PATTERNS.test(page.title)) {
            return [];
        }
        return [{
            ruleId: "tech/soft-404",
            severity: "error",
            message: `${page.url} returns HTTP 200 but appears to be an error page (title: "${page.title}", ${wordCount} words).`,
            pageUrl: page.url,
            fix: "Return a proper HTTP 404 status code for error pages instead of 200."
        }];
    });
}
|
|
24
|
+
//# sourceMappingURL=soft-404.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"soft-404.js","sourceRoot":"","sources":["../../../src/rules/tech/soft-404.ts"],"names":[],"mappings":"AAEA,MAAM,iBAAiB,GAAG,kFAAkF,CAAC;AAE7G,MAAM,UAAU,WAAW,CAAC,KAAmB;IAC7C,MAAM,QAAQ,GAAiB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,QAAQ;YAAE,SAAS;QAC7B,IAAI,IAAI,CAAC,QAAQ,CAAC,UAAU,KAAK,GAAG;YAAE,SAAS;QAE/C,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QACvE,IAAI,SAAS,IAAI,EAAE;YAAE,SAAS;QAE9B,IAAI,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACvC,QAAQ,CAAC,IAAI,CAAC;gBACZ,MAAM,EAAE,eAAe;gBACvB,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,GAAG,IAAI,CAAC,GAAG,8DAA8D,IAAI,CAAC,KAAK,MAAM,SAAS,UAAU;gBACrH,OAAO,EAAE,IAAI,CAAC,GAAG;gBACjB,GAAG,EAAE,sEAAsE;aAC5E,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/** Severity level carried by a rule finding (see `RuleResult.severity`). */
export type Severity = "info" | "warning" | "error" | "critical";
|
|
2
|
+
/**
 * Options for `normalizeAuditUrl` (HTTP identity).
 * Both fields are optional; `mergeNormalizeUrlOptions` returns a fully populated copy.
 */
export interface NormalizeUrlOptions {
    /** When true (default), drop `?query` for URL identity. */
    stripQuery?: boolean;
    /** When true, strip a leading `www.` from the hostname (opt-in; can be wrong for some TLDs). */
    stripWwwHost?: boolean;
}
|
|
9
|
+
/** Options for HTML parsing — NOTE(review): presumably consumed by the parser module; confirm. */
export interface ParseHtmlOptions {
    /** URL normalization switches; see `NormalizeUrlOptions`. */
    normalizeUrl?: NormalizeUrlOptions;
}
|
|
12
|
+
/** Effort level attached to a finding (see `RuleResult.effort`). */
export type FixEffort = "quick" | "moderate" | "structural";
|
|
13
|
+
/**
 * Structured context carried on `RuleResult.context`.
 * A union discriminated by `type`: either a `"cluster"` description or a
 * `"contentBreakdown"`.
 */
export type FindingContext = {
    type: "cluster";
    /** Size of the cluster — presumably equal to `members.length`; confirm against the enrichment code. */
    clusterSize: number;
    /** Cluster members — presumably page URLs; confirm. */
    members: string[];
    /** Pairs of members with their similarity — NOTE(review): selection and ordering are not visible here. */
    worstPairs: Array<{
        left: string;
        right: string;
        similarity: number;
    }>;
    /** Two numbers — presumably [lowest, highest] similarity in the cluster; confirm the order. */
    similarityRange: [number, number];
} | {
    type: "contentBreakdown";
    /** Text blocks with their word counts — presumably the blocks shared between pages; confirm. */
    sharedBlocks: Array<{
        text: string;
        wordCount: number;
    }>;
    sharedWordCount: number;
    uniqueWordCount: number;
    totalWordCount: number;
};
|
|
33
|
+
/** A single finding produced by a rule. Only `ruleId`, `severity` and `message` are required. */
export interface RuleResult {
    /** Identifier of the rule that produced the finding, e.g. `"tech/soft-404"`. */
    ruleId: string;
    /** How serious the finding is. */
    severity: Severity;
    /** Human-readable description of the problem. */
    message: string;
    /** What to do about this finding. */
    fix?: string;
    /** Google documentation URL backing this finding. */
    ref?: string;
    /** Primary page this finding refers to, when applicable. */
    pageUrl?: string;
    /** Other URLs involved (e.g. cluster members, related pairs). */
    relatedUrls?: string[];
    /** Page group this finding belongs to, if page classification is configured. */
    group?: string;
    /** Numeric similarity score (0-1) for pairwise rules. Used by enrichment clustering. */
    similarity?: number;
    /** Structured context attached by the enrichment pipeline. */
    context?: FindingContext;
    /** Fix effort level assigned by the enrichment pipeline. */
    effort?: FixEffort;
}
|
|
54
|
+
/**
 * One numeric score per rule category. The keys match the rule directories
 * shipped in this package (`rules/spam`, `rules/content`, `rules/links`,
 * `rules/tech`, `rules/schema`, `rules/cannibal`).
 * NOTE(review): the score range and direction are not visible in this file.
 */
export interface CategoryScores {
    spam: number;
    content: number;
    links: number;
    tech: number;
    schema: number;
    cannibal: number;
}
|
|
62
|
+
/** Result of an audit: scores, page count and the findings themselves. */
export interface AuditSummary {
    /** Overall score — NOTE(review): range and how it is derived are not visible here. */
    score: number;
    /** Score broken down by rule category. */
    categoryScores: CategoryScores;
    /** Score per group — presumably keyed by page-group name (see `AuditOptions.pageGroups`); confirm. */
    groupScores?: Record<string, number>;
    /** Page count per group — presumably keyed like `groupScores`; confirm. */
    groupPageCounts?: Record<string, number>;
    /** Number of pages — presumably the pages actually audited; confirm. */
    pageCount: number;
    /** Findings reported by the rules. */
    findings: RuleResult[];
    /** True when the enrichment pipeline detects template-generated content. */
    templateDetected?: boolean;
    /** Pre-enrichment finding count, for backward compatibility with CI scripts. */
    rawFindingCount?: number;
}
|
|
74
|
+
/** Configuration of one page group; the value type of `AuditOptions.pageGroups`. */
export interface PageGroupConfig {
    /** Glob pattern(s) to match page URLs/paths. */
    match: string | string[];
    /** Rule globs to enable for this group. If omitted, all rules apply. */
    rules?: string[];
    /** Per-rule threshold or severity overrides. Keys are rule IDs. */
    overrides?: Record<string, Record<string, unknown>>;
}
|
|
82
|
+
/** Options controlling an audit run. Every field is optional. */
export interface AuditOptions {
    /**
     * URL-normalization switches and per-rule thresholds.
     * Threshold names follow the rule they belong to (e.g. `thinContentMinWords`
     * and `rules/spam/thin-content`). Their units and default values are not
     * visible in this declaration file.
     */
    rules?: {
        /** Passed to `normalizeAuditUrl` for crawl URLs and resolved links (default: strip query, keep www). */
        stripUrlQuery?: boolean;
        stripWwwHost?: boolean;
        nearDuplicateThreshold?: number;
        entitySwapThreshold?: number;
        thinContentMinWords?: number;
        publicationVelocityMaxPerDay?: number;
        boilerplateMaxRatio?: number;
        templateDiversityMinUniqueRatio?: number;
        uniqueValueMinWords?: number;
        metaUniquenessMinJaccard?: number;
        linkDepthMaxClicks?: number;
        /** Minimum pages in one directory before hub/index coverage is required. */
        hubPagesMinSiblings?: number;
        /** Skip hub/index checks when a directory has more than this many pages (e.g. large blogs). */
        hubPagesMaxSiblings?: number;
        titleOverlapThreshold?: number;
        keywordCollisionMinShared?: number;
        templateCoverageMinPages?: number;
    };
    /** Max parallel HTTP fetches when auditing a remote sitemap (default: 5). */
    concurrency?: number;
    /** Per-request timeout in milliseconds (default: 30000). */
    timeout?: number;
    /** Audit a random subset of N pages. 0 means all pages (default: 0). */
    sampleSize?: number;
    /** URL/path glob patterns to exclude from the audit. */
    ignore?: string[];
    /** NOTE(review): presumably enables discovering pages by following links; confirm against the auditor. */
    crawlDiscovery?: boolean;
    /** Page groups with per-group rule sets and threshold overrides. */
    pageGroups?: Record<string, PageGroupConfig>;
    /** Browser rendering options for client-rendered pages. */
    render?: {
        /** WebSocket endpoint of a browser — presumably one that is already running; confirm against the renderer. */
        browserWsEndpoint?: string;
    };
    /** Override template auto-detection. When set, skips heuristic detection. */
    templateGenerated?: boolean;
}
|
|
122
|
+
/** A regular expression paired with a placeholder string. */
export interface EntityMaskPattern {
    /** Placeholder — presumably substituted for text matching `pattern`; confirm against the entity-mask algorithm. */
    placeholder: string;
    /** Pattern the placeholder is associated with. */
    pattern: RegExp;
}
|
|
126
|
+
/** HTTP-level information about a fetched page (see `ParsedPage.httpMeta`). */
export interface HttpMeta {
    /** HTTP status code; rules compare it against 200 and 400. */
    statusCode: number;
    /** URL the request ended on; the sitemap rule compares it with the page URL to detect redirects. */
    finalUrl: string;
    /** Redirect chain; empty when there was no redirect — presumably the intermediate URLs, confirm. */
    redirectChain: string[];
    /** Presumably the raw `X-Robots-Tag` response header value; confirm. */
    xRobotsTag: string;
    /** Presumably the raw `Link` response header value; confirm. */
    linkHeader: string;
}
|
|
133
|
+
/** One parsed page: the input every rule in this package works on. */
export interface ParsedPage {
    /** Page URL; rules use it as the page's identity (e.g. for sitemap membership). */
    url: string;
    /** Page title. */
    title: string;
    metaDescription: string;
    canonical: string;
    robotsMeta: string;
    /** Open Graph values — presumably from the `og:title`, `og:description` and `og:image` meta tags; confirm. */
    og: {
        title: string;
        description: string;
        image: string;
    };
    /** Language/URL pairs — presumably from `hreflang` alternate links; confirm. */
    hreflangs: Array<{
        lang: string;
        href: string;
    }>;
    /** Heading texts grouped by level. */
    headings: {
        h1: string[];
        h2: string[];
    };
    /**
     * Resolved `a[href]` targets. For HTTP(S) page URLs, only `http:` / `https:` targets are kept
     * (`javascript:`, `data:`, etc. are dropped). For filesystem page URLs, paths are normalized.
     */
    resolvedHrefs: string[];
    publishedDate?: string;
    structureSignature: string;
    /** JSON-LD entries — presumably already parsed from the page's `application/ld+json` scripts; confirm. */
    jsonLd: unknown[];
    /** Authorship signals found on the page. */
    authorSignals: {
        metaAuthor: string;
        schemaAuthor: boolean;
        bylineElement: boolean;
        relAuthorLink: boolean;
    };
    /** Text content of the page; the soft-404 rule counts whitespace-separated words in it. */
    contentText: string;
    html: string;
    /** HTTP metadata; optional, and rules that need it skip pages without it. */
    httpMeta?: HttpMeta;
}
|
|
170
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,QAAQ,GAAG,MAAM,GAAG,SAAS,GAAG,OAAO,GAAG,UAAU,CAAC;AAEjE,uDAAuD;AACvD,MAAM,WAAW,mBAAmB;IAClC,2DAA2D;IAC3D,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,gGAAgG;IAChG,YAAY,CAAC,EAAE,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,gBAAgB;IAC/B,YAAY,CAAC,EAAE,mBAAmB,CAAC;CACpC;AAED,MAAM,MAAM,SAAS,GAAG,OAAO,GAAG,UAAU,GAAG,YAAY,CAAC;AAE5D,MAAM,MAAM,cAAc,GACtB;IACE,IAAI,EAAE,SAAS,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,UAAU,EAAE,KAAK,CAAC;QAChB,IAAI,EAAE,MAAM,CAAC;QACb,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;IACH,eAAe,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC,GACD;IACE,IAAI,EAAE,kBAAkB,CAAC;IACzB,YAAY,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACzD,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB,CAAC;AAEN,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,qCAAqC;IACrC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,qDAAqD;IACrD,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,4DAA4D;IAC5D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,gFAAgF;IAChF,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,wFAAwF;IACxF,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,8DAA8D;IAC9D,OAAO,CAAC,EAAE,cAAc,CAAC;IACzB,4DAA4D;IAC5D,MAAM,CAAC,EAAE,SAAS,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,cAAc,EAAE,cAAc,CAAC;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACrC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,UAAU,EAAE,CAAC;IACvB,4EAA4E;IAC5E,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,gFAAgF;IAChF,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,eAAe;IAC9B,gDAAgD;IAChD,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IACzB,wEAAwE;IACxE,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IAC
jB,mEAAmE;IACnE,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CACrD;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,EAAE;QACN,wGAAwG;QACxG,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,YAAY,CAAC,EAAE,OAAO,CAAC;QACvB,sBAAsB,CAAC,EAAE,MAAM,CAAC;QAChC,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,4BAA4B,CAAC,EAAE,MAAM,CAAC;QACtC,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,+BAA+B,CAAC,EAAE,MAAM,CAAC;QACzC,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,wBAAwB,CAAC,EAAE,MAAM,CAAC;QAClC,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,4EAA4E;QAC5E,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,+FAA+F;QAC/F,mBAAmB,CAAC,EAAE,MAAM,CAAC;QAC7B,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,yBAAyB,CAAC,EAAE,MAAM,CAAC;QACnC,wBAAwB,CAAC,EAAE,MAAM,CAAC;KACnC,CAAC;IACF,6EAA6E;IAC7E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,4DAA4D;IAC5D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,wEAAwE;IACxE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,oEAAoE;IACpE,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAC7C,2DAA2D;IAC3D,MAAM,CAAC,EAAE;QACP,iBAAiB,CAAC,EAAE,MAAM,CAAC;KAC5B,CAAC;IACF,6EAA6E;IAC7E,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC7B;AAED,MAAM,WAAW,iBAAiB;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,QAAQ;IACvB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,eAAe,EAAE,MAAM,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,EAAE,EAAE;QACF,KAAK,EAAE,MAAM,CAAC;QACd,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;IACF,SAAS,EAAE,KAAK,CAAC;QACf,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,QAAQ,EAAE;QACR,EAAE,EAAE,MAAM,EAAE,CAAC;QACb,EAAE,EAAE,MAAM,EAAE,CAAC;KACd,CAAC;IACF;;;OAGG;IACH,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,kBAAkB,EAAE,MAAM,CAAC;IAC3B,MAAM,EAAE,OAAO,EAAE,CAAC;IAClB,aAAa,EAAE;QACb,UAAU,EAAE,MAAM,CAAC;QACnB,YAAY,EAAE,OAAO,CAAC;QACtB,aAAa,EAAE,
OAAO,CAAC;QACvB,aAAa,EAAE,OAAO,CAAC;KACxB,CAAC;IACF,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,QAAQ,CAAC;CACrB"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { NormalizeUrlOptions } from "./types.js";
|
|
2
|
+
/** Returns a complete options object: each option missing from `options` is filled in with its default. */
export declare function mergeNormalizeUrlOptions(options?: NormalizeUrlOptions): Required<NormalizeUrlOptions>;
|
|
3
|
+
/**
|
|
4
|
+
* Stable URL/path identity for audits: deduping crawl entries and matching edges.
|
|
5
|
+
* HTTP(S): lowercase host, strip fragment, optional query strip, optional `www.` strip,
|
|
6
|
+
* drop default ports, trim trailing slash on non-root paths.
|
|
7
|
+
* Filesystem: Node path.normalize.
|
|
8
|
+
*/
|
|
9
|
+
// `options` may be omitted; the input is trimmed first, and a blank string is returned as "".
export declare function normalizeAuditUrl(url: string, options?: NormalizeUrlOptions): string;
|
|
10
|
+
//# sourceMappingURL=url-normalize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url-normalize.d.ts","sourceRoot":"","sources":["../src/url-normalize.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAOtD,wBAAgB,wBAAwB,CAAC,OAAO,CAAC,EAAE,mBAAmB,GAAG,QAAQ,CAAC,mBAAmB,CAAC,CAKrG;AAED;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,MAAM,CASpF"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { normalize } from "node:path";
|
|
2
|
+
/** Fallback values used when a caller leaves a URL-normalization option unset. */
const DEFAULTS = { stripQuery: true, stripWwwHost: false };
|
|
6
|
+
/**
 * Resolve a partial (or missing) options bag into a fully populated one.
 *
 * Only `null`/`undefined` fields fall back to `DEFAULTS`; an explicit `false`
 * supplied by the caller is kept.
 *
 * @param {{ stripQuery?: boolean, stripWwwHost?: boolean }} [options] - Caller overrides.
 * @returns {{ stripQuery: boolean, stripWwwHost: boolean }} Options with every field set.
 */
export function mergeNormalizeUrlOptions(options) {
  const { stripQuery, stripWwwHost } = options ?? {};
  return {
    stripQuery: stripQuery ?? DEFAULTS.stripQuery,
    stripWwwHost: stripWwwHost ?? DEFAULTS.stripWwwHost,
  };
}
|
|
12
|
+
/**
 * Produce a stable identity string for a crawl target so that duplicate crawl
 * entries collapse and link edges can be matched.
 *
 * - Input is trimmed; a blank string is returned as `""`.
 * - `http://` / `https://` inputs (scheme matched case-insensitively) go through
 *   HTTP normalisation: lowercase host, fragment removed, query optionally
 *   removed, optional `www.` removal, default ports dropped, trailing slash
 *   trimmed on non-root paths.
 * - Anything else is treated as a filesystem path and passed to Node's
 *   `path.normalize`.
 *
 * @param {string} url - URL or filesystem path to normalise.
 * @param {{ stripQuery?: boolean, stripWwwHost?: boolean }} [options] - Optional overrides.
 * @returns {string} The normalised URL or path.
 */
export function normalizeAuditUrl(url, options) {
  const candidate = url.trim();
  if (candidate === "") {
    return candidate;
  }
  const isHttp = /^https?:\/\//i.test(candidate);
  return isHttp
    ? normalizeHttpUrl(candidate, mergeNormalizeUrlOptions(options))
    : normalize(candidate);
}
|
|
28
|
+
/**
 * Canonicalise an absolute HTTP(S) URL so that equivalent spellings compare equal.
 *
 * Steps: drop the fragment, optionally drop the query string, optionally drop a
 * leading `www.` label, and trim trailing slashes from non-root paths.
 *
 * Host case and scheme-default ports (`:80` for http, `:443` for https) need no
 * explicit handling: the WHATWG `URL` parser already lower-cases the host and
 * omits a default port while parsing.
 *
 * @param {string} url - Absolute `http:` / `https:` URL.
 * @param {{ stripQuery: boolean, stripWwwHost: boolean }} opts - Fully resolved options.
 * @returns {string} The normalised `href`.
 * @throws {TypeError} If `url` cannot be parsed by `new URL()`.
 */
function normalizeHttpUrl(url, opts) {
  const u = new URL(url);
  u.hash = "";
  if (opts.stripQuery) {
    u.search = "";
  }
  if (opts.stripWwwHost && u.hostname.startsWith("www.")) {
    u.hostname = u.hostname.slice(4);
  }
  // Trim *every* trailing slash, not just one: otherwise "/a//" would become
  // "/a/" and a second pass would yield "/a", so the result would not be a
  // stable identity. The first character is always kept so the root stays "/".
  const { pathname } = u;
  let end = pathname.length;
  while (end > 1 && pathname[end - 1] === "/") {
    end -= 1;
  }
  u.pathname = pathname.slice(0, end) || "/";
  return u.href;
}
|
|
52
|
+
//# sourceMappingURL=url-normalize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"url-normalize.js","sourceRoot":"","sources":["../src/url-normalize.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAGtC,MAAM,QAAQ,GAAkC;IAC9C,UAAU,EAAE,IAAI;IAChB,YAAY,EAAE,KAAK;CACpB,CAAC;AAEF,MAAM,UAAU,wBAAwB,CAAC,OAA6B;IACpE,OAAO;QACL,UAAU,EAAE,OAAO,EAAE,UAAU,IAAI,QAAQ,CAAC,UAAU;QACtD,YAAY,EAAE,OAAO,EAAE,YAAY,IAAI,QAAQ,CAAC,YAAY;KAC7D,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAAW,EAAE,OAA6B;IAC1E,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;IAC3B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,OAAO,CAAC;IACjB,CAAC;IACD,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,OAAO,gBAAgB,CAAC,OAAO,EAAE,wBAAwB,CAAC,OAAO,CAAC,CAAC,CAAC;IACtE,CAAC;IACD,OAAO,SAAS,CAAC,OAAO,CAAC,CAAC;AAC5B,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAW,EAAE,IAAmC;IACxE,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACvB,CAAC,CAAC,IAAI,GAAG,EAAE,CAAC;IACZ,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QACpB,CAAC,CAAC,MAAM,GAAG,EAAE,CAAC;IAChB,CAAC;IACD,IAAI,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IACxC,IAAI,IAAI,CAAC,YAAY,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QACrD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC;IACD,CAAC,CAAC,QAAQ,GAAG,QAAQ,CAAC;IACtB,IAAI,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;QAC9C,CAAC,CAAC,IAAI,GAAG,EAAE,CAAC;IACd,CAAC;IACD,IAAI,CAAC,CAAC,QAAQ,KAAK,QAAQ,IAAI,CAAC,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;QAChD,CAAC,CAAC,IAAI,GAAG,EAAE,CAAC;IACd,CAAC;IACD,IAAI,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;IAC1B,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAClD,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACnC,CAAC;IACD,CAAC,CAAC,QAAQ,GAAG,QAAQ,IAAI,GAAG,CAAC;IAC7B,OAAO,CAAC,CAAC,IAAI,CAAC;AAChB,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@pseolint/core",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Programmatic SEO audit engine — 34 rules across 6 categories for SpamBrain risk detection",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"author": "Ouranos Labs <contact@ouranos-labs.dev>",
|
|
7
|
+
"homepage": "https://pseolint.dev",
|
|
8
|
+
"repository": {
|
|
9
|
+
"type": "git",
|
|
10
|
+
"url": "https://github.com/ouranos-labs/pseolint.git",
|
|
11
|
+
"directory": "packages/core"
|
|
12
|
+
},
|
|
13
|
+
"keywords": ["seo", "pseo", "programmatic-seo", "spambrain", "lint", "audit"],
|
|
14
|
+
"type": "module",
|
|
15
|
+
"exports": {
|
|
16
|
+
".": {
|
|
17
|
+
"import": "./dist/index.js",
|
|
18
|
+
"types": "./dist/index.d.ts"
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"main": "dist/index.js",
|
|
22
|
+
"types": "dist/index.d.ts",
|
|
23
|
+
"engines": {
|
|
24
|
+
"node": ">=18"
|
|
25
|
+
},
|
|
26
|
+
"files": [
|
|
27
|
+
"dist"
|
|
28
|
+
],
|
|
29
|
+
"scripts": {
|
|
30
|
+
"build": "tsc -p tsconfig.json",
|
|
31
|
+
"lint": "tsc --noEmit -p tsconfig.json",
|
|
32
|
+
"test": "vitest run --passWithNoTests tests/**/*.test.ts",
|
|
33
|
+
"typecheck": "tsc --noEmit -p tsconfig.json"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"cheerio": "^1.2.0"
|
|
37
|
+
},
|
|
38
|
+
"peerDependencies": {
|
|
39
|
+
"playwright-core": ">=1.40.0"
|
|
40
|
+
},
|
|
41
|
+
"peerDependenciesMeta": {
|
|
42
|
+
"playwright-core": {
|
|
43
|
+
"optional": true
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|