@djangocfg/seo 2.1.109 → 2.1.110
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +42 -33
- package/dist/cli.mjs.map +1 -1
- package/dist/crawler/index.mjs +42 -33
- package/dist/crawler/index.mjs.map +1 -1
- package/dist/index.mjs +42 -33
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -3
- package/src/crawler/crawler.ts +27 -26
- package/src/crawler/sitemap-validator.ts +17 -14
package/dist/cli.mjs
CHANGED
|
@@ -8,7 +8,7 @@ import { searchconsole } from '@googleapis/searchconsole';
|
|
|
8
8
|
import pLimit from 'p-limit';
|
|
9
9
|
import pRetry from 'p-retry';
|
|
10
10
|
import { JWT } from 'google-auth-library';
|
|
11
|
-
import {
|
|
11
|
+
import { parseHTML, DOMParser } from 'linkedom';
|
|
12
12
|
import robotsParser from 'robots-parser';
|
|
13
13
|
import { mkdir, writeFile } from 'fs/promises';
|
|
14
14
|
import * as linkinator from 'linkinator';
|
|
@@ -753,14 +753,16 @@ var SiteCrawler = class {
|
|
|
753
753
|
* Parse HTML and extract SEO-relevant data
|
|
754
754
|
*/
|
|
755
755
|
parseHtml(html, result, pageUrl, depth) {
|
|
756
|
-
const
|
|
757
|
-
|
|
756
|
+
const { document } = parseHTML(html);
|
|
757
|
+
const titleEl = document.querySelector("title");
|
|
758
|
+
result.title = titleEl?.textContent?.trim() || void 0;
|
|
758
759
|
if (!result.title) {
|
|
759
760
|
result.warnings.push("Missing title tag");
|
|
760
761
|
} else if (result.title.length > 60) {
|
|
761
762
|
result.warnings.push(`Title too long (${result.title.length} chars, recommended: <60)`);
|
|
762
763
|
}
|
|
763
|
-
|
|
764
|
+
const metaDesc = document.querySelector('meta[name="description"]');
|
|
765
|
+
result.metaDescription = metaDesc?.getAttribute("content")?.trim() || void 0;
|
|
764
766
|
if (!result.metaDescription) {
|
|
765
767
|
result.warnings.push("Missing meta description");
|
|
766
768
|
} else if (result.metaDescription.length > 160) {
|
|
@@ -768,25 +770,28 @@ var SiteCrawler = class {
|
|
|
768
770
|
`Meta description too long (${result.metaDescription.length} chars, recommended: <160)`
|
|
769
771
|
);
|
|
770
772
|
}
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
773
|
+
const metaRobots = document.querySelector('meta[name="robots"]');
|
|
774
|
+
result.metaRobots = metaRobots?.getAttribute("content")?.trim() || void 0;
|
|
775
|
+
const xRobots = document.querySelector('meta[http-equiv="X-Robots-Tag"]');
|
|
776
|
+
const xRobotsContent = xRobots?.getAttribute("content")?.trim();
|
|
777
|
+
if (xRobotsContent) {
|
|
778
|
+
result.metaRobots = result.metaRobots ? `${result.metaRobots}, ${xRobotsContent}` : xRobotsContent;
|
|
775
779
|
}
|
|
776
|
-
|
|
780
|
+
const canonical = document.querySelector('link[rel="canonical"]');
|
|
781
|
+
result.canonicalUrl = canonical?.getAttribute("href")?.trim() || void 0;
|
|
777
782
|
if (!result.canonicalUrl) {
|
|
778
783
|
result.warnings.push("Missing canonical tag");
|
|
779
784
|
}
|
|
780
|
-
result.h1 =
|
|
781
|
-
result.h2 =
|
|
785
|
+
result.h1 = Array.from(document.querySelectorAll("h1")).map((el) => el.textContent?.trim() || "");
|
|
786
|
+
result.h2 = Array.from(document.querySelectorAll("h2")).map((el) => el.textContent?.trim() || "");
|
|
782
787
|
if (result.h1.length === 0) {
|
|
783
788
|
result.warnings.push("Missing H1 tag");
|
|
784
789
|
} else if (result.h1.length > 1) {
|
|
785
790
|
result.warnings.push(`Multiple H1 tags (${result.h1.length})`);
|
|
786
791
|
}
|
|
787
|
-
|
|
788
|
-
const href =
|
|
789
|
-
if (!href)
|
|
792
|
+
for (const el of document.querySelectorAll("a[href]")) {
|
|
793
|
+
const href = el.getAttribute("href");
|
|
794
|
+
if (!href) continue;
|
|
790
795
|
try {
|
|
791
796
|
const linkUrl = new URL(href, pageUrl);
|
|
792
797
|
if (linkUrl.hostname === this.baseUrl.hostname) {
|
|
@@ -800,18 +805,19 @@ var SiteCrawler = class {
|
|
|
800
805
|
}
|
|
801
806
|
} catch {
|
|
802
807
|
}
|
|
803
|
-
}
|
|
804
|
-
|
|
805
|
-
const src =
|
|
806
|
-
const alt =
|
|
808
|
+
}
|
|
809
|
+
for (const el of document.querySelectorAll("img")) {
|
|
810
|
+
const src = el.getAttribute("src");
|
|
811
|
+
const alt = el.getAttribute("alt");
|
|
812
|
+
const hasAltAttr = alt !== null;
|
|
807
813
|
if (src) {
|
|
808
814
|
result.images.push({
|
|
809
815
|
src,
|
|
810
|
-
alt,
|
|
811
|
-
hasAlt:
|
|
816
|
+
alt: alt ?? void 0,
|
|
817
|
+
hasAlt: hasAltAttr && alt.trim().length > 0
|
|
812
818
|
});
|
|
813
819
|
}
|
|
814
|
-
}
|
|
820
|
+
}
|
|
815
821
|
const imagesWithoutAlt = result.images.filter((img) => !img.hasAlt);
|
|
816
822
|
if (imagesWithoutAlt.length > 0) {
|
|
817
823
|
result.warnings.push(`${imagesWithoutAlt.length} images without alt text`);
|
|
@@ -1145,28 +1151,31 @@ async function analyzeSitemap(sitemapUrl) {
|
|
|
1145
1151
|
metadata: { contentType }
|
|
1146
1152
|
});
|
|
1147
1153
|
}
|
|
1148
|
-
const
|
|
1149
|
-
const
|
|
1150
|
-
|
|
1154
|
+
const parser = new DOMParser();
|
|
1155
|
+
const doc = parser.parseFromString(content, "text/xml");
|
|
1156
|
+
const sitemapIndex = doc.querySelector("sitemapindex");
|
|
1157
|
+
if (sitemapIndex) {
|
|
1151
1158
|
analysis.type = "sitemap-index";
|
|
1152
|
-
|
|
1153
|
-
const loc =
|
|
1159
|
+
for (const sitemap of doc.querySelectorAll("sitemap")) {
|
|
1160
|
+
const loc = sitemap.querySelector("loc")?.textContent?.trim();
|
|
1154
1161
|
if (loc) {
|
|
1155
1162
|
analysis.childSitemaps.push(loc);
|
|
1156
1163
|
}
|
|
1157
|
-
}
|
|
1164
|
+
}
|
|
1158
1165
|
consola3.debug(`Sitemap index contains ${analysis.childSitemaps.length} sitemaps`);
|
|
1159
1166
|
} else {
|
|
1160
1167
|
analysis.type = "sitemap";
|
|
1161
|
-
|
|
1162
|
-
const loc =
|
|
1168
|
+
for (const url of doc.querySelectorAll("url")) {
|
|
1169
|
+
const loc = url.querySelector("loc")?.textContent?.trim();
|
|
1163
1170
|
if (loc) {
|
|
1164
1171
|
analysis.urls.push(loc);
|
|
1165
1172
|
}
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1173
|
+
if (!analysis.lastmod) {
|
|
1174
|
+
const lastmod = url.querySelector("lastmod")?.textContent?.trim();
|
|
1175
|
+
if (lastmod) {
|
|
1176
|
+
analysis.lastmod = lastmod;
|
|
1177
|
+
}
|
|
1178
|
+
}
|
|
1170
1179
|
}
|
|
1171
1180
|
consola3.debug(`Sitemap contains ${analysis.urls.length} URLs`);
|
|
1172
1181
|
}
|