@djangocfg/seo 2.1.109 → 2.1.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -8,7 +8,7 @@ import { searchconsole } from '@googleapis/searchconsole';
8
8
  import pLimit from 'p-limit';
9
9
  import pRetry from 'p-retry';
10
10
  import { JWT } from 'google-auth-library';
11
- import { load } from 'cheerio';
11
+ import { parseHTML, DOMParser } from 'linkedom';
12
12
  import robotsParser from 'robots-parser';
13
13
  import { mkdir, writeFile } from 'fs/promises';
14
14
  import * as linkinator from 'linkinator';
@@ -753,14 +753,16 @@ var SiteCrawler = class {
753
753
  * Parse HTML and extract SEO-relevant data
754
754
  */
755
755
  parseHtml(html, result, pageUrl, depth) {
756
- const $ = load(html);
757
- result.title = $("title").first().text().trim() || void 0;
756
+ const { document } = parseHTML(html);
757
+ const titleEl = document.querySelector("title");
758
+ result.title = titleEl?.textContent?.trim() || void 0;
758
759
  if (!result.title) {
759
760
  result.warnings.push("Missing title tag");
760
761
  } else if (result.title.length > 60) {
761
762
  result.warnings.push(`Title too long (${result.title.length} chars, recommended: <60)`);
762
763
  }
763
- result.metaDescription = $('meta[name="description"]').attr("content")?.trim() || void 0;
764
+ const metaDesc = document.querySelector('meta[name="description"]');
765
+ result.metaDescription = metaDesc?.getAttribute("content")?.trim() || void 0;
764
766
  if (!result.metaDescription) {
765
767
  result.warnings.push("Missing meta description");
766
768
  } else if (result.metaDescription.length > 160) {
@@ -768,25 +770,28 @@ var SiteCrawler = class {
768
770
  `Meta description too long (${result.metaDescription.length} chars, recommended: <160)`
769
771
  );
770
772
  }
771
- result.metaRobots = $('meta[name="robots"]').attr("content")?.trim() || void 0;
772
- const xRobots = $('meta[http-equiv="X-Robots-Tag"]').attr("content")?.trim();
773
- if (xRobots) {
774
- result.metaRobots = result.metaRobots ? `${result.metaRobots}, ${xRobots}` : xRobots;
773
+ const metaRobots = document.querySelector('meta[name="robots"]');
774
+ result.metaRobots = metaRobots?.getAttribute("content")?.trim() || void 0;
775
+ const xRobots = document.querySelector('meta[http-equiv="X-Robots-Tag"]');
776
+ const xRobotsContent = xRobots?.getAttribute("content")?.trim();
777
+ if (xRobotsContent) {
778
+ result.metaRobots = result.metaRobots ? `${result.metaRobots}, ${xRobotsContent}` : xRobotsContent;
775
779
  }
776
- result.canonicalUrl = $('link[rel="canonical"]').attr("href")?.trim() || void 0;
780
+ const canonical = document.querySelector('link[rel="canonical"]');
781
+ result.canonicalUrl = canonical?.getAttribute("href")?.trim() || void 0;
777
782
  if (!result.canonicalUrl) {
778
783
  result.warnings.push("Missing canonical tag");
779
784
  }
780
- result.h1 = $("h1").map((_, el) => $(el).text().trim()).get();
781
- result.h2 = $("h2").map((_, el) => $(el).text().trim()).get();
785
+ result.h1 = Array.from(document.querySelectorAll("h1")).map((el) => el.textContent?.trim() || "");
786
+ result.h2 = Array.from(document.querySelectorAll("h2")).map((el) => el.textContent?.trim() || "");
782
787
  if (result.h1.length === 0) {
783
788
  result.warnings.push("Missing H1 tag");
784
789
  } else if (result.h1.length > 1) {
785
790
  result.warnings.push(`Multiple H1 tags (${result.h1.length})`);
786
791
  }
787
- $("a[href]").each((_, el) => {
788
- const href = $(el).attr("href");
789
- if (!href) return;
792
+ for (const el of document.querySelectorAll("a[href]")) {
793
+ const href = el.getAttribute("href");
794
+ if (!href) continue;
790
795
  try {
791
796
  const linkUrl = new URL(href, pageUrl);
792
797
  if (linkUrl.hostname === this.baseUrl.hostname) {
@@ -800,18 +805,19 @@ var SiteCrawler = class {
800
805
  }
801
806
  } catch {
802
807
  }
803
- });
804
- $("img").each((_, el) => {
805
- const src = $(el).attr("src");
806
- const alt = $(el).attr("alt");
808
+ }
809
+ for (const el of document.querySelectorAll("img")) {
810
+ const src = el.getAttribute("src");
811
+ const alt = el.getAttribute("alt");
812
+ const hasAltAttr = alt !== null;
807
813
  if (src) {
808
814
  result.images.push({
809
815
  src,
810
- alt,
811
- hasAlt: alt !== void 0 && alt.trim().length > 0
816
+ alt: alt ?? void 0,
817
+ hasAlt: hasAltAttr && alt.trim().length > 0
812
818
  });
813
819
  }
814
- });
820
+ }
815
821
  const imagesWithoutAlt = result.images.filter((img) => !img.hasAlt);
816
822
  if (imagesWithoutAlt.length > 0) {
817
823
  result.warnings.push(`${imagesWithoutAlt.length} images without alt text`);
@@ -1145,28 +1151,31 @@ async function analyzeSitemap(sitemapUrl) {
1145
1151
  metadata: { contentType }
1146
1152
  });
1147
1153
  }
1148
- const $ = load(content, { xmlMode: true });
1149
- const sitemapIndex = $("sitemapindex");
1150
- if (sitemapIndex.length > 0) {
1154
+ const parser = new DOMParser();
1155
+ const doc = parser.parseFromString(content, "text/xml");
1156
+ const sitemapIndex = doc.querySelector("sitemapindex");
1157
+ if (sitemapIndex) {
1151
1158
  analysis.type = "sitemap-index";
1152
- $("sitemap").each((_, el) => {
1153
- const loc = $("loc", el).text().trim();
1159
+ for (const sitemap of doc.querySelectorAll("sitemap")) {
1160
+ const loc = sitemap.querySelector("loc")?.textContent?.trim();
1154
1161
  if (loc) {
1155
1162
  analysis.childSitemaps.push(loc);
1156
1163
  }
1157
- });
1164
+ }
1158
1165
  consola3.debug(`Sitemap index contains ${analysis.childSitemaps.length} sitemaps`);
1159
1166
  } else {
1160
1167
  analysis.type = "sitemap";
1161
- $("url").each((_, el) => {
1162
- const loc = $("loc", el).text().trim();
1168
+ for (const url of doc.querySelectorAll("url")) {
1169
+ const loc = url.querySelector("loc")?.textContent?.trim();
1163
1170
  if (loc) {
1164
1171
  analysis.urls.push(loc);
1165
1172
  }
1166
- });
1167
- const lastmod = $("url lastmod").first().text().trim();
1168
- if (lastmod) {
1169
- analysis.lastmod = lastmod;
1173
+ if (!analysis.lastmod) {
1174
+ const lastmod = url.querySelector("lastmod")?.textContent?.trim();
1175
+ if (lastmod) {
1176
+ analysis.lastmod = lastmod;
1177
+ }
1178
+ }
1170
1179
  }
1171
1180
  consola3.debug(`Sitemap contains ${analysis.urls.length} URLs`);
1172
1181
  }