npm - @djangocfg/seo - Versions diffs - 2.1.109 → 2.1.110 - Mend

@djangocfg/seo 2.1.109 → 2.1.110

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (9)

package/dist/cli.mjs +42 -33
package/dist/cli.mjs.map +1 -1
package/dist/crawler/index.mjs +42 -33
package/dist/crawler/index.mjs.map +1 -1
package/dist/index.mjs +42 -33
package/dist/index.mjs.map +1 -1
package/package.json +3 -3
package/src/crawler/crawler.ts +27 -26
package/src/crawler/sitemap-validator.ts +17 -14

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@djangocfg/seo",
-  "version": "2.1.109",
+  "version": "2.1.110",
   "description": "SEO analytics and indexing diagnostics module with Google Search Console integration and AI-ready reports",
   "keywords": [
     "seo",
@@ -92,7 +92,7 @@
     "google-auth-library": "^9.15.1",
     "chalk": "^5.3.0",
     "consola": "^3.4.2",
-    "cheerio": "^1.0.0",
+    "linkedom": "^0.18.12",
     "linkinator": "^7.5.0",
     "p-limit": "^6.2.0",
     "p-retry": "^7.0.0",
@@ -101,7 +101,7 @@
     "robots-parser": "^3.0.1"
   },
   "devDependencies": {
-    "@djangocfg/typescript-config": "^2.1.109",
+    "@djangocfg/typescript-config": "^2.1.110",
     "@types/node": "^24.7.2",
     "tsup": "^8.5.0",
     "tsx": "^4.19.2",

package/src/crawler/crawler.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  * Internal site crawler for SEO analysis
  */
-import { load } from 'cheerio';
+import { parseHTML } from 'linkedom';
 import pLimit from 'p-limit';
 import consola from 'consola';
 import type { CrawlResult, CrawlerConfig, SeoIssue } from '../types/index.js';
@@ -139,10 +139,11 @@ export class SiteCrawler {
    * Parse HTML and extract SEO-relevant data
    */
   private parseHtml(html: string, result: CrawlResult, pageUrl: string, depth: number): void {
-    const $ = load(html);
+    const { document } = parseHTML(html);
     // Title
-    result.title = $('title').first().text().trim() || undefined;
+    const titleEl = document.querySelector('title');
+    result.title = titleEl?.textContent?.trim() || undefined;
     if (!result.title) {
       result.warnings.push('Missing title tag');
     } else if (result.title.length > 60) {
@@ -150,8 +151,8 @@ export class SiteCrawler {
     }
     // Meta description
-    result.metaDescription =
-      $('meta[name="description"]').attr('content')?.trim() || undefined;
+    const metaDesc = document.querySelector('meta[name="description"]');
+    result.metaDescription = metaDesc?.getAttribute('content')?.trim() || undefined;
     if (!result.metaDescription) {
       result.warnings.push('Missing meta description');
     } else if (result.metaDescription.length > 160) {
@@ -161,25 +162,24 @@ export class SiteCrawler {
     }
     // Meta robots
-    result.metaRobots = $('meta[name="robots"]').attr('content')?.trim() || undefined;
-    const xRobots = $('meta[http-equiv="X-Robots-Tag"]').attr('content')?.trim();
-    if (xRobots) {
-      result.metaRobots = result.metaRobots ? `${result.metaRobots}, ${xRobots}` : xRobots;
+    const metaRobots = document.querySelector('meta[name="robots"]');
+    result.metaRobots = metaRobots?.getAttribute('content')?.trim() || undefined;
+    const xRobots = document.querySelector('meta[http-equiv="X-Robots-Tag"]');
+    const xRobotsContent = xRobots?.getAttribute('content')?.trim();
+    if (xRobotsContent) {
+      result.metaRobots = result.metaRobots ? `${result.metaRobots}, ${xRobotsContent}` : xRobotsContent;
     }
     // Canonical
-    result.canonicalUrl = $('link[rel="canonical"]').attr('href')?.trim() || undefined;
+    const canonical = document.querySelector('link[rel="canonical"]');
+    result.canonicalUrl = canonical?.getAttribute('href')?.trim() || undefined;
     if (!result.canonicalUrl) {
       result.warnings.push('Missing canonical tag');
     }
     // Headings
-    result.h1 = $('h1')
-      .map((_, el) => $(el).text().trim())
-      .get();
-    result.h2 = $('h2')
-      .map((_, el) => $(el).text().trim())
-      .get();
+    result.h1 = Array.from(document.querySelectorAll('h1')).map((el) => (el as { textContent?: string | null }).textContent?.trim() || '');
+    result.h2 = Array.from(document.querySelectorAll('h2')).map((el) => (el as { textContent?: string | null }).textContent?.trim() || '');
     if (result.h1.length === 0) {
       result.warnings.push('Missing H1 tag');
@@ -188,9 +188,9 @@ export class SiteCrawler {
     }
     // Links
-    $('a[href]').each((_, el) => {
-      const href = $(el).attr('href');
-      if (!href) return;
+    for (const el of document.querySelectorAll('a[href]')) {
+      const href = el.getAttribute('href');
+      if (!href) continue;
       try {
         const linkUrl = new URL(href, pageUrl);
@@ -209,21 +209,22 @@ export class SiteCrawler {
       } catch {
         // Invalid URL, skip
       }
-    });
+    }
     // Images
-    $('img').each((_, el) => {
-      const src = $(el).attr('src');
-      const alt = $(el).attr('alt');
+    for (const el of document.querySelectorAll('img')) {
+      const src = el.getAttribute('src');
+      const alt = el.getAttribute('alt');
+      const hasAltAttr = alt !== null;
       if (src) {
         result.images.push({
           src,
-          alt,
-          hasAlt: alt !== undefined && alt.trim().length > 0,
+          alt: alt ?? undefined,
+          hasAlt: hasAltAttr && alt.trim().length > 0,
         });
       }
-    });
+    }
     const imagesWithoutAlt = result.images.filter((img) => !img.hasAlt);
     if (imagesWithoutAlt.length > 0) {

package/src/crawler/sitemap-validator.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  * Validate XML sitemaps
  */
-import { load } from 'cheerio';
+import { DOMParser } from 'linkedom';
 import consola from 'consola';
 import type { SeoIssue } from '../types/index.js';
@@ -72,34 +72,37 @@ export async function analyzeSitemap(sitemapUrl: string): Promise<SitemapAnalysi
     }
     // Parse XML
-    const $ = load(content, { xmlMode: true });
+    const parser = new DOMParser();
+    const doc = parser.parseFromString(content, 'text/xml');
     // Check if it's a sitemap index
-    const sitemapIndex = $('sitemapindex');
-    if (sitemapIndex.length > 0) {
+    const sitemapIndex = doc.querySelector('sitemapindex');
+    if (sitemapIndex) {
       analysis.type = 'sitemap-index';
-      $('sitemap').each((_, el) => {
-        const loc = $('loc', el).text().trim();
+      for (const sitemap of doc.querySelectorAll('sitemap')) {
+        const loc = sitemap.querySelector('loc')?.textContent?.trim();
         if (loc) {
           analysis.childSitemaps.push(loc);
         }
-      });
+      }
       consola.debug(`Sitemap index contains ${analysis.childSitemaps.length} sitemaps`);
     } else {
       analysis.type = 'sitemap';
-      $('url').each((_, el) => {
-        const loc = $('loc', el).text().trim();
+      for (const url of doc.querySelectorAll('url')) {
+        const loc = url.querySelector('loc')?.textContent?.trim();
         if (loc) {
           analysis.urls.push(loc);
         }
-      });
-      const lastmod = $('url lastmod').first().text().trim();
-      if (lastmod) {
-        analysis.lastmod = lastmod;
+        // Get lastmod from first URL
+        if (!analysis.lastmod) {
+          const lastmod = url.querySelector('lastmod')?.textContent?.trim();
+          if (lastmod) {
+            analysis.lastmod = lastmod;
+          }
+        }
       }
       consola.debug(`Sitemap contains ${analysis.urls.length} URLs`);