@djangocfg/seo 2.1.109 → 2.1.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@djangocfg/seo",
3
- "version": "2.1.109",
3
+ "version": "2.1.110",
4
4
  "description": "SEO analytics and indexing diagnostics module with Google Search Console integration and AI-ready reports",
5
5
  "keywords": [
6
6
  "seo",
@@ -92,7 +92,7 @@
92
92
  "google-auth-library": "^9.15.1",
93
93
  "chalk": "^5.3.0",
94
94
  "consola": "^3.4.2",
95
- "cheerio": "^1.0.0",
95
+ "linkedom": "^0.18.12",
96
96
  "linkinator": "^7.5.0",
97
97
  "p-limit": "^6.2.0",
98
98
  "p-retry": "^7.0.0",
@@ -101,7 +101,7 @@
101
101
  "robots-parser": "^3.0.1"
102
102
  },
103
103
  "devDependencies": {
104
- "@djangocfg/typescript-config": "^2.1.109",
104
+ "@djangocfg/typescript-config": "^2.1.110",
105
105
  "@types/node": "^24.7.2",
106
106
  "tsup": "^8.5.0",
107
107
  "tsx": "^4.19.2",
@@ -3,7 +3,7 @@
3
3
  * Internal site crawler for SEO analysis
4
4
  */
5
5
 
6
- import { load } from 'cheerio';
6
+ import { parseHTML } from 'linkedom';
7
7
  import pLimit from 'p-limit';
8
8
  import consola from 'consola';
9
9
  import type { CrawlResult, CrawlerConfig, SeoIssue } from '../types/index.js';
@@ -139,10 +139,11 @@ export class SiteCrawler {
139
139
  * Parse HTML and extract SEO-relevant data
140
140
  */
141
141
  private parseHtml(html: string, result: CrawlResult, pageUrl: string, depth: number): void {
142
- const $ = load(html);
142
+ const { document } = parseHTML(html);
143
143
 
144
144
  // Title
145
- result.title = $('title').first().text().trim() || undefined;
145
+ const titleEl = document.querySelector('title');
146
+ result.title = titleEl?.textContent?.trim() || undefined;
146
147
  if (!result.title) {
147
148
  result.warnings.push('Missing title tag');
148
149
  } else if (result.title.length > 60) {
@@ -150,8 +151,8 @@ export class SiteCrawler {
150
151
  }
151
152
 
152
153
  // Meta description
153
- result.metaDescription =
154
- $('meta[name="description"]').attr('content')?.trim() || undefined;
154
+ const metaDesc = document.querySelector('meta[name="description"]');
155
+ result.metaDescription = metaDesc?.getAttribute('content')?.trim() || undefined;
155
156
  if (!result.metaDescription) {
156
157
  result.warnings.push('Missing meta description');
157
158
  } else if (result.metaDescription.length > 160) {
@@ -161,25 +162,24 @@ export class SiteCrawler {
161
162
  }
162
163
 
163
164
  // Meta robots
164
- result.metaRobots = $('meta[name="robots"]').attr('content')?.trim() || undefined;
165
- const xRobots = $('meta[http-equiv="X-Robots-Tag"]').attr('content')?.trim();
166
- if (xRobots) {
167
- result.metaRobots = result.metaRobots ? `${result.metaRobots}, ${xRobots}` : xRobots;
165
+ const metaRobots = document.querySelector('meta[name="robots"]');
166
+ result.metaRobots = metaRobots?.getAttribute('content')?.trim() || undefined;
167
+ const xRobots = document.querySelector('meta[http-equiv="X-Robots-Tag"]');
168
+ const xRobotsContent = xRobots?.getAttribute('content')?.trim();
169
+ if (xRobotsContent) {
170
+ result.metaRobots = result.metaRobots ? `${result.metaRobots}, ${xRobotsContent}` : xRobotsContent;
168
171
  }
169
172
 
170
173
  // Canonical
171
- result.canonicalUrl = $('link[rel="canonical"]').attr('href')?.trim() || undefined;
174
+ const canonical = document.querySelector('link[rel="canonical"]');
175
+ result.canonicalUrl = canonical?.getAttribute('href')?.trim() || undefined;
172
176
  if (!result.canonicalUrl) {
173
177
  result.warnings.push('Missing canonical tag');
174
178
  }
175
179
 
176
180
  // Headings
177
- result.h1 = $('h1')
178
- .map((_, el) => $(el).text().trim())
179
- .get();
180
- result.h2 = $('h2')
181
- .map((_, el) => $(el).text().trim())
182
- .get();
181
+ result.h1 = Array.from(document.querySelectorAll('h1')).map((el) => (el as { textContent?: string | null }).textContent?.trim() || '');
182
+ result.h2 = Array.from(document.querySelectorAll('h2')).map((el) => (el as { textContent?: string | null }).textContent?.trim() || '');
183
183
 
184
184
  if (result.h1.length === 0) {
185
185
  result.warnings.push('Missing H1 tag');
@@ -188,9 +188,9 @@ export class SiteCrawler {
188
188
  }
189
189
 
190
190
  // Links
191
- $('a[href]').each((_, el) => {
192
- const href = $(el).attr('href');
193
- if (!href) return;
191
+ for (const el of document.querySelectorAll('a[href]')) {
192
+ const href = el.getAttribute('href');
193
+ if (!href) continue;
194
194
 
195
195
  try {
196
196
  const linkUrl = new URL(href, pageUrl);
@@ -209,21 +209,22 @@ export class SiteCrawler {
209
209
  } catch {
210
210
  // Invalid URL, skip
211
211
  }
212
- });
212
+ }
213
213
 
214
214
  // Images
215
- $('img').each((_, el) => {
216
- const src = $(el).attr('src');
217
- const alt = $(el).attr('alt');
215
+ for (const el of document.querySelectorAll('img')) {
216
+ const src = el.getAttribute('src');
217
+ const alt = el.getAttribute('alt');
218
+ const hasAltAttr = alt !== null;
218
219
 
219
220
  if (src) {
220
221
  result.images.push({
221
222
  src,
222
- alt,
223
- hasAlt: alt !== undefined && alt.trim().length > 0,
223
+ alt: alt ?? undefined,
224
+ hasAlt: hasAltAttr && alt.trim().length > 0,
224
225
  });
225
226
  }
226
- });
227
+ }
227
228
 
228
229
  const imagesWithoutAlt = result.images.filter((img) => !img.hasAlt);
229
230
  if (imagesWithoutAlt.length > 0) {
@@ -3,7 +3,7 @@
3
3
  * Validate XML sitemaps
4
4
  */
5
5
 
6
- import { load } from 'cheerio';
6
+ import { DOMParser } from 'linkedom';
7
7
  import consola from 'consola';
8
8
  import type { SeoIssue } from '../types/index.js';
9
9
 
@@ -72,34 +72,37 @@ export async function analyzeSitemap(sitemapUrl: string): Promise<SitemapAnalysi
72
72
  }
73
73
 
74
74
  // Parse XML
75
- const $ = load(content, { xmlMode: true });
75
+ const parser = new DOMParser();
76
+ const doc = parser.parseFromString(content, 'text/xml');
76
77
 
77
78
  // Check if it's a sitemap index
78
- const sitemapIndex = $('sitemapindex');
79
- if (sitemapIndex.length > 0) {
79
+ const sitemapIndex = doc.querySelector('sitemapindex');
80
+ if (sitemapIndex) {
80
81
  analysis.type = 'sitemap-index';
81
82
 
82
- $('sitemap').each((_, el) => {
83
- const loc = $('loc', el).text().trim();
83
+ for (const sitemap of doc.querySelectorAll('sitemap')) {
84
+ const loc = sitemap.querySelector('loc')?.textContent?.trim();
84
85
  if (loc) {
85
86
  analysis.childSitemaps.push(loc);
86
87
  }
87
- });
88
+ }
88
89
 
89
90
  consola.debug(`Sitemap index contains ${analysis.childSitemaps.length} sitemaps`);
90
91
  } else {
91
92
  analysis.type = 'sitemap';
92
93
 
93
- $('url').each((_, el) => {
94
- const loc = $('loc', el).text().trim();
94
+ for (const url of doc.querySelectorAll('url')) {
95
+ const loc = url.querySelector('loc')?.textContent?.trim();
95
96
  if (loc) {
96
97
  analysis.urls.push(loc);
97
98
  }
98
- });
99
-
100
- const lastmod = $('url lastmod').first().text().trim();
101
- if (lastmod) {
102
- analysis.lastmod = lastmod;
99
+ // Get lastmod from first URL
100
+ if (!analysis.lastmod) {
101
+ const lastmod = url.querySelector('lastmod')?.textContent?.trim();
102
+ if (lastmod) {
103
+ analysis.lastmod = lastmod;
104
+ }
105
+ }
103
106
  }
104
107
 
105
108
  consola.debug(`Sitemap contains ${analysis.urls.length} URLs`);