@djangocfg/seo 2.1.109 → 2.1.110
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +42 -33
- package/dist/cli.mjs.map +1 -1
- package/dist/crawler/index.mjs +42 -33
- package/dist/crawler/index.mjs.map +1 -1
- package/dist/index.mjs +42 -33
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -3
- package/src/crawler/crawler.ts +27 -26
- package/src/crawler/sitemap-validator.ts +17 -14
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@djangocfg/seo",
|
|
3
|
-
"version": "2.1.109",
|
|
3
|
+
"version": "2.1.110",
|
|
4
4
|
"description": "SEO analytics and indexing diagnostics module with Google Search Console integration and AI-ready reports",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"seo",
|
|
@@ -92,7 +92,7 @@
|
|
|
92
92
|
"google-auth-library": "^9.15.1",
|
|
93
93
|
"chalk": "^5.3.0",
|
|
94
94
|
"consola": "^3.4.2",
|
|
95
|
-
"
|
|
95
|
+
"linkedom": "^0.18.12",
|
|
96
96
|
"linkinator": "^7.5.0",
|
|
97
97
|
"p-limit": "^6.2.0",
|
|
98
98
|
"p-retry": "^7.0.0",
|
|
@@ -101,7 +101,7 @@
|
|
|
101
101
|
"robots-parser": "^3.0.1"
|
|
102
102
|
},
|
|
103
103
|
"devDependencies": {
|
|
104
|
-
"@djangocfg/typescript-config": "^2.1.109",
|
|
104
|
+
"@djangocfg/typescript-config": "^2.1.110",
|
|
105
105
|
"@types/node": "^24.7.2",
|
|
106
106
|
"tsup": "^8.5.0",
|
|
107
107
|
"tsx": "^4.19.2",
|
package/src/crawler/crawler.ts
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Internal site crawler for SEO analysis
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
-
import {
|
|
6
|
+
import { parseHTML } from 'linkedom';
|
|
7
7
|
import pLimit from 'p-limit';
|
|
8
8
|
import consola from 'consola';
|
|
9
9
|
import type { CrawlResult, CrawlerConfig, SeoIssue } from '../types/index.js';
|
|
@@ -139,10 +139,11 @@ export class SiteCrawler {
|
|
|
139
139
|
* Parse HTML and extract SEO-relevant data
|
|
140
140
|
*/
|
|
141
141
|
private parseHtml(html: string, result: CrawlResult, pageUrl: string, depth: number): void {
|
|
142
|
-
const
|
|
142
|
+
const { document } = parseHTML(html);
|
|
143
143
|
|
|
144
144
|
// Title
|
|
145
|
-
|
|
145
|
+
const titleEl = document.querySelector('title');
|
|
146
|
+
result.title = titleEl?.textContent?.trim() || undefined;
|
|
146
147
|
if (!result.title) {
|
|
147
148
|
result.warnings.push('Missing title tag');
|
|
148
149
|
} else if (result.title.length > 60) {
|
|
@@ -150,8 +151,8 @@ export class SiteCrawler {
|
|
|
150
151
|
}
|
|
151
152
|
|
|
152
153
|
// Meta description
|
|
153
|
-
|
|
154
|
-
|
|
154
|
+
const metaDesc = document.querySelector('meta[name="description"]');
|
|
155
|
+
result.metaDescription = metaDesc?.getAttribute('content')?.trim() || undefined;
|
|
155
156
|
if (!result.metaDescription) {
|
|
156
157
|
result.warnings.push('Missing meta description');
|
|
157
158
|
} else if (result.metaDescription.length > 160) {
|
|
@@ -161,25 +162,24 @@ export class SiteCrawler {
|
|
|
161
162
|
}
|
|
162
163
|
|
|
163
164
|
// Meta robots
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
165
|
+
const metaRobots = document.querySelector('meta[name="robots"]');
|
|
166
|
+
result.metaRobots = metaRobots?.getAttribute('content')?.trim() || undefined;
|
|
167
|
+
const xRobots = document.querySelector('meta[http-equiv="X-Robots-Tag"]');
|
|
168
|
+
const xRobotsContent = xRobots?.getAttribute('content')?.trim();
|
|
169
|
+
if (xRobotsContent) {
|
|
170
|
+
result.metaRobots = result.metaRobots ? `${result.metaRobots}, ${xRobotsContent}` : xRobotsContent;
|
|
168
171
|
}
|
|
169
172
|
|
|
170
173
|
// Canonical
|
|
171
|
-
|
|
174
|
+
const canonical = document.querySelector('link[rel="canonical"]');
|
|
175
|
+
result.canonicalUrl = canonical?.getAttribute('href')?.trim() || undefined;
|
|
172
176
|
if (!result.canonicalUrl) {
|
|
173
177
|
result.warnings.push('Missing canonical tag');
|
|
174
178
|
}
|
|
175
179
|
|
|
176
180
|
// Headings
|
|
177
|
-
result.h1 =
|
|
178
|
-
|
|
179
|
-
.get();
|
|
180
|
-
result.h2 = $('h2')
|
|
181
|
-
.map((_, el) => $(el).text().trim())
|
|
182
|
-
.get();
|
|
181
|
+
result.h1 = Array.from(document.querySelectorAll('h1')).map((el) => (el as { textContent?: string | null }).textContent?.trim() || '');
|
|
182
|
+
result.h2 = Array.from(document.querySelectorAll('h2')).map((el) => (el as { textContent?: string | null }).textContent?.trim() || '');
|
|
183
183
|
|
|
184
184
|
if (result.h1.length === 0) {
|
|
185
185
|
result.warnings.push('Missing H1 tag');
|
|
@@ -188,9 +188,9 @@ export class SiteCrawler {
|
|
|
188
188
|
}
|
|
189
189
|
|
|
190
190
|
// Links
|
|
191
|
-
|
|
192
|
-
const href =
|
|
193
|
-
if (!href)
|
|
191
|
+
for (const el of document.querySelectorAll('a[href]')) {
|
|
192
|
+
const href = el.getAttribute('href');
|
|
193
|
+
if (!href) continue;
|
|
194
194
|
|
|
195
195
|
try {
|
|
196
196
|
const linkUrl = new URL(href, pageUrl);
|
|
@@ -209,21 +209,22 @@ export class SiteCrawler {
|
|
|
209
209
|
} catch {
|
|
210
210
|
// Invalid URL, skip
|
|
211
211
|
}
|
|
212
|
-
}
|
|
212
|
+
}
|
|
213
213
|
|
|
214
214
|
// Images
|
|
215
|
-
|
|
216
|
-
const src =
|
|
217
|
-
const alt =
|
|
215
|
+
for (const el of document.querySelectorAll('img')) {
|
|
216
|
+
const src = el.getAttribute('src');
|
|
217
|
+
const alt = el.getAttribute('alt');
|
|
218
|
+
const hasAltAttr = alt !== null;
|
|
218
219
|
|
|
219
220
|
if (src) {
|
|
220
221
|
result.images.push({
|
|
221
222
|
src,
|
|
222
|
-
alt,
|
|
223
|
-
hasAlt:
|
|
223
|
+
alt: alt ?? undefined,
|
|
224
|
+
hasAlt: hasAltAttr && alt.trim().length > 0,
|
|
224
225
|
});
|
|
225
226
|
}
|
|
226
|
-
}
|
|
227
|
+
}
|
|
227
228
|
|
|
228
229
|
const imagesWithoutAlt = result.images.filter((img) => !img.hasAlt);
|
|
229
230
|
if (imagesWithoutAlt.length > 0) {
|
package/src/crawler/sitemap-validator.ts
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Validate XML sitemaps
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
-
import {
|
|
6
|
+
import { DOMParser } from 'linkedom';
|
|
7
7
|
import consola from 'consola';
|
|
8
8
|
import type { SeoIssue } from '../types/index.js';
|
|
9
9
|
|
|
@@ -72,34 +72,37 @@ export async function analyzeSitemap(sitemapUrl: string): Promise<SitemapAnalysi
|
|
|
72
72
|
}
|
|
73
73
|
|
|
74
74
|
// Parse XML
|
|
75
|
-
const
|
|
75
|
+
const parser = new DOMParser();
|
|
76
|
+
const doc = parser.parseFromString(content, 'text/xml');
|
|
76
77
|
|
|
77
78
|
// Check if it's a sitemap index
|
|
78
|
-
const sitemapIndex =
|
|
79
|
-
if (sitemapIndex
|
|
79
|
+
const sitemapIndex = doc.querySelector('sitemapindex');
|
|
80
|
+
if (sitemapIndex) {
|
|
80
81
|
analysis.type = 'sitemap-index';
|
|
81
82
|
|
|
82
|
-
|
|
83
|
-
const loc =
|
|
83
|
+
for (const sitemap of doc.querySelectorAll('sitemap')) {
|
|
84
|
+
const loc = sitemap.querySelector('loc')?.textContent?.trim();
|
|
84
85
|
if (loc) {
|
|
85
86
|
analysis.childSitemaps.push(loc);
|
|
86
87
|
}
|
|
87
|
-
}
|
|
88
|
+
}
|
|
88
89
|
|
|
89
90
|
consola.debug(`Sitemap index contains ${analysis.childSitemaps.length} sitemaps`);
|
|
90
91
|
} else {
|
|
91
92
|
analysis.type = 'sitemap';
|
|
92
93
|
|
|
93
|
-
|
|
94
|
-
const loc =
|
|
94
|
+
for (const url of doc.querySelectorAll('url')) {
|
|
95
|
+
const loc = url.querySelector('loc')?.textContent?.trim();
|
|
95
96
|
if (loc) {
|
|
96
97
|
analysis.urls.push(loc);
|
|
97
98
|
}
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
99
|
+
// Get lastmod from first URL
|
|
100
|
+
if (!analysis.lastmod) {
|
|
101
|
+
const lastmod = url.querySelector('lastmod')?.textContent?.trim();
|
|
102
|
+
if (lastmod) {
|
|
103
|
+
analysis.lastmod = lastmod;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
103
106
|
}
|
|
104
107
|
|
|
105
108
|
consola.debug(`Sitemap contains ${analysis.urls.length} URLs`);
|