@rog0x/mcp-seo-tools 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +175 -0
- package/dist/index.js.map +1 -0
- package/dist/tools/heading-checker.d.ts +15 -0
- package/dist/tools/heading-checker.d.ts.map +1 -0
- package/dist/tools/heading-checker.js +123 -0
- package/dist/tools/heading-checker.js.map +1 -0
- package/dist/tools/keyword-density.d.ts +22 -0
- package/dist/tools/keyword-density.d.ts.map +1 -0
- package/dist/tools/keyword-density.js +176 -0
- package/dist/tools/keyword-density.js.map +1 -0
- package/dist/tools/link-checker.d.ts +22 -0
- package/dist/tools/link-checker.d.ts.map +1 -0
- package/dist/tools/link-checker.js +171 -0
- package/dist/tools/link-checker.js.map +1 -0
- package/dist/tools/meta-analyzer.d.ts +27 -0
- package/dist/tools/meta-analyzer.d.ts.map +1 -0
- package/dist/tools/meta-analyzer.js +161 -0
- package/dist/tools/meta-analyzer.js.map +1 -0
- package/dist/tools/page-speed.d.ts +31 -0
- package/dist/tools/page-speed.d.ts.map +1 -0
- package/dist/tools/page-speed.js +180 -0
- package/dist/tools/page-speed.js.map +1 -0
- package/dist/tools/sitemap-parser.d.ts +29 -0
- package/dist/tools/sitemap-parser.d.ts.map +1 -0
- package/dist/tools/sitemap-parser.js +224 -0
- package/dist/tools/sitemap-parser.js.map +1 -0
- package/package.json +24 -0
- package/src/index.ts +199 -0
- package/src/tools/heading-checker.ts +109 -0
- package/src/tools/keyword-density.ts +180 -0
- package/src/tools/link-checker.ts +163 -0
- package/src/tools/meta-analyzer.ts +148 -0
- package/src/tools/page-speed.ts +190 -0
- package/src/tools/sitemap-parser.ts +230 -0
- package/tsconfig.json +19 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import * as cheerio from "cheerio";
|
|
2
|
+
|
|
3
|
+
/** One `<url>` entry parsed from a sitemap urlset. */
export interface SitemapUrl {
  /** The page URL (`<loc>` element text, trimmed). */
  loc: string;
  /** Last-modification date string (`<lastmod>`), or null when absent. */
  lastmod: string | null;
  /** Crawl-frequency hint (`<changefreq>`), or null when absent. */
  changefreq: string | null;
  /** Relative priority (`<priority>`), kept as the raw string, or null when absent. */
  priority: string | null;
}
|
|
9
|
+
|
|
10
|
+
/** One `<sitemap>` entry parsed from a sitemap index document. */
export interface SitemapIndex {
  /** URL of the child sitemap (`<loc>` element text, trimmed). */
  loc: string;
  /** Last-modification date string (`<lastmod>`), or null when absent. */
  lastmod: string | null;
}
|
|
14
|
+
|
|
15
|
+
/** Full result of analyzing a sitemap (or sitemap index) URL. */
export interface SitemapAnalysis {
  /** The sitemap URL that was actually fetched (may differ from the input page URL). */
  url: string;
  /** "urlset" for a page sitemap, "sitemapindex" for an index, "not_found" when fetching failed. */
  type: "urlset" | "sitemapindex" | "not_found";
  /** Parsed `<url>` entries. Producers may cap this list (parseSitemap caps at 200), so use totalUrls for the true count. */
  urls: SitemapUrl[];
  /** Child sitemap entries when the document is a sitemap index; empty otherwise. */
  sitemapIndexEntries: SitemapIndex[];
  /** Total number of entries found (URLs or index entries), before any capping. */
  totalUrls: number;
  /** Aggregate statistics over the parsed entries. */
  stats: {
    /** Count of entries carrying a `<lastmod>` date. */
    withLastmod: number;
    /** Count of entries carrying a `<changefreq>` hint. */
    withChangefreq: number;
    /** Count of entries carrying a `<priority>` value. */
    withPriority: number;
    /** Distinct hostnames observed across all `<loc>` URLs. */
    uniqueHosts: string[];
    /** Smallest lastmod string by lexicographic sort, or null when none present. */
    oldestLastmod: string | null;
    /** Largest lastmod string by lexicographic sort, or null when none present. */
    newestLastmod: string | null;
  };
  /** Problems that likely hurt crawling or indexing. */
  issues: string[];
  /** Softer, optional suggestions for improvement. */
  recommendations: string[];
}
|
|
32
|
+
|
|
33
|
+
async function fetchSitemap(url: string): Promise<string> {
|
|
34
|
+
const response = await fetch(url, {
|
|
35
|
+
headers: { "User-Agent": "MCPSEOTools/1.0 (Sitemap Parser)" },
|
|
36
|
+
redirect: "follow",
|
|
37
|
+
signal: AbortSignal.timeout(20000),
|
|
38
|
+
});
|
|
39
|
+
if (!response.ok) {
|
|
40
|
+
throw new Error(`HTTP ${response.status} fetching sitemap at ${url}`);
|
|
41
|
+
}
|
|
42
|
+
return response.text();
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
function resolveSitemapUrl(pageUrl: string): string {
|
|
46
|
+
try {
|
|
47
|
+
const parsed = new URL(pageUrl);
|
|
48
|
+
// If the URL already ends with sitemap.xml or similar, use it directly
|
|
49
|
+
if (parsed.pathname.includes("sitemap")) return pageUrl;
|
|
50
|
+
// Otherwise, try the standard location
|
|
51
|
+
return `${parsed.protocol}//${parsed.host}/sitemap.xml`;
|
|
52
|
+
} catch {
|
|
53
|
+
return pageUrl;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export async function parseSitemap(url: string): Promise<SitemapAnalysis> {
|
|
58
|
+
const sitemapUrl = resolveSitemapUrl(url);
|
|
59
|
+
let xml: string;
|
|
60
|
+
|
|
61
|
+
try {
|
|
62
|
+
xml = await fetchSitemap(sitemapUrl);
|
|
63
|
+
} catch (err: any) {
|
|
64
|
+
// Try robots.txt for sitemap location
|
|
65
|
+
try {
|
|
66
|
+
const robotsUrl = `${new URL(sitemapUrl).protocol}//${new URL(sitemapUrl).host}/robots.txt`;
|
|
67
|
+
const robotsResp = await fetch(robotsUrl, {
|
|
68
|
+
headers: { "User-Agent": "MCPSEOTools/1.0 (Sitemap Parser)" },
|
|
69
|
+
signal: AbortSignal.timeout(10000),
|
|
70
|
+
});
|
|
71
|
+
const robotsTxt = await robotsResp.text();
|
|
72
|
+
const sitemapMatch = robotsTxt.match(/Sitemap:\s*(.+)/i);
|
|
73
|
+
if (sitemapMatch) {
|
|
74
|
+
xml = await fetchSitemap(sitemapMatch[1].trim());
|
|
75
|
+
} else {
|
|
76
|
+
return {
|
|
77
|
+
url: sitemapUrl,
|
|
78
|
+
type: "not_found",
|
|
79
|
+
urls: [],
|
|
80
|
+
sitemapIndexEntries: [],
|
|
81
|
+
totalUrls: 0,
|
|
82
|
+
stats: { withLastmod: 0, withChangefreq: 0, withPriority: 0, uniqueHosts: [], oldestLastmod: null, newestLastmod: null },
|
|
83
|
+
issues: [`No sitemap found at ${sitemapUrl} and no Sitemap directive in robots.txt.`],
|
|
84
|
+
recommendations: ["Create a sitemap.xml and submit it to search engines via Google Search Console."],
|
|
85
|
+
};
|
|
86
|
+
}
|
|
87
|
+
} catch {
|
|
88
|
+
return {
|
|
89
|
+
url: sitemapUrl,
|
|
90
|
+
type: "not_found",
|
|
91
|
+
urls: [],
|
|
92
|
+
sitemapIndexEntries: [],
|
|
93
|
+
totalUrls: 0,
|
|
94
|
+
stats: { withLastmod: 0, withChangefreq: 0, withPriority: 0, uniqueHosts: [], oldestLastmod: null, newestLastmod: null },
|
|
95
|
+
issues: [`Failed to fetch sitemap: ${err.message}`],
|
|
96
|
+
recommendations: ["Ensure a sitemap.xml exists at the root of your domain."],
|
|
97
|
+
};
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const $ = cheerio.load(xml, { xml: true });
|
|
102
|
+
|
|
103
|
+
const issues: string[] = [];
|
|
104
|
+
const recommendations: string[] = [];
|
|
105
|
+
|
|
106
|
+
// Check if it's a sitemap index
|
|
107
|
+
const sitemapIndexEntries: SitemapIndex[] = [];
|
|
108
|
+
$("sitemapindex > sitemap").each((_, el) => {
|
|
109
|
+
sitemapIndexEntries.push({
|
|
110
|
+
loc: $(el).find("loc").text().trim(),
|
|
111
|
+
lastmod: $(el).find("lastmod").text().trim() || null,
|
|
112
|
+
});
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
if (sitemapIndexEntries.length > 0) {
|
|
116
|
+
if (sitemapIndexEntries.length > 500) {
|
|
117
|
+
issues.push(`Sitemap index has ${sitemapIndexEntries.length} entries. Google supports up to 500 sitemaps per index.`);
|
|
118
|
+
}
|
|
119
|
+
const withoutLastmod = sitemapIndexEntries.filter((s) => !s.lastmod).length;
|
|
120
|
+
if (withoutLastmod > 0) {
|
|
121
|
+
recommendations.push(`${withoutLastmod} sitemap index entries lack lastmod dates. Adding them helps search engines prioritize crawling.`);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
return {
|
|
125
|
+
url: sitemapUrl,
|
|
126
|
+
type: "sitemapindex",
|
|
127
|
+
urls: [],
|
|
128
|
+
sitemapIndexEntries,
|
|
129
|
+
totalUrls: sitemapIndexEntries.length,
|
|
130
|
+
stats: { withLastmod: sitemapIndexEntries.length - (sitemapIndexEntries.filter((s) => !s.lastmod).length), withChangefreq: 0, withPriority: 0, uniqueHosts: [], oldestLastmod: null, newestLastmod: null },
|
|
131
|
+
issues,
|
|
132
|
+
recommendations,
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Parse URL set
|
|
137
|
+
const urls: SitemapUrl[] = [];
|
|
138
|
+
$("urlset > url").each((_, el) => {
|
|
139
|
+
urls.push({
|
|
140
|
+
loc: $(el).find("loc").text().trim(),
|
|
141
|
+
lastmod: $(el).find("lastmod").text().trim() || null,
|
|
142
|
+
changefreq: $(el).find("changefreq").text().trim() || null,
|
|
143
|
+
priority: $(el).find("priority").text().trim() || null,
|
|
144
|
+
});
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
// Stats
|
|
148
|
+
const withLastmod = urls.filter((u) => u.lastmod).length;
|
|
149
|
+
const withChangefreq = urls.filter((u) => u.changefreq).length;
|
|
150
|
+
const withPriority = urls.filter((u) => u.priority).length;
|
|
151
|
+
|
|
152
|
+
const hosts = new Set<string>();
|
|
153
|
+
for (const u of urls) {
|
|
154
|
+
try {
|
|
155
|
+
hosts.add(new URL(u.loc).hostname);
|
|
156
|
+
} catch { /* skip */ }
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
const lastmods = urls
|
|
160
|
+
.map((u) => u.lastmod)
|
|
161
|
+
.filter((d): d is string => d !== null)
|
|
162
|
+
.sort();
|
|
163
|
+
const oldestLastmod = lastmods.length > 0 ? lastmods[0] : null;
|
|
164
|
+
const newestLastmod = lastmods.length > 0 ? lastmods[lastmods.length - 1] : null;
|
|
165
|
+
|
|
166
|
+
// Analysis
|
|
167
|
+
if (urls.length === 0) {
|
|
168
|
+
issues.push("Sitemap contains no URLs.");
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
if (urls.length > 50000) {
|
|
172
|
+
issues.push(`Sitemap has ${urls.length} URLs. Maximum allowed per sitemap file is 50,000. Split into multiple sitemaps.`);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
if (withLastmod === 0 && urls.length > 0) {
|
|
176
|
+
recommendations.push("No lastmod dates found. Adding lastmod helps search engines identify updated content.");
|
|
177
|
+
} else if (withLastmod < urls.length * 0.5) {
|
|
178
|
+
recommendations.push(`Only ${withLastmod} of ${urls.length} URLs have lastmod dates. Add dates to all entries.`);
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
// Check for stale lastmod
|
|
182
|
+
if (newestLastmod) {
|
|
183
|
+
const newestDate = new Date(newestLastmod);
|
|
184
|
+
const sixMonthsAgo = new Date();
|
|
185
|
+
sixMonthsAgo.setMonth(sixMonthsAgo.getMonth() - 6);
|
|
186
|
+
if (newestDate < sixMonthsAgo) {
|
|
187
|
+
issues.push(`Most recent lastmod is ${newestLastmod}. The sitemap appears outdated. Update it regularly.`);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
// Check for duplicate URLs
|
|
192
|
+
const locSet = new Set<string>();
|
|
193
|
+
let duplicates = 0;
|
|
194
|
+
for (const u of urls) {
|
|
195
|
+
if (locSet.has(u.loc)) duplicates++;
|
|
196
|
+
locSet.add(u.loc);
|
|
197
|
+
}
|
|
198
|
+
if (duplicates > 0) {
|
|
199
|
+
issues.push(`${duplicates} duplicate URL(s) found. Remove duplicates to avoid crawl budget waste.`);
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
// Check for non-canonical patterns
|
|
203
|
+
const mixedProtocol = urls.some((u) => u.loc.startsWith("http://")) && urls.some((u) => u.loc.startsWith("https://"));
|
|
204
|
+
if (mixedProtocol) {
|
|
205
|
+
issues.push("Sitemap contains both HTTP and HTTPS URLs. Use only HTTPS URLs.");
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
const mixedTrailingSlash = urls.some((u) => u.loc.endsWith("/")) && urls.some((u) => !u.loc.endsWith("/") && !u.loc.match(/\.\w{2,5}$/));
|
|
209
|
+
if (mixedTrailingSlash) {
|
|
210
|
+
recommendations.push("Inconsistent trailing slashes in URLs. Standardize to one pattern for cleaner crawling.");
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
return {
|
|
214
|
+
url: sitemapUrl,
|
|
215
|
+
type: "urlset",
|
|
216
|
+
urls: urls.slice(0, 200), // Cap output to avoid overwhelming responses
|
|
217
|
+
sitemapIndexEntries: [],
|
|
218
|
+
totalUrls: urls.length,
|
|
219
|
+
stats: {
|
|
220
|
+
withLastmod,
|
|
221
|
+
withChangefreq,
|
|
222
|
+
withPriority,
|
|
223
|
+
uniqueHosts: [...hosts],
|
|
224
|
+
oldestLastmod,
|
|
225
|
+
newestLastmod,
|
|
226
|
+
},
|
|
227
|
+
issues,
|
|
228
|
+
recommendations,
|
|
229
|
+
};
|
|
230
|
+
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "NodeNext",
|
|
5
|
+
"moduleResolution": "NodeNext",
|
|
6
|
+
"outDir": "dist",
|
|
7
|
+
"rootDir": "src",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"skipLibCheck": true,
|
|
11
|
+
"forceConsistentCasingInFileNames": true,
|
|
12
|
+
"declaration": true,
|
|
13
|
+
"declarationMap": true,
|
|
14
|
+
"sourceMap": true,
|
|
15
|
+
"resolveJsonModule": true
|
|
16
|
+
},
|
|
17
|
+
"include": ["src/**/*"],
|
|
18
|
+
"exclude": ["node_modules", "dist"]
|
|
19
|
+
}
|