@houseofmvps/claude-rank 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,514 @@
1
+ /**
2
+ * seo-scanner.mjs — Core SEO scanner with 37 rules and cross-page analysis.
3
+ * Scans a directory of HTML files and returns a structured findings + score report.
4
+ */
5
+
6
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { parseHtml, findHtmlFiles } from './lib/html-parser.mjs';
import { checkFileSize } from './lib/security.mjs';
10
+
11
+ // ---------------------------------------------------------------------------
12
+ // Backend framework detection
13
+ // ---------------------------------------------------------------------------
14
+
15
// Dependency names that mark a project as server-side. Grouped by ecosystem
// purely for readability; membership is all that matters to callers.
const BACKEND_FRAMEWORKS = new Set([
  // Node.js HTTP frameworks
  ...['hono', 'express', 'fastify', 'koa', 'nestjs', '@nestjs/core'],
  ...['restify', 'polka', 'micro', 'sails', 'loopback', '@loopback/core'],
  // Frameworks from other ecosystems
  ...['django', 'flask', 'fastapi', 'rails', 'laravel', 'spring'],
]);
20
+
21
/**
 * Tell whether a project with no HTML files declares a known backend framework.
 *
 * Reads `<rootDir>/package.json` and looks at `dependencies` and
 * `devDependencies`. A missing or unparsable manifest counts as "not backend-only".
 *
 * @param {string} rootDir — directory expected to contain package.json
 * @param {string[]} htmlFiles — HTML files already discovered under rootDir
 * @returns {boolean} true only when htmlFiles is empty and a listed framework is declared
 */
function isBackendOnlyProject(rootDir, htmlFiles) {
  // Any HTML at all means there is something to scan.
  if (htmlFiles.length > 0) return false;

  try {
    const manifestText = fs.readFileSync(path.join(rootDir, 'package.json'), 'utf8');
    const manifest = JSON.parse(manifestText);
    const declared = { ...manifest.dependencies, ...manifest.devDependencies };
    return Object.keys(declared).some((name) => BACKEND_FRAMEWORKS.has(name));
  } catch {
    // no package.json or parse error — not a backend-only project
    return false;
  }
}
44
+
45
+ // ---------------------------------------------------------------------------
46
+ // Rule definitions
47
+ // ---------------------------------------------------------------------------
48
+
49
// Rule table: rule id → { severity, deduction }.
// Every rule in a severity tier shares that tier's score deduction, so the
// table is declared per tier and expanded into one flat lookup object.
const RULES = Object.fromEntries(
  [
    ['critical', 20, [
      'has-noindex',
      'canonical-points-elsewhere',
    ]],
    ['high', 10, [
      'missing-title',
      'missing-meta-description',
      'missing-h1',
      'thin-content',
      'duplicate-title',
      'duplicate-meta-description',
      'canonical-conflict',
      'orphan-page',
      'no-internal-links',
      'missing-lang',
    ]],
    ['medium', 5, [
      'title-too-long',
      'title-too-short',
      'meta-description-too-long',
      'meta-description-too-short',
      'missing-viewport',
      'missing-charset',
      'missing-og-title',
      'missing-og-description',
      'missing-og-image',
      'missing-canonical',
      'multiple-h1',
      'skipped-heading-level',
      'images-missing-alt',
      'images-missing-dimensions',
      'missing-main-landmark',
      'missing-json-ld',
      'missing-favicon',
      'no-analytics',
    ]],
    ['low', 2, [
      'missing-og-url',
      'missing-twitter-card',
      'missing-twitter-image',
      'missing-nav-landmark',
      'missing-footer-landmark',
      'no-manifest',
      'all-scripts-blocking',
    ]],
  ].flatMap(([severity, deduction, ruleIds]) =>
    ruleIds.map((ruleId) => [ruleId, { severity, deduction }])),
);
95
+
96
+ // ---------------------------------------------------------------------------
97
+ // Per-file rule checks
98
+ // ---------------------------------------------------------------------------
99
+
100
/**
 * Decide whether a site-relative canonical URL refers to the page stored at `relPosix`.
 *
 * Accepted spellings of "this page": the file path itself, the same path without its
 * .html/.htm extension (clean URLs), and — for index files — the containing directory
 * with or without a trailing slash. The canonical is interpreted relative to the site
 * root; a leading "./" or "/" is ignored.
 *
 * @param {string} canonical — trimmed, non-absolute canonical URL
 * @param {string} relPosix — file path relative to the scan root, using "/" separators
 * @returns {boolean} true when the canonical is a recognised spelling of this page
 */
function canonicalMatchesFile(canonical, relPosix) {
  const selfPath = `/${relPosix}`;
  const accepted = new Set([selfPath, selfPath.replace(/\.html?$/, '')]);

  if (/\/index\.html?$/.test(selfPath)) {
    const dir = selfPath.replace(/\/index\.html?$/, '');
    accepted.add(`${dir}/`);
    // For the root index `dir` is empty; "/" is already covered above.
    if (dir !== '') accepted.add(dir);
  }

  const target = `/${canonical.replace(/^\.\//, '').replace(/^\//, '')}`;
  return accepted.has(target);
}

/**
 * Run per-file checks. Returns array of finding objects.
 *
 * Each finding is `{ rule, severity, file, message }`, where `file` is the path
 * relative to `rootDir` and `severity` comes from the RULES table.
 *
 * @param {object} state — PageState from parseHtml
 * @param {string} filePath — absolute path
 * @param {string} rootDir — root dir for relative path display
 * @param {object} opts — { multiPage: boolean }; enables the no-internal-links rule
 * @returns {object[]} findings in a fixed order (critical, high, medium, low)
 */
function checkFile(state, filePath, rootDir, opts = {}) {
  const findings = [];
  const rel = path.relative(rootDir, filePath);

  function add(rule, message, context = {}) {
    const def = RULES[rule];
    findings.push({
      rule,
      severity: def.severity,
      file: rel,
      message,
      ...context,
    });
  }

  // Critical
  if (state.hasNoindex) {
    add('has-noindex', 'Page has noindex directive — will be excluded from search engines');
  }

  if (state.hasCanonical && state.canonicalUrl) {
    const canonical = state.canonicalUrl.trim();
    // Absolute (any-case http/https) and protocol-relative canonicals name a host,
    // which cannot be compared against a local file path, so they are not judged here.
    const namesHost = /^(?:https?:)?\/\//i.test(canonical);
    if (!namesHost && !canonicalMatchesFile(canonical, rel.replace(/\\/g, '/'))) {
      add('canonical-points-elsewhere', `Canonical URL "${canonical}" points away from this page`);
    }
  }

  // High
  if (!state.hasTitle) {
    add('missing-title', 'Page is missing a <title> tag');
  }

  if (!state.hasMetaDescription) {
    add('missing-meta-description', 'Page is missing a meta description');
  }

  if (state.h1Count === 0) {
    add('missing-h1', 'Page has no <h1> heading');
  }

  // A word count of 0 is deliberately not reported as thin content.
  if (state.wordCount > 0 && state.wordCount < 300) {
    add('thin-content', `Page has only ${state.wordCount} words (minimum recommended: 300)`);
  }

  if (!state.hasLang) {
    add('missing-lang', 'HTML element is missing a lang attribute');
  }

  if (opts.multiPage && state.internalLinks.length === 0) {
    add('no-internal-links', 'Page has no outgoing internal links');
  }

  // Medium
  if (state.hasTitle && state.titleText.length > 60) {
    add('title-too-long', `Title is ${state.titleText.length} chars (max recommended: 60)`);
  }

  if (state.hasTitle && state.titleText.length < 20) {
    add('title-too-short', `Title is only ${state.titleText.length} chars (min recommended: 20)`);
  }

  if (state.hasMetaDescription && state.metaDescriptionText.length > 160) {
    add('meta-description-too-long', `Meta description is ${state.metaDescriptionText.length} chars (max recommended: 160)`);
  }

  if (state.hasMetaDescription && state.metaDescriptionText.length > 0 && state.metaDescriptionText.length < 70) {
    add('meta-description-too-short', `Meta description is only ${state.metaDescriptionText.length} chars (min recommended: 70)`);
  }

  if (!state.hasViewport) {
    add('missing-viewport', 'Page is missing a viewport meta tag');
  }

  if (!state.hasCharset) {
    add('missing-charset', 'Page is missing a charset declaration');
  }

  if (!state.hasOgTitle) {
    add('missing-og-title', 'Page is missing og:title Open Graph tag');
  }

  if (!state.hasOgDescription) {
    add('missing-og-description', 'Page is missing og:description Open Graph tag');
  }

  if (!state.hasOgImage) {
    add('missing-og-image', 'Page is missing og:image Open Graph tag');
  }

  if (!state.hasCanonical) {
    add('missing-canonical', 'Page is missing a canonical link tag');
  }

  if (state.h1Count > 1) {
    add('multiple-h1', `Page has ${state.h1Count} <h1> tags (should have exactly 1)`);
  }

  // Skipped heading level — e.g. h1 → h3 without h2
  for (let i = 1; i < state.headingLevels.length; i++) {
    const previous = state.headingLevels[i - 1];
    const current = state.headingLevels[i];
    if (current - previous > 1) {
      add('skipped-heading-level', `Heading level skipped: h${previous} → h${current}`);
      break; // report once per page
    }
  }

  if (state.imagesWithoutAlt > 0) {
    add('images-missing-alt', `${state.imagesWithoutAlt} image(s) missing alt attribute`);
  }

  if (state.imagesWithoutDimensions > 0) {
    add('images-missing-dimensions', `${state.imagesWithoutDimensions} image(s) missing width/height attributes`);
  }

  if (!state.hasMain) {
    add('missing-main-landmark', 'Page is missing a <main> landmark element');
  }

  if (state.jsonLdScripts === 0) {
    add('missing-json-ld', 'Page has no JSON-LD structured data');
  }

  if (!state.hasFavicon) {
    add('missing-favicon', 'Page is missing a favicon link');
  }

  if (!state.hasAnalytics) {
    add('no-analytics', 'No analytics provider detected on this page');
  }

  // Low
  if (!state.hasOgUrl) {
    add('missing-og-url', 'Page is missing og:url Open Graph tag');
  }

  if (!state.hasTwitterCard) {
    add('missing-twitter-card', 'Page is missing twitter:card meta tag');
  }

  if (!state.hasTwitterImage) {
    add('missing-twitter-image', 'Page is missing twitter:image meta tag');
  }

  if (!state.hasNav) {
    add('missing-nav-landmark', 'Page is missing a <nav> landmark element');
  }

  if (!state.hasFooter) {
    add('missing-footer-landmark', 'Page is missing a <footer> landmark element');
  }

  if (!state.hasManifest) {
    add('no-manifest', 'Page is missing a web app manifest link');
  }

  // Only reported when the page has scripts and none of them is counted as deferred.
  if (state.totalScripts > 0 && state.deferredScripts === 0) {
    add('all-scripts-blocking', `All ${state.totalScripts} script(s) are render-blocking (no async/defer)`);
  }

  return findings;
}
276
+
277
+ // ---------------------------------------------------------------------------
278
+ // Cross-page analysis
279
+ // ---------------------------------------------------------------------------
280
+
281
/**
 * Build a set of all linked-to paths from all pages.
 *
 * For every internal href the set receives, each with and without a trailing
 * .html/.htm extension:
 *   1. the href with one leading "/" or "./" removed;
 *   2. that value with any "?query" / "#fragment" and trailing slashes removed;
 *   3. the last path segment (basename) of the cleaned value.
 *
 * Forms 2 and 3 exist so that lookups by file basename still succeed for links
 * such as "/blog/post.html", "about.html#team" or "/docs/guide/".
 *
 * @param {{ state: { internalLinks: string[] } }[]} allStates — parsed pages
 * @returns {Set<string>} every normalised link target found on any page
 */
function buildLinkedSet(allStates) {
  const linked = new Set();
  const stripExtension = (value) => value.replace(/\.html?$/, '');

  for (const { state } of allStates) {
    for (const href of state.internalLinks) {
      // Normalise: /about → about, /about.html → about.html, ./about → about
      const norm = href.replace(/^\.?\//, '');
      linked.add(norm);
      linked.add(stripExtension(norm));

      // Drop query string / fragment and trailing slashes before deriving the basename.
      const cleaned = norm.replace(/[?#].*$/, '').replace(/\/+$/, '');
      if (cleaned === '') continue; // link to the site root or a bare fragment

      const basename = cleaned.slice(cleaned.lastIndexOf('/') + 1);
      for (const form of [cleaned, basename]) {
        linked.add(form);
        linked.add(stripExtension(form));
      }
    }
  }
  return linked;
}
298
+
299
/**
 * Emit one finding per page for every group of pages that share the same key.
 *
 * @param {{ filePath: string, state: object }[]} allStates — parsed pages
 * @param {string} rootDir — root dir for relative path display
 * @param {string} rule — rule id to report; its severity is read from RULES
 * @param {(state: object) => (string|undefined)} keyOf — grouping key, or undefined to skip the page
 * @param {(key: string, count: number) => string} describe — builds the finding message
 * @returns {object[]} findings, grouped in first-seen key order
 */
function findSharedValues(allStates, rootDir, rule, keyOf, describe) {
  const groups = new Map(); // key → [filePath, ...]
  for (const { filePath, state } of allStates) {
    const key = keyOf(state);
    if (key === undefined) continue;
    const bucket = groups.get(key);
    if (bucket) {
      bucket.push(filePath);
    } else {
      groups.set(key, [filePath]);
    }
  }

  const findings = [];
  for (const [key, files] of groups) {
    if (files.length < 2) continue;
    const relFiles = files.map((f) => path.relative(rootDir, f));
    for (const file of relFiles) {
      findings.push({
        rule,
        severity: RULES[rule].severity,
        file,
        message: describe(key, files.length),
        duplicates: [...relFiles], // fresh copy so findings never share one array
      });
    }
  }
  return findings;
}

/**
 * Run cross-page checks. Returns array of finding objects.
 *
 * Checks, in output order: duplicate titles, duplicate meta descriptions
 * (both compared trimmed and case-insensitively), pages sharing one canonical
 * URL, and orphan pages. A page is an orphan when no OTHER page links to its
 * basename; links a page makes to itself do not count, and index files are
 * never reported.
 *
 * @param {{ filePath: string, state: object }[]} allStates — parsed pages
 * @param {string} rootDir — root dir for relative path display
 * @returns {object[]} findings
 */
function crossPageChecks(allStates, rootDir) {
  const findings = [
    // --- Duplicate title detection ---
    ...findSharedValues(
      allStates,
      rootDir,
      'duplicate-title',
      (state) => (state.hasTitle && state.titleText ? state.titleText.trim().toLowerCase() : undefined),
      (title, count) => `Duplicate title "${title}" shared across ${count} pages`,
    ),
    // --- Duplicate meta description detection ---
    ...findSharedValues(
      allStates,
      rootDir,
      'duplicate-meta-description',
      (state) => (state.hasMetaDescription && state.metaDescriptionText
        ? state.metaDescriptionText.trim().toLowerCase()
        : undefined),
      (_desc, count) => `Duplicate meta description shared across ${count} pages`,
    ),
    // --- Canonical conflict detection ---
    ...findSharedValues(
      allStates,
      rootDir,
      'canonical-conflict',
      (state) => (state.hasCanonical && state.canonicalUrl ? state.canonicalUrl.trim() : undefined),
      (canonical) => `Multiple pages share canonical URL "${canonical}"`,
    ),
  ];

  // --- Orphan page detection ---
  if (allStates.length > 1) {
    // One link set per page, plus a count of how many pages link to each target,
    // so a page's own links can be discounted when judging that page.
    const linksByPage = allStates.map((entry) => buildLinkedSet([entry]));
    const linkingPageCount = new Map();
    for (const targets of linksByPage) {
      for (const target of targets) {
        linkingPageCount.set(target, (linkingPageCount.get(target) || 0) + 1);
      }
    }

    allStates.forEach(({ filePath }, index) => {
      const filename = path.basename(filePath);
      const nameNoExt = filename.replace(/\.html?$/, '');

      // Skip index files
      if (nameNoExt === 'index') return;

      const ownLinks = linksByPage[index];
      const isLinkedFromElsewhere = [filename, nameNoExt, '/' + filename, '/' + nameNoExt].some(
        (candidate) => (linkingPageCount.get(candidate) || 0) - (ownLinks.has(candidate) ? 1 : 0) > 0,
      );

      if (!isLinkedFromElsewhere) {
        findings.push({
          rule: 'orphan-page',
          severity: RULES['orphan-page'].severity,
          file: path.relative(rootDir, filePath),
          message: `Page "${filename}" has no incoming internal links from other pages`,
        });
      }
    });
  }

  return findings;
}
406
+
407
+ // ---------------------------------------------------------------------------
408
+ // Score calculation
409
+ // ---------------------------------------------------------------------------
410
+
411
/**
 * Derive the SEO score from a list of findings.
 *
 * Starts at 100 and subtracts each triggered rule's deduction exactly once,
 * no matter how many findings share that rule. Rule ids absent from RULES
 * subtract nothing. The result never drops below 0.
 *
 * @param {object[]} findings
 * @returns {number} 0-100
 */
function calculateScore(findings) {
  const distinctRuleIds = [...new Set(findings.map(({ rule }) => rule))];
  const totalDeduction = distinctRuleIds.reduce((sum, ruleId) => {
    const entry = RULES[ruleId];
    return entry ? sum + entry.deduction : sum;
  }, 0);
  return Math.max(0, 100 - totalDeduction);
}
427
+
428
+ // ---------------------------------------------------------------------------
429
+ // Main: scanDirectory
430
+ // ---------------------------------------------------------------------------
431
+
432
/**
 * Scan a directory for HTML files and run all SEO rules.
 *
 * Files that fail the size check or cannot be read are skipped. Multi-page
 * behaviour (the no-internal-links rule and the cross-page checks) is enabled
 * only when more than one page was actually parsed, so skipped files cannot
 * make a single remaining page look like part of a multi-page site.
 *
 * @param {string} rootDir — path to project root (resolved to an absolute path)
 * @returns {object} { files_scanned, findings, scores, summary } or { skipped, reason }
 */
export function scanDirectory(rootDir) {
  const absRoot = path.resolve(rootDir);
  const htmlFiles = findHtmlFiles(absRoot);

  // Backend-only detection
  if (isBackendOnlyProject(absRoot, htmlFiles)) {
    return {
      skipped: true,
      reason: 'No HTML files found — detected backend-only project (has backend framework dependency)',
    };
  }

  if (htmlFiles.length === 0) {
    return {
      skipped: true,
      reason: 'No HTML files found in directory',
    };
  }

  // Parse all files
  const allStates = [];
  for (const filePath of htmlFiles) {
    const sizeCheck = checkFileSize(filePath, fs.statSync);
    if (!sizeCheck.ok) continue;

    let content;
    try {
      content = fs.readFileSync(filePath, 'utf8');
    } catch {
      // Unreadable file: leave it out of the report rather than abort the scan.
      continue;
    }

    allStates.push({ filePath, state: parseHtml(content) });
  }

  // Based on parsed pages, not discovered files (see function doc).
  const multiPage = allStates.length > 1;

  // Run per-file checks
  const perFileFindings = allStates.flatMap(({ filePath, state }) =>
    checkFile(state, filePath, absRoot, { multiPage }));

  // Run cross-page checks
  const crossFindings = multiPage ? crossPageChecks(allStates, absRoot) : [];

  const allFindings = [...perFileFindings, ...crossFindings];

  // Score
  const seoScore = calculateScore(allFindings);

  // Summary counts — findings with a severity outside these four are not counted
  const summary = { critical: 0, high: 0, medium: 0, low: 0 };
  for (const f of allFindings) {
    if (summary[f.severity] !== undefined) {
      summary[f.severity]++;
    }
  }

  return {
    files_scanned: allStates.length,
    findings: allFindings,
    scores: { seo: seoScore },
    summary,
  };
}
505
+
506
+ // ---------------------------------------------------------------------------
507
+ // CLI entry point
508
+ // ---------------------------------------------------------------------------
509
+
510
/**
 * Report whether the module at `moduleUrl` is the script Node was started with.
 *
 * Both paths are passed through realpath so a symlinked entry script still matches.
 * Any failure (no entry script, unparsable URL, missing file) yields false.
 *
 * @param {string} moduleUrl — a file: URL, normally `import.meta.url`
 * @returns {boolean}
 */
function isInvokedDirectly(moduleUrl) {
  const entryScript = process.argv[1];
  if (!entryScript) return false;
  try {
    return fs.realpathSync(entryScript) === fs.realpathSync(fileURLToPath(moduleUrl));
  } catch {
    return false;
  }
}

// Only act as a CLI when executed directly. Without this guard, importing
// scanDirectory from another command-line program would run a scan on that
// program's first argument and print the JSON report as an import side effect.
if (isInvokedDirectly(import.meta.url)) {
  const args = process.argv.slice(2);
  if (args.length > 0) {
    const result = scanDirectory(args[0]);
    console.log(JSON.stringify(result, null, 2));
  }
}