@houseofmvps/claude-rank 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  <div align="center">
2
2
 
3
- # claude-rank
3
+ <img src="assets/hero-banner.png" alt="claude-rank — SEO/GEO/AEO Plugin for Claude Code" width="100%"/>
4
4
 
5
5
  ### The most comprehensive SEO/GEO/AEO plugin for Claude Code. 74+ rules. Auto-fix everything. Dominate search — traditional and AI.
6
6
 
@@ -4,7 +4,26 @@
4
4
 
5
5
  const args = process.argv.slice(2);
6
6
  const jsonFlag = args.includes('--json');
7
- const positional = args.filter(a => a !== '--json');
7
+ const singleFlag = args.includes('--single');
8
+ const reportFlag = args.includes('--report') ? args[args.indexOf('--report') + 1] : null;
9
+ const thresholdIdx = args.indexOf('--threshold');
10
+ const thresholdFlag = thresholdIdx !== -1 ? Number(args[thresholdIdx + 1]) : null;
11
+
12
+ // Parse --pages N flag (default: 50)
13
+ let maxPages = 50;
14
+ const pagesIdx = args.indexOf('--pages');
15
+ if (pagesIdx !== -1 && args[pagesIdx + 1]) {
16
+ const parsed = parseInt(args[pagesIdx + 1], 10);
17
+ if (!isNaN(parsed) && parsed > 0) maxPages = parsed;
18
+ }
19
+
20
+ const positional = args.filter((a, i) => {
21
+ if (a === '--json' || a === '--single') return false;
22
+ if (a === '--report' || a === '--threshold' || a === '--pages') return false;
23
+ // Skip the value after --report, --threshold, or --pages
24
+ if (i > 0 && (args[i - 1] === '--report' || args[i - 1] === '--threshold' || args[i - 1] === '--pages')) return false;
25
+ return true;
26
+ });
8
27
  const [command = 'scan', dir = '.'] = positional;
9
28
 
10
29
  const commands = {
@@ -17,7 +36,7 @@ const commands = {
17
36
  if (command === 'help' || command === '--help') {
18
37
  console.log(`claude-rank — SEO/GEO/AEO toolkit
19
38
 
20
- Usage: claude-rank <command> [directory|url] [--json]
39
+ Usage: claude-rank <command> [directory|url] [flags]
21
40
 
22
41
  Commands:
23
42
  scan Run core SEO scanner (default)
@@ -27,17 +46,28 @@ Commands:
27
46
  help Show this help message
28
47
 
29
48
  Flags:
30
- --json Output raw JSON (for programmatic use)
49
+ --json Output raw JSON (for programmatic use)
50
+ --single Scan only one page (skip multi-page crawl for URLs)
51
+ --pages N Max pages to crawl (default: 50, URL scanning only)
52
+ --report html Run all scanners and save HTML report to claude-rank-report.html
53
+ --threshold N Exit code 1 if score < N (for CI/CD pipelines)
31
54
 
32
55
  URL scanning:
33
- Pass a URL instead of a directory to scan a live page via HTTP.
56
+ Pass a URL instead of a directory to scan a live site via HTTP.
57
+ By default, crawls up to 50 pages following internal links.
58
+ Use --single to scan only the given URL without crawling.
34
59
  Only the "scan" command supports URL scanning.
35
60
 
36
61
  Examples:
37
62
  claude-rank scan ./my-project
38
63
  claude-rank scan https://savemrr.co
64
+ claude-rank scan https://savemrr.co --pages 10
65
+ claude-rank scan https://savemrr.co --single
39
66
  npx @houseofmvps/claude-rank geo .
40
67
  claude-rank scan ./site --json
68
+ claude-rank scan ./site --report html
69
+ claude-rank scan ./site --threshold 80
70
+ claude-rank scan . --report html --threshold 80
41
71
  `);
42
72
  process.exit(0);
43
73
  }
@@ -79,9 +109,11 @@ if (isUrl) {
79
109
  process.exit(1);
80
110
  }
81
111
 
82
- const { scanUrl } = await import(new URL('../tools/url-scanner.mjs', import.meta.url));
112
+ const { scanUrl, scanSite } = await import(new URL('../tools/url-scanner.mjs', import.meta.url));
83
113
  try {
84
- const result = await scanUrl(dir);
114
+ const result = singleFlag
115
+ ? await scanUrl(dir)
116
+ : await scanSite(dir, { maxPages });
85
117
  if (jsonFlag) {
86
118
  console.log(JSON.stringify(result, null, 2));
87
119
  } else {
@@ -93,12 +125,47 @@ if (isUrl) {
93
125
  }
94
126
  } else {
95
127
  // Directory-based scanning
96
- const mod = await import(new URL(toolPath, import.meta.url));
97
128
  const targetDir = resolve(dir);
98
129
 
99
- if (command === 'schema') {
130
+ // --report html: run ALL scanners, generate HTML report
131
+ if (reportFlag === 'html') {
132
+ const { writeFileSync } = await import('node:fs');
133
+ const { generateHtmlReport } = await import(new URL('../tools/lib/report-generator.mjs', import.meta.url));
134
+
135
+ const seoMod = await import(new URL('../tools/seo-scanner.mjs', import.meta.url));
136
+ const geoMod = await import(new URL('../tools/geo-scanner.mjs', import.meta.url));
137
+ const aeoMod = await import(new URL('../tools/aeo-scanner.mjs', import.meta.url));
138
+
139
+ const seo = seoMod.scanDirectory(targetDir);
140
+ const geo = geoMod.scanDirectory(targetDir);
141
+ const aeo = aeoMod.scanDirectory(targetDir);
142
+
143
+ const html = generateHtmlReport({
144
+ seo, geo, aeo,
145
+ target: dir,
146
+ timestamp: new Date().toISOString(),
147
+ });
148
+
149
+ const outPath = resolve('claude-rank-report.html');
150
+ writeFileSync(outPath, html, 'utf-8');
151
+ console.log(`HTML report saved to ${outPath}`);
152
+
153
+ // Also print terminal summaries
154
+ console.log(formatSeoReport(seo));
155
+ console.log(formatGeoReport(geo));
156
+ console.log(formatAeoReport(aeo));
157
+
158
+ // Check threshold against the primary (SEO) score
159
+ if (thresholdFlag != null) {
160
+ const score = seo.scores?.seo ?? 0;
161
+ if (score < thresholdFlag) {
162
+ console.error(`Score ${score} is below threshold ${thresholdFlag}`);
163
+ process.exit(1);
164
+ }
165
+ }
166
+ } else if (command === 'schema') {
100
167
  // schema-engine exports detectSchema (per-file) and findHtmlFiles via html-parser.
101
- // Build a directory-level result by importing the html-parser helper and scanning each file.
168
+ const mod = await import(new URL(toolPath, import.meta.url));
102
169
  const { findHtmlFiles } = await import(new URL('../tools/lib/html-parser.mjs', import.meta.url));
103
170
  const { readFileSync } = await import('node:fs');
104
171
  const files = findHtmlFiles(targetDir);
@@ -116,11 +183,22 @@ if (isUrl) {
116
183
  console.log(formatSchemaReport(results));
117
184
  }
118
185
  } else {
186
+ const mod = await import(new URL(toolPath, import.meta.url));
119
187
  const result = mod.scanDirectory(targetDir);
120
188
  if (jsonFlag) {
121
189
  console.log(JSON.stringify(result, null, 2));
122
190
  } else {
123
191
  console.log(formatters[command](result));
124
192
  }
193
+
194
+ // Check threshold
195
+ if (thresholdFlag != null) {
196
+ const scoreKey = command === 'scan' ? 'seo' : command;
197
+ const score = result.scores?.[scoreKey] ?? 0;
198
+ if (score < thresholdFlag) {
199
+ console.error(`Score ${score} is below threshold ${thresholdFlag}`);
200
+ process.exit(1);
201
+ }
202
+ }
125
203
  }
126
204
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@houseofmvps/claude-rank",
3
- "version": "1.2.1",
3
+ "version": "1.3.0",
4
4
  "description": "The most comprehensive SEO/GEO/AEO plugin for Claude Code. Audit, fix, and dominate search — traditional and AI.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,248 @@
1
+ /**
2
+ * crawler.mjs — Multi-page site crawler using BFS with concurrency control.
3
+ * Follows internal links on the same domain. Uses fetchPage() for SSRF protection.
4
+ * No external dependencies.
5
+ */
6
+
7
+ import { fetchPage } from './url-fetcher.mjs';
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // URL helpers (exported for testing)
11
+ // ---------------------------------------------------------------------------
12
+
13
+ /** File extensions to skip (non-HTML resources) */
14
+ const SKIP_EXTENSIONS = new Set([
15
+ '.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.bmp', '.avif',
16
+ '.css', '.js', '.mjs', '.cjs', '.map',
17
+ '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
18
+ '.zip', '.tar', '.gz', '.rar', '.7z',
19
+ '.mp3', '.mp4', '.wav', '.avi', '.mov', '.webm', '.ogg',
20
+ '.woff', '.woff2', '.ttf', '.eot', '.otf',
21
+ '.xml', '.json', '.csv', '.txt', '.rss', '.atom',
22
+ ]);
23
+
24
+ /** URL path patterns to skip (non-page routes) */
25
+ const SKIP_PATTERNS = [
26
+ /\/api\//i,
27
+ /\/auth\//i,
28
+ /\/login\b/i,
29
+ /\/logout\b/i,
30
+ /\/wp-admin/i,
31
+ /\/cdn-cgi\//i,
32
+ /\/wp-json\//i,
33
+ /\/feed\/?$/i,
34
+ /\/xmlrpc\.php/i,
35
+ /\/wp-login/i,
36
+ /\/admin\//i,
37
+ /\?/, // skip URLs with query strings to avoid crawl traps
38
+ ];
39
+
40
+ /**
41
+ * Normalize a URL: remove fragment, remove trailing slash (except root path).
42
+ * @param {string} urlStr
43
+ * @returns {string}
44
+ */
45
+ export function normalizeUrl(urlStr) {
46
+ try {
47
+ const url = new URL(urlStr);
48
+ url.hash = '';
49
+ // Remove trailing slash unless it's just the root "/"
50
+ if (url.pathname.length > 1 && url.pathname.endsWith('/')) {
51
+ url.pathname = url.pathname.slice(0, -1);
52
+ }
53
+ return url.href;
54
+ } catch {
55
+ return urlStr;
56
+ }
57
+ }
58
+
59
+ /**
60
+ * Check if a URL should be skipped based on extension or path pattern.
61
+ * @param {string} urlStr
62
+ * @returns {boolean}
63
+ */
64
+ export function shouldSkipUrl(urlStr) {
65
+ try {
66
+ const url = new URL(urlStr);
67
+ const pathname = url.pathname.toLowerCase();
68
+
69
+ // Check file extension
70
+ const lastDot = pathname.lastIndexOf('.');
71
+ if (lastDot !== -1) {
72
+ const ext = pathname.slice(lastDot);
73
+ if (SKIP_EXTENSIONS.has(ext)) return true;
74
+ }
75
+
76
+ // Check path patterns
77
+ for (const pattern of SKIP_PATTERNS) {
78
+ if (pattern.test(url.pathname + url.search)) return true;
79
+ }
80
+
81
+ return false;
82
+ } catch {
83
+ return true;
84
+ }
85
+ }
86
+
87
+ /**
88
+ * Check if two URLs share the same hostname.
89
+ * @param {string} urlA
90
+ * @param {string} urlB
91
+ * @returns {boolean}
92
+ */
93
+ export function isSameDomain(urlA, urlB) {
94
+ try {
95
+ const a = new URL(urlA);
96
+ const b = new URL(urlB);
97
+ return a.hostname === b.hostname;
98
+ } catch {
99
+ return false;
100
+ }
101
+ }
102
+
103
+ /**
104
+ * Extract internal links from HTML content.
105
+ * Returns an array of absolute URL strings on the same domain as baseUrl.
106
+ * @param {string} html
107
+ * @param {string} baseUrl
108
+ * @returns {string[]}
109
+ */
110
+ export function extractLinks(html, baseUrl) {
111
+ const links = [];
112
+ // Match <a href="..."> with both single and double quotes
113
+ const regex = /<a\s[^>]*href\s*=\s*(?:"([^"]*)"|'([^']*)')/gi;
114
+ let match;
115
+
116
+ while ((match = regex.exec(html)) !== null) {
117
+ const href = match[1] ?? match[2];
118
+ if (!href) continue;
119
+
120
+ // Skip javascript:, mailto:, tel:, data: schemes
121
+ if (/^(javascript|mailto|tel|data):/i.test(href)) continue;
122
+ // Skip empty or fragment-only
123
+ if (href === '' || href === '#' || href.startsWith('#')) continue;
124
+
125
+ try {
126
+ const resolved = new URL(href, baseUrl).href;
127
+ const normalized = normalizeUrl(resolved);
128
+
129
+ if (isSameDomain(normalized, baseUrl) && !shouldSkipUrl(normalized)) {
130
+ links.push(normalized);
131
+ }
132
+ } catch {
133
+ // Invalid URL — skip
134
+ }
135
+ }
136
+
137
+ // Deduplicate
138
+ return [...new Set(links)];
139
+ }
140
+
141
+ // ---------------------------------------------------------------------------
142
+ // Semaphore for concurrency control
143
+ // ---------------------------------------------------------------------------
144
+
145
+ class Semaphore {
146
+ constructor(max) {
147
+ this._max = max;
148
+ this._active = 0;
149
+ this._queue = [];
150
+ }
151
+
152
+ async acquire() {
153
+ if (this._active < this._max) {
154
+ this._active++;
155
+ return;
156
+ }
157
+ return new Promise(resolve => {
158
+ this._queue.push(resolve);
159
+ });
160
+ }
161
+
162
+ release() {
163
+ this._active--;
164
+ if (this._queue.length > 0) {
165
+ this._active++;
166
+ const next = this._queue.shift();
167
+ next();
168
+ }
169
+ }
170
+ }
171
+
172
+ // ---------------------------------------------------------------------------
173
+ // Main crawler
174
+ // ---------------------------------------------------------------------------
175
+
176
+ /**
177
+ * Crawl a site starting from startUrl, following internal links (BFS).
178
+ * @param {string} startUrl — starting URL
179
+ * @param {object} options
180
+ * @param {number} [options.maxPages=50] — max pages to crawl
181
+ * @param {number} [options.concurrency=3] — concurrent fetches
182
+ * @param {function} [options.onPage] — callback(url, html) called per page
183
+ * @returns {Promise<{ pages: Array<{url: string, html: string, statusCode: number}>, errors: Array<{url: string, error: string}> }>}
184
+ */
185
+ export async function crawlSite(startUrl, options = {}) {
186
+ const {
187
+ maxPages = 50,
188
+ concurrency = 3,
189
+ onPage,
190
+ } = options;
191
+
192
+ const normalizedStart = normalizeUrl(startUrl);
193
+ const visited = new Set();
194
+ const queue = [normalizedStart]; // BFS queue
195
+ const pages = [];
196
+ const errors = [];
197
+ const semaphore = new Semaphore(concurrency);
198
+
199
+ let queued = new Set([normalizedStart]);
200
+ let pagesProcessed = 0;
201
+
202
+ // Process BFS in waves for concurrency
203
+ while (queue.length > 0 && pagesProcessed < maxPages) {
204
+ // Take a batch from the queue (up to concurrency size)
205
+ const batchSize = Math.min(queue.length, maxPages - pagesProcessed, concurrency);
206
+ const batch = queue.splice(0, batchSize);
207
+
208
+ const promises = batch.map(async (url) => {
209
+ if (visited.has(url) || pagesProcessed >= maxPages) return;
210
+ visited.add(url);
211
+
212
+ await semaphore.acquire();
213
+ try {
214
+ pagesProcessed++;
215
+ const num = pagesProcessed;
216
+ process.stderr.write(`Crawling [${num}/${maxPages}] ${url}\n`);
217
+
218
+ const result = await fetchPage(url);
219
+ pages.push({
220
+ url: result.finalUrl,
221
+ html: result.html,
222
+ statusCode: result.statusCode,
223
+ });
224
+
225
+ if (onPage) {
226
+ onPage(result.finalUrl, result.html);
227
+ }
228
+
229
+ // Extract links and add new ones to queue
230
+ const links = extractLinks(result.html, result.finalUrl);
231
+ for (const link of links) {
232
+ if (!queued.has(link) && !visited.has(link) && pagesProcessed + queue.length < maxPages) {
233
+ queued.add(link);
234
+ queue.push(link);
235
+ }
236
+ }
237
+ } catch (err) {
238
+ errors.push({ url, error: err.message });
239
+ } finally {
240
+ semaphore.release();
241
+ }
242
+ });
243
+
244
+ await Promise.all(promises);
245
+ }
246
+
247
+ return { pages, errors };
248
+ }
@@ -444,6 +444,51 @@ export function parseHtml(htmlString) {
444
444
  return state;
445
445
  }
446
446
 
447
+ // ---------------------------------------------------------------------------
448
+ // detectPageType — classify page type from URL path + parsed state
449
+ // ---------------------------------------------------------------------------
450
+
451
+ /**
452
+ * Page type patterns — ordered by priority (first match wins).
453
+ * Each entry: { type, patterns[] } where patterns are matched against
454
+ * the lowercase URL path, title, and h1 text.
455
+ */
456
+ const PAGE_TYPE_RULES = [
457
+ { type: 'contact', patterns: ['contact', 'get in touch', 'reach us'] },
458
+ { type: 'terms', patterns: ['terms', 'conditions', 'tos', 'terms-of-service'] },
459
+ { type: 'privacy', patterns: ['privacy', 'cookie policy', 'gdpr'] },
460
+ { type: 'legal', patterns: ['legal', 'disclaimer', 'imprint'] },
461
+ { type: 'login', patterns: ['login', 'signin', 'sign-in', 'register', 'signup'] },
462
+ { type: '404', patterns: ['404', 'not found', 'page not found'] },
463
+ { type: 'sitemap', patterns: ['sitemap'] },
464
+ ];
465
+
466
+ /**
467
+ * Detect the page type from the file path / URL and parsed HTML state.
468
+ * Returns a page type string: 'contact', 'terms', 'privacy', 'legal',
469
+ * 'login', '404', 'sitemap', or 'content' (default).
470
+ *
471
+ * @param {string} filePath — file path or URL (used for path-based signals)
472
+ * @param {object} state — PageState from parseHtml
473
+ * @returns {string} page type
474
+ */
475
+ export function detectPageType(filePath, state) {
476
+ // Build a combined haystack from path, title, and h1
477
+ const pathLower = (filePath || '').toLowerCase();
478
+ const titleLower = (state.titleText || '').toLowerCase();
479
+ const h1Lower = (state.h1Text || '').toLowerCase();
480
+
481
+ for (const { type, patterns } of PAGE_TYPE_RULES) {
482
+ for (const pattern of patterns) {
483
+ if (pathLower.includes(pattern) || titleLower.includes(pattern) || h1Lower.includes(pattern)) {
484
+ return type;
485
+ }
486
+ }
487
+ }
488
+
489
+ return 'content';
490
+ }
491
+
447
492
  // ---------------------------------------------------------------------------
448
493
  // parseHtmlFile — read file then parseHtml
449
494
  // ---------------------------------------------------------------------------
@@ -0,0 +1,160 @@
1
+ /**
2
+ * report-generator.mjs — Generate self-contained HTML audit reports.
3
+ * No external dependencies. All CSS is inline.
4
+ */
5
+
6
+ /**
7
+ * Generate a self-contained HTML report from scan results.
8
+ * @param {object} options
9
+ * @param {object} options.seo — SEO scan result (optional)
10
+ * @param {object} options.geo — GEO scan result (optional)
11
+ * @param {object} options.aeo — AEO scan result (optional)
12
+ * @param {string} options.target — directory or URL that was scanned
13
+ * @param {string} options.timestamp — ISO timestamp
14
+ * @returns {string} — complete HTML document
15
+ */
16
+ export function generateHtmlReport({ seo, geo, aeo, target, timestamp }) {
17
+ const scanners = [];
18
+ if (seo && !seo.skipped) scanners.push({ label: 'SEO', key: 'seo', data: seo });
19
+ if (geo && !geo.skipped) scanners.push({ label: 'GEO', key: 'geo', data: geo });
20
+ if (aeo && !aeo.skipped) scanners.push({ label: 'AEO', key: 'aeo', data: aeo });
21
+
22
+ const scoreCards = scanners.map(s => {
23
+ const score = s.data.scores[s.key];
24
+ const { color, label } = scoreStyle(score);
25
+ return `
26
+ <div class="score-card">
27
+ <div class="score-ring" style="--score: ${score}; --color: ${color}">
28
+ <svg viewBox="0 0 120 120">
29
+ <circle cx="60" cy="60" r="52" class="ring-bg"/>
30
+ <circle cx="60" cy="60" r="52" class="ring-fill" style="stroke-dashoffset: calc(327 - (327 * ${score} / 100))"/>
31
+ </svg>
32
+ <span class="score-value">${score}</span>
33
+ </div>
34
+ <div class="score-label" style="color: ${color}">${label}</div>
35
+ <div class="score-type">${s.label}</div>
36
+ <div class="score-meta">${s.data.files_scanned} files &middot; ${s.data.findings.length} findings</div>
37
+ </div>`;
38
+ }).join('\n');
39
+
40
+ const allFindings = [];
41
+ for (const s of scanners) {
42
+ for (const f of s.data.findings) {
43
+ allFindings.push({ ...f, scanner: s.label });
44
+ }
45
+ }
46
+
47
+ const SEVERITY_ORDER = { critical: 0, high: 1, medium: 2, low: 3 };
48
+ allFindings.sort((a, b) => (SEVERITY_ORDER[a.severity] ?? 9) - (SEVERITY_ORDER[b.severity] ?? 9));
49
+
50
+ // Group by rule
51
+ const groups = new Map();
52
+ for (const f of allFindings) {
53
+ const key = `${f.scanner}:${f.rule}`;
54
+ if (!groups.has(key)) {
55
+ groups.set(key, { rule: f.rule, severity: f.severity, message: f.message, scanner: f.scanner, files: [] });
56
+ }
57
+ if (f.file && !groups.get(key).files.includes(f.file)) {
58
+ groups.get(key).files.push(f.file);
59
+ }
60
+ }
61
+
62
+ const findingsRows = [...groups.values()].map(g => {
63
+ const badgeColor = severityBadgeColor(g.severity);
64
+ const filesStr = g.files.length > 0
65
+ ? g.files.slice(0, 3).map(f => esc(f)).join(', ') + (g.files.length > 3 ? `, +${g.files.length - 3} more` : '')
66
+ : '—';
67
+ return `
68
+ <tr>
69
+ <td><span class="badge" style="background: ${badgeColor}">${esc(g.severity.toUpperCase())}</span></td>
70
+ <td class="rule-name">${esc(g.rule)}<span class="scanner-tag">${esc(g.scanner)}</span></td>
71
+ <td>${esc(g.message)}</td>
72
+ <td class="files-cell">${filesStr}</td>
73
+ </tr>`;
74
+ }).join('\n');
75
+
76
+ const displayDate = timestamp ? new Date(timestamp).toLocaleString('en-US', {
77
+ dateStyle: 'long', timeStyle: 'short',
78
+ }) : '';
79
+
80
+ return `<!DOCTYPE html>
81
+ <html lang="en">
82
+ <head>
83
+ <meta charset="utf-8"/>
84
+ <meta name="viewport" content="width=device-width, initial-scale=1"/>
85
+ <title>claude-rank Audit Report — ${esc(target)}</title>
86
+ <style>
87
+ *{margin:0;padding:0;box-sizing:border-box}
88
+ body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;background:#0f172a;color:#e2e8f0;line-height:1.6;padding:2rem}
89
+ .container{max-width:960px;margin:0 auto}
90
+ header{text-align:center;margin-bottom:2.5rem;padding-bottom:1.5rem;border-bottom:1px solid #1e293b}
91
+ header h1{font-size:1.75rem;color:#f8fafc;margin-bottom:.25rem}
92
+ header p{color:#94a3b8;font-size:.875rem}
93
+ .scores{display:flex;gap:2rem;justify-content:center;flex-wrap:wrap;margin-bottom:2.5rem}
94
+ .score-card{text-align:center;background:#1e293b;border-radius:12px;padding:1.5rem 2rem;min-width:180px}
95
+ .score-ring{position:relative;width:100px;height:100px;margin:0 auto .75rem}
96
+ .score-ring svg{width:100%;height:100%;transform:rotate(-90deg)}
97
+ .ring-bg{fill:none;stroke:#334155;stroke-width:8}
98
+ .ring-fill{fill:none;stroke:var(--color);stroke-width:8;stroke-linecap:round;stroke-dasharray:327;transition:stroke-dashoffset .5s}
99
+ .score-value{position:absolute;inset:0;display:flex;align-items:center;justify-content:center;font-size:1.5rem;font-weight:700;color:#f8fafc}
100
+ .score-label{font-weight:600;font-size:.875rem;text-transform:uppercase;letter-spacing:.05em}
101
+ .score-type{font-size:1.125rem;font-weight:600;color:#f8fafc;margin-top:.25rem}
102
+ .score-meta{color:#64748b;font-size:.75rem;margin-top:.25rem}
103
+ h2{font-size:1.25rem;color:#f8fafc;margin-bottom:1rem}
104
+ table{width:100%;border-collapse:collapse;font-size:.85rem;margin-bottom:2rem}
105
+ th{text-align:left;color:#94a3b8;font-weight:600;padding:.75rem .5rem;border-bottom:2px solid #1e293b}
106
+ td{padding:.65rem .5rem;border-bottom:1px solid #1e293b;vertical-align:top}
107
+ .badge{display:inline-block;padding:2px 8px;border-radius:4px;font-size:.7rem;font-weight:700;color:#fff;text-transform:uppercase}
108
+ .rule-name{font-weight:600;color:#f8fafc}
109
+ .scanner-tag{margin-left:.5rem;font-size:.65rem;color:#64748b;font-weight:400}
110
+ .files-cell{color:#94a3b8;font-size:.8rem;max-width:200px;word-break:break-all}
111
+ footer{text-align:center;color:#475569;font-size:.75rem;margin-top:2rem;padding-top:1rem;border-top:1px solid #1e293b}
112
+ footer a{color:#64748b}
113
+ .empty{text-align:center;color:#22c55e;padding:2rem;font-size:1rem}
114
+ @media print{body{background:#fff;color:#1e293b;padding:1rem}.score-card{background:#f1f5f9}th{color:#475569;border-color:#cbd5e1}td{border-color:#e2e8f0}.rule-name{color:#0f172a}header{border-color:#cbd5e1}footer{border-color:#cbd5e1;color:#94a3b8}}
115
+ </style>
116
+ </head>
117
+ <body>
118
+ <div class="container">
119
+ <header>
120
+ <h1>claude-rank Audit Report</h1>
121
+ <p>${esc(target)} &mdash; ${esc(displayDate)}</p>
122
+ </header>
123
+
124
+ <section class="scores">
125
+ ${scoreCards || '<p style="color:#94a3b8">No scan results available.</p>'}
126
+ </section>
127
+
128
+ <h2>Findings</h2>
129
+ ${groups.size > 0 ? `
130
+ <table>
131
+ <thead><tr><th>Severity</th><th>Rule</th><th>Message</th><th>Files</th></tr></thead>
132
+ <tbody>
133
+ ${findingsRows}
134
+ </tbody>
135
+ </table>` : '<div class="empty">No findings — looking great!</div>'}
136
+
137
+ <footer>Generated by claude-rank v1.3.0 &mdash; <a href="https://github.com/Houseofmvps/claude-rank">github.com/Houseofmvps/claude-rank</a></footer>
138
+ </div>
139
+ </body>
140
+ </html>`;
141
+ }
142
+
143
+ function scoreStyle(score) {
144
+ if (score >= 90) return { color: '#22c55e', label: 'Excellent' };
145
+ if (score >= 80) return { color: '#3b82f6', label: 'Good' };
146
+ if (score >= 60) return { color: '#eab308', label: 'Needs Work' };
147
+ return { color: '#ef4444', label: 'Poor' };
148
+ }
149
+
150
+ function severityBadgeColor(severity) {
151
+ if (severity === 'critical') return '#dc2626';
152
+ if (severity === 'high') return '#ef4444';
153
+ if (severity === 'medium') return '#eab308';
154
+ return '#64748b';
155
+ }
156
+
157
+ function esc(str) {
158
+ if (!str) return '';
159
+ return String(str).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
160
+ }
@@ -5,7 +5,7 @@
5
5
 
6
6
  import fs from 'node:fs';
7
7
  import path from 'node:path';
8
- import { parseHtml, findHtmlFiles } from './lib/html-parser.mjs';
8
+ import { parseHtml, findHtmlFiles, detectPageType } from './lib/html-parser.mjs';
9
9
  import { checkFileSize } from './lib/security.mjs';
10
10
 
11
11
  // ---------------------------------------------------------------------------
@@ -97,6 +97,13 @@ const RULES = {
97
97
  // Per-file rule checks
98
98
  // ---------------------------------------------------------------------------
99
99
 
100
+ // Page types where thin content is expected and should not be flagged
101
+ const THIN_CONTENT_EXEMPT = new Set(['contact', 'terms', 'privacy', 'legal', 'login', '404', 'sitemap']);
102
+ // Page types where missing analytics is expected
103
+ const NO_ANALYTICS_EXEMPT = new Set(['terms', 'privacy', 'legal']);
104
+ // Page types where missing OG image is expected
105
+ const NO_OG_IMAGE_EXEMPT = new Set(['terms', 'privacy', 'legal']);
106
+
100
107
  /**
101
108
  * Run per-file checks. Returns array of finding objects.
102
109
  * @param {object} state — PageState from parseHtml
@@ -107,6 +114,7 @@ const RULES = {
107
114
  function checkFile(state, filePath, rootDir, opts = {}) {
108
115
  const findings = [];
109
116
  const rel = path.relative(rootDir, filePath);
117
+ const pageType = detectPageType(filePath, state);
110
118
 
111
119
  function add(rule, message, context = {}) {
112
120
  const def = RULES[rule];
@@ -115,6 +123,7 @@ function checkFile(state, filePath, rootDir, opts = {}) {
115
123
  severity: def.severity,
116
124
  file: rel,
117
125
  message,
126
+ pageType,
118
127
  ...context,
119
128
  });
120
129
  }
@@ -151,7 +160,7 @@ function checkFile(state, filePath, rootDir, opts = {}) {
151
160
  add('missing-h1', 'Page has no <h1> heading');
152
161
  }
153
162
 
154
- if (state.wordCount > 0 && state.wordCount < 300) {
163
+ if (state.wordCount > 0 && state.wordCount < 300 && !THIN_CONTENT_EXEMPT.has(pageType)) {
155
164
  add('thin-content', `Page has only ${state.wordCount} words (minimum recommended: 300)`);
156
165
  }
157
166
 
@@ -196,7 +205,7 @@ function checkFile(state, filePath, rootDir, opts = {}) {
196
205
  add('missing-og-description', 'Page is missing og:description Open Graph tag');
197
206
  }
198
207
 
199
- if (!state.hasOgImage) {
208
+ if (!state.hasOgImage && !NO_OG_IMAGE_EXEMPT.has(pageType)) {
200
209
  add('missing-og-image', 'Page is missing og:image Open Graph tag');
201
210
  }
202
211
 
@@ -238,7 +247,7 @@ function checkFile(state, filePath, rootDir, opts = {}) {
238
247
  add('missing-favicon', 'Page is missing a favicon link');
239
248
  }
240
249
 
241
- if (!state.hasAnalytics) {
250
+ if (!state.hasAnalytics && !NO_ANALYTICS_EXEMPT.has(pageType)) {
242
251
  add('no-analytics', 'No analytics provider detected on this page');
243
252
  }
244
253
 
@@ -2,10 +2,12 @@
2
2
  * url-scanner.mjs — Scan a live URL for SEO issues.
3
3
  * Fetches HTML from a URL and runs the same per-page analysis as seo-scanner.
4
4
  * Cross-page rules (duplicates, orphans, canonicals) are skipped for single-URL scans.
5
+ * scanSite() crawls multiple pages and adds cross-page analysis.
5
6
  */
6
7
 
7
- import { parseHtml } from './lib/html-parser.mjs';
8
+ import { parseHtml, detectPageType } from './lib/html-parser.mjs';
8
9
  import { fetchPage } from './lib/url-fetcher.mjs';
10
+ import { crawlSite } from './lib/crawler.mjs';
9
11
 
10
12
  // ---------------------------------------------------------------------------
11
13
  // Rule definitions (same as seo-scanner, minus cross-page-only rules)
@@ -52,6 +54,11 @@ const RULES = {
52
54
  'no-manifest': { severity: 'low', deduction: 2 },
53
55
  'all-scripts-blocking': { severity: 'low', deduction: 2 },
54
56
 
57
+ // Cross-page rules (multi-page crawl only)
58
+ 'duplicate-title': { severity: 'high', deduction: 10 },
59
+ 'duplicate-meta-description':{ severity: 'high', deduction: 10 },
60
+ 'canonical-conflict': { severity: 'high', deduction: 10 },
61
+
55
62
  // HTTP-level rules (URL-scan only)
56
63
  'http-error': { severity: 'critical', deduction: 20 },
57
64
  'redirect-detected': { severity: 'low', deduction: 2 },
@@ -61,8 +68,16 @@ const RULES = {
61
68
  // Per-page rule checks (reused from seo-scanner logic)
62
69
  // ---------------------------------------------------------------------------
63
70
 
71
+ // Page types where thin content is expected and should not be flagged
72
+ const THIN_CONTENT_EXEMPT = new Set(['contact', 'terms', 'privacy', 'legal', 'login', '404', 'sitemap']);
73
+ // Page types where missing analytics is expected
74
+ const NO_ANALYTICS_EXEMPT = new Set(['terms', 'privacy', 'legal']);
75
+ // Page types where missing OG image is expected
76
+ const NO_OG_IMAGE_EXEMPT = new Set(['terms', 'privacy', 'legal']);
77
+
64
78
  function checkPage(state, pageUrl) {
65
79
  const findings = [];
80
+ const pageType = detectPageType(pageUrl, state);
66
81
 
67
82
  function add(rule, message, context = {}) {
68
83
  const def = RULES[rule];
@@ -71,6 +86,7 @@ function checkPage(state, pageUrl) {
71
86
  severity: def.severity,
72
87
  file: pageUrl,
73
88
  message,
89
+ pageType,
74
90
  ...context,
75
91
  });
76
92
  }
@@ -109,7 +125,7 @@ function checkPage(state, pageUrl) {
109
125
  add('missing-h1', 'Page has no <h1> heading');
110
126
  }
111
127
 
112
- if (state.wordCount > 0 && state.wordCount < 300) {
128
+ if (state.wordCount > 0 && state.wordCount < 300 && !THIN_CONTENT_EXEMPT.has(pageType)) {
113
129
  add('thin-content', `Page has only ${state.wordCount} words (minimum recommended: 300)`);
114
130
  }
115
131
 
@@ -150,7 +166,7 @@ function checkPage(state, pageUrl) {
150
166
  add('missing-og-description', 'Page is missing og:description Open Graph tag');
151
167
  }
152
168
 
153
- if (!state.hasOgImage) {
169
+ if (!state.hasOgImage && !NO_OG_IMAGE_EXEMPT.has(pageType)) {
154
170
  add('missing-og-image', 'Page is missing og:image Open Graph tag');
155
171
  }
156
172
 
@@ -191,7 +207,7 @@ function checkPage(state, pageUrl) {
191
207
  add('missing-favicon', 'Page is missing a favicon link');
192
208
  }
193
209
 
194
- if (!state.hasAnalytics) {
210
+ if (!state.hasAnalytics && !NO_ANALYTICS_EXEMPT.has(pageType)) {
195
211
  add('no-analytics', 'No analytics provider detected on this page');
196
212
  }
197
213
 
@@ -333,6 +349,151 @@ export async function scanUrl(url) {
333
349
  };
334
350
  }
335
351
 
352
+ // ---------------------------------------------------------------------------
353
+ // Cross-page checks (for multi-page crawl)
354
+ // ---------------------------------------------------------------------------
355
+
356
+ function crossPageChecks(allStates) {
357
+ const findings = [];
358
+
359
+ // --- Duplicate title detection ---
360
+ const titleMap = new Map();
361
+ for (const { url, state } of allStates) {
362
+ if (state.hasTitle && state.titleText) {
363
+ const title = state.titleText.trim().toLowerCase();
364
+ if (!titleMap.has(title)) titleMap.set(title, []);
365
+ titleMap.get(title).push(url);
366
+ }
367
+ }
368
+ for (const [title, urls] of titleMap) {
369
+ if (urls.length > 1) {
370
+ for (const pageUrl of urls) {
371
+ findings.push({
372
+ rule: 'duplicate-title',
373
+ severity: RULES['duplicate-title'].severity,
374
+ file: pageUrl,
375
+ message: `Duplicate title "${title}" shared across ${urls.length} pages`,
376
+ duplicates: urls,
377
+ });
378
+ }
379
+ }
380
+ }
381
+
382
+ // --- Duplicate meta description detection ---
383
+ const descMap = new Map();
384
+ for (const { url, state } of allStates) {
385
+ if (state.hasMetaDescription && state.metaDescriptionText) {
386
+ const desc = state.metaDescriptionText.trim().toLowerCase();
387
+ if (!descMap.has(desc)) descMap.set(desc, []);
388
+ descMap.get(desc).push(url);
389
+ }
390
+ }
391
+ for (const [, urls] of descMap) {
392
+ if (urls.length > 1) {
393
+ for (const pageUrl of urls) {
394
+ findings.push({
395
+ rule: 'duplicate-meta-description',
396
+ severity: RULES['duplicate-meta-description'].severity,
397
+ file: pageUrl,
398
+ message: `Duplicate meta description shared across ${urls.length} pages`,
399
+ duplicates: urls,
400
+ });
401
+ }
402
+ }
403
+ }
404
+
405
+ // --- Canonical conflict detection ---
406
+ const canonicalMap = new Map();
407
+ for (const { url, state } of allStates) {
408
+ if (state.hasCanonical && state.canonicalUrl) {
409
+ const canonical = state.canonicalUrl.trim();
410
+ if (!canonicalMap.has(canonical)) canonicalMap.set(canonical, []);
411
+ canonicalMap.get(canonical).push(url);
412
+ }
413
+ }
414
+ for (const [canonical, urls] of canonicalMap) {
415
+ if (urls.length > 1) {
416
+ for (const pageUrl of urls) {
417
+ findings.push({
418
+ rule: 'canonical-conflict',
419
+ severity: RULES['canonical-conflict'].severity,
420
+ file: pageUrl,
421
+ message: `Multiple pages share canonical URL "${canonical}"`,
422
+ duplicates: urls,
423
+ });
424
+ }
425
+ }
426
+ }
427
+
428
+ return findings;
429
+ }
430
+
431
+ // ---------------------------------------------------------------------------
432
+ // scanSite — crawl + analyse multiple pages
433
+ // ---------------------------------------------------------------------------
434
+
435
+ /**
436
+ * Crawl and scan an entire site.
437
+ * @param {string} startUrl
438
+ * @param {object} [options] — passed to crawlSite (maxPages, concurrency)
439
+ * @returns {Promise<object>} — { url, pages_scanned, files_scanned, findings, scores, summary, errors }
440
+ */
441
+ export async function scanSite(startUrl, options = {}) {
442
+ // 1. Crawl the site
443
+ const crawlResult = await crawlSite(startUrl, options);
444
+
445
+ // 2. Parse each page and run per-page checks
446
+ const allStates = [];
447
+ const perPageFindings = [];
448
+
449
+ for (const page of crawlResult.pages) {
450
+ const state = parseHtml(page.html);
451
+ allStates.push({ url: page.url, state });
452
+
453
+ const pageFindings = checkPage(state, page.url);
454
+
455
+ // HTTP-level checks
456
+ if (page.statusCode >= 400) {
457
+ const def = RULES['http-error'];
458
+ pageFindings.unshift({
459
+ rule: 'http-error',
460
+ severity: def.severity,
461
+ file: page.url,
462
+ message: `HTTP ${page.statusCode} error response`,
463
+ });
464
+ }
465
+
466
+ perPageFindings.push(...pageFindings);
467
+ }
468
+
469
+ // 3. Run cross-page checks (duplicate titles, descriptions, canonical conflicts)
470
+ const multiPage = allStates.length > 1;
471
+ const crossFindings = multiPage ? crossPageChecks(allStates) : [];
472
+
473
+ const allFindings = [...perPageFindings, ...crossFindings];
474
+
475
+ // 4. Calculate deduplicated score
476
+ const seoScore = calculateScore(allFindings);
477
+
478
+ // 5. Summary counts
479
+ const summary = { critical: 0, high: 0, medium: 0, low: 0 };
480
+ for (const f of allFindings) {
481
+ if (summary[f.severity] !== undefined) {
482
+ summary[f.severity]++;
483
+ }
484
+ }
485
+
486
+ return {
487
+ url: startUrl,
488
+ pages_scanned: crawlResult.pages.length,
489
+ files_scanned: crawlResult.pages.length,
490
+ findings: allFindings,
491
+ scores: { seo: seoScore },
492
+ summary,
493
+ errors: crawlResult.errors,
494
+ };
495
+ }
496
+
336
497
  // ---------------------------------------------------------------------------
337
498
  // CLI entry point
338
499
  // ---------------------------------------------------------------------------