@houseofmvps/claude-rank 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,10 @@
2
2
  // Standalone CLI: npx claude-rank <command> <directory>
3
3
  // Commands: scan, geo, aeo, schema, fix
4
4
 
5
- const [,, command = 'scan', dir = '.'] = process.argv;
5
+ const args = process.argv.slice(2);
6
+ const jsonFlag = args.includes('--json');
7
+ const positional = args.filter(a => a !== '--json');
8
+ const [command = 'scan', dir = '.'] = positional;
6
9
 
7
10
  const commands = {
8
11
  scan: '../tools/seo-scanner.mjs',
@@ -14,7 +17,7 @@ const commands = {
14
17
  if (command === 'help' || command === '--help') {
15
18
  console.log(`claude-rank — SEO/GEO/AEO toolkit
16
19
 
17
- Usage: claude-rank <command> [directory]
20
+ Usage: claude-rank <command> [directory|url] [--json]
18
21
 
19
22
  Commands:
20
23
  scan Run core SEO scanner (default)
@@ -23,9 +26,18 @@ Commands:
23
26
  schema Detect and validate structured data
24
27
  help Show this help message
25
28
 
29
+ Flags:
30
+ --json Output raw JSON (for programmatic use)
31
+
32
+ URL scanning:
33
+ Pass a URL instead of a directory to scan a live page via HTTP.
34
+ Only the "scan" command supports URL scanning.
35
+
26
36
  Examples:
27
37
  claude-rank scan ./my-project
38
+ claude-rank scan https://savemrr.co
28
39
  npx @houseofmvps/claude-rank geo .
40
+ claude-rank scan ./site --json
29
41
  `);
30
42
  process.exit(0);
31
43
  }
@@ -36,31 +48,79 @@ if (!toolPath) {
36
48
  process.exit(1);
37
49
  }
38
50
 
51
+ // Detect if the target is a URL (http:// or https://)
52
+ const isUrl = dir.startsWith('http://') || dir.startsWith('https://');
53
+
39
54
  // Dynamic import and run the scanner on the target directory
40
55
  import { resolve } from 'path';
41
56
 
42
57
  // Clear argv before importing tool modules so their inline CLI guards don't fire.
43
58
  // The tool files check `process.argv.slice(2).length > 0` to auto-run on import.
44
59
  process.argv = process.argv.slice(0, 2);
45
- const mod = await import(new URL(toolPath, import.meta.url));
46
- const targetDir = resolve(dir);
47
-
48
- if (command === 'schema') {
49
- // schema-engine exports detectSchema (per-file) and findHtmlFiles via html-parser.
50
- // Build a directory-level result by importing the html-parser helper and scanning each file.
51
- const { findHtmlFiles } = await import(new URL('../tools/lib/html-parser.mjs', import.meta.url));
52
- const { readFileSync } = await import('node:fs');
53
- const files = findHtmlFiles(targetDir);
54
- const results = [];
55
- for (const file of files) {
56
- const html = readFileSync(file, 'utf-8');
57
- const schemas = mod.detectSchema(html);
58
- if (schemas.length > 0) {
59
- results.push({ file, schemas });
60
+
61
+ const {
62
+ formatSeoReport,
63
+ formatGeoReport,
64
+ formatAeoReport,
65
+ formatSchemaReport,
66
+ } = await import(new URL('../tools/lib/formatter.mjs', import.meta.url));
67
+
68
+ const formatters = {
69
+ scan: formatSeoReport,
70
+ geo: formatGeoReport,
71
+ aeo: formatAeoReport,
72
+ schema: formatSchemaReport,
73
+ };
74
+
75
+ // URL-based scanning (scan command only)
76
+ if (isUrl) {
77
+ if (command !== 'scan') {
78
+ console.error(`URL scanning is only supported for the "scan" command, not "${command}".`);
79
+ process.exit(1);
80
+ }
81
+
82
+ const { scanUrl } = await import(new URL('../tools/url-scanner.mjs', import.meta.url));
83
+ try {
84
+ const result = await scanUrl(dir);
85
+ if (jsonFlag) {
86
+ console.log(JSON.stringify(result, null, 2));
87
+ } else {
88
+ console.log(formatSeoReport(result));
60
89
  }
90
+ } catch (err) {
91
+ console.error(`Error scanning URL: ${err.message}`);
92
+ process.exit(1);
61
93
  }
62
- console.log(JSON.stringify(results, null, 2));
63
94
  } else {
64
- const result = mod.scanDirectory(targetDir);
65
- console.log(JSON.stringify(result, null, 2));
95
+ // Directory-based scanning
96
+ const mod = await import(new URL(toolPath, import.meta.url));
97
+ const targetDir = resolve(dir);
98
+
99
+ if (command === 'schema') {
100
+ // schema-engine exports detectSchema (per-file) and findHtmlFiles via html-parser.
101
+ // Build a directory-level result by importing the html-parser helper and scanning each file.
102
+ const { findHtmlFiles } = await import(new URL('../tools/lib/html-parser.mjs', import.meta.url));
103
+ const { readFileSync } = await import('node:fs');
104
+ const files = findHtmlFiles(targetDir);
105
+ const results = [];
106
+ for (const file of files) {
107
+ const html = readFileSync(file, 'utf-8');
108
+ const schemas = mod.detectSchema(html);
109
+ if (schemas.length > 0) {
110
+ results.push({ file, schemas });
111
+ }
112
+ }
113
+ if (jsonFlag) {
114
+ console.log(JSON.stringify(results, null, 2));
115
+ } else {
116
+ console.log(formatSchemaReport(results));
117
+ }
118
+ } else {
119
+ const result = mod.scanDirectory(targetDir);
120
+ if (jsonFlag) {
121
+ console.log(JSON.stringify(result, null, 2));
122
+ } else {
123
+ console.log(formatters[command](result));
124
+ }
125
+ }
66
126
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@houseofmvps/claude-rank",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "The most comprehensive SEO/GEO/AEO plugin for Claude Code. Audit, fix, and dominate search — traditional and AI.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,173 @@
1
+ /**
2
+ * formatter.mjs — Pretty terminal output for claude-rank CLI reports.
3
+ * No external dependencies — uses raw ANSI escape codes.
4
+ */
5
+
6
// ANSI SGR helpers: each function wraps its argument in the matching escape
// sequence and appends a reset (ESC[0m) so styling never leaks past the text.
const c = Object.fromEntries(
  [
    ['red', 31],
    ['yellow', 33],
    ['green', 32],
    ['cyan', 36],
    ['bold', 1],
    ['dim', 2],
  ].map(([name, code]) => [name, (s) => `\x1b[${code}m${s}\x1b[0m`]),
);
14
+
15
/** Width of the rendered score bar, in character cells. */
const BAR_WIDTH = 15;

/**
 * Map a numeric score to a coloured rating label.
 * @param {number} score - score on a 0–100 scale
 * @returns {string} ANSI-coloured label: EXCELLENT (>= 90), GOOD (>= 80),
 *   NEEDS WORK (>= 60) or POOR (anything else)
 */
function scoreLabel(score) {
  if (score >= 90) return c.green('EXCELLENT');
  if (score >= 80) return c.green('GOOD');
  if (score >= 60) return c.yellow('NEEDS WORK');
  return c.red('POOR');
}

/**
 * Render a score as a fixed-width bar of filled and empty block characters.
 *
 * The score is clamped to 0–100 (non-numeric values count as 0) so the bar is
 * always exactly BAR_WIDTH cells wide and String.prototype.repeat is never
 * handed a negative count, which would throw a RangeError.
 *
 * @param {number} score - score on a 0–100 scale
 * @returns {string} BAR_WIDTH characters of U+2588 (filled) then U+2591 (empty)
 */
function scoreBar(score) {
  const numeric = Number(score);
  const bounded = Number.isFinite(numeric) ? Math.min(100, Math.max(0, numeric)) : 0;
  const filled = Math.round((bounded / 100) * BAR_WIDTH);
  return '\u2588'.repeat(filled) + '\u2591'.repeat(BAR_WIDTH - filled);
}
29
+
30
/**
 * Pick the colour helper used to print a severity tag.
 * @param {string} severity - finding severity
 * @returns {Function} c.red for critical/high, c.yellow for medium, c.dim otherwise
 */
function severityColor(severity) {
  switch (severity) {
    case 'critical':
    case 'high':
      return c.red;
    case 'medium':
      return c.yellow;
    default:
      return c.dim;
  }
}
35
+
36
/**
 * Right-pad a string with spaces to a target visible width.
 * ANSI SGR sequences (ESC[...m) are ignored when measuring, so coloured text
 * lines up with plain text. Strings already at or past the width are returned as-is.
 * @param {string} str - text to pad, possibly containing ANSI colour codes
 * @param {number} len - desired visible width in characters
 * @returns {string} the original string followed by any needed spaces
 */
function pad(str, len) {
  const visibleLength = str.replace(/\x1b\[[0-9;]*m/g, '').length;
  const missing = len - visibleLength;
  return missing > 0 ? str + ' '.repeat(missing) : str;
}
40
+
41
/**
 * Group findings by rule.
 *
 * Each group keeps the severity and message of the first finding seen for its
 * rule, plus the distinct files that triggered it in first-seen order. Groups
 * are returned in the order their rule first appears.
 *
 * @param {Array<{rule: string, severity: string, message: string, file?: string}>} findings
 *   findings to group; null/undefined is treated as an empty list
 * @returns {Array<{rule: string, severity: string, message: string, files: string[]}>}
 */
function groupFindings(findings) {
  const groups = new Map();
  // One Set per rule keeps file de-duplication O(1) per finding instead of
  // rescanning the group's files array each time.
  const seenFilesByRule = new Map();

  for (const f of findings ?? []) {
    let group = groups.get(f.rule);
    if (!group) {
      group = {
        rule: f.rule,
        severity: f.severity,
        message: f.message,
        files: [],
      };
      groups.set(f.rule, group);
      seenFilesByRule.set(f.rule, new Set());
    }

    const seenFiles = seenFilesByRule.get(f.rule);
    if (f.file && !seenFiles.has(f.file)) {
      seenFiles.add(f.file);
      group.files.push(f.file);
    }
  }

  return [...groups.values()];
}
62
+
63
/**
 * Join file names for display, showing at most `max` of them and summarising
 * the remainder as ", +N more".
 * @param {string[]} files - file names (or URLs) to list
 * @param {number} [max=3] - how many names to print before summarising
 * @returns {string} comma-separated list, or '' when there are no files
 */
function formatFileList(files, max = 3) {
  if (files.length === 0) return '';
  const visible = files.slice(0, max).join(', ');
  const hidden = files.length - max;
  return hidden > 0 ? `${visible}, +${hidden} more` : visible;
}
71
+
72
/** Sort rank per known severity (lower sorts first); unknown severities sort last. */
const SEVERITY_ORDER = { critical: 0, high: 1, medium: 2, low: 3 };

/**
 * Format a scanner report (SEO, GEO, or AEO) as a boxed header followed by
 * findings grouped by rule and ordered by severity.
 *
 * @param {object} result - scanner result; reads `skipped`/`reason`, or
 *   `scores[scoreKey]`, `findings`, `summary` and either `files_scanned`
 *   (directory scans) or `url` (URL scans)
 * @param {string} title - heading shown in the box
 * @param {string} scoreKey - key into `result.scores` for the score to display
 * @returns {string} multi-line, ANSI-coloured report
 */
function formatReport(result, title, scoreKey) {
  if (result.skipped) {
    return c.yellow(`Skipped: ${result.reason}`);
  }

  const W = 48; // inner width of the box, in visible characters
  const TAG_WIDTH = 10; // visible width reserved for the severity tag

  const score = result.scores[scoreKey];
  const { files_scanned, findings, summary } = result;
  const groups = groupFindings(findings);
  groups.sort((a, b) => (SEVERITY_ORDER[a.severity] ?? 9) - (SEVERITY_ORDER[b.severity] ?? 9));

  const hr = '\u2550'.repeat(W);
  // pad() measures visible width (it strips ANSI codes itself), so every row is
  // padded to exactly W; adding extra width for escape codes overshoots the box.
  const row = (text) => `\u2551${pad(text, W)}\u2551`;

  // URL scans carry `url` instead of `files_scanned`; show the scanned URL
  // (shortened to fit the box) rather than "Files scanned: undefined".
  let targetLine = ` Files scanned: ${files_scanned}`;
  if (files_scanned === undefined && typeof result.url === 'string') {
    const label = ' URL: ';
    const room = W - label.length;
    const shownUrl = result.url.length > room
      ? `${result.url.slice(0, room - 1)}\u2026`
      : result.url;
    targetLine = `${label}${shownUrl}`;
  }

  const lines = [
    `\u2554${hr}\u2557`,
    row(c.bold(` ${title}`)),
    `\u2560${hr}\u2563`,
    row(` Score: ${score}/100 ${scoreBar(score)} ${scoreLabel(score)}`),
    `\u2560${hr}\u2563`,
    row(targetLine),
    row(` Findings: ${findings.length}`),
    row(` Critical: ${summary.critical} High: ${summary.high} Medium: ${summary.medium} Low: ${summary.low}`),
    `\u255A${hr}\u255D`,
    '',
  ];

  if (groups.length === 0) {
    lines.push(c.green('No findings — looking great!'));
    return lines.join('\n');
  }

  lines.push(c.bold('Findings:'));
  for (const g of groups) {
    const colorFn = severityColor(g.severity);
    const tag = pad(colorFn(String(g.severity).toUpperCase()), TAG_WIDTH);
    const countSuffix = g.files.length > 1 ? ` (${g.files.length} pages)` : '';
    lines.push(` ${tag}${c.bold(g.rule)}${c.dim(countSuffix)}`);
    lines.push(` ${g.message}`);
    if (g.files.length > 0) {
      lines.push(` ${c.dim('Files: ' + formatFileList(g.files))}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}
128
+
129
/**
 * Format an SEO scan result for the terminal.
 * @param {object} result - scanner result exposing `scores.seo`
 * @returns {string} formatted report
 */
export function formatSeoReport(result) {
  const title = 'claude-rank SEO Audit';
  return formatReport(result, title, 'seo');
}

/**
 * Format a GEO scan result for the terminal.
 * @param {object} result - scanner result exposing `scores.geo`
 * @returns {string} formatted report
 */
export function formatGeoReport(result) {
  const title = 'claude-rank GEO Audit';
  return formatReport(result, title, 'geo');
}

/**
 * Format an AEO scan result for the terminal.
 * @param {object} result - scanner result exposing `scores.aeo`
 * @returns {string} formatted report
 */
export function formatAeoReport(result) {
  const title = 'claude-rank AEO Audit';
  return formatReport(result, title, 'aeo');
}
140
+
141
/**
 * Format schema detection results: a boxed summary followed by one section
 * per file listing each detected schema's type and format.
 *
 * @param {Array<{file: string, schemas: object[]}>} results - per-file detections
 * @returns {string} multi-line, ANSI-coloured report, or a single notice line
 *   when `results` is empty or missing
 */
export function formatSchemaReport(results) {
  if (!results || results.length === 0) {
    return c.yellow('No structured data (JSON-LD, Microdata, RDFa) detected.');
  }

  const W = 48; // inner width of the box, in visible characters
  const hr = '\u2550'.repeat(W);
  // pad() ignores ANSI codes when measuring, so rows are padded to exactly W;
  // widening the target for escape codes would push the right border out.
  const row = (text) => `\u2551${pad(text, W)}\u2551`;
  const totalSchemas = results.reduce((n, r) => n + r.schemas.length, 0);

  const lines = [
    `\u2554${hr}\u2557`,
    row(c.bold(' claude-rank Schema Report')),
    `\u2560${hr}\u2563`,
    row(` Files with schemas: ${results.length}`),
    row(` Total schemas found: ${totalSchemas}`),
    `\u255A${hr}\u255D`,
    '',
  ];

  for (const r of results) {
    lines.push(c.bold(r.file));
    for (const s of r.schemas) {
      // Fall back through the known type keys; the format defaults to JSON-LD.
      const type = s.type || s['@type'] || 'Unknown';
      const format = s.format || 'JSON-LD';
      lines.push(` ${c.cyan(type)} ${c.dim(`(${format})`)}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}
@@ -0,0 +1,79 @@
1
+ /**
2
+ * url-fetcher.mjs — Fetch a live URL with SSRF protection and size limits.
3
+ * Uses Node.js built-in fetch() (Node 18+). No external dependencies.
4
+ */
5
+
6
+ import { validateUrl, createResponseAccumulator } from './security.mjs';
7
+
8
const USER_AGENT = 'claude-rank/1.2.0 (https://github.com/Houseofmvps/claude-rank)';
const TIMEOUT_MS = 15_000;
/** Maximum number of redirects followed before giving up. */
const MAX_REDIRECTS = 10;
/** HTTP status codes treated as redirects when a Location header is present. */
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);

/**
 * Throw if the SSRF validator rejects the URL.
 * @param {string} candidate - URL about to be requested
 * @throws {Error} "URL blocked: <reason>" when validateUrl reports it invalid
 */
function assertUrlAllowed(candidate) {
  const validation = validateUrl(candidate);
  if (!validation.valid) {
    throw new Error(`URL blocked: ${validation.reason}`);
  }
}

/**
 * Request a URL, following redirects by hand so every hop is validated before
 * it is contacted. With `redirect: 'follow'` only the first URL would be
 * checked and a redirect could steer the request to a blocked address.
 *
 * NOTE(review): relies on Node's built-in fetch exposing the real 3xx response
 * and its Location header under `redirect: 'manual'` — confirm on the minimum
 * supported Node version.
 *
 * @param {string} url - initial URL
 * @param {AbortSignal} signal - aborts the in-flight request
 * @returns {Promise<{ response: Response, finalUrl: string, redirected: boolean }>}
 */
async function fetchValidatingRedirects(url, signal) {
  let currentUrl = url;

  for (let hops = 0; hops <= MAX_REDIRECTS; hops++) {
    assertUrlAllowed(currentUrl);

    let response;
    try {
      response = await fetch(currentUrl, {
        signal,
        headers: {
          'User-Agent': USER_AGENT,
          'Accept': 'text/html,application/xhtml+xml,*/*',
        },
        redirect: 'manual',
      });
    } catch (err) {
      // Let aborts propagate untouched so the caller can report a timeout.
      if (err.name === 'AbortError') throw err;
      throw new Error(`Fetch failed for ${currentUrl}: ${err.message}`, { cause: err });
    }

    const location = response.headers.get('location');
    if (!REDIRECT_STATUSES.has(response.status) || !location) {
      return { response, finalUrl: response.url || currentUrl, redirected: hops > 0 };
    }

    // Discard the redirect body before moving on to the next hop.
    await response.body?.cancel();
    try {
      currentUrl = new URL(location, currentUrl).href;
    } catch (err) {
      throw new Error(`Invalid redirect target "${location}" from ${currentUrl}`, { cause: err });
    }
  }

  throw new Error(`Too many redirects (more than ${MAX_REDIRECTS}): ${url}`);
}

/**
 * Read a response body as text through the size-limiting accumulator,
 * stopping as soon as the accumulator reports truncation.
 * @param {Response} response - response whose body should be read
 * @returns {Promise<string>} the accumulated body
 */
async function readBodyWithLimit(response) {
  const accumulator = createResponseAccumulator();
  const { body } = response;

  if (body && typeof body[Symbol.asyncIterator] === 'function') {
    const decoder = new TextDecoder();
    for await (const chunk of body) {
      accumulator.onData(decoder.decode(chunk, { stream: true }));
      if (accumulator.isTruncated()) return accumulator.getBody();
    }
    // Flush bytes the streaming decoder buffered from a split multi-byte character.
    const tail = decoder.decode();
    if (tail) accumulator.onData(tail);
  } else {
    // Fallback for environments where body isn't async-iterable
    accumulator.onData(await response.text());
  }

  return accumulator.getBody();
}

/**
 * Fetch a page by URL with SSRF protection and response size limits.
 *
 * The URL and every redirect target are validated before being requested, and
 * a single TIMEOUT_MS deadline covers the whole operation, body download
 * included. Only HTML/XHTML responses are accepted.
 *
 * @param {string} url — the URL to fetch
 * @returns {Promise<{ html: string, url: string, statusCode: number, redirected: boolean, finalUrl: string }>}
 * @throws {Error} when a URL is blocked, the request fails or times out, there
 *   are too many redirects, or the response is not HTML
 */
export async function fetchPage(url) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), TIMEOUT_MS);

  try {
    const { response, finalUrl, redirected } = await fetchValidatingRedirects(url, controller.signal);

    // Only scan HTML responses. Media types are case-insensitive, so compare
    // in lower case while reporting the header as received.
    const contentType = response.headers.get('content-type') || '';
    const mediaType = contentType.toLowerCase();
    if (!mediaType.includes('text/html') && !mediaType.includes('application/xhtml+xml')) {
      await response.body?.cancel();
      throw new Error(`Not an HTML page (Content-Type: ${contentType}): ${url}`);
    }

    const html = await readBodyWithLimit(response);

    return {
      html,
      url,
      statusCode: response.status,
      redirected,
      finalUrl,
    };
  } catch (err) {
    if (err.name === 'AbortError') {
      throw new Error(`Request timed out after ${TIMEOUT_MS / 1000}s: ${url}`, { cause: err });
    }
    throw err;
  } finally {
    // Runs after the body has been read, so the deadline also bounds slow bodies.
    clearTimeout(timeoutId);
  }
}
@@ -0,0 +1,348 @@
1
+ /**
2
+ * url-scanner.mjs — Scan a live URL for SEO issues.
3
+ * Fetches HTML from a URL and runs the same per-page analysis as seo-scanner.
4
+ * Cross-page rules (duplicates, orphans, canonicals) are skipped for single-URL scans.
5
+ */
6
+
7
+ import { parseHtml } from './lib/html-parser.mjs';
8
+ import { fetchPage } from './lib/url-fetcher.mjs';
9
+
10
+ // ---------------------------------------------------------------------------
11
+ // Rule definitions (same as seo-scanner, minus cross-page-only rules)
12
+ // ---------------------------------------------------------------------------
13
+
14
// Rule table keyed by rule id: { severity, deduction }. Rules are declared in
// severity groups; every rule in a group shares that group's score deduction.
const RULES = Object.fromEntries(
  [
    ['critical', 20, [
      'has-noindex',
      'canonical-points-elsewhere',
    ]],
    ['high', 10, [
      'missing-title',
      'missing-meta-description',
      'missing-h1',
      'thin-content',
      'missing-lang',
    ]],
    ['medium', 5, [
      'title-too-long',
      'title-too-short',
      'meta-description-too-long',
      'meta-description-too-short',
      'missing-viewport',
      'missing-charset',
      'missing-og-title',
      'missing-og-description',
      'missing-og-image',
      'missing-canonical',
      'multiple-h1',
      'skipped-heading-level',
      'images-missing-alt',
      'images-missing-dimensions',
      'missing-main-landmark',
      'missing-json-ld',
      'missing-favicon',
      'no-analytics',
    ]],
    ['low', 2, [
      'missing-og-url',
      'missing-twitter-card',
      'missing-twitter-image',
      'missing-nav-landmark',
      'missing-footer-landmark',
      'no-manifest',
      'all-scripts-blocking',
    ]],
    // HTTP-level rules (URL-scan only)
    ['critical', 20, ['http-error']],
    ['low', 2, ['redirect-detected']],
  ].flatMap(([severity, deduction, ruleNames]) =>
    ruleNames.map((ruleName) => [ruleName, { severity, deduction }])),
);
59
+
60
+ // ---------------------------------------------------------------------------
61
+ // Per-page rule checks (reused from seo-scanner logic)
62
+ // ---------------------------------------------------------------------------
63
+
64
/**
 * Run every per-page rule against a parsed page.
 *
 * Rules are evaluated in a fixed order (critical, high, medium, low); each
 * check returns its finding message, or a falsy value when the rule passes.
 *
 * @param {object} state - parsed page state produced by parseHtml
 * @param {string} pageUrl - URL recorded as the `file` of every finding
 * @returns {Array<{rule: string, severity: string, file: string, message: string}>}
 */
function checkPage(state, pageUrl) {
  // Message when an absolute canonical URL names a different host than the page.
  const crossDomainCanonical = () => {
    if (!(state.hasCanonical && state.canonicalUrl)) return null;
    const canonical = state.canonicalUrl.trim();
    if (!/^https?:\/\//.test(canonical)) return null;
    try {
      if (new URL(canonical).hostname !== new URL(pageUrl).hostname) {
        return `Canonical URL "${canonical}" points to a different domain`;
      }
    } catch {
      // Invalid canonical URL — skip this check
    }
    return null;
  };

  // Message for the first place the heading outline jumps more than one level.
  const firstSkippedHeading = () => {
    const levels = state.headingLevels;
    for (let i = 1; i < levels.length; i++) {
      if (levels[i] - levels[i - 1] > 1) {
        return `Heading level skipped: h${levels[i - 1]} → h${levels[i]}`;
      }
    }
    return null;
  };

  const checks = [
    // Critical
    ['has-noindex', () => state.hasNoindex
      && 'Page has noindex directive — will be excluded from search engines'],
    ['canonical-points-elsewhere', crossDomainCanonical],

    // High
    ['missing-title', () => !state.hasTitle && 'Page is missing a <title> tag'],
    ['missing-meta-description', () => !state.hasMetaDescription
      && 'Page is missing a meta description'],
    ['missing-h1', () => state.h1Count === 0 && 'Page has no <h1> heading'],
    ['thin-content', () => state.wordCount > 0 && state.wordCount < 300
      && `Page has only ${state.wordCount} words (minimum recommended: 300)`],
    ['missing-lang', () => !state.hasLang && 'HTML element is missing a lang attribute'],

    // Medium
    ['title-too-long', () => state.hasTitle && state.titleText.length > 60
      && `Title is ${state.titleText.length} chars (max recommended: 60)`],
    ['title-too-short', () => state.hasTitle && state.titleText.length < 20
      && `Title is only ${state.titleText.length} chars (min recommended: 20)`],
    ['meta-description-too-long', () => state.hasMetaDescription
      && state.metaDescriptionText.length > 160
      && `Meta description is ${state.metaDescriptionText.length} chars (max recommended: 160)`],
    ['meta-description-too-short', () => state.hasMetaDescription
      && state.metaDescriptionText.length > 0
      && state.metaDescriptionText.length < 70
      && `Meta description is only ${state.metaDescriptionText.length} chars (min recommended: 70)`],
    ['missing-viewport', () => !state.hasViewport && 'Page is missing a viewport meta tag'],
    ['missing-charset', () => !state.hasCharset && 'Page is missing a charset declaration'],
    ['missing-og-title', () => !state.hasOgTitle && 'Page is missing og:title Open Graph tag'],
    ['missing-og-description', () => !state.hasOgDescription
      && 'Page is missing og:description Open Graph tag'],
    ['missing-og-image', () => !state.hasOgImage && 'Page is missing og:image Open Graph tag'],
    ['missing-canonical', () => !state.hasCanonical && 'Page is missing a canonical link tag'],
    ['multiple-h1', () => state.h1Count > 1
      && `Page has ${state.h1Count} <h1> tags (should have exactly 1)`],
    ['skipped-heading-level', firstSkippedHeading],
    ['images-missing-alt', () => state.imagesWithoutAlt > 0
      && `${state.imagesWithoutAlt} image(s) missing alt attribute`],
    ['images-missing-dimensions', () => state.imagesWithoutDimensions > 0
      && `${state.imagesWithoutDimensions} image(s) missing width/height attributes`],
    ['missing-main-landmark', () => !state.hasMain
      && 'Page is missing a <main> landmark element'],
    ['missing-json-ld', () => state.jsonLdScripts === 0
      && 'Page has no JSON-LD structured data'],
    ['missing-favicon', () => !state.hasFavicon && 'Page is missing a favicon link'],
    ['no-analytics', () => !state.hasAnalytics
      && 'No analytics provider detected on this page'],

    // Low
    ['missing-og-url', () => !state.hasOgUrl && 'Page is missing og:url Open Graph tag'],
    ['missing-twitter-card', () => !state.hasTwitterCard
      && 'Page is missing twitter:card meta tag'],
    ['missing-twitter-image', () => !state.hasTwitterImage
      && 'Page is missing twitter:image meta tag'],
    ['missing-nav-landmark', () => !state.hasNav
      && 'Page is missing a <nav> landmark element'],
    ['missing-footer-landmark', () => !state.hasFooter
      && 'Page is missing a <footer> landmark element'],
    ['no-manifest', () => !state.hasManifest && 'Page is missing a web app manifest link'],
    ['all-scripts-blocking', () => state.totalScripts > 0 && state.deferredScripts === 0
      && `All ${state.totalScripts} script(s) are render-blocking (no async/defer)`],
  ];

  const findings = [];
  for (const [rule, evaluate] of checks) {
    const message = evaluate();
    if (message) {
      findings.push({
        rule,
        severity: RULES[rule].severity,
        file: pageUrl,
        message,
      });
    }
  }
  return findings;
}
229
+
230
+ // ---------------------------------------------------------------------------
231
+ // Score calculation
232
+ // ---------------------------------------------------------------------------
233
+
234
/**
 * Compute a 0–100 score: start at 100 and subtract each triggered rule's
 * deduction once, however many findings share that rule. Rule ids missing
 * from RULES are ignored, and the result never drops below 0.
 * @param {Array<{rule: string}>} findings
 * @returns {number}
 */
function calculateScore(findings) {
  const distinctRules = [...new Set(findings.map((finding) => finding.rule))];
  const totalDeduction = distinctRules.reduce((sum, rule) => {
    const def = RULES[rule];
    return def ? sum + def.deduction : sum;
  }, 0);
  return Math.max(0, 100 - totalDeduction);
}
245
+
246
+ // ---------------------------------------------------------------------------
247
+ // scanHtml — analyse raw HTML (for testing without HTTP)
248
+ // ---------------------------------------------------------------------------
249
+
250
/**
 * Run the per-page SEO analysis on an HTML string, without any network access.
 * Performs the same page checks as scanUrl, minus the HTTP-level findings.
 * @param {string} html — raw HTML string
 * @param {string} [url='https://example.com'] — URL recorded on each finding
 * @returns {object} { url, findings, scores, summary }
 */
export function scanHtml(html, url = 'https://example.com') {
  const findings = checkPage(parseHtml(html), url);

  // Count findings per known severity; other severities are left out.
  const summary = { critical: 0, high: 0, medium: 0, low: 0 };
  findings.forEach(({ severity }) => {
    if (summary[severity] !== undefined) {
      summary[severity] += 1;
    }
  });

  return {
    url,
    findings,
    scores: { seo: calculateScore(findings) },
    summary,
  };
}
277
+
278
+ // ---------------------------------------------------------------------------
279
+ // scanUrl — fetch + analyse
280
+ // ---------------------------------------------------------------------------
281
+
282
/**
 * Fetch a live URL and run the SEO analysis on the HTML that comes back.
 * Adds HTTP-level findings on top of the page checks: an error status
 * (>= 400) is placed first in the list, and a followed redirect is appended.
 * @param {string} url — the URL to scan
 * @returns {Promise<object>} { url, findings, scores, summary, http }
 */
export async function scanUrl(url) {
  const { html, statusCode, redirected, finalUrl } = await fetchPage(url);

  const findings = checkPage(parseHtml(html), finalUrl);

  // HTTP-level checks
  if (statusCode >= 400) {
    findings.unshift({
      rule: 'http-error',
      severity: RULES['http-error'].severity,
      file: finalUrl,
      message: `HTTP ${statusCode} error response`,
    });
  }

  if (redirected) {
    findings.push({
      rule: 'redirect-detected',
      severity: RULES['redirect-detected'].severity,
      file: url,
      message: `URL redirected: ${url} → ${finalUrl}`,
    });
  }

  // Count findings per known severity; other severities are left out.
  const summary = findings.reduce((counts, { severity }) => {
    if (counts[severity] !== undefined) {
      counts[severity] += 1;
    }
    return counts;
  }, { critical: 0, high: 0, medium: 0, low: 0 });

  return {
    url: finalUrl,
    findings,
    scores: { seo: calculateScore(findings) },
    summary,
    http: { statusCode, redirected, finalUrl },
  };
}
335
+
336
+ // ---------------------------------------------------------------------------
337
+ // CLI entry point
338
+ // ---------------------------------------------------------------------------
339
+
340
// When the process was started with at least one argument, treat the first as
// a URL: scan it and print the result as JSON. Any failure is reported on
// stderr and the process exits with status 1.
const args = process.argv.slice(2);
if (args.length > 0) {
  const [targetUrl] = args;
  (async () => {
    try {
      const result = await scanUrl(targetUrl);
      console.log(JSON.stringify(result, null, 2));
    } catch (err) {
      console.error(`Error: ${err.message}`);
      process.exit(1);
    }
  })();
}