@houseofmvps/claude-rank 1.0.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,10 @@
2
2
  // Standalone CLI: npx claude-rank <command> <directory>
3
3
  // Commands: scan, geo, aeo, schema, fix
4
4
 
5
- const [,, command = 'scan', dir = '.'] = process.argv;
5
+ const args = process.argv.slice(2);
6
+ const jsonFlag = args.includes('--json');
7
+ const positional = args.filter(a => a !== '--json');
8
+ const [command = 'scan', dir = '.'] = positional;
6
9
 
7
10
  const commands = {
8
11
  scan: '../tools/seo-scanner.mjs',
@@ -14,7 +17,7 @@ const commands = {
14
17
  if (command === 'help' || command === '--help') {
15
18
  console.log(`claude-rank — SEO/GEO/AEO toolkit
16
19
 
17
- Usage: claude-rank <command> [directory]
20
+ Usage: claude-rank <command> [directory|url] [--json]
18
21
 
19
22
  Commands:
20
23
  scan Run core SEO scanner (default)
@@ -23,9 +26,18 @@ Commands:
23
26
  schema Detect and validate structured data
24
27
  help Show this help message
25
28
 
29
+ Flags:
30
+ --json Output raw JSON (for programmatic use)
31
+
32
+ URL scanning:
33
+ Pass a URL instead of a directory to scan a live page via HTTP.
34
+ Only the "scan" command supports URL scanning.
35
+
26
36
  Examples:
27
37
  claude-rank scan ./my-project
38
+ claude-rank scan https://savemrr.co
28
39
  npx @houseofmvps/claude-rank geo .
40
+ claude-rank scan ./site --json
29
41
  `);
30
42
  process.exit(0);
31
43
  }
@@ -36,31 +48,79 @@ if (!toolPath) {
36
48
  process.exit(1);
37
49
  }
38
50
 
51
+ // Detect if the target is a URL (http:// or https://)
52
+ const isUrl = dir.startsWith('http://') || dir.startsWith('https://');
53
+
39
54
  // Dynamic import and run the scanner on the target directory
40
55
  import { resolve } from 'path';
41
56
 
42
57
  // Clear argv before importing tool modules so their inline CLI guards don't fire.
43
58
  // The tool files check `process.argv.slice(2).length > 0` to auto-run on import.
44
59
  process.argv = process.argv.slice(0, 2);
45
- const mod = await import(new URL(toolPath, import.meta.url));
46
- const targetDir = resolve(dir);
47
-
48
- if (command === 'schema') {
49
- // schema-engine exports detectSchema (per-file) and findHtmlFiles via html-parser.
50
- // Build a directory-level result by importing the html-parser helper and scanning each file.
51
- const { findHtmlFiles } = await import(new URL('../tools/lib/html-parser.mjs', import.meta.url));
52
- const { readFileSync } = await import('node:fs');
53
- const files = findHtmlFiles(targetDir);
54
- const results = [];
55
- for (const file of files) {
56
- const html = readFileSync(file, 'utf-8');
57
- const schemas = mod.detectSchema(html);
58
- if (schemas.length > 0) {
59
- results.push({ file, schemas });
60
+
61
+ const {
62
+ formatSeoReport,
63
+ formatGeoReport,
64
+ formatAeoReport,
65
+ formatSchemaReport,
66
+ } = await import(new URL('../tools/lib/formatter.mjs', import.meta.url));
67
+
68
+ const formatters = {
69
+ scan: formatSeoReport,
70
+ geo: formatGeoReport,
71
+ aeo: formatAeoReport,
72
+ schema: formatSchemaReport,
73
+ };
74
+
75
+ // URL-based scanning (scan command only)
76
+ if (isUrl) {
77
+ if (command !== 'scan') {
78
+ console.error(`URL scanning is only supported for the "scan" command, not "${command}".`);
79
+ process.exit(1);
80
+ }
81
+
82
+ const { scanUrl } = await import(new URL('../tools/url-scanner.mjs', import.meta.url));
83
+ try {
84
+ const result = await scanUrl(dir);
85
+ if (jsonFlag) {
86
+ console.log(JSON.stringify(result, null, 2));
87
+ } else {
88
+ console.log(formatSeoReport(result));
60
89
  }
90
+ } catch (err) {
91
+ console.error(`Error scanning URL: ${err.message}`);
92
+ process.exit(1);
61
93
  }
62
- console.log(JSON.stringify(results, null, 2));
63
94
  } else {
64
- const result = mod.scanDirectory(targetDir);
65
- console.log(JSON.stringify(result, null, 2));
95
+ // Directory-based scanning
96
+ const mod = await import(new URL(toolPath, import.meta.url));
97
+ const targetDir = resolve(dir);
98
+
99
+ if (command === 'schema') {
100
+ // schema-engine exports detectSchema (per-file) and findHtmlFiles via html-parser.
101
+ // Build a directory-level result by importing the html-parser helper and scanning each file.
102
+ const { findHtmlFiles } = await import(new URL('../tools/lib/html-parser.mjs', import.meta.url));
103
+ const { readFileSync } = await import('node:fs');
104
+ const files = findHtmlFiles(targetDir);
105
+ const results = [];
106
+ for (const file of files) {
107
+ const html = readFileSync(file, 'utf-8');
108
+ const schemas = mod.detectSchema(html);
109
+ if (schemas.length > 0) {
110
+ results.push({ file, schemas });
111
+ }
112
+ }
113
+ if (jsonFlag) {
114
+ console.log(JSON.stringify(results, null, 2));
115
+ } else {
116
+ console.log(formatSchemaReport(results));
117
+ }
118
+ } else {
119
+ const result = mod.scanDirectory(targetDir);
120
+ if (jsonFlag) {
121
+ console.log(JSON.stringify(result, null, 2));
122
+ } else {
123
+ console.log(formatters[command](result));
124
+ }
125
+ }
66
126
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@houseofmvps/claude-rank",
3
- "version": "1.0.2",
3
+ "version": "1.2.0",
4
4
  "description": "The most comprehensive SEO/GEO/AEO plugin for Claude Code. Audit, fix, and dominate search — traditional and AI.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -208,7 +208,20 @@ function analyzePage(filePath) {
208
208
  * @returns {{ files_scanned, findings, scores: { aeo }, summary }}
209
209
  */
210
210
  export function scanDirectory(rootDir) {
211
- const htmlFiles = findHtmlFiles(rootDir);
211
+ let htmlFiles = findHtmlFiles(rootDir);
212
+
213
+ // If dist/build/out has HTML, exclude root index.html (Vite/webpack source template)
214
+ const hasBuildDir = htmlFiles.some(f => {
215
+ const rel = path.relative(rootDir, f);
216
+ return rel.startsWith('dist' + path.sep) || rel.startsWith('build' + path.sep) || rel.startsWith('out' + path.sep);
217
+ });
218
+ if (hasBuildDir) {
219
+ htmlFiles = htmlFiles.filter(f => {
220
+ const rel = path.relative(rootDir, f);
221
+ return rel !== 'index.html' && rel !== 'index.htm';
222
+ });
223
+ }
224
+
212
225
  const findings = [];
213
226
 
214
227
  // Per-file analyses
@@ -155,16 +155,26 @@ function parseRobotsTxt(content) {
155
155
  */
156
156
  function extractSchemaTypes(jsonLdContent) {
157
157
  const types = new Set();
158
+
159
+ function walkSchema(obj) {
160
+ if (!obj || typeof obj !== 'object') return;
161
+ if (Array.isArray(obj)) {
162
+ for (const item of obj) walkSchema(item);
163
+ return;
164
+ }
165
+ if (obj['@type']) {
166
+ const t = Array.isArray(obj['@type']) ? obj['@type'] : [obj['@type']];
167
+ for (const type of t) types.add(type);
168
+ }
169
+ // Walk all nested objects to find embedded schemas (e.g., author: { @type: "Person" })
170
+ for (const val of Object.values(obj)) {
171
+ if (val && typeof val === 'object') walkSchema(val);
172
+ }
173
+ }
174
+
158
175
  for (const raw of jsonLdContent) {
159
176
  try {
160
- const parsed = JSON.parse(raw);
161
- const items = Array.isArray(parsed) ? parsed : [parsed];
162
- for (const item of items) {
163
- if (item && item['@type']) {
164
- const t = Array.isArray(item['@type']) ? item['@type'] : [item['@type']];
165
- for (const type of t) types.add(type);
166
- }
167
- }
177
+ walkSchema(JSON.parse(raw));
168
178
  } catch {
169
179
  // Non-parseable JSON-LD — skip
170
180
  }
@@ -307,7 +317,20 @@ export function scanDirectory(rootDir) {
307
317
  // 3. Scan HTML files
308
318
  // -------------------------------------------------------------------------
309
319
 
310
- const htmlFiles = findHtmlFiles(rootDir);
320
+ let htmlFiles = findHtmlFiles(rootDir);
321
+
322
+ // If dist/build/out has HTML, exclude root index.html (Vite/webpack source template)
323
+ const hasBuildDir = htmlFiles.some(f => {
324
+ const rel = path.relative(rootDir, f);
325
+ return rel.startsWith('dist' + path.sep) || rel.startsWith('build' + path.sep) || rel.startsWith('out' + path.sep);
326
+ });
327
+ if (hasBuildDir) {
328
+ htmlFiles = htmlFiles.filter(f => {
329
+ const rel = path.relative(rootDir, f);
330
+ return rel !== 'index.html' && rel !== 'index.htm';
331
+ });
332
+ }
333
+
311
334
  let filesScanned = 0;
312
335
 
313
336
  // Aggregate data across all pages
@@ -0,0 +1,173 @@
1
+ /**
2
+ * formatter.mjs — Pretty terminal output for claude-rank CLI reports.
3
+ * No external dependencies — uses raw ANSI escape codes.
4
+ */
5
+
6
/**
 * ANSI color helpers — each wraps a string in an SGR escape pair.
 * No external dependencies.
 */
const c = {
  red: s => `\x1b[31m${s}\x1b[0m`,
  yellow: s => `\x1b[33m${s}\x1b[0m`,
  green: s => `\x1b[32m${s}\x1b[0m`,
  cyan: s => `\x1b[36m${s}\x1b[0m`,
  bold: s => `\x1b[1m${s}\x1b[0m`,
  dim: s => `\x1b[2m${s}\x1b[0m`,
};

// Width of the score progress bar, in characters.
const BAR_WIDTH = 15;

/** Map a 0-100 score to a colored qualitative label. */
function scoreLabel(score) {
  if (score >= 90) return c.green('EXCELLENT');
  if (score >= 80) return c.green('GOOD');
  if (score >= 60) return c.yellow('NEEDS WORK');
  return c.red('POOR');
}

/**
 * Render a filled/empty block bar for a 0-100 score.
 * The fill count is clamped so out-of-range scores cannot produce a negative
 * repeat count (String.prototype.repeat throws RangeError on negatives).
 */
function scoreBar(score) {
  const filled = Math.min(BAR_WIDTH, Math.max(0, Math.round((score / 100) * BAR_WIDTH)));
  return '\u2588'.repeat(filled) + '\u2591'.repeat(BAR_WIDTH - filled);
}

/** Pick the color function for a finding severity. */
function severityColor(severity) {
  if (severity === 'critical' || severity === 'high') return c.red;
  if (severity === 'medium') return c.yellow;
  return c.dim;
}

/**
 * Pad `str` with trailing spaces to a VISIBLE width of `len`.
 * ANSI escape codes are stripped before measuring, so callers pass the
 * desired on-screen width directly — never compensate for escape-code length.
 */
function pad(str, len) {
  const visible = str.replace(/\x1b\[[0-9;]*m/g, '');
  return str + ' '.repeat(Math.max(0, len - visible.length));
}

/**
 * Group findings by rule, aggregating affected files; the first finding of a
 * rule supplies the group's severity and message.
 */
function groupFindings(findings) {
  const groups = new Map();
  for (const f of findings) {
    if (!groups.has(f.rule)) {
      groups.set(f.rule, {
        rule: f.rule,
        severity: f.severity,
        message: f.message,
        files: [],
      });
    }
    const g = groups.get(f.rule);
    if (f.file && !g.files.includes(f.file)) {
      g.files.push(f.file);
    }
  }
  return [...groups.values()];
}

/** Render up to `max` file names, then "+N more" for the remainder. */
function formatFileList(files, max = 3) {
  if (files.length === 0) return '';
  const shown = files.slice(0, max);
  const rest = files.length - max;
  let out = shown.join(', ');
  if (rest > 0) out += `, +${rest} more`;
  return out;
}

// Sort order for finding groups (most severe first); unknown severities last.
const SEVERITY_ORDER = { critical: 0, high: 1, medium: 2, low: 3 };

/**
 * Format a scanner report (SEO, GEO, or AEO) with a box header and grouped
 * findings.
 *
 * BUGFIX: every box row is padded to exactly W visible columns. The previous
 * code added fudge factors (W + 9, W + 22, 10 + 9) to compensate for ANSI
 * escape-code length, but pad() already strips escapes before measuring, so
 * the compensation made colored rows overflow the box border by 9-22 columns.
 *
 * @param {object} result — scanner output { files_scanned, findings, scores, summary }
 *   (or { skipped, reason } for skipped scans)
 * @param {string} title — box title line
 * @param {string} scoreKey — key into result.scores ('seo' | 'geo' | 'aeo')
 * @returns {string} multi-line ANSI-colored report
 */
function formatReport(result, title, scoreKey) {
  if (result.skipped) {
    return c.yellow(`Skipped: ${result.reason}`);
  }

  const score = result.scores[scoreKey];
  const { files_scanned, findings, summary } = result;
  const groups = groupFindings(findings);
  groups.sort((a, b) => (SEVERITY_ORDER[a.severity] ?? 9) - (SEVERITY_ORDER[b.severity] ?? 9));

  const W = 48; // inner width of the box, in visible columns
  const hr = '\u2550'.repeat(W);
  const lines = [];

  lines.push(`\u2554${hr}\u2557`);
  lines.push(`\u2551${pad(c.bold(` ${title}`), W)}\u2551`);
  lines.push(`\u2560${hr}\u2563`);

  const barStr = ` Score: ${score}/100 ${scoreBar(score)} ${scoreLabel(score)}`;
  lines.push(`\u2551${pad(barStr, W)}\u2551`);
  lines.push(`\u2560${hr}\u2563`);

  lines.push(`\u2551${pad(` Files scanned: ${files_scanned}`, W)}\u2551`);
  lines.push(`\u2551${pad(` Findings: ${findings.length}`, W)}\u2551`);
  const countsLine = ` Critical: ${summary.critical} High: ${summary.high} Medium: ${summary.medium} Low: ${summary.low}`;
  lines.push(`\u2551${pad(countsLine, W)}\u2551`);
  lines.push(`\u255A${hr}\u255D`);
  lines.push('');

  if (groups.length === 0) {
    lines.push(c.green('No findings — looking great!'));
    return lines.join('\n');
  }

  lines.push(c.bold('Findings:'));
  for (const g of groups) {
    const colorFn = severityColor(g.severity);
    // Fixed-width severity label column (10 visible columns).
    const tag = pad(colorFn(g.severity.toUpperCase()), 10);
    const countSuffix = g.files.length > 1 ? ` (${g.files.length} pages)` : '';
    lines.push(` ${tag}${c.bold(g.rule)}${c.dim(countSuffix)}`);
    lines.push(` ${g.message}`);
    if (g.files.length > 0) {
      lines.push(` ${c.dim('Files: ' + formatFileList(g.files))}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}

/** Format the core SEO scanner result for the terminal. */
export function formatSeoReport(result) {
  return formatReport(result, 'claude-rank SEO Audit', 'seo');
}

/** Format the GEO scanner result for the terminal. */
export function formatGeoReport(result) {
  return formatReport(result, 'claude-rank GEO Audit', 'geo');
}

/** Format the AEO scanner result for the terminal. */
export function formatAeoReport(result) {
  return formatReport(result, 'claude-rank AEO Audit', 'aeo');
}

/**
 * Format schema detection results for the terminal.
 * @param {Array<{file: string, schemas: Array<object>}>} results
 * @returns {string} multi-line ANSI-colored report
 */
export function formatSchemaReport(results) {
  if (!results || results.length === 0) {
    return c.yellow('No structured data (JSON-LD, Microdata, RDFa) detected.');
  }

  const lines = [];
  const W = 48;
  const hr = '\u2550'.repeat(W);

  lines.push(`\u2554${hr}\u2557`);
  // Pad to W visible columns — pad() strips ANSI codes before measuring.
  lines.push(`\u2551${pad(c.bold(' claude-rank Schema Report'), W)}\u2551`);
  lines.push(`\u2560${hr}\u2563`);
  lines.push(`\u2551${pad(` Files with schemas: ${results.length}`, W)}\u2551`);
  const totalSchemas = results.reduce((n, r) => n + r.schemas.length, 0);
  lines.push(`\u2551${pad(` Total schemas found: ${totalSchemas}`, W)}\u2551`);
  lines.push(`\u255A${hr}\u255D`);
  lines.push('');

  for (const r of results) {
    lines.push(c.bold(r.file));
    for (const s of r.schemas) {
      const type = s.type || s['@type'] || 'Unknown';
      const format = s.format || 'JSON-LD';
      lines.push(` ${c.cyan(type)} ${c.dim(`(${format})`)}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}
@@ -127,6 +127,8 @@ export function parseHtml(htmlString) {
127
127
  let currentHeadingLevel = 0;
128
128
  let isJsonLd = false;
129
129
  let currentHeadingText = '';
130
+ let currentScriptSrc = '';
131
+ let inlineScriptBuffer = '';
130
132
  let bodyTextBuffer = '';
131
133
 
132
134
  const parser = new Parser(
@@ -252,8 +254,9 @@ export function parseHtml(htmlString) {
252
254
  }
253
255
 
254
256
  // Count total and deferred scripts
257
+ // type="module" is deferred by default per HTML spec
255
258
  state.totalScripts++;
256
- if (attribs.async !== undefined || attribs.defer !== undefined) {
259
+ if (attribs.async !== undefined || attribs.defer !== undefined || scriptType === 'module') {
257
260
  state.deferredScripts++;
258
261
  }
259
262
 
@@ -269,6 +272,7 @@ export function parseHtml(htmlString) {
269
272
  }
270
273
 
271
274
  inScript = true;
275
+ currentScriptSrc = src;
272
276
  return;
273
277
  }
274
278
 
@@ -349,6 +353,12 @@ export function parseHtml(htmlString) {
349
353
  return;
350
354
  }
351
355
 
356
+ // Inline script content — accumulate for analytics detection
357
+ if (inScript && !isJsonLd) {
358
+ inlineScriptBuffer += text;
359
+ return;
360
+ }
361
+
352
362
  // Body text (skip script/style)
353
363
  if (inBody && !inScript && !inStyle) {
354
364
  bodyTextBuffer += text + ' ';
@@ -372,7 +382,19 @@ export function parseHtml(htmlString) {
372
382
  state.jsonLdScripts++;
373
383
  isJsonLd = false;
374
384
  }
385
+ // Check inline script content for analytics patterns (catches lazy-loaded GA etc.)
386
+ if (!state.hasAnalytics && !currentScriptSrc && inlineScriptBuffer) {
387
+ for (const { pattern, provider } of ANALYTICS_PATTERNS) {
388
+ if (inlineScriptBuffer.includes(pattern)) {
389
+ state.hasAnalytics = true;
390
+ state.analyticsProvider = provider;
391
+ break;
392
+ }
393
+ }
394
+ }
375
395
  inScript = false;
396
+ currentScriptSrc = '';
397
+ inlineScriptBuffer = '';
376
398
  return;
377
399
  }
378
400
 
@@ -451,7 +473,9 @@ export async function parseHtmlFile(filePath) {
451
473
  // findHtmlFiles — recursively find .html/.htm files
452
474
  // ---------------------------------------------------------------------------
453
475
 
454
- const SKIP_DIRS = new Set(['node_modules', '.git', '.next', '.nuxt', '.svelte-kit', '.cache', '.turbo']);
476
+ const SKIP_DIRS = new Set(['node_modules', '.git', '.next', '.nuxt', '.svelte-kit', '.cache', '.turbo', 'public']);
477
+ // Files that look like HTML but aren't real pages (e.g., Google/Bing site verification)
478
+ const SKIP_FILE_PATTERNS = [/^google[a-f0-9]+\.html$/, /^bing[a-f0-9]+\.html$/, /^yandex_[a-f0-9]+\.html$/];
455
479
 
456
480
  /**
457
481
  * Recursively find all .html/.htm files under a directory.
@@ -479,6 +503,8 @@ export function findHtmlFiles(dir) {
479
503
  } else if (entry.isFile()) {
480
504
  const ext = path.extname(entry.name).toLowerCase();
481
505
  if (ext === '.html' || ext === '.htm') {
506
+ // Skip search engine verification files
507
+ if (SKIP_FILE_PATTERNS.some(p => p.test(entry.name))) continue;
482
508
  results.push(fullPath);
483
509
  }
484
510
  }
@@ -0,0 +1,79 @@
1
+ /**
2
+ * url-fetcher.mjs — Fetch a live URL with SSRF protection and size limits.
3
+ * Uses Node.js built-in fetch() (Node 18+). No external dependencies.
4
+ */
5
+
6
+ import { validateUrl, createResponseAccumulator } from './security.mjs';
7
+
8
+ const USER_AGENT = 'claude-rank/1.1.0 (https://github.com/Houseofmvps/claude-rank)';
9
+ const TIMEOUT_MS = 15_000;
10
+
11
/**
 * Fetch a page by URL with SSRF protection, a hard deadline, and size limits.
 *
 * @param {string} url — the URL to fetch
 * @returns {Promise<{ html: string, url: string, statusCode: number, redirected: boolean, finalUrl: string }>}
 * @throws {Error} when the URL fails SSRF validation, the request times out,
 *   the response is not HTML, or the network request fails
 */
export async function fetchPage(url) {
  // 1. SSRF validation — reject blocked targets before any network I/O.
  const validation = validateUrl(url);
  if (!validation.valid) {
    throw new Error(`URL blocked: ${validation.reason}`);
  }

  // 2. One deadline for the WHOLE exchange (headers AND body). The previous
  //    version cleared the timer as soon as headers arrived, so a server that
  //    sent headers quickly but trickled the body could hang the CLI forever.
  //    The timer is now cleared in `finally` once the body has been read.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), TIMEOUT_MS);
  const timeoutError = () =>
    new Error(`Request timed out after ${TIMEOUT_MS / 1000}s: ${url}`);

  try {
    let response;
    try {
      response = await fetch(url, {
        signal: controller.signal,
        headers: {
          'User-Agent': USER_AGENT,
          'Accept': 'text/html,application/xhtml+xml,*/*',
        },
        redirect: 'follow',
      });
    } catch (err) {
      if (err.name === 'AbortError') {
        throw timeoutError();
      }
      throw new Error(`Fetch failed for ${url}: ${err.message}`);
    }

    // 3. Check Content-Type — only scan HTML responses.
    const contentType = response.headers.get('content-type') || '';
    if (!contentType.includes('text/html') && !contentType.includes('application/xhtml+xml')) {
      throw new Error(`Not an HTML page (Content-Type: ${contentType}): ${url}`);
    }

    // 4. Read body with size limits using the response accumulator.
    const accumulator = createResponseAccumulator();
    try {
      // Use response.body (ReadableStream) for streaming size control so the
      // size cap can cut the download short mid-stream.
      if (response.body && typeof response.body[Symbol.asyncIterator] === 'function') {
        const decoder = new TextDecoder();
        for await (const chunk of response.body) {
          accumulator.onData(decoder.decode(chunk, { stream: true }));
          if (accumulator.isTruncated()) break;
        }
        // Flush the decoder: streaming decode may hold back a partial
        // multi-byte character at the final chunk boundary.
        accumulator.onData(decoder.decode());
      } else {
        // Fallback for environments where body isn't async-iterable.
        accumulator.onData(await response.text());
      }
    } catch (err) {
      // An abort during the body read is the deadline firing, not a protocol
      // error — surface it with the same timeout message as a header timeout.
      if (err.name === 'AbortError') {
        throw timeoutError();
      }
      throw new Error(`Fetch failed for ${url}: ${err.message}`);
    }

    return {
      html: accumulator.getBody(),
      url,
      statusCode: response.status,
      redirected: response.redirected,
      finalUrl: response.url,
    };
  } finally {
    clearTimeout(timeoutId);
  }
}
@@ -436,7 +436,19 @@ function calculateScore(findings) {
436
436
  */
437
437
  export function scanDirectory(rootDir) {
438
438
  const absRoot = path.resolve(rootDir);
439
- const htmlFiles = findHtmlFiles(absRoot);
439
+ let htmlFiles = findHtmlFiles(absRoot);
440
+
441
+ // If dist/ or build/ has HTML, exclude root index.html (Vite/webpack source template)
442
+ const hasBuildDir = htmlFiles.some(f => {
443
+ const rel = path.relative(absRoot, f);
444
+ return rel.startsWith('dist' + path.sep) || rel.startsWith('build' + path.sep) || rel.startsWith('out' + path.sep);
445
+ });
446
+ if (hasBuildDir) {
447
+ htmlFiles = htmlFiles.filter(f => {
448
+ const rel = path.relative(absRoot, f);
449
+ return rel !== 'index.html' && rel !== 'index.htm';
450
+ });
451
+ }
440
452
 
441
453
  // Backend-only detection
442
454
  if (isBackendOnlyProject(absRoot, htmlFiles)) {
@@ -0,0 +1,348 @@
1
+ /**
2
+ * url-scanner.mjs — Scan a live URL for SEO issues.
3
+ * Fetches HTML from a URL and runs the same per-page analysis as seo-scanner.
4
+ * Cross-page rules (duplicates, orphans, canonicals) are skipped for single-URL scans.
5
+ */
6
+
7
+ import { parseHtml } from './lib/html-parser.mjs';
8
+ import { fetchPage } from './lib/url-fetcher.mjs';
9
+
10
+ // ---------------------------------------------------------------------------
11
+ // Rule definitions (same as seo-scanner, minus cross-page-only rules)
12
+ // ---------------------------------------------------------------------------
13
+
14
// Rule table: maps rule id -> { severity bucket, score deduction }.
// Each triggered rule's `deduction` is subtracted from 100 once per rule
// (not per occurrence); severities drive the report's summary counts.
const RULES = {
  // Critical
  'has-noindex': { severity: 'critical', deduction: 20 },
  'canonical-points-elsewhere': { severity: 'critical', deduction: 20 },

  // High
  'missing-title': { severity: 'high', deduction: 10 },
  'missing-meta-description': { severity: 'high', deduction: 10 },
  'missing-h1': { severity: 'high', deduction: 10 },
  'thin-content': { severity: 'high', deduction: 10 },
  'missing-lang': { severity: 'high', deduction: 10 },

  // Medium
  'title-too-long': { severity: 'medium', deduction: 5 },
  'title-too-short': { severity: 'medium', deduction: 5 },
  'meta-description-too-long': { severity: 'medium', deduction: 5 },
  'meta-description-too-short': { severity: 'medium', deduction: 5 },
  'missing-viewport': { severity: 'medium', deduction: 5 },
  'missing-charset': { severity: 'medium', deduction: 5 },
  'missing-og-title': { severity: 'medium', deduction: 5 },
  'missing-og-description': { severity: 'medium', deduction: 5 },
  'missing-og-image': { severity: 'medium', deduction: 5 },
  'missing-canonical': { severity: 'medium', deduction: 5 },
  'multiple-h1': { severity: 'medium', deduction: 5 },
  'skipped-heading-level': { severity: 'medium', deduction: 5 },
  'images-missing-alt': { severity: 'medium', deduction: 5 },
  'images-missing-dimensions': { severity: 'medium', deduction: 5 },
  'missing-main-landmark': { severity: 'medium', deduction: 5 },
  'missing-json-ld': { severity: 'medium', deduction: 5 },
  'missing-favicon': { severity: 'medium', deduction: 5 },
  'no-analytics': { severity: 'medium', deduction: 5 },

  // Low
  'missing-og-url': { severity: 'low', deduction: 2 },
  'missing-twitter-card': { severity: 'low', deduction: 2 },
  'missing-twitter-image': { severity: 'low', deduction: 2 },
  'missing-nav-landmark': { severity: 'low', deduction: 2 },
  'missing-footer-landmark': { severity: 'low', deduction: 2 },
  'no-manifest': { severity: 'low', deduction: 2 },
  'all-scripts-blocking': { severity: 'low', deduction: 2 },

  // HTTP-level rules (URL-scan only)
  'http-error': { severity: 'critical', deduction: 20 },
  'redirect-detected': { severity: 'low', deduction: 2 },
};
59
+
60
+ // ---------------------------------------------------------------------------
61
+ // Per-page rule checks (reused from seo-scanner logic)
62
+ // ---------------------------------------------------------------------------
63
+
64
/**
 * Run every single-page SEO rule check against a parsed page state.
 *
 * Cross-page rules (duplicates, orphans, canonical graphs) are intentionally
 * absent — this module analyses one page at a time.
 *
 * @param {object} state — per-page flags/counters produced by parseHtml()
 *   (hasTitle, h1Count, wordCount, imagesWithoutAlt, …)
 * @param {string} pageUrl — URL (or path) recorded on each finding's `file`
 * @returns {Array<{rule: string, severity: string, file: string, message: string}>}
 */
function checkPage(state, pageUrl) {
  const findings = [];

  // Record one finding; severity is looked up in the RULES table.
  function add(rule, message, context = {}) {
    const def = RULES[rule];
    findings.push({
      rule,
      severity: def.severity,
      file: pageUrl,
      message,
      ...context,
    });
  }

  // Critical
  if (state.hasNoindex) {
    add('has-noindex', 'Page has noindex directive — will be excluded from search engines');
  }

  if (state.hasCanonical && state.canonicalUrl) {
    const canonical = state.canonicalUrl.trim();
    // For URL scans: flag if canonical points to a completely different domain.
    // Relative canonicals are skipped — they cannot point off-domain.
    if (canonical.startsWith('http://') || canonical.startsWith('https://')) {
      try {
        const canonicalHost = new URL(canonical).hostname;
        const pageHost = new URL(pageUrl).hostname;
        if (canonicalHost !== pageHost) {
          add('canonical-points-elsewhere', `Canonical URL "${canonical}" points to a different domain`);
        }
      } catch {
        // Invalid canonical URL (or non-URL pageUrl) — skip this check
      }
    }
  }

  // High
  if (!state.hasTitle) {
    add('missing-title', 'Page is missing a <title> tag');
  }

  if (!state.hasMetaDescription) {
    add('missing-meta-description', 'Page is missing a meta description');
  }

  if (state.h1Count === 0) {
    add('missing-h1', 'Page has no <h1> heading');
  }

  // NOTE(review): wordCount === 0 skips the thin-content rule — presumably it
  // means "no body text parsed" rather than a genuinely empty page; confirm.
  if (state.wordCount > 0 && state.wordCount < 300) {
    add('thin-content', `Page has only ${state.wordCount} words (minimum recommended: 300)`);
  }

  if (!state.hasLang) {
    add('missing-lang', 'HTML element is missing a lang attribute');
  }

  // Medium
  if (state.hasTitle && state.titleText.length > 60) {
    add('title-too-long', `Title is ${state.titleText.length} chars (max recommended: 60)`);
  }

  if (state.hasTitle && state.titleText.length < 20) {
    add('title-too-short', `Title is only ${state.titleText.length} chars (min recommended: 20)`);
  }

  if (state.hasMetaDescription && state.metaDescriptionText.length > 160) {
    add('meta-description-too-long', `Meta description is ${state.metaDescriptionText.length} chars (max recommended: 160)`);
  }

  if (state.hasMetaDescription && state.metaDescriptionText.length > 0 && state.metaDescriptionText.length < 70) {
    add('meta-description-too-short', `Meta description is only ${state.metaDescriptionText.length} chars (min recommended: 70)`);
  }

  if (!state.hasViewport) {
    add('missing-viewport', 'Page is missing a viewport meta tag');
  }

  if (!state.hasCharset) {
    add('missing-charset', 'Page is missing a charset declaration');
  }

  if (!state.hasOgTitle) {
    add('missing-og-title', 'Page is missing og:title Open Graph tag');
  }

  if (!state.hasOgDescription) {
    add('missing-og-description', 'Page is missing og:description Open Graph tag');
  }

  if (!state.hasOgImage) {
    add('missing-og-image', 'Page is missing og:image Open Graph tag');
  }

  if (!state.hasCanonical) {
    add('missing-canonical', 'Page is missing a canonical link tag');
  }

  if (state.h1Count > 1) {
    add('multiple-h1', `Page has ${state.h1Count} <h1> tags (should have exactly 1)`);
  }

  // Only the FIRST skipped heading level is reported (break after one finding).
  if (state.headingLevels.length > 1) {
    for (let i = 1; i < state.headingLevels.length; i++) {
      if (state.headingLevels[i] - state.headingLevels[i - 1] > 1) {
        add('skipped-heading-level', `Heading level skipped: h${state.headingLevels[i - 1]} → h${state.headingLevels[i]}`);
        break;
      }
    }
  }

  if (state.imagesWithoutAlt > 0) {
    add('images-missing-alt', `${state.imagesWithoutAlt} image(s) missing alt attribute`);
  }

  if (state.imagesWithoutDimensions > 0) {
    add('images-missing-dimensions', `${state.imagesWithoutDimensions} image(s) missing width/height attributes`);
  }

  if (!state.hasMain) {
    add('missing-main-landmark', 'Page is missing a <main> landmark element');
  }

  if (state.jsonLdScripts === 0) {
    add('missing-json-ld', 'Page has no JSON-LD structured data');
  }

  if (!state.hasFavicon) {
    add('missing-favicon', 'Page is missing a favicon link');
  }

  if (!state.hasAnalytics) {
    add('no-analytics', 'No analytics provider detected on this page');
  }

  // Low
  if (!state.hasOgUrl) {
    add('missing-og-url', 'Page is missing og:url Open Graph tag');
  }

  if (!state.hasTwitterCard) {
    add('missing-twitter-card', 'Page is missing twitter:card meta tag');
  }

  if (!state.hasTwitterImage) {
    add('missing-twitter-image', 'Page is missing twitter:image meta tag');
  }

  if (!state.hasNav) {
    add('missing-nav-landmark', 'Page is missing a <nav> landmark element');
  }

  if (!state.hasFooter) {
    add('missing-footer-landmark', 'Page is missing a <footer> landmark element');
  }

  if (!state.hasManifest) {
    add('no-manifest', 'Page is missing a web app manifest link');
  }

  if (state.totalScripts > 0 && state.deferredScripts === 0) {
    add('all-scripts-blocking', `All ${state.totalScripts} script(s) are render-blocking (no async/defer)`);
  }

  return findings;
}
229
+
230
+ // ---------------------------------------------------------------------------
231
+ // Score calculation
232
+ // ---------------------------------------------------------------------------
233
+
234
/**
 * Compute the 0-100 SEO score for a set of findings.
 * Each triggered rule's deduction is applied exactly once — repeated findings
 * for the same rule do not stack. The result never goes below zero.
 *
 * @param {Array<{rule: string}>} findings
 * @returns {number} score in [0, 100]
 */
function calculateScore(findings) {
  const uniqueRules = new Set(findings.map((finding) => finding.rule));
  let totalDeduction = 0;
  uniqueRules.forEach((rule) => {
    const def = RULES[rule];
    if (def) {
      totalDeduction += def.deduction;
    }
  });
  return Math.max(0, 100 - totalDeduction);
}
245
+
246
+ // ---------------------------------------------------------------------------
247
+ // scanHtml — analyse raw HTML (for testing without HTTP)
248
+ // ---------------------------------------------------------------------------
249
+
250
/**
 * Analyse an HTML string as if it were fetched from the given URL.
 * Same analysis as scanUrl but takes HTML directly (no network request).
 * @param {string} html — raw HTML string
 * @param {string} [url='https://example.com'] — URL for context in findings
 * @returns {object} { url, findings, scores, summary }
 */
export function scanHtml(html, url = 'https://example.com') {
  const pageState = parseHtml(html);
  const findings = checkPage(pageState, url);

  // Tally findings per severity bucket; unknown severities are ignored.
  const summary = { critical: 0, high: 0, medium: 0, low: 0 };
  findings.forEach((finding) => {
    if (summary[finding.severity] !== undefined) {
      summary[finding.severity] += 1;
    }
  });

  return {
    url,
    findings,
    scores: { seo: calculateScore(findings) },
    summary,
  };
}
277
+
278
+ // ---------------------------------------------------------------------------
279
+ // scanUrl — fetch + analyse
280
+ // ---------------------------------------------------------------------------
281
+
282
/**
 * Fetch a live URL and run SEO analysis on the returned HTML.
 *
 * Per-page findings are attributed to the post-redirect URL (page.finalUrl);
 * the redirect finding itself references the originally requested URL.
 *
 * @param {string} url — the URL to scan
 * @returns {Promise<object>} { url, findings, scores, summary, http }
 * @throws propagates fetchPage errors (SSRF block, timeout, non-HTML, network)
 */
export async function scanUrl(url) {
  const page = await fetchPage(url);

  const state = parseHtml(page.html);
  const findings = checkPage(state, page.finalUrl);

  // HTTP-level checks
  // 4xx/5xx response: unshift so the most severe issue is listed first.
  if (page.statusCode >= 400) {
    const def = RULES['http-error'];
    findings.unshift({
      rule: 'http-error',
      severity: def.severity,
      file: page.finalUrl,
      message: `HTTP ${page.statusCode} error response`,
    });
  }

  if (page.redirected) {
    const def = RULES['redirect-detected'];
    findings.push({
      rule: 'redirect-detected',
      severity: def.severity,
      file: url,
      message: `URL redirected: ${url} → ${page.finalUrl}`,
    });
  }

  // Score is computed AFTER the HTTP-level findings so they deduct too.
  const seoScore = calculateScore(findings);

  // Tally findings per severity bucket; unknown severities are ignored.
  const summary = { critical: 0, high: 0, medium: 0, low: 0 };
  for (const f of findings) {
    if (summary[f.severity] !== undefined) {
      summary[f.severity]++;
    }
  }

  return {
    url: page.finalUrl,
    findings,
    scores: { seo: seoScore },
    summary,
    // Raw HTTP metadata for programmatic consumers (--json output).
    http: {
      statusCode: page.statusCode,
      redirected: page.redirected,
      finalUrl: page.finalUrl,
    },
  };
}
335
+
336
+ // ---------------------------------------------------------------------------
337
+ // CLI entry point
338
+ // ---------------------------------------------------------------------------
339
+
340
// Auto-run guard: when invoked directly with an argument
// (e.g. `node url-scanner.mjs https://example.com`), scan that URL and print
// the raw JSON result. The bin CLI wrapper clears process.argv before
// importing this module precisely so this block does NOT fire on import.
const args = process.argv.slice(2);
if (args.length > 0) {
  scanUrl(args[0]).then(result => {
    console.log(JSON.stringify(result, null, 2));
  }).catch(err => {
    // Any scan failure (SSRF block, timeout, non-HTML) exits non-zero.
    console.error(`Error: ${err.message}`);
    process.exit(1);
  });
}