@houseofmvps/claude-rank 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  <div align="center">
2
2
 
3
- # claude-rank
3
+ <img src="assets/hero-banner.png" alt="claude-rank — SEO/GEO/AEO Plugin for Claude Code" width="100%"/>
4
4
 
5
5
  ### The most comprehensive SEO/GEO/AEO plugin for Claude Code. 74+ rules. Auto-fix everything. Dominate search — traditional and AI.
6
6
 
@@ -4,7 +4,26 @@
4
4
 
5
5
  const args = process.argv.slice(2);
6
6
  const jsonFlag = args.includes('--json');
7
- const positional = args.filter(a => a !== '--json');
7
+ const singleFlag = args.includes('--single');
8
+ const reportFlag = args.includes('--report') ? args[args.indexOf('--report') + 1] : null;
9
+ const thresholdIdx = args.indexOf('--threshold');
10
+ const thresholdFlag = thresholdIdx !== -1 ? Number(args[thresholdIdx + 1]) : null;
11
+
12
+ // Parse --pages N flag (default: 50)
13
+ let maxPages = 50;
14
+ const pagesIdx = args.indexOf('--pages');
15
+ if (pagesIdx !== -1 && args[pagesIdx + 1]) {
16
+ const parsed = parseInt(args[pagesIdx + 1], 10);
17
+ if (!isNaN(parsed) && parsed > 0) maxPages = parsed;
18
+ }
19
+
20
+ const positional = args.filter((a, i) => {
21
+ if (a === '--json' || a === '--single') return false;
22
+ if (a === '--report' || a === '--threshold' || a === '--pages') return false;
23
+ // Skip the value after --report, --threshold, or --pages
24
+ if (i > 0 && (args[i - 1] === '--report' || args[i - 1] === '--threshold' || args[i - 1] === '--pages')) return false;
25
+ return true;
26
+ });
8
27
  const [command = 'scan', dir = '.'] = positional;
9
28
 
10
29
  const commands = {
@@ -17,7 +36,7 @@ const commands = {
17
36
  if (command === 'help' || command === '--help') {
18
37
  console.log(`claude-rank — SEO/GEO/AEO toolkit
19
38
 
20
- Usage: claude-rank <command> [directory|url] [--json]
39
+ Usage: claude-rank <command> [directory|url] [flags]
21
40
 
22
41
  Commands:
23
42
  scan Run core SEO scanner (default)
@@ -27,17 +46,28 @@ Commands:
27
46
  help Show this help message
28
47
 
29
48
  Flags:
30
- --json Output raw JSON (for programmatic use)
49
+ --json Output raw JSON (for programmatic use)
50
+ --single Scan only one page (skip multi-page crawl for URLs)
51
+ --pages N Max pages to crawl (default: 50, URL scanning only)
52
+ --report html Run all scanners and save HTML report to claude-rank-report.html
53
+ --threshold N Exit code 1 if score < N (for CI/CD pipelines)
31
54
 
32
55
  URL scanning:
33
- Pass a URL instead of a directory to scan a live page via HTTP.
56
+ Pass a URL instead of a directory to scan a live site via HTTP.
57
+ By default, crawls up to 50 pages following internal links.
58
+ Use --single to scan only the given URL without crawling.
34
59
  Only the "scan" command supports URL scanning.
35
60
 
36
61
  Examples:
37
62
  claude-rank scan ./my-project
38
63
  claude-rank scan https://savemrr.co
64
+ claude-rank scan https://savemrr.co --pages 10
65
+ claude-rank scan https://savemrr.co --single
39
66
  npx @houseofmvps/claude-rank geo .
40
67
  claude-rank scan ./site --json
68
+ claude-rank scan ./site --report html
69
+ claude-rank scan ./site --threshold 80
70
+ claude-rank scan . --report html --threshold 80
41
71
  `);
42
72
  process.exit(0);
43
73
  }
@@ -79,9 +109,11 @@ if (isUrl) {
79
109
  process.exit(1);
80
110
  }
81
111
 
82
- const { scanUrl } = await import(new URL('../tools/url-scanner.mjs', import.meta.url));
112
+ const { scanUrl, scanSite } = await import(new URL('../tools/url-scanner.mjs', import.meta.url));
83
113
  try {
84
- const result = await scanUrl(dir);
114
+ const result = singleFlag
115
+ ? await scanUrl(dir)
116
+ : await scanSite(dir, { maxPages });
85
117
  if (jsonFlag) {
86
118
  console.log(JSON.stringify(result, null, 2));
87
119
  } else {
@@ -93,12 +125,47 @@ if (isUrl) {
93
125
  }
94
126
  } else {
95
127
  // Directory-based scanning
96
- const mod = await import(new URL(toolPath, import.meta.url));
97
128
  const targetDir = resolve(dir);
98
129
 
99
- if (command === 'schema') {
130
+ // --report html: run ALL scanners, generate HTML report
131
+ if (reportFlag === 'html') {
132
+ const { writeFileSync } = await import('node:fs');
133
+ const { generateHtmlReport } = await import(new URL('../tools/lib/report-generator.mjs', import.meta.url));
134
+
135
+ const seoMod = await import(new URL('../tools/seo-scanner.mjs', import.meta.url));
136
+ const geoMod = await import(new URL('../tools/geo-scanner.mjs', import.meta.url));
137
+ const aeoMod = await import(new URL('../tools/aeo-scanner.mjs', import.meta.url));
138
+
139
+ const seo = seoMod.scanDirectory(targetDir);
140
+ const geo = geoMod.scanDirectory(targetDir);
141
+ const aeo = aeoMod.scanDirectory(targetDir);
142
+
143
+ const html = generateHtmlReport({
144
+ seo, geo, aeo,
145
+ target: dir,
146
+ timestamp: new Date().toISOString(),
147
+ });
148
+
149
+ const outPath = resolve('claude-rank-report.html');
150
+ writeFileSync(outPath, html, 'utf-8');
151
+ console.log(`HTML report saved to ${outPath}`);
152
+
153
+ // Also print terminal summaries
154
+ console.log(formatSeoReport(seo));
155
+ console.log(formatGeoReport(geo));
156
+ console.log(formatAeoReport(aeo));
157
+
158
+ // Check threshold against the primary (SEO) score
159
+ if (thresholdFlag != null) {
160
+ const score = seo.scores?.seo ?? 0;
161
+ if (score < thresholdFlag) {
162
+ console.error(`Score ${score} is below threshold ${thresholdFlag}`);
163
+ process.exit(1);
164
+ }
165
+ }
166
+ } else if (command === 'schema') {
100
167
  // schema-engine exports detectSchema (per-file) and findHtmlFiles via html-parser.
101
- // Build a directory-level result by importing the html-parser helper and scanning each file.
168
+ const mod = await import(new URL(toolPath, import.meta.url));
102
169
  const { findHtmlFiles } = await import(new URL('../tools/lib/html-parser.mjs', import.meta.url));
103
170
  const { readFileSync } = await import('node:fs');
104
171
  const files = findHtmlFiles(targetDir);
@@ -116,11 +183,22 @@ if (isUrl) {
116
183
  console.log(formatSchemaReport(results));
117
184
  }
118
185
  } else {
186
+ const mod = await import(new URL(toolPath, import.meta.url));
119
187
  const result = mod.scanDirectory(targetDir);
120
188
  if (jsonFlag) {
121
189
  console.log(JSON.stringify(result, null, 2));
122
190
  } else {
123
191
  console.log(formatters[command](result));
124
192
  }
193
+
194
+ // Check threshold
195
+ if (thresholdFlag != null) {
196
+ const scoreKey = command === 'scan' ? 'seo' : command;
197
+ const score = result.scores?.[scoreKey] ?? 0;
198
+ if (score < thresholdFlag) {
199
+ console.error(`Score ${score} is below threshold ${thresholdFlag}`);
200
+ process.exit(1);
201
+ }
202
+ }
125
203
  }
126
204
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@houseofmvps/claude-rank",
3
- "version": "1.2.1",
3
+ "version": "1.3.0",
4
4
  "description": "The most comprehensive SEO/GEO/AEO plugin for Claude Code. Audit, fix, and dominate search — traditional and AI.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,248 @@
1
+ /**
2
+ * crawler.mjs — Multi-page site crawler using BFS with concurrency control.
3
+ * Follows internal links on the same domain. Uses fetchPage() for SSRF protection.
4
+ * No external dependencies.
5
+ */
6
+
7
+ import { fetchPage } from './url-fetcher.mjs';
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // URL helpers (exported for testing)
11
+ // ---------------------------------------------------------------------------
12
+
13
+ /** File extensions to skip (non-HTML resources) */
14
+ const SKIP_EXTENSIONS = new Set([
15
+ '.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.bmp', '.avif',
16
+ '.css', '.js', '.mjs', '.cjs', '.map',
17
+ '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
18
+ '.zip', '.tar', '.gz', '.rar', '.7z',
19
+ '.mp3', '.mp4', '.wav', '.avi', '.mov', '.webm', '.ogg',
20
+ '.woff', '.woff2', '.ttf', '.eot', '.otf',
21
+ '.xml', '.json', '.csv', '.txt', '.rss', '.atom',
22
+ ]);
23
+
24
+ /** URL path patterns to skip (non-page routes) */
25
+ const SKIP_PATTERNS = [
26
+ /\/api\//i,
27
+ /\/auth\//i,
28
+ /\/login\b/i,
29
+ /\/logout\b/i,
30
+ /\/wp-admin/i,
31
+ /\/cdn-cgi\//i,
32
+ /\/wp-json\//i,
33
+ /\/feed\/?$/i,
34
+ /\/xmlrpc\.php/i,
35
+ /\/wp-login/i,
36
+ /\/admin\//i,
37
+ /\?/, // skip URLs with query strings to avoid crawl traps
38
+ ];
39
+
40
+ /**
41
+ * Normalize a URL: remove fragment, remove trailing slash (except root path).
42
+ * @param {string} urlStr
43
+ * @returns {string}
44
+ */
45
+ export function normalizeUrl(urlStr) {
46
+ try {
47
+ const url = new URL(urlStr);
48
+ url.hash = '';
49
+ // Remove trailing slash unless it's just the root "/"
50
+ if (url.pathname.length > 1 && url.pathname.endsWith('/')) {
51
+ url.pathname = url.pathname.slice(0, -1);
52
+ }
53
+ return url.href;
54
+ } catch {
55
+ return urlStr;
56
+ }
57
+ }
58
+
59
+ /**
60
+ * Check if a URL should be skipped based on extension or path pattern.
61
+ * @param {string} urlStr
62
+ * @returns {boolean}
63
+ */
64
+ export function shouldSkipUrl(urlStr) {
65
+ try {
66
+ const url = new URL(urlStr);
67
+ const pathname = url.pathname.toLowerCase();
68
+
69
+ // Check file extension
70
+ const lastDot = pathname.lastIndexOf('.');
71
+ if (lastDot !== -1) {
72
+ const ext = pathname.slice(lastDot);
73
+ if (SKIP_EXTENSIONS.has(ext)) return true;
74
+ }
75
+
76
+ // Check path patterns
77
+ for (const pattern of SKIP_PATTERNS) {
78
+ if (pattern.test(url.pathname + url.search)) return true;
79
+ }
80
+
81
+ return false;
82
+ } catch {
83
+ return true;
84
+ }
85
+ }
86
+
87
+ /**
88
+ * Check if two URLs share the same hostname.
89
+ * @param {string} urlA
90
+ * @param {string} urlB
91
+ * @returns {boolean}
92
+ */
93
+ export function isSameDomain(urlA, urlB) {
94
+ try {
95
+ const a = new URL(urlA);
96
+ const b = new URL(urlB);
97
+ return a.hostname === b.hostname;
98
+ } catch {
99
+ return false;
100
+ }
101
+ }
102
+
103
+ /**
104
+ * Extract internal links from HTML content.
105
+ * Returns an array of absolute URL strings on the same domain as baseUrl.
106
+ * @param {string} html
107
+ * @param {string} baseUrl
108
+ * @returns {string[]}
109
+ */
110
+ export function extractLinks(html, baseUrl) {
111
+ const links = [];
112
+ // Match <a href="..."> with both single and double quotes
113
+ const regex = /<a\s[^>]*href\s*=\s*(?:"([^"]*)"|'([^']*)')/gi;
114
+ let match;
115
+
116
+ while ((match = regex.exec(html)) !== null) {
117
+ const href = match[1] ?? match[2];
118
+ if (!href) continue;
119
+
120
+ // Skip javascript:, mailto:, tel:, data: schemes
121
+ if (/^(javascript|mailto|tel|data):/i.test(href)) continue;
122
+ // Skip empty or fragment-only
123
+ if (href === '' || href === '#' || href.startsWith('#')) continue;
124
+
125
+ try {
126
+ const resolved = new URL(href, baseUrl).href;
127
+ const normalized = normalizeUrl(resolved);
128
+
129
+ if (isSameDomain(normalized, baseUrl) && !shouldSkipUrl(normalized)) {
130
+ links.push(normalized);
131
+ }
132
+ } catch {
133
+ // Invalid URL — skip
134
+ }
135
+ }
136
+
137
+ // Deduplicate
138
+ return [...new Set(links)];
139
+ }
140
+
141
+ // ---------------------------------------------------------------------------
142
+ // Semaphore for concurrency control
143
+ // ---------------------------------------------------------------------------
144
+
145
+ class Semaphore {
146
+ constructor(max) {
147
+ this._max = max;
148
+ this._active = 0;
149
+ this._queue = [];
150
+ }
151
+
152
+ async acquire() {
153
+ if (this._active < this._max) {
154
+ this._active++;
155
+ return;
156
+ }
157
+ return new Promise(resolve => {
158
+ this._queue.push(resolve);
159
+ });
160
+ }
161
+
162
+ release() {
163
+ this._active--;
164
+ if (this._queue.length > 0) {
165
+ this._active++;
166
+ const next = this._queue.shift();
167
+ next();
168
+ }
169
+ }
170
+ }
171
+
172
+ // ---------------------------------------------------------------------------
173
+ // Main crawler
174
+ // ---------------------------------------------------------------------------
175
+
176
+ /**
177
+ * Crawl a site starting from startUrl, following internal links (BFS).
178
+ * @param {string} startUrl — starting URL
179
+ * @param {object} options
180
+ * @param {number} [options.maxPages=50] — max pages to crawl
181
+ * @param {number} [options.concurrency=3] — concurrent fetches
182
+ * @param {function} [options.onPage] — callback(url, html) called per page
183
+ * @returns {Promise<{ pages: Array<{url: string, html: string, statusCode: number}>, errors: Array<{url: string, error: string}> }>}
184
+ */
185
+ export async function crawlSite(startUrl, options = {}) {
186
+ const {
187
+ maxPages = 50,
188
+ concurrency = 3,
189
+ onPage,
190
+ } = options;
191
+
192
+ const normalizedStart = normalizeUrl(startUrl);
193
+ const visited = new Set();
194
+ const queue = [normalizedStart]; // BFS queue
195
+ const pages = [];
196
+ const errors = [];
197
+ const semaphore = new Semaphore(concurrency);
198
+
199
+ let queued = new Set([normalizedStart]);
200
+ let pagesProcessed = 0;
201
+
202
+ // Process BFS in waves for concurrency
203
+ while (queue.length > 0 && pagesProcessed < maxPages) {
204
+ // Take a batch from the queue (up to concurrency size)
205
+ const batchSize = Math.min(queue.length, maxPages - pagesProcessed, concurrency);
206
+ const batch = queue.splice(0, batchSize);
207
+
208
+ const promises = batch.map(async (url) => {
209
+ if (visited.has(url) || pagesProcessed >= maxPages) return;
210
+ visited.add(url);
211
+
212
+ await semaphore.acquire();
213
+ try {
214
+ pagesProcessed++;
215
+ const num = pagesProcessed;
216
+ process.stderr.write(`Crawling [${num}/${maxPages}] ${url}\n`);
217
+
218
+ const result = await fetchPage(url);
219
+ pages.push({
220
+ url: result.finalUrl,
221
+ html: result.html,
222
+ statusCode: result.statusCode,
223
+ });
224
+
225
+ if (onPage) {
226
+ onPage(result.finalUrl, result.html);
227
+ }
228
+
229
+ // Extract links and add new ones to queue
230
+ const links = extractLinks(result.html, result.finalUrl);
231
+ for (const link of links) {
232
+ if (!queued.has(link) && !visited.has(link) && pagesProcessed + queue.length < maxPages) {
233
+ queued.add(link);
234
+ queue.push(link);
235
+ }
236
+ }
237
+ } catch (err) {
238
+ errors.push({ url, error: err.message });
239
+ } finally {
240
+ semaphore.release();
241
+ }
242
+ });
243
+
244
+ await Promise.all(promises);
245
+ }
246
+
247
+ return { pages, errors };
248
+ }
@@ -444,6 +444,51 @@ export function parseHtml(htmlString) {
444
444
  return state;
445
445
  }
446
446
 
447
+ // ---------------------------------------------------------------------------
448
+ // detectPageType — classify page type from URL path + parsed state
449
+ // ---------------------------------------------------------------------------
450
+
451
+ /**
452
+ * Page type patterns — ordered by priority (first match wins).
453
+ * Each entry: { type, patterns[] } where patterns are matched against
454
+ * the lowercase URL path, title, and h1 text.
455
+ */
456
+ const PAGE_TYPE_RULES = [
457
+ { type: 'contact', patterns: ['contact', 'get in touch', 'reach us'] },
458
+ { type: 'terms', patterns: ['terms', 'conditions', 'tos', 'terms-of-service'] },
459
+ { type: 'privacy', patterns: ['privacy', 'cookie policy', 'gdpr'] },
460
+ { type: 'legal', patterns: ['legal', 'disclaimer', 'imprint'] },
461
+ { type: 'login', patterns: ['login', 'signin', 'sign-in', 'register', 'signup'] },
462
+ { type: '404', patterns: ['404', 'not found', 'page not found'] },
463
+ { type: 'sitemap', patterns: ['sitemap'] },
464
+ ];
465
+
466
+ /**
467
+ * Detect the page type from the file path / URL and parsed HTML state.
468
+ * Returns a page type string: 'contact', 'terms', 'privacy', 'legal',
469
+ * 'login', '404', 'sitemap', or 'content' (default).
470
+ *
471
+ * @param {string} filePath — file path or URL (used for path-based signals)
472
+ * @param {object} state — PageState from parseHtml
473
+ * @returns {string} page type
474
+ */
475
+ export function detectPageType(filePath, state) {
476
+ // Build a combined haystack from path, title, and h1
477
+ const pathLower = (filePath || '').toLowerCase();
478
+ const titleLower = (state.titleText || '').toLowerCase();
479
+ const h1Lower = (state.h1Text || '').toLowerCase();
480
+
481
+ for (const { type, patterns } of PAGE_TYPE_RULES) {
482
+ for (const pattern of patterns) {
483
+ if (pathLower.includes(pattern) || titleLower.includes(pattern) || h1Lower.includes(pattern)) {
484
+ return type;
485
+ }
486
+ }
487
+ }
488
+
489
+ return 'content';
490
+ }
491
+
447
492
  // ---------------------------------------------------------------------------
448
493
  // parseHtmlFile — read file then parseHtml
449
494
  // ---------------------------------------------------------------------------
@@ -0,0 +1,160 @@
1
+ /**
2
+ * report-generator.mjs — Generate self-contained HTML audit reports.
3
+ * No external dependencies. All CSS is inline.
4
+ */
5
+
6
+ /**
7
+ * Generate a self-contained HTML report from scan results.
8
+ * @param {object} options
9
+ * @param {object} options.seo — SEO scan result (optional)
10
+ * @param {object} options.geo — GEO scan result (optional)
11
+ * @param {object} options.aeo — AEO scan result (optional)
12
+ * @param {string} options.target — directory or URL that was scanned
13
+ * @param {string} options.timestamp — ISO timestamp
14
+ * @returns {string} — complete HTML document
15
+ */
16
+ export function generateHtmlReport({ seo, geo, aeo, target, timestamp }) {
17
+ const scanners = [];
18
+ if (seo && !seo.skipped) scanners.push({ label: 'SEO', key: 'seo', data: seo });
19
+ if (geo && !geo.skipped) scanners.push({ label: 'GEO', key: 'geo', data: geo });
20
+ if (aeo && !aeo.skipped) scanners.push({ label: 'AEO', key: 'aeo', data: aeo });
21
+
22
+ const scoreCards = scanners.map(s => {
23
+ const score = s.data.scores[s.key];
24
+ const { color, label } = scoreStyle(score);
25
+ return `
26
+ <div class="score-card">
27
+ <div class="score-ring" style="--score: ${score}; --color: ${color}">
28
+ <svg viewBox="0 0 120 120">
29
+ <circle cx="60" cy="60" r="52" class="ring-bg"/>
30
+ <circle cx="60" cy="60" r="52" class="ring-fill" style="stroke-dashoffset: calc(327 - (327 * ${score} / 100))"/>
31
+ </svg>
32
+ <span class="score-value">${score}</span>
33
+ </div>
34
+ <div class="score-label" style="color: ${color}">${label}</div>
35
+ <div class="score-type">${s.label}</div>
36
+ <div class="score-meta">${s.data.files_scanned} files &middot; ${s.data.findings.length} findings</div>
37
+ </div>`;
38
+ }).join('\n');
39
+
40
+ const allFindings = [];
41
+ for (const s of scanners) {
42
+ for (const f of s.data.findings) {
43
+ allFindings.push({ ...f, scanner: s.label });
44
+ }
45
+ }
46
+
47
+ const SEVERITY_ORDER = { critical: 0, high: 1, medium: 2, low: 3 };
48
+ allFindings.sort((a, b) => (SEVERITY_ORDER[a.severity] ?? 9) - (SEVERITY_ORDER[b.severity] ?? 9));
49
+
50
+ // Group by rule
51
+ const groups = new Map();
52
+ for (const f of allFindings) {
53
+ const key = `${f.scanner}:${f.rule}`;
54
+ if (!groups.has(key)) {
55
+ groups.set(key, { rule: f.rule, severity: f.severity, message: f.message, scanner: f.scanner, files: [] });
56
+ }
57
+ if (f.file && !groups.get(key).files.includes(f.file)) {
58
+ groups.get(key).files.push(f.file);
59
+ }
60
+ }
61
+
62
+ const findingsRows = [...groups.values()].map(g => {
63
+ const badgeColor = severityBadgeColor(g.severity);
64
+ const filesStr = g.files.length > 0
65
+ ? g.files.slice(0, 3).map(f => esc(f)).join(', ') + (g.files.length > 3 ? `, +${g.files.length - 3} more` : '')
66
+ : '—';
67
+ return `
68
+ <tr>
69
+ <td><span class="badge" style="background: ${badgeColor}">${esc(g.severity.toUpperCase())}</span></td>
70
+ <td class="rule-name">${esc(g.rule)}<span class="scanner-tag">${esc(g.scanner)}</span></td>
71
+ <td>${esc(g.message)}</td>
72
+ <td class="files-cell">${filesStr}</td>
73
+ </tr>`;
74
+ }).join('\n');
75
+
76
+ const displayDate = timestamp ? new Date(timestamp).toLocaleString('en-US', {
77
+ dateStyle: 'long', timeStyle: 'short',
78
+ }) : '';
79
+
80
+ return `<!DOCTYPE html>
81
+ <html lang="en">
82
+ <head>
83
+ <meta charset="utf-8"/>
84
+ <meta name="viewport" content="width=device-width, initial-scale=1"/>
85
+ <title>claude-rank Audit Report — ${esc(target)}</title>
86
+ <style>
87
+ *{margin:0;padding:0;box-sizing:border-box}
88
+ body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;background:#0f172a;color:#e2e8f0;line-height:1.6;padding:2rem}
89
+ .container{max-width:960px;margin:0 auto}
90
+ header{text-align:center;margin-bottom:2.5rem;padding-bottom:1.5rem;border-bottom:1px solid #1e293b}
91
+ header h1{font-size:1.75rem;color:#f8fafc;margin-bottom:.25rem}
92
+ header p{color:#94a3b8;font-size:.875rem}
93
+ .scores{display:flex;gap:2rem;justify-content:center;flex-wrap:wrap;margin-bottom:2.5rem}
94
+ .score-card{text-align:center;background:#1e293b;border-radius:12px;padding:1.5rem 2rem;min-width:180px}
95
+ .score-ring{position:relative;width:100px;height:100px;margin:0 auto .75rem}
96
+ .score-ring svg{width:100%;height:100%;transform:rotate(-90deg)}
97
+ .ring-bg{fill:none;stroke:#334155;stroke-width:8}
98
+ .ring-fill{fill:none;stroke:var(--color);stroke-width:8;stroke-linecap:round;stroke-dasharray:327;transition:stroke-dashoffset .5s}
99
+ .score-value{position:absolute;inset:0;display:flex;align-items:center;justify-content:center;font-size:1.5rem;font-weight:700;color:#f8fafc}
100
+ .score-label{font-weight:600;font-size:.875rem;text-transform:uppercase;letter-spacing:.05em}
101
+ .score-type{font-size:1.125rem;font-weight:600;color:#f8fafc;margin-top:.25rem}
102
+ .score-meta{color:#64748b;font-size:.75rem;margin-top:.25rem}
103
+ h2{font-size:1.25rem;color:#f8fafc;margin-bottom:1rem}
104
+ table{width:100%;border-collapse:collapse;font-size:.85rem;margin-bottom:2rem}
105
+ th{text-align:left;color:#94a3b8;font-weight:600;padding:.75rem .5rem;border-bottom:2px solid #1e293b}
106
+ td{padding:.65rem .5rem;border-bottom:1px solid #1e293b;vertical-align:top}
107
+ .badge{display:inline-block;padding:2px 8px;border-radius:4px;font-size:.7rem;font-weight:700;color:#fff;text-transform:uppercase}
108
+ .rule-name{font-weight:600;color:#f8fafc}
109
+ .scanner-tag{margin-left:.5rem;font-size:.65rem;color:#64748b;font-weight:400}
110
+ .files-cell{color:#94a3b8;font-size:.8rem;max-width:200px;word-break:break-all}
111
+ footer{text-align:center;color:#475569;font-size:.75rem;margin-top:2rem;padding-top:1rem;border-top:1px solid #1e293b}
112
+ footer a{color:#64748b}
113
+ .empty{text-align:center;color:#22c55e;padding:2rem;font-size:1rem}
114
+ @media print{body{background:#fff;color:#1e293b;padding:1rem}.score-card{background:#f1f5f9}th{color:#475569;border-color:#cbd5e1}td{border-color:#e2e8f0}.rule-name{color:#0f172a}header{border-color:#cbd5e1}footer{border-color:#cbd5e1;color:#94a3b8}}
115
+ </style>
116
+ </head>
117
+ <body>
118
+ <div class="container">
119
+ <header>
120
+ <h1>claude-rank Audit Report</h1>
121
+ <p>${esc(target)} &mdash; ${esc(displayDate)}</p>
122
+ </header>
123
+
124
+ <section class="scores">
125
+ ${scoreCards || '<p style="color:#94a3b8">No scan results available.</p>'}
126
+ </section>
127
+
128
+ <h2>Findings</h2>
129
+ ${groups.size > 0 ? `
130
+ <table>
131
+ <thead><tr><th>Severity</th><th>Rule</th><th>Message</th><th>Files</th></tr></thead>
132
+ <tbody>
133
+ ${findingsRows}
134
+ </tbody>
135
+ </table>` : '<div class="empty">No findings — looking great!</div>'}
136
+
137
+ <footer>Generated by claude-rank v1.3.0 &mdash; <a href="https://github.com/Houseofmvps/claude-rank">github.com/Houseofmvps/claude-rank</a></footer>
138
+ </div>
139
+ </body>
140
+ </html>`;
141
+ }
142
+
143
+ function scoreStyle(score) {
144
+ if (score >= 90) return { color: '#22c55e', label: 'Excellent' };
145
+ if (score >= 80) return { color: '#3b82f6', label: 'Good' };
146
+ if (score >= 60) return { color: '#eab308', label: 'Needs Work' };
147
+ return { color: '#ef4444', label: 'Poor' };
148
+ }
149
+
150
+ function severityBadgeColor(severity) {
151
+ if (severity === 'critical') return '#dc2626';
152
+ if (severity === 'high') return '#ef4444';
153
+ if (severity === 'medium') return '#eab308';
154
+ return '#64748b';
155
+ }
156
+
157
+ function esc(str) {
158
+ if (!str) return '';
159
+ return String(str).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;');
160
+ }
@@ -5,7 +5,7 @@
5
5
 
6
6
  import fs from 'node:fs';
7
7
  import path from 'node:path';
8
- import { parseHtml, findHtmlFiles } from './lib/html-parser.mjs';
8
+ import { parseHtml, findHtmlFiles, detectPageType } from './lib/html-parser.mjs';
9
9
  import { checkFileSize } from './lib/security.mjs';
10
10
 
11
11
  // ---------------------------------------------------------------------------
@@ -97,6 +97,13 @@ const RULES = {
97
97
  // Per-file rule checks
98
98
  // ---------------------------------------------------------------------------
99
99
 
100
+ // Page types where thin content is expected and should not be flagged
101
+ const THIN_CONTENT_EXEMPT = new Set(['contact', 'terms', 'privacy', 'legal', 'login', '404', 'sitemap']);
102
+ // Page types where missing analytics is expected
103
+ const NO_ANALYTICS_EXEMPT = new Set(['terms', 'privacy', 'legal']);
104
+ // Page types where missing OG image is expected
105
+ const NO_OG_IMAGE_EXEMPT = new Set(['terms', 'privacy', 'legal']);
106
+
100
107
  /**
101
108
  * Run per-file checks. Returns array of finding objects.
102
109
  * @param {object} state — PageState from parseHtml
@@ -107,6 +114,7 @@ const RULES = {
107
114
  function checkFile(state, filePath, rootDir, opts = {}) {
108
115
  const findings = [];
109
116
  const rel = path.relative(rootDir, filePath);
117
+ const pageType = detectPageType(filePath, state);
110
118
 
111
119
  function add(rule, message, context = {}) {
112
120
  const def = RULES[rule];
@@ -115,6 +123,7 @@ function checkFile(state, filePath, rootDir, opts = {}) {
115
123
  severity: def.severity,
116
124
  file: rel,
117
125
  message,
126
+ pageType,
118
127
  ...context,
119
128
  });
120
129
  }
@@ -151,7 +160,7 @@ function checkFile(state, filePath, rootDir, opts = {}) {
151
160
  add('missing-h1', 'Page has no <h1> heading');
152
161
  }
153
162
 
154
- if (state.wordCount > 0 && state.wordCount < 300) {
163
+ if (state.wordCount > 0 && state.wordCount < 300 && !THIN_CONTENT_EXEMPT.has(pageType)) {
155
164
  add('thin-content', `Page has only ${state.wordCount} words (minimum recommended: 300)`);
156
165
  }
157
166
 
@@ -196,7 +205,7 @@ function checkFile(state, filePath, rootDir, opts = {}) {
196
205
  add('missing-og-description', 'Page is missing og:description Open Graph tag');
197
206
  }
198
207
 
199
- if (!state.hasOgImage) {
208
+ if (!state.hasOgImage && !NO_OG_IMAGE_EXEMPT.has(pageType)) {
200
209
  add('missing-og-image', 'Page is missing og:image Open Graph tag');
201
210
  }
202
211
 
@@ -238,7 +247,7 @@ function checkFile(state, filePath, rootDir, opts = {}) {
238
247
  add('missing-favicon', 'Page is missing a favicon link');
239
248
  }
240
249
 
241
- if (!state.hasAnalytics) {
250
+ if (!state.hasAnalytics && !NO_ANALYTICS_EXEMPT.has(pageType)) {
242
251
  add('no-analytics', 'No analytics provider detected on this page');
243
252
  }
244
253
 
@@ -2,10 +2,12 @@
2
2
  * url-scanner.mjs — Scan a live URL for SEO issues.
3
3
  * Fetches HTML from a URL and runs the same per-page analysis as seo-scanner.
4
4
  * Cross-page rules (duplicates, orphans, canonicals) are skipped for single-URL scans.
5
+ * scanSite() crawls multiple pages and adds cross-page analysis.
5
6
  */
6
7
 
7
- import { parseHtml } from './lib/html-parser.mjs';
8
+ import { parseHtml, detectPageType } from './lib/html-parser.mjs';
8
9
  import { fetchPage } from './lib/url-fetcher.mjs';
10
+ import { crawlSite } from './lib/crawler.mjs';
9
11
 
10
12
  // ---------------------------------------------------------------------------
11
13
  // Rule definitions (same as seo-scanner, minus cross-page-only rules)
@@ -52,6 +54,11 @@ const RULES = {
52
54
  'no-manifest': { severity: 'low', deduction: 2 },
53
55
  'all-scripts-blocking': { severity: 'low', deduction: 2 },
54
56
 
57
+ // Cross-page rules (multi-page crawl only)
58
+ 'duplicate-title': { severity: 'high', deduction: 10 },
59
+ 'duplicate-meta-description':{ severity: 'high', deduction: 10 },
60
+ 'canonical-conflict': { severity: 'high', deduction: 10 },
61
+
55
62
  // HTTP-level rules (URL-scan only)
56
63
  'http-error': { severity: 'critical', deduction: 20 },
57
64
  'redirect-detected': { severity: 'low', deduction: 2 },
@@ -61,8 +68,16 @@ const RULES = {
61
68
  // Per-page rule checks (reused from seo-scanner logic)
62
69
  // ---------------------------------------------------------------------------
63
70
 
71
+ // Page types where thin content is expected and should not be flagged
72
+ const THIN_CONTENT_EXEMPT = new Set(['contact', 'terms', 'privacy', 'legal', 'login', '404', 'sitemap']);
73
+ // Page types where missing analytics is expected
74
+ const NO_ANALYTICS_EXEMPT = new Set(['terms', 'privacy', 'legal']);
75
+ // Page types where missing OG image is expected
76
+ const NO_OG_IMAGE_EXEMPT = new Set(['terms', 'privacy', 'legal']);
77
+
64
78
  function checkPage(state, pageUrl) {
65
79
  const findings = [];
80
+ const pageType = detectPageType(pageUrl, state);
66
81
 
67
82
  function add(rule, message, context = {}) {
68
83
  const def = RULES[rule];
@@ -71,6 +86,7 @@ function checkPage(state, pageUrl) {
71
86
  severity: def.severity,
72
87
  file: pageUrl,
73
88
  message,
89
+ pageType,
74
90
  ...context,
75
91
  });
76
92
  }
@@ -109,7 +125,7 @@ function checkPage(state, pageUrl) {
109
125
  add('missing-h1', 'Page has no <h1> heading');
110
126
  }
111
127
 
112
- if (state.wordCount > 0 && state.wordCount < 300) {
128
+ if (state.wordCount > 0 && state.wordCount < 300 && !THIN_CONTENT_EXEMPT.has(pageType)) {
113
129
  add('thin-content', `Page has only ${state.wordCount} words (minimum recommended: 300)`);
114
130
  }
115
131
 
@@ -150,7 +166,7 @@ function checkPage(state, pageUrl) {
150
166
  add('missing-og-description', 'Page is missing og:description Open Graph tag');
151
167
  }
152
168
 
153
- if (!state.hasOgImage) {
169
+ if (!state.hasOgImage && !NO_OG_IMAGE_EXEMPT.has(pageType)) {
154
170
  add('missing-og-image', 'Page is missing og:image Open Graph tag');
155
171
  }
156
172
 
@@ -191,7 +207,7 @@ function checkPage(state, pageUrl) {
191
207
  add('missing-favicon', 'Page is missing a favicon link');
192
208
  }
193
209
 
194
- if (!state.hasAnalytics) {
210
+ if (!state.hasAnalytics && !NO_ANALYTICS_EXEMPT.has(pageType)) {
195
211
  add('no-analytics', 'No analytics provider detected on this page');
196
212
  }
197
213
 
@@ -333,6 +349,151 @@ export async function scanUrl(url) {
333
349
  };
334
350
  }
335
351
 
352
+ // ---------------------------------------------------------------------------
353
+ // Cross-page checks (for multi-page crawl)
354
+ // ---------------------------------------------------------------------------
355
+
356
+ function crossPageChecks(allStates) {
357
+ const findings = [];
358
+
359
+ // --- Duplicate title detection ---
360
+ const titleMap = new Map();
361
+ for (const { url, state } of allStates) {
362
+ if (state.hasTitle && state.titleText) {
363
+ const title = state.titleText.trim().toLowerCase();
364
+ if (!titleMap.has(title)) titleMap.set(title, []);
365
+ titleMap.get(title).push(url);
366
+ }
367
+ }
368
+ for (const [title, urls] of titleMap) {
369
+ if (urls.length > 1) {
370
+ for (const pageUrl of urls) {
371
+ findings.push({
372
+ rule: 'duplicate-title',
373
+ severity: RULES['duplicate-title'].severity,
374
+ file: pageUrl,
375
+ message: `Duplicate title "${title}" shared across ${urls.length} pages`,
376
+ duplicates: urls,
377
+ });
378
+ }
379
+ }
380
+ }
381
+
382
+ // --- Duplicate meta description detection ---
383
+ const descMap = new Map();
384
+ for (const { url, state } of allStates) {
385
+ if (state.hasMetaDescription && state.metaDescriptionText) {
386
+ const desc = state.metaDescriptionText.trim().toLowerCase();
387
+ if (!descMap.has(desc)) descMap.set(desc, []);
388
+ descMap.get(desc).push(url);
389
+ }
390
+ }
391
+ for (const [, urls] of descMap) {
392
+ if (urls.length > 1) {
393
+ for (const pageUrl of urls) {
394
+ findings.push({
395
+ rule: 'duplicate-meta-description',
396
+ severity: RULES['duplicate-meta-description'].severity,
397
+ file: pageUrl,
398
+ message: `Duplicate meta description shared across ${urls.length} pages`,
399
+ duplicates: urls,
400
+ });
401
+ }
402
+ }
403
+ }
404
+
405
+ // --- Canonical conflict detection ---
406
+ const canonicalMap = new Map();
407
+ for (const { url, state } of allStates) {
408
+ if (state.hasCanonical && state.canonicalUrl) {
409
+ const canonical = state.canonicalUrl.trim();
410
+ if (!canonicalMap.has(canonical)) canonicalMap.set(canonical, []);
411
+ canonicalMap.get(canonical).push(url);
412
+ }
413
+ }
414
+ for (const [canonical, urls] of canonicalMap) {
415
+ if (urls.length > 1) {
416
+ for (const pageUrl of urls) {
417
+ findings.push({
418
+ rule: 'canonical-conflict',
419
+ severity: RULES['canonical-conflict'].severity,
420
+ file: pageUrl,
421
+ message: `Multiple pages share canonical URL "${canonical}"`,
422
+ duplicates: urls,
423
+ });
424
+ }
425
+ }
426
+ }
427
+
428
+ return findings;
429
+ }
430
+
431
+ // ---------------------------------------------------------------------------
432
+ // scanSite — crawl + analyse multiple pages
433
+ // ---------------------------------------------------------------------------
434
+
435
+ /**
436
+ * Crawl and scan an entire site.
437
+ * @param {string} startUrl
438
+ * @param {object} [options] — passed to crawlSite (maxPages, concurrency)
439
+ * @returns {Promise<object>} — { url, pages_scanned, files_scanned, findings, scores, summary, errors }
440
+ */
441
+ export async function scanSite(startUrl, options = {}) {
442
+ // 1. Crawl the site
443
+ const crawlResult = await crawlSite(startUrl, options);
444
+
445
+ // 2. Parse each page and run per-page checks
446
+ const allStates = [];
447
+ const perPageFindings = [];
448
+
449
+ for (const page of crawlResult.pages) {
450
+ const state = parseHtml(page.html);
451
+ allStates.push({ url: page.url, state });
452
+
453
+ const pageFindings = checkPage(state, page.url);
454
+
455
+ // HTTP-level checks
456
+ if (page.statusCode >= 400) {
457
+ const def = RULES['http-error'];
458
+ pageFindings.unshift({
459
+ rule: 'http-error',
460
+ severity: def.severity,
461
+ file: page.url,
462
+ message: `HTTP ${page.statusCode} error response`,
463
+ });
464
+ }
465
+
466
+ perPageFindings.push(...pageFindings);
467
+ }
468
+
469
+ // 3. Run cross-page checks (duplicate titles, descriptions, canonical conflicts)
470
+ const multiPage = allStates.length > 1;
471
+ const crossFindings = multiPage ? crossPageChecks(allStates) : [];
472
+
473
+ const allFindings = [...perPageFindings, ...crossFindings];
474
+
475
+ // 4. Calculate deduplicated score
476
+ const seoScore = calculateScore(allFindings);
477
+
478
+ // 5. Summary counts
479
+ const summary = { critical: 0, high: 0, medium: 0, low: 0 };
480
+ for (const f of allFindings) {
481
+ if (summary[f.severity] !== undefined) {
482
+ summary[f.severity]++;
483
+ }
484
+ }
485
+
486
+ return {
487
+ url: startUrl,
488
+ pages_scanned: crawlResult.pages.length,
489
+ files_scanned: crawlResult.pages.length,
490
+ findings: allFindings,
491
+ scores: { seo: seoScore },
492
+ summary,
493
+ errors: crawlResult.errors,
494
+ };
495
+ }
496
+
336
497
  // ---------------------------------------------------------------------------
337
498
  // CLI entry point
338
499
  // ---------------------------------------------------------------------------