seo-intel 1.1.6 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.js CHANGED
@@ -24,11 +24,11 @@ import { getNextCrawlTarget, needsAnalysis, getCrawlStatus, loadAllConfigs } fro
24
24
  import {
25
25
  getDb, upsertDomain, upsertPage, insertExtraction,
26
26
  insertKeywords, insertHeadings, insertLinks, insertPageSchemas,
27
- upsertTechnical,
27
+ upsertTechnical, pruneStaleDomains,
28
28
  getCompetitorSummary, getKeywordMatrix, getHeadingStructure,
29
29
  getPageHash, getSchemasByProject
30
30
  } from './db/db.js';
31
- import { generateHtmlDashboard, generateMultiDashboard } from './reports/generate-html.js';
31
+ import { generateMultiDashboard } from './reports/generate-html.js';
32
32
  import { buildTechnicalActions } from './exports/technical.js';
33
33
  import { buildCompetitiveActions } from './exports/competitive.js';
34
34
  import { buildSuggestiveActions } from './exports/suggestive.js';
@@ -393,6 +393,21 @@ program
393
393
  }
394
394
  }
395
395
 
396
+ // ── Prune stale domains (DB entries no longer in config) ─────────────
397
+ {
398
+ const configDomains = new Set([
399
+ config.target?.domain,
400
+ ...(config.owned || []).map(o => o.domain),
401
+ ...(config.competitors || []).map(c => c.domain),
402
+ ].filter(Boolean));
403
+
404
+ const pruned = pruneStaleDomains(db, project, configDomains);
405
+ if (pruned.length) {
406
+ console.log(chalk.yellow(`\n 🧹 Pruned ${pruned.length} stale domain(s) from DB (no longer in config):`));
407
+ for (const d of pruned) console.log(chalk.dim(` − ${d}`));
408
+ }
409
+ }
410
+
396
411
  // ── Tier gate: Free tier = crawl-only, no AI extraction ──────────────
397
412
  if (opts.extract !== false && !isPro()) {
398
413
  console.log(chalk.dim('\n ℹ Free tier: crawl-only mode (AI extraction requires Solo/Agency)'));
@@ -488,6 +503,9 @@ program
488
503
  publishedDate: page.publishedDate || null,
489
504
  modifiedDate: page.modifiedDate || null,
490
505
  contentHash: page.contentHash || null,
506
+ title: page.title || null,
507
+ metaDesc: page.metaDesc || null,
508
+ bodyText: page.fullBodyText || page.bodyText || null,
491
509
  });
492
510
  const pageId = pageRes?.id;
493
511
 
@@ -579,9 +597,10 @@ program
579
597
  if (totalSkipped > 0) console.log(chalk.blue(`\n📊 Incremental: ${totalSkipped} unchanged pages skipped (same content hash)`));
580
598
  if (totalBlocked > 0) console.log(chalk.red(`\n⛔ ${totalBlocked} domain(s) blocked (rate-limited or WAF)`));
581
599
  const elapsed = ((Date.now() - crawlStart) / 1000).toFixed(1);
582
- // Auto-regenerate dashboard so it never goes stale after a crawl
600
+ // Auto-regenerate dashboard (always multi-project so all projects stay current)
583
601
  try {
584
- const dashPath = generateHtmlDashboard(db, project, config);
602
+ const allConfigs = loadAllConfigs();
603
+ const dashPath = generateMultiDashboard(db, allConfigs);
585
604
  console.log(chalk.dim(` 📊 Dashboard refreshed → ${dashPath}`));
586
605
  } catch (dashErr) {
587
606
  console.log(chalk.dim(` ⚠ Dashboard refresh skipped: ${dashErr.message}`));
@@ -697,9 +716,10 @@ program
697
716
  // Print summary
698
717
  printAnalysisSummary(analysis, project);
699
718
 
700
- // Auto-regenerate dashboard so it reflects the new analysis immediately
719
+ // Auto-regenerate dashboard (always multi-project so all projects stay current)
701
720
  try {
702
- const dashPath = generateHtmlDashboard(db, project, config);
721
+ const allConfigs = loadAllConfigs();
722
+ const dashPath = generateMultiDashboard(db, allConfigs);
703
723
  console.log(chalk.dim(` 📊 Dashboard refreshed → ${dashPath}`));
704
724
  } catch (dashErr) {
705
725
  console.log(chalk.dim(` ⚠ Dashboard refresh skipped: ${dashErr.message}`));
@@ -1074,6 +1094,9 @@ program
1074
1094
  publishedDate: page.publishedDate || null,
1075
1095
  modifiedDate: page.modifiedDate || null,
1076
1096
  contentHash: page.contentHash || null,
1097
+ title: page.title || null,
1098
+ metaDesc: page.metaDesc || null,
1099
+ bodyText: page.fullBodyText || page.bodyText || null,
1077
1100
  });
1078
1101
  const pageId = pageRes?.id;
1079
1102
 
@@ -1379,6 +1402,7 @@ program
1379
1402
  .option('--add-owned <domain>', 'Add an owned subdomain')
1380
1403
  .option('--remove-owned <domain>', 'Remove an owned subdomain')
1381
1404
  .option('--set-target <domain>', 'Change the target domain')
1405
+ .option('--prune', 'Remove DB data for domains no longer in config')
1382
1406
  .action((project, opts) => {
1383
1407
  const configPath = join(__dirname, `config/${project}.json`);
1384
1408
  let config;
@@ -1471,6 +1495,24 @@ program
1471
1495
  console.log(chalk.dim(`\n Saved → config/${project}.json`));
1472
1496
  }
1473
1497
 
1498
+ // ── Prune stale DB data (auto on remove, or manual --prune) ─────────
1499
+ if (modified || opts.prune) {
1500
+ const db = getDb();
1501
+ const configDomains = new Set([
1502
+ config.target?.domain,
1503
+ ...(config.owned || []).map(o => o.domain),
1504
+ ...(config.competitors || []).map(c => c.domain),
1505
+ ].filter(Boolean));
1506
+
1507
+ const pruned = pruneStaleDomains(db, project, configDomains);
1508
+ if (pruned.length) {
1509
+ console.log(chalk.yellow(`\n 🧹 Pruned ${pruned.length} stale domain(s) from DB:`));
1510
+ for (const d of pruned) console.log(chalk.dim(` − ${d}`));
1511
+ } else if (opts.prune) {
1512
+ console.log(chalk.dim('\n ✓ No stale domains to prune'));
1513
+ }
1514
+ }
1515
+
1474
1516
  // ── Always show current config
1475
1517
  console.log(chalk.bold.cyan(`\n 📋 ${project} — Domain Configuration\n`));
1476
1518
  console.log(chalk.white(' Target:'));
@@ -1560,13 +1602,14 @@ async function runAnalysis(project, db) {
1560
1602
  program
1561
1603
  .command('extract <project>')
1562
1604
  .description('Run AI extraction on all crawled-but-not-yet-extracted pages (requires Solo/Agency)')
1563
- .option('--stealth', 'Advanced browser mode for JS-heavy and dynamic sites')
1564
- .action(async (project, opts) => {
1605
+ .action(async (project) => {
1565
1606
  if (!requirePro('extract')) return;
1566
1607
  const db = getDb();
1608
+
1609
+ // Query pages that have body_text stored (from crawl) but no extraction yet
1567
1610
  const pendingPages = db.prepare(`
1568
- SELECT p.id, p.url, p.word_count,
1569
- e.id as extracted
1611
+ SELECT p.id, p.url, p.word_count, p.title, p.meta_desc, p.body_text,
1612
+ p.published_date, p.modified_date
1570
1613
  FROM pages p
1571
1614
  JOIN domains d ON d.id = p.domain_id
1572
1615
  LEFT JOIN extractions e ON e.page_id = p.id
@@ -1578,102 +1621,241 @@ program
1578
1621
  process.exit(0);
1579
1622
  }
1580
1623
 
1581
- const mode = opts.stealth ? chalk.magenta('STEALTH') : chalk.gray('standard');
1582
- console.log(chalk.bold.cyan(`\n⚙️ Extracting ${pendingPages.length} pages for ${project} via Qwen [${mode}]...\n`));
1624
+ // Check how many have body_text stored vs need re-crawl
1625
+ const withContent = pendingPages.filter(r => r.body_text);
1626
+ const needsRecrawl = pendingPages.length - withContent.length;
1627
+
1628
+ console.log(chalk.bold.cyan(`\n⚙️ Extracting ${pendingPages.length} pages for ${project} via Qwen...\n`));
1629
+ if (needsRecrawl > 0) {
1630
+ console.log(chalk.yellow(` ⚠ ${needsRecrawl} pages have no stored content (crawled before v1.1.6). Re-crawl to populate.\n`));
1631
+ }
1583
1632
 
1584
1633
  const extractStart = Date.now();
1585
- let done = 0, failed = 0;
1634
+ let done = 0, failed = 0, skipped = 0;
1635
+
1636
+ // ── Pre-extract template grouping: sample N per group, skip the rest ──
1637
+ const SAMPLE_PER_GROUP = 5;
1638
+ const MIN_GROUP_FOR_SAMPLING = 10;
1639
+ let extractQueue = pendingPages.filter(r => r.body_text); // only pages with stored content
1640
+
1641
+ try {
1642
+ const { clusterUrls } = await import('./analyses/templates/cluster.js');
1643
+ const { groups } = clusterUrls(
1644
+ extractQueue.map(r => ({ url: r.url })),
1645
+ { minGroupSize: MIN_GROUP_FOR_SAMPLING }
1646
+ );
1647
+
1648
+ if (groups.length > 0) {
1649
+ const skipUrls = new Set();
1586
1650
 
1587
- // ── Stealth: single session across all pages (cookie accumulation) ──
1588
- let stealthSession = null;
1589
- if (opts.stealth) {
1590
- const { createStealthSession } = await import('./crawler/stealth.js');
1591
- stealthSession = await createStealthSession();
1592
- console.log(chalk.magenta(' 🥷 Advanced mode — full browser rendering, persistent sessions\n'));
1651
+ for (const group of groups) {
1652
+ const urls = group.urls;
1653
+ if (urls.length <= SAMPLE_PER_GROUP) continue;
1654
+
1655
+ const sampleSet = new Set();
1656
+ sampleSet.add(urls[0]); sampleSet.add(urls[1]);
1657
+ sampleSet.add(urls[urls.length - 1]); sampleSet.add(urls[urls.length - 2]);
1658
+ sampleSet.add(urls[Math.floor(urls.length / 2)]);
1659
+
1660
+ const skippedCount = urls.length - sampleSet.size;
1661
+ for (const u of urls) {
1662
+ if (!sampleSet.has(u)) skipUrls.add(u);
1663
+ }
1664
+ console.log(chalk.yellow(` [template] ${group.pattern} → ${urls.length} pages, sampling ${sampleSet.size}, skipping ${skippedCount}`));
1665
+ }
1666
+
1667
+ if (skipUrls.size > 0) {
1668
+ extractQueue = extractQueue.filter(r => !skipUrls.has(r.url));
1669
+ skipped += skipUrls.size;
1670
+ console.log(chalk.yellow(` [template] ${withContent.length} extractable → ${extractQueue.length} to extract (${skipUrls.size} template-skipped)\n`));
1671
+ }
1672
+ }
1673
+ } catch (e) {
1674
+ console.log(chalk.gray(` [template] Pattern detection skipped: ${e.message}`));
1593
1675
  }
1594
1676
 
1595
- // Register cleanup so SIGTERM closes the browser gracefully
1596
- onShutdown(async () => {
1597
- if (stealthSession) {
1598
- await stealthSession.close();
1599
- console.log(chalk.magenta(' 🥷 Stealth session closed'));
1677
+ // ── Consecutive failure tracking per URL pattern ──
1678
+ const CONSEC_FAIL_THRESHOLD = 3;
1679
+ const patternFailCounts = new Map();
1680
+ const skippedPatterns = new Set();
1681
+
1682
// Collapse a URL into a coarse pattern key: "<hostname>/" followed by its non-empty path
// segments joined with "/". A segment is replaced by the literal "{var}" when it is longer
// than 20 characters, consists solely of 8+ hex digits, starts with "0x", or contains "-" or "_".
// When `new URL(url)` throws, the input is returned unchanged.
+ function getPatternKey(url) {
1683
+ try {
1684
+ const u = new URL(url);
1685
+ const parts = u.pathname.split('/').filter(Boolean);
1686
+ return u.hostname + '/' + parts.map(p =>
1687
+ (p.length > 20 || /^[0-9a-fA-F]{8,}$/.test(p) || /^0x/.test(p) || /[-_]/.test(p)) ? '{var}' : p
1688
+ ).join('/');
1689
+ } catch { return url; }
1690
+ }
1691
+
1692
+ // ── Content similarity detection ──
1693
+ const SIMILARITY_THRESHOLD = 0.80;
1694
+ const SIMILARITY_SAMPLE_SIZE = 3;
1695
+ const patternFingerprints = new Map();
1696
+
1697
/**
 * Build the set of word n-grams ("shingles") for a piece of text.
 *
 * The text is lower-cased, every character that is not a letter, combining
 * mark, digit or whitespace is removed, and the remainder is split on
 * whitespace. Each run of `n` consecutive words is joined with a single space
 * and added to the result.
 *
 * Letters, marks and digits are matched with Unicode property escapes. An
 * ASCII-only class would delete every non-Latin word, leaving such pages with
 * an empty shingle set, so they could never be recognised as near-duplicates.
 *
 * @param {string|null|undefined} text - source text; nullish is treated as ''.
 * @param {number} [n=3] - number of words per shingle.
 * @returns {Set<string>} distinct shingles; empty when fewer than `n` words remain.
 */
function textToShingles(text, n = 3) {
  const words = (text || '')
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, '')
    .split(/\s+/)
    .filter(Boolean);
  const shingles = new Set();
  for (let i = 0; i <= words.length - n; i++) {
    shingles.add(words.slice(i, i + n).join(' '));
  }
  return shingles;
}
1705
+
1706
// Jaccard index of two Sets: |a ∩ b| / |a ∪ b|, where the union size is computed as
// a.size + b.size - intersection. Returns 0 when either set is empty.
+ function jaccardSimilarity(a, b) {
1707
+ if (!a.size || !b.size) return 0;
1708
+ let intersection = 0;
1709
+ for (const s of a) { if (b.has(s)) intersection++; }
1710
+ return intersection / (a.size + b.size - intersection);
1711
+ }
1712
+
1713
/**
 * Record a page's shingle set under its URL pattern and report whether the
 * pattern's sample has just been completed with all pages near-identical.
 *
 * Only the first SIMILARITY_SAMPLE_SIZE shingle sets per pattern are kept.
 * The verdict is delivered once, on the call that fills the sample; every
 * later call returns false without storing anything. Sets arriving after the
 * sample is full are never compared again, so retaining them would only pin
 * one large Set per extracted page in memory.
 *
 * @param {string} patKey - pattern key the page belongs to.
 * @param {Set<string>} newShingles - shingle set of the page just processed.
 * @returns {boolean} true only on the call that completes the sample and only
 *   if every pair in the sample has Jaccard similarity >= SIMILARITY_THRESHOLD.
 */
function checkPatternSimilarity(patKey, newShingles) {
  if (!patternFingerprints.has(patKey)) patternFingerprints.set(patKey, []);
  const fps = patternFingerprints.get(patKey);

  // Sample already complete: the one-time verdict has been given.
  if (fps.length >= SIMILARITY_SAMPLE_SIZE) return false;

  fps.push(newShingles);
  if (fps.length < SIMILARITY_SAMPLE_SIZE) return false;

  // Sample just filled: require every pair to clear the threshold.
  for (let i = 0; i < fps.length; i++) {
    for (let j = i + 1; j < fps.length; j++) {
      if (jaccardSimilarity(fps[i], fps[j]) < SIMILARITY_THRESHOLD) return false;
    }
  }
  return true;
}
1602
1725
 
1603
- try {
1604
- for (const row of pendingPages) {
1605
- process.stdout.write(chalk.gray(` [${done + failed + 1}/${pendingPages.length}] ${row.url.slice(0, 65)} `));
1606
- if (opts.stealth) process.stdout.write(chalk.magenta('stealth '));
1607
- process.stdout.write(chalk.gray('fetching...'));
1608
-
1609
- writeProgress({
1610
- status: 'running', command: 'extract', project,
1611
- current_url: row.url,
1612
- page_index: done + failed + 1, total: pendingPages.length,
1613
- percent: Math.round(((done + failed) / pendingPages.length) * 100),
1614
- started_at: extractStart, failed,
1615
- stealth: !!opts.stealth,
1616
- });
1726
+ // ── Prepare headings + schema queries (per-page lookups from DB) ──
1727
+ const getHeadings = db.prepare('SELECT level, text FROM headings WHERE page_id = ? ORDER BY id');
1728
+ const getSchemaTypes = db.prepare('SELECT DISTINCT schema_type FROM page_schemas WHERE page_id = ?');
1617
1729
 
1618
- try {
1619
- let pageData;
1730
+ const totalToProcess = extractQueue.length;
1731
+ console.log(chalk.gray(` 📖 Reading from DB — no network needed\n`));
1620
1732
 
1621
- if (stealthSession) {
1622
- // Stealth: reuse persistent browser session
1623
- pageData = await stealthSession.fetchPage(row.url);
1624
- } else {
1625
- // Standard: quick single-page crawl
1626
- const { crawlAll } = await import('./crawler/index.js');
1627
- const crawled = await crawlAll(row.url);
1628
- pageData = crawled[0] || null;
1629
- }
1733
+ for (const row of extractQueue) {
1734
+ const patKey = getPatternKey(row.url);
1735
+ if (skippedPatterns.has(patKey)) {
1736
+ skipped++;
1737
+ continue;
1738
+ }
1630
1739
 
1631
- if (!pageData || pageData.status >= 400) {
1632
- const reason = pageData ? `HTTP ${pageData.status}` : 'no data';
1633
- process.stdout.write(chalk.red(` ✗ ${reason}\n`));
1634
- failed++;
1635
- if (stealthSession) {
1636
- // Jittered delay even on failure — don't hammer a blocking site
1637
- await new Promise(r => setTimeout(r, 1500 + Math.random() * 2000));
1638
- }
1639
- continue;
1640
- }
1740
+ const pos = done + failed + 1;
1741
+ process.stdout.write(chalk.gray(` [${pos}/${totalToProcess}] ${row.url.slice(0, 70)} `));
1742
+ process.stdout.write(chalk.gray('extracting...'));
1641
1743
 
1642
- process.stdout.write(chalk.gray(' extracting...'));
1643
- const extractFn = await getExtractPage();
1644
- const extraction = await extractFn(pageData);
1645
- insertExtraction(db, { pageId: row.id, data: extraction });
1646
- insertKeywords(db, row.id, extraction.keywords);
1647
-
1648
- // Also update headings + links + schemas with fresh data from stealth fetch
1649
- if (stealthSession) {
1650
- insertHeadings(db, row.id, pageData.headings);
1651
- insertLinks(db, row.id, pageData.links);
1652
- if (pageData.parsedSchemas?.length) insertPageSchemas(db, row.id, pageData.parsedSchemas);
1653
- }
1744
+ writeProgress({
1745
+ status: 'running', command: 'extract', project,
1746
+ current_url: row.url,
1747
+ page_index: pos, total: totalToProcess,
1748
+ percent: Math.round(((done + failed) / totalToProcess) * 100),
1749
+ started_at: extractStart, failed, skipped,
1750
+ });
1751
+
1752
+ let pageFailed = false;
1654
1753
 
1655
- process.stdout.write(chalk.green(` ✓${pageData.parsedSchemas?.length ? ` [${pageData.parsedSchemas.length} schema]` : ''}\n`));
1754
+ try {
1755
+ // Read headings + schema types from DB
1756
+ const headings = getHeadings.all(row.id);
1757
+ const schemaTypes = getSchemaTypes.all(row.id).map(r => r.schema_type);
1758
+
1759
+ const extractFn = await getExtractPage();
1760
+ const extraction = await extractFn({
1761
+ url: row.url,
1762
+ title: row.title || '',
1763
+ metaDesc: row.meta_desc || '',
1764
+ headings,
1765
+ bodyText: row.body_text,
1766
+ schemaTypes,
1767
+ publishedDate: row.published_date,
1768
+ modifiedDate: row.modified_date,
1769
+ });
1770
+ insertExtraction(db, { pageId: row.id, data: extraction });
1771
+ insertKeywords(db, row.id, extraction.keywords);
1772
+
1773
+ const isDegraded = extraction.extraction_source === 'degraded';
1774
+ if (isDegraded) {
1775
+ process.stdout.write(chalk.yellow(` ⚠ degraded\n`));
1656
1776
  done++;
1657
- } catch (err) {
1658
- process.stdout.write(chalk.red(` ${err.message}\n`));
1659
- failed++;
1777
+ pageFailed = true;
1778
+ } else {
1779
+ process.stdout.write(chalk.green(` ✓\n`));
1780
+ done++;
1781
+ patternFailCounts.set(patKey, 0);
1660
1782
  }
1661
1783
 
1662
- // Jittered delay in stealth mode (2-5s) to mimic human browsing
1663
- if (stealthSession) {
1664
- await new Promise(r => setTimeout(r, 2000 + Math.random() * 3000));
1784
+ // ── Content similarity detection ──
1785
+ if (row.body_text.length > 50) {
1786
+ const shingles = textToShingles(row.body_text);
1787
+ if (checkPatternSimilarity(patKey, shingles) && !skippedPatterns.has(patKey)) {
1788
+ const remaining = extractQueue.filter(r => getPatternKey(r.url) === patKey).length - (patternFingerprints.get(patKey)?.length || 0);
1789
+ skippedPatterns.add(patKey);
1790
+ if (remaining > 0) {
1791
+ console.log(chalk.yellow(` [similarity] 🔍 ${SIMILARITY_SAMPLE_SIZE} pages from ${patKey} are ${Math.round(SIMILARITY_THRESHOLD * 100)}%+ identical — skipping ${remaining} remaining`));
1792
+ }
1793
+ }
1665
1794
  }
1795
+ } catch (err) {
1796
+ process.stdout.write(chalk.red(` ✗ ${err.message}\n`));
1797
+ failed++;
1798
+ pageFailed = true;
1666
1799
  }
1667
- } finally {
1668
- // Always close stealth session
1669
- if (stealthSession) {
1670
- await stealthSession.close();
1671
- console.log(chalk.magenta(`\n 🥷 Stealth session closed (${stealthSession.getPageCount()} pages fetched)`));
1800
+
1801
+ // ── Track consecutive failures per pattern ──
1802
+ if (pageFailed) {
1803
+ const count = (patternFailCounts.get(patKey) || 0) + 1;
1804
+ patternFailCounts.set(patKey, count);
1805
+ if (count >= CONSEC_FAIL_THRESHOLD) {
1806
+ const remaining = extractQueue.filter(r => !skippedPatterns.has(getPatternKey(r.url)) && getPatternKey(r.url) === patKey).length;
1807
+ skippedPatterns.add(patKey);
1808
+ console.log(chalk.yellow(` [template] ⚡ ${count} consecutive failures for ${patKey} — skipping ~${remaining} remaining pages`));
1809
+ }
1672
1810
  }
1673
1811
  }
1674
1812
 
1675
- writeProgress({ status: 'completed', command: 'extract', project, extracted: done, failed, total: pendingPages.length, started_at: extractStart, finished_at: Date.now() });
1676
- console.log(chalk.bold.green(`\n✅ Extraction complete: ${done} extracted, ${failed} failed\n`));
1813
+ writeProgress({ status: 'completed', command: 'extract', project, extracted: done, failed, skipped, total: pendingPages.length, started_at: extractStart, finished_at: Date.now() });
1814
+ const skipMsg = skipped > 0 ? chalk.yellow(`, ${skipped} template-skipped`) : '';
1815
+ const recrawlMsg = needsRecrawl > 0 ? chalk.yellow(`, ${needsRecrawl} need re-crawl`) : '';
1816
+ console.log(chalk.bold.green(`\n✅ Extraction complete: ${done} extracted, ${failed} failed${skipMsg}${recrawlMsg}\n`));
1817
+ });
1818
+
1819
+ // ── TEMPLATES ANALYSIS ────────────────────────────────────────────────────
1820
// `templates <project>` — runs the template analysis for a project and prints a
// short summary. Gated behind requirePro('templates'). The analysis module is
// imported lazily inside the action. Numeric options arrive as strings and are
// parsed as base-10 integers; a value that does not parse (or parses to 0)
// falls back to the documented default (10 / 20).
program
  .command('templates <project>')
  .description('Detect programmatic template pages — assess SEO value without crawling all of them')
  .option('--min-group <n>', 'Minimum URLs to qualify as a template group', '10')
  .option('--sample-size <n>', 'Pages to stealth-crawl per template group', '20')
  .option('--skip-crawl', 'Skip sample crawl (pattern analysis + GSC only)')
  .option('--skip-gsc', 'Skip GSC overlay phase')
  .option('--skip-competitors', 'Skip competitor sitemap census')
  .action(async (project, opts) => {
    if (!requirePro('templates')) return;

    console.log(chalk.bold.cyan(`\n🔍 SEO Intel — Template Analysis`));
    console.log(chalk.dim(` Project: ${project}`));

    try {
      const { runTemplatesAnalysis } = await import('./analyses/templates/index.js');
      const report = await runTemplatesAnalysis(project, {
        // Explicit radix so inputs such as "0x10" are not read as hexadecimal.
        minGroupSize: Number.parseInt(opts.minGroup, 10) || 10,
        sampleSize: Number.parseInt(opts.sampleSize, 10) || 20,
        skipCrawl: !!opts.skipCrawl,
        skipGsc: !!opts.skipGsc,
        skipCompetitors: !!opts.skipCompetitors,
        log: (msg) => console.log(chalk.gray(msg)),
      });

      if (report.groups.length === 0) {
        console.log(chalk.yellow(`\n No template patterns detected.\n`));
        process.exit(0);
      }

      // Summary
      console.log(chalk.bold.green(`\n✅ Template analysis complete`));
      console.log(chalk.dim(` ${report.stats.totalGroups} groups · ${report.stats.totalGrouped.toLocaleString()} URLs · ${(report.stats.coverage * 100).toFixed(0)}% of sitemap`));
      console.log(chalk.dim(` Run ${chalk.white('seo-intel html ' + project)} to see the full dashboard.\n`));
    } catch (err) {
      console.error(chalk.red(`\n Error: ${err.message}\n`));
      // Stack trace only when DEBUG is set in the environment.
      if (process.env.DEBUG) console.error(err.stack);
      process.exit(1);
    }
  });
1678
1860
 
1679
1861
  // ── HTML DASHBOARD ─────────────────────────────────────────────────────────
@@ -1741,10 +1923,10 @@ program
1741
1923
  }
1742
1924
  });
1743
1925
 
1744
- // ── HTML ALL-PROJECTS DASHBOARD ──────────────────────────────────────────────
1926
+ // ── HTML ALL-PROJECTS DASHBOARD (alias for html — kept for backwards compat) ──
1745
1927
  program
1746
1928
  .command('html-all')
1747
- .description('Generate a single HTML dashboard with all projects (dropdown switcher)')
1929
+ .description('Alias for "html" generates the all-projects dashboard')
1748
1930
  .action(() => {
1749
1931
  const db = getDb();
1750
1932
  const configs = loadAllConfigs();
package/crawler/index.js CHANGED
@@ -547,11 +547,16 @@ async function processPage(page, url, base, depth, queue, maxDepth) {
547
547
  // ── Quality gate — detect shells, blocked pages, empty content ──
548
548
  const quality = assessQuality({ wordCount, bodyText, title, status });
549
549
 
550
+ // Full body text for DB storage (extraction reads this); truncated for log output
551
+ const fullBodyText = sanitize(bodyText, 50000); // ~200K chars — enough for any real page
552
+ const shortBodyText = sanitize(bodyText, 2000); // compact version for logging
553
+
550
554
  return {
551
555
  url, depth, status, loadMs, wordCount, isIndexable,
552
556
  title, metaDesc, headings,
553
557
  links: [...internalLinks, ...externalLinks],
554
- bodyText: sanitize(bodyText, 2000),
558
+ bodyText: shortBodyText,
559
+ fullBodyText,
555
560
  schemaTypes, parsedSchemas, vitals, publishedDate, modifiedDate,
556
561
  contentHash: hash,
557
562
  quality: quality.ok, qualityReason: quality.reason,
package/db/db.js CHANGED
@@ -21,6 +21,9 @@ export function getDb(dbPath = './seo-intel.db') {
21
21
  // Migrations for existing databases
22
22
  try { _db.exec('ALTER TABLE pages ADD COLUMN content_hash TEXT'); } catch { /* already exists */ }
23
23
  try { _db.exec('ALTER TABLE pages ADD COLUMN first_seen_at INTEGER'); } catch { /* already exists */ }
24
+ try { _db.exec('ALTER TABLE pages ADD COLUMN title TEXT'); } catch { /* already exists */ }
25
+ try { _db.exec('ALTER TABLE pages ADD COLUMN meta_desc TEXT'); } catch { /* already exists */ }
26
+ try { _db.exec('ALTER TABLE pages ADD COLUMN body_text TEXT'); } catch { /* already exists */ }
24
27
 
25
28
  // Backfill first_seen_at from crawled_at for existing rows
26
29
  _db.exec('UPDATE pages SET first_seen_at = crawled_at WHERE first_seen_at IS NULL');
@@ -42,11 +45,11 @@ export function upsertDomain(db, { domain, project, role }) {
42
45
  `).run(domain, project, role, now, now);
43
46
  }
44
47
 
45
- export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, isIndexable, clickDepth = 0, publishedDate = null, modifiedDate = null, contentHash = null }) {
48
+ export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, isIndexable, clickDepth = 0, publishedDate = null, modifiedDate = null, contentHash = null, title = null, metaDesc = null, bodyText = null }) {
46
49
  const now = Date.now();
47
50
  db.prepare(`
48
- INSERT INTO pages (domain_id, url, crawled_at, first_seen_at, status_code, word_count, load_ms, is_indexable, click_depth, published_date, modified_date, content_hash)
49
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
51
+ INSERT INTO pages (domain_id, url, crawled_at, first_seen_at, status_code, word_count, load_ms, is_indexable, click_depth, published_date, modified_date, content_hash, title, meta_desc, body_text)
52
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
50
53
  ON CONFLICT(url) DO UPDATE SET
51
54
  crawled_at = excluded.crawled_at,
52
55
  status_code = excluded.status_code,
@@ -55,8 +58,11 @@ export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, i
55
58
  click_depth = excluded.click_depth,
56
59
  published_date = excluded.published_date,
57
60
  modified_date = excluded.modified_date,
58
- content_hash = excluded.content_hash
59
- `).run(domainId, url, now, now, statusCode, wordCount, loadMs, isIndexable ? 1 : 0, clickDepth, publishedDate, modifiedDate, contentHash);
61
+ content_hash = excluded.content_hash,
62
+ title = excluded.title,
63
+ meta_desc = excluded.meta_desc,
64
+ body_text = excluded.body_text
65
+ `).run(domainId, url, now, now, statusCode, wordCount, loadMs, isIndexable ? 1 : 0, clickDepth, publishedDate, modifiedDate, contentHash, title || null, metaDesc || null, bodyText || null);
60
66
  // first_seen_at is NOT in the ON CONFLICT UPDATE — it stays from original INSERT
61
67
  return db.prepare('SELECT id FROM pages WHERE url = ?').get(url);
62
68
  }
@@ -214,6 +220,147 @@ export function getKeywordMatrix(db, project) {
214
220
  `).all(project);
215
221
  }
216
222
 
223
+ // ── Template analysis ─────────────────────────────────────────────────────
224
+
225
// Insert one row into template_groups, or, when a row with the same
// (project, domain, pattern) already exists, overwrite all of its other listed columns.
// Defaults applied to `g` before binding:
//   - sampleSize and the three gsc* count fields fall back to 0 (via ||)
//   - avgWordCount, contentSimilarity, domSimilarity, gscAvgPosition,
//     indexationEfficiency and score fall back to null only when nullish (via ??)
//   - verdict falls back to null; recommendation is stored as JSON text ([] when falsy)
//   - analyzedAt falls back to Date.now()
// Returns whatever the prepared statement's run() returns.
+ export function upsertTemplateGroup(db, g) {
226
+ return db.prepare(`
227
+ INSERT INTO template_groups
228
+ (project, domain, pattern, url_count, sample_size,
229
+ avg_word_count, content_similarity, dom_similarity,
230
+ gsc_urls_with_impressions, gsc_total_clicks, gsc_total_impressions,
231
+ gsc_avg_position, indexation_efficiency, score, verdict, recommendation,
232
+ analyzed_at)
233
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
234
+ ON CONFLICT(project, domain, pattern) DO UPDATE SET
235
+ url_count = excluded.url_count,
236
+ sample_size = excluded.sample_size,
237
+ avg_word_count = excluded.avg_word_count,
238
+ content_similarity = excluded.content_similarity,
239
+ dom_similarity = excluded.dom_similarity,
240
+ gsc_urls_with_impressions = excluded.gsc_urls_with_impressions,
241
+ gsc_total_clicks = excluded.gsc_total_clicks,
242
+ gsc_total_impressions = excluded.gsc_total_impressions,
243
+ gsc_avg_position = excluded.gsc_avg_position,
244
+ indexation_efficiency = excluded.indexation_efficiency,
245
+ score = excluded.score,
246
+ verdict = excluded.verdict,
247
+ recommendation = excluded.recommendation,
248
+ analyzed_at = excluded.analyzed_at
249
+ `).run(
250
+ g.project, g.domain, g.pattern, g.urlCount, g.sampleSize || 0,
251
+ g.avgWordCount ?? null, g.contentSimilarity ?? null, g.domSimilarity ?? null,
252
+ g.gscUrlsWithImpressions || 0, g.gscTotalClicks || 0, g.gscTotalImpressions || 0,
253
+ g.gscAvgPosition ?? null, g.indexationEfficiency ?? null,
254
+ g.score ?? null, g.verdict || null, JSON.stringify(g.recommendation || []),
255
+ g.analyzedAt || Date.now()
256
+ );
257
+ }
258
+
259
// Look up the id of the template_groups row matching (project, domain, pattern).
// Returns undefined when no such row exists (optional chaining on the .get() result).
+ export function getTemplateGroupId(db, project, domain, pattern) {
260
+ return db.prepare(
261
+ 'SELECT id FROM template_groups WHERE project = ? AND domain = ?  AND pattern = ?'
262
+ ).get(project, domain, pattern)?.id;
263
+ }
264
+
265
// Insert one row into template_samples, or, when a row with the same (group_id, url)
// already exists, overwrite all of its other listed columns. Returns nothing.
// Defaults applied to `s` before binding:
//   - statusCode and wordCount fall back to null only when nullish (via ??)
//   - title, metaDesc, domFingerprint, contentHash and bodyText fall back to null when falsy
//   - hasCanonical, hasSchema and isIndexable are coerced to 1/0; note that a missing
//     isIndexable is therefore written as 0
//   - crawledAt falls back to Date.now()
+ export function upsertTemplateSample(db, s) {
266
+ db.prepare(`
267
+ INSERT INTO template_samples
268
+ (group_id, url, sample_role, status_code, word_count,
269
+ title, meta_desc, has_canonical, has_schema, is_indexable,
270
+ dom_fingerprint, content_hash, body_text, crawled_at)
271
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
272
+ ON CONFLICT(group_id, url) DO UPDATE SET
273
+ sample_role = excluded.sample_role,
274
+ status_code = excluded.status_code,
275
+ word_count = excluded.word_count,
276
+ title = excluded.title,
277
+ meta_desc = excluded.meta_desc,
278
+ has_canonical = excluded.has_canonical,
279
+ has_schema = excluded.has_schema,
280
+ is_indexable = excluded.is_indexable,
281
+ dom_fingerprint = excluded.dom_fingerprint,
282
+ content_hash = excluded.content_hash,
283
+ body_text = excluded.body_text,
284
+ crawled_at = excluded.crawled_at
285
+ `).run(
286
+ s.groupId, s.url, s.sampleRole, s.statusCode ?? null, s.wordCount ?? null,
287
+ s.title || null, s.metaDesc || null,
288
+ s.hasCanonical ? 1 : 0, s.hasSchema ? 1 : 0, s.isIndexable ? 1 : 0,
289
+ s.domFingerprint || null, s.contentHash || null, s.bodyText || null,
290
+ s.crawledAt || Date.now()
291
+ );
292
+ }
293
+
294
// Return every template_groups row for the given project, largest url_count first.
+ export function getTemplateGroups(db, project) {
295
+ return db.prepare(
296
+ 'SELECT * FROM template_groups WHERE project = ? ORDER BY url_count DESC'
297
+ ).all(project);
298
+ }
299
+
300
// Return every template_samples row for the given group id, ordered by sample_role, then url.
+ export function getTemplateSamples(db, groupId) {
301
+ return db.prepare(
302
+ 'SELECT * FROM template_samples WHERE group_id = ? ORDER BY sample_role, url'
303
+ ).all(groupId);
304
+ }
305
+
306
+ // ── Domain sync / prune ───────────────────────────────────────────────────
307
+
308
/**
 * Remove DB domains (+ all child data) that no longer exist in config.
 *
 * For every domain row of `project` whose name is not in `configDomains`, this
 * deletes its links, technical, headings, page_schemas, extractions and
 * keywords rows, then its pages, its template samples/groups, and finally the
 * domain row itself — all inside one transaction.
 *
 * Child rows are selected with `IN (SELECT id FROM pages WHERE domain_id = ?)`
 * rather than by binding one parameter per page id. Spreading a large id list
 * into a statement can exceed SQLite's host-parameter limit on domains with
 * many pages.
 *
 * Foreign-key enforcement is switched off for the duration and switched back
 * on in a `finally`, including when `BEGIN` itself fails.
 *
 * @param {object} db - database handle exposing `prepare(sql)` and `exec(sql)`.
 * @param {string} project - project whose domains are checked.
 * @param {Iterable<string>} configDomains - Set or array of domain strings currently in config.
 * @returns {string[]} names of the pruned domains (empty when none were stale).
 * @throws rethrows any error raised while deleting, after issuing ROLLBACK.
 */
export function pruneStaleDomains(db, project, configDomains) {
  const validSet = new Set(configDomains);

  const dbDomains = db.prepare(
    'SELECT id, domain FROM domains WHERE project = ?'
  ).all(project);

  const stale = dbDomains.filter(d => !validSet.has(d.domain));
  if (!stale.length) return [];

  const pagesOfDomain = 'SELECT id FROM pages WHERE domain_id = ?';

  db.exec('PRAGMA foreign_keys = OFF');
  try {
    db.exec('BEGIN');
    try {
      // Prepared once, reused for every stale domain. Order matters: child
      // tables first, then pages (the sub-select above reads from pages).
      const deleteByDomainId = [
        `DELETE FROM links WHERE source_id IN (${pagesOfDomain})`,
        `DELETE FROM technical WHERE page_id IN (${pagesOfDomain})`,
        `DELETE FROM headings WHERE page_id IN (${pagesOfDomain})`,
        `DELETE FROM page_schemas WHERE page_id IN (${pagesOfDomain})`,
        `DELETE FROM extractions WHERE page_id IN (${pagesOfDomain})`,
        `DELETE FROM keywords WHERE page_id IN (${pagesOfDomain})`,
        'DELETE FROM pages WHERE domain_id = ?',
      ].map(sql => db.prepare(sql));

      const deleteTemplateSamples = db.prepare(
        'DELETE FROM template_samples WHERE group_id IN (SELECT id FROM template_groups WHERE project = ? AND domain = ?)'
      );
      const deleteTemplateGroups = db.prepare(
        'DELETE FROM template_groups WHERE project = ? AND domain = ?'
      );
      const deleteDomain = db.prepare('DELETE FROM domains WHERE id = ?');

      for (const { id, domain } of stale) {
        for (const stmt of deleteByDomainId) stmt.run(id);

        // Template groups for this domain
        deleteTemplateSamples.run(project, domain);
        deleteTemplateGroups.run(project, domain);

        deleteDomain.run(id);
      }
      db.exec('COMMIT');
    } catch (e) {
      db.exec('ROLLBACK');
      throw e;
    }
  } finally {
    db.exec('PRAGMA foreign_keys = ON');
  }

  return stale.map(d => d.domain);
}
363
+
217
364
  export function getHeadingStructure(db, project) {
218
365
  return db.prepare(`
219
366
  SELECT d.domain, d.role, h.level, h.text
package/db/schema.sql CHANGED
@@ -23,6 +23,9 @@ CREATE TABLE IF NOT EXISTS pages (
23
23
  published_date TEXT, -- ISO string or null
24
24
  modified_date TEXT, -- ISO string or null
25
25
  content_hash TEXT, -- SHA-256 of body text for incremental crawling
26
+ title TEXT, -- page <title>
27
+ meta_desc TEXT, -- meta description
28
+ body_text TEXT, -- cleaned body text for extraction (stored at crawl time)
26
29
  FOREIGN KEY (domain_id) REFERENCES domains(id)
27
30
  );
28
31
 
@@ -110,6 +113,51 @@ CREATE TABLE IF NOT EXISTS page_schemas (
110
113
  extracted_at INTEGER NOT NULL
111
114
  );
112
115
 
116
-- Template analysis tables
--
-- template_groups: one row per (project, domain, pattern); the UNIQUE
-- constraint at the bottom of the table enforces this.
-- `domain` is stored as plain text here, not as a foreign key to domains(id),
-- so rows must be removed explicitly when a domain is dropped.
-- NOTE(review): `pattern` presumably holds the URL pattern shared by a group
-- of templated pages, and the gsc_* columns presumably hold Google Search
-- Console aggregates for that group — confirm against the code that writes
-- this table.
CREATE TABLE IF NOT EXISTS template_groups (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  project TEXT NOT NULL,
  domain TEXT NOT NULL,
  pattern TEXT NOT NULL,
  url_count INTEGER NOT NULL,
  sample_size INTEGER NOT NULL DEFAULT 0,
  avg_word_count REAL,
  content_similarity REAL,
  dom_similarity REAL,
  gsc_urls_with_impressions INTEGER DEFAULT 0,
  gsc_total_clicks INTEGER DEFAULT 0,
  gsc_total_impressions INTEGER DEFAULT 0,
  gsc_avg_position REAL,
  indexation_efficiency REAL,
  score INTEGER,
  verdict TEXT,
  recommendation TEXT,
  analyzed_at INTEGER NOT NULL, -- NOTE(review): presumably an epoch timestamp — confirm unit
  UNIQUE(project, domain, pattern)
);
138
+
139
-- template_samples: individual URLs recorded for a template group.
-- Each row belongs to exactly one template_groups row via group_id, and a URL
-- may appear at most once per group (UNIQUE(group_id, url)).
-- The ON DELETE CASCADE removes samples together with their group, but SQLite
-- only applies it while foreign-key enforcement is switched on.
CREATE TABLE IF NOT EXISTS template_samples (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  group_id INTEGER NOT NULL REFERENCES template_groups(id) ON DELETE CASCADE,
  url TEXT NOT NULL,
  sample_role TEXT NOT NULL,
  status_code INTEGER,
  word_count INTEGER,
  title TEXT,
  meta_desc TEXT,
  -- Boolean-style flags stored as integers (defaults: 0, 0, 1).
  has_canonical INTEGER DEFAULT 0,
  has_schema INTEGER DEFAULT 0,
  is_indexable INTEGER DEFAULT 1,
  dom_fingerprint TEXT,
  content_hash TEXT,
  body_text TEXT,
  crawled_at INTEGER, -- NOTE(review): presumably an epoch timestamp — confirm unit
  UNIQUE(group_id, url)
);
157
+
158
+ CREATE INDEX IF NOT EXISTS idx_template_groups_project ON template_groups(project);
159
+ CREATE INDEX IF NOT EXISTS idx_template_samples_group ON template_samples(group_id);
160
+
113
161
  -- Indexes
114
162
  CREATE INDEX IF NOT EXISTS idx_pages_domain ON pages(domain_id);
115
163
  CREATE INDEX IF NOT EXISTS idx_keywords_page ON keywords(page_id);
package/lib/gate.js CHANGED
@@ -52,6 +52,7 @@ const FEATURE_NAMES = {
52
52
  'entities': 'Entity Coverage Analysis',
53
53
  'friction': 'Friction Point Analysis',
54
54
  'js-delta': 'JS Rendering Delta',
55
+ 'templates': 'Programmatic Template Intelligence',
55
56
  'html': 'HTML Dashboard',
56
57
  'html-all': 'HTML Dashboard (All Projects)',
57
58
  'gsc-insights': 'GSC Intelligence & Insights',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "seo-intel",
3
- "version": "1.1.6",
3
+ "version": "1.1.7",
4
4
  "description": "Local Ahrefs-style SEO competitor intelligence. Crawl → SQLite → cloud analysis.",
5
5
  "type": "module",
6
6
  "license": "SEE LICENSE IN LICENSE",
@@ -525,8 +525,13 @@ function buildHtmlTemplate(data, opts = {}) {
525
525
  white-space: nowrap;
526
526
  }
527
527
  .es-btn:hover { border-color: var(--accent-gold); color: var(--accent-gold); }
528
- .es-btn-stop { border-color: rgba(220,80,80,0.3); color: #dc5050; }
529
- .es-btn-stop:hover { border-color: #dc5050; color: #ff6b6b; background: rgba(220,80,80,0.08); }
528
+ .es-btn-stop { border-color: var(--border-card); color: var(--text-muted); }
529
+ .es-btn-stop:hover { border-color: var(--text-secondary); color: var(--text-secondary); }
530
+ .es-btn-stop.active { border-color: rgba(220,80,80,0.5); color: #dc5050; animation: stopPulse 2s ease-in-out infinite; }
531
+ .es-btn-stop.active:hover { border-color: #dc5050; color: #ff6b6b; background: rgba(220,80,80,0.08); }
532
+ @keyframes stopPulse { 0%,100% { border-color: rgba(220,80,80,0.3); } 50% { border-color: rgba(220,80,80,0.7); } }
533
+ .es-btn-restart { border-color: rgba(100,160,220,0.3); color: #6ca0dc; }
534
+ .es-btn-restart:hover { border-color: #6ca0dc; color: #8fc0f0; background: rgba(100,160,220,0.08); }
530
535
  .es-btn:disabled {
531
536
  opacity: 0.4; cursor: not-allowed;
532
537
  border-color: var(--border-card);
@@ -1950,9 +1955,12 @@ function buildHtmlTemplate(data, opts = {}) {
1950
1955
  <i class="fa-solid fa-brain"></i> Extract
1951
1956
  </button>`
1952
1957
  }
1953
- <button class="es-btn es-btn-stop" id="btnStop${suffix}" onclick="stopJob()" style="display:${extractionStatus.liveProgress?.status === 'running' ? 'inline-flex' : 'none'};">
1958
+ <button class="es-btn es-btn-stop${extractionStatus.liveProgress?.status === 'running' ? ' active' : ''}" id="btnStop${suffix}" onclick="stopJob()">
1954
1959
  <i class="fa-solid fa-stop"></i> Stop
1955
1960
  </button>
1961
+ <button class="es-btn es-btn-restart" id="btnRestart${suffix}" onclick="restartServer()">
1962
+ <i class="fa-solid fa-rotate-right"></i> Restart
1963
+ </button>
1956
1964
  <label class="es-stealth-toggle">
1957
1965
  <input type="checkbox" id="stealthToggle${suffix}"${extractionStatus.liveProgress?.stealth ? ' checked' : ''}>
1958
1966
  <i class="fa-solid fa-user-ninja"></i> Stealth
@@ -1981,7 +1989,8 @@ function buildHtmlTemplate(data, opts = {}) {
1981
1989
  ${pro ? `<button class="term-btn" data-cmd="extract" data-project="${project}"><i class="fa-solid fa-brain"></i> Extract</button>
1982
1990
  <button class="term-btn" data-cmd="analyze" data-project="${project}"><i class="fa-solid fa-chart-column"></i> Analyze</button>
1983
1991
  <button class="term-btn" data-cmd="brief" data-project="${project}"><i class="fa-solid fa-file-lines"></i> Brief</button>
1984
- <button class="term-btn" data-cmd="keywords" data-project="${project}"><i class="fa-solid fa-key"></i> Keywords</button>` : ''}
1992
+ <button class="term-btn" data-cmd="keywords" data-project="${project}"><i class="fa-solid fa-key"></i> Keywords</button>
1993
+ <button class="term-btn" data-cmd="templates" data-project="${project}"><i class="fa-solid fa-clone"></i> Templates</button>` : ''}
1985
1994
  <button class="term-btn" data-cmd="status" data-project=""><i class="fa-solid fa-circle-info"></i> Status</button>
1986
1995
  <button class="term-btn" data-cmd="guide" data-project="${project}"><i class="fa-solid fa-map"></i> Guide</button>
1987
1996
  <button class="term-btn" data-cmd="setup" data-project="" style="margin-left:auto;border-color:rgba(232,213,163,0.25);"><i class="fa-solid fa-gear"></i> Setup</button>
@@ -2084,7 +2093,10 @@ function buildHtmlTemplate(data, opts = {}) {
2084
2093
  if (extra?.stealth) params.set('stealth', 'true');
2085
2094
  if (extra?.format) params.set('format', extra.format);
2086
2095
 
2087
- appendLine('$ seo-intel ' + command + (proj ? ' ' + proj : '') + (extra?.scope ? ' --scope ' + extra.scope : ''), 'cmd');
2096
+ var stealthFlag = extra?.stealth ? ' --stealth' : '';
2097
+ appendLine('$ seo-intel ' + command + (proj ? ' ' + proj : '') + stealthFlag + (extra?.scope ? ' --scope ' + extra.scope : ''), 'cmd');
2098
+
2099
+ var isCrawlOrExtract = (command === 'crawl' || command === 'extract');
2088
2100
 
2089
2101
  eventSource = new EventSource('/api/terminal?' + params.toString());
2090
2102
  eventSource.onmessage = function(e) {
@@ -2101,29 +2113,56 @@ function buildHtmlTemplate(data, opts = {}) {
2101
2113
  status.style.color = code === 0 ? 'var(--color-success)' : 'var(--color-danger)';
2102
2114
  eventSource.close();
2103
2115
  eventSource = null;
2116
+ // Update status bar when crawl/extract finishes
2117
+ if (isCrawlOrExtract && window._setButtonsState) window._setButtonsState(false, null);
2104
2118
  }
2105
2119
  } catch (_) {}
2106
2120
  };
2107
2121
  eventSource.onerror = function() {
2108
2122
  if (running) {
2109
- appendLine('Connection lost.', 'error');
2123
+ // SSE disconnected but crawl/extract continues server-side
2124
+ if (isCrawlOrExtract) {
2125
+ appendLine('Terminal disconnected — job continues in background.', 'stderr');
2126
+ } else {
2127
+ appendLine('Connection lost.', 'error');
2128
+ }
2110
2129
  running = false;
2111
- status.textContent = 'disconnected';
2112
- status.style.color = 'var(--color-danger)';
2130
+ status.textContent = isCrawlOrExtract ? 'backgrounded' : 'disconnected';
2131
+ status.style.color = isCrawlOrExtract ? 'var(--text-muted)' : 'var(--color-danger)';
2113
2132
  }
2114
2133
  eventSource?.close();
2115
2134
  eventSource = null;
2116
2135
  };
2117
2136
  }
2118
2137
 
2119
- // Button clicks
2138
// Bridge for the status bar: lets code outside this terminal closure start a
// command in the terminal and stop the terminal stream.
window._terminalRun = function(cmd, proj, extra) {
  runCommand(cmd, proj, extra);
};

// Closes the live event stream (if any) and, when a command was still marked
// as running, reports it as stopped in the terminal output and status label.
window._terminalStop = function() {
  if (eventSource) {
    eventSource.close();
    eventSource = null;
  }
  if (!running) return;
  appendLine('Stopped.', 'exit-err');
  running = false;
  status.textContent = 'stopped';
  status.style.color = 'var(--color-warning)';
};
2149
+
2150
// Button clicks: bind every command button that belongs to this terminal panel.
// crawl/extract additionally read the stealth toggle and update the status bar.
document.querySelectorAll('.terminal-panel .term-btn').forEach(function(btn) {
  // Skip buttons that sit in a different terminal panel than our output element.
  if (btn.closest('.terminal-panel') !== output.closest('.terminal-panel')) return;
  btn.addEventListener('click', function() {
    const cmd = btn.getAttribute('data-cmd');
    const proj = btn.getAttribute('data-project');
    const scope = btn.getAttribute('data-scope');
    var extra = scope ? { scope: scope } : {};
    if (cmd === 'crawl' || cmd === 'extract') {
      // Prefer the toggle of the project this button targets (ids look like
      // stealthToggle-<project> on the multi-project dashboard). Taking the
      // first toggle on the page would read another project's setting there.
      // Fall back to the first toggle when no project-specific one exists.
      var stealthEl = (proj ? document.getElementById('stealthToggle-' + proj) : null)
        || document.querySelector('[id^="stealthToggle"]');
      if (stealthEl?.checked) extra.stealth = true;
      // Mirror the running state in the status bar and start progress polling.
      if (window._setButtonsState) window._setButtonsState(true, cmd);
      if (window._startPolling) window._startPolling();
    }
    runCommand(cmd, proj, extra);
  });
});
2129
2168
 
@@ -3558,41 +3597,55 @@ function buildHtmlTemplate(data, opts = {}) {
3558
3597
  let pollTimer = null;
3559
3598
 
3560
3599
// Starts a crawl/extract from the status bar. The command is routed through
// the terminal (when it is available) so its output is visible, then the
// buttons switch to their running state and progress polling begins.
window.startJob = function(command, proj) {
  var toggle = document.getElementById('stealthToggle' + sfx);
  var options = toggle?.checked ? { stealth: true } : {};

  if (window._terminalRun) window._terminalRun(command, proj, options);

  setButtonsState(true, command);
  startPolling();
};
3577
3611
 
3578
3612
// Stops the current job: closes the terminal stream, asks the server to stop
// the job, and resets the buttons whether or not the request succeeds.
window.stopJob = function() {
  function markIdle() { setButtonsState(false, null); }

  // Closing the stream alone is not enough (the server detaches crawl/extract),
  // so /api/stop is called as well.
  if (window._terminalStop) window._terminalStop();

  fetch('/api/stop', { method: 'POST' })
    .then(function(response) { return response.json(); })
    .then(markIdle)
    .catch(markIdle);
};
3620
+
3621
// Restarts the server after user confirmation: shows a busy state on the
// Restart button, closes the terminal stream, posts to /api/restart, and
// reloads the page two seconds later regardless of the request outcome.
window.restartServer = function() {
  var confirmed = confirm('Restart SEO Intel? This will stop any running jobs and refresh the dashboard.');
  if (!confirmed) return;

  var restartBtn = document.getElementById('btnRestart' + sfx);
  if (restartBtn) {
    restartBtn.disabled = true;
    restartBtn.innerHTML = '<i class="fa-solid fa-spinner fa-spin"></i> Restarting\u2026';
  }

  // Stop terminal SSE
  if (window._terminalStop) window._terminalStop();

  // Used for both outcomes: on success the server is restarting, on failure
  // it might already be gone — either way wait a moment, then reload.
  function reloadSoon() {
    setTimeout(function() { window.location.reload(); }, 2000);
  }

  fetch('/api/restart', { method: 'POST' })
    .then(reloadSoon)
    .catch(reloadSoon);
};
3588
3637
 
3589
- function setButtonsState(disabled, activeCmd) {
3638
+ // Expose for terminal IIFE to call back
3639
+ window._setButtonsState = setButtonsState;
3640
+ window._startPolling = startPolling;
3641
+
3642
+ function setButtonsState(isRunning, activeCmd) {
3590
3643
  var btnC = document.getElementById('btnCrawl' + sfx);
3591
3644
  var btnE = document.getElementById('btnExtract' + sfx);
3592
3645
  var btnS = document.getElementById('btnStop' + sfx);
3593
3646
  if (btnC) {
3594
- btnC.disabled = disabled;
3595
- if (disabled && activeCmd === 'crawl') {
3647
+ btnC.disabled = isRunning;
3648
+ if (isRunning && activeCmd === 'crawl') {
3596
3649
  btnC.classList.add('running');
3597
3650
  btnC.innerHTML = '<i class="fa-solid fa-spinner fa-spin"></i> Crawling\u2026';
3598
3651
  } else {
@@ -3601,8 +3654,8 @@ function buildHtmlTemplate(data, opts = {}) {
3601
3654
  }
3602
3655
  }
3603
3656
  if (btnE) {
3604
- btnE.disabled = disabled;
3605
- if (disabled && activeCmd === 'extract') {
3657
+ btnE.disabled = isRunning;
3658
+ if (isRunning && activeCmd === 'extract') {
3606
3659
  btnE.classList.add('running');
3607
3660
  btnE.innerHTML = '<i class="fa-solid fa-spinner fa-spin"></i> Extracting\u2026';
3608
3661
  } else {
@@ -3611,7 +3664,12 @@ function buildHtmlTemplate(data, opts = {}) {
3611
3664
  }
3612
3665
  }
3613
3666
  if (btnS) {
3614
- btnS.style.display = disabled ? 'inline-flex' : 'none';
3667
+ // Stop button always visible turns red+pulsing when something is running
3668
+ if (isRunning) {
3669
+ btnS.classList.add('active');
3670
+ } else {
3671
+ btnS.classList.remove('active');
3672
+ }
3615
3673
  }
3616
3674
  }
3617
3675
 
@@ -4762,50 +4820,57 @@ function buildMultiHtmlTemplate(allProjectData) {
4762
4820
// Multi-project variant: starts a crawl/extract for the given project. The
// stealth toggle is looked up by the project-specific id suffix, the command
// is routed through the terminal when available, then the buttons switch to
// their running state and progress polling begins.
window.startJob = function(command, proj) {
  var toggle = document.getElementById('stealthToggle' + ('-' + proj));
  var options = toggle?.checked ? { stealth: true } : {};

  if (window._terminalRun) window._terminalRun(command, proj, options);

  setButtonsState(true, command);
  startPolling();
};
4778
4833
 
4779
4834
// Multi-project variant: closes the terminal stream, asks the server to stop
// the job, and resets the buttons whether or not the request succeeds.
window.stopJob = function() {
  function markIdle() { setButtonsState(false, null); }

  if (window._terminalStop) window._terminalStop();

  fetch('/api/stop', { method: 'POST' })
    .then(function(response) { return response.json(); })
    .then(markIdle)
    .catch(markIdle);
};
4841
+
4842
// Multi-project variant: restarts the server after user confirmation and
// reloads the page two seconds later regardless of the request outcome.
window.restartServer = function() {
  if (!confirm('Restart SEO Intel? This will stop any running jobs and refresh the dashboard.')) return;

  // Match the single-project dashboard: put the active project's Restart
  // button into a disabled busy state so it cannot be clicked again while the
  // restart is pending. The lookup is guarded, so a missing button is harmless.
  var btnR = document.getElementById('btnRestart-' + currentProject);
  if (btnR) {
    btnR.disabled = true;
    btnR.innerHTML = '<i class="fa-solid fa-spinner fa-spin"></i> Restarting\u2026';
  }

  // Stop terminal SSE before the server goes away.
  if (window._terminalStop) window._terminalStop();

  // On success the server is restarting; on failure it might already be gone.
  // Either way wait a moment, then reload.
  function reloadSoon() {
    setTimeout(function() { window.location.reload(); }, 2000);
  }

  fetch('/api/restart', { method: 'POST' })
    .then(reloadSoon)
    .catch(reloadSoon);
};
4787
4849
 
4788
- function setButtonsState(disabled, activeCmd) {
4850
+ window._setButtonsState = setButtonsState;
4851
+ window._startPolling = startPolling;
4852
+
4853
// Updates the Crawl / Extract / Stop buttons of the currently selected project.
//   isRunning - whether a job is in progress; disables Crawl and Extract
//   activeCmd - 'crawl' or 'extract' selects which button shows the spinner
// The Stop button is never hidden; it only gains or loses the 'active' class.
function setButtonsState(isRunning, activeCmd) {
  var suffix = '-' + currentProject;
  var crawlBtn = document.getElementById('btnCrawl' + suffix);
  var extractBtn = document.getElementById('btnExtract' + suffix);
  var stopBtn = document.getElementById('btnStop' + suffix);

  var crawling = isRunning && activeCmd === 'crawl';
  var extracting = isRunning && activeCmd === 'extract';

  if (crawlBtn) {
    crawlBtn.disabled = isRunning;
    crawlBtn.classList.toggle('running', crawling);
    if (crawling) {
      crawlBtn.innerHTML = '<i class="fa-solid fa-spinner fa-spin"></i> Crawling\u2026';
    } else {
      crawlBtn.innerHTML = '<i class="fa-solid fa-spider"></i> Crawl';
    }
  }

  if (extractBtn) {
    extractBtn.disabled = isRunning;
    extractBtn.classList.toggle('running', extracting);
    if (extracting) {
      extractBtn.innerHTML = '<i class="fa-solid fa-spinner fa-spin"></i> Extracting\u2026';
    } else {
      extractBtn.innerHTML = '<i class="fa-solid fa-brain"></i> Extract';
    }
  }

  // Explicit boolean so the class is always set or cleared, never flipped.
  if (stopBtn) stopBtn.classList.toggle('active', !!isRunning);
}
4811
4876
 
package/server.js CHANGED
@@ -297,7 +297,7 @@ async function handleRequest(req, res) {
297
297
  if (req.method === 'POST' && path === '/api/extract') {
298
298
  try {
299
299
  const body = await readBody(req);
300
- const { project, stealth } = body;
300
+ const { project } = body;
301
301
  if (!project) { json(res, 400, { error: 'Missing project' }); return; }
302
302
 
303
303
  // Conflict guard
@@ -308,7 +308,6 @@ async function handleRequest(req, res) {
308
308
  }
309
309
 
310
310
  const args = ['cli.js', 'extract', project];
311
- if (stealth) args.push('--stealth');
312
311
 
313
312
  const child = spawn(process.execPath, args, {
314
313
  cwd: __dirname,
@@ -372,6 +371,38 @@ async function handleRequest(req, res) {
372
371
  return;
373
372
  }
374
373
 
374
+ // ─── API: Restart — kill running jobs + restart server ───
375
+ if (req.method === 'POST' && path === '/api/restart') {
376
+ try {
377
+ // 1. Kill any running job
378
+ const progress = readProgress();
379
+ if (progress?.status === 'running' && progress.pid) {
380
+ try { process.kill(progress.pid, 'SIGTERM'); } catch {}
381
+ try {
382
+ writeFileSync(PROGRESS_FILE, JSON.stringify({
383
+ ...progress, status: 'stopped', stopped_at: Date.now(), updated_at: Date.now(),
384
+ }, null, 2));
385
+ } catch {}
386
+ }
387
+ json(res, 200, { restarting: true });
388
+
389
+ // 2. Restart the server process after response is sent
390
+ setTimeout(() => {
391
+ const child = spawn(process.execPath, [fileURLToPath(import.meta.url), ...process.argv.slice(2)], {
392
+ cwd: __dirname,
393
+ detached: true,
394
+ stdio: 'ignore',
395
+ env: { ...process.env, SEO_INTEL_AUTO_OPEN: '0' },
396
+ });
397
+ child.unref();
398
+ process.exit(0);
399
+ }, 300);
400
+ } catch (e) {
401
+ json(res, 500, { error: e.message });
402
+ }
403
+ return;
404
+ }
405
+
375
406
  // ─── API: Export actions ───
376
407
  if (req.method === 'POST' && path === '/api/export-actions') {
377
408
  try {
@@ -538,7 +569,7 @@ async function handleRequest(req, res) {
538
569
  // Whitelist allowed commands
539
570
  const ALLOWED = ['crawl', 'extract', 'analyze', 'export-actions', 'competitive-actions',
540
571
  'suggest-usecases', 'html', 'status', 'brief', 'keywords', 'report', 'guide',
541
- 'schemas', 'headings-audit', 'orphans', 'entities', 'friction', 'shallow', 'decay', 'export'];
572
+ 'schemas', 'headings-audit', 'orphans', 'entities', 'friction', 'shallow', 'decay', 'export', 'templates'];
542
573
 
543
574
  if (!command || !ALLOWED.includes(command)) {
544
575
  json(res, 400, { error: `Invalid command. Allowed: ${ALLOWED.join(', ')}` });
@@ -564,14 +595,21 @@ async function handleRequest(req, res) {
564
595
  res.write(`data: ${JSON.stringify({ type, data })}\n\n`);
565
596
  };
566
597
 
598
+ const isLongRunning = ['crawl', 'extract'].includes(command);
599
+
567
600
  send('start', { command, project, args: args.slice(1) });
568
601
 
569
602
  const child = spawn(process.execPath, args, {
570
603
  cwd: __dirname,
571
604
  env: { ...process.env, FORCE_COLOR: '0', NO_COLOR: '1' },
605
+ // Crawl/extract: detach so they survive SSE disconnect
606
+ ...(isLongRunning ? { detached: true } : {}),
572
607
  });
573
608
 
609
+ let clientClosed = false;
610
+
574
611
  child.stdout.on('data', chunk => {
612
+ if (clientClosed) return;
575
613
  const lines = chunk.toString().split('\n');
576
614
  for (const line of lines) {
577
615
  if (line) send('stdout', line);
@@ -579,6 +617,7 @@ async function handleRequest(req, res) {
579
617
  });
580
618
 
581
619
  child.stderr.on('data', chunk => {
620
+ if (clientClosed) return;
582
621
  const lines = chunk.toString().split('\n');
583
622
  for (const line of lines) {
584
623
  if (line) send('stderr', line);
@@ -586,18 +625,24 @@ async function handleRequest(req, res) {
586
625
  });
587
626
 
588
627
  child.on('error', err => {
589
- send('error', err.message);
590
- res.end();
628
+ if (!clientClosed) { send('error', err.message); res.end(); }
591
629
  });
592
630
 
593
631
  child.on('close', code => {
594
- send('exit', { code });
595
- res.end();
632
+ if (!clientClosed) { send('exit', { code }); res.end(); }
596
633
  });
597
634
 
598
// Client disconnect: kill short commands, let crawl/extract continue.
req.on('close', () => {
  // From here on the stdout/stderr/exit handlers stop writing to the response.
  clientClosed = true;

  if (isLongRunning) {
    // Detach — crawl/extract keeps running, progress file tracks it.
    child.unref();
    // Do NOT destroy the child's stdout/stderr: closing the read end of the
    // pipes makes the child's next write fail with EPIPE, which would kill the
    // very job we want to keep alive. Keep the pipes open and flowing instead;
    // the data listeners already discard everything once clientClosed is set.
    child.stdout?.resume();
    child.stderr?.resume();
    return;
  }

  // Short-lived commands have no reason to outlive the terminal session.
  if (!child.killed) child.kill();
});
602
647
 
603
648
  return;