seo-intel 1.1.6 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +270 -88
- package/crawler/index.js +6 -1
- package/db/db.js +152 -5
- package/db/schema.sql +48 -0
- package/lib/gate.js +1 -0
- package/package.json +1 -1
- package/reports/generate-html.js +124 -59
- package/server.js +54 -9
package/cli.js
CHANGED
|
@@ -24,11 +24,11 @@ import { getNextCrawlTarget, needsAnalysis, getCrawlStatus, loadAllConfigs } fro
|
|
|
24
24
|
import {
|
|
25
25
|
getDb, upsertDomain, upsertPage, insertExtraction,
|
|
26
26
|
insertKeywords, insertHeadings, insertLinks, insertPageSchemas,
|
|
27
|
-
upsertTechnical,
|
|
27
|
+
upsertTechnical, pruneStaleDomains,
|
|
28
28
|
getCompetitorSummary, getKeywordMatrix, getHeadingStructure,
|
|
29
29
|
getPageHash, getSchemasByProject
|
|
30
30
|
} from './db/db.js';
|
|
31
|
-
import {
|
|
31
|
+
import { generateMultiDashboard } from './reports/generate-html.js';
|
|
32
32
|
import { buildTechnicalActions } from './exports/technical.js';
|
|
33
33
|
import { buildCompetitiveActions } from './exports/competitive.js';
|
|
34
34
|
import { buildSuggestiveActions } from './exports/suggestive.js';
|
|
@@ -393,6 +393,21 @@ program
|
|
|
393
393
|
}
|
|
394
394
|
}
|
|
395
395
|
|
|
396
|
+
// ── Prune stale domains (DB entries no longer in config) ─────────────
|
|
397
|
+
{
|
|
398
|
+
const configDomains = new Set([
|
|
399
|
+
config.target?.domain,
|
|
400
|
+
...(config.owned || []).map(o => o.domain),
|
|
401
|
+
...(config.competitors || []).map(c => c.domain),
|
|
402
|
+
].filter(Boolean));
|
|
403
|
+
|
|
404
|
+
const pruned = pruneStaleDomains(db, project, configDomains);
|
|
405
|
+
if (pruned.length) {
|
|
406
|
+
console.log(chalk.yellow(`\n 🧹 Pruned ${pruned.length} stale domain(s) from DB (no longer in config):`));
|
|
407
|
+
for (const d of pruned) console.log(chalk.dim(` − ${d}`));
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
|
|
396
411
|
// ── Tier gate: Free tier = crawl-only, no AI extraction ──────────────
|
|
397
412
|
if (opts.extract !== false && !isPro()) {
|
|
398
413
|
console.log(chalk.dim('\n ℹ Free tier: crawl-only mode (AI extraction requires Solo/Agency)'));
|
|
@@ -488,6 +503,9 @@ program
|
|
|
488
503
|
publishedDate: page.publishedDate || null,
|
|
489
504
|
modifiedDate: page.modifiedDate || null,
|
|
490
505
|
contentHash: page.contentHash || null,
|
|
506
|
+
title: page.title || null,
|
|
507
|
+
metaDesc: page.metaDesc || null,
|
|
508
|
+
bodyText: page.fullBodyText || page.bodyText || null,
|
|
491
509
|
});
|
|
492
510
|
const pageId = pageRes?.id;
|
|
493
511
|
|
|
@@ -579,9 +597,10 @@ program
|
|
|
579
597
|
if (totalSkipped > 0) console.log(chalk.blue(`\n📊 Incremental: ${totalSkipped} unchanged pages skipped (same content hash)`));
|
|
580
598
|
if (totalBlocked > 0) console.log(chalk.red(`\n⛔ ${totalBlocked} domain(s) blocked (rate-limited or WAF)`));
|
|
581
599
|
const elapsed = ((Date.now() - crawlStart) / 1000).toFixed(1);
|
|
582
|
-
// Auto-regenerate dashboard
|
|
600
|
+
// Auto-regenerate dashboard (always multi-project so all projects stay current)
|
|
583
601
|
try {
|
|
584
|
-
const
|
|
602
|
+
const allConfigs = loadAllConfigs();
|
|
603
|
+
const dashPath = generateMultiDashboard(db, allConfigs);
|
|
585
604
|
console.log(chalk.dim(` 📊 Dashboard refreshed → ${dashPath}`));
|
|
586
605
|
} catch (dashErr) {
|
|
587
606
|
console.log(chalk.dim(` ⚠ Dashboard refresh skipped: ${dashErr.message}`));
|
|
@@ -697,9 +716,10 @@ program
|
|
|
697
716
|
// Print summary
|
|
698
717
|
printAnalysisSummary(analysis, project);
|
|
699
718
|
|
|
700
|
-
// Auto-regenerate dashboard
|
|
719
|
+
// Auto-regenerate dashboard (always multi-project so all projects stay current)
|
|
701
720
|
try {
|
|
702
|
-
const
|
|
721
|
+
const allConfigs = loadAllConfigs();
|
|
722
|
+
const dashPath = generateMultiDashboard(db, allConfigs);
|
|
703
723
|
console.log(chalk.dim(` 📊 Dashboard refreshed → ${dashPath}`));
|
|
704
724
|
} catch (dashErr) {
|
|
705
725
|
console.log(chalk.dim(` ⚠ Dashboard refresh skipped: ${dashErr.message}`));
|
|
@@ -1074,6 +1094,9 @@ program
|
|
|
1074
1094
|
publishedDate: page.publishedDate || null,
|
|
1075
1095
|
modifiedDate: page.modifiedDate || null,
|
|
1076
1096
|
contentHash: page.contentHash || null,
|
|
1097
|
+
title: page.title || null,
|
|
1098
|
+
metaDesc: page.metaDesc || null,
|
|
1099
|
+
bodyText: page.fullBodyText || page.bodyText || null,
|
|
1077
1100
|
});
|
|
1078
1101
|
const pageId = pageRes?.id;
|
|
1079
1102
|
|
|
@@ -1379,6 +1402,7 @@ program
|
|
|
1379
1402
|
.option('--add-owned <domain>', 'Add an owned subdomain')
|
|
1380
1403
|
.option('--remove-owned <domain>', 'Remove an owned subdomain')
|
|
1381
1404
|
.option('--set-target <domain>', 'Change the target domain')
|
|
1405
|
+
.option('--prune', 'Remove DB data for domains no longer in config')
|
|
1382
1406
|
.action((project, opts) => {
|
|
1383
1407
|
const configPath = join(__dirname, `config/${project}.json`);
|
|
1384
1408
|
let config;
|
|
@@ -1471,6 +1495,24 @@ program
|
|
|
1471
1495
|
console.log(chalk.dim(`\n Saved → config/${project}.json`));
|
|
1472
1496
|
}
|
|
1473
1497
|
|
|
1498
|
+
// ── Prune stale DB data (auto on remove, or manual --prune) ─────────
|
|
1499
|
+
if (modified || opts.prune) {
|
|
1500
|
+
const db = getDb();
|
|
1501
|
+
const configDomains = new Set([
|
|
1502
|
+
config.target?.domain,
|
|
1503
|
+
...(config.owned || []).map(o => o.domain),
|
|
1504
|
+
...(config.competitors || []).map(c => c.domain),
|
|
1505
|
+
].filter(Boolean));
|
|
1506
|
+
|
|
1507
|
+
const pruned = pruneStaleDomains(db, project, configDomains);
|
|
1508
|
+
if (pruned.length) {
|
|
1509
|
+
console.log(chalk.yellow(`\n 🧹 Pruned ${pruned.length} stale domain(s) from DB:`));
|
|
1510
|
+
for (const d of pruned) console.log(chalk.dim(` − ${d}`));
|
|
1511
|
+
} else if (opts.prune) {
|
|
1512
|
+
console.log(chalk.dim('\n ✓ No stale domains to prune'));
|
|
1513
|
+
}
|
|
1514
|
+
}
|
|
1515
|
+
|
|
1474
1516
|
// ── Always show current config
|
|
1475
1517
|
console.log(chalk.bold.cyan(`\n 📋 ${project} — Domain Configuration\n`));
|
|
1476
1518
|
console.log(chalk.white(' Target:'));
|
|
@@ -1560,13 +1602,14 @@ async function runAnalysis(project, db) {
|
|
|
1560
1602
|
program
|
|
1561
1603
|
.command('extract <project>')
|
|
1562
1604
|
.description('Run AI extraction on all crawled-but-not-yet-extracted pages (requires Solo/Agency)')
|
|
1563
|
-
.
|
|
1564
|
-
.action(async (project, opts) => {
|
|
1605
|
+
.action(async (project) => {
|
|
1565
1606
|
if (!requirePro('extract')) return;
|
|
1566
1607
|
const db = getDb();
|
|
1608
|
+
|
|
1609
|
+
// Query pages that have body_text stored (from crawl) but no extraction yet
|
|
1567
1610
|
const pendingPages = db.prepare(`
|
|
1568
|
-
SELECT p.id, p.url, p.word_count,
|
|
1569
|
-
|
|
1611
|
+
SELECT p.id, p.url, p.word_count, p.title, p.meta_desc, p.body_text,
|
|
1612
|
+
p.published_date, p.modified_date
|
|
1570
1613
|
FROM pages p
|
|
1571
1614
|
JOIN domains d ON d.id = p.domain_id
|
|
1572
1615
|
LEFT JOIN extractions e ON e.page_id = p.id
|
|
@@ -1578,102 +1621,241 @@ program
|
|
|
1578
1621
|
process.exit(0);
|
|
1579
1622
|
}
|
|
1580
1623
|
|
|
1581
|
-
|
|
1582
|
-
|
|
1624
|
+
// Check how many have body_text stored vs need re-crawl
|
|
1625
|
+
const withContent = pendingPages.filter(r => r.body_text);
|
|
1626
|
+
const needsRecrawl = pendingPages.length - withContent.length;
|
|
1627
|
+
|
|
1628
|
+
console.log(chalk.bold.cyan(`\n⚙️ Extracting ${pendingPages.length} pages for ${project} via Qwen...\n`));
|
|
1629
|
+
if (needsRecrawl > 0) {
|
|
1630
|
+
console.log(chalk.yellow(` ⚠ ${needsRecrawl} pages have no stored content (crawled before v1.1.6). Re-crawl to populate.\n`));
|
|
1631
|
+
}
|
|
1583
1632
|
|
|
1584
1633
|
const extractStart = Date.now();
|
|
1585
|
-
let done = 0, failed = 0;
|
|
1634
|
+
let done = 0, failed = 0, skipped = 0;
|
|
1635
|
+
|
|
1636
|
+
// ── Pre-extract template grouping: sample N per group, skip the rest ──
|
|
1637
|
+
const SAMPLE_PER_GROUP = 5;
|
|
1638
|
+
const MIN_GROUP_FOR_SAMPLING = 10;
|
|
1639
|
+
let extractQueue = pendingPages.filter(r => r.body_text); // only pages with stored content
|
|
1640
|
+
|
|
1641
|
+
try {
|
|
1642
|
+
const { clusterUrls } = await import('./analyses/templates/cluster.js');
|
|
1643
|
+
const { groups } = clusterUrls(
|
|
1644
|
+
extractQueue.map(r => ({ url: r.url })),
|
|
1645
|
+
{ minGroupSize: MIN_GROUP_FOR_SAMPLING }
|
|
1646
|
+
);
|
|
1647
|
+
|
|
1648
|
+
if (groups.length > 0) {
|
|
1649
|
+
const skipUrls = new Set();
|
|
1586
1650
|
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1651
|
+
for (const group of groups) {
|
|
1652
|
+
const urls = group.urls;
|
|
1653
|
+
if (urls.length <= SAMPLE_PER_GROUP) continue;
|
|
1654
|
+
|
|
1655
|
+
const sampleSet = new Set();
|
|
1656
|
+
sampleSet.add(urls[0]); sampleSet.add(urls[1]);
|
|
1657
|
+
sampleSet.add(urls[urls.length - 1]); sampleSet.add(urls[urls.length - 2]);
|
|
1658
|
+
sampleSet.add(urls[Math.floor(urls.length / 2)]);
|
|
1659
|
+
|
|
1660
|
+
const skippedCount = urls.length - sampleSet.size;
|
|
1661
|
+
for (const u of urls) {
|
|
1662
|
+
if (!sampleSet.has(u)) skipUrls.add(u);
|
|
1663
|
+
}
|
|
1664
|
+
console.log(chalk.yellow(` [template] ${group.pattern} → ${urls.length} pages, sampling ${sampleSet.size}, skipping ${skippedCount}`));
|
|
1665
|
+
}
|
|
1666
|
+
|
|
1667
|
+
if (skipUrls.size > 0) {
|
|
1668
|
+
extractQueue = extractQueue.filter(r => !skipUrls.has(r.url));
|
|
1669
|
+
skipped += skipUrls.size;
|
|
1670
|
+
console.log(chalk.yellow(` [template] ${withContent.length} extractable → ${extractQueue.length} to extract (${skipUrls.size} template-skipped)\n`));
|
|
1671
|
+
}
|
|
1672
|
+
}
|
|
1673
|
+
} catch (e) {
|
|
1674
|
+
console.log(chalk.gray(` [template] Pattern detection skipped: ${e.message}`));
|
|
1593
1675
|
}
|
|
1594
1676
|
|
|
1595
|
-
//
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1677
|
+
// ── Consecutive failure tracking per URL pattern ──
|
|
1678
|
+
const CONSEC_FAIL_THRESHOLD = 3;
|
|
1679
|
+
const patternFailCounts = new Map();
|
|
1680
|
+
const skippedPatterns = new Set();
|
|
1681
|
+
|
|
1682
|
+
/**
 * Collapse a URL into a coarse "pattern key": hostname plus path, with every
 * segment that looks like a variable part replaced by the literal `{var}`.
 *
 * A segment counts as variable when it is longer than 20 characters, is 8+
 * hex characters, starts with `0x`, or contains a hyphen or underscore.
 * Query string and fragment are ignored.
 *
 * @param {string} url - absolute URL to normalise
 * @returns {string} the pattern key, or `url` unchanged when it cannot be parsed
 */
function getPatternKey(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Not a parseable absolute URL — fall back to the raw string as its own key.
    return url;
  }

  const looksVariable = (segment) =>
    segment.length > 20 ||
    /^[0-9a-fA-F]{8,}$/.test(segment) ||
    /^0x/.test(segment) ||
    /[-_]/.test(segment);

  const normalised = [];
  for (const segment of parsed.pathname.split('/')) {
    if (!segment) continue; // drop empty pieces from leading/trailing/double slashes
    normalised.push(looksVariable(segment) ? '{var}' : segment);
  }

  return parsed.hostname + '/' + normalised.join('/');
}
|
|
1691
|
+
|
|
1692
|
+
// ── Content similarity detection ──
|
|
1693
|
+
const SIMILARITY_THRESHOLD = 0.80;
|
|
1694
|
+
const SIMILARITY_SAMPLE_SIZE = 3;
|
|
1695
|
+
const patternFingerprints = new Map();
|
|
1696
|
+
|
|
1697
|
+
/**
 * Split text into a set of overlapping word n-grams ("shingles") used to
 * compare pages for near-duplicate content.
 *
 * The text is lower-cased, everything except letters, combining marks, digits
 * and whitespace is removed, and the remaining words are joined `n` at a time
 * with single spaces.
 *
 * @param {string|null|undefined} text - raw page text; nullish is treated as empty
 * @param {number} [n=3] - number of words per shingle
 * @returns {Set<string>} distinct shingles; empty when the text has fewer than `n` words
 */
function textToShingles(text, n = 3) {
  // Unicode property escapes keep letters/digits of every script. An ASCII-only
  // class would erase non-English text entirely and leave nothing to compare.
  const words = (text || '')
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, '')
    .split(/\s+/)
    .filter(Boolean);

  const shingles = new Set();
  for (let i = 0; i <= words.length - n; i++) {
    shingles.add(words.slice(i, i + n).join(' '));
  }
  return shingles;
}
|
|
1705
|
+
|
|
1706
|
+
/**
 * Jaccard similarity of two sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<string>} a
 * @param {Set<string>} b
 * @returns {number} value in [0, 1]; 0 when either set is empty
 */
function jaccardSimilarity(a, b) {
  if (!a.size || !b.size) return 0;

  // Walk the smaller set and probe the larger one: the intersection is the
  // same either way, but this needs only min(|a|, |b|) lookups.
  const [smaller, larger] = a.size <= b.size ? [a, b] : [b, a];

  let intersection = 0;
  for (const item of smaller) {
    if (larger.has(item)) intersection++;
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  return intersection / (a.size + b.size - intersection);
}
|
|
1712
|
+
|
|
1713
|
+
/**
 * Record a page's shingle set under its URL pattern and report whether the
 * sample for that pattern has just filled up with near-identical pages.
 *
 * Reads and updates the enclosing-scope `patternFingerprints` map and uses
 * `SIMILARITY_SAMPLE_SIZE`, `SIMILARITY_THRESHOLD` and `jaccardSimilarity`.
 *
 * @param {string} patKey - pattern key the page belongs to
 * @param {Set<string>} newShingles - shingle set of the page just processed
 * @returns {boolean} true only on the call that completes the sample, and only
 *   if every pair in the sample is at least `SIMILARITY_THRESHOLD` similar
 */
function checkPatternSimilarity(patKey, newShingles) {
  let fps = patternFingerprints.get(patKey);
  if (!fps) {
    fps = [];
    patternFingerprints.set(patKey, fps);
  }

  // The verdict is issued exactly once, when the sample fills up. After that
  // further shingle sets are never consulted, so do not retain them.
  if (fps.length >= SIMILARITY_SAMPLE_SIZE) return false;

  fps.push(newShingles);
  if (fps.length < SIMILARITY_SAMPLE_SIZE) return false;

  // Every pair in the sample must clear the threshold.
  for (let i = 0; i < fps.length; i++) {
    for (let j = i + 1; j < fps.length; j++) {
      if (jaccardSimilarity(fps[i], fps[j]) < SIMILARITY_THRESHOLD) return false;
    }
  }
  return true;
}
|
|
1602
1725
|
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
if (opts.stealth) process.stdout.write(chalk.magenta('stealth '));
|
|
1607
|
-
process.stdout.write(chalk.gray('fetching...'));
|
|
1608
|
-
|
|
1609
|
-
writeProgress({
|
|
1610
|
-
status: 'running', command: 'extract', project,
|
|
1611
|
-
current_url: row.url,
|
|
1612
|
-
page_index: done + failed + 1, total: pendingPages.length,
|
|
1613
|
-
percent: Math.round(((done + failed) / pendingPages.length) * 100),
|
|
1614
|
-
started_at: extractStart, failed,
|
|
1615
|
-
stealth: !!opts.stealth,
|
|
1616
|
-
});
|
|
1726
|
+
// ── Prepare headings + schema queries (per-page lookups from DB) ──
|
|
1727
|
+
const getHeadings = db.prepare('SELECT level, text FROM headings WHERE page_id = ? ORDER BY id');
|
|
1728
|
+
const getSchemaTypes = db.prepare('SELECT DISTINCT schema_type FROM page_schemas WHERE page_id = ?');
|
|
1617
1729
|
|
|
1618
|
-
|
|
1619
|
-
|
|
1730
|
+
const totalToProcess = extractQueue.length;
|
|
1731
|
+
console.log(chalk.gray(` 📖 Reading from DB — no network needed\n`));
|
|
1620
1732
|
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
const crawled = await crawlAll(row.url);
|
|
1628
|
-
pageData = crawled[0] || null;
|
|
1629
|
-
}
|
|
1733
|
+
for (const row of extractQueue) {
|
|
1734
|
+
const patKey = getPatternKey(row.url);
|
|
1735
|
+
if (skippedPatterns.has(patKey)) {
|
|
1736
|
+
skipped++;
|
|
1737
|
+
continue;
|
|
1738
|
+
}
|
|
1630
1739
|
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
failed++;
|
|
1635
|
-
if (stealthSession) {
|
|
1636
|
-
// Jittered delay even on failure — don't hammer a blocking site
|
|
1637
|
-
await new Promise(r => setTimeout(r, 1500 + Math.random() * 2000));
|
|
1638
|
-
}
|
|
1639
|
-
continue;
|
|
1640
|
-
}
|
|
1740
|
+
const pos = done + failed + 1;
|
|
1741
|
+
process.stdout.write(chalk.gray(` [${pos}/${totalToProcess}] ${row.url.slice(0, 70)} → `));
|
|
1742
|
+
process.stdout.write(chalk.gray('extracting...'));
|
|
1641
1743
|
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
insertLinks(db, row.id, pageData.links);
|
|
1652
|
-
if (pageData.parsedSchemas?.length) insertPageSchemas(db, row.id, pageData.parsedSchemas);
|
|
1653
|
-
}
|
|
1744
|
+
writeProgress({
|
|
1745
|
+
status: 'running', command: 'extract', project,
|
|
1746
|
+
current_url: row.url,
|
|
1747
|
+
page_index: pos, total: totalToProcess,
|
|
1748
|
+
percent: Math.round(((done + failed) / totalToProcess) * 100),
|
|
1749
|
+
started_at: extractStart, failed, skipped,
|
|
1750
|
+
});
|
|
1751
|
+
|
|
1752
|
+
let pageFailed = false;
|
|
1654
1753
|
|
|
1655
|
-
|
|
1754
|
+
try {
|
|
1755
|
+
// Read headings + schema types from DB
|
|
1756
|
+
const headings = getHeadings.all(row.id);
|
|
1757
|
+
const schemaTypes = getSchemaTypes.all(row.id).map(r => r.schema_type);
|
|
1758
|
+
|
|
1759
|
+
const extractFn = await getExtractPage();
|
|
1760
|
+
const extraction = await extractFn({
|
|
1761
|
+
url: row.url,
|
|
1762
|
+
title: row.title || '',
|
|
1763
|
+
metaDesc: row.meta_desc || '',
|
|
1764
|
+
headings,
|
|
1765
|
+
bodyText: row.body_text,
|
|
1766
|
+
schemaTypes,
|
|
1767
|
+
publishedDate: row.published_date,
|
|
1768
|
+
modifiedDate: row.modified_date,
|
|
1769
|
+
});
|
|
1770
|
+
insertExtraction(db, { pageId: row.id, data: extraction });
|
|
1771
|
+
insertKeywords(db, row.id, extraction.keywords);
|
|
1772
|
+
|
|
1773
|
+
const isDegraded = extraction.extraction_source === 'degraded';
|
|
1774
|
+
if (isDegraded) {
|
|
1775
|
+
process.stdout.write(chalk.yellow(` ⚠ degraded\n`));
|
|
1656
1776
|
done++;
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1777
|
+
pageFailed = true;
|
|
1778
|
+
} else {
|
|
1779
|
+
process.stdout.write(chalk.green(` ✓\n`));
|
|
1780
|
+
done++;
|
|
1781
|
+
patternFailCounts.set(patKey, 0);
|
|
1660
1782
|
}
|
|
1661
1783
|
|
|
1662
|
-
//
|
|
1663
|
-
if (
|
|
1664
|
-
|
|
1784
|
+
// ── Content similarity detection ──
|
|
1785
|
+
if (row.body_text.length > 50) {
|
|
1786
|
+
const shingles = textToShingles(row.body_text);
|
|
1787
|
+
if (checkPatternSimilarity(patKey, shingles) && !skippedPatterns.has(patKey)) {
|
|
1788
|
+
const remaining = extractQueue.filter(r => getPatternKey(r.url) === patKey).length - (patternFingerprints.get(patKey)?.length || 0);
|
|
1789
|
+
skippedPatterns.add(patKey);
|
|
1790
|
+
if (remaining > 0) {
|
|
1791
|
+
console.log(chalk.yellow(` [similarity] 🔍 ${SIMILARITY_SAMPLE_SIZE} pages from ${patKey} are ${Math.round(SIMILARITY_THRESHOLD * 100)}%+ identical — skipping ${remaining} remaining`));
|
|
1792
|
+
}
|
|
1793
|
+
}
|
|
1665
1794
|
}
|
|
1795
|
+
} catch (err) {
|
|
1796
|
+
process.stdout.write(chalk.red(` ✗ ${err.message}\n`));
|
|
1797
|
+
failed++;
|
|
1798
|
+
pageFailed = true;
|
|
1666
1799
|
}
|
|
1667
|
-
|
|
1668
|
-
//
|
|
1669
|
-
if (
|
|
1670
|
-
|
|
1671
|
-
|
|
1800
|
+
|
|
1801
|
+
// ── Track consecutive failures per pattern ──
|
|
1802
|
+
if (pageFailed) {
|
|
1803
|
+
const count = (patternFailCounts.get(patKey) || 0) + 1;
|
|
1804
|
+
patternFailCounts.set(patKey, count);
|
|
1805
|
+
if (count >= CONSEC_FAIL_THRESHOLD) {
|
|
1806
|
+
const remaining = extractQueue.filter(r => !skippedPatterns.has(getPatternKey(r.url)) && getPatternKey(r.url) === patKey).length;
|
|
1807
|
+
skippedPatterns.add(patKey);
|
|
1808
|
+
console.log(chalk.yellow(` [template] ⚡ ${count} consecutive failures for ${patKey} — skipping ~${remaining} remaining pages`));
|
|
1809
|
+
}
|
|
1672
1810
|
}
|
|
1673
1811
|
}
|
|
1674
1812
|
|
|
1675
|
-
writeProgress({ status: 'completed', command: 'extract', project, extracted: done, failed, total: pendingPages.length, started_at: extractStart, finished_at: Date.now() });
|
|
1676
|
-
|
|
1813
|
+
writeProgress({ status: 'completed', command: 'extract', project, extracted: done, failed, skipped, total: pendingPages.length, started_at: extractStart, finished_at: Date.now() });
|
|
1814
|
+
const skipMsg = skipped > 0 ? chalk.yellow(`, ${skipped} template-skipped`) : '';
|
|
1815
|
+
const recrawlMsg = needsRecrawl > 0 ? chalk.yellow(`, ${needsRecrawl} need re-crawl`) : '';
|
|
1816
|
+
console.log(chalk.bold.green(`\n✅ Extraction complete: ${done} extracted, ${failed} failed${skipMsg}${recrawlMsg}\n`));
|
|
1817
|
+
});
|
|
1818
|
+
|
|
1819
|
+
// ── TEMPLATES ANALYSIS ────────────────────────────────────────────────────
|
|
1820
|
+
// Registers the `templates <project>` command: gated by requirePro, it runs the
// lazily imported template analysis and prints a short summary of the report.
program
  .command('templates <project>')
  .description('Detect programmatic template pages — assess SEO value without crawling all of them')
  .option('--min-group <n>', 'Minimum URLs to qualify as a template group', '10')
  .option('--sample-size <n>', 'Pages to stealth-crawl per template group', '20')
  .option('--skip-crawl', 'Skip sample crawl (pattern analysis + GSC only)')
  .option('--skip-gsc', 'Skip GSC overlay phase')
  .option('--skip-competitors', 'Skip competitor sitemap census')
  .action(async (project, opts) => {
    if (!requirePro('templates')) return;

    // CLI option values arrive as strings. Parse them explicitly as base-10 and
    // fall back to the default for anything that is not a positive integer
    // (garbage, zero, negatives) so a typo cannot yield a nonsensical size.
    const toPositiveInt = (raw, fallback) => {
      const parsed = Number.parseInt(raw, 10);
      return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
    };

    console.log(chalk.bold.cyan(`\n🔍 SEO Intel — Template Analysis`));
    console.log(chalk.dim(` Project: ${project}`));

    try {
      // Loaded lazily so the analysis module is only imported when this command runs.
      const { runTemplatesAnalysis } = await import('./analyses/templates/index.js');
      const report = await runTemplatesAnalysis(project, {
        minGroupSize: toPositiveInt(opts.minGroup, 10),
        sampleSize: toPositiveInt(opts.sampleSize, 20),
        skipCrawl: !!opts.skipCrawl,
        skipGsc: !!opts.skipGsc,
        skipCompetitors: !!opts.skipCompetitors,
        log: (msg) => console.log(chalk.gray(msg)),
      });

      if (report.groups.length === 0) {
        console.log(chalk.yellow(`\n No template patterns detected.\n`));
        process.exit(0);
      }

      // Summary
      console.log(chalk.bold.green(`\n✅ Template analysis complete`));
      console.log(chalk.dim(` ${report.stats.totalGroups} groups · ${report.stats.totalGrouped.toLocaleString()} URLs · ${(report.stats.coverage * 100).toFixed(0)}% of sitemap`));
      console.log(chalk.dim(` Run ${chalk.white('seo-intel html ' + project)} to see the full dashboard.\n`));
    } catch (err) {
      console.error(chalk.red(`\n Error: ${err.message}\n`));
      // Full stack only on request, to keep normal CLI output short.
      if (process.env.DEBUG) console.error(err.stack);
      process.exit(1);
    }
  });
|
|
1678
1860
|
|
|
1679
1861
|
// ── HTML DASHBOARD ─────────────────────────────────────────────────────────
|
|
@@ -1741,10 +1923,10 @@ program
|
|
|
1741
1923
|
}
|
|
1742
1924
|
});
|
|
1743
1925
|
|
|
1744
|
-
// ── HTML ALL-PROJECTS DASHBOARD
|
|
1926
|
+
// ── HTML ALL-PROJECTS DASHBOARD (alias for html — kept for backwards compat) ──
|
|
1745
1927
|
program
|
|
1746
1928
|
.command('html-all')
|
|
1747
|
-
.description('
|
|
1929
|
+
.description('Alias for "html" — generates the all-projects dashboard')
|
|
1748
1930
|
.action(() => {
|
|
1749
1931
|
const db = getDb();
|
|
1750
1932
|
const configs = loadAllConfigs();
|
package/crawler/index.js
CHANGED
|
@@ -547,11 +547,16 @@ async function processPage(page, url, base, depth, queue, maxDepth) {
|
|
|
547
547
|
// ── Quality gate — detect shells, blocked pages, empty content ──
|
|
548
548
|
const quality = assessQuality({ wordCount, bodyText, title, status });
|
|
549
549
|
|
|
550
|
+
// Full body text for DB storage (extraction reads this); truncated for log output
|
|
551
|
+
const fullBodyText = sanitize(bodyText, 50000); // ~200K chars — enough for any real page
|
|
552
|
+
const shortBodyText = sanitize(bodyText, 2000); // compact version for logging
|
|
553
|
+
|
|
550
554
|
return {
|
|
551
555
|
url, depth, status, loadMs, wordCount, isIndexable,
|
|
552
556
|
title, metaDesc, headings,
|
|
553
557
|
links: [...internalLinks, ...externalLinks],
|
|
554
|
-
bodyText:
|
|
558
|
+
bodyText: shortBodyText,
|
|
559
|
+
fullBodyText,
|
|
555
560
|
schemaTypes, parsedSchemas, vitals, publishedDate, modifiedDate,
|
|
556
561
|
contentHash: hash,
|
|
557
562
|
quality: quality.ok, qualityReason: quality.reason,
|
package/db/db.js
CHANGED
|
@@ -21,6 +21,9 @@ export function getDb(dbPath = './seo-intel.db') {
|
|
|
21
21
|
// Migrations for existing databases
|
|
22
22
|
try { _db.exec('ALTER TABLE pages ADD COLUMN content_hash TEXT'); } catch { /* already exists */ }
|
|
23
23
|
try { _db.exec('ALTER TABLE pages ADD COLUMN first_seen_at INTEGER'); } catch { /* already exists */ }
|
|
24
|
+
try { _db.exec('ALTER TABLE pages ADD COLUMN title TEXT'); } catch { /* already exists */ }
|
|
25
|
+
try { _db.exec('ALTER TABLE pages ADD COLUMN meta_desc TEXT'); } catch { /* already exists */ }
|
|
26
|
+
try { _db.exec('ALTER TABLE pages ADD COLUMN body_text TEXT'); } catch { /* already exists */ }
|
|
24
27
|
|
|
25
28
|
// Backfill first_seen_at from crawled_at for existing rows
|
|
26
29
|
_db.exec('UPDATE pages SET first_seen_at = crawled_at WHERE first_seen_at IS NULL');
|
|
@@ -42,11 +45,11 @@ export function upsertDomain(db, { domain, project, role }) {
|
|
|
42
45
|
`).run(domain, project, role, now, now);
|
|
43
46
|
}
|
|
44
47
|
|
|
45
|
-
export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, isIndexable, clickDepth = 0, publishedDate = null, modifiedDate = null, contentHash = null }) {
|
|
48
|
+
export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, isIndexable, clickDepth = 0, publishedDate = null, modifiedDate = null, contentHash = null, title = null, metaDesc = null, bodyText = null }) {
|
|
46
49
|
const now = Date.now();
|
|
47
50
|
db.prepare(`
|
|
48
|
-
INSERT INTO pages (domain_id, url, crawled_at, first_seen_at, status_code, word_count, load_ms, is_indexable, click_depth, published_date, modified_date, content_hash)
|
|
49
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
51
|
+
INSERT INTO pages (domain_id, url, crawled_at, first_seen_at, status_code, word_count, load_ms, is_indexable, click_depth, published_date, modified_date, content_hash, title, meta_desc, body_text)
|
|
52
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
50
53
|
ON CONFLICT(url) DO UPDATE SET
|
|
51
54
|
crawled_at = excluded.crawled_at,
|
|
52
55
|
status_code = excluded.status_code,
|
|
@@ -55,8 +58,11 @@ export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, i
|
|
|
55
58
|
click_depth = excluded.click_depth,
|
|
56
59
|
published_date = excluded.published_date,
|
|
57
60
|
modified_date = excluded.modified_date,
|
|
58
|
-
content_hash = excluded.content_hash
|
|
59
|
-
|
|
61
|
+
content_hash = excluded.content_hash,
|
|
62
|
+
title = excluded.title,
|
|
63
|
+
meta_desc = excluded.meta_desc,
|
|
64
|
+
body_text = excluded.body_text
|
|
65
|
+
`).run(domainId, url, now, now, statusCode, wordCount, loadMs, isIndexable ? 1 : 0, clickDepth, publishedDate, modifiedDate, contentHash, title || null, metaDesc || null, bodyText || null);
|
|
60
66
|
// first_seen_at is NOT in the ON CONFLICT UPDATE — it stays from original INSERT
|
|
61
67
|
return db.prepare('SELECT id FROM pages WHERE url = ?').get(url);
|
|
62
68
|
}
|
|
@@ -214,6 +220,147 @@ export function getKeywordMatrix(db, project) {
|
|
|
214
220
|
`).all(project);
|
|
215
221
|
}
|
|
216
222
|
|
|
223
|
+
// ── Template analysis ─────────────────────────────────────────────────────
|
|
224
|
+
|
|
225
|
+
export function upsertTemplateGroup(db, g) {
|
|
226
|
+
return db.prepare(`
|
|
227
|
+
INSERT INTO template_groups
|
|
228
|
+
(project, domain, pattern, url_count, sample_size,
|
|
229
|
+
avg_word_count, content_similarity, dom_similarity,
|
|
230
|
+
gsc_urls_with_impressions, gsc_total_clicks, gsc_total_impressions,
|
|
231
|
+
gsc_avg_position, indexation_efficiency, score, verdict, recommendation,
|
|
232
|
+
analyzed_at)
|
|
233
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
234
|
+
ON CONFLICT(project, domain, pattern) DO UPDATE SET
|
|
235
|
+
url_count = excluded.url_count,
|
|
236
|
+
sample_size = excluded.sample_size,
|
|
237
|
+
avg_word_count = excluded.avg_word_count,
|
|
238
|
+
content_similarity = excluded.content_similarity,
|
|
239
|
+
dom_similarity = excluded.dom_similarity,
|
|
240
|
+
gsc_urls_with_impressions = excluded.gsc_urls_with_impressions,
|
|
241
|
+
gsc_total_clicks = excluded.gsc_total_clicks,
|
|
242
|
+
gsc_total_impressions = excluded.gsc_total_impressions,
|
|
243
|
+
gsc_avg_position = excluded.gsc_avg_position,
|
|
244
|
+
indexation_efficiency = excluded.indexation_efficiency,
|
|
245
|
+
score = excluded.score,
|
|
246
|
+
verdict = excluded.verdict,
|
|
247
|
+
recommendation = excluded.recommendation,
|
|
248
|
+
analyzed_at = excluded.analyzed_at
|
|
249
|
+
`).run(
|
|
250
|
+
g.project, g.domain, g.pattern, g.urlCount, g.sampleSize || 0,
|
|
251
|
+
g.avgWordCount ?? null, g.contentSimilarity ?? null, g.domSimilarity ?? null,
|
|
252
|
+
g.gscUrlsWithImpressions || 0, g.gscTotalClicks || 0, g.gscTotalImpressions || 0,
|
|
253
|
+
g.gscAvgPosition ?? null, g.indexationEfficiency ?? null,
|
|
254
|
+
g.score ?? null, g.verdict || null, JSON.stringify(g.recommendation || []),
|
|
255
|
+
g.analyzedAt || Date.now()
|
|
256
|
+
);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
export function getTemplateGroupId(db, project, domain, pattern) {
|
|
260
|
+
return db.prepare(
|
|
261
|
+
'SELECT id FROM template_groups WHERE project = ? AND domain = ? AND pattern = ?'
|
|
262
|
+
).get(project, domain, pattern)?.id;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
export function upsertTemplateSample(db, s) {
|
|
266
|
+
db.prepare(`
|
|
267
|
+
INSERT INTO template_samples
|
|
268
|
+
(group_id, url, sample_role, status_code, word_count,
|
|
269
|
+
title, meta_desc, has_canonical, has_schema, is_indexable,
|
|
270
|
+
dom_fingerprint, content_hash, body_text, crawled_at)
|
|
271
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
272
|
+
ON CONFLICT(group_id, url) DO UPDATE SET
|
|
273
|
+
sample_role = excluded.sample_role,
|
|
274
|
+
status_code = excluded.status_code,
|
|
275
|
+
word_count = excluded.word_count,
|
|
276
|
+
title = excluded.title,
|
|
277
|
+
meta_desc = excluded.meta_desc,
|
|
278
|
+
has_canonical = excluded.has_canonical,
|
|
279
|
+
has_schema = excluded.has_schema,
|
|
280
|
+
is_indexable = excluded.is_indexable,
|
|
281
|
+
dom_fingerprint = excluded.dom_fingerprint,
|
|
282
|
+
content_hash = excluded.content_hash,
|
|
283
|
+
body_text = excluded.body_text,
|
|
284
|
+
crawled_at = excluded.crawled_at
|
|
285
|
+
`).run(
|
|
286
|
+
s.groupId, s.url, s.sampleRole, s.statusCode ?? null, s.wordCount ?? null,
|
|
287
|
+
s.title || null, s.metaDesc || null,
|
|
288
|
+
s.hasCanonical ? 1 : 0, s.hasSchema ? 1 : 0, s.isIndexable ? 1 : 0,
|
|
289
|
+
s.domFingerprint || null, s.contentHash || null, s.bodyText || null,
|
|
290
|
+
s.crawledAt || Date.now()
|
|
291
|
+
);
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
export function getTemplateGroups(db, project) {
|
|
295
|
+
return db.prepare(
|
|
296
|
+
'SELECT * FROM template_groups WHERE project = ? ORDER BY url_count DESC'
|
|
297
|
+
).all(project);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
export function getTemplateSamples(db, groupId) {
|
|
301
|
+
return db.prepare(
|
|
302
|
+
'SELECT * FROM template_samples WHERE group_id = ? ORDER BY sample_role, url'
|
|
303
|
+
).all(groupId);
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
// ── Domain sync / prune ───────────────────────────────────────────────────
|
|
307
|
+
|
|
308
|
+
/**
 * Remove DB domains (+ all child data) that no longer exist in config.
 *
 * For every domain row of `project` whose name is not in `configDomains`, this
 * deletes the rows that hang off its pages (links, technical, headings,
 * page_schemas, extractions, keywords), the pages themselves, the domain's
 * template samples and groups, and finally the domain row. All deletes run in
 * one transaction that is rolled back if any statement throws.
 *
 * @param db - database handle exposing `prepare(sql).all()/run()` and `exec(sql)`
 * @param {string} project - project whose domains are checked
 * @param {Iterable<string>} configDomains - Set or array of domain strings currently in config
 * @returns {string[]} names of the pruned domains (empty when nothing was stale)
 * @throws rethrows any error raised while deleting, after rolling back
 */
export function pruneStaleDomains(db, project, configDomains) {
  const validSet = new Set(configDomains);

  const dbDomains = db.prepare(
    'SELECT id, domain FROM domains WHERE project = ?'
  ).all(project);

  const stale = dbDomains.filter(d => !validSet.has(d.domain));
  if (!stale.length) return [];

  // Child rows are matched through a subquery on domain_id instead of binding
  // one parameter per page id: SQLite caps the number of bound variables per
  // statement, so a domain with many pages would otherwise fail to prune.
  const pagesOfDomain = 'SELECT id FROM pages WHERE domain_id = ?';
  const childDeleteSql = [
    ['links', 'source_id'],
    ['technical', 'page_id'],
    ['headings', 'page_id'],
    ['page_schemas', 'page_id'],
    ['extractions', 'page_id'],
    ['keywords', 'page_id'],
  ].map(([table, column]) => `DELETE FROM ${table} WHERE ${column} IN (${pagesOfDomain})`);

  // Foreign-key enforcement is switched off for the duration of the prune and
  // switched back on afterwards, whether the prune succeeded or not.
  db.exec('PRAGMA foreign_keys = OFF');
  db.exec('BEGIN');
  try {
    // Prepare once, run per stale domain.
    const deleteChildren = childDeleteSql.map(sql => db.prepare(sql));
    const deletePages = db.prepare('DELETE FROM pages WHERE domain_id = ?');
    const deleteSamples = db.prepare(
      'DELETE FROM template_samples WHERE group_id IN (SELECT id FROM template_groups WHERE project = ? AND domain = ?)'
    );
    const deleteGroups = db.prepare(
      'DELETE FROM template_groups WHERE project = ? AND domain = ?'
    );
    const deleteDomain = db.prepare('DELETE FROM domains WHERE id = ?');

    for (const { id, domain } of stale) {
      // Children first (their subquery still needs the pages), then the pages.
      for (const stmt of deleteChildren) stmt.run(id);
      deletePages.run(id);

      // Template samples before the groups they reference.
      deleteSamples.run(project, domain);
      deleteGroups.run(project, domain);

      deleteDomain.run(id);
    }
    db.exec('COMMIT');
  } catch (e) {
    db.exec('ROLLBACK');
    throw e;
  } finally {
    db.exec('PRAGMA foreign_keys = ON');
  }

  return stale.map(d => d.domain);
}
|
|
363
|
+
|
|
217
364
|
export function getHeadingStructure(db, project) {
|
|
218
365
|
return db.prepare(`
|
|
219
366
|
SELECT d.domain, d.role, h.level, h.text
|
package/db/schema.sql
CHANGED
|
@@ -23,6 +23,9 @@ CREATE TABLE IF NOT EXISTS pages (
|
|
|
23
23
|
published_date TEXT, -- ISO string or null
|
|
24
24
|
modified_date TEXT, -- ISO string or null
|
|
25
25
|
content_hash TEXT, -- SHA-256 of body text for incremental crawling
|
|
26
|
+
title TEXT, -- page <title>
|
|
27
|
+
meta_desc TEXT, -- meta description
|
|
28
|
+
body_text TEXT, -- cleaned body text for extraction (stored at crawl time)
|
|
26
29
|
FOREIGN KEY (domain_id) REFERENCES domains(id)
|
|
27
30
|
);
|
|
28
31
|
|
|
@@ -110,6 +113,51 @@ CREATE TABLE IF NOT EXISTS page_schemas (
|
|
|
110
113
|
extracted_at INTEGER NOT NULL
|
|
111
114
|
);
|
|
112
115
|
|
|
116
|
+
-- Template analysis tables
|
|
117
|
+
-- template_groups: analysis results stored per (project, domain, pattern);
-- the UNIQUE constraint at the end allows at most one row per such triple.
CREATE TABLE IF NOT EXISTS template_groups (
|
|
118
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
119
|
+
-- project and domain are plain TEXT here (no REFERENCES clause), so rows are
-- not tied to the domains table by a foreign key.
project TEXT NOT NULL,
|
|
120
|
+
domain TEXT NOT NULL,
|
|
121
|
+
pattern TEXT NOT NULL,
|
|
122
|
+
url_count INTEGER NOT NULL,
|
|
123
|
+
sample_size INTEGER NOT NULL DEFAULT 0,
|
|
124
|
+
avg_word_count REAL,
|
|
125
|
+
content_similarity REAL,
|
|
126
|
+
dom_similarity REAL,
|
|
127
|
+
-- NOTE(review): the gsc_* columns presumably hold Google Search Console
-- aggregates for the group — confirm against the code that writes them.
gsc_urls_with_impressions INTEGER DEFAULT 0,
|
|
128
|
+
gsc_total_clicks INTEGER DEFAULT 0,
|
|
129
|
+
gsc_total_impressions INTEGER DEFAULT 0,
|
|
130
|
+
gsc_avg_position REAL,
|
|
131
|
+
indexation_efficiency REAL,
|
|
132
|
+
score INTEGER,
|
|
133
|
+
verdict TEXT,
|
|
134
|
+
recommendation TEXT,
|
|
135
|
+
-- NOTE(review): presumably an epoch timestamp; units (s vs ms) not visible here — confirm.
analyzed_at INTEGER NOT NULL,
|
|
136
|
+
-- At most one row per (project, domain, pattern).
UNIQUE(project, domain, pattern)
|
|
137
|
+
);
|
|
138
|
+
|
|
139
|
+
-- template_samples: per-URL rows attached to a template_groups row via group_id.
CREATE TABLE IF NOT EXISTS template_samples (
|
|
140
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
141
|
+
-- ON DELETE CASCADE removes samples together with their parent group, but
-- SQLite only applies it while foreign-key enforcement is enabled
-- (PRAGMA foreign_keys = ON); code that deletes groups with enforcement
-- off must delete the samples explicitly.
group_id INTEGER NOT NULL REFERENCES template_groups(id) ON DELETE CASCADE,
|
|
142
|
+
url TEXT NOT NULL,
|
|
143
|
+
sample_role TEXT NOT NULL,
|
|
144
|
+
status_code INTEGER,
|
|
145
|
+
word_count INTEGER,
|
|
146
|
+
title TEXT,
|
|
147
|
+
meta_desc TEXT,
|
|
148
|
+
-- INTEGER flags: has_canonical and has_schema default to 0, is_indexable defaults to 1.
has_canonical INTEGER DEFAULT 0,
|
|
149
|
+
has_schema INTEGER DEFAULT 0,
|
|
150
|
+
is_indexable INTEGER DEFAULT 1,
|
|
151
|
+
dom_fingerprint TEXT,
|
|
152
|
+
content_hash TEXT,
|
|
153
|
+
body_text TEXT,
|
|
154
|
+
-- NOTE(review): presumably an epoch timestamp; nullable, unlike template_groups.analyzed_at — confirm units.
crawled_at INTEGER,
|
|
155
|
+
-- A given URL appears at most once within a group.
UNIQUE(group_id, url)
|
|
156
|
+
);
|
|
157
|
+
|
|
158
|
+
CREATE INDEX IF NOT EXISTS idx_template_groups_project ON template_groups(project);
|
|
159
|
+
CREATE INDEX IF NOT EXISTS idx_template_samples_group ON template_samples(group_id);
|
|
160
|
+
|
|
113
161
|
-- Indexes
|
|
114
162
|
CREATE INDEX IF NOT EXISTS idx_pages_domain ON pages(domain_id);
|
|
115
163
|
CREATE INDEX IF NOT EXISTS idx_keywords_page ON keywords(page_id);
|
package/lib/gate.js
CHANGED
|
@@ -52,6 +52,7 @@ const FEATURE_NAMES = {
|
|
|
52
52
|
'entities': 'Entity Coverage Analysis',
|
|
53
53
|
'friction': 'Friction Point Analysis',
|
|
54
54
|
'js-delta': 'JS Rendering Delta',
|
|
55
|
+
'templates': 'Programmatic Template Intelligence',
|
|
55
56
|
'html': 'HTML Dashboard',
|
|
56
57
|
'html-all': 'HTML Dashboard (All Projects)',
|
|
57
58
|
'gsc-insights': 'GSC Intelligence & Insights',
|
package/package.json
CHANGED
package/reports/generate-html.js
CHANGED
|
@@ -525,8 +525,13 @@ function buildHtmlTemplate(data, opts = {}) {
|
|
|
525
525
|
white-space: nowrap;
|
|
526
526
|
}
|
|
527
527
|
.es-btn:hover { border-color: var(--accent-gold); color: var(--accent-gold); }
|
|
528
|
-
.es-btn-stop { border-color:
|
|
529
|
-
.es-btn-stop:hover { border-color:
|
|
528
|
+
.es-btn-stop { border-color: var(--border-card); color: var(--text-muted); }
|
|
529
|
+
.es-btn-stop:hover { border-color: var(--text-secondary); color: var(--text-secondary); }
|
|
530
|
+
.es-btn-stop.active { border-color: rgba(220,80,80,0.5); color: #dc5050; animation: stopPulse 2s ease-in-out infinite; }
|
|
531
|
+
.es-btn-stop.active:hover { border-color: #dc5050; color: #ff6b6b; background: rgba(220,80,80,0.08); }
|
|
532
|
+
@keyframes stopPulse { 0%,100% { border-color: rgba(220,80,80,0.3); } 50% { border-color: rgba(220,80,80,0.7); } }
|
|
533
|
+
.es-btn-restart { border-color: rgba(100,160,220,0.3); color: #6ca0dc; }
|
|
534
|
+
.es-btn-restart:hover { border-color: #6ca0dc; color: #8fc0f0; background: rgba(100,160,220,0.08); }
|
|
530
535
|
.es-btn:disabled {
|
|
531
536
|
opacity: 0.4; cursor: not-allowed;
|
|
532
537
|
border-color: var(--border-card);
|
|
@@ -1950,9 +1955,12 @@ function buildHtmlTemplate(data, opts = {}) {
|
|
|
1950
1955
|
<i class="fa-solid fa-brain"></i> Extract
|
|
1951
1956
|
</button>`
|
|
1952
1957
|
}
|
|
1953
|
-
<button class="es-btn es-btn-stop
|
|
1958
|
+
<button class="es-btn es-btn-stop${extractionStatus.liveProgress?.status === 'running' ? ' active' : ''}" id="btnStop${suffix}" onclick="stopJob()">
|
|
1954
1959
|
<i class="fa-solid fa-stop"></i> Stop
|
|
1955
1960
|
</button>
|
|
1961
|
+
<button class="es-btn es-btn-restart" id="btnRestart${suffix}" onclick="restartServer()">
|
|
1962
|
+
<i class="fa-solid fa-rotate-right"></i> Restart
|
|
1963
|
+
</button>
|
|
1956
1964
|
<label class="es-stealth-toggle">
|
|
1957
1965
|
<input type="checkbox" id="stealthToggle${suffix}"${extractionStatus.liveProgress?.stealth ? ' checked' : ''}>
|
|
1958
1966
|
<i class="fa-solid fa-user-ninja"></i> Stealth
|
|
@@ -1981,7 +1989,8 @@ function buildHtmlTemplate(data, opts = {}) {
|
|
|
1981
1989
|
${pro ? `<button class="term-btn" data-cmd="extract" data-project="${project}"><i class="fa-solid fa-brain"></i> Extract</button>
|
|
1982
1990
|
<button class="term-btn" data-cmd="analyze" data-project="${project}"><i class="fa-solid fa-chart-column"></i> Analyze</button>
|
|
1983
1991
|
<button class="term-btn" data-cmd="brief" data-project="${project}"><i class="fa-solid fa-file-lines"></i> Brief</button>
|
|
1984
|
-
<button class="term-btn" data-cmd="keywords" data-project="${project}"><i class="fa-solid fa-key"></i> Keywords</button
|
|
1992
|
+
<button class="term-btn" data-cmd="keywords" data-project="${project}"><i class="fa-solid fa-key"></i> Keywords</button>
|
|
1993
|
+
<button class="term-btn" data-cmd="templates" data-project="${project}"><i class="fa-solid fa-clone"></i> Templates</button>` : ''}
|
|
1985
1994
|
<button class="term-btn" data-cmd="status" data-project=""><i class="fa-solid fa-circle-info"></i> Status</button>
|
|
1986
1995
|
<button class="term-btn" data-cmd="guide" data-project="${project}"><i class="fa-solid fa-map"></i> Guide</button>
|
|
1987
1996
|
<button class="term-btn" data-cmd="setup" data-project="" style="margin-left:auto;border-color:rgba(232,213,163,0.25);"><i class="fa-solid fa-gear"></i> Setup</button>
|
|
@@ -2084,7 +2093,10 @@ function buildHtmlTemplate(data, opts = {}) {
|
|
|
2084
2093
|
if (extra?.stealth) params.set('stealth', 'true');
|
|
2085
2094
|
if (extra?.format) params.set('format', extra.format);
|
|
2086
2095
|
|
|
2087
|
-
|
|
2096
|
+
var stealthFlag = extra?.stealth ? ' --stealth' : '';
|
|
2097
|
+
appendLine('$ seo-intel ' + command + (proj ? ' ' + proj : '') + stealthFlag + (extra?.scope ? ' --scope ' + extra.scope : ''), 'cmd');
|
|
2098
|
+
|
|
2099
|
+
var isCrawlOrExtract = (command === 'crawl' || command === 'extract');
|
|
2088
2100
|
|
|
2089
2101
|
eventSource = new EventSource('/api/terminal?' + params.toString());
|
|
2090
2102
|
eventSource.onmessage = function(e) {
|
|
@@ -2101,29 +2113,56 @@ function buildHtmlTemplate(data, opts = {}) {
|
|
|
2101
2113
|
status.style.color = code === 0 ? 'var(--color-success)' : 'var(--color-danger)';
|
|
2102
2114
|
eventSource.close();
|
|
2103
2115
|
eventSource = null;
|
|
2116
|
+
// Update status bar when crawl/extract finishes
|
|
2117
|
+
if (isCrawlOrExtract && window._setButtonsState) window._setButtonsState(false, null);
|
|
2104
2118
|
}
|
|
2105
2119
|
} catch (_) {}
|
|
2106
2120
|
};
|
|
2107
2121
|
eventSource.onerror = function() {
|
|
2108
2122
|
if (running) {
|
|
2109
|
-
|
|
2123
|
+
// SSE disconnected but crawl/extract continues server-side
|
|
2124
|
+
if (isCrawlOrExtract) {
|
|
2125
|
+
appendLine('Terminal disconnected — job continues in background.', 'stderr');
|
|
2126
|
+
} else {
|
|
2127
|
+
appendLine('Connection lost.', 'error');
|
|
2128
|
+
}
|
|
2110
2129
|
running = false;
|
|
2111
|
-
status.textContent = 'disconnected';
|
|
2112
|
-
status.style.color = 'var(--color-danger)';
|
|
2130
|
+
status.textContent = isCrawlOrExtract ? 'backgrounded' : 'disconnected';
|
|
2131
|
+
status.style.color = isCrawlOrExtract ? 'var(--text-muted)' : 'var(--color-danger)';
|
|
2113
2132
|
}
|
|
2114
2133
|
eventSource?.close();
|
|
2115
2134
|
eventSource = null;
|
|
2116
2135
|
};
|
|
2117
2136
|
}
|
|
2118
2137
|
|
|
2119
|
-
//
|
|
2138
|
+
// Expose terminal for status bar buttons
|
|
2139
|
+
window._terminalRun = function(cmd, proj, extra) { runCommand(cmd, proj, extra); };
|
|
2140
|
+
window._terminalStop = function() {
|
|
2141
|
+
if (eventSource) { eventSource.close(); eventSource = null; }
|
|
2142
|
+
if (running) {
|
|
2143
|
+
appendLine('Stopped.', 'exit-err');
|
|
2144
|
+
running = false;
|
|
2145
|
+
status.textContent = 'stopped';
|
|
2146
|
+
status.style.color = 'var(--color-warning)';
|
|
2147
|
+
}
|
|
2148
|
+
};
|
|
2149
|
+
|
|
2150
|
+
// Button clicks — crawl/extract read stealth toggle
|
|
2120
2151
|
document.querySelectorAll('.terminal-panel .term-btn').forEach(function(btn) {
|
|
2121
2152
|
if (btn.closest('.terminal-panel') !== output.closest('.terminal-panel')) return;
|
|
2122
2153
|
btn.addEventListener('click', function() {
|
|
2123
2154
|
const cmd = btn.getAttribute('data-cmd');
|
|
2124
2155
|
const proj = btn.getAttribute('data-project');
|
|
2125
2156
|
const scope = btn.getAttribute('data-scope');
|
|
2126
|
-
|
|
2157
|
+
var extra = scope ? { scope: scope } : {};
|
|
2158
|
+
// Crawl/extract: read stealth toggle + update status bar
|
|
2159
|
+
if (cmd === 'crawl' || cmd === 'extract') {
|
|
2160
|
+
var stealthEl = document.querySelector('[id^="stealthToggle"]');
|
|
2161
|
+
if (stealthEl?.checked) extra.stealth = true;
|
|
2162
|
+
if (window._setButtonsState) window._setButtonsState(true, cmd);
|
|
2163
|
+
if (window._startPolling) window._startPolling();
|
|
2164
|
+
}
|
|
2165
|
+
runCommand(cmd, proj, extra);
|
|
2127
2166
|
});
|
|
2128
2167
|
});
|
|
2129
2168
|
|
|
@@ -3558,41 +3597,55 @@ function buildHtmlTemplate(data, opts = {}) {
|
|
|
3558
3597
|
let pollTimer = null;
|
|
3559
3598
|
|
|
3560
3599
|
window.startJob = function(command, proj) {
|
|
3561
|
-
|
|
3562
|
-
|
|
3600
|
+
var stealth = document.getElementById('stealthToggle' + sfx)?.checked || false;
|
|
3601
|
+
var extra = {};
|
|
3602
|
+
if (stealth) extra.stealth = true;
|
|
3563
3603
|
|
|
3564
|
-
|
|
3565
|
-
|
|
3566
|
-
|
|
3567
|
-
|
|
3568
|
-
|
|
3569
|
-
|
|
3570
|
-
.then(function(data) {
|
|
3571
|
-
if (data.error) { alert('Cannot start: ' + data.error); return; }
|
|
3572
|
-
setButtonsState(true, command);
|
|
3573
|
-
startPolling();
|
|
3574
|
-
})
|
|
3575
|
-
.catch(function(err) { alert('Server error: ' + err.message); });
|
|
3604
|
+
// Route through terminal for visible output
|
|
3605
|
+
if (window._terminalRun) {
|
|
3606
|
+
window._terminalRun(command, proj, extra);
|
|
3607
|
+
}
|
|
3608
|
+
setButtonsState(true, command);
|
|
3609
|
+
startPolling();
|
|
3576
3610
|
};
|
|
3577
3611
|
|
|
3578
3612
|
window.stopJob = function() {
|
|
3613
|
+
// Close terminal SSE (server detaches crawl/extract, so we also hit /api/stop)
|
|
3614
|
+
if (window._terminalStop) window._terminalStop();
|
|
3579
3615
|
fetch('/api/stop', { method: 'POST' })
|
|
3580
3616
|
.then(function(r) { return r.json(); })
|
|
3581
|
-
.then(function(
|
|
3582
|
-
|
|
3583
|
-
|
|
3584
|
-
|
|
3617
|
+
.then(function() { setButtonsState(false, null); })
|
|
3618
|
+
.catch(function() { setButtonsState(false, null); });
|
|
3619
|
+
};
|
|
3620
|
+
|
|
3621
|
+
window.restartServer = function() {
|
|
3622
|
+
if (!confirm('Restart SEO Intel? This will stop any running jobs and refresh the dashboard.')) return;
|
|
3623
|
+
var btnR = document.getElementById('btnRestart' + sfx);
|
|
3624
|
+
if (btnR) { btnR.disabled = true; btnR.innerHTML = '<i class="fa-solid fa-spinner fa-spin"></i> Restarting\u2026'; }
|
|
3625
|
+
// Stop terminal SSE
|
|
3626
|
+
if (window._terminalStop) window._terminalStop();
|
|
3627
|
+
fetch('/api/restart', { method: 'POST' })
|
|
3628
|
+
.then(function() {
|
|
3629
|
+
// Server is restarting — wait a moment then reload
|
|
3630
|
+
setTimeout(function() { window.location.reload(); }, 2000);
|
|
3585
3631
|
})
|
|
3586
|
-
.catch(function(
|
|
3632
|
+
.catch(function() {
|
|
3633
|
+
// Server might already be dead — try reloading anyway
|
|
3634
|
+
setTimeout(function() { window.location.reload(); }, 2000);
|
|
3635
|
+
});
|
|
3587
3636
|
};
|
|
3588
3637
|
|
|
3589
|
-
|
|
3638
|
+
// Expose for terminal IIFE to call back
|
|
3639
|
+
window._setButtonsState = setButtonsState;
|
|
3640
|
+
window._startPolling = startPolling;
|
|
3641
|
+
|
|
3642
|
+
function setButtonsState(isRunning, activeCmd) {
|
|
3590
3643
|
var btnC = document.getElementById('btnCrawl' + sfx);
|
|
3591
3644
|
var btnE = document.getElementById('btnExtract' + sfx);
|
|
3592
3645
|
var btnS = document.getElementById('btnStop' + sfx);
|
|
3593
3646
|
if (btnC) {
|
|
3594
|
-
btnC.disabled =
|
|
3595
|
-
if (
|
|
3647
|
+
btnC.disabled = isRunning;
|
|
3648
|
+
if (isRunning && activeCmd === 'crawl') {
|
|
3596
3649
|
btnC.classList.add('running');
|
|
3597
3650
|
btnC.innerHTML = '<i class="fa-solid fa-spinner fa-spin"></i> Crawling\u2026';
|
|
3598
3651
|
} else {
|
|
@@ -3601,8 +3654,8 @@ function buildHtmlTemplate(data, opts = {}) {
|
|
|
3601
3654
|
}
|
|
3602
3655
|
}
|
|
3603
3656
|
if (btnE) {
|
|
3604
|
-
btnE.disabled =
|
|
3605
|
-
if (
|
|
3657
|
+
btnE.disabled = isRunning;
|
|
3658
|
+
if (isRunning && activeCmd === 'extract') {
|
|
3606
3659
|
btnE.classList.add('running');
|
|
3607
3660
|
btnE.innerHTML = '<i class="fa-solid fa-spinner fa-spin"></i> Extracting\u2026';
|
|
3608
3661
|
} else {
|
|
@@ -3611,7 +3664,12 @@ function buildHtmlTemplate(data, opts = {}) {
|
|
|
3611
3664
|
}
|
|
3612
3665
|
}
|
|
3613
3666
|
if (btnS) {
|
|
3614
|
-
|
|
3667
|
+
// Stop button always visible — turns red+pulsing when something is running
|
|
3668
|
+
if (isRunning) {
|
|
3669
|
+
btnS.classList.add('active');
|
|
3670
|
+
} else {
|
|
3671
|
+
btnS.classList.remove('active');
|
|
3672
|
+
}
|
|
3615
3673
|
}
|
|
3616
3674
|
}
|
|
3617
3675
|
|
|
@@ -4762,50 +4820,57 @@ function buildMultiHtmlTemplate(allProjectData) {
|
|
|
4762
4820
|
window.startJob = function(command, proj) {
|
|
4763
4821
|
var sfx = '-' + proj;
|
|
4764
4822
|
var stealth = document.getElementById('stealthToggle' + sfx)?.checked || false;
|
|
4765
|
-
|
|
4766
|
-
|
|
4767
|
-
|
|
4768
|
-
|
|
4769
|
-
|
|
4770
|
-
|
|
4771
|
-
|
|
4772
|
-
|
|
4773
|
-
|
|
4774
|
-
startPolling();
|
|
4775
|
-
})
|
|
4776
|
-
.catch(function(err) { alert('Server error: ' + err.message); });
|
|
4823
|
+
var extra = {};
|
|
4824
|
+
if (stealth) extra.stealth = true;
|
|
4825
|
+
|
|
4826
|
+
// Route through terminal for visible output
|
|
4827
|
+
if (window._terminalRun) {
|
|
4828
|
+
window._terminalRun(command, proj, extra);
|
|
4829
|
+
}
|
|
4830
|
+
setButtonsState(true, command);
|
|
4831
|
+
startPolling();
|
|
4777
4832
|
};
|
|
4778
4833
|
|
|
4779
4834
|
window.stopJob = function() {
|
|
4835
|
+
if (window._terminalStop) window._terminalStop();
|
|
4780
4836
|
fetch('/api/stop', { method: 'POST' })
|
|
4781
4837
|
.then(function(r) { return r.json(); })
|
|
4782
|
-
.then(function(
|
|
4783
|
-
|
|
4784
|
-
|
|
4785
|
-
|
|
4838
|
+
.then(function() { setButtonsState(false, null); })
|
|
4839
|
+
.catch(function() { setButtonsState(false, null); });
|
|
4840
|
+
};
|
|
4841
|
+
|
|
4842
|
+
window.restartServer = function() {
|
|
4843
|
+
if (!confirm('Restart SEO Intel? This will stop any running jobs and refresh the dashboard.')) return;
|
|
4844
|
+
if (window._terminalStop) window._terminalStop();
|
|
4845
|
+
fetch('/api/restart', { method: 'POST' })
|
|
4846
|
+
.then(function() { setTimeout(function() { window.location.reload(); }, 2000); })
|
|
4847
|
+
.catch(function() { setTimeout(function() { window.location.reload(); }, 2000); });
|
|
4786
4848
|
};
|
|
4787
4849
|
|
|
4788
|
-
|
|
4850
|
+
window._setButtonsState = setButtonsState;
|
|
4851
|
+
window._startPolling = startPolling;
|
|
4852
|
+
|
|
4853
|
+
function setButtonsState(isRunning, activeCmd) {
|
|
4789
4854
|
var sfx = '-' + currentProject;
|
|
4790
4855
|
var btnC = document.getElementById('btnCrawl' + sfx);
|
|
4791
4856
|
var btnE = document.getElementById('btnExtract' + sfx);
|
|
4792
4857
|
var btnS = document.getElementById('btnStop' + sfx);
|
|
4793
4858
|
if (btnC) {
|
|
4794
|
-
btnC.disabled =
|
|
4795
|
-
btnC.classList.toggle('running',
|
|
4796
|
-
btnC.innerHTML =
|
|
4859
|
+
btnC.disabled = isRunning;
|
|
4860
|
+
btnC.classList.toggle('running', isRunning && activeCmd === 'crawl');
|
|
4861
|
+
btnC.innerHTML = isRunning && activeCmd === 'crawl'
|
|
4797
4862
|
? '<i class="fa-solid fa-spinner fa-spin"></i> Crawling\u2026'
|
|
4798
4863
|
: '<i class="fa-solid fa-spider"></i> Crawl';
|
|
4799
4864
|
}
|
|
4800
4865
|
if (btnE) {
|
|
4801
|
-
btnE.disabled =
|
|
4802
|
-
btnE.classList.toggle('running',
|
|
4803
|
-
btnE.innerHTML =
|
|
4866
|
+
btnE.disabled = isRunning;
|
|
4867
|
+
btnE.classList.toggle('running', isRunning && activeCmd === 'extract');
|
|
4868
|
+
btnE.innerHTML = isRunning && activeCmd === 'extract'
|
|
4804
4869
|
? '<i class="fa-solid fa-spinner fa-spin"></i> Extracting\u2026'
|
|
4805
4870
|
: '<i class="fa-solid fa-brain"></i> Extract';
|
|
4806
4871
|
}
|
|
4807
4872
|
if (btnS) {
|
|
4808
|
-
btnS.
|
|
4873
|
+
if (isRunning) { btnS.classList.add('active'); } else { btnS.classList.remove('active'); }
|
|
4809
4874
|
}
|
|
4810
4875
|
}
|
|
4811
4876
|
|
package/server.js
CHANGED
|
@@ -297,7 +297,7 @@ async function handleRequest(req, res) {
|
|
|
297
297
|
if (req.method === 'POST' && path === '/api/extract') {
|
|
298
298
|
try {
|
|
299
299
|
const body = await readBody(req);
|
|
300
|
-
const { project
|
|
300
|
+
const { project } = body;
|
|
301
301
|
if (!project) { json(res, 400, { error: 'Missing project' }); return; }
|
|
302
302
|
|
|
303
303
|
// Conflict guard
|
|
@@ -308,7 +308,6 @@ async function handleRequest(req, res) {
|
|
|
308
308
|
}
|
|
309
309
|
|
|
310
310
|
const args = ['cli.js', 'extract', project];
|
|
311
|
-
if (stealth) args.push('--stealth');
|
|
312
311
|
|
|
313
312
|
const child = spawn(process.execPath, args, {
|
|
314
313
|
cwd: __dirname,
|
|
@@ -372,6 +371,38 @@ async function handleRequest(req, res) {
|
|
|
372
371
|
return;
|
|
373
372
|
}
|
|
374
373
|
|
|
374
|
+
// ─── API: Restart — kill running jobs + restart server ───
|
|
375
|
+
if (req.method === 'POST' && path === '/api/restart') {
|
|
376
|
+
try {
|
|
377
|
+
// 1. Kill any running job
|
|
378
|
+
const progress = readProgress();
|
|
379
|
+
if (progress?.status === 'running' && progress.pid) {
|
|
380
|
+
try { process.kill(progress.pid, 'SIGTERM'); } catch {}
|
|
381
|
+
try {
|
|
382
|
+
writeFileSync(PROGRESS_FILE, JSON.stringify({
|
|
383
|
+
...progress, status: 'stopped', stopped_at: Date.now(), updated_at: Date.now(),
|
|
384
|
+
}, null, 2));
|
|
385
|
+
} catch {}
|
|
386
|
+
}
|
|
387
|
+
json(res, 200, { restarting: true });
|
|
388
|
+
|
|
389
|
+
// 2. Restart the server process after response is sent
|
|
390
|
+
setTimeout(() => {
|
|
391
|
+
const child = spawn(process.execPath, [fileURLToPath(import.meta.url), ...process.argv.slice(2)], {
|
|
392
|
+
cwd: __dirname,
|
|
393
|
+
detached: true,
|
|
394
|
+
stdio: 'ignore',
|
|
395
|
+
env: { ...process.env, SEO_INTEL_AUTO_OPEN: '0' },
|
|
396
|
+
});
|
|
397
|
+
child.unref();
|
|
398
|
+
process.exit(0);
|
|
399
|
+
}, 300);
|
|
400
|
+
} catch (e) {
|
|
401
|
+
json(res, 500, { error: e.message });
|
|
402
|
+
}
|
|
403
|
+
return;
|
|
404
|
+
}
|
|
405
|
+
|
|
375
406
|
// ─── API: Export actions ───
|
|
376
407
|
if (req.method === 'POST' && path === '/api/export-actions') {
|
|
377
408
|
try {
|
|
@@ -538,7 +569,7 @@ async function handleRequest(req, res) {
|
|
|
538
569
|
// Whitelist allowed commands
|
|
539
570
|
const ALLOWED = ['crawl', 'extract', 'analyze', 'export-actions', 'competitive-actions',
|
|
540
571
|
'suggest-usecases', 'html', 'status', 'brief', 'keywords', 'report', 'guide',
|
|
541
|
-
'schemas', 'headings-audit', 'orphans', 'entities', 'friction', 'shallow', 'decay', 'export'];
|
|
572
|
+
'schemas', 'headings-audit', 'orphans', 'entities', 'friction', 'shallow', 'decay', 'export', 'templates'];
|
|
542
573
|
|
|
543
574
|
if (!command || !ALLOWED.includes(command)) {
|
|
544
575
|
json(res, 400, { error: `Invalid command. Allowed: ${ALLOWED.join(', ')}` });
|
|
@@ -564,14 +595,21 @@ async function handleRequest(req, res) {
|
|
|
564
595
|
res.write(`data: ${JSON.stringify({ type, data })}\n\n`);
|
|
565
596
|
};
|
|
566
597
|
|
|
598
|
+
const isLongRunning = ['crawl', 'extract'].includes(command);
|
|
599
|
+
|
|
567
600
|
send('start', { command, project, args: args.slice(1) });
|
|
568
601
|
|
|
569
602
|
const child = spawn(process.execPath, args, {
|
|
570
603
|
cwd: __dirname,
|
|
571
604
|
env: { ...process.env, FORCE_COLOR: '0', NO_COLOR: '1' },
|
|
605
|
+
// Crawl/extract: detach so they survive SSE disconnect
|
|
606
|
+
...(isLongRunning ? { detached: true } : {}),
|
|
572
607
|
});
|
|
573
608
|
|
|
609
|
+
let clientClosed = false;
|
|
610
|
+
|
|
574
611
|
child.stdout.on('data', chunk => {
|
|
612
|
+
if (clientClosed) return;
|
|
575
613
|
const lines = chunk.toString().split('\n');
|
|
576
614
|
for (const line of lines) {
|
|
577
615
|
if (line) send('stdout', line);
|
|
@@ -579,6 +617,7 @@ async function handleRequest(req, res) {
|
|
|
579
617
|
});
|
|
580
618
|
|
|
581
619
|
child.stderr.on('data', chunk => {
|
|
620
|
+
if (clientClosed) return;
|
|
582
621
|
const lines = chunk.toString().split('\n');
|
|
583
622
|
for (const line of lines) {
|
|
584
623
|
if (line) send('stderr', line);
|
|
@@ -586,18 +625,24 @@ async function handleRequest(req, res) {
|
|
|
586
625
|
});
|
|
587
626
|
|
|
588
627
|
child.on('error', err => {
|
|
589
|
-
send('error', err.message);
|
|
590
|
-
res.end();
|
|
628
|
+
if (!clientClosed) { send('error', err.message); res.end(); }
|
|
591
629
|
});
|
|
592
630
|
|
|
593
631
|
child.on('close', code => {
|
|
594
|
-
send('exit', { code });
|
|
595
|
-
res.end();
|
|
632
|
+
if (!clientClosed) { send('exit', { code }); res.end(); }
|
|
596
633
|
});
|
|
597
634
|
|
|
598
|
-
//
|
|
635
|
+
// Client disconnect: kill short commands, let crawl/extract continue
|
|
599
636
|
req.on('close', () => {
|
|
600
|
-
|
|
637
|
+
clientClosed = true;
|
|
638
|
+
if (isLongRunning) {
|
|
639
|
+
// Detach — crawl/extract keeps running, progress file tracks it
|
|
640
|
+
child.unref();
|
|
641
|
+
if (child.stdout) child.stdout.destroy();
|
|
642
|
+
if (child.stderr) child.stderr.destroy();
|
|
643
|
+
} else {
|
|
644
|
+
if (!child.killed) child.kill();
|
|
645
|
+
}
|
|
601
646
|
});
|
|
602
647
|
|
|
603
648
|
return;
|