seo-intel 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +41 -0
- package/LICENSE +75 -0
- package/README.md +243 -0
- package/Start SEO Intel.bat +9 -0
- package/Start SEO Intel.command +8 -0
- package/cli.js +3727 -0
- package/config/example.json +29 -0
- package/config/setup-wizard.js +522 -0
- package/crawler/index.js +566 -0
- package/crawler/robots.js +103 -0
- package/crawler/sanitize.js +124 -0
- package/crawler/schema-parser.js +168 -0
- package/crawler/sitemap.js +103 -0
- package/crawler/stealth.js +393 -0
- package/crawler/subdomain-discovery.js +341 -0
- package/db/db.js +213 -0
- package/db/schema.sql +120 -0
- package/exports/competitive.js +186 -0
- package/exports/heuristics.js +67 -0
- package/exports/queries.js +197 -0
- package/exports/suggestive.js +230 -0
- package/exports/technical.js +180 -0
- package/exports/templates.js +77 -0
- package/lib/gate.js +204 -0
- package/lib/license.js +369 -0
- package/lib/oauth.js +432 -0
- package/lib/updater.js +324 -0
- package/package.json +68 -0
- package/reports/generate-html.js +6194 -0
- package/reports/generate-site-graph.js +949 -0
- package/reports/gsc-loader.js +190 -0
- package/scheduler.js +142 -0
- package/seo-audit.js +619 -0
- package/seo-intel.png +0 -0
- package/server.js +602 -0
- package/setup/ROADMAP.md +109 -0
- package/setup/checks.js +483 -0
- package/setup/config-builder.js +227 -0
- package/setup/engine.js +65 -0
- package/setup/installers.js +197 -0
- package/setup/models.js +328 -0
- package/setup/openclaw-bridge.js +329 -0
- package/setup/validator.js +395 -0
- package/setup/web-routes.js +688 -0
- package/setup/wizard.html +2920 -0
- package/start-seo-intel.sh +8 -0
package/cli.js
ADDED
|
@@ -0,0 +1,3727 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import 'dotenv/config';
|
|
3
|
+
import { program } from 'commander';
|
|
4
|
+
import { spawnSync } from 'child_process';
|
|
5
|
+
import { readFileSync, writeFileSync, readdirSync, unlinkSync, existsSync, mkdirSync } from 'fs';
|
|
6
|
+
import { dirname, join } from 'path';
|
|
7
|
+
import { totalmem } from 'os';
|
|
8
|
+
import { fileURLToPath } from 'url';
|
|
9
|
+
import chalk from 'chalk';
|
|
10
|
+
|
|
11
|
+
import { crawlDomain } from './crawler/index.js';
|
|
12
|
+
// Paid modules — loaded lazily inside gated commands only.
|
|
13
|
+
// analysis/ and extractor/ are NOT shipped in the free npm package.
|
|
14
|
+
let _extractPage, _buildAnalysisPrompt;
|
|
15
|
+
/**
 * Lazily resolve the `extractPage` function from the extractor module.
 *
 * The module is imported on first use only and the resolved function is
 * cached in the module-level `_extractPage` variable for later calls.
 *
 * @returns {Promise<Function>} the cached `extractPage` export.
 */
async function getExtractPage() {
  if (_extractPage) return _extractPage;
  const extractorModule = await import('./extractor/qwen.js');
  _extractPage = extractorModule.extractPage;
  return _extractPage;
}
|
|
19
|
+
/**
 * Lazily resolve the `buildAnalysisPrompt` function from the analysis module.
 *
 * The module is imported on first use only and the resolved function is
 * cached in the module-level `_buildAnalysisPrompt` variable.
 *
 * @returns {Promise<Function>} the cached `buildAnalysisPrompt` export.
 */
async function getBuildAnalysisPrompt() {
  if (_buildAnalysisPrompt) return _buildAnalysisPrompt;
  const promptModule = await import('./analysis/prompt-builder.js');
  _buildAnalysisPrompt = promptModule.buildAnalysisPrompt;
  return _buildAnalysisPrompt;
}
|
|
23
|
+
import { getNextCrawlTarget, needsAnalysis, getCrawlStatus, loadAllConfigs } from './scheduler.js';
|
|
24
|
+
import {
|
|
25
|
+
getDb, upsertDomain, upsertPage, insertExtraction,
|
|
26
|
+
insertKeywords, insertHeadings, insertLinks, insertPageSchemas,
|
|
27
|
+
getCompetitorSummary, getKeywordMatrix, getHeadingStructure,
|
|
28
|
+
getPageHash, getSchemasByProject
|
|
29
|
+
} from './db/db.js';
|
|
30
|
+
import { generateHtmlDashboard, generateMultiDashboard } from './reports/generate-html.js';
|
|
31
|
+
import { buildTechnicalActions } from './exports/technical.js';
|
|
32
|
+
import { buildCompetitiveActions } from './exports/competitive.js';
|
|
33
|
+
import { buildSuggestiveActions } from './exports/suggestive.js';
|
|
34
|
+
import { buildExportPayload, formatActionsJson, formatActionsBrief } from './exports/templates.js';
|
|
35
|
+
import { assertHasCrawlData, getLatestAnalysis } from './exports/queries.js';
|
|
36
|
+
import { requirePro, enforceLimits, capPages, printLicenseStatus } from './lib/gate.js';
|
|
37
|
+
import { isPro, loadLicense, activateLicense } from './lib/license.js';
|
|
38
|
+
import { getCurrentVersion, checkForUpdates, printUpdateNotice, forceUpdateCheck } from './lib/updater.js';
|
|
39
|
+
|
|
40
|
+
// Absolute directory of this module (ES modules have no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));

// Kick off the update check; its result is not awaited here.
checkForUpdates();

// Make sure the reports/ and config/ folders exist next to this script.
for (const folder of ['reports', 'config']) {
  try {
    mkdirSync(join(__dirname, folder), { recursive: true });
  } catch {
    /* ok — directory creation is best-effort */
  }
}
|
|
48
|
+
|
|
49
|
+
// ── AI AVAILABILITY PREFLIGHT ────────────────────────────────────────────
|
|
50
|
+
/**
 * Check whether any configured Ollama host can serve the extraction model.
 *
 * Hosts are probed sequentially: OLLAMA_URL (default http://localhost:11434)
 * first, then OLLAMA_FALLBACK_URL when it is set. Each probe requests
 * `${host}/api/tags` and is aborted after 2 seconds; the limit covers both
 * the request and reading the JSON body.
 *
 * A host is usable when one of its model names starts with the family part
 * (the text before ":") of OLLAMA_MODEL (default "qwen3:4b"). A host that is
 * reachable but lacks the model prints a warning and the next host is tried,
 * so a working fallback is not masked by a primary that is up but has no model.
 *
 * @returns {Promise<boolean>} true when some host is reachable and lists a
 *   matching model, false otherwise.
 */
async function checkOllamaAvailability() {
  const hosts = [
    process.env.OLLAMA_URL || 'http://localhost:11434',
    ...(process.env.OLLAMA_FALLBACK_URL ? [process.env.OLLAMA_FALLBACK_URL] : []),
  ];
  const targetModel = process.env.OLLAMA_MODEL || 'qwen3:4b';
  const modelFamily = targetModel.split(':')[0];

  for (const host of hosts) {
    let models;
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 2000);
    try {
      const res = await fetch(`${host}/api/tags`, { signal: controller.signal });
      if (!res.ok) continue; // reachable but erroring — try the next host
      const data = await res.json();
      // Keep only string names so a malformed entry cannot break the match below.
      models = (data.models || [])
        .map((m) => m?.name)
        .filter((name) => typeof name === 'string');
    } catch {
      continue; // host unreachable, timed out, or returned invalid JSON — try next
    } finally {
      // Always release the timer, including when fetch rejects.
      clearTimeout(timeout);
    }

    if (models.some((name) => name.startsWith(modelFamily))) {
      return true; // Ollama reachable + model available
    }

    // Reachable but model missing — warn, then fall through to the next host.
    console.log(chalk.yellow(` ⚠️ Ollama at ${host} is reachable but model "${targetModel}" not found`));
    console.log(chalk.dim(` Available models: ${models.join(', ') || 'none'}`));
    console.log(chalk.dim(` Run: ollama pull ${targetModel}`));
  }

  return false;
}
|
|
86
|
+
|
|
87
|
+
// ── EXTRACTION PROGRESS TRACKER ──────────────────────────────────────────
|
|
88
|
+
const PROGRESS_FILE = join(__dirname, '.extraction-progress.json');
|
|
89
|
+
|
|
90
|
+
/**
 * Persist a progress snapshot to PROGRESS_FILE as pretty-printed JSON.
 *
 * The caller's fields are stored together with `updated_at` (current epoch
 * milliseconds) and `pid` (this process id); those two keys win over any
 * same-named keys in `data`. Any failure is ignored (best-effort).
 *
 * @param {object} data - progress fields to record.
 */
function writeProgress(data) {
  try {
    const snapshot = { ...data, updated_at: Date.now(), pid: process.pid };
    const serialized = JSON.stringify(snapshot, null, 2);
    writeFileSync(PROGRESS_FILE, serialized);
  } catch {
    /* best-effort */
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Delete PROGRESS_FILE when it exists. Errors are ignored.
 */
function clearProgress() {
  try {
    if (!existsSync(PROGRESS_FILE)) return;
    unlinkSync(PROGRESS_FILE);
  } catch {
    /* ok */
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Load the progress snapshot from PROGRESS_FILE.
 *
 * When the snapshot claims `status === 'running'` and carries a pid, the pid
 * is probed with signal 0. If that probe fails with ESRCH (no such process),
 * the returned object is rewritten to `status: 'crashed'` with `crashed_at`
 * set to the snapshot's `updated_at`. Other probe errors (e.g. EPERM) leave
 * the snapshot untouched.
 *
 * @returns {object|null} the parsed snapshot, or null when the file is
 *   missing or cannot be read/parsed.
 */
function readProgress() {
  try {
    if (!existsSync(PROGRESS_FILE)) return null;

    const progress = JSON.parse(readFileSync(PROGRESS_FILE, 'utf8'));
    const claimsRunning = progress.status === 'running' && progress.pid;
    if (!claimsRunning) return progress;

    // Signal 0 delivers nothing; it only checks whether the pid can be signalled.
    try {
      process.kill(progress.pid, 0);
    } catch (probeErr) {
      if (probeErr.code === 'ESRCH') {
        // No such process — the writer died without updating its status.
        progress.status = 'crashed';
        progress.crashed_at = progress.updated_at;
      }
      // EPERM means the process exists but we can't signal it — it's alive.
    }

    return progress;
  } catch {
    return null;
  }
}
|
|
124
|
+
|
|
125
|
+
program
|
|
126
|
+
.name('seo-intel')
|
|
127
|
+
.description('SEO Competitor Intelligence Tool')
|
|
128
|
+
.version(getCurrentVersion());
|
|
129
|
+
|
|
130
|
+
// ── SETUP WIZARD ───────────────────────────────────────────────────────────
|
|
131
|
+
// `setup` command: runs the OpenClaw agent-driven setup when OpenClaw is
// installed and its gateway is running, otherwise spawns the classic CLI
// wizard (config/setup-wizard.js) as a child Node process.
//   --classic  skip the OpenClaw check entirely
//   --agent    require OpenClaw; exit 1 instead of falling back
//   --project  forwarded to the classic wizard as a prefill
program
  .command('setup')
  .description('Interactive setup wizard — uses OpenClaw agent if available, otherwise standard CLI wizard')
  .option('--project <name>', 'Project name to prefill')
  .option('--classic', 'Force classic CLI wizard (skip OpenClaw agent)')
  .option('--agent', 'Force OpenClaw agent setup (fail if not available)')
  .action(async (opts) => {
    // Check for OpenClaw unless --classic is forced
    if (!opts.classic) {
      try {
        const { checkOpenClaw } = await import('./setup/checks.js');
        const oc = checkOpenClaw();

        if (oc.installed && oc.gatewayRunning) {
          console.log(chalk.dim('\n OpenClaw detected — using agent-powered setup'));
          console.log(chalk.dim(' (use --classic for the standard wizard)\n'));

          const { fullSystemCheck } = await import('./setup/engine.js');
          const status = await fullSystemCheck();
          const { cliAgentSetup } = await import('./setup/openclaw-bridge.js');
          await cliAgentSetup(status);
          return;
        } else if (opts.agent) {
          console.error(chalk.red('\n OpenClaw gateway not running.'));
          console.log(chalk.dim(' Start it with: openclaw gateway\n'));
          process.exit(1);
        }
        // Fall through to classic wizard
      } catch (err) {
        if (opts.agent) {
          console.error(chalk.red(`\n OpenClaw setup failed: ${err.message}\n`));
          process.exit(1);
        }
        // Without --agent an OpenClaw failure is not fatal: fall through to classic wizard
      }
    }

    // Classic CLI wizard
    const args = ['config/setup-wizard.js'];
    if (opts.project) args.push('--project', opts.project);
    const res = spawnSync(process.execPath, args, { stdio: 'inherit', cwd: __dirname });

    // spawnSync reports a failed launch via `error` and a signal kill via
    // `status === null`; neither may be reported to the shell as success.
    if (res.error) {
      console.error(chalk.red(`\n Could not start setup wizard: ${res.error.message}\n`));
      process.exit(1);
    }
    process.exit(res.status ?? 1);
  });
|
|
174
|
+
|
|
175
|
+
// ── SUBDOMAIN DISCOVERY ───────────────────────────────────────────────────
|
|
176
|
+
// `subdomains <domain>` command: discovers subdomains, prints them as a table,
// and with --add-to appends the SEO-relevant ones to a project's config file
// under `owned`.
program
  .command('subdomains <domain>')
  .description('Discover subdomains for a domain (crt.sh + DNS + crawl data)')
  .option('--no-http', 'Skip HTTP liveness check (faster, DNS only)')
  .option('--add-to <project>', 'Auto-add SEO-relevant subdomains to a project config')
  .action(async (domain, opts) => {
    const { discoverSubdomains } = await import('./crawler/subdomain-discovery.js');

    // Reduce the input to a bare host: drop scheme, path and a leading "www."
    const rootDomain = domain
      .replace(/^https?:\/\//, '')
      .replace(/\/.*$/, '')
      .replace(/^www\./, '');

    console.log(chalk.bold.cyan(`\n🔍 Discovering subdomains for ${rootDomain}\n`));

    // The DB is optional here; discovery runs without it when it cannot be opened.
    let db = null;
    try {
      db = getDb();
    } catch {
      /* no DB yet, that's fine */
    }

    const result = await discoverSubdomains(rootDomain, {
      db,
      httpCheck: opts.http !== false,
      onProgress: ({ message }) => {
        console.log(chalk.dim(` ${message}`));
      },
    });

    // Display results
    console.log(chalk.bold(`\n Found ${result.discovered} subdomains (${result.live} live, ${result.seoRelevant} SEO-relevant)\n`));

    if (result.subdomains.length === 0) {
      console.log(chalk.yellow(' No subdomains found.'));
      return;
    }

    // Table header
    const headerRow = ' ' + 'Subdomain'.padEnd(30) + 'Status'.padEnd(8) + 'Sitemap'.padEnd(10) + 'Title'.padEnd(35) + 'SEO';
    console.log(chalk.dim(headerRow));
    console.log(chalk.dim(' ' + '─'.repeat(90)));

    // Colour for the HTTP status column: 200 green, 3xx yellow, 4xx+ red, else dim.
    const colorForStatus = (code) => {
      if (code === 200) return chalk.green;
      if (code >= 300 && code < 400) return chalk.yellow;
      if (code >= 400) return chalk.red;
      return chalk.dim;
    };

    for (const sub of result.subdomains) {
      const paintStatus = colorForStatus(sub.httpStatus);

      let seoCell;
      if (sub.seoRelevant) {
        seoCell = chalk.green('✓');
      } else if (sub.redirected) {
        seoCell = chalk.yellow('→ ' + (sub.redirectTarget || ''));
      } else {
        seoCell = chalk.dim('–');
      }

      const titleCell = String(sub.title || sub.error || '').slice(0, 33);
      const sitemapCell = sub.sitemapUrls > 0 ? chalk.cyan(sub.sitemapUrls.toString()) : chalk.dim('—');
      // Widen the pad target by the length added through colouring so the column stays aligned.
      const sitemapWidth = 10 + (sitemapCell.length - String(sub.sitemapUrls || '—').length);

      const row = [
        ' ',
        chalk.white(sub.hostname.padEnd(30)),
        paintStatus((sub.httpStatus || '—').toString().padEnd(8)),
        sitemapCell.padEnd(sitemapWidth),
        chalk.dim(titleCell.padEnd(35)),
        seoCell,
      ].join('');
      console.log(row);
    }

    // Summary
    if (result.totalSitemapUrls > 0) {
      console.log(chalk.dim(`\n 📄 Total sitemap URLs across subdomains: ${chalk.cyan(result.totalSitemapUrls)}`));
    }

    // Sources breakdown
    const sourceSummary = Object.entries(result.sources)
      .map(([k, v]) => `${k}: ${v}`)
      .join(', ');
    console.log(chalk.dim(` Sources: ${sourceSummary}`));

    // Auto-add to project config
    if (opts.addTo) {
      const relevant = result.subdomains.filter((s) => s.seoRelevant && !s.isRoot);
      if (relevant.length === 0) {
        console.log(chalk.yellow('\n No SEO-relevant subdomains to add.'));
        return;
      }

      console.log(chalk.bold(`\n Adding ${relevant.length} subdomains to ${opts.addTo} config as owned domains:\n`));

      try {
        const configPath = join(__dirname, 'config', opts.addTo + '.json');
        if (!existsSync(configPath)) {
          console.log(chalk.red(` Config not found: ${configPath}`));
          return;
        }

        const config = JSON.parse(readFileSync(configPath, 'utf8'));
        if (!config.owned) config.owned = [];

        // A hostname counts as known when it is already an owned domain, the target, or a competitor.
        const isKnown = (hostname) =>
          config.owned.some((o) => o.domain === hostname)
          || config.target?.domain === hostname
          || config.competitors?.some((c) => c.domain === hostname);

        let added = 0;
        for (const sub of relevant) {
          if (isKnown(sub.hostname)) {
            console.log(chalk.dim(` ○ ${sub.hostname} (already in config)`));
            continue;
          }
          config.owned.push({
            domain: sub.hostname,
            maxPages: 100,
            crawlMode: 'standard',
          });
          console.log(chalk.green(` + ${sub.hostname}`) + chalk.dim(` (${sub.title || 'no title'})`));
          added++;
        }

        if (added > 0) {
          writeFileSync(configPath, JSON.stringify(config, null, 2), 'utf8');
          console.log(chalk.green(`\n ✓ Added ${added} subdomains. Run: seo-intel crawl ${opts.addTo}`));
        } else {
          console.log(chalk.dim('\n All subdomains already in config.'));
        }
      } catch (err) {
        console.error(chalk.red(` Error updating config: ${err.message}`));
      }
    }

    console.log('');
  });
|
|
293
|
+
|
|
294
|
+
// ── CRAWL ──────────────────────────────────────────────────────────────────
|
|
295
|
+
// `crawl <project>` command. In order, it:
//   1. optionally discovers SEO-relevant subdomains of the target and saves
//      new ones into the project config as `owned` entries;
//   2. turns AI extraction off when the license is not Pro or no Ollama host
//      with the model is available;
//   3. crawls the selected sites with bounded parallelism, storing pages,
//      headings, links, schemas and (when enabled) extractions in the DB;
//   4. writes a final progress snapshot and regenerates the HTML dashboard.
// Exits with code 1 when nothing could be selected for crawling, when any
// extraction failed, or when every selected site was blocked.
program
  .command('crawl <project>')
  .description('Crawl target + competitors for a project')
  .option('--target-only', 'Crawl target site only, skip competitors')
  .option('--domain <domain>', 'Crawl a specific domain only')
  .option('--max-pages <n>', 'Override max pages per domain (default from CRAWL_MAX_PAGES)', null)
  .option('--max-depth <n>', 'Override max click depth (default from CRAWL_MAX_DEPTH)', null)
  .option('--no-extract', 'Skip Qwen extraction (crawl only, extract later)')
  .option('--stealth', 'Advanced browser mode for JS-heavy and dynamic sites')
  .option('--no-tiered', 'Disable section-aware crawling (flat BFS instead)')
  .option('--concurrency <n>', 'Domains to crawl in parallel (auto: 1 if <8GB RAM, 2 if <16GB, 3 otherwise)')
  .option('--no-discover', 'Skip automatic subdomain discovery')
  .action(async (project, opts) => {
    const config = loadConfig(project);
    const db = getDb();

    // ── Auto-discover subdomains for target domain ──────────────────────
    if (opts.discover !== false && config.target?.domain) {
      const rootDomain = config.target.domain.replace(/^www\./, '');
      console.log(chalk.dim(`\n 🔍 Discovering subdomains for ${rootDomain}...`));

      try {
        const { discoverSubdomains } = await import('./crawler/subdomain-discovery.js');
        const result = await discoverSubdomains(rootDomain, { db, httpCheck: true });

        const relevant = result.subdomains.filter((s) => s.seoRelevant && !s.isRoot);
        if (relevant.length > 0) {
          // Check which ones are new (not in config)
          if (!config.owned) config.owned = [];
          const allConfigDomains = new Set([
            config.target.domain,
            ...(config.owned || []).map((o) => o.domain),
            ...(config.competitors || []).map((c) => c.domain),
          ]);

          const newSubs = relevant.filter((s) => !allConfigDomains.has(s.hostname));

          if (newSubs.length > 0) {
            console.log(chalk.green(` ✓ Found ${newSubs.length} new subdomain(s):`));
            for (const s of newSubs) {
              const sitemapInfo = s.sitemapUrls > 0 ? chalk.cyan(` (${s.sitemapUrls} sitemap URLs)`) : '';
              console.log(chalk.green(` + ${s.hostname}`) + chalk.dim(` — ${s.title || 'no title'}`) + sitemapInfo);
              // Use sitemap count to suggest maxPages (at least 100, capped at 500)
              const suggestedPages = s.sitemapUrls > 0 ? Math.min(500, Math.max(100, s.sitemapUrls)) : 100;
              config.owned.push({
                domain: s.hostname,
                maxPages: suggestedPages,
                crawlMode: 'standard',
              });
            }
            // Save updated config (before role/url are filled in below, so they are not persisted)
            const configPath = join(__dirname, `config/${project}.json`);
            writeFileSync(configPath, JSON.stringify(config, null, 2), 'utf8');
            console.log(chalk.dim(` Config updated → ${newSubs.length} subdomains added as owned`));
          } else {
            console.log(chalk.dim(` ✓ All ${relevant.length} subdomains already in config`));
          }
        } else {
          console.log(chalk.dim(' ✓ No new subdomains found'));
        }
      } catch (err) {
        // Discovery is optional; a failure must not prevent the crawl.
        console.log(chalk.dim(` ⚠ Subdomain discovery skipped: ${err.message}`));
      }
    }

    // ── Tier gate: Free tier = crawl-only, no AI extraction ──────────────
    if (opts.extract !== false && !isPro()) {
      console.log(chalk.dim('\n ℹ Free tier: crawl-only mode (AI extraction requires Solo/Agency)'));
      opts.extract = false;
    }

    // ── BUG-003/009: AI preflight — check Ollama availability before crawl ──
    if (opts.extract !== false) {
      const ollamaAvailable = await checkOllamaAvailability();
      if (!ollamaAvailable) {
        console.log(chalk.yellow('\n ⚠️ No AI extraction available (Ollama unreachable, no API keys configured)'));
        console.log(chalk.white(' → Switching to ') + chalk.bold.green('crawl-only mode') + chalk.white(' — raw data will be collected without AI extraction'));
        console.log(chalk.dim(' Tip: Install Ollama (ollama.com) + run `ollama pull qwen3:4b` to enable local AI extraction\n'));
        opts.extract = false;
      }
    }

    // `owned` and `competitors` are optional in the config; a missing target is
    // dropped here instead of crashing the role loop below.
    const owned = config.owned || [];
    const competitors = config.competitors || [];
    const allSites = [config.target, ...owned, ...competitors].filter(Boolean);

    // Add role + url to entries that lack them
    for (const site of allSites) {
      if (!site.role) {
        if (site === config.target) site.role = 'target';
        else if (competitors.includes(site)) site.role = 'competitor';
        else site.role = 'owned';
      }
      if (!site.url && site.domain) site.url = `https://${site.domain}`;
    }

    const sites = opts.domain
      ? allSites.filter((s) => s.domain === opts.domain)
      : opts.targetOnly
        ? [config.target, ...owned].filter(Boolean)
        : allSites;

    // Nothing selected: say why instead of reporting a "complete" crawl and
    // then exiting 1 through the "all domains blocked" check (0 === 0).
    if (sites.length === 0) {
      const reason = opts.domain ? ` — domain "${opts.domain}" is not in the config` : '';
      console.error(chalk.red(`\n No sites to crawl for project: ${project}${reason}\n`));
      process.exit(1);
    }

    const stealthLabel = opts.stealth ? chalk.magenta(' [STEALTH]') : '';
    const tieredLabel = opts.tiered === false ? chalk.gray(' [flat BFS]') : chalk.green(' [tiered]');
    console.log(chalk.bold.cyan(`\n🔍 SEO Intel — Crawling ${sites.length} site(s) for project: ${project}`) + stealthLabel + tieredLabel + '\n');

    const crawlStart = Date.now();
    let totalExtracted = 0;
    let totalFailed = 0;
    let totalSkipped = 0;
    let totalBlocked = 0;
    const ramGb = totalmem() / (1024 ** 3);
    const defaultConcurrency = ramGb < 8 ? 1 : ramGb < 16 ? 2 : 3;
    const concurrency = Math.max(1, Number.parseInt(opts.concurrency, 10) || defaultConcurrency);

    // ── Per-domain crawl worker ──────────────────────────────────────────
    // Crawls one site and stores every yielded page; updates the shared totals above.
    async function crawlSite(site) {
      const tag = chalk.cyan(`[${site.domain.split('.')[0]}]`);
      console.log(chalk.yellow(`\n${tag} → Crawling ${site.url} [${site.role}]`));

      upsertDomain(db, { domain: site.domain, project, role: site.role });
      const domainId = db.prepare('SELECT id FROM domains WHERE domain = ? AND project = ?').get(site.domain, project)?.id;
      if (!domainId) { console.error(`${tag} No domainId for`, site.domain); return; }

      let pageCount = 0;
      let siteExtracted = 0;
      let siteSkipped = 0;
      const requestedPages = opts.maxPages ? Number.parseInt(opts.maxPages, 10) : undefined;
      const crawlOpts = {
        maxPages: requestedPages ? capPages(requestedPages) : capPages(9999),
        maxDepth: opts.maxDepth ? Number.parseInt(opts.maxDepth, 10) : undefined,
        stealth: !!opts.stealth,
        tiered: opts.tiered !== false,
        strictHost: !!opts.domain, // BUG-006: enforce exact hostname when --domain is set
      };

      for await (const page of crawlDomain(site.url, crawlOpts)) {
        if (page._blocked) {
          totalBlocked++;
          console.log(chalk.bold.red(` ${tag} ⛔ BLOCKED: ${page._blockReason} — stopping ${site.domain}`));
          break;
        }

        // Read the previously stored hash/extraction BEFORE upsertPage overwrites the row.
        const oldHash = (opts.extract !== false && page.contentHash)
          ? getPageHash(db, page.url)
          : null;
        const hadExtraction = (opts.extract !== false)
          ? !!db.prepare('SELECT 1 FROM extractions e JOIN pages p ON p.id = e.page_id WHERE p.url = ? LIMIT 1').get(page.url)
          : false;

        const pageRes = upsertPage(db, {
          domainId,
          url: page.url,
          statusCode: page.status,
          wordCount: page.wordCount,
          loadMs: page.loadMs,
          isIndexable: page.isIndexable,
          clickDepth: page.depth ?? 0,
          publishedDate: page.publishedDate || null,
          modifiedDate: page.modifiedDate || null,
          contentHash: page.contentHash || null,
        });
        const pageId = pageRes?.id;

        // Incremental mode: same content hash and an extraction already stored → skip re-extraction.
        if (opts.extract !== false && page.contentHash && hadExtraction && oldHash && oldHash === page.contentHash) {
          totalSkipped++;
          siteSkipped++;
          process.stdout.write(chalk.gray(` ${tag} [${pageCount + 1}] d${page.depth ?? 0} ${page.url.slice(0, 65)} `) + chalk.blue('≡ unchanged\n'));
          pageCount++;
          continue;
        }

        // Pages flagged by the crawler as low quality are recorded but not processed further.
        if (!page.quality && page.qualityReason) {
          process.stdout.write(chalk.yellow(` ${tag} [${pageCount + 1}] d${page.depth ?? 0} ${page.url.slice(0, 65)} `) + chalk.yellow(`⚠ ${page.qualityReason} (${page.wordCount}w) — skipped\n`));
          pageCount++;
          continue;
        }

        if (opts.extract !== false) {
          process.stdout.write(chalk.gray(` ${tag} [${pageCount + 1}] d${page.depth ?? 0} ${page.url.slice(0, 65)} → extracting...`));
          writeProgress({
            status: 'running', command: 'crawl', project,
            domain: site.domain, current_url: page.url,
            page_index: totalExtracted + 1,
            started_at: crawlStart,
            failed: totalFailed,
          });
          try {
            const extractFn = await getExtractPage();
            const extraction = await extractFn(page);
            insertExtraction(db, { pageId, data: extraction });
            insertKeywords(db, pageId, extraction.keywords);
            insertHeadings(db, pageId, page.headings);
            insertLinks(db, pageId, page.links);
            if (page.parsedSchemas?.length) insertPageSchemas(db, pageId, page.parsedSchemas);
            process.stdout.write(chalk.green(` ✓${page.parsedSchemas?.length ? ` [${page.parsedSchemas.length} schema]` : ''}\n`));
            totalExtracted++;
            siteExtracted++;
          } catch (err) {
            // A failed page is counted and reported; the crawl continues with the next page.
            process.stdout.write(chalk.red(` ✗ ${err.message}\n`));
            totalFailed++;
          }
        } else {
          insertHeadings(db, pageId, page.headings);
          insertLinks(db, pageId, page.links);
          if (page.parsedSchemas?.length) insertPageSchemas(db, pageId, page.parsedSchemas);
          process.stdout.write(chalk.gray(` ${tag} [${pageCount + 1}] d${page.depth ?? 0} ${page.url.slice(0, 65)} ✓${page.parsedSchemas?.length ? ` [${page.parsedSchemas.length} schema]` : ''}\n`));
        }

        pageCount++;
      }

      const parts = [`${pageCount} pages`];
      if (siteExtracted > 0) parts.push(chalk.green(`${siteExtracted} extracted`));
      if (siteSkipped > 0) parts.push(chalk.blue(`${siteSkipped} unchanged`));
      console.log(chalk.green(` ${tag} ✅ Done: ${parts.join(' · ')}`));
    }

    // ── Concurrency-limited parallel executor ────────────────────────────
    if (concurrency > 1 && sites.length > 1) {
      console.log(chalk.magenta(`⚡ Parallel mode: ${concurrency} domains at a time\n`));
    }

    const running = new Set();
    const results = [];

    for (const site of sites) {
      // Errors are reported per site and swallowed, so `promise` never rejects.
      const promise = crawlSite(site).catch((err) => {
        console.error(chalk.red(`\n✗ ${site.domain} failed: ${err.message}`));
      });
      running.add(promise);
      promise.finally(() => running.delete(promise));
      results.push(promise);
      // At the limit: wait for one running crawl to settle before starting the next.
      if (running.size >= concurrency) {
        await Promise.race(running);
      }
    }

    await Promise.all(results);

    writeProgress({ status: 'completed', command: 'crawl', project, extracted: totalExtracted, failed: totalFailed, skipped: totalSkipped, started_at: crawlStart, finished_at: Date.now() });
    if (totalSkipped > 0) console.log(chalk.blue(`\n📊 Incremental: ${totalSkipped} unchanged pages skipped (same content hash)`));
    if (totalBlocked > 0) console.log(chalk.red(`\n⛔ ${totalBlocked} domain(s) blocked (rate-limited or WAF)`));
    const elapsed = ((Date.now() - crawlStart) / 1000).toFixed(1);
    // Auto-regenerate dashboard so it never goes stale after a crawl
    try {
      const dashPath = generateHtmlDashboard(db, project, config);
      console.log(chalk.dim(` 📊 Dashboard refreshed → ${dashPath}`));
    } catch (dashErr) {
      console.log(chalk.dim(` ⚠ Dashboard refresh skipped: ${dashErr.message}`));
    }

    if (opts.extract === false && totalExtracted === 0) {
      console.log(chalk.bold.green(`\n✅ Crawl complete (${elapsed}s) — raw data collected.`));
      console.log(chalk.white(' Next steps:'));
      console.log(chalk.cyan(' → seo-intel extract ' + project) + chalk.dim(' (run AI extraction when Ollama is available)'));
      console.log(chalk.cyan(' → seo-intel analyze ' + project) + chalk.dim(' (run full AI analysis)'));
      console.log('');
    } else {
      console.log(chalk.bold.green(`\n✅ Crawl complete (${elapsed}s). Run \`seo-intel analyze ${project}\` next.\n`));
    }

    // Exit non-zero if any extraction failures or all domains blocked
    if (totalFailed > 0 || totalBlocked === sites.length) {
      process.exit(1);
    }
  });
|
|
566
|
+
|
|
567
|
+
// ── ANALYZE ────────────────────────────────────────────────────────────────
|
|
568
|
+
// `analyze <project>`: builds an analysis prompt from crawled data, sends it to
// Gemini, stores the structured result (JSON file + `analyses` table) and
// refreshes the HTML dashboard.
//
// NOTE(review): `--model` is accepted but not used here — callGemini() takes
// only the prompt and the DB row below always records 'gemini'.
program
  .command('analyze <project>')
  .description('Run cloud analysis (Gemini) on crawled data')
  .option('--model <model>', 'Model to use', 'gemini')
  .action(async (project, opts) => {
    if (!requirePro('analyze')) return;
    const config = loadConfig(project);
    const db = getDb();

    console.log(chalk.bold.cyan(`\n🧠 Analyzing ${project} data...\n`));

    const summary = getCompetitorSummary(db, project);
    const keywordMatrix = getKeywordMatrix(db, project);
    const headings = getHeadingStructure(db, project);

    if (!summary.length) {
      console.error(chalk.red('No crawl data found. Run `crawl` first.'));
      process.exit(1);
    }

    const target = summary.find(s => s.role === 'target');
    const competitors = summary.filter(s => s.role === 'competitor');

    if (!target) {
      console.error(chalk.red('No target site data found.'));
      process.exit(1);
    }

    // Augment with domain for formatting. Prefer the configured domain, but
    // fall back to the crawled one when the config omits `target` or
    // `competitors` instead of throwing.
    target.domain = config.target?.domain || target.domain;
    competitors.forEach((c, i) => {
      c.domain = config.competitors?.[i]?.domain || c.domain;
    });

    const buildPromptFn = await getBuildAnalysisPrompt();
    const prompt = buildPromptFn({
      project,
      target,
      competitors,
      keywordMatrix,
      headingStructure: headings,
      context: config.context,
    });

    // Rough token estimate: ~4 characters per token.
    console.log(chalk.yellow(`Prompt length: ~${Math.round(prompt.length / 4)} tokens`));
    console.log(chalk.yellow('Sending to Gemini...\n'));

    // Save prompt for debugging
    const promptPath = join(__dirname, `reports/${project}-prompt-${Date.now()}.txt`);
    writeFileSync(promptPath, prompt, 'utf8');
    console.log(chalk.gray(`Prompt saved: ${promptPath}`));

    // Call Gemini via gemini CLI (reuse existing auth)
    const result = await callGemini(prompt);

    if (!result) {
      console.error(chalk.red('No response from model.'));
      process.exit(1);
    }

    // Parse JSON from response: take the outermost {...} span so any prose or
    // code fences around the object are ignored.
    let analysis;
    try {
      const jsonMatch = result.match(/\{[\s\S]*\}/);
      if (!jsonMatch) throw new SyntaxError('No JSON object found in response');
      analysis = JSON.parse(jsonMatch[0]);
    } catch {
      console.error(chalk.red('Could not parse JSON from response. Saving raw output.'));
      const rawPath = join(__dirname, `reports/${project}-raw-${Date.now()}.txt`);
      writeFileSync(rawPath, result, 'utf8');
      // Tell the user where the raw output went (same as the `keywords` command).
      console.error(chalk.gray(`Raw output saved: ${rawPath}`));
      process.exit(1);
    }

    // Save structured analysis to file
    const outPath = join(__dirname, `reports/${project}-analysis-${Date.now()}.json`);
    writeFileSync(outPath, JSON.stringify(analysis, null, 2), 'utf8');

    // Save to DB (so HTML dashboard picks it up)
    db.prepare(`
      INSERT INTO analyses (project, generated_at, model, keyword_gaps, long_tails, quick_wins, new_pages, content_gaps, positioning, technical_gaps, raw)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `).run(
      project, Date.now(), 'gemini',
      JSON.stringify(analysis.keyword_gaps || []),
      JSON.stringify(analysis.long_tails || []),
      JSON.stringify(analysis.quick_wins || []),
      JSON.stringify(analysis.new_pages || []),
      JSON.stringify(analysis.content_gaps || []),
      JSON.stringify(analysis.positioning || {}),
      JSON.stringify(analysis.technical_gaps || []),
      result,
    );

    // Print summary
    printAnalysisSummary(analysis, project);

    // Auto-regenerate dashboard so it reflects the new analysis immediately.
    // Best-effort: a dashboard failure must not fail the analysis itself.
    try {
      const dashPath = generateHtmlDashboard(db, project, config);
      console.log(chalk.dim(` 📊 Dashboard refreshed → ${dashPath}`));
    } catch (dashErr) {
      console.log(chalk.dim(` ⚠ Dashboard refresh skipped: ${dashErr.message}`));
    }

    console.log(chalk.bold.green(`\n✅ Analysis saved: ${outPath}\n`));
  });
|
|
671
|
+
|
|
672
|
+
// ── KEYWORDS ───────────────────────────────────────────────────────────────
|
|
673
|
+
// `keywords <project>`: asks Gemini for a clustered keyword matrix
// (traditional / perplexity / agent phrases), seeded with the keywords most
// competitors share in the crawled data. Prints a terminal summary and, with
// `--save`, writes the JSON to reports/.
program
  .command('keywords <project>')
  .description('Generate a keyword cluster matrix (traditional + perplexity + agent) via Gemini')
  .option('--count <n>', 'Number of keyword phrases to generate', '120')
  .option('--intent <type>', 'Filter by intent: commercial|informational|all', 'all')
  .option('--save', 'Save output to reports/<project>-keywords-<timestamp>.json')
  .action(async (project, opts) => {
    if (!requirePro('keywords')) return;
    const config = loadConfig(project);
    const db = getDb();
    // Explicit radix; non-numeric or zero input falls back to the default of 120.
    const count = Number.parseInt(opts.count, 10) || 120;
    const intentFilter = opts.intent || 'all';

    console.log(chalk.bold.cyan(`\n🔑 Keyword Matrix — ${project.toUpperCase()}\n`));
    console.log(chalk.gray(`Generating ${count} phrases (intent: ${intentFilter})...\n`));

    const keywordMatrix = getKeywordMatrix(db, project);
    const summary = getCompetitorSummary(db, project);

    if (!summary.length) {
      console.error(chalk.red('No crawl data found. Run `crawl` first.'));
      process.exit(1);
    }

    const target = summary.find(s => s.role === 'target');
    const competitors = summary.filter(s => s.role === 'competitor');

    if (!target) {
      console.error(chalk.red('No target site data found.'));
      process.exit(1);
    }

    // Prefer configured domains; fall back to the crawled ones when the config
    // omits `target` or `competitors` instead of throwing.
    target.domain = config.target?.domain || target.domain;
    competitors.forEach((c, i) => { c.domain = config.competitors?.[i]?.domain || c.domain; });

    // Top competitor keywords for context (count unique competitor domains mentioning each keyword)
    const competitorCountByKeyword = new Map();
    for (const row of keywordMatrix) {
      if (row.role !== 'competitor') continue;
      const key = String(row.keyword || '').toLowerCase().trim();
      if (!key) continue;
      if (!competitorCountByKeyword.has(key)) competitorCountByKeyword.set(key, new Set());
      competitorCountByKeyword.get(key).add(row.domain);
    }

    // Keep the 60 keywords shared by the most competitors, one per line.
    const topKeywords = [...competitorCountByKeyword.entries()]
      .map(([keyword, domains]) => ({ keyword, competitor_count: domains.size }))
      .sort((a, b) => b.competitor_count - a.competitor_count)
      .slice(0, 60)
      .map(k => `${k.keyword} (${k.competitor_count} competitors)`)
      .join('\n');

    const competitorDomains = competitors.map(c => c.domain).join(', ');

    const intentInstruction = intentFilter === 'all'
      ? 'Include a mix of informational, commercial, transactional, and navigational intents.'
      : `Focus primarily on ${intentFilter} intent keywords.`;

    const industry = config.context || `the industry of ${target.domain}`;
    const prompt = `You are an expert SEO strategist. Analyze the competitive landscape and generate keyword opportunities.

Project: ${project.toUpperCase()}
Target site: ${target.domain}
Competitors: ${competitorDomains}
Industry context: ${industry}

Competitor keyword signals (crawled data):
${topKeywords || '(no crawl data yet — use your knowledge of the space)'}

Generate exactly ${count} keyword phrases organized into clusters. ${intentInstruction}

Three keyword types to generate:
1. **traditional** — how humans search Google (3-5 words, keyword-style)
2. **perplexity** — how users ask Perplexity/ChatGPT (more complete, question-style)
3. **agent** — how an AI agent researches on behalf of a user (technical, complete, spec-like queries that include requirements and constraints). Agent queries are a new SEO vector — LLMs cite structured, factual content, so optimizing for agent queries means getting cited by AI assistants.

Distribute the ${count} phrases roughly as: 40% traditional, 35% perplexity, 25% agent.

Respond ONLY with a single valid JSON object matching this exact schema. No explanation, no markdown, no backticks:

{
"keyword_clusters": [
{
"topic": "cluster topic name",
"funnel_stage": "awareness|consideration|decision",
"competition": "low|medium|high",
"keywords": [
{
"phrase": "3-6 word keyword phrase or full question",
"type": "traditional|perplexity|agent",
"intent": "informational|commercial|navigational|transactional",
"priority": "high|medium|low",
"notes": "why this is a good target for ${target.domain}"
}
]
}
],
"quick_targets": ["phrase1", "phrase2", "phrase3", "phrase4", "phrase5"],
"agent_queries": [
"full question an AI agent would ask to find this product"
],
"summary": "2-3 sentence executive summary of the keyword opportunity for ${target.domain}"
}`;

    // Rough token estimate: ~4 characters per token.
    console.log(chalk.yellow(`Prompt length: ~${Math.round(prompt.length / 4)} tokens`));
    console.log(chalk.yellow('Sending to Gemini...\n'));

    const result = await callGemini(prompt);

    if (!result) {
      console.error(chalk.red('No response from Gemini.'));
      process.exit(1);
    }

    // Take the outermost {...} span so stray prose/fences around the JSON are ignored.
    let data;
    try {
      const jsonMatch = result.match(/\{[\s\S]*\}/);
      if (!jsonMatch) throw new SyntaxError('No JSON object found in response');
      data = JSON.parse(jsonMatch[0]);
    } catch {
      console.error(chalk.red('Could not parse JSON from Gemini response.'));
      const rawPath = join(__dirname, `reports/${project}-keywords-raw-${Date.now()}.txt`);
      writeFileSync(rawPath, result, 'utf8');
      console.error(chalk.gray(`Raw output saved: ${rawPath}`));
      process.exit(1);
    }

    // Apply intent filter if needed. The model may omit `keyword_clusters`
    // entirely, so normalise to an array before filtering.
    if (intentFilter !== 'all') {
      const clusters = Array.isArray(data.keyword_clusters) ? data.keyword_clusters : [];
      for (const cluster of clusters) {
        cluster.keywords = (cluster.keywords || []).filter(k => k.intent === intentFilter);
      }
      data.keyword_clusters = clusters.filter(c => c.keywords.length > 0);
    }

    // Count totals
    const allKeywords = (data.keyword_clusters || []).flatMap(c => c.keywords || []);
    const byType = { traditional: 0, perplexity: 0, agent: 0 };
    const byStage = { awareness: 0, consideration: 0, decision: 0 };
    for (const kw of allKeywords) {
      // Unknown types returned by the model are ignored rather than counted.
      if (byType[kw.type] !== undefined) byType[kw.type]++;
    }
    for (const cluster of (data.keyword_clusters || [])) {
      const stage = cluster.funnel_stage;
      if (byStage[stage] !== undefined) byStage[stage] += (cluster.keywords || []).length;
    }

    // Print terminal output
    console.log(chalk.bold.cyan(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`));
    console.log(chalk.bold.cyan(` 📊 Keyword Matrix Results — ${project.toUpperCase()}`));
    console.log(chalk.bold.cyan(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`));

    console.log(chalk.bold(`Total phrases: ${allKeywords.length}`));
    console.log(chalk.gray(` Traditional: ${byType.traditional} · Perplexity: ${byType.perplexity} · Agent: ${byType.agent}\n`));

    console.log(chalk.bold('By funnel stage:'));
    console.log(` ${chalk.blue('Awareness:')} ${byStage.awareness}`);
    console.log(` ${chalk.yellow('Consideration:')} ${byStage.consideration}`);
    console.log(` ${chalk.green('Decision:')} ${byStage.decision}\n`);

    if (data.quick_targets?.length) {
      console.log(chalk.bold('⚡ Top Quick Targets:'));
      data.quick_targets.slice(0, 5).forEach((phrase, i) => {
        console.log(` ${chalk.bold.green(`${i + 1}.`)} ${phrase}`);
      });
      console.log();
    }

    if (data.agent_queries?.length) {
      console.log(chalk.bold.magenta('🤖 Top Agent Queries (AI citation gold):'));
      data.agent_queries.slice(0, 3).forEach((q, i) => {
        console.log(` ${chalk.bold.magenta(`${i + 1}.`)} ${q}`);
      });
      console.log();
    }

    if (data.summary) {
      console.log(chalk.bold('📝 Summary:'));
      console.log(chalk.gray(` ${data.summary}\n`));
    }

    if (opts.save) {
      const outPath = join(__dirname, `reports/${project}-keywords-${Date.now()}.json`);
      writeFileSync(outPath, JSON.stringify(data, null, 2), 'utf8');
      console.log(chalk.bold.green(`✅ Report saved: ${outPath}\n`));
    }
  });
|
|
859
|
+
|
|
860
|
+
// ── REPORT ─────────────────────────────────────────────────────────────────
|
|
861
|
+
// `report <project>`: prints the most recent saved analysis for a project.
// Analysis files are named `<project>-analysis-<epoch ms>.json`, so a reverse
// lexicographic sort puts the newest first.
program
  .command('report <project>')
  .description('Print latest analysis as readable markdown')
  .action((project) => {
    const reportsDir = join(__dirname, 'reports');

    // A missing reports/ directory simply means nothing has been analysed yet;
    // treat it like an empty directory instead of crashing with ENOENT.
    let entries;
    try {
      entries = readdirSync(reportsDir);
    } catch (err) {
      if (err.code !== 'ENOENT') throw err;
      entries = [];
    }

    const files = entries
      .filter(f => f.startsWith(`${project}-analysis-`))
      .sort().reverse();

    if (!files.length) {
      console.error(chalk.red('No analysis found. Run `analyze` first.'));
      process.exit(1);
    }

    const latest = JSON.parse(readFileSync(join(reportsDir, files[0]), 'utf8'));
    printAnalysisSummary(latest, project);
  });
|
|
877
|
+
|
|
878
|
+
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
879
|
+
|
|
880
|
+
/**
 * Load and parse `config/<project>.json`.
 *
 * On failure this never returns: it prints guidance to stderr and calls
 * `process.exit(1)`.
 *  - Config file exists but is not valid JSON → reports the parse error.
 *  - Config file cannot be read and `project` looks like a domain → suggests the
 *    project whose config mentions that domain.
 *  - Otherwise → lists the available projects (excluding `example.json`).
 *
 * @param {string} project - Project name (config file basename without `.json`).
 * @returns {object} The parsed project configuration.
 */
function loadConfig(project) {
  const configDir = join(__dirname, 'config');
  const path = join(configDir, `${project}.json`);

  // Read and parse separately so a malformed config is not misreported as
  // "project not found".
  let raw = null;
  try {
    raw = readFileSync(path, 'utf8');
  } catch {
    // Unreadable or missing file: fall through to the guidance below.
  }

  if (raw !== null) {
    try {
      return JSON.parse(raw);
    } catch (err) {
      console.error(chalk.red(`\n✗ Config for "${project}" is not valid JSON: ${path}`));
      console.error(chalk.dim(` ${err.message}\n`));
      process.exit(1);
    }
  }

  // BUG-001: If input looks like a domain, try to match against existing project configs
  if (project.includes('.')) {
    // Strip scheme and path so "https://example.com/page" compares as "example.com".
    const inputDomain = project.replace(/^https?:\/\//, '').replace(/\/.*$/, '');
    try {
      const configs = readdirSync(configDir).filter(f => f.endsWith('.json'));
      for (const file of configs) {
        try {
          const cfg = JSON.parse(readFileSync(join(configDir, file), 'utf8'));
          const allDomains = [
            cfg.target?.domain,
            ...(cfg.owned || []).map(o => o.domain),
            ...(cfg.competitors || []).map(c => c.domain),
          ].filter(Boolean);

          // Match with or without a leading "www." on either side.
          if (allDomains.some(d => d === inputDomain || d === `www.${inputDomain}` || inputDomain === `www.${d}`)) {
            const projectName = file.replace(/\.json$/, '');
            console.error(chalk.yellow(`\n⚠️ "${project}" looks like a domain. Did you mean the project name?`));
            console.error(chalk.bold.cyan(` → seo-intel crawl ${projectName}\n`));
            process.exit(1);
          }
        } catch { /* skip malformed configs */ }
      }
    } catch { /* config dir unreadable */ }
  }

  // List available projects for guidance
  try {
    const configs = readdirSync(configDir).filter(f => f.endsWith('.json') && f !== 'example.json');
    if (configs.length > 0) {
      console.error(chalk.red(`\n✗ Project "${project}" not found.\n`));
      console.error(chalk.white(' Available projects:'));
      for (const f of configs) {
        console.error(chalk.cyan(` → seo-intel crawl ${f.replace(/\.json$/, '')}`));
      }
      console.error(chalk.dim(`\n Or create a new project: seo-intel setup\n`));
    } else {
      console.error(chalk.red(`\n✗ No projects configured yet.\n`));
      console.error(chalk.white(` Get started: `) + chalk.bold.cyan(`seo-intel setup\n`));
    }
  } catch {
    console.error(chalk.red(`\n✗ Config not found: ${path}`));
    console.error(chalk.dim(` Run: seo-intel setup\n`));
  }
  process.exit(1);
}
|
|
933
|
+
|
|
934
|
+
/**
 * Send a prompt to the `gemini` CLI and return its stdout.
 *
 * The prompt is written to the child's stdin rather than interpolated into the
 * command line: prompts contain crawled text, and characters such as `$(...)`
 * or backticks would otherwise be interpreted by the shell.
 *
 * @param {string} prompt - Full prompt text.
 * @returns {Promise<string|null>} CLI output, or `null` if the command failed,
 *   timed out (120 s) or exceeded the 10 MiB output buffer.
 */
async function callGemini(prompt) {
  // Use gemini CLI (already auth'd via OpenClaw)
  const { execSync } = await import('child_process');
  try {
    // The command string is constant; `-p -` makes the CLI read the prompt from stdin.
    const result = execSync('gemini -p -', {
      input: String(prompt),
      maxBuffer: 10 * 1024 * 1024,
      timeout: 120000,
    }).toString();
    return result;
  } catch (err) {
    console.error('[gemini]', err.message);
    return null;
  }
}
|
|
948
|
+
|
|
949
|
+
/**
 * Print a condensed, human-readable view of an analysis object to stdout.
 *
 * Each list section shows its total size in the heading but only lists the
 * entries flagged as high priority / high impact, capped per section.
 *
 * @param {object} a - Parsed analysis (positioning, keyword_gaps, long_tails,
 *   quick_wins, new_pages).
 * @param {string} project - Project name, shown upper-cased in the title.
 */
function printAnalysisSummary(a, project) {
  console.log(chalk.bold.cyan(`\n📊 SEO Analysis — ${project.toUpperCase()}\n`));

  const positioning = a.positioning;
  if (positioning) {
    console.log(chalk.bold('🎯 Positioning'));
    console.log(` Open angle: ${positioning.open_angle}`);
    console.log(` Your differentiator: ${positioning.target_differentiator}\n`);
  }

  // One descriptor per list section: where the items live, how the heading
  // reads, which entries to keep, how many to show and how to render a row.
  const sections = [
    {
      key: 'keyword_gaps',
      heading: (total) => `🔑 Top Keyword Gaps (${total} total)`,
      keep: (entry) => entry.priority === 'high',
      limit: 10,
      render: (entry) => ` ${chalk.green('+')} [${entry.difficulty}] ${entry.keyword} (${entry.intent})`,
    },
    {
      key: 'long_tails',
      heading: (total) => `🔭 Long-tail Opportunities (${total} total)`,
      keep: (entry) => entry.priority === 'high',
      limit: 10,
      render: (entry) => ` ${chalk.blue('→')} "${entry.phrase}" [${entry.page_type}]`,
    },
    {
      key: 'quick_wins',
      heading: (total) => `⚡ Quick Wins (${total} total)`,
      keep: (entry) => entry.impact === 'high',
      limit: 5,
      render: (entry) => ` ${chalk.yellow('!')} ${entry.page} → ${entry.fix}`,
    },
    {
      key: 'new_pages',
      heading: (total) => `📄 New Pages to Create (${total} total)`,
      keep: (entry) => entry.priority === 'high',
      limit: 5,
      render: (entry) => ` ${chalk.magenta('*')} /${entry.slug} — "${entry.title}"`,
    },
  ];

  for (const section of sections) {
    const items = a[section.key];
    if (!items?.length) continue;

    console.log(chalk.bold(section.heading(items.length)));
    for (const entry of items.filter(section.keep).slice(0, section.limit)) {
      console.log(section.render(entry));
    }
    console.log();
  }
}
|
|
990
|
+
|
|
991
|
+
// ── RUN (cron-friendly) ────────────────────────────────────────────────────
|
|
992
|
+
// `run`: cron-friendly single step. Crawls the next stale domain, extracts
// every new/changed page, runs the project analysis if needed, then exits.
// Progress is published through writeProgress() so `status` can display it.
program
  .command('run')
  .description('Smart cron run: crawl next stale domain, analyze if needed, exit when done')
  .action(async () => {
    if (!requirePro('run')) return;
    const db = getDb();
    const next = getNextCrawlTarget(db);

    if (!next) {
      console.log(chalk.green('✅ All domains fresh. Nothing to crawl.'));
      console.log('DONE');
      process.exit(0);
    }

    console.log(chalk.bold.cyan(`\n🔍 Cron run: crawling ${next.domain} [${next.role}] (project: ${next.project})\n`));

    const runStart = Date.now();

    // Upsert domain
    upsertDomain(db, { domain: next.domain, project: next.project, role: next.role });
    const domainRow = db.prepare('SELECT id FROM domains WHERE domain = ? AND project = ?')
      .get(next.domain, next.project);
    const domainId = domainRow.id;

    let pageCount = 0; // every page processed (extracted + skipped + failed)
    let extracted = 0; // pages whose extraction was stored successfully
    let skipped = 0;   // pages skipped because their content hash is unchanged
    let failed = 0;    // pages whose extraction threw
    let blocked = false;
    for await (const page of crawlDomain(next.url)) {
      // ── Handle blocked pages from backoff system ──
      if (page._blocked) {
        blocked = true;
        console.log(chalk.bold.red(` ⛔ BLOCKED: ${page._blockReason} — stopping ${next.domain}`));
        break;
      }

      // Read the previously stored hash BEFORE the upsert below is handed the
      // new one, so the "unchanged" comparison is against the prior crawl.
      // NOTE(review): assumes upsertPage may persist contentHash — confirm in db.js.
      const oldHash = page.contentHash ? getPageHash(db, page.url) : null;

      const pageRes = upsertPage(db, {
        domainId,
        url: page.url,
        statusCode: page.status,
        wordCount: page.wordCount,
        loadMs: page.loadMs,
        isIndexable: page.isIndexable,
        clickDepth: page.depth ?? 0,
        publishedDate: page.publishedDate || null,
        modifiedDate: page.modifiedDate || null,
        contentHash: page.contentHash || null,
      });
      const pageId = pageRes?.id;

      if (!pageId) continue;

      // ── Incremental: skip extraction if content unchanged ──
      if (page.contentHash && oldHash && oldHash === page.contentHash) {
        skipped++;
        process.stdout.write(chalk.gray(` [${pageCount + 1}] d${page.depth ?? 0} ${page.url.slice(0, 65)} `) + chalk.blue('≡ unchanged\n'));
        pageCount++;
        continue;
      }

      process.stdout.write(chalk.gray(` [${pageCount + 1}] d${page.depth ?? 0} ${page.url.slice(0, 65)} → extracting...`));
      writeProgress({
        status: 'running', command: 'run', project: next.project,
        domain: next.domain, current_url: page.url,
        page_index: pageCount + 1,
        failed,
        started_at: runStart,
      });
      try {
        const extractFn = await getExtractPage();
        const extraction = await extractFn(page);
        insertExtraction(db, { pageId, data: extraction });
        insertKeywords(db, pageId, extraction.keywords);
        insertHeadings(db, pageId, page.headings);
        insertLinks(db, pageId, page.links);
        if (page.parsedSchemas?.length) insertPageSchemas(db, pageId, page.parsedSchemas);
        extracted++;
        process.stdout.write(chalk.green(` ✓${page.parsedSchemas?.length ? ` [${page.parsedSchemas.length} schema]` : ''}\n`));
      } catch (err) {
        // One bad page must not abort the crawl; record it and move on.
        failed++;
        process.stdout.write(chalk.red(` ✗ ${err.message}\n`));
      }
      pageCount++;
    }

    // Report real extraction/failure counts: `status` prints `extracted` and
    // `failed`, so they must not include skipped pages.
    writeProgress({ status: 'completed', command: 'run', project: next.project, domain: next.domain, extracted, skipped, failed, started_at: runStart, finished_at: Date.now() });
    const parts = [`${pageCount} pages from ${next.domain}`];
    if (skipped > 0) parts.push(chalk.blue(`${skipped} unchanged`));
    if (blocked) parts.push(chalk.red(`blocked`));
    console.log(chalk.green(`\n✅ Crawled ${parts.join(' · ')}`));
    if (skipped > 0) console.log(chalk.blue(` 📊 Incremental: ${skipped} pages skipped (same content hash)`));

    // Check if analysis needed for this project
    if (needsAnalysis(db, next.project)) {
      console.log(chalk.yellow(`\n🧠 New crawl data detected — running analysis for ${next.project}...`));
      await runAnalysis(next.project, db);
    }

    // Check if more stale domains remain
    const remaining = getNextCrawlTarget(db);
    if (remaining) {
      console.log(chalk.yellow(`\n⏳ More stale domains: ${remaining.domain} (${remaining.project}). Next cron run will handle it.`));
    } else {
      console.log(chalk.bold.green('\n🎉 All domains are now fresh!'));
    }

    process.exit(0);
  });
|
|
1099
|
+
|
|
1100
|
+
// ── STATUS ─────────────────────────────────────────────────────────────────
|
|
1101
|
+
// `status`: prints (1) the state of the last/current extraction from the
// progress file, then (2) one row per domain with crawl age and
// (3) extraction coverage as a 20-cell bar.
program
  .command('status')
  .description('Show crawl freshness + extraction coverage for all domains')
  .action(async () => {
    printLicenseStatus();
    const db = getDb();

    // Formats a duration in seconds as "42s" below a minute, else "7m".
    const formatAgo = (seconds) => (seconds < 60 ? `${seconds}s` : `${Math.round(seconds / 60)}m`);

    // 1. Check live progress file (with PID liveness detection)
    const progress = readProgress();
    if (progress && progress.status === 'running') {
      const elapsed = Math.round((Date.now() - progress.started_at) / 1000);
      const mins = Math.floor(elapsed / 60);
      const secs = elapsed % 60;
      console.log(chalk.bold.yellow(`\n⚡ EXTRACTION RUNNING (pid ${progress.pid})`));
      console.log(chalk.gray(` Command: `) + chalk.white(progress.command));
      console.log(chalk.gray(` Project: `) + chalk.white(progress.project));
      if (progress.domain) console.log(chalk.gray(` Domain: `) + chalk.white(progress.domain));
      if (progress.current_url) console.log(chalk.gray(` Current: `) + chalk.white(progress.current_url.slice(0, 70)));
      if (progress.total) {
        const pct = progress.percent || Math.round((progress.page_index / progress.total) * 100);
        // Linear extrapolation: remaining time = elapsed × remaining% / done%.
        const etaSecs = pct > 0 ? Math.round(elapsed * (100 - pct) / pct) : 0;
        console.log(chalk.gray(` Progress: `) + chalk.cyan(`${progress.page_index}/${progress.total}`) + chalk.gray(` (${pct}%)`));
        if (etaSecs > 0) console.log(chalk.gray(` ETA: `) + chalk.white(`~${Math.floor(etaSecs / 60)}m ${etaSecs % 60}s`));
      } else {
        console.log(chalk.gray(` Page #: `) + chalk.cyan(progress.page_index));
      }
      console.log(chalk.gray(` Elapsed: `) + chalk.white(`${mins}m ${secs}s`));
      if (progress.failed > 0) console.log(chalk.gray(` Failed: `) + chalk.red(progress.failed));
    } else if (progress && progress.status === 'crashed') {
      const ago = Math.round((Date.now() - (progress.crashed_at || progress.updated_at)) / 1000);
      console.log(chalk.bold.red(`\n💀 EXTRACTION CRASHED (pid ${progress.pid} is dead)`));
      console.log(chalk.gray(` Command: `) + chalk.white(progress.command));
      console.log(chalk.gray(` Project: `) + chalk.white(progress.project));
      if (progress.domain) console.log(chalk.gray(` Domain: `) + chalk.white(progress.domain));
      if (progress.current_url) console.log(chalk.gray(` Last URL: `) + chalk.white(progress.current_url.slice(0, 70)));
      console.log(chalk.gray(` Died: `) + chalk.white(`${formatAgo(ago)} ago`));
      console.log(chalk.yellow(` → Re-run: node cli.js extract ${progress.project}`));
    } else if (progress && progress.status === 'completed') {
      const ago = Math.round((Date.now() - progress.finished_at) / 1000);
      const duration = Math.round((progress.finished_at - progress.started_at) / 1000);
      console.log(chalk.bold.green(`\n✅ Last extraction completed`));
      console.log(chalk.gray(` Command: `) + chalk.white(progress.command));
      console.log(chalk.gray(` Project: `) + chalk.white(progress.project));
      console.log(chalk.gray(` Extracted: `) + chalk.cyan(progress.extracted || 0));
      if (progress.failed > 0) console.log(chalk.gray(` Failed: `) + chalk.red(progress.failed));
      console.log(chalk.gray(` Duration: `) + chalk.white(`${Math.floor(duration / 60)}m ${duration % 60}s`));
      console.log(chalk.gray(` Finished: `) + chalk.white(`${formatAgo(ago)} ago`));
    } else {
      console.log(chalk.gray('\n○ No extraction running'));
    }

    // 2. Crawl freshness
    const rows = getCrawlStatus(db);
    if (!rows.length) {
      console.log(chalk.yellow('\nNo domains configured. Check config/ directory.'));
      return;
    }

    // Header cells use the same widths as the data rows below so columns line up.
    const header = [
      'Project'.padEnd(12),
      'Domain'.padEnd(30),
      'Role'.padEnd(11),
      'Last Crawled'.padEnd(13),
      'Age'.padEnd(7),
      'Extraction',
    ].join(' ');

    console.log(chalk.bold.cyan('\n📊 SEO Intel — Domain Status\n'));
    console.log(header);
    console.log('─'.repeat(100));

    // 3. Extraction coverage. DISTINCT keeps the counts per page even if a page
    // has several extraction rows, so extracted_pages can never exceed total_pages.
    const coverage = db.prepare(`
      SELECT d.domain, d.project,
             COUNT(DISTINCT p.id) as total_pages,
             COUNT(DISTINCT e.page_id) as extracted_pages
      FROM domains d
      LEFT JOIN pages p ON p.domain_id = d.id
      LEFT JOIN extractions e ON e.page_id = p.id
      GROUP BY d.id
    `).all();

    // Key by project + domain: the same domain may appear in several projects.
    // A domain-only map is kept as a fallback for rows that carry no project.
    const coverageKey = (projectName, domain) => `${projectName}\u0000${domain}`;
    const covByProjectDomain = new Map();
    const covByDomain = new Map();
    for (const c of coverage) {
      covByProjectDomain.set(coverageKey(c.project, c.domain), c);
      covByDomain.set(c.domain, c);
    }

    for (const r of rows) {
      const daysStr = r.daysAgo === '—' ? '— ' : `${r.daysAgo}d ago `;
      const cov = covByProjectDomain.get(coverageKey(r.project, r.domain))
        ?? covByDomain.get(r.domain)
        ?? { total_pages: 0, extracted_pages: 0 };
      const rawPct = cov.total_pages > 0 ? Math.round((cov.extracted_pages / cov.total_pages) * 100) : 0;
      // Clamp so the bar arithmetic below can never call repeat() with a negative count.
      const pct = Math.min(100, Math.max(0, rawPct));
      const filledCells = Math.round(pct / 5); // 20 cells, 5% each
      const bar = '█'.repeat(filledCells) + '░'.repeat(20 - filledCells);
      const pctColor = pct === 100 ? chalk.green : pct > 50 ? chalk.yellow : chalk.red;
      console.log(
        `${(r.project || '').padEnd(12)} ${(r.domain || '').padEnd(30)} ${(r.role || '—').padEnd(11)} ${(r.lastCrawled || '—').padEnd(13)} ${daysStr.padEnd(7)} ${bar} ${pctColor(pct + '%')}`
      );
    }
    console.log();

    // Show update notice at end of status output
    await printUpdateNotice();
  });
|
|
1191
|
+
|
|
1192
|
+
// ── UPDATE COMMAND ────────────────────────────────────────────────────────
|
|
1193
|
+
// `update`: compares the installed version with the published ones and either
// prints upgrade instructions or, with `--apply`, runs the global npm install.
program
  .command('update')
  .description('Check for updates and show upgrade instructions')
  .option('--apply', 'Auto-apply the update via npm')
  .action(async (opts) => {
    const installCommand = 'npm install -g seo-intel@latest';

    console.log(chalk.dim('\n Checking for updates...\n'));

    const info = await forceUpdateCheck();

    console.log(chalk.bold.cyan(' SEO Intel — Update Check\n'));
    console.log(chalk.gray(' Current version: ') + chalk.white(info.current));

    // Only sources that actually reported a version are listed.
    const remoteVersions = [
      [' npm registry: ', info.npmVersion],
      [' froggo.pro: ', info.froggoVersion],
    ];
    for (const [label, version] of remoteVersions) {
      if (version) {
        console.log(chalk.gray(label) + chalk.white(version));
      }
    }

    if (!info.hasUpdate) {
      console.log(chalk.green("\n ✓ You're on the latest version.\n"));
      return;
    }

    console.log(chalk.yellow(`\n ⬆ Update available: ${info.current} → ${info.latest}`));

    if (info.changelog) {
      console.log(chalk.gray("\n What's new:"));
      // Show at most the first five changelog lines.
      const preview = info.changelog.split('\n').slice(0, 5);
      preview.forEach((entry) => {
        console.log(chalk.gray(' ') + chalk.white(entry));
      });
    }

    // Without --apply: just print how to upgrade.
    if (!opts.apply) {
      console.log(chalk.gray('\n To update:'));
      const viaNpm = info.source === 'npm' || info.npmVersion;
      if (viaNpm) {
        console.log(chalk.cyan(` ${installCommand}`));
        console.log(chalk.dim(' or: seo-intel update --apply'));
      }
      if (info.downloadUrl) {
        console.log(chalk.cyan(` ${info.downloadUrl}`));
      }
      console.log('');
      return;
    }

    // With --apply: run the global install, streaming npm's output to the terminal.
    console.log(chalk.dim('\n Applying update...\n'));
    const { spawnSync } = await import('child_process');
    const outcome = spawnSync('npm', ['install', '-g', 'seo-intel@latest'], {
      stdio: 'inherit',
      shell: true,
    });

    if (outcome.status !== 0) {
      console.log(chalk.red('\n ✗ Update failed. Try manually:'));
      console.log(chalk.cyan(` ${installCommand}\n`));
      return;
    }
    console.log(chalk.green('\n ✓ Updated successfully! Restart any running seo-intel processes.\n'));
  });
|
|
1251
|
+
|
|
1252
|
+
// ── AUTH (OAuth connections) ──────────────────────────────────────────────
|
|
1253
|
+
program
  .command('auth [provider]')
  .description('Connect OAuth services (google, etc.) or show connection status')
  .option('--disconnect', 'Disconnect / remove stored tokens')
  .option('--port <port>', 'Callback port for OAuth redirect (default: 9876)')
  /**
   * `auth` command handler.
   *
   * - Without a provider: prints the OAuth connection status of every provider
   *   returned by getProviderRequirements(), then which API keys are present in
   *   the local `.env` file (only the first 8 characters of a key are shown).
   * - With `--disconnect`: removes the stored tokens for the provider.
   * - Otherwise: runs the OAuth flow for the provider and prints granted scopes.
   *
   * @param {string|undefined} provider - provider id given on the command line
   * @param {{disconnect?: boolean, port?: string}} opts - parsed CLI options
   */
  .action(async (provider, opts) => {
    const { startOAuthFlow, getAllConnectionStatus, clearTokens, getProviderRequirements } = await import('./lib/oauth.js');

    // No provider → show status
    if (!provider) {
      const statuses = getAllConnectionStatus();
      const requirements = getProviderRequirements();

      console.log(chalk.bold.cyan('\n 🔐 OAuth Connections\n'));

      for (const req of requirements) {
        // A provider listed in the requirements may have no status entry;
        // treat it as "not configured" instead of throwing.
        const status = statuses[req.id] ?? {};
        if (status.connected) {
          console.log(chalk.green(` ✓ ${req.name}`) + chalk.dim(` — connected (${status.scopes?.length ?? 0} scopes)`));
        } else if (status.hasCredentials) {
          console.log(chalk.yellow(` ○ ${req.name}`) + chalk.dim(' — credentials configured, not connected'));
          console.log(chalk.dim(` → seo-intel auth ${req.id}`));
        } else {
          console.log(chalk.red(` ✗ ${req.name}`) + chalk.dim(' — not configured'));
          console.log(chalk.dim(` → Add ${req.envVars.join(' + ')} to .env`));
          if (req.setupUrl) {
            console.log(chalk.dim(` → Create credentials: ${req.setupUrl}`));
          }
        }
        console.log();
      }

      // Show API key auth alongside OAuth
      console.log(chalk.bold.cyan(' 🔑 API Key Auth\n'));

      // A fresh install has no .env yet: report every key as "not set"
      // rather than crashing the status view.
      let envLines = [];
      try {
        envLines = readFileSync(join(__dirname, '.env'), 'utf8').split('\n');
      } catch (err) {
        if (err.code !== 'ENOENT') throw err;
      }

      const keys = ['GEMINI_API_KEY', 'ANTHROPIC_API_KEY', 'OPENAI_API_KEY', 'DEEPSEEK_API_KEY'];
      for (const key of keys) {
        const prefix = `${key}=`;
        const line = envLines.find((l) => l.startsWith(prefix));
        // Take everything after the first '=' so values containing '=' are not cut short.
        const value = line ? line.slice(prefix.length).trim() : '';
        const name = key.replace('_API_KEY', '').replace('_', ' ');
        if (value) {
          console.log(chalk.green(` ✓ ${name}`) + chalk.dim(` — ${value.slice(0, 8)}...`));
        } else {
          console.log(chalk.dim(` ○ ${name} — not set`));
        }
      }
      console.log();
      return;
    }

    // Disconnect
    if (opts.disconnect) {
      clearTokens(provider);
      console.log(chalk.green(`\n ✓ Disconnected from ${provider}. Tokens removed.\n`));
      return;
    }

    // Validate the callback port up front so a typo is not passed on as NaN.
    let port;
    if (opts.port) {
      port = Number.parseInt(opts.port, 10);
      if (!Number.isInteger(port) || port < 1 || port > 65535) {
        console.error(chalk.red(`\n ✗ Invalid --port value: ${opts.port}\n`));
        process.exitCode = 1;
        return;
      }
    }

    // Start OAuth flow
    console.log(chalk.dim(`\n Starting ${provider} OAuth flow...`));
    console.log(chalk.dim(' A browser window will open for authorization.\n'));

    try {
      const result = await startOAuthFlow(provider, { port });
      console.log(chalk.green(`\n ✓ Connected to ${provider}!`));
      console.log(chalk.dim(` Scopes: ${result.scopes.join(', ')}\n`));
    } catch (err) {
      console.error(chalk.red(`\n ✗ OAuth failed: ${err.message}\n`));
      // Errors whose message contains "Missing" get the credential setup guide.
      if (err.message.includes('Missing')) {
        console.log(chalk.yellow(' Setup instructions:'));
        console.log(chalk.dim(' 1. Go to https://console.cloud.google.com/apis/credentials'));
        console.log(chalk.dim(' 2. Create OAuth 2.0 Client ID (type: Desktop app)'));
        console.log(chalk.dim(' 3. Add to .env:'));
        console.log(chalk.cyan(' GOOGLE_CLIENT_ID=your-client-id'));
        console.log(chalk.cyan(' GOOGLE_CLIENT_SECRET=your-client-secret\n'));
      }
    }
  });
|
|
1332
|
+
|
|
1333
|
+
// ── COMPETITORS MANAGEMENT ────────────────────────────────────────────────
|
|
1334
|
+
program
  .command('competitors <project>')
  .description('List, add, or remove competitors for a project')
  .option('--add <domain>', 'Add a competitor domain')
  .option('--remove <domain>', 'Remove a competitor domain')
  .option('--add-owned <domain>', 'Add an owned subdomain')
  .option('--remove-owned <domain>', 'Remove an owned subdomain')
  .option('--set-target <domain>', 'Change the target domain')
  /**
   * `competitors` command handler.
   *
   * Reads config/<project>.json, applies whichever of the add / remove /
   * add-owned / remove-owned / set-target options were given, writes the file
   * back only when something changed, and always prints the resulting domain
   * configuration.
   *
   * Exits with code 1 when the config file cannot be read or parsed.
   *
   * @param {string} project - project name; selects config/<project>.json
   * @param {object} opts - parsed CLI options
   */
  .action((project, opts) => {
    const configPath = join(__dirname, `config/${project}.json`);
    let config;
    try {
      config = JSON.parse(readFileSync(configPath, 'utf8'));
    } catch (err) {
      if (err.code === 'ENOENT') {
        console.error(chalk.red(`Config not found: config/${project}.json`));
        console.log(chalk.dim(' Run: seo-intel setup'));
      } else {
        // The file exists but is unreadable or not valid JSON; say so instead
        // of claiming it is missing.
        console.error(chalk.red(`Config unreadable: config/${project}.json (${err.message})`));
      }
      process.exit(1);
    }

    // Tolerate configs that lack a competitors list.
    if (!Array.isArray(config.competitors)) config.competitors = [];

    /**
     * Turn user input (bare host or full URL) into a config entry.
     * Only a real http(s):// scheme counts as "already a URL"; a bare host that
     * merely starts with "http" (e.g. httpbin.org) still gets https:// prefixed.
     */
    const toEntry = (input, role) => {
      const url = /^https?:\/\//i.test(input) ? input : `https://${input}`;
      let domain;
      try {
        domain = new URL(url).hostname.replace(/^www\./, '');
      } catch {
        domain = input; // unparseable input is kept verbatim
      }
      return { url, domain, role };
    };

    let modified = false;

    // ── Add competitor
    if (opts.add) {
      const entry = toEntry(opts.add, 'competitor');
      if (config.competitors.some((c) => c.domain === entry.domain)) {
        console.log(chalk.yellow(` ⚠ ${entry.domain} is already a competitor`));
      } else {
        config.competitors.push(entry);
        console.log(chalk.green(` ✓ Added competitor: ${entry.domain}`));
        modified = true;
      }
    }

    // ── Remove competitor
    if (opts.remove) {
      const { domain } = toEntry(opts.remove, 'competitor');
      const before = config.competitors.length;
      config.competitors = config.competitors.filter((c) => c.domain !== domain);
      if (config.competitors.length < before) {
        console.log(chalk.green(` ✓ Removed competitor: ${domain}`));
        modified = true;
      } else {
        console.log(chalk.yellow(` ⚠ ${domain} not found in competitors`));
      }
    }

    // ── Add owned subdomain
    if (opts.addOwned) {
      if (!config.owned) config.owned = [];
      const entry = toEntry(opts.addOwned, 'owned');
      if (config.owned.some((o) => o.domain === entry.domain)) {
        console.log(chalk.yellow(` ⚠ ${entry.domain} is already an owned domain`));
      } else {
        config.owned.push(entry);
        console.log(chalk.green(` ✓ Added owned domain: ${entry.domain}`));
        modified = true;
      }
    }

    // ── Remove owned subdomain
    if (opts.removeOwned) {
      if (!config.owned) config.owned = [];
      const { domain } = toEntry(opts.removeOwned, 'owned');
      const before = config.owned.length;
      config.owned = config.owned.filter((o) => o.domain !== domain);
      if (config.owned.length < before) {
        console.log(chalk.green(` ✓ Removed owned domain: ${domain}`));
        modified = true;
      } else {
        console.log(chalk.yellow(` ⚠ ${domain} not found in owned domains`));
      }
    }

    // ── Change target
    if (opts.setTarget) {
      const entry = toEntry(opts.setTarget, 'target');
      config.target = entry;
      // Keep the context URL in sync; create the context if the config has none.
      config.context = { ...(config.context || {}), url: entry.url };
      console.log(chalk.green(` ✓ Target changed to: ${entry.domain}`));
      modified = true;
    }

    // Save if modified
    if (modified) {
      writeFileSync(configPath, JSON.stringify(config, null, 2) + '\n');
      console.log(chalk.dim(`\n Saved → config/${project}.json`));
    }

    // ── Always show current config
    console.log(chalk.bold.cyan(`\n 📋 ${project} — Domain Configuration\n`));
    console.log(chalk.white(' Target:'));
    console.log(chalk.green(` ● ${config.target?.domain ?? '(not set)'}`));

    if (config.owned?.length) {
      console.log(chalk.white('\n Owned (subdomains):'));
      for (const o of config.owned) {
        console.log(chalk.blue(` ○ ${o.domain}`));
      }
    }

    console.log(chalk.white('\n Competitors:'));
    for (const c of config.competitors) {
      console.log(chalk.red(` ◆ ${c.domain}`));
    }

    console.log(chalk.dim(`\n Total: ${config.competitors.length} competitors` +
      (config.owned?.length ? ` + ${config.owned.length} owned` : '') + '\n'));

    // Hint about re-crawl
    if (modified) {
      console.log(chalk.yellow(' → Run a crawl to update data for new domains:'));
      console.log(chalk.cyan(` node cli.js crawl ${project}\n`));
    }
  });
|
|
1460
|
+
|
|
1461
|
+
// ── Shared analysis runner ─────────────────────────────────────────────────
|
|
1462
|
+
/**
 * Run the Gemini-backed analysis for one project and persist the result.
 *
 * Steps: load the project's config, read the competitor summary, keyword
 * matrix and heading structure from the DB helpers, build the prompt, write
 * the prompt to reports/<project>-prompt-<ts>.txt, call callGemini(), take the
 * first `{ … }` span of the response as JSON, write it to
 * reports/<project>-analysis-<ts>.json, insert a row into `analyses`, and
 * print a summary.
 *
 * Returns without doing anything when the project has no config or the
 * summary has no row with role 'target'. Terminates the process with exit
 * code 1 when Gemini returns nothing or when parsing/saving fails.
 *
 * @param {string} project - project name to analyse
 * @param {object} db - database handle (must expose `prepare`)
 * @returns {Promise<void>}
 */
async function runAnalysis(project, db) {
  const configs = loadAllConfigs();
  const config = configs.find((c) => c.project === project);
  if (!config) return;

  const summary = getCompetitorSummary(db, project);
  const keywordMatrix = getKeywordMatrix(db, project);
  const headings = getHeadingStructure(db, project);

  const target = summary.find((s) => s.role === 'target');
  const competitors = summary.filter((s) => s.role === 'competitor');
  if (!target) return;

  target.domain = config.target.domain;
  // NOTE(review): domains are overwritten by index, which assumes the summary
  // rows come back in the same order as config.competitors — confirm.
  competitors.forEach((c, i) => { c.domain = config.competitors[i]?.domain || c.domain; });

  const buildPromptFn = await getBuildAnalysisPrompt();
  const prompt = buildPromptFn({
    project, target, competitors, keywordMatrix,
    headingStructure: headings, context: config.context,
  });

  // Keep a copy of the exact prompt that was sent.
  writeFileSync(join(__dirname, `reports/${project}-prompt-${Date.now()}.txt`), prompt, 'utf8');

  const result = await callGemini(prompt);
  if (!result) { console.error(chalk.red('Gemini returned no response.')); process.exit(1); }

  try {
    // Greedy match: from the first '{' to the last '}' in the response.
    const jsonMatch = result.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      // Report the real problem instead of a null-dereference TypeError.
      throw new Error('no JSON object found in model response');
    }
    const analysis = JSON.parse(jsonMatch[0]);
    const outPath = join(__dirname, `reports/${project}-analysis-${Date.now()}.json`);
    writeFileSync(outPath, JSON.stringify(analysis, null, 2), 'utf8');

    // Save to DB
    db.prepare(`
      INSERT INTO analyses (project, generated_at, model, keyword_gaps, long_tails, quick_wins, new_pages, content_gaps, positioning, technical_gaps, raw)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `).run(
      project, Date.now(), 'gemini',
      JSON.stringify(analysis.keyword_gaps || []),
      JSON.stringify(analysis.long_tails || []),
      JSON.stringify(analysis.quick_wins || []),
      JSON.stringify(analysis.new_pages || []),
      JSON.stringify(analysis.content_gaps || []),
      JSON.stringify(analysis.positioning || {}),
      JSON.stringify(analysis.technical_gaps || []),
      result,
    );

    printAnalysisSummary(analysis, project);
    console.log(chalk.green(`\n✅ Analysis saved: ${outPath}`));
  } catch (err) {
    console.error(chalk.red(`Could not parse analysis JSON: ${err.message}`));
    process.exit(1);
  }
}
|
|
1518
|
+
|
|
1519
|
+
// ── EXTRACT ────────────────────────────────────────────────────────────────
|
|
1520
|
+
program
  .command('extract <project>')
  .description('Run AI extraction on all crawled-but-not-yet-extracted pages (requires Solo/Agency)')
  .option('--stealth', 'Advanced browser mode for JS-heavy and dynamic sites')
  /**
   * `extract` command handler.
   *
   * Selects every page of the project that has no row in `extractions`,
   * fetches each one (through a single shared stealth session when --stealth
   * is given, otherwise through crawlAll), runs the extractor on it and stores
   * the extraction and keywords. Progress is reported through writeProgress()
   * before every page and once more on completion.
   *
   * @param {string} project - project whose pending pages are extracted
   * @param {{stealth?: boolean}} opts - parsed CLI options
   */
  .action(async (project, opts) => {
    if (!requirePro('extract')) return;

    const db = getDb();
    const queue = db.prepare(`
      SELECT p.id, p.url, p.word_count,
      e.id as extracted
      FROM pages p
      JOIN domains d ON d.id = p.domain_id
      LEFT JOIN extractions e ON e.page_id = p.id
      WHERE d.project = ? AND e.id IS NULL
    `).all(project);

    if (!queue.length) {
      console.log(chalk.green(`✅ All pages already extracted for ${project}`));
      process.exit(0);
    }

    const total = queue.length;
    const modeLabel = opts.stealth ? chalk.magenta('STEALTH') : chalk.gray('standard');
    console.log(chalk.bold.cyan(`\n⚙️ Extracting ${total} pages for ${project} via Qwen [${modeLabel}]...\n`));

    const startedAt = Date.now();
    let okCount = 0;
    let failCount = 0;

    // Small promise-based sleep used for the jittered stealth delays.
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // ── Stealth: single session across all pages (cookie accumulation) ──
    let session = null;
    if (opts.stealth) {
      const stealthModule = await import('./crawler/stealth.js');
      session = await stealthModule.createStealthSession();
      console.log(chalk.magenta(' 🥷 Advanced mode — full browser rendering, persistent sessions\n'));
    }

    try {
      for (const page of queue) {
        const position = okCount + failCount + 1;

        process.stdout.write(chalk.gray(` [${position}/${total}] ${page.url.slice(0, 65)} → `));
        if (opts.stealth) process.stdout.write(chalk.magenta('stealth '));
        process.stdout.write(chalk.gray('fetching...'));

        writeProgress({
          status: 'running',
          command: 'extract',
          project,
          current_url: page.url,
          page_index: position,
          total,
          percent: Math.round(((okCount + failCount) / total) * 100),
          started_at: startedAt,
          failed: failCount,
          stealth: !!opts.stealth,
        });

        try {
          let fetched;

          if (session) {
            // Stealth: reuse persistent browser session
            fetched = await session.fetchPage(page.url);
          } else {
            // Standard: quick single-page crawl
            const crawlerModule = await import('./crawler/index.js');
            const results = await crawlerModule.crawlAll(page.url);
            fetched = results[0] || null;
          }

          if (!fetched || fetched.status >= 400) {
            const why = fetched ? `HTTP ${fetched.status}` : 'no data';
            process.stdout.write(chalk.red(` ✗ ${why}\n`));
            failCount++;
            if (session) {
              // Jittered delay even on failure — don't hammer a blocking site
              await sleep(1500 + Math.random() * 2000);
            }
            // `continue` also skips the regular post-page delay below.
            continue;
          }

          process.stdout.write(chalk.gray(' extracting...'));
          const runExtraction = await getExtractPage();
          const extraction = await runExtraction(fetched);
          insertExtraction(db, { pageId: page.id, data: extraction });
          insertKeywords(db, page.id, extraction.keywords);

          // Also update headings + links + schemas with fresh data from stealth fetch
          if (session) {
            insertHeadings(db, page.id, fetched.headings);
            insertLinks(db, page.id, fetched.links);
            if (fetched.parsedSchemas?.length) insertPageSchemas(db, page.id, fetched.parsedSchemas);
          }

          const schemaTag = fetched.parsedSchemas?.length
            ? ` [${fetched.parsedSchemas.length} schema]`
            : '';
          process.stdout.write(chalk.green(` ✓${schemaTag}\n`));
          okCount++;
        } catch (err) {
          // Any per-page failure is reported and counted; the loop goes on.
          process.stdout.write(chalk.red(` ✗ ${err.message}\n`));
          failCount++;
        }

        // Jittered delay in stealth mode (2-5s) to mimic human browsing
        if (session) {
          await sleep(2000 + Math.random() * 3000);
        }
      }
    } finally {
      // Always close stealth session
      if (session) {
        await session.close();
        console.log(chalk.magenta(`\n 🥷 Stealth session closed (${session.getPageCount()} pages fetched)`));
      }
    }

    writeProgress({
      status: 'completed',
      command: 'extract',
      project,
      extracted: okCount,
      failed: failCount,
      total,
      started_at: startedAt,
      finished_at: Date.now(),
    });
    console.log(chalk.bold.green(`\n✅ Extraction complete: ${okCount} extracted, ${failCount} failed\n`));
  });
|
|
1630
|
+
|
|
1631
|
+
// ── HTML DASHBOARD ─────────────────────────────────────────────────────────
|
|
1632
|
+
program
  .command('html [project]')
  .description('Generate HTML dashboard (all projects with switcher)')
  .option('--open', 'Open dashboard in browser after generation', true)
  .option('--no-open', 'Do not open browser')
  /**
   * `html` command handler.
   *
   * Always generates the unified all-projects dashboard; the `project`
   * argument is accepted for backwards compatibility but ignored. Exits with
   * code 1 when no project configs exist. Unless --no-open is given, the
   * generated file is opened with the platform's default opener.
   *
   * @param {string|undefined} project - ignored
   * @param {{open: boolean}} opts - parsed CLI options
   */
  .action(async (project, opts) => {
    const db = getDb();
    const configs = loadAllConfigs();

    if (!configs.length) {
      console.log(chalk.red('No project configs found in config/ directory.'));
      process.exit(1);
    }

    const tierLabel = isPro() ? '' : chalk.dim(' (crawl-only — upgrade to Solo for full dashboard)');
    console.log(chalk.bold.cyan(`\n📊 Generating dashboard...`) + tierLabel + '\n');
    configs.forEach((c) => console.log(chalk.gray(` • ${c.project} (${c.target.domain})`)));
    console.log();

    const outPath = generateMultiDashboard(db, configs);

    console.log(chalk.bold.green(`✅ Dashboard generated: ${outPath}\n`));
    console.log(chalk.dim(` file://${outPath}\n`));

    if (opts.open) {
      const { exec } = await import('child_process');
      // cmd.exe's `start` treats the first quoted argument as the window
      // title, so an empty title must precede the quoted path on Windows.
      const opener = process.platform === 'darwin'
        ? 'open'
        : process.platform === 'win32'
          ? 'start ""'
          : 'xdg-open';
      // Fire-and-forget: failing to launch a browser must not fail the command.
      exec(`${opener} "${outPath}"`);
    }
  });
|
|
1664
|
+
|
|
1665
|
+
// ── SITE GRAPH ────────────────────────────────────────────────────────────────
|
|
1666
|
+
program
  .command('graph <project>')
  .description('Generate Obsidian-style site graph visualization')
  .option('-d, --depth <n>', 'Max click depth to include (default: all)', '99')
  .option('--open', 'Open in browser after generation')
  /**
   * `graph` command handler.
   *
   * Generates the site-graph HTML for one project via generateSiteGraphHtml()
   * and optionally opens it with the platform's default opener.
   *
   * @param {string} project - project name; must have a config
   * @param {{depth: string, open?: boolean}} opts - parsed CLI options
   */
  .action(async (project, opts) => {
    const db = getDb();
    const config = loadConfig(project);
    if (!config) {
      console.log(chalk.red(`No config found for project: ${project}`));
      return;
    }

    console.log(chalk.bold.cyan(`\n🕸️ Generating site graph for ${project}...\n`));

    // Fall back to 99 only when the value is not a number; an explicit
    // `--depth 0` must not be replaced by the default.
    const requestedDepth = Number.parseInt(opts.depth, 10);
    const maxDepth = Number.isNaN(requestedDepth) ? 99 : requestedDepth;

    const { generateSiteGraphHtml } = await import('./reports/generate-site-graph.js');
    const outPath = await generateSiteGraphHtml(db, project, { maxDepth });

    console.log(chalk.bold.green(`✅ Site graph generated: ${outPath}`));
    console.log(chalk.dim(` Open in browser to explore.\n`));

    if (opts.open) {
      const { exec } = await import('child_process');
      // cmd.exe's `start` treats the first quoted argument as the window
      // title, so an empty title must precede the quoted path on Windows.
      const opener = process.platform === 'darwin'
        ? 'open'
        : process.platform === 'win32'
          ? 'start ""'
          : 'xdg-open';
      // Fire-and-forget: failing to launch a browser must not fail the command.
      exec(`${opener} "${outPath}"`);
    }
  });
|
|
1695
|
+
|
|
1696
|
+
// ── HTML ALL-PROJECTS DASHBOARD ──────────────────────────────────────────────
|
|
1697
|
+
program
  .command('html-all')
  .description('Generate a single HTML dashboard with all projects (dropdown switcher)')
  /**
   * `html-all` command handler: builds one dashboard covering every project
   * config found by loadAllConfigs(). Exits with code 1 when there are none.
   */
  .action(() => {
    const database = getDb();
    const projectConfigs = loadAllConfigs();

    if (!projectConfigs.length) {
      console.log(chalk.red('No project configs found in config/ directory.'));
      process.exit(1);
    }

    console.log(chalk.bold.cyan(`\n📊 Generating multi-project dashboard...\n`));
    // One bullet per project, showing its target domain.
    for (const cfg of projectConfigs) {
      console.log(chalk.gray(` • ${cfg.project} (${cfg.target.domain})`));
    }
    console.log();

    const dashboardPath = generateMultiDashboard(database, projectConfigs);

    console.log(chalk.bold.green(`✅ All-projects dashboard generated: ${dashboardPath}\n`));
  });
|
|
1717
|
+
|
|
1718
|
+
// ── SERVE DASHBOARD ──────────────────────────────────────────────────────
|
|
1719
|
+
program
  .command('serve')
  .description('Start dashboard web server with live crawl/extract controls')
  .option('--port <n>', 'Server port', '3000')
  .option('--open', 'Open browser automatically', true)
  .option('--no-open', 'Do not open browser')
  /**
   * `serve` command handler: passes the chosen port (and the auto-open flag)
   * to the server through environment variables, then loads ./server.js.
   *
   * @param {{port: string, open: boolean}} opts - parsed CLI options
   */
  .action(async ({ port: portOption, open: autoOpen }) => {
    // server.js is configured through the environment, so set it before importing.
    process.env.PORT = String(parseInt(portOption, 10));
    if (autoOpen) {
      process.env.SEO_INTEL_AUTO_OPEN = '1';
    }
    await import('./server.js');
  });
|
|
1731
|
+
|
|
1732
|
+
// ── SETUP WEB WIZARD ──────────────────────────────────────────────────────
|
|
1733
|
+
program
  .command('setup-web')
  .description('Open the web-based setup wizard in your browser')
  .option('--port <n>', 'Server port', '3000')
  /**
   * `setup-web` command handler: loads ./server.js on the requested port and
   * then tries to open the /setup page in the default browser. If the opener
   * command fails, the URL is printed so it can be opened manually.
   *
   * @param {{port: string}} opts - parsed CLI options
   */
  .action(async (opts) => {
    const serverPort = parseInt(opts.port, 10);
    process.env.PORT = String(serverPort);
    await import('./server.js');

    // Open browser to setup page
    const setupUrl = `http://localhost:${serverPort}/setup`;
    const childProcess = await import('child_process');

    let opener;
    switch (process.platform) {
      case 'darwin':
        opener = 'open';
        break;
      case 'win32':
        opener = 'start';
        break;
      default:
        opener = 'xdg-open';
    }

    try {
      childProcess.execSync(`${opener} ${setupUrl}`, { stdio: 'ignore' });
      console.log(` Opening ${setupUrl} in your browser...`);
    } catch {
      console.log(` Open ${setupUrl} in your browser to start the setup wizard.`);
    }
  });
|
|
1755
|
+
|
|
1756
|
+
// ── ATTACK COMMANDS ────────────────────────────────────────────────────────
|
|
1757
|
+
|
|
1758
|
+
// Shared helper: filter out app routes, login pages, query-string URLs
|
|
1759
|
+
/**
 * Decide whether a crawled URL looks like a content page rather than an app
 * route, a login/signup page, or a query-string URL.
 *
 * Path patterns are matched against the URL's pathname and app-subdomain
 * prefixes against the hostname's labels, so a host such as `sendgrid.com`
 * or `snapp.io` is not excluded merely because the full URL string happens
 * to contain `/send` or `app.`. Strings that cannot be parsed as absolute
 * URLs fall back to plain substring matching on the whole string.
 *
 * @param {string} url - page URL (absolute URLs are parsed; others are matched as text)
 * @returns {boolean} true when the URL should be treated as content
 */
function isContentPage(url) {
  if (url.includes('?')) return false;

  const appPaths = ['/signup', '/login', '/register', '/onboarding', '/dashboard',
    '/app/', '/swap', '/portfolio', '/send', '/rewards', '/perps', '/vaults'];
  const appSubdomains = ['dashboard.', 'app.', 'customers.', 'console.'];

  let hostname;
  let pathname;
  try {
    ({ hostname, pathname } = new URL(url));
  } catch {
    // Not an absolute URL (e.g. a bare path): match against the raw string.
    if (appPaths.some((p) => url.includes(p))) return false;
    if (appSubdomains.some((s) => url.includes(s))) return false;
    return true;
  }

  if (appPaths.some((p) => pathname.includes(p))) return false;

  // A prefix matches only as a whole label: at the start of the host or
  // immediately after a dot.
  const isAppHost = appSubdomains.some(
    (s) => hostname.startsWith(s) || hostname.includes(`.${s}`),
  );
  return !isAppHost;
}
|
|
1768
|
+
|
|
1769
|
+
/**
 * Print a three-line banner for an attack command: a 60-character rule, the
 * title followed by the upper-cased project name, and a closing rule.
 *
 * @param {string} title - banner title
 * @param {string} project - project name (printed in upper case)
 */
function printAttackHeader(title, project) {
  const rule = '═'.repeat(60);
  const emit = (text) => console.log(chalk.bold.cyan(text));

  emit(`\n${rule}`);
  emit(` ${title} — ${project.toUpperCase()}`);
  emit(`${rule}\n`);
}
|
|
1774
|
+
|
|
1775
|
+
// ── SHALLOW CHAMPION ───────────────────────────────────────────────────────
|
|
1776
|
+
program
  .command('shallow <project>')
  .description('Find competitor pages that are important but thin (Shallow Champion attack)')
  .option('--max-words <n>', 'Max word count threshold', '700')
  .option('--max-depth <n>', 'Max click depth', '2')
  /**
   * `shallow` command handler.
   *
   * Lists indexable competitor pages of the project whose click depth is at
   * most --max-depth and whose word count is above 80 and at most --max-words,
   * after dropping app/login/query-string URLs via isContentPage(). Results
   * are grouped by domain in query order.
   *
   * @param {string} project - project name
   * @param {{maxWords: string, maxDepth: string}} opts - parsed CLI options
   */
  .action((project, opts) => {
    if (!requirePro('shallow')) return;

    const maxWords = Number.parseInt(opts.maxWords, 10);
    const maxDepth = Number.parseInt(opts.maxDepth, 10);
    // A non-numeric threshold would reach the query as NaN and silently match
    // nothing; reject it with a clear message instead.
    if (Number.isNaN(maxWords) || Number.isNaN(maxDepth)) {
      console.error(chalk.red('--max-words and --max-depth must be integers.'));
      process.exitCode = 1;
      return;
    }

    const db = getDb();

    printAttackHeader('⚡ Shallow Champion Attack', project);

    const rows = db.prepare(`
      SELECT p.url, p.click_depth, p.word_count, d.domain
      FROM pages p
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND d.role = 'competitor'
      AND p.click_depth <= ? AND p.word_count <= ? AND p.word_count > 80
      AND p.is_indexable = 1
      ORDER BY p.click_depth ASC, p.word_count ASC
    `).all(project, maxDepth, maxWords).filter((r) => isContentPage(r.url));

    if (!rows.length) {
      console.log(chalk.yellow('No shallow champions found with current thresholds.'));
      return;
    }

    console.log(chalk.gray(`Found ${rows.length} shallow champion targets (depth ≤${maxDepth}, words ≤${maxWords}):\n`));

    // Group by domain; a Map keeps first-seen order and is safe for any key text.
    const byDomain = new Map();
    for (const r of rows) {
      if (!byDomain.has(r.domain)) byDomain.set(r.domain, []);
      byDomain.get(r.domain).push(r);
    }

    for (const [domain, pages] of byDomain) {
      console.log(chalk.bold.yellow(` ${domain}`));
      for (const p of pages) {
        // One arrow per level; clamp so an unexpected negative depth cannot
        // make repeat() throw.
        const depthBar = '→'.repeat(Math.max(0, (p.click_depth ?? 0) + 1));
        const wordColor = p.word_count < 300 ? chalk.red : chalk.yellow;
        console.log(` ${chalk.gray(depthBar)} ${p.url.replace(/https?:\/\/[^/]+/, '') || '/'}`);
        console.log(` ${wordColor(`${p.word_count} words`)} · depth ${p.click_depth}`);
      }
      console.log();
    }

    console.log(chalk.bold.green('💡 Action: Write 1500+ word versions of these pages with proper schema + FAQs.'));
    console.log(chalk.gray(' These competitors already validated the topic. Out-invest them.\n'));
  });
|
|
1826
|
+
|
|
1827
|
+
// ── CONTENT DECAY ─────────────────────────────────────────────────────────
|
|
1828
|
+
program
  .command('decay <project>')
  .description('Find competitor pages decaying due to staleness (Content Decay Arbitrage)')
  .option('--months <n>', 'Months since last update to flag as stale', '18')
  /**
   * `decay` command handler.
   *
   * Reports two lists of indexable competitor pages at click depth ≤ 2:
   *  1. pages whose modified_date is older than the cutoff (today minus
   *     --months), and
   *  2. up to 20 pages of 300–1500 words that carry no modified or published
   *     date at all.
   * Both lists are filtered through isContentPage().
   *
   * @param {string} project - project name
   * @param {{months: string}} opts - parsed CLI options
   */
  .action((project, opts) => {
    if (!requirePro('decay')) return;

    const monthsAgo = Number.parseInt(opts.months, 10);
    // A non-numeric value would produce an Invalid Date and make
    // toISOString() throw a RangeError further down.
    if (Number.isNaN(monthsAgo)) {
      console.error(chalk.red('--months must be an integer.'));
      process.exitCode = 1;
      return;
    }

    const db = getDb();
    const cutoffDate = new Date();
    cutoffDate.setMonth(cutoffDate.getMonth() - monthsAgo);
    // YYYY-MM-DD, compared against modified_date in the query below.
    const cutoff = cutoffDate.toISOString().split('T')[0];

    printAttackHeader('📉 Content Decay Arbitrage', project);

    // Pages with known stale modified_date
    const staleKnown = db.prepare(`
      SELECT p.url, p.click_depth, p.word_count, p.modified_date, p.published_date, d.domain
      FROM pages p
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND d.role = 'competitor'
      AND p.click_depth <= 2 AND p.word_count > 100
      AND p.modified_date IS NOT NULL AND p.modified_date < ?
      AND p.is_indexable = 1
      ORDER BY p.click_depth ASC, p.modified_date ASC
    `).all(project, cutoff).filter((r) => isContentPage(r.url));

    // High-value pages with NO date metadata at all (unknown freshness = treat as suspect)
    const staleUnknown = db.prepare(`
      SELECT p.url, p.click_depth, p.word_count, p.modified_date, p.published_date, d.domain
      FROM pages p
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND d.role = 'competitor'
      AND p.click_depth <= 2 AND p.word_count BETWEEN 300 AND 1500
      AND p.modified_date IS NULL AND p.published_date IS NULL
      AND p.is_indexable = 1
      ORDER BY p.click_depth ASC, p.word_count ASC
      LIMIT 20
    `).all(project).filter((r) => isContentPage(r.url));

    if (!staleKnown.length && !staleUnknown.length) {
      console.log(chalk.yellow('No decay targets found. More crawl data or date metadata needed.'));
      return;
    }

    if (staleKnown.length) {
      console.log(chalk.bold.red(`🔴 Confirmed stale (modified > ${monthsAgo} months ago): ${staleKnown.length} pages\n`));
      for (const r of staleKnown) {
        console.log(` ${chalk.bold(r.domain)} · depth ${r.click_depth}`);
        console.log(` ${r.url}`);
        console.log(` ${chalk.red(`Last modified: ${r.modified_date}`)} · ${r.word_count} words\n`);
      }
    }

    if (staleUnknown.length) {
      console.log(chalk.bold.yellow(`🟡 No date metadata — freshness unknown (${staleUnknown.length} pages):\n`));
      for (const r of staleUnknown) {
        console.log(` ${chalk.bold(r.domain)} · depth ${r.click_depth} · ${r.word_count} words`);
        console.log(` ${r.url}\n`);
      }
    }

    console.log(chalk.bold.green('💡 Action: Publish updated versions of these topics now.'));
    // Use the current year so the hint does not go stale itself.
    console.log(chalk.gray(` Your ${new Date().getFullYear()} publish date vs their stale content = freshness advantage.\n`));
  });
|
|
1892
|
+
|
|
1893
|
+
// ── HEADINGS AUDIT ────────────────────────────────────────────────────────
|
|
1894
|
+
// headings-audit <project>
// Prints the H1–H3 outline of up to 30 competitor content pages (indexable,
// > 200 words, within --depth clicks) and writes a Markdown report containing
// the full heading outline of each page plus a ready-made analysis prompt.
program
  .command('headings-audit <project>')
  .description('Pull competitor heading structures for AI gap analysis')
  .option('--domain <domain>', 'Audit a specific competitor domain')
  .option('--depth <n>', 'Max click depth to include', '2')
  .action(async (project, opts) => {
    if (!requirePro('headings-audit')) return;
    const db = getDb();

    // Always pass a radix. A non-numeric --depth would otherwise become NaN and
    // silently match no rows, so fall back to the option's declared default.
    const parsedDepth = Number.parseInt(opts.depth, 10);
    const maxDepth = Number.isNaN(parsedDepth) ? 2 : parsedDepth;

    printAttackHeader('🏗️ Heading Architecture Audit', project);

    // Placeholder order in the SQL below: project, click depth, then (optionally) domain.
    const domainFilter = opts.domain ? 'AND d.domain = ?' : '';
    const params = opts.domain ? [project, maxDepth, opts.domain] : [project, maxDepth];

    const pages = db.prepare(`
      SELECT p.id, p.url, p.word_count, p.click_depth, d.domain
      FROM pages p JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND d.role = 'competitor'
        AND p.click_depth <= ? AND p.word_count > 200
        ${domainFilter}
        AND p.is_indexable = 1
      ORDER BY d.domain, p.click_depth ASC, p.word_count DESC
    `).all(...params).filter(r => isContentPage(r.url));

    if (!pages.length) {
      console.log(chalk.yellow('No pages found matching criteria.'));
      return;
    }

    let report = `# Heading Architecture Audit — ${project.toUpperCase()}\nGenerated: ${new Date().toISOString()}\n\n`;

    // Prepared once and reused for every page instead of re-preparing per iteration.
    const headingsStmt = db.prepare(`
      SELECT level, text FROM headings WHERE page_id = ?
      ORDER BY rowid ASC
    `);

    for (const page of pages.slice(0, 30)) {
      const headings = headingsStmt.all(page.id);

      // Pages without any stored headings are left out of both outputs.
      if (!headings.length) continue;

      // The report keeps every heading level; the console view stops at H3.
      const structure = headings.map(h => `${'#'.repeat(h.level)} ${h.text}`).join('\n');
      console.log(chalk.bold(`\n${page.domain} · ${page.url.replace(/https?:\/\/[^/]+/, '') || '/'}`));
      console.log(chalk.gray(` depth ${page.click_depth} · ${page.word_count} words`));
      for (const h of headings) {
        if (h.level > 3) continue;
        const indent = ' '.repeat(h.level - 1);
        const color = h.level === 1 ? chalk.bold.white : h.level === 2 ? chalk.yellow : chalk.gray;
        console.log(` ${indent}${color('H' + h.level + ':')} ${h.text}`);
      }

      report += `## ${page.domain} — ${page.url}\n`;
      report += `*click depth: ${page.click_depth} · words: ${page.word_count}*\n\n`;
      report += '```\n' + structure + '\n```\n\n';
      report += `**Gemini prompt:**\n`;
      report += `> Analyze this heading structure from ${page.domain}. What H2/H3 sub-topics are logically missing? What would a user expect to find that isn't covered? Be specific.\n\n---\n\n`;
    }

    const outPath = join(__dirname, `reports/${project}-headings-audit-${Date.now()}.md`);
    writeFileSync(outPath, report, 'utf8');

    console.log(chalk.bold.green(`\n✅ Full audit saved: ${outPath}`));
    console.log(chalk.gray(' Feed this to Gemini: "Find the gaps in each heading structure above."\n'));
  });
|
|
1956
|
+
|
|
1957
|
+
// ── ORPHAN ENTITIES ───────────────────────────────────────────────────────
|
|
1958
|
+
// orphans <project>
// Lists entities that two or more competitor domains mention (per the stored
// `primary_entities` extraction) but for which no competitor URL looks like a
// dedicated page. "Dedicated" is a URL-substring heuristic on the entity slug.
program
  .command('orphans <project>')
  .description('Find orphaned entities — mentioned everywhere but no dedicated page (needs Qwen extraction)')
  .action((project) => {
    if (!requirePro('orphans')) return;
    const db = getDb();

    printAttackHeader('👻 Orphan Entity Attack', project);

    // Check if we have any extraction data with primary_entities
    const extractionCount = db.prepare(`
      SELECT COUNT(*) as c FROM extractions e
      JOIN pages p ON p.id = e.page_id
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND e.primary_entities IS NOT NULL AND e.primary_entities != '[]' AND e.primary_entities != ''
    `).get(project);

    if (!extractionCount || extractionCount.c === 0) {
      console.log(chalk.yellow('⚠️ No entity extraction data found.'));
      console.log(chalk.gray(' Run: node cli.js extract ' + project + ' (requires Ollama + Qwen)\n'));
      return;
    }

    // Get all entities from competitor pages
    const extractions = db.prepare(`
      SELECT e.primary_entities, p.url, d.domain
      FROM extractions e
      JOIN pages p ON p.id = e.page_id
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND d.role = 'competitor'
        AND e.primary_entities IS NOT NULL AND e.primary_entities != ''
    `).all(project);

    // Build entity → set of competitor domains mentioning it
    const entityMap = new Map();
    for (const row of extractions) {
      let entities;
      try {
        entities = JSON.parse(row.primary_entities);
      } catch {
        // Malformed JSON in a stored extraction: best-effort, skip this row.
        continue;
      }
      // The column is model output, so tolerate anything that is not a list of strings
      // instead of throwing on `.toLowerCase()` / `for...of`.
      if (!Array.isArray(entities)) continue;

      for (const entity of entities) {
        if (typeof entity !== 'string') continue;
        const key = entity.toLowerCase().trim();
        if (!key) continue;
        if (!entityMap.has(key)) entityMap.set(key, new Set());
        entityMap.get(key).add(row.domain);
      }
    }

    // Get all competitor URLs to check for dedicated pages
    const allUrls = db.prepare(`
      SELECT p.url FROM pages p
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND d.role = 'competitor'
    `).all(project).map(r => r.url.toLowerCase());

    // Find entities mentioned by 2+ competitor domains with no dedicated URL
    const orphans = [];
    for (const [entity, domains] of entityMap) {
      if (domains.size < 2) continue; // mentioned by 2+ competitors

      const slug = entity.replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, '');
      const pathForm = entity.replace(/\s+/g, '/');
      // A slug with no letters/digits (entity made only of non-ASCII or punctuation
      // characters) must not be tested: `includes('')` is true for every URL and
      // would hide the entity from the report unconditionally.
      const slugIsUsable = /[a-z0-9]/.test(slug);

      const hasDedicatedPage = allUrls.some(
        u => (slugIsUsable && u.includes(slug)) || u.includes(pathForm)
      );
      if (!hasDedicatedPage) {
        orphans.push({ entity, domains: [...domains], domainCount: domains.size });
      }
    }

    orphans.sort((a, b) => b.domainCount - a.domainCount);

    if (!orphans.length) {
      if (entityMap.size === 0) {
        console.log(chalk.yellow('⚠️ Entity extraction data exists but no entities were extracted.'));
        console.log(chalk.gray(' Re-run: node cli.js extract ' + project + '\n'));
      } else {
        console.log(chalk.green('No orphaned entities found — competitors have dedicated pages for all major entities.'));
      }
      return;
    }

    console.log(chalk.bold(`Found ${orphans.length} orphaned entities (mentioned by 2+ competitors, no dedicated page):\n`));
    // Only the 20 most widely mentioned orphans are printed.
    for (const o of orphans.slice(0, 20)) {
      console.log(` ${chalk.bold.yellow(o.entity)}`);
      console.log(` Mentioned by: ${o.domains.join(', ')}`);
      console.log(` ${chalk.green('→ Build: /solutions/' + o.entity.replace(/\s+/g, '-').toLowerCase())}\n`);
    }

    console.log(chalk.bold.green('💡 Action: Build dedicated pillar pages for top orphaned entities.'));
    console.log(chalk.gray(' Focused page > scattered mentions, every time.\n'));
  });
|
|
2043
|
+
|
|
2044
|
+
// ── ENTITY COVERAGE MAP ──────────────────────────────────────────────────
|
|
2045
|
+
// entities <project>
// Builds an entity coverage map from the stored `primary_entities` extractions:
//   gaps     – mentioned by >= --min-mentions competitor domains, by none of yours
//   shared   – mentioned by at least one competitor and by a target/owned domain
//   yourOnly – mentioned only by target/owned domains
// Prints the three groups and, with --save, writes a Markdown version to reports/.
program
  .command('entities <project>')
  .description('Entity coverage map — semantic gap at the entity level (concepts competitors mention, you don\'t)')
  .option('--min-mentions <n>', 'Minimum competitor mentions to show', '2')
  .option('--save', 'Save entity map to reports/')
  .action((project, opts) => {
    if (!requirePro('entities')) return;
    const db = getDb();
    const config = loadConfig(project);
    // Explicit radix; 0 / NaN fall back to the documented default of 2.
    const minMentions = Number.parseInt(opts.minMentions, 10) || 2;

    printAttackHeader('🧬 Entity Coverage Map', project);

    // ── Gather all entities from all domains ──
    const allExtractions = db.prepare(`
      SELECT e.primary_entities, d.domain, d.role, p.url
      FROM extractions e
      JOIN pages p ON p.id = e.page_id
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ?
        AND e.primary_entities IS NOT NULL AND e.primary_entities != '[]' AND e.primary_entities != ''
    `).all(project);

    if (!allExtractions.length) {
      console.log(chalk.yellow('⚠️ No entity extraction data found.'));
      console.log(chalk.gray(' Run: node cli.js extract ' + project + ' (requires Ollama + Qwen)\n'));
      return;
    }

    // Build entity → { target, competitor, owned (domain sets), pages }
    const entityMap = new Map();

    for (const row of allExtractions) {
      let entities;
      try {
        entities = JSON.parse(row.primary_entities);
      } catch {
        // Malformed JSON in a stored extraction: skip the row.
        continue;
      }
      // The column is model output; ignore anything that is not a list of strings
      // rather than throwing on `.toLowerCase()`.
      if (!Array.isArray(entities)) continue;

      for (const entity of entities) {
        if (typeof entity !== 'string') continue;
        const key = entity.toLowerCase().trim();
        if (key.length < 2) continue;

        if (!entityMap.has(key)) {
          entityMap.set(key, { target: new Set(), competitor: new Set(), owned: new Set(), pages: [] });
        }
        const entry = entityMap.get(key);
        // Any role other than 'target' / 'owned' is counted on the competitor side.
        if (row.role === 'target') entry.target.add(row.domain);
        else if (row.role === 'owned') entry.owned.add(row.domain);
        else entry.competitor.add(row.domain);
        entry.pages.push({ domain: row.domain, url: row.url, role: row.role });
      }
    }

    // ── Classify entities ──
    const gaps = []; // competitor has, you don't
    const shared = []; // both have
    const yourOnly = []; // you have, competitor doesn't

    for (const [entity, data] of entityMap) {
      const compCount = data.competitor.size;
      const hasTarget = data.target.size > 0 || data.owned.size > 0;

      // Competitor-only entities below the --min-mentions threshold land in no bucket.
      if (compCount >= minMentions && !hasTarget) {
        gaps.push({ entity, compCount, domains: [...data.competitor], pages: data.pages });
      } else if (compCount > 0 && hasTarget) {
        shared.push({ entity, compCount, targetDomains: [...data.target, ...data.owned], compDomains: [...data.competitor] });
      } else if (compCount === 0 && hasTarget) {
        yourOnly.push({ entity, targetDomains: [...data.target, ...data.owned] });
      }
    }

    gaps.sort((a, b) => b.compCount - a.compCount);
    shared.sort((a, b) => b.compCount - a.compCount);

    // Fall back to the project name when the config carries no target domain,
    // instead of throwing on an undefined `config.target`.
    const reportTitle = config?.target?.domain ?? project;
    let mdOutput = `# Entity Coverage Map — ${reportTitle}\nGenerated: ${new Date().toISOString().slice(0, 10)}\n\n`;

    // ── Coverage summary ──
    console.log(chalk.bold(` Summary: ${entityMap.size} unique entities across all domains\n`));
    console.log(` ${chalk.red(`🔴 Gaps:`)} ${chalk.bold(gaps.length)} entities competitors mention, you don't`);
    console.log(` ${chalk.green('🟢 Shared:')} ${chalk.bold(shared.length)} entities both sides cover`);
    console.log(` ${chalk.blue('🔵 Yours:')} ${chalk.bold(yourOnly.length)} entities only you mention`);
    console.log('');

    mdOutput += `## Summary\n- **${gaps.length}** entity gaps (competitors have, you don't)\n- **${shared.length}** shared entities\n- **${yourOnly.length}** your unique entities\n\n`;

    // ── Entity gaps (the actionable ones) ──
    if (gaps.length > 0) {
      console.log(chalk.bold.red(` 🔴 Entity Gaps — competitors cover these, you don't:\n`));
      mdOutput += `## Entity Gaps\n\n`;

      for (const g of gaps.slice(0, 20)) {
        const domainList = g.domains.join(', ');
        console.log(` ${chalk.bold.yellow(g.entity)}`);
        console.log(chalk.gray(` Mentioned by: ${domainList} (${g.compCount} competitor${g.compCount > 1 ? 's' : ''})`));

        // Show example pages
        const examplePages = g.pages.filter(p => p.role === 'competitor').slice(0, 2);
        for (const p of examplePages) {
          // Named urlPath (not `path`) so it cannot shadow a module-level import.
          const urlPath = p.url.replace(/https?:\/\/[^/]+/, '') || '/';
          console.log(chalk.gray(` └ ${p.domain}${urlPath.slice(0, 50)}`));
        }
        console.log('');

        mdOutput += `### ${g.entity}\n- Competitors: ${domainList}\n`;
        for (const p of examplePages) {
          mdOutput += `- Example: \`${p.url}\`\n`;
        }
        mdOutput += '\n';
      }
      if (gaps.length > 20) {
        console.log(chalk.gray(` ... and ${gaps.length - 20} more gaps\n`));
      }
    }

    // ── Shared entities (competitive overlap) ──
    if (shared.length > 0) {
      console.log(chalk.bold.green(` 🟢 Shared Entities — both you and competitors cover:\n`));
      mdOutput += `## Shared Entities\n\n`;

      for (const s of shared.slice(0, 10)) {
        console.log(` ${chalk.green('✓')} ${s.entity} ${chalk.gray(`(you + ${s.compCount} competitor${s.compCount > 1 ? 's' : ''})`)}`);
        mdOutput += `- ✓ ${s.entity} — you + ${s.compCount} competitor(s)\n`;
      }
      if (shared.length > 10) {
        console.log(chalk.gray(` ... and ${shared.length - 10} more shared\n`));
      }
      console.log('');
    }

    // ── Your unique entities ──
    if (yourOnly.length > 0) {
      console.log(chalk.bold.blue(` 🔵 Your Unique Entities — competitors don't mention:\n`));
      mdOutput += `\n## Your Unique Entities\n\n`;

      for (const y of yourOnly.slice(0, 10)) {
        console.log(` ${chalk.blue('★')} ${y.entity}`);
        mdOutput += `- ★ ${y.entity}\n`;
      }
      if (yourOnly.length > 10) {
        console.log(chalk.gray(` ... and ${yourOnly.length - 10} more\n`));
      }
      console.log('');
    }

    // ── Action items ──
    // Collected first and numbered on output, so the list always starts at 1
    // (hard-coded numbers printed a lone "3." when there were no gaps).
    console.log(chalk.bold.green(' 💡 Actions:'));
    const actionItems = [];
    if (gaps.length > 0) {
      actionItems.push(`Create content covering top entity gaps (start with "${gaps[0].entity}")`);
      actionItems.push(`Build dedicated pages for high-frequency gap entities`);
    }
    if (yourOnly.length > 0) {
      actionItems.push(`Double down on your unique entities — they're your differentiator`);
    }
    actionItems.forEach((text, i) => {
      console.log(chalk.green(` ${i + 1}. ${text}`));
    });
    console.log('');

    // ── Save ──
    if (opts.save) {
      const outPath = join(__dirname, `reports/${project}-entities-${Date.now()}.md`);
      writeFileSync(outPath, mdOutput, 'utf8');
      console.log(chalk.bold.green(` ✅ Entity map saved: ${outPath}\n`));
    }
  });
|
|
2205
|
+
|
|
2206
|
+
// ── SCHEMA INTEL ─────────────────────────────────────────────────────────
|
|
2207
|
+
// schemas <project>
// Structured-data comparison across the project's domains, built from the rows
// returned by getSchemasByProject(): a type × domain coverage matrix, rating and
// pricing listings, schema types competitors use that the target does not, and
// a list of suggested actions. With --save the same data is written as Markdown.
program
  .command('schemas <project>')
  .description('Deep structured data competitive analysis — ratings, pricing, rich results gaps')
  .option('--save', 'Save report to reports/')
  .action((project, opts) => {
    const db = getDb();

    printAttackHeader('🔬 Schema Intelligence Report', project);

    const rows = getSchemasByProject(db, project);

    if (rows.length === 0) {
      console.log(chalk.yellow(' No structured data found. Run a crawl first — schemas are parsed from JSON-LD during crawl.'));
      console.log(chalk.dim(' Tip: node cli.js crawl ' + project + '\n'));
      return;
    }

    // Load config to identify target domain.
    // Resolved against __dirname (like the report paths of the sibling commands)
    // so the lookup does not depend on the current working directory.
    const configPath = join(__dirname, 'config', `${project}.json`);
    let targetDomain = null;
    try {
      const config = JSON.parse(readFileSync(configPath, 'utf8'));
      targetDomain = config.target?.domain;
    } catch {
      // Deliberate best-effort: without a readable config every domain is
      // treated as a competitor and no column is highlighted.
    }

    // ── Group by domain (and count schema types per domain in the same pass) ──
    const byDomain = new Map();
    const typeCounts = new Map(); // domain → Map(schema_type → count)
    for (const row of rows) {
      if (!byDomain.has(row.domain)) {
        byDomain.set(row.domain, []);
        typeCounts.set(row.domain, new Map());
      }
      byDomain.get(row.domain).push(row);
      const perType = typeCounts.get(row.domain);
      perType.set(row.schema_type, (perType.get(row.schema_type) || 0) + 1);
    }
    const countFor = (dom, type) => typeCounts.get(dom)?.get(type) ?? 0;

    // ── Schema type coverage matrix ──
    console.log(chalk.bold('\n SCHEMA TYPE COVERAGE'));
    console.log(chalk.dim(' Which structured data types each domain uses\n'));

    const allTypes = [...new Set(rows.map(r => r.schema_type))].sort();
    // Target domain first, everything else alphabetically.
    const domainList = [...byDomain.keys()].sort((a, b) => {
      if (a === targetDomain) return -1;
      if (b === targetDomain) return 1;
      return a.localeCompare(b);
    });

    const typeColWidth = 22;
    const domColWidth = 12;
    // Pad the PLAIN text to the column width and colourise afterwards. Padding a
    // string that already contains ANSI escapes counts the escapes as width, and
    // their length varies by style and by whether colour output is enabled.
    const cell = (text, paint) => {
      const padded = String(text).padEnd(domColWidth);
      return paint ? paint(padded) : padded;
    };

    // Header
    let header = ' ' + 'Schema Type'.padEnd(typeColWidth);
    for (const dom of domainList) {
      const label = dom.slice(0, domColWidth - 1);
      header += cell(label, dom === targetDomain ? chalk.bold.hex('#DAA520') : null);
    }
    console.log(header);
    console.log(chalk.dim(' ' + '─'.repeat(typeColWidth + domColWidth * domainList.length)));

    for (const type of allTypes) {
      let line = ' ' + String(type).padEnd(typeColWidth);
      for (const dom of domainList) {
        const isTarget = dom === targetDomain;
        const count = countFor(dom, type);
        if (count > 0) {
          line += cell(`✓ ${count}`, isTarget ? chalk.hex('#DAA520') : chalk.green);
        } else {
          // A missing type is flagged in red for the target, muted for competitors.
          line += isTarget ? cell('✗', chalk.red) : cell('·', chalk.dim);
        }
      }
      console.log(line);
    }

    // ── Rating intel — who has review stars? ──
    const withRatings = rows.filter(r => r.rating !== null);
    if (withRatings.length > 0) {
      console.log(chalk.bold('\n\n RATING INTELLIGENCE'));
      console.log(chalk.dim(' Competitors with aggregateRating — rich star snippets in SERPs\n'));

      for (const r of withRatings) {
        const isTarget = r.domain === targetDomain;
        const domLabel = isTarget ? chalk.bold.hex('#DAA520')(r.domain) : chalk.white(r.domain);
        // Clamp to 0–5: a rating on a larger scale (e.g. 8.7/10) would otherwise
        // make the second repeat() count negative and throw a RangeError.
        const filled = Math.min(5, Math.max(0, Math.round(r.rating) || 0));
        const stars = '★'.repeat(filled) + '☆'.repeat(5 - filled);
        const ratingStr = `${r.rating}/5 ${chalk.yellow(stars)}`;
        const countStr = r.rating_count ? chalk.dim(` (${r.rating_count} reviews)`) : '';
        const nameStr = r.name ? chalk.dim(` — ${r.name.slice(0, 50)}`) : '';
        console.log(` ${domLabel} ${ratingStr}${countStr}${nameStr}`);
        console.log(chalk.dim(` ${r.url.slice(0, 80)}`));
      }

      // Check if target has ratings
      const targetRatings = withRatings.filter(r => r.domain === targetDomain);
      const compRatings = withRatings.filter(r => r.domain !== targetDomain);
      if (targetRatings.length === 0 && compRatings.length > 0) {
        console.log(chalk.red(`\n ⚠ GAP: ${compRatings.length} competitor page(s) have star ratings — you have NONE`));
        console.log(chalk.dim(' Adding aggregateRating schema gives you rich star snippets in search results'));
      }
    }

    // ── Pricing intel ──
    const withPricing = rows.filter(r => r.price !== null);
    if (withPricing.length > 0) {
      console.log(chalk.bold('\n\n PRICING SCHEMA'));
      console.log(chalk.dim(' Structured pricing data (enables price rich results)\n'));

      for (const r of withPricing) {
        const isTarget = r.domain === targetDomain;
        const domLabel = isTarget ? chalk.bold.hex('#DAA520')(r.domain) : chalk.white(r.domain);
        const priceStr = r.currency ? `${r.currency} ${r.price}` : r.price;
        const nameStr = r.name ? ` — ${r.name.slice(0, 40)}` : '';
        console.log(` ${domLabel} ${chalk.green(priceStr)}${chalk.dim(nameStr)}`);
      }

      const targetPricing = withPricing.filter(r => r.domain === targetDomain);
      const compPricing = withPricing.filter(r => r.domain !== targetDomain);
      if (targetPricing.length === 0 && compPricing.length > 0) {
        console.log(chalk.red(`\n ⚠ GAP: ${compPricing.length} competitor page(s) have pricing schema — you have NONE`));
      }
    }

    // ── Gap analysis — what competitors have that you don't ──
    const targetTypes = new Set((byDomain.get(targetDomain) || []).map(s => s.schema_type));
    const compTypes = new Set(rows.filter(r => r.domain !== targetDomain).map(r => r.schema_type));
    const schemaGaps = [...compTypes].filter(t => !targetTypes.has(t));
    const yourExclusives = [...targetTypes].filter(t => !compTypes.has(t));

    if (schemaGaps.length > 0 || yourExclusives.length > 0) {
      console.log(chalk.bold('\n\n COMPETITIVE GAPS'));

      if (schemaGaps.length > 0) {
        console.log(chalk.red(`\n Missing schema types (competitors have, you don't):`));
        for (const gap of schemaGaps) {
          // Find which competitors have it
          const competitorsWith = [...new Set(rows.filter(r => r.schema_type === gap && r.domain !== targetDomain).map(r => r.domain))];
          console.log(chalk.red(` ✗ ${gap}`) + chalk.dim(` — used by: ${competitorsWith.join(', ')}`));
        }
      }

      if (yourExclusives.length > 0) {
        console.log(chalk.green(`\n Your exclusive schema types (competitors lack):`));
        for (const exc of yourExclusives) {
          console.log(chalk.green(` ✓ ${exc}`) + chalk.dim(' — competitive advantage'));
        }
      }
    }

    // ── Actionable recommendations ──
    console.log(chalk.bold('\n\n ACTIONS'));

    const actions = [];
    if (schemaGaps.length > 0) {
      const highValue = schemaGaps.filter(t => ['Product', 'SoftwareApplication', 'FAQPage', 'HowTo', 'Review', 'AggregateRating'].includes(t));
      if (highValue.length > 0) {
        actions.push(`Add high-value schema types: ${highValue.join(', ')}`);
      }
      const remaining = schemaGaps.filter(t => !highValue.includes(t));
      if (remaining.length > 0) {
        actions.push(`Consider adding: ${remaining.join(', ')}`);
      }
    }
    if (withRatings.length > 0 && !rows.some(r => r.domain === targetDomain && r.rating !== null)) {
      actions.push('Add aggregateRating schema for star-rich snippets (highest SERP CTR impact)');
    }
    if (withPricing.length > 0 && !rows.some(r => r.domain === targetDomain && r.price !== null)) {
      actions.push('Add pricing schema (Product/Offer) for price-rich results');
    }
    if (!targetTypes.has('FAQPage') && compTypes.has('FAQPage')) {
      actions.push('Add FAQPage schema — expands your SERP real estate with accordion snippets');
    }
    if (!targetTypes.has('BreadcrumbList') && compTypes.has('BreadcrumbList')) {
      actions.push('Add BreadcrumbList schema — improves SERP display and navigation signals');
    }

    if (actions.length > 0) {
      for (let i = 0; i < actions.length; i++) {
        console.log(` ${chalk.cyan(`${i + 1}.`)} ${actions[i]}`);
      }
    } else {
      console.log(chalk.green(' Your schema coverage matches or exceeds competitors!'));
    }

    // ── Summary stats ──
    console.log(chalk.bold('\n\n SUMMARY'));
    console.log(` Total schemas parsed: ${chalk.bold(rows.length)}`);
    console.log(` Unique types: ${chalk.bold(allTypes.length)}`);
    console.log(` Domains with schemas: ${chalk.bold(byDomain.size)}`);
    if (schemaGaps.length > 0) console.log(` Schema gaps: ${chalk.red.bold(schemaGaps.length)}`);
    if (withRatings.length > 0) console.log(` Pages with ratings: ${chalk.yellow.bold(withRatings.length)}`);
    if (withPricing.length > 0) console.log(` Pages with pricing: ${chalk.green.bold(withPricing.length)}`);
    console.log('');

    // ── Save option ──
    if (opts.save) {
      const mdLines = [
        `# Schema Intelligence Report — ${project}`,
        `Generated: ${new Date().toISOString().split('T')[0]}`,
        '',
        `## Coverage Matrix`,
        '',
        `| Type | ${domainList.join(' | ')} |`,
        `| --- | ${domainList.map(() => '---').join(' | ')} |`,
      ];
      for (const type of allTypes) {
        const cells = domainList.map(dom => {
          const count = countFor(dom, type);
          return count > 0 ? `✓ (${count})` : '✗';
        });
        mdLines.push(`| ${type} | ${cells.join(' | ')} |`);
      }
      mdLines.push('');
      if (withRatings.length > 0) {
        mdLines.push('## Ratings', '');
        for (const r of withRatings) {
          mdLines.push(`- **${r.domain}**: ${r.rating}/5 (${r.rating_count || '?'} reviews) — ${r.name || r.url}`);
        }
        mdLines.push('');
      }
      if (schemaGaps.length > 0) {
        mdLines.push('## Gaps (competitors have, you don\'t)', '');
        for (const gap of schemaGaps) mdLines.push(`- ✗ ${gap}`);
        mdLines.push('');
      }
      if (actions.length > 0) {
        mdLines.push('## Actions', '');
        for (const a of actions) mdLines.push(`- ${a}`);
      }

      // Written next to the other generated reports (under __dirname) rather than
      // relative to whatever directory the CLI happened to be started from.
      const outPath = join(__dirname, `reports/schema-intel-${project}-${new Date().toISOString().split('T')[0]}.md`);
      writeFileSync(outPath, mdLines.join('\n'), 'utf8');
      console.log(chalk.bold.green(` ✅ Report saved: ${outPath}\n`));
    }
  });
|
|
2436
|
+
|
|
2437
|
+
// ── SCHEMA BACKFILL ──────────────────────────────────────────────────────
|
|
2438
|
+
// schemas-backfill <project>
// Re-fetches already-crawled pages (status 200, no row in page_schemas) over
// plain HTTP, parses JSON-LD out of the HTML and stores it via insertPageSchemas().
// Pages are fetched one at a time with a pause of --delay ms between requests.
program
  .command('schemas-backfill <project>')
  .description('Backfill JSON-LD schema data for already-crawled pages (lightweight HTTP fetch, no Playwright)')
  // Commander calls a custom option parser as (value, previousOrDefault). Passing
  // bare `parseInt` therefore used the default (500) as the RADIX, which is
  // invalid and made every --delay value parse to NaN. Pin the radix explicitly.
  .option('--max <n>', 'Max pages to backfill', (value) => Number.parseInt(value, 10))
  .option('--delay <ms>', 'Delay between fetches in ms', (value) => Number.parseInt(value, 10), 500)
  .action(async (project, opts) => {
    const db = getDb();
    const { parseJsonLd } = await import('./crawler/schema-parser.js');
    const fetch = (await import('node-fetch')).default;

    printAttackHeader('🔬 Schema Backfill', project);

    // Get all pages for this project that don't have schemas yet
    const pages = db.prepare(`
      SELECT p.id, p.url, d.domain
      FROM pages p
      JOIN domains d ON d.id = p.domain_id
      LEFT JOIN page_schemas ps ON ps.page_id = p.id
      WHERE d.project = ? AND p.status_code = 200 AND ps.id IS NULL
      ORDER BY d.domain, p.url
    `).all(project);

    // Missing, zero, negative or non-numeric --max means "no limit".
    const maxPages = Number.isInteger(opts.max) && opts.max > 0 ? opts.max : pages.length;
    // An explicit --delay 0 is honoured; only invalid values fall back to 500 ms.
    const delayMs = Number.isFinite(opts.delay) && opts.delay >= 0 ? opts.delay : 500;
    const toProcess = pages.slice(0, maxPages);

    console.log(` Found ${pages.length} pages without schema data`);
    console.log(` Processing: ${toProcess.length} pages\n`);

    let done = 0;
    let found = 0;
    let failed = 0;
    let totalSchemas = 0;

    for (const page of toProcess) {
      process.stdout.write(chalk.gray(` [${done + 1}/${toProcess.length}] ${page.url.slice(0, 70)} `));
      try {
        // NOTE(review): `timeout` is a node-fetch v2 request option — confirm the
        // installed node-fetch major version still honours it.
        const res = await fetch(page.url, {
          timeout: 10000,
          headers: {
            'User-Agent': 'Mozilla/5.0 (compatible; SEOIntelBot/1.0)',
            'Accept': 'text/html',
          },
        });
        if (!res.ok) {
          process.stdout.write(chalk.red(`HTTP ${res.status}\n`));
          failed++;
        } else {
          const html = await res.text();
          const schemas = parseJsonLd(html);
          if (schemas.length > 0) {
            insertPageSchemas(db, page.id, schemas);
            totalSchemas += schemas.length;
            found++;
            process.stdout.write(chalk.green(`✓ ${schemas.length} schema(s)\n`));
          } else {
            process.stdout.write(chalk.dim('no JSON-LD\n'));
          }
        }
      } catch (err) {
        // Network/parse/DB failure for one page must not abort the whole run.
        process.stdout.write(chalk.red(`✗ ${String(err?.message ?? err).slice(0, 40)}\n`));
        failed++;
      }
      done++;
      // Pause after EVERY page except the last — including pages that answered
      // with an HTTP error, which previously skipped the delay via `continue`.
      if (done < toProcess.length) await new Promise(r => setTimeout(r, delayMs));
    }

    console.log('');
    console.log(chalk.bold.green(` ✅ Backfill complete`));
    console.log(` Pages processed: ${done}`);
    console.log(` Pages with schemas: ${chalk.bold(found)}`);
    console.log(` Total schemas stored: ${chalk.bold(totalSchemas)}`);
    if (failed > 0) console.log(` Failed: ${chalk.red(failed)}`);
    console.log(chalk.dim(`\n Run: node cli.js schemas ${project}\n`));
  });
|
|
2510
|
+
|
|
2511
|
+
// ── INTENT FRICTION ───────────────────────────────────────────────────────
|
|
2512
|
+
// friction <project>
// Flags competitor content pages whose extracted search intent is informational
// or commercial while their primary CTA reads like a sales gate (contact,
// demo, enterprise, …), and prints each one with a suggested counter-move.
program
  .command('friction <project>')
  .description('Find competitor pages with intent/CTA mismatch — high friction targets (needs Qwen extraction)')
  .action((project) => {
    if (!requirePro('friction')) return;
    const db = getDb();

    printAttackHeader('🎯 Intent & Friction Hijacking', project);

    // Competitor pages that have both an intent and a primary CTA extracted.
    const extracted = db.prepare(`
      SELECT e.search_intent, e.cta_primary, e.pricing_tier, p.url, p.word_count, d.domain
      FROM extractions e
      JOIN pages p ON p.id = e.page_id
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND d.role = 'competitor'
        AND e.search_intent IS NOT NULL AND e.search_intent != ''
        AND e.cta_primary IS NOT NULL AND e.cta_primary != ''
      ORDER BY d.domain, p.click_depth ASC
    `).all(project);

    const contentRows = [];
    for (const candidate of extracted) {
      if (isContentPage(candidate.url)) contentRows.push(candidate);
    }

    if (contentRows.length === 0) {
      console.log(chalk.yellow('⚠️ No intent/CTA extraction data found.'));
      console.log(chalk.gray(' Run: node cli.js extract ' + project + ' (requires Ollama + Qwen)\n'));
      return;
    }

    // CTA fragments that signal a gated / sales-led next step.
    const gatedCtaMarkers = ['enterprise', 'sales', 'contact', 'book a demo', 'request', 'talk to'];

    const mismatches = [];
    for (const row of contentRows) {
      const ctaText = (row.cta_primary || '').toLowerCase();
      const intentText = (row.search_intent || '').toLowerCase();

      let ctaIsGated = false;
      for (const marker of gatedCtaMarkers) {
        if (ctaText.includes(marker)) {
          ctaIsGated = true;
          break;
        }
      }
      if (!ctaIsGated) continue;

      // Only research-stage intents count as a mismatch with a gated CTA.
      if (intentText.includes('informational') || intentText.includes('commercial')) {
        mismatches.push(row);
      }
    }

    if (mismatches.length === 0) {
      console.log(chalk.green('No high-friction mismatches found in current extraction data.'));
      console.log(chalk.gray(` (${contentRows.length} pages analyzed)\n`));
      return;
    }

    console.log(chalk.bold.red(`Found ${mismatches.length} high-friction targets:\n`));
    mismatches.forEach((hit) => {
      console.log(` ${chalk.bold(hit.domain)}`);
      console.log(` ${hit.url}`);
      console.log(` Intent: ${chalk.yellow(hit.search_intent)} · CTA: ${chalk.red(hit.cta_primary)}`);
      console.log(` ${chalk.green('→ Build low-friction alternative: same topic, CTA = "Start Free" or "View Pricing"')}\n`);
    });

    console.log(chalk.bold.green('💡 Action: Build transactional pages for these exact topics with low-friction CTAs.'));
    console.log(chalk.gray(' Google rewards pages that solve the user\'s problem without making them jump through hoops.\n'));
  });
|
|
2565
|
+
|
|
2566
|
+
// ── WEEKLY INTEL BRIEF ───────────────────────────────────────────────────
|
|
2567
|
+
program
  .command('brief <project>')
  .description('Weekly SEO Intel Brief — what changed, new gaps, wins, actions')
  .option('--days <n>', 'Lookback window in days', '7')
  .option('--save', 'Save brief to reports/')
  .action((project, opts) => {
    // Prints a brief to the console and mirrors it into a Markdown string
    // that is written to reports/ when --save is given.
    if (!requirePro('brief')) return;
    const db = getDb();
    const config = loadConfig(project);

    // Non-numeric, zero or negative --days falls back to the 7-day default.
    const parsedDays = Number.parseInt(opts.days, 10);
    const days = parsedDays > 0 ? parsedDays : 7;
    const cutoff = Date.now() - (days * 24 * 60 * 60 * 1000);
    const weekOf = new Date().toISOString().slice(0, 10);

    // Strip scheme + host; a bare origin is shown as "/".
    const pathOf = (url) => url.replace(/https?:\/\/[^/]+/, '') || '/';

    // Parse a schema_types JSON column; anything that is not a JSON array yields [].
    const parseSchemaTypes = (raw) => {
      try {
        const parsed = JSON.parse(raw);
        return Array.isArray(parsed) ? parsed : [];
      } catch {
        return [];
      }
    };

    // Prepared once and reused for every domain. The page id is selected so
    // later keyword/schema lookups do not need to re-find the page by URL.
    const newPagesStmt = db.prepare(`
      SELECT p.id, p.url, p.word_count, p.published_date
      FROM pages p JOIN domains d ON d.id = p.domain_id
      WHERE d.domain = ? AND d.project = ? AND p.first_seen_at > ? AND p.is_indexable = 1
      ORDER BY p.first_seen_at DESC
    `);
    // Pages first seen before the window but crawled again inside it.
    const changedPagesStmt = db.prepare(`
      SELECT p.url, p.word_count, p.modified_date
      FROM pages p JOIN domains d ON d.id = p.domain_id
      WHERE d.domain = ? AND d.project = ?
        AND p.crawled_at > ? AND p.first_seen_at < ?
        AND p.is_indexable = 1
      ORDER BY p.crawled_at DESC
    `);

    const hr = '─'.repeat(60);
    const header = `📊 Weekly SEO Intel Brief — ${config.target.domain}\n Week of ${weekOf} (last ${days} days)`;

    console.log(chalk.bold.cyan(`\n${hr}`));
    console.log(chalk.bold.cyan(` ${header}`));
    console.log(chalk.bold.cyan(hr));

    let mdOutput = `# Weekly SEO Intel Brief — ${config.target.domain}\n**Week of ${weekOf}** (last ${days} days)\n\n---\n\n`;

    // ── COMPETITOR MOVES ──
    console.log(chalk.bold('\n COMPETITOR MOVES\n'));
    mdOutput += `## Competitor Moves\n\n`;

    // Guarded like config.owned below, so a config without competitors does not crash.
    const compDomains = (config.competitors || []).map(c => c.domain);
    const compMoves = [];

    for (const comp of compDomains) {
      const newPages = newPagesStmt.all(comp, project, cutoff).filter(r => isContentPage(r.url));
      const changedPages = changedPagesStmt.all(comp, project, cutoff, cutoff).filter(r => isContentPage(r.url));

      if (newPages.length === 0 && changedPages.length === 0) {
        console.log(chalk.gray(` ${comp.padEnd(25)} no changes`));
        mdOutput += `- **${comp}** — no changes\n`;
        continue;
      }

      const parts = [];
      const mdParts = [];
      if (newPages.length > 0) {
        parts.push(chalk.green(`+${newPages.length} new`));
        mdParts.push(`+${newPages.length} new pages`);
      }
      if (changedPages.length > 0) {
        parts.push(chalk.yellow(`${changedPages.length} updated`));
        mdParts.push(`${changedPages.length} updated`);
      }
      console.log(` ${chalk.bold(comp.padEnd(25))} ${parts.join(' · ')}`);
      mdOutput += `- **${comp}** — ${mdParts.join(', ')}\n`;

      // Show the three most recently discovered pages.
      for (const p of newPages.slice(0, 3)) {
        const path = pathOf(p.url);
        console.log(chalk.green(` + ${path.slice(0, 65)}`));
        mdOutput += ` - \`${path}\`\n`;
      }
      if (newPages.length > 3) {
        console.log(chalk.gray(` ... and ${newPages.length - 3} more`));
      }

      compMoves.push({ domain: comp, newPages, changedPages });
    }

    // ── YOUR SITE ──
    console.log(chalk.bold('\n YOUR SITE\n'));
    mdOutput += `\n## Your Site\n\n`;

    const targetDomain = config.target.domain;
    const ownedDomains = (config.owned || []).map(o => o.domain);
    const allOwned = [targetDomain, ...ownedDomains];

    // New-page count for the target, measured with the same filters as the
    // competitor counts so the publishing-rate comparison below is like-for-like.
    let targetNew = 0;

    for (const dom of allOwned) {
      const newPages = newPagesStmt.all(dom, project, cutoff).filter(r => isContentPage(r.url));
      if (dom === targetDomain) targetNew = newPages.length;

      if (newPages.length > 0) {
        console.log(` ${chalk.bold.green(dom.padEnd(25))} +${newPages.length} new page(s)`);
        mdOutput += `- **${dom}** — +${newPages.length} new page(s)\n`;
        for (const p of newPages.slice(0, 3)) {
          const path = pathOf(p.url);
          console.log(chalk.green(` + ${path.slice(0, 65)}`));
          mdOutput += ` - \`${path}\`\n`;
        }
      } else {
        console.log(chalk.gray(` ${dom.padEnd(25)} no new pages`));
        mdOutput += `- **${dom}** — no new pages\n`;
      }
    }

    // ── NEW GAPS DETECTED ──
    console.log(chalk.bold('\n NEW GAPS DETECTED\n'));
    mdOutput += `\n## New Gaps Detected\n\n`;

    // Lower-cased keywords already present on target/owned pages.
    const targetKeywords = new Set(
      db.prepare(`
        SELECT DISTINCT LOWER(k.keyword) as kw
        FROM keywords k JOIN pages p ON p.id = k.page_id JOIN domains d ON d.id = p.domain_id
        WHERE d.project = ? AND (d.role = 'target' OR d.role = 'owned')
      `).all(project).map(r => r.kw)
    );

    // Keywords on the newest competitor pages (up to 10 per competitor) that
    // the target does not have, mapped to the competitors using them.
    const keywordsStmt = db.prepare('SELECT keyword FROM keywords WHERE page_id = ?');
    const gapKeywords = new Map();
    for (const move of compMoves) {
      for (const np of move.newPages.slice(0, 10)) {
        for (const { keyword } of keywordsStmt.all(np.id)) {
          const key = (keyword || '').toLowerCase().trim();
          if (key.length < 3) continue;
          if (targetKeywords.has(key)) continue;
          if (!gapKeywords.has(key)) gapKeywords.set(key, new Set());
          gapKeywords.get(key).add(move.domain);
        }
      }
    }

    // Rank by how many competitors use the keyword; keep the top ten.
    const sortedGaps = [...gapKeywords.entries()]
      .map(([kw, domains]) => ({ keyword: kw, domains: [...domains], count: domains.size }))
      .sort((a, b) => b.count - a.count)
      .slice(0, 10);

    if (sortedGaps.length > 0) {
      for (const g of sortedGaps) {
        console.log(` ${chalk.yellow('⚠️')} ${chalk.bold(g.keyword)} — ${g.domains.join(', ')}`);
        mdOutput += `- ⚠️ **${g.keyword}** — ${g.domains.join(', ')}\n`;
      }
    } else {
      console.log(chalk.green(' No new keyword gaps detected this week.'));
      mdOutput += `No new keyword gaps detected this week.\n`;
    }

    // ── SCHEMA GAPS ──
    // Schema types present on target/owned pages. Best-effort: a failing
    // query leaves the set empty instead of aborting the brief.
    const targetSchema = new Set();
    try {
      const ts = db.prepare(`
        SELECT DISTINCT e.schema_types FROM extractions e
        JOIN pages p ON p.id = e.page_id JOIN domains d ON d.id = p.domain_id
        WHERE d.project = ? AND (d.role = 'target' OR d.role = 'owned')
          AND e.schema_types IS NOT NULL AND e.schema_types != '[]'
      `).all(project);
      for (const row of ts) {
        for (const t of parseSchemaTypes(row.schema_types)) targetSchema.add(t);
      }
    } catch {}

    // Schema types on new competitor pages that the target lacks. The
    // statement is prepared on first use, so it is never compiled when there
    // are no new competitor pages.
    let schemaStmt = null;
    const compSchema = new Map();
    for (const move of compMoves) {
      for (const np of move.newPages.slice(0, 10)) {
        if (!schemaStmt) schemaStmt = db.prepare('SELECT schema_types FROM extractions WHERE page_id = ?');
        const ext = schemaStmt.get(np.id);
        if (!ext?.schema_types) continue;
        for (const st of parseSchemaTypes(ext.schema_types)) {
          if (targetSchema.has(st)) continue;
          if (!compSchema.has(st)) compSchema.set(st, new Set());
          compSchema.get(st).add(move.domain);
        }
      }
    }

    if (compSchema.size > 0) {
      console.log('');
      for (const [schema, domains] of compSchema) {
        console.log(` ${chalk.yellow('⚠️')} ${chalk.bold(schema + ' schema')} — ${[...domains].join(', ')} has it, you don't`);
        mdOutput += `- ⚠️ **${schema} schema** — ${[...domains].join(', ')} has it, you don't\n`;
      }
    }

    // ── ACTIONS ──
    console.log(chalk.bold('\n ACTIONS FOR THIS WEEK\n'));
    mdOutput += `\n## Actions\n\n`;

    const actions = [];

    // Action: cover the most widely used gap keyword.
    if (sortedGaps.length > 0) {
      const topGap = sortedGaps[0];
      actions.push(`Write content covering "${topGap.keyword}" — ${topGap.count} competitor(s) rank for it`);
    }

    // Action: add the first missing schema type.
    if (compSchema.size > 0) {
      const [schema, domains] = [...compSchema.entries()][0];
      actions.push(`Add ${schema} schema markup to relevant pages (${[...domains][0]} already has it)`);
    }

    // Action: match the fastest competitor's publishing rate.
    const compVelocities = compMoves
      .map(m => ({ domain: m.domain, rate: m.newPages.length }))
      .sort((a, b) => b.rate - a.rate);

    if (compVelocities.length > 0 && compVelocities[0].rate > targetNew) {
      actions.push(`Increase publishing rate — ${compVelocities[0].domain} published ${compVelocities[0].rate} pages vs your ${targetNew}`);
    }

    if (actions.length === 0) {
      actions.push('Re-crawl competitors to detect new content');
      actions.push('Review dashboard for technical SEO fixes');
    }

    actions.slice(0, 5).forEach((action, index) => {
      const actionNum = index + 1;
      console.log(` ${chalk.bold.green(`${actionNum}.`)} ${action}`);
      mdOutput += `${actionNum}. ${action}\n`;
    });

    console.log('');
    mdOutput += `\n---\n\nFull report: \`reports/${project}-dashboard.html\`\n`;
    console.log(chalk.gray(` Full report: reports/${project}-dashboard.html\n`));

    // ── Save ──
    if (opts.save) {
      const outPath = join(__dirname, `reports/${project}-brief-${Date.now()}.md`);
      writeFileSync(outPath, mdOutput, 'utf8');
      console.log(chalk.bold.green(` ✅ Brief saved: ${outPath}\n`));
    }

    console.log(chalk.dim(` → Next: node cli.js brief ${project} --save`));
    console.log(chalk.dim(` → Set on cron: weekly Sunday analysis + brief\n`));
  });
|
|
2822
|
+
|
|
2823
|
+
// ── CONTENT VELOCITY ─────────────────────────────────────────────────────
|
|
2824
|
+
program
  .command('velocity <project>')
  .description('Content velocity — how fast each domain publishes (publishing rate + new page detection)')
  .option('--days <n>', 'Lookback window in days', '30')
  .action((project, opts) => {
    // Reports, per domain of the project, how many indexable content pages
    // were first seen or published inside the lookback window.
    if (!requirePro('velocity')) return;
    const db = getDb();

    // Non-numeric, zero or negative --days falls back to the 30-day default,
    // so the per-week rate below always has a positive divisor.
    const parsedDays = Number.parseInt(opts.days, 10);
    const days = parsedDays > 0 ? parsedDays : 30;
    const cutoff = Date.now() - (days * 24 * 60 * 60 * 1000);
    const weeksInWindow = days / 7;

    // One colour per role, shared by every listing in this command.
    const colorForRole = (role) => (role === 'target' ? chalk.green : role === 'owned' ? chalk.blue : chalk.yellow);
    // Strip scheme + host; a bare origin is shown as "/".
    const pathOf = (url) => url.replace(/https?:\/\/[^/]+/, '') || '/';
    // Group rows by domain, preserving first-seen order of the domains.
    const groupByDomain = (rows) => {
      const groups = new Map();
      for (const row of rows) {
        if (!groups.has(row.domain)) groups.set(row.domain, []);
        groups.get(row.domain).push(row);
      }
      return groups;
    };

    printAttackHeader('📈 Content Velocity Tracker', project);

    // ── 1. Pages discovered recently (first_seen_at within window) ──
    const newPages = db.prepare(`
      SELECT d.domain, d.role, p.url, p.first_seen_at, p.published_date, p.word_count, p.click_depth
      FROM pages p
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND p.first_seen_at > ? AND p.is_indexable = 1
      ORDER BY p.first_seen_at DESC
    `).all(project, cutoff).filter(r => isContentPage(r.url));

    // ── 2. Pages with published_date within window ──
    // published_date is compared as a YYYY-MM-DD string, unlike the numeric first_seen_at.
    const cutoffISO = new Date(cutoff).toISOString().slice(0, 10);
    const publishedRecently = db.prepare(`
      SELECT d.domain, d.role, p.url, p.published_date, p.word_count
      FROM pages p
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND p.published_date IS NOT NULL AND p.published_date > ?
        AND p.is_indexable = 1
      ORDER BY p.published_date DESC
    `).all(project, cutoffISO).filter(r => isContentPage(r.url));

    // ── 3. Total page counts per domain (for context) ──
    const totals = db.prepare(`
      SELECT d.domain, d.role, COUNT(*) as total_pages,
        COUNT(p.published_date) as pages_with_date,
        MIN(p.first_seen_at) as earliest_seen,
        MAX(p.first_seen_at) as latest_seen
      FROM pages p JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND p.is_indexable = 1
      GROUP BY d.domain ORDER BY d.role, d.domain
    `).all(project);

    // ── Velocity summary per domain ──
    // The header is padded to the same column widths as the rows so they line up.
    const headerCells = [
      'Domain'.padEnd(30),
      'Role'.padEnd(12),
      'Total'.padEnd(7),
      'New'.padEnd(6),
      'Rate/wk'.padEnd(8),
      'Published',
    ];
    console.log(chalk.bold(' Domain Velocity Summary') + chalk.gray(` (last ${days} days)\n`));
    console.log(chalk.gray(` ${headerCells.join(' ')}`));
    console.log(chalk.gray(' ' + '─'.repeat(85)));

    const newByDomain = groupByDomain(newPages);
    const publishedByDomain = groupByDomain(publishedRecently);

    const velocities = [];

    for (const t of totals) {
      const newCount = (newByDomain.get(t.domain) || []).length;
      const pubCount = (publishedByDomain.get(t.domain) || []).length;
      // The rate uses whichever signal is larger: discovered vs. dated pages.
      const rateLabel = (Math.max(newCount, pubCount) / weeksInWindow).toFixed(1);
      const ratePerWeek = parseFloat(rateLabel) || 0;

      velocities.push({ domain: t.domain, role: t.role, total: t.total_pages, newCount, pubCount, ratePerWeek });

      const roleColor = colorForRole(t.role);
      const rateColor = ratePerWeek > 2 ? chalk.green : ratePerWeek > 0 ? chalk.yellow : chalk.gray;

      console.log(` ${t.domain.padEnd(30)} ${roleColor(t.role.padEnd(12))} ${String(t.total_pages).padEnd(7)} ${chalk.cyan(String(newCount).padEnd(6))} ${rateColor(`${rateLabel}/wk`.padEnd(8))} ${String(pubCount).padEnd(6)}`);
    }

    // ── Velocity leader ──
    const target = velocities.find(v => v.role === 'target');
    const leader = velocities
      .filter(v => v.role === 'competitor')
      .sort((a, b) => b.ratePerWeek - a.ratePerWeek)[0];

    if (leader && target) {
      console.log('');
      if (leader.ratePerWeek > target.ratePerWeek) {
        console.log(chalk.bold.yellow(` ⚠️ ${leader.domain} is publishing ${leader.ratePerWeek}/wk vs your ${target.ratePerWeek}/wk`));
        console.log(chalk.gray(` They're out-publishing you. Check what topics they're covering.\n`));
      } else if (target.ratePerWeek > 0) {
        console.log(chalk.bold.green(` ✅ You're leading! ${target.ratePerWeek}/wk vs fastest competitor ${leader.ratePerWeek || 0}/wk\n`));
      }
    }

    // ── Recently published pages (with dates) ──
    if (publishedRecently.length > 0) {
      console.log(chalk.bold(`\n 📅 Recently Published (with date metadata):\n`));
      for (const p of publishedRecently.slice(0, 15)) {
        const roleColor = colorForRole(p.role);
        const dateStr = p.published_date?.slice(0, 10) || '?';
        console.log(` ${roleColor(p.domain.padEnd(25))} ${chalk.cyan(dateStr)} ${pathOf(p.url).slice(0, 60)}`);
      }
    }

    // ── New pages by section ──
    if (newPages.length > 0) {
      console.log(chalk.bold(`\n 🆕 New Pages Discovered (first seen in last ${days} days):\n`));

      // At most six domains, five pages each.
      for (const [domain, pages] of [...newByDomain.entries()].slice(0, 6)) {
        const role = pages[0]?.role || '?';
        const roleColor = colorForRole(role);
        console.log(` ${roleColor(chalk.bold(domain))} (${pages.length} new pages)`);
        for (const p of pages.slice(0, 5)) {
          const date = p.first_seen_at ? new Date(p.first_seen_at).toISOString().slice(0, 10) : '?';
          console.log(chalk.gray(` ${date} ${pathOf(p.url).slice(0, 70)}`));
        }
        if (pages.length > 5) console.log(chalk.gray(` ... and ${pages.length - 5} more`));
        console.log('');
      }
    }

    // ── Actionable insight ──
    if (newPages.length === 0 && publishedRecently.length === 0) {
      console.log(chalk.yellow('\n No velocity data yet. Re-crawl after a few days to detect new content.\n'));
      console.log(chalk.gray(' Velocity tracking improves over time — each crawl builds a timeline.'));
      console.log(chalk.gray(' Tip: Set up daily cron: 0 14 * * * node cli.js run\n'));
    } else {
      console.log(chalk.bold.green(' 💡 Action: Match or exceed the fastest competitor\'s publishing rate.'));
      console.log(chalk.gray(' Focus on the topics THEY\'re covering that YOU haven\'t.\n'));
    }
  });
|
|
2960
|
+
|
|
2961
|
+
// ── JS RENDERING DELTA ───────────────────────────────────────────────────
|
|
2962
|
+
program
  .command('js-delta <project>')
  .description('Compare raw HTML vs rendered DOM — find pages with hidden JS-only content')
  .option('--domain <domain>', 'Check a specific domain only')
  .option('--max-pages <n>', 'Max pages to check per domain', '10')
  .option('--threshold <n>', 'Word count difference threshold to flag', '50')
  .option('--save', 'Save report to reports/')
  .action(async (project, opts) => {
    // For each domain, fetches a few shallow pages twice — plain HTTP and a
    // Playwright render — and flags pages whose rendered text has more than
    // `threshold` extra words.
    if (!requirePro('js-delta')) return;
    // The returned config is not used here; the call is kept in case
    // loadConfig validates the project — TODO confirm.
    loadConfig(project);
    const db = getDb();

    // Non-numeric or non-positive --max-pages falls back to 10.
    const parsedMax = Number.parseInt(opts.maxPages, 10);
    const maxPerDomain = parsedMax > 0 ? parsedMax : 10;
    // 0 is a valid threshold (flag any increase); only invalid or negative input falls back to 50.
    const parsedThreshold = Number.parseInt(opts.threshold, 10);
    const threshold = parsedThreshold >= 0 ? parsedThreshold : 50;

    const pathOf = (url) => url.replace(/https?:\/\/[^/]+/, '') || '/';

    printAttackHeader('🔬 JS Rendering Delta', project);
    console.log(chalk.gray(' Comparing raw HTML (no JS) vs Playwright render (full JS)\n'));

    // Domains to check, optionally narrowed to --domain.
    const domainFilter = opts.domain ? 'AND d.domain = ?' : '';
    const params = opts.domain ? [project, opts.domain] : [project];

    const domains = db.prepare(`
      SELECT DISTINCT d.domain, d.role FROM domains d WHERE d.project = ? ${domainFilter}
      ORDER BY d.role, d.domain
    `).all(...params);

    if (!domains.length) {
      console.log(chalk.red('No domains found for project.'));
      return;
    }

    // High-value pages: indexable, click depth <= 2, more than 50 words.
    // LIMIT is applied before the isContentPage filter, so a domain may end
    // up with fewer than maxPerDomain pages.
    const pagesStmt = db.prepare(`
      SELECT p.url, p.word_count, p.click_depth
      FROM pages p JOIN domains d ON d.id = p.domain_id
      WHERE d.domain = ? AND d.project = ? AND p.is_indexable = 1
        AND p.click_depth <= 2 AND p.word_count > 50
      ORDER BY p.click_depth ASC, p.word_count DESC
      LIMIT ?
    `);

    // Lightweight fetch (no JS) using node-fetch
    let nodeFetch;
    try {
      nodeFetch = (await import('node-fetch')).default;
    } catch {
      console.log(chalk.red('node-fetch not available. Run: npm install node-fetch'));
      return;
    }

    // Playwright for full render
    let chromium;
    try {
      chromium = (await import('playwright')).chromium;
    } catch {
      console.log(chalk.red('Playwright not available. Run: npx playwright install'));
      return;
    }

    const browser = await chromium.launch({ headless: true });
    const context = await browser.newContext({
      userAgent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
      ignoreHTTPSErrors: true,
    });

    const results = [];
    let failed = 0; // pages whose raw fetch or render threw
    let mdOutput = `# JS Rendering Delta — ${project}\nGenerated: ${new Date().toISOString().slice(0, 10)}\n\n`;
    mdOutput += `Threshold: ${threshold}+ word difference\n\n`;

    try {
      for (const dom of domains) {
        const pages = pagesStmt.all(dom.domain, project, maxPerDomain).filter(r => isContentPage(r.url));

        if (!pages.length) continue;

        const roleColor = dom.role === 'target' ? chalk.green : dom.role === 'owned' ? chalk.blue : chalk.yellow;
        console.log(roleColor(chalk.bold(` ${dom.domain}`) + chalk.gray(` (${pages.length} pages)`)));

        for (const pg of pages) {
          process.stdout.write(chalk.gray(` ${pg.url.replace(/https?:\/\/[^/]+/, '').slice(0, 55).padEnd(55)} `));

          try {
            // 1. Raw HTML fetch (no JS)
            const rawRes = await nodeFetch(pg.url, {
              timeout: 10000,
              headers: { 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1)' },
            });
            // An error or block page is not the page's content; counting its
            // words would give a misleading delta, so treat it as a failure.
            if (!rawRes.ok) throw new Error(`HTTP ${rawRes.status}`);
            const rawHtml = await rawRes.text();
            // Drop script/style bodies, then all tags, then collapse whitespace.
            const rawText = rawHtml.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
              .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
              .replace(/<[^>]+>/g, ' ')
              .replace(/\s+/g, ' ').trim();
            const rawWords = rawText.split(/\s+/).filter(w => w.length > 1).length;

            // 2. Playwright render (full JS)
            const page = await context.newPage();
            try {
              await page.goto(pg.url, { waitUntil: 'domcontentloaded', timeout: 15000 });
              await page.waitForTimeout(2000); // let JS execute
              const renderedWords = await page.$eval('body', el =>
                el.innerText.split(/\s+/).filter(w => w.length > 1).length
              ).catch(() => 0);

              const delta = renderedWords - rawWords;
              // Growth relative to the raw word count (can exceed 100%).
              const pctDelta = rawWords > 0 ? Math.round((delta / rawWords) * 100) : (renderedWords > 0 ? 100 : 0);
              // Share of the rendered text that is missing from the raw HTML (0–100).
              const hiddenPct = renderedWords > 0 ? Math.max(0, Math.round((delta / renderedWords) * 100)) : 0;

              results.push({
                url: pg.url, domain: dom.domain, role: dom.role,
                rawWords, renderedWords, delta, pctDelta, hiddenPct,
                hidden: delta > threshold,
              });

              if (delta > threshold) {
                process.stdout.write(chalk.red(`⚠️ raw:${rawWords} → rendered:${renderedWords} (+${delta} words, +${pctDelta}%)\n`));
              } else if (delta < -threshold) {
                process.stdout.write(chalk.yellow(`📉 raw:${rawWords} → rendered:${renderedWords} (${delta} words)\n`));
              } else {
                process.stdout.write(chalk.green(`✓ raw:${rawWords} ≈ rendered:${renderedWords}\n`));
              }
            } finally {
              await page.close().catch(() => {});
            }
          } catch (err) {
            failed++;
            const reason = String(err?.message ?? err);
            process.stdout.write(chalk.red(`✗ ${reason.slice(0, 40)}\n`));
          }

          // Be respectful
          await new Promise(r => setTimeout(r, 1000 + Math.random() * 1500));
        }
        console.log('');
      }
    } finally {
      await browser.close().catch(() => {});
    }

    // ── Summary ──
    const hiddenContent = results.filter(r => r.hidden);
    const totalChecked = results.length;

    console.log(chalk.bold(` Summary: ${totalChecked} pages checked\n`));
    console.log(` ${chalk.green('✓')} ${results.filter(r => !r.hidden && r.delta >= -threshold).length} pages render correctly (raw ≈ rendered)`);
    console.log(` ${chalk.red('⚠️')} ${hiddenContent.length} pages with JS-hidden content (${threshold}+ words invisible to raw crawlers)`);
    if (failed > 0) console.log(` ${chalk.red('✗')} ${failed} pages could not be checked (fetch or render error)`);
    console.log('');

    mdOutput += `## Summary\n- ${totalChecked} pages checked\n- ${hiddenContent.length} with JS-hidden content\n`;
    if (failed > 0) mdOutput += `- ${failed} could not be checked\n`;
    mdOutput += `\n`;

    if (hiddenContent.length > 0) {
      console.log(chalk.bold.red(' Pages with hidden JS content:\n'));
      mdOutput += `## Hidden Content Detected\n\n`;

      // Largest gaps first.
      for (const h of hiddenContent.sort((a, b) => b.delta - a.delta)) {
        const path = pathOf(h.url);
        console.log(` ${chalk.bold(h.domain)} ${path.slice(0, 50)}`);
        console.log(chalk.red(` Raw: ${h.rawWords} words → Rendered: ${h.renderedWords} words (+${h.delta} hidden)`));
        console.log(chalk.gray(` ${h.hiddenPct}% of content is invisible to simple crawlers\n`));

        mdOutput += `### ${h.domain}${path}\n- Raw: ${h.rawWords} words\n- Rendered: ${h.renderedWords} words\n- **+${h.delta} hidden words (${h.pctDelta}%)**\n\n`;
      }

      console.log(chalk.bold.green(' 💡 Actions:'));
      const targetHidden = hiddenContent.filter(h => h.role === 'target' || h.role === 'owned');
      const compHidden = hiddenContent.filter(h => h.role === 'competitor');

      if (targetHidden.length > 0) {
        console.log(chalk.yellow(` ⚠️ YOUR site has ${targetHidden.length} page(s) with hidden content!`));
        console.log(chalk.yellow(` → Implement SSR or pre-rendering for these pages`));
        console.log(chalk.yellow(` → Googlebot can render JS, but it's slower and less reliable\n`));
      }
      if (compHidden.length > 0) {
        console.log(chalk.green(` ✅ ${compHidden.length} competitor page(s) have hidden content`));
        console.log(chalk.green(` → Their content is harder for Google to index — your opportunity\n`));
      }
    } else {
      console.log(chalk.green(' ✅ No significant JS rendering gaps detected.\n'));
    }

    if (opts.save) {
      const outPath = join(__dirname, `reports/${project}-js-delta-${Date.now()}.md`);
      writeFileSync(outPath, mdOutput, 'utf8');
      console.log(chalk.bold.green(` ✅ Report saved: ${outPath}\n`));
    }
  });
|
|
3142
|
+
|
|
3143
|
+
// ── EXPORT (JSON/CSV for paste-into-any-AI) ─────────────────────────────
|
|
3144
|
+
// `export <project>` — dump crawl data for one project to a JSON or CSV file.
//
// Options:
//   --format  'csv' selects CSV; any other value falls back to JSON.
//   --tables  comma-separated table names (see TABLE_QUERIES below for the accepted set).
//   --output  destination file; defaults to reports/<project>-export-<timestamp>.<format>.
//   --full    export every table (gated by requirePro('extract')).
//
// JSON output contains project metadata, the domain list and one array per table.
// CSV output contains a single table only: `pages` when requested, otherwise the
// first requested table.
program
  .command('export <project>')
  .description('Export crawl data as JSON or CSV — paste into any AI for analysis')
  .option('--format <type>', 'Output format: json or csv', 'json')
  .option('--tables <list>', 'Comma-separated tables to include (pages,keywords,headings,links,technical,extractions,analyses,schemas)', 'pages,keywords,links,technical')
  .option('--output <path>', 'Output file path (default: reports/<project>-export-<timestamp>.<format>)')
  .option('--full', 'Export all tables including AI analysis data (requires Solo)')
  .action(async (project, opts) => {
    // One read-only query per exportable table. Every query takes the project
    // name as its only bind parameter.
    const TABLE_QUERIES = {
      pages: `
        SELECT p.url, d.domain, d.role, p.status_code, p.word_count, p.load_ms,
               p.is_indexable, p.click_depth, p.published_date, p.modified_date
        FROM pages p JOIN domains d ON d.id = p.domain_id
        WHERE d.project = ?
        ORDER BY d.role, d.domain, p.click_depth
      `,
      keywords: `
        SELECT k.keyword, k.location, p.url, d.domain, d.role
        FROM keywords k JOIN pages p ON p.id = k.page_id JOIN domains d ON d.id = p.domain_id
        WHERE d.project = ?
        ORDER BY k.keyword
      `,
      headings: `
        SELECT h.level, h.text, p.url, d.domain
        FROM headings h JOIN pages p ON p.id = h.page_id JOIN domains d ON d.id = p.domain_id
        WHERE d.project = ?
        ORDER BY p.url, h.level
      `,
      links: `
        SELECT l.target_url, l.anchor_text, l.is_internal, p.url as source_url, d.domain
        FROM links l JOIN pages p ON p.id = l.source_id JOIN domains d ON d.id = p.domain_id
        WHERE d.project = ?
        ORDER BY l.is_internal DESC, d.domain
      `,
      technical: `
        SELECT t.has_canonical, t.has_og_tags, t.has_schema, t.is_mobile_ok,
               t.has_sitemap, t.has_robots, t.core_web_vitals, p.url, d.domain
        FROM technical t JOIN pages p ON p.id = t.page_id JOIN domains d ON d.id = p.domain_id
        WHERE d.project = ?
      `,
      extractions: `
        SELECT e.title, e.meta_desc, e.h1, e.product_type, e.pricing_tier,
               e.cta_primary, e.tech_stack, e.schema_types, e.search_intent,
               e.primary_entities, p.url, d.domain
        FROM extractions e JOIN pages p ON p.id = e.page_id JOIN domains d ON d.id = p.domain_id
        WHERE d.project = ?
      `,
      // Only the most recent analysis run is exported.
      analyses: `
        SELECT generated_at, model, keyword_gaps, long_tails, quick_wins,
               new_pages, content_gaps, positioning
        FROM analyses WHERE project = ?
        ORDER BY generated_at DESC LIMIT 1
      `,
      schemas: `
        SELECT ps.schema_type, ps.name, ps.description, ps.rating, ps.rating_count,
               ps.price, ps.currency, ps.author, ps.date_published, p.url, d.domain
        FROM page_schemas ps JOIN pages p ON p.id = ps.page_id JOIN domains d ON d.id = p.domain_id
        WHERE d.project = ?
        ORDER BY ps.schema_type
      `,
    };
    const PRO_TABLES = ['extractions', 'analyses'];

    // Quote a CSV cell when it contains a delimiter, a quote or any line break
    // (including a bare carriage return); embedded quotes are doubled.
    const toCsvCell = (val) => {
      if (val == null) return '';
      const str = String(val);
      return /[",\r\n]/.test(str) ? `"${str.replace(/"/g, '""')}"` : str;
    };

    // The return value is not needed here; the call is kept exactly as the other
    // commands make it (presumably it validates the project config — confirm).
    loadConfig(project);
    const db = getDb();
    const format = opts.format === 'csv' ? 'csv' : 'json';

    // --full requires Solo (includes extractions + analyses)
    let tables = opts.tables.split(',').map(t => t.trim()).filter(Boolean);
    if (opts.full) {
      if (!requirePro('extract')) return;
      tables = ['pages', 'keywords', 'headings', 'links', 'technical', 'extractions', 'analyses', 'schemas'];
    }

    // Drop names that have no query instead of reporting them as "0 rows"
    // (or, worse, picking one as the CSV table).
    const unknownTables = tables.filter(t => !Object.hasOwn(TABLE_QUERIES, t));
    if (unknownTables.length > 0) {
      console.log('');
      console.log(chalk.yellow(` Ignoring unknown tables: ${unknownTables.join(', ')}`));
      tables = tables.filter(t => Object.hasOwn(TABLE_QUERIES, t));
    }

    // Gate AI tables behind Solo
    const requestedProTables = tables.filter(t => PRO_TABLES.includes(t));
    if (requestedProTables.length > 0 && !isPro()) {
      console.log('');
      console.log(chalk.yellow(` Skipping pro-only tables: ${requestedProTables.join(', ')}`));
      console.log(chalk.dim(` Upgrade to Solo to export AI analysis data.`));
      tables = tables.filter(t => !PRO_TABLES.includes(t));
    }

    if (tables.length === 0) {
      console.error(chalk.red('\nNo tables to export.\n'));
      process.exit(1);
    }

    console.log(chalk.bold.cyan(`\n📦 Export — ${project.toUpperCase()} (${format.toUpperCase()})\n`));
    console.log(chalk.dim(` Tables: ${tables.join(', ')}\n`));

    // A project with no domain rows has never been crawled.
    const domainRows = db.prepare(`SELECT id, domain, role FROM domains WHERE project = ?`).all(project);
    if (domainRows.length === 0) {
      console.error(chalk.red(' No crawl data found. Run `crawl` first.\n'));
      process.exit(1);
    }

    const exportData = {
      project,
      exported_at: new Date().toISOString(),
      domains: domainRows,
    };

    for (const table of tables) {
      // A failing query (e.g. a table missing from an older database) skips
      // that table only; the rest of the export continues.
      try {
        exportData[table] = db.prepare(TABLE_QUERIES[table]).all(project);
        const count = exportData[table]?.length || 0;
        console.log(chalk.dim(` ${table}: ${count} rows`));
      } catch (err) {
        console.log(chalk.yellow(` ${table}: skipped (${err.message})`));
      }
    }

    // Output
    const timestamp = Date.now();
    const defaultPath = join(__dirname, `reports/${project}-export-${timestamp}.${format}`);
    const outPath = opts.output || defaultPath;

    if (format === 'csv') {
      // CSV is flat, so only one table is written: pages when requested,
      // otherwise the first requested table.
      const primaryTable = tables.includes('pages') ? 'pages' : tables[0];
      const rows = exportData[primaryTable] || [];
      if (rows.length === 0) {
        console.log(chalk.yellow('\n No data to export.\n'));
        return;
      }
      const headers = Object.keys(rows[0]);
      const csvLines = [
        headers.join(','),
        ...rows.map(row => headers.map(h => toCsvCell(row[h])).join(',')),
      ];
      writeFileSync(outPath, csvLines.join('\n'), 'utf8');
      console.log(chalk.dim(`\n CSV exports the "${primaryTable}" table. Use --format json for all tables.\n`));
    } else {
      writeFileSync(outPath, JSON.stringify(exportData, null, 2), 'utf8');
    }

    console.log(chalk.bold.green(`\n ✅ Exported to: ${outPath}\n`));
    console.log(chalk.dim(` Paste this file into Claude, ChatGPT, or any AI for instant analysis.\n`));
  });
|
|
3315
|
+
|
|
3316
|
+
// ── ACTION EXPORTS (Prioritized recommendations) ─────────────────────────
|
|
3317
|
+
/**
 * Serialise an action-export payload in the requested output format.
 *
 * @param {object} payload - Export payload handed straight to the formatter.
 * @param {string} format - 'json' selects the JSON formatter; every other
 *   value is rendered with the brief formatter.
 * @returns {*} Whatever the selected formatter returns.
 */
function renderActionOutput(payload, format) {
  if (format === 'json') {
    return formatActionsJson(payload);
  }
  return formatActionsBrief(payload);
}
|
|
3320
|
+
|
|
3321
|
+
/**
 * Deliver rendered action output either to a file or to the console.
 *
 * @param {string} output - Already-rendered text to deliver.
 * @param {string} [outPath] - Destination file. When truthy the text is
 *   written there (UTF-8) and a confirmation line is logged; otherwise the
 *   text is logged between two blank lines.
 */
function writeOrPrintActionOutput(output, outPath) {
  if (!outPath) {
    for (const line of ['', output, '']) {
      console.log(line);
    }
    return;
  }

  writeFileSync(outPath, output, 'utf8');
  const confirmation = `\n ✅ Exported to: ${outPath}\n`;
  console.log(chalk.bold.green(confirmation));
}
|
|
3331
|
+
|
|
3332
|
+
// `export-actions <project>` — collect prioritized actions from the technical,
// competitive and suggestive builders and render them as JSON or a brief.
//
// Options:
//   --scope   technical | competitive | suggestive | all (anything else → all)
//   --format  json | brief (anything else → brief)
//   --output  write the rendered result to this file instead of stdout
//   --vs      restrict competitor comparisons to one domain
//
// Competitive and suggestive scopes are only built when isPro() is true. With
// scope 'all' and no pro license, only technical actions are produced.
program
  .command('export-actions <project>')
  .description('Export prioritized SEO actions across technical, competitive, and suggestive scopes')
  .option('--scope <type>', 'technical, competitive, suggestive, or all', 'all')
  .option('--format <type>', 'Output format: json or brief', 'brief')
  .option('--output <path>', 'Write output to a file instead of stdout')
  .option('--vs <domain>', 'Filter competitor comparisons to one domain')
  .action(async (project, opts) => {
    loadConfig(project);
    const db = getDb();
    const scope = ['technical', 'competitive', 'suggestive', 'all'].includes(opts.scope) ? opts.scope : 'all';
    const format = opts.format === 'json' ? 'json' : 'brief';

    // When the JSON payload itself is printed to stdout, banner and progress
    // lines go to stderr so that `export-actions … --format json | jq` receives
    // nothing but the payload. In every other case they stay on stdout.
    const jsonOnStdout = !opts.output && format === 'json';
    const status = (...args) => (jsonOnStdout ? console.error(...args) : console.log(...args));

    try {
      assertHasCrawlData(db, project);
    } catch (err) {
      console.error(chalk.red(`\n ${err.message}\n`));
      process.exit(1);
    }

    // Explicitly requesting a pro-only scope without a license stops here.
    if (!isPro() && (scope === 'competitive' || scope === 'suggestive')) {
      if (!requirePro('competitive')) return;
    }

    if (scope === 'all' && !isPro()) {
      status('');
      status(chalk.yellow(' Competitive and suggestive actions require SEO Intel Solo.'));
      status(chalk.dim(' Showing technical actions only.'));
    }

    status(chalk.bold.cyan(`\n🎯 Action Export — ${project.toUpperCase()}\n`));
    status(chalk.dim(` Scope: ${scope}`));
    if (opts.vs) status(chalk.dim(` Competitor filter: ${opts.vs}`));
    status(chalk.dim(` Format: ${format}\n`));

    const actions = [];

    if (scope === 'technical' || scope === 'all') {
      const technicalActions = buildTechnicalActions(db, project);
      actions.push(...technicalActions);
      status(chalk.dim(` technical: ${technicalActions.length} actions`));
    }

    if (isPro() && (scope === 'competitive' || scope === 'all')) {
      // Competitive actions are only built once an analysis run exists.
      const latestAnalysis = getLatestAnalysis(db, project);
      if (!latestAnalysis) {
        status(chalk.yellow(' competitive: skipped (run `analyze` first for richer gap data)'));
      } else {
        const competitiveActions = buildCompetitiveActions(db, project, { vsDomain: opts.vs });
        actions.push(...competitiveActions);
        status(chalk.dim(` competitive: ${competitiveActions.length} actions`));
      }
    }

    if (isPro() && (scope === 'suggestive' || scope === 'all')) {
      const suggestiveActions = buildSuggestiveActions(db, project, { vsDomain: opts.vs, scope: 'all' });
      actions.push(...suggestiveActions);
      status(chalk.dim(` suggestive: ${suggestiveActions.length} actions`));
    }

    const payload = buildExportPayload({ project, scope, actions });
    const output = renderActionOutput(payload, format);
    writeOrPrintActionOutput(output, opts.output);
  });
|
|
3396
|
+
|
|
3397
|
+
// `competitive-actions <project>` — shortcut for `export-actions --scope competitive`.
//
// Requires requirePro('competitive') to pass, crawl data to exist and at least
// one analysis run; otherwise the command returns or exits with code 1.
//
// Options:
//   --format  json | brief (anything else → brief)
//   --output  write the rendered result to this file instead of stdout
//   --vs      restrict the comparison to one competitor domain
program
  .command('competitive-actions <project>')
  .description('Shortcut for export-actions --scope competitive')
  .option('--format <type>', 'Output format: json or brief', 'brief')
  .option('--output <path>', 'Write output to a file instead of stdout')
  .option('--vs <domain>', 'Filter to one competitor domain')
  .action(async (project, opts) => {
    loadConfig(project);
    if (!requirePro('competitive')) return;

    const db = getDb();
    const format = opts.format === 'json' ? 'json' : 'brief';

    // When the JSON payload itself is printed to stdout, the banner goes to
    // stderr so piped output stays parseable. Otherwise it stays on stdout.
    const jsonOnStdout = !opts.output && format === 'json';
    const status = (...args) => (jsonOnStdout ? console.error(...args) : console.log(...args));

    try {
      assertHasCrawlData(db, project);
    } catch (err) {
      console.error(chalk.red(`\n ${err.message}\n`));
      process.exit(1);
    }

    status(chalk.bold.cyan(`\n⚔️ Competitive Actions — ${project.toUpperCase()}\n`));
    if (opts.vs) status(chalk.dim(` Competitor filter: ${opts.vs}`));

    // Unlike export-actions, a missing analysis is fatal for this command.
    const latestAnalysis = getLatestAnalysis(db, project);
    if (!latestAnalysis) {
      console.error(chalk.red('\n No analysis data found. Run `analyze` first.\n'));
      process.exit(1);
    }

    const actions = buildCompetitiveActions(db, project, { vsDomain: opts.vs });
    const payload = buildExportPayload({ project, scope: 'competitive', actions });
    const output = renderActionOutput(payload, format);
    writeOrPrintActionOutput(output, opts.output);
  });
|
|
3429
|
+
|
|
3430
|
+
// `suggest-usecases <project>` — render the suggestive action set for one project.
//
// Requires requirePro('competitive') to pass and crawl data to exist.
//
// Options:
//   --scope   docs | product-pages | dashboards | onboarding | all (anything else → all)
//   --format  json | brief (anything else → brief)
//   --vs      restrict the comparison to one competitor domain
//   --output  write the rendered result to this file instead of stdout
program
  .command('suggest-usecases <project>')
  .description('Suggest missing page/use-case opportunities from competitor patterns')
  .option('--scope <type>', 'docs, product-pages, dashboards, onboarding, or all', 'all')
  .option('--format <type>', 'Output format: json or brief', 'brief')
  .option('--vs <domain>', 'Filter to one competitor domain')
  .option('--output <path>', 'Write output to a file instead of stdout')
  .action(async (project, opts) => {
    loadConfig(project);
    if (!requirePro('competitive')) return;

    const db = getDb();
    const scope = ['docs', 'product-pages', 'dashboards', 'onboarding', 'all'].includes(opts.scope) ? opts.scope : 'all';
    const format = opts.format === 'json' ? 'json' : 'brief';

    // When the JSON payload itself is printed to stdout, the banner goes to
    // stderr so piped output stays parseable. Otherwise it stays on stdout.
    const jsonOnStdout = !opts.output && format === 'json';
    const status = (...args) => (jsonOnStdout ? console.error(...args) : console.log(...args));

    try {
      assertHasCrawlData(db, project);
    } catch (err) {
      console.error(chalk.red(`\n ${err.message}\n`));
      process.exit(1);
    }

    status(chalk.bold.cyan(`\n💡 Suggested Use Cases — ${project.toUpperCase()}\n`));
    status(chalk.dim(` Scope: ${scope}`));
    if (opts.vs) status(chalk.dim(` Competitor filter: ${opts.vs}`));
    status('');

    const actions = buildSuggestiveActions(db, project, { vsDomain: opts.vs, scope });
    const payload = buildExportPayload({ project, scope, actions });
    const output = renderActionOutput(payload, format);
    writeOrPrintActionOutput(output, opts.output);
  });
|
|
3460
|
+
|
|
3461
|
+
// ── GUIDE (Coach-style chapter map) ──────────────────────────────────────
|
|
3462
|
+
// `guide [project]` — print the seven-chapter pipeline map, mark the chapter
// the user is currently on and suggest the next command.
//
// State is detected from: whether an `ollama` executable is on PATH, whether
// the .env file next to this script assigns a Gemini/OpenAI key, the loaded
// project configs, row counts in the database for the given project, and
// whether reports/<project>-dashboard.html exists.
program
  .command('guide')
  .description('Print the 7 Chapters — always know where you are and what comes next')
  .argument('[project]', 'Show progress for a specific project')
  .action((project) => {
    const db = getDb();
    const hr = chalk.dim('─'.repeat(62));
    const gold = s => chalk.hex('#d4a853')(s);
    const dim = chalk.gray;

    console.log('');
    console.log(gold(chalk.bold(' 🔶 SEO Intel — The 7 Chapters')));
    console.log(dim(' Your competitive intelligence journey, step by step.'));
    console.log('');

    // ── Detect state ──
    const configs = loadAllConfigs();

    // `which` does not exist on Windows; `where` is the equivalent lookup there.
    // A missing locator binary yields a null status rather than an exception,
    // so the result is simply false.
    const hasOllama = (() => {
      try {
        const locator = process.platform === 'win32' ? 'where' : 'which';
        return spawnSync(locator, ['ollama'], { stdio: 'ignore' }).status === 0;
      } catch {
        return false;
      }
    })();

    const env = (() => { try { return readFileSync(join(__dirname, '.env'), 'utf8'); } catch { return ''; } })();

    // A key counts as configured only when an uncommented line assigns it a
    // non-empty value; a bare mention (e.g. `# GEMINI_API_KEY=` or
    // `GEMINI_API_KEY=`) does not.
    const hasEnvKey = name => new RegExp(`^[ \\t]*(?:export[ \\t]+)?${name}[ \\t]*=[ \\t]*\\S`, 'm').test(env);
    const hasGemini = hasEnvKey('GEMINI_API_KEY');
    const hasOpenAI = hasEnvKey('OPENAI_API_KEY');
    const hasAnalysisKey = hasGemini || hasOpenAI;

    // Project-specific state
    let projConfig = null;
    let pageCount = 0;
    let extractedCount = 0;
    let analysisCount = 0;
    let reportExists = false;
    if (project) {
      projConfig = configs.find(c => c.project === project);
      if (!projConfig) {
        console.log(chalk.red(` Project "${project}" not found in config/.\n`));
        console.log(dim(` Available: ${configs.map(c => c.project).join(', ') || 'none'}`));
        console.log(dim(` Create one: node cli.js setup --project ${project}\n`));
        return;
      }
      try {
        pageCount = db.prepare(`
          SELECT COUNT(*) as c FROM pages p JOIN domains d ON d.id = p.domain_id WHERE d.project = ?
        `).get(project)?.c || 0;
        extractedCount = db.prepare(`
          SELECT COUNT(*) as c FROM extractions e JOIN pages p ON p.id = e.page_id JOIN domains d ON d.id = p.domain_id WHERE d.project = ?
        `).get(project)?.c || 0;
        analysisCount = db.prepare(`
          SELECT COUNT(*) as c FROM analyses WHERE project = ?
        `).get(project)?.c || 0;
      } catch { /* tables may not exist yet */ }
      const dashFile = join(__dirname, `reports/${project}-dashboard.html`);
      reportExists = existsSync(dashFile);
    }

    // ── Determine current chapter ──
    // Later checks override earlier ones, so the furthest milestone reached wins.
    let currentChapter = 1;
    if (hasOllama || hasAnalysisKey) currentChapter = 2;
    if (project && projConfig) currentChapter = 3;
    if (project && pageCount > 0) currentChapter = 4;
    if (project && analysisCount > 0) currentChapter = 5;
    if (project && reportExists) currentChapter = 6;
    // Chapter 7 is always "act"

    const chapters = [
      {
        num: 1,
        title: 'Setup',
        desc: 'Check dependencies, configure API keys',
        status: (hasOllama ? chalk.green('✓ Ollama') : chalk.red('✗ Ollama')) +
          dim(' · ') +
          (hasAnalysisKey ? chalk.green('✓ API key') : chalk.yellow('○ no API key')),
        cmd: 'node cli.js setup --project <name>',
        detail: [
          'Checks Ollama (local extraction) + Playwright (crawling)',
          'Optionally saves Gemini/OpenAI API key to .env',
          hasOllama && hasAnalysisKey ? chalk.green(' → You\'re fully set up!') :
            hasOllama ? chalk.yellow(' → Add an API key for analysis: edit .env') :
              chalk.yellow(' → Install Ollama: https://ollama.com then: ollama pull qwen3:4b'),
        ].filter(Boolean),
      },
      {
        num: 2,
        title: 'Add Your Site + Competitors',
        desc: 'Create a project config with target domain and competitors',
        status: configs.length > 0
          ? chalk.green(`✓ ${configs.length} project(s): ${configs.map(c => c.project).join(', ')}`)
          : chalk.yellow('○ no projects yet'),
        cmd: 'node cli.js setup --project <name>',
        detail: project && projConfig ? [
          ` Target: ${chalk.bold(projConfig.target.domain)}`,
          ` Competitors: ${projConfig.competitors.map(c => c.domain).join(', ')}`,
          projConfig.owned?.length ? ` Owned: ${projConfig.owned.map(o => o.domain).join(', ')}` : null,
        ].filter(Boolean) : [
          ' Enter your domain, competitors, crawl settings',
          ' Generates config/<project>.json',
        ],
      },
      {
        num: 3,
        title: 'Initial Full Crawl',
        desc: 'Spider your site + competitors, extract SEO signals',
        status: project && pageCount > 0
          ? chalk.green(`✓ ${pageCount} pages crawled` + (extractedCount > 0 ? `, ${extractedCount} extracted` : ''))
          : chalk.yellow('○ not crawled yet'),
        cmd: project ? `node cli.js crawl ${project}` : 'node cli.js crawl <project>',
        detail: [
          'BFS from homepage + sitemap discovery',
          'Each page: status, word count, load time, headings, links',
          'Qwen3 extracts: intent, entities, CTAs, keywords, tech stack',
          project && pageCount > 0 ? chalk.green(` → ${project}: ${pageCount} pages in DB`) : null,
        ].filter(Boolean),
      },
      {
        num: 4,
        title: 'Analysis',
        desc: 'AI reads everything, finds your gaps and opportunities',
        status: project && analysisCount > 0
          ? chalk.green(`✓ ${analysisCount} analysis run(s)`)
          : chalk.yellow('○ not analyzed yet'),
        cmd: project ? `node cli.js analyze ${project}` : 'node cli.js analyze <project>',
        detail: [
          'Sends crawl data to Gemini/GPT for competitive synthesis',
          'Keyword gaps, quick wins, new pages to create, positioning',
          project && analysisCount > 0 ? chalk.green(` → Latest analysis ready`) : null,
        ].filter(Boolean),
      },
      {
        num: 5,
        title: 'Report',
        desc: 'Interactive HTML dashboard with charts and visualizations',
        status: project && reportExists
          ? chalk.green('✓ Dashboard generated')
          : chalk.yellow('○ no dashboard yet'),
        cmd: project ? `node cli.js html ${project}` : 'node cli.js html <project>',
        detail: [
          'Competitor matrix, gap heatmaps, score cards',
          'Topic cluster network, keyword territories, link DNA',
          'Open in browser — works offline, shareable',
        ],
      },
      {
        num: 6,
        title: 'Ongoing Monitoring',
        desc: 'Keep data fresh automatically',
        status: dim('runs via cron or manually'),
        cmd: 'node cli.js run',
        detail: [
          'Incremental: only re-crawls changed pages (content hash)',
          'Set on cron: 0 */6 * * * node cli.js run',
          'Or use the dashboard: node cli.js serve → click Crawl',
        ],
      },
      {
        num: 7,
        title: 'Act + Iterate',
        desc: 'Use attack commands to find specific opportunities',
        status: dim('always available'),
        cmd: null,
        detail: [
          `${chalk.yellow('brief')} — weekly intel brief (gaps, moves, actions)`,
          `${chalk.yellow('velocity')} — content publishing rate per domain`,
          `${chalk.yellow('entities')} — entity coverage map (semantic gaps)`,
          `${chalk.yellow('shallow')} — thin competitor pages to outwrite`,
          `${chalk.yellow('decay')} — stale competitor content to replace`,
          `${chalk.yellow('orphans')} — entities with no dedicated page`,
          `${chalk.yellow('friction')} — CTA/intent mismatches to exploit`,
          `${chalk.yellow('js-delta')} — JS rendering delta (hidden content)`,
          `${chalk.yellow('schemas')} — deep structured data competitive intel`,
          `${chalk.yellow('keywords')} — keyword cluster matrix (trad + AI + agent)`,
        ],
      },
    ];

    for (const ch of chapters) {
      const isCurrent = ch.num === currentChapter;
      const isDone = ch.num < currentChapter;
      const marker = isDone ? chalk.green('✓') : isCurrent ? gold('▶') : dim('○');
      const titleColor = isCurrent ? chalk.bold.hex('#d4a853') : isDone ? chalk.green : dim;
      const pointer = isCurrent ? gold(' ← you are here') : '';

      console.log(hr);
      console.log(` ${marker} ${titleColor(`Chapter ${ch.num} — ${ch.title}`)}${pointer}`);
      console.log(dim(` ${ch.desc}`));
      console.log(` ${ch.status}`);
      if (ch.cmd) console.log(` ${dim('Run:')} ${chalk.cyan(ch.cmd)}`);
      if (ch.detail?.length) {
        for (const line of ch.detail) {
          console.log(dim(` ${line}`));
        }
      }
    }

    console.log(hr);

    // ── Next step suggestion ──
    // Without a project argument the chapter can never pass 2, so the choice is
    // between "create a project" (none configured) and "pick one" (some exist).
    // With a project argument the chapter is always 3 or higher.
    console.log('');
    if (!project && configs.length === 0) {
      console.log(chalk.cyan(' → Next: Create your first project'));
      console.log(dim(' node cli.js setup --project mysite\n'));
    } else if (!project) {
      console.log(chalk.cyan(` → Pick a project: node cli.js guide <project>\n`));
    } else if (currentChapter === 3) {
      console.log(chalk.cyan(` → Next: Run your first crawl`));
      console.log(dim(` node cli.js crawl ${project}\n`));
    } else if (currentChapter === 4) {
      console.log(chalk.cyan(` → Next: Run analysis to find gaps`));
      console.log(dim(` node cli.js analyze ${project}\n`));
    } else if (currentChapter === 5) {
      console.log(chalk.cyan(` → Next: Generate your dashboard`));
      console.log(dim(` node cli.js html ${project}\n`));
    } else {
      console.log(chalk.cyan(` → Next: Find quick wins with attack commands`));
      console.log(dim(` node cli.js shallow ${project}`));
      console.log(dim(` node cli.js decay ${project}`));
      console.log(dim(` node cli.js friction ${project}\n`));
    }

    console.log(dim(' Lost? Run this any time: node cli.js guide'));
    console.log(dim(' Full reference: node cli.js --help'));
    console.log('');
  });
|
|
3678
|
+
|
|
3679
|
+
// ── License activation hook — phone-home if cache is stale/missing ──────────
// Registered as a commander `preAction` hook. It reads the cached license and,
// when that cache reports `needsActivation` or `stale`, awaits an activation
// attempt before the command action runs.
program.hook('preAction', async () => {
  const cached = loadLicense();
  const mustActivate = Boolean(cached.needsActivation || cached.stale);
  if (!mustActivate) return;

  // Rejections from activateLicense() are deliberately ignored so the
  // command still runs when activation fails.
  await activateLicense().catch(() => {});
});
|
|
3686
|
+
|
|
3687
|
+
// ── BUG-002: No-args getting-started handler ─────────────────────────────────
// When run with no command, show a friendly entry point instead of generic help.
// Project names are derived from the *.json files in config/ (example.json is
// ignored). An unreadable config directory is treated as "no projects". The
// process exits with code 0 afterwards, so command parsing is never reached.
if (process.argv.length <= 2) {
  const gold = s => chalk.hex('#d4a853')(s);
  const dim = chalk.gray;
  const CONFIG_EXT = '.json';
  const projectNames = (() => {
    try {
      return readdirSync(join(__dirname, 'config'))
        .filter(f => f.endsWith(CONFIG_EXT) && f !== 'example.json')
        // Strip the extension as a suffix; a plain string replace would remove
        // the first ".json" occurrence anywhere in the file name.
        .map(f => f.slice(0, -CONFIG_EXT.length));
    } catch {
      return [];
    }
  })();

  console.log('');
  console.log(gold(chalk.bold(' 🔶 SEO Intel')));
  console.log(dim(' Competitive intelligence for your site — powered by local AI.'));
  console.log('');

  if (projectNames.length === 0) {
    console.log(chalk.cyan(' → Get started:'));
    console.log('');
    console.log(' ' + chalk.bold('seo-intel setup'));
    console.log(dim(' ↑ Create your first project (target + competitors)'));
  } else {
    console.log(dim(` Projects: ${projectNames.join(', ')}`));
    console.log('');
    console.log(chalk.cyan(' → Resume your work:'));
    console.log('');
    console.log(' ' + chalk.bold(`seo-intel guide ${projectNames[0]}`));
    console.log(dim(' ↑ See where you are in the pipeline'));
  }

  console.log('');
  console.log(dim(' Full command list: seo-intel --help'));
  console.log('');
  process.exit(0);
}
|
|
3722
|
+
|
|
3723
|
+
// Global error handler — ensures uncaught errors in async actions exit non-zero (BUG-004)
// Rejections are not guaranteed to be Error instances (an action may reject
// with a string or nothing at all), so fall back to the stringified value
// instead of printing "undefined".
program.parseAsync().catch(err => {
  const message = err?.message ?? String(err);
  console.error(chalk.red(`\n✗ ${message}\n`));
  process.exit(1);
});
|