seo-intel 1.5.46 → 1.5.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli.js CHANGED
@@ -20,7 +20,6 @@ import { totalmem } from 'os';
20
20
  import { fileURLToPath } from 'url';
21
21
  import chalk from 'chalk';
22
22
 
23
- import { crawlDomain } from './crawler/index.js';
24
23
  // Paid modules — loaded lazily inside gated commands only.
25
24
  let _extractPage, _buildAnalysisPrompt;
26
25
  async function getExtractPage() {
@@ -57,6 +56,13 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
57
56
  // Start background update check (non-blocking, never slows startup)
58
57
  checkForUpdates();
59
58
 
59
+ // Long-running commands (serve, setup-web) intentionally keep the process alive.
60
+ // Everything else is one-shot: once the action resolves we force-exit so a
61
+ // hung background fetch (update check / license phone-home) can't hold the
62
+ // shell — an aborted fetch's socket lingers until the OS connect timeout, so
63
+ // AbortController alone is not enough to make the process exit promptly.
64
+ let _keepProcessAlive = false;
65
+
60
66
  // Ensure reports/ and config/ directories exist
61
67
  try { mkdirSync(join(__dirname, 'reports'), { recursive: true }); } catch { /* ok */ }
62
68
  try { mkdirSync(join(__dirname, 'config'), { recursive: true }); } catch { /* ok */ }
@@ -575,6 +581,8 @@ program
575
581
  },
576
582
  };
577
583
 
584
+ // Lazy: crawler/index.js pulls Playwright + turndown — keep them off CLI startup
585
+ const { crawlDomain } = await import('./crawler/index.js');
578
586
  for await (const page of crawlDomain(site.url, crawlOpts)) {
579
587
  if (page._blocked) {
580
588
  totalBlocked++;
@@ -1323,6 +1331,7 @@ program
1323
1331
  let pageCount = 0;
1324
1332
  let skipped = 0;
1325
1333
  let blocked = false;
1334
+ const { crawlDomain } = await import('./crawler/index.js');
1326
1335
  for await (const page of crawlDomain(next.url, {
1327
1336
  onSitemapDiscovered: (urls) => {
1328
1337
  try { upsertSitemapUrls(db, domainId, urls.map(u => u.url), `${next.url}/sitemap.xml`); }
@@ -1547,6 +1556,84 @@ program
1547
1556
  }
1548
1557
  });
1549
1558
 
1559
+ // ── MODELS ─────────────────────────────────────────────────────────────────
1560
+ program
1561
+ .command('models')
1562
+ .description('Suggest local extraction models for your hardware (Gemma / Qwen) — local is strongly recommended over cloud')
1563
+ .option('--format <type>', 'Output format: brief or json', 'brief')
1564
+ .action(async (opts) => {
1565
+ const isJson = opts.format === 'json';
1566
+ const { suggestExtractionModels, CLOUD_EXTRACTION_DISCLAIMER, detectVRAM } = await import('./setup/engine.js');
1567
+
1568
+ // Detect hardware (best-effort) + installed Ollama models (quick, short timeout).
1569
+ let vram = { available: false, vramMB: 0, gpuName: null };
1570
+ try { vram = detectVRAM(); } catch { /* keep default */ }
1571
+ let installed = [];
1572
+ try {
1573
+ const controller = new AbortController();
1574
+ const t = setTimeout(() => controller.abort(), 1500);
1575
+ const res = await fetch('http://localhost:11434/api/tags', { signal: controller.signal });
1576
+ clearTimeout(t);
1577
+ if (res.ok) {
1578
+ const data = await res.json();
1579
+ installed = (data.models || []).map(m => m.name);
1580
+ }
1581
+ } catch { /* Ollama not reachable — fine */ }
1582
+
1583
+ const { suggestions, recommendedId } = suggestExtractionModels(vram.vramMB || 0, installed);
1584
+
1585
+ if (isJson) {
1586
+ console.log(JSON.stringify({
1587
+ command: 'models',
1588
+ hardware: { gpu: vram.gpuName || null, vramMB: vram.vramMB || 0 },
1589
+ recommended: recommendedId,
1590
+ suggestions,
1591
+ cloud_disclaimer: CLOUD_EXTRACTION_DISCLAIMER,
1592
+ }, null, 2));
1593
+ return;
1594
+ }
1595
+
1596
+ console.log('');
1597
+ console.log(chalk.bold(' 🧠 Local extraction models'));
1598
+ console.log('');
1599
+ if (vram.available && vram.vramMB) {
1600
+ console.log(chalk.gray(` Detected: ${vram.gpuName || 'GPU'} · ~${(vram.vramMB / 1024).toFixed(1)} GB VRAM`));
1601
+ } else {
1602
+ console.log(chalk.gray(' GPU/VRAM not detected — showing the full range. Pick by your machine.'));
1603
+ }
1604
+ console.log('');
1605
+
1606
+ const qFmt = (q) => q === 'excellent' ? chalk.green(q) : q === 'great' || q === 'better' ? chalk.cyan(q) : chalk.gray(q);
1607
+ for (const s of suggestions) {
1608
+ const star = s.id === recommendedId ? chalk.bold.green(' ◀ recommended') : '';
1609
+ const fit = vram.vramMB && !s.fitsVram ? chalk.red(' (needs more VRAM)') : '';
1610
+ const got = s.installed ? chalk.green(' ✓ installed') : chalk.gray(` ollama pull ${s.id}`);
1611
+ console.log(` ${chalk.bold(s.name.padEnd(16))} ${chalk.gray(s.vram.padEnd(8))} ${s.speed.padEnd(11)} ${qFmt(s.quality)}${star}${fit}`);
1612
+ console.log(` ${' '.repeat(16)} ${got}`);
1613
+ }
1614
+ console.log('');
1615
+
1616
+ // Mandatory: print the "extraction should be local" disclaimer on every run.
1617
+ console.log(chalk.bold.yellow(' ⚠ Extraction should be done with a LOCAL model'));
1618
+ const wrap = (text, width) => {
1619
+ const out = []; let line = '';
1620
+ for (const word of text.split(/\s+/)) {
1621
+ if ((line + ' ' + word).trim().length > width) { out.push(line.trim()); line = word; }
1622
+ else line += ' ' + word;
1623
+ }
1624
+ if (line.trim()) out.push(line.trim());
1625
+ return out;
1626
+ };
1627
+ for (const line of wrap(CLOUD_EXTRACTION_DISCLAIMER, 78)) {
1628
+ console.log(chalk.yellow(' ' + line));
1629
+ }
1630
+ console.log('');
1631
+ if (recommendedId && !suggestions.find(s => s.id === recommendedId)?.installed) {
1632
+ console.log(chalk.gray(` Get started: `) + chalk.white(`ollama pull ${recommendedId}`) + chalk.gray(` then `) + chalk.white(`seo-intel setup`));
1633
+ console.log('');
1634
+ }
1635
+ });
1636
+
1550
1637
  // ── STATUS ─────────────────────────────────────────────────────────────────
1551
1638
  program
1552
1639
  .command('status')
@@ -2357,6 +2444,7 @@ program
2357
2444
  .option('--open', 'Open browser automatically', true)
2358
2445
  .option('--no-open', 'Do not open browser')
2359
2446
  .action(async (opts) => {
2447
+ _keepProcessAlive = true;
2360
2448
  const port = parseInt(opts.port, 10);
2361
2449
  process.env.PORT = String(port);
2362
2450
  if (opts.open) process.env.SEO_INTEL_AUTO_OPEN = '1';
@@ -2369,6 +2457,7 @@ program
2369
2457
  .description('Open the web-based setup wizard in your browser')
2370
2458
  .option('--port <n>', 'Server port', '3000')
2371
2459
  .action(async (opts) => {
2460
+ _keepProcessAlive = true;
2372
2461
  const port = parseInt(opts.port, 10);
2373
2462
  process.env.PORT = String(port);
2374
2463
  await import('./server.js');
@@ -4326,9 +4415,24 @@ program
4326
4415
  }
4327
4416
 
4328
4417
  const { runAeoAnalysis, persistAeoScores, upsertCitabilityInsights } = await import('./analyses/aeo/index.js');
4418
+ const { fetchAiAccessForDomains } = await import('./analyses/aeo/ai-access.js');
4419
+
4420
+ // AI-crawler access (robots.txt) — a domain-level signal. This needs the network, but it is cheap
4421
+ // (one robots.txt fetch per target/owned domain) and best-effort.
4422
+ const targetDomains = db
4423
+ .prepare("SELECT DISTINCT domain FROM domains WHERE project = ? AND role IN ('target','owned')")
4424
+ .all(project)
4425
+ .map(r => r.domain);
4426
+ let aiAccessByDomain = null;
4427
+ if (targetDomains.length) {
4428
+ if (isBrief) console.log(chalk.gray(` Checking AI-crawler access (robots.txt) for ${targetDomains.length} domain(s)…`));
4429
+ try { aiAccessByDomain = await fetchAiAccessForDomains(targetDomains); }
4430
+ catch { aiAccessByDomain = null; }
4431
+ }
4329
4432
 
4330
4433
  const results = runAeoAnalysis(db, project, {
4331
4434
  includeCompetitors: !opts.targetOnly,
4435
+ aiAccessByDomain,
4332
4436
  log: (msg) => isBrief ? console.log(chalk.gray(msg)) : null,
4333
4437
  });
4334
4438
 
@@ -4340,7 +4444,7 @@ program
4340
4444
 
4341
4445
  // Persist scores
4342
4446
  persistAeoScores(db, results);
4343
- upsertCitabilityInsights(db, project, results.target);
4447
+ upsertCitabilityInsights(db, project, results.target, results.summary.aiAccess);
4344
4448
 
4345
4449
  const { summary } = results;
4346
4450
  const { tierCounts } = summary;
@@ -4439,6 +4543,21 @@ program
4439
4543
  console.log(` ${chalk.red('●')} Poor (<35): ${tierCounts.poor}`);
4440
4544
  console.log('');
4441
4545
 
4546
+ if (summary.aiAccess && summary.aiAccess.length) {
4547
+ console.log(chalk.bold(' 🤖 AI Crawler Access (robots.txt)'));
4548
+ console.log('');
4549
+ for (const a of summary.aiAccess) {
4550
+ const icon = a.verdict === 'blocked' ? chalk.red('✗') : a.verdict === 'partial' ? chalk.yellow('⚠') : chalk.green('✓');
4551
+ const label = a.verdict === 'blocked' ? chalk.red('BLOCKED') : a.verdict === 'partial' ? chalk.yellow('PARTIAL') : chalk.green('OPEN');
4552
+ console.log(` ${icon} ${chalk.bold(a.domain)} ${label} ${chalk.gray(a.score + '/100')}`);
4553
+ if (a.verdict !== 'open') console.log(chalk.gray(` ${a.detail}`));
4554
+ }
4555
+ if (summary.gatedPages > 0) {
4556
+ console.log(chalk.red(` ⛔ ${summary.gatedPages} page(s) capped at 30/100 — AI assistants can't read them, so on-page quality can't help.`));
4557
+ }
4558
+ console.log('');
4559
+ }
4560
+
4442
4561
  if (summary.weakestSignals.length) {
4443
4562
  console.log(chalk.bold(' 🔍 Weakest Signals (target average)'));
4444
4563
  console.log('');
@@ -4488,12 +4607,16 @@ program
4488
4607
  }
4489
4608
 
4490
4609
  // ── Regenerate dashboard ──
4491
- try {
4492
- const configs = loadAllConfigs();
4493
- generateMultiDashboard(db, configs);
4494
- console.log(chalk.green(' ✅ Dashboard updated with AI Citability card\n'));
4495
- } catch (e) {
4496
- console.log(chalk.gray(` (Dashboard not updated: ${e.message})\n`));
4610
+ // Skip in JSON mode: generateMultiDashboard logs progress to stdout, which
4611
+ // would corrupt the JSON object that machine/agent consumers parse.
4612
+ if (isBrief) {
4613
+ try {
4614
+ const configs = loadAllConfigs();
4615
+ generateMultiDashboard(db, configs);
4616
+ console.log(chalk.green(' ✅ Dashboard updated with AI Citability card\n'));
4617
+ } catch (e) {
4618
+ console.log(chalk.gray(` (Dashboard not updated: ${e.message})\n`));
4619
+ }
4497
4620
  }
4498
4621
 
4499
4622
  // ── Save report ──
@@ -4674,11 +4797,14 @@ program
4674
4797
  console.log(chalk.green(`\n ✅ Report saved: ${opts.out}\n`));
4675
4798
  }
4676
4799
 
4677
- // Regenerate dashboard
4678
- try {
4679
- const configs = loadAllConfigs();
4680
- generateMultiDashboard(db, configs);
4681
- } catch {}
4800
+ // Regenerate dashboard — skip in JSON mode so generateMultiDashboard's
4801
+ // stdout progress logs don't corrupt the JSON output.
4802
+ if (opts.format !== 'json') {
4803
+ try {
4804
+ const configs = loadAllConfigs();
4805
+ generateMultiDashboard(db, configs);
4806
+ } catch {}
4807
+ }
4682
4808
  });
4683
4809
 
4684
4810
  // ── AEO BLOG DRAFT GENERATOR ─────────────────────────────────────────────
@@ -5057,10 +5183,20 @@ program
5057
5183
  });
5058
5184
 
5059
5185
  // ── License activation hook — phone-home if cache is stale/missing ──────────
5186
+ // Hard cap so a slow/unreachable license server can never block the command.
5187
+ // If activation doesn't finish in time we proceed on cached/offline behavior;
5188
+ // the in-flight request is abandoned when the process exits (see bottom of file).
5189
+ const LICENSE_ACTIVATION_BUDGET_MS = 2500;
5060
5190
  program.hook('preAction', async () => {
5061
5191
  const license = loadLicense();
5062
5192
  if (license.needsActivation || license.stale) {
5063
- await activateLicense().catch(() => {});
5193
+ await Promise.race([
5194
+ activateLicense().catch(() => {}),
5195
+ new Promise((resolve) => {
5196
+ const t = setTimeout(resolve, LICENSE_ACTIVATION_BUDGET_MS);
5197
+ if (typeof t.unref === 'function') t.unref();
5198
+ }),
5199
+ ]);
5064
5200
  }
5065
5201
  });
5066
5202
 
@@ -5164,6 +5300,7 @@ program
5164
5300
  const tag = chalk.cyan(`[${domain.split('.')[0]}]`);
5165
5301
 
5166
5302
  try {
5303
+ const { crawlDomain } = await import('./crawler/index.js');
5167
5304
  for await (const page of crawlDomain(siteUrl, {
5168
5305
  maxPages, stealth: useStealth, tiered: true,
5169
5306
  onSitemapDiscovered: (urls) => {
@@ -5516,7 +5653,27 @@ program
5516
5653
  });
5517
5654
 
5518
5655
  // Global error handler — ensures uncaught errors in async actions exit non-zero (BUG-004)
5519
- program.parseAsync().catch(err => {
5520
- console.error(chalk.red(`\n✗ ${err.message}\n`));
5521
- process.exit(1);
5522
- });
5656
+ program.parseAsync()
5657
+ .then(() => {
5658
+ // One-shot command finished — exit now instead of waiting on the event loop
5659
+ // to drain. A hung background fetch (update check / license phone-home) would
5660
+ // otherwise keep the process alive until the OS connect timeout (~10s).
5661
+ if (!_keepProcessAlive) flushThenExit(process.exitCode ?? 0);
5662
+ })
5663
+ .catch(err => {
5664
+ console.error(chalk.red(`\n✗ ${err.message}\n`));
5665
+ process.exit(1);
5666
+ });
5667
+
5668
+ // Drain any buffered stdout/stderr before exiting. process.exit() truncates
5669
+ // async pipe writes (large `--format json` output piped to another process),
5670
+ // so we wait for both streams to flush, with a short safety net so a stalled
5671
+ // pipe can never hang the process.
5672
+ function flushThenExit(code) {
5673
+ let pending = 2;
5674
+ const done = () => { if (--pending === 0) process.exit(code); };
5675
+ process.stdout.write('', done);
5676
+ process.stderr.write('', done);
5677
+ const t = setTimeout(() => process.exit(code), 2000);
5678
+ if (typeof t.unref === 'function') t.unref();
5679
+ }
package/lib/license.js CHANGED
@@ -36,6 +36,23 @@ const CACHE_PATH = join(CACHE_DIR, 'license-cache.json');
36
36
  const LS_CACHE_TTL = 24 * 60 * 60 * 1000; // 24h fresh
37
37
  const LS_STALE_LIMIT = 7 * 24 * 60 * 60 * 1000; // 7 days stale max
38
38
 
39
+ // Lemon Squeezy License API base — overridable for testing (e.g. point at an
40
+ // unreachable host to verify graceful degradation).
41
+ const LS_API_BASE = (process.env.SEO_INTEL_LICENSE_API || 'https://api.lemonsqueezy.com').replace(/\/+$/, '');
42
+
43
+ // Per-request network budget. A reachable server answers well under this; a
44
+ // slow/unreachable one aborts here instead of hanging. The CLI also caps the
45
+ // total activation wait in its preAction hook, so this is a backstop.
46
+ const NETWORK_TIMEOUT_MS = 6000;
47
+
48
+ /** AbortController whose timeout won't keep the event loop alive on its own. */
49
+ function abortAfter(ms) {
50
+ const controller = new AbortController();
51
+ const timer = setTimeout(() => controller.abort(), ms);
52
+ if (typeof timer.unref === 'function') timer.unref();
53
+ return { controller, timer };
54
+ }
55
+
39
56
  // ── Tiers ──────────────────────────────────────────────────────────────────
40
57
 
41
58
  export const TIERS = {
@@ -132,23 +149,21 @@ function checkCache(key) {
132
149
  * Returns instance_id on success.
133
150
  */
134
151
  async function activateWithLS(key) {
152
+ const { controller, timer } = abortAfter(NETWORK_TIMEOUT_MS);
135
153
  try {
136
- const controller = new AbortController();
137
- const timeout = setTimeout(() => controller.abort(), 8000);
138
-
139
154
  const body = new URLSearchParams({
140
155
  license_key: key,
141
156
  instance_name: `seo-intel-${getMachineId()}`,
142
157
  });
143
158
 
144
- const res = await fetch('https://api.lemonsqueezy.com/v1/licenses/activate', {
159
+ const res = await fetch(`${LS_API_BASE}/v1/licenses/activate`, {
145
160
  signal: controller.signal,
146
161
  method: 'POST',
147
162
  headers: { 'Accept': 'application/json' },
148
163
  body,
149
164
  });
150
165
 
151
- clearTimeout(timeout);
166
+ clearTimeout(timer);
152
167
  const data = await res.json();
153
168
 
154
169
  if (data.activated) {
@@ -179,22 +194,20 @@ async function activateWithLS(key) {
179
194
  * Params: license_key, instance_id (optional)
180
195
  */
181
196
  async function validateWithLS(key, instanceId) {
197
+ const { controller, timer } = abortAfter(NETWORK_TIMEOUT_MS);
182
198
  try {
183
- const controller = new AbortController();
184
- const timeout = setTimeout(() => controller.abort(), 8000);
185
-
186
199
  const params = { license_key: key };
187
200
  if (instanceId) params.instance_id = instanceId;
188
201
  const body = new URLSearchParams(params);
189
202
 
190
- const res = await fetch('https://api.lemonsqueezy.com/v1/licenses/validate', {
203
+ const res = await fetch(`${LS_API_BASE}/v1/licenses/validate`, {
191
204
  signal: controller.signal,
192
205
  method: 'POST',
193
206
  headers: { 'Accept': 'application/json' },
194
207
  body,
195
208
  });
196
209
 
197
- clearTimeout(timeout);
210
+ clearTimeout(timer);
198
211
  const data = await res.json();
199
212
 
200
213
  if (data.valid) {
@@ -228,23 +241,21 @@ export async function deactivateLicense() {
228
241
  const instanceId = cache?.instanceId;
229
242
  if (!instanceId) return { deactivated: false, error: 'No active instance to deactivate' };
230
243
 
244
+ const { controller, timer } = abortAfter(NETWORK_TIMEOUT_MS);
231
245
  try {
232
- const controller = new AbortController();
233
- const timeout = setTimeout(() => controller.abort(), 8000);
234
-
235
246
  const body = new URLSearchParams({
236
247
  license_key: keyInfo.value,
237
248
  instance_id: instanceId,
238
249
  });
239
250
 
240
- const res = await fetch('https://api.lemonsqueezy.com/v1/licenses/deactivate', {
251
+ const res = await fetch(`${LS_API_BASE}/v1/licenses/deactivate`, {
241
252
  signal: controller.signal,
242
253
  method: 'POST',
243
254
  headers: { 'Accept': 'application/json' },
244
255
  body,
245
256
  });
246
257
 
247
- clearTimeout(timeout);
258
+ clearTimeout(timer);
248
259
  const data = await res.json();
249
260
 
250
261
  if (data.deactivated) {
package/lib/updater.js CHANGED
@@ -45,6 +45,19 @@ const CACHE_DIR = join(homedir(), '.seo-intel');
45
45
  const CACHE_FILE = join(CACHE_DIR, 'update-cache.json');
46
46
  const CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
47
47
 
48
+ // Network budget for a single update-source fetch. Update checks are advisory
49
+ // and fire-and-forget, so keep this tight — a slow source must never hold up
50
+ // the CLI. The abort timer is unref'd so it can't keep the event loop alive.
51
+ const FETCH_TIMEOUT_MS = 3000;
52
+
53
+ /** Start an AbortController whose timeout won't keep the process alive. */
54
+ function abortAfter(ms) {
55
+ const controller = new AbortController();
56
+ const timer = setTimeout(() => controller.abort(), ms);
57
+ if (typeof timer.unref === 'function') timer.unref();
58
+ return { controller, timer };
59
+ }
60
+
48
61
  function readCache() {
49
62
  try {
50
63
  if (!existsSync(CACHE_FILE)) return null;
@@ -95,8 +108,7 @@ export function compareSemver(a, b) {
95
108
  * Uses the abbreviated metadata endpoint (fast, no auth needed).
96
109
  */
97
110
  async function checkNpm() {
98
- const controller = new AbortController();
99
- const timeout = setTimeout(() => controller.abort(), 5000);
111
+ const { controller, timer } = abortAfter(FETCH_TIMEOUT_MS);
100
112
 
101
113
  try {
102
114
  const res = await fetch('https://registry.npmjs.org/seo-intel/latest', {
@@ -109,7 +121,7 @@ async function checkNpm() {
109
121
  } catch {
110
122
  return null;
111
123
  } finally {
112
- clearTimeout(timeout);
124
+ clearTimeout(timer);
113
125
  }
114
126
  }
115
127
 
@@ -118,8 +130,7 @@ async function checkNpm() {
118
130
  * Endpoint returns { version, changelog?, downloadUrl? }
119
131
  */
120
132
  async function checkUkkometa() {
121
- const controller = new AbortController();
122
- const timeout = setTimeout(() => controller.abort(), 5000);
133
+ const { controller, timer } = abortAfter(FETCH_TIMEOUT_MS);
123
134
 
124
135
  try {
125
136
  const res = await fetch('https://ukkometa.fi/api/seo-intel/version', {
@@ -139,7 +150,7 @@ async function checkUkkometa() {
139
150
  } catch {
140
151
  return null;
141
152
  } finally {
142
- clearTimeout(timeout);
153
+ clearTimeout(timer);
143
154
  }
144
155
  }
145
156