seo-intel 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -27,9 +27,10 @@ GEMINI_API_KEY=
27
27
  # GOOGLE_CLIENT_SECRET=
28
28
 
29
29
  # ── Extraction Model (local Ollama) ───────────────────────────────────────
30
- # Recommended: qwen3.5:9b (balanced), qwen3.5:4b (budget), qwen3.5:27b (quality)
30
+ # Recommended: gemma4:e4b (default), gemma4:e2b (budget), gemma4:26b (quality)
31
+ # Also supported: qwen3.5:9b, qwen3.5:4b, qwen3.5:27b
31
32
  OLLAMA_URL=http://localhost:11434
32
- OLLAMA_MODEL=qwen3.5:9b
33
+ OLLAMA_MODEL=gemma4:e4b
33
34
  OLLAMA_CTX=8192
34
35
  OLLAMA_TIMEOUT_MS=60000 # 60s default — increase to 120000 on slow hardware (BUG-008)
35
36
 
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.4.0 (2026-04-03)
4
+
5
+ ### New Feature: Gap Intelligence
6
+ - `seo-intel gap-intel <project>` — topic/content gap analysis against competitors
7
+ - Extracts topics from your pages and competitor pages via Ollama
8
+ - Fuzzy set comparison identifies coverage gaps with substring matching
9
+ - LLM-powered prioritisation ranks gaps by buyer intent and recommended page type
10
+ - Options: `--vs <domains>`, `--type docs|blog|landing|all`, `--limit <n>`, `--raw`, `--format`, `--out`
11
+ - Available from dashboard terminal and CLI (Pro feature)
12
+
13
+ ### New Default: Gemma 4 Models
14
+ - **Gemma 4 e4b** is now the default extraction model (was Qwen 3 4B)
15
+ - Four extraction tiers: e2b (budget, 46 t/s), e4b (balanced, 23 t/s), 26b (quality), 31b (power)
16
+ - Two analysis tiers: 26b (recommended 11GB+ VRAM), 31b (16GB+ VRAM)
17
+ - Qwen models remain fully supported as alternatives
18
+ - Setup wizard, model recommendations, and VRAM tiers updated for Gemma 4
19
+
20
+ ### Server
21
+ - Added `gap-intel` to terminal command whitelist
22
+ - Forward `--vs`, `--type`, `--limit`, `--raw`, `--out` params from dashboard to CLI
23
+
3
24
  ## 1.3.1 (2026-04-02)
4
25
 
5
26
  ### Fixes
package/README.md CHANGED
@@ -157,14 +157,15 @@ SEO Intel uses Ollama for local AI extraction. Edit `.env`:
157
157
 
158
158
  ```bash
159
159
  OLLAMA_URL=http://localhost:11434
160
- OLLAMA_MODEL=qwen3.5:9b # recommended (needs 6GB+ VRAM)
160
+ OLLAMA_MODEL=gemma4:e4b # recommended (MoE, needs 6GB+ VRAM)
161
161
  OLLAMA_CTX=16384
162
162
  ```
163
163
 
164
164
  Model recommendations by VRAM:
165
- - **3-4 GB** → `qwen3.5:4b`
166
- - **6-8 GB** → `qwen3.5:9b` (recommended)
167
- - **16+ GB** → `qwen3.5:27b`
165
+ - **4-5 GB** → `gemma4:e2b` (MoE edge model)
166
+ - **6-10 GB** → `gemma4:e4b` (recommended)
167
+ - **12+ GB** → `gemma4:26b` (MoE, frontier quality)
168
+ - Also supported: `qwen3.5:4b`, `qwen3.5:9b`, `qwen3.5:27b`
168
169
 
169
170
  ### Analysis (cloud, user's API key)
170
171
 
@@ -0,0 +1,339 @@
1
+ /**
2
+ * Gap Intel — Topic/Content Gap Analysis
3
+ *
4
+ * Reads crawled pages for target + competitors from DB,
5
+ * extracts topic clusters via local LLM, compares coverage,
6
+ * and outputs a prioritised gap report.
7
+ *
8
+ * Zero network — reads from SQLite + Ollama only.
9
+ */
10
+
11
+ import { getProjectDomains, getTargetDomains, getCompetitorDomains } from '../../exports/queries.js';
12
+
13
+ // ── Page type URL patterns ───────────────────────────────────────────────────
14
+
15
+ const PAGE_TYPE_PATTERNS = {
16
+ docs: ['/docs/', '/guide', '/api/', '/reference', '/quickstart', '/tutorial', '/learn'],
17
+ blog: ['/blog/', '/post/', '/article/', '/news/'],
18
+ landing: ['/pricing', '/features', '/product', '/solutions', '/use-case', '/compare'],
19
+ };
20
+
21
+ function matchesPageType(url, type) {
22
+ if (!type || type === 'all') return true;
23
+ const patterns = PAGE_TYPE_PATTERNS[type];
24
+ if (!patterns) return true;
25
+ const lower = url.toLowerCase();
26
+ return patterns.some(p => lower.includes(p));
27
+ }
28
+
29
+ // ── Load pages from DB ───────────────────────────────────────────────────────
30
+
31
+ function loadPages(db, project, opts = {}) {
32
+ const { type = 'all', limit = 100, vsDomains = [] } = opts;
33
+
34
+ const domains = getProjectDomains(db, project);
35
+ const targetDomains = getTargetDomains(domains);
36
+ const competitorDomains = vsDomains.length
37
+ ? domains.filter(d => d.role === 'competitor' && vsDomains.some(v => d.domain.includes(v)))
38
+ : getCompetitorDomains(domains);
39
+
40
+ if (!targetDomains.length) return { target: [], competitors: new Map(), targetDomain: null, competitorDomainNames: [] };
41
+
42
+ const loadForDomains = (domainRows) => {
43
+ const allPages = [];
44
+ for (const d of domainRows) {
45
+ const pages = db.prepare(`
46
+ SELECT p.url, p.title, p.meta_desc, p.body_text, p.word_count
47
+ FROM pages p
48
+ WHERE p.domain_id = ?
49
+ AND p.status_code = 200
50
+ AND p.body_text IS NOT NULL AND p.body_text != ''
51
+ ORDER BY p.word_count DESC
52
+ LIMIT ?
53
+ `).all(d.id, limit);
54
+ allPages.push(...pages.filter(p => matchesPageType(p.url, type)).map(p => ({ ...p, domain: d.domain })));
55
+ }
56
+ return allPages;
57
+ };
58
+
59
+ const targetPages = loadForDomains(targetDomains);
60
+ const compPages = new Map();
61
+ for (const d of competitorDomains) {
62
+ const pages = loadForDomains([d]);
63
+ if (pages.length) compPages.set(d.domain, pages);
64
+ }
65
+
66
+ return {
67
+ target: targetPages,
68
+ competitors: compPages,
69
+ targetDomain: targetDomains[0]?.domain,
70
+ competitorDomainNames: competitorDomains.map(d => d.domain),
71
+ };
72
+ }
73
+
74
+ // ── Extract topics from pages (LLM) ─────────────────────────────────────────
75
+
76
+ async function extractTopics(pages, domain, ollamaUrl, ollamaModel, log) {
77
+ const batchSize = 25;
78
+ const allTopics = new Set();
79
+
80
+ for (let i = 0; i < pages.length; i += batchSize) {
81
+ const batch = pages.slice(i, i + batchSize);
82
+ const listing = batch.map((p, idx) => {
83
+ const path = p.url.replace(/https?:\/\/[^/]+/, '') || '/';
84
+ return `${idx + 1}. ${p.title || path}\n ${p.meta_desc || '(no description)'}`;
85
+ }).join('\n');
86
+
87
+ const prompt = `Given these ${batch.length} pages from ${domain}:\n\n${listing}\n\nExtract the main topics and capabilities this site covers.\nReturn ONLY a flat list of specific topic labels, one per line.\nBe specific: "RPC rate limits" not just "rate limits".\n"WebSocket subscription guide" not just "WebSockets".\nNo numbering, no bullets, no explanations — just topic labels.`;
88
+
89
+ try {
90
+ const res = await fetch(`${ollamaUrl}/api/generate`, {
91
+ method: 'POST',
92
+ headers: { 'Content-Type': 'application/json' },
93
+ body: JSON.stringify({
94
+ model: ollamaModel,
95
+ prompt,
96
+ stream: false,
97
+ options: { temperature: 0.2, num_ctx: 8192 },
98
+ }),
99
+ });
100
+
101
+ if (!res.ok) throw new Error(`Ollama ${res.status}`);
102
+ const data = await res.json();
103
+ const lines = (data.response || '').split('\n').map(l => l.trim()).filter(l => l && !l.startsWith('#'));
104
+ for (const line of lines) {
105
+ // Strip bullets, numbers, etc.
106
+ const clean = line.replace(/^[-*•\d.)\s]+/, '').trim();
107
+ if (clean.length > 2 && clean.length < 120) allTopics.add(clean);
108
+ }
109
+ log(` ${domain}: batch ${Math.floor(i / batchSize) + 1} → ${lines.length} topics`);
110
+ } catch (e) {
111
+ log(` ⚠️ ${domain} batch ${Math.floor(i / batchSize) + 1} failed: ${e.message}`);
112
+ }
113
+ }
114
+
115
+ return [...allTopics];
116
+ }
117
+
118
+ // ── Compare topic coverage ───────────────────────────────────────────────────
119
+
120
+ function compareTopics(targetTopics, competitorTopicsMap) {
121
+ const targetSet = new Set(targetTopics.map(t => t.toLowerCase()));
122
+
123
+ const gaps = []; // topics competitors have, target doesn't
124
+ const depthGaps = []; // topics target has but competitors go deeper
125
+
126
+ for (const [domain, topics] of competitorTopicsMap) {
127
+ for (const topic of topics) {
128
+ const lower = topic.toLowerCase();
129
+ // Fuzzy match — check if target covers this topic (substring match)
130
+ const covered = [...targetSet].some(t =>
131
+ t.includes(lower) || lower.includes(t) ||
132
+ (lower.split(' ').length > 1 && t.split(' ').some(w => lower.includes(w) && w.length > 4))
133
+ );
134
+
135
+ if (!covered) {
136
+ const existing = gaps.find(g => g.topic.toLowerCase() === lower);
137
+ if (existing) {
138
+ if (!existing.coveredBy.includes(domain)) existing.coveredBy.push(domain);
139
+ } else {
140
+ gaps.push({ topic, coveredBy: [domain] });
141
+ }
142
+ }
143
+ }
144
+ }
145
+
146
+ return { gaps, depthGaps };
147
+ }
148
+
149
+ // ── LLM gap prioritisation ──────────────────────────────────────────────────
150
+
151
+ async function prioritiseGaps(gaps, targetDomain, context, ollamaUrl, ollamaModel, log) {
152
+ if (!gaps.length) return [];
153
+
154
+ const gapList = gaps.slice(0, 40).map(g =>
155
+ `- ${g.topic} (covered by: ${g.coveredBy.join(', ')})`
156
+ ).join('\n');
157
+
158
+ const prompt = `Target site: ${targetDomain} (${context || 'business website'})
159
+ Topics competitors cover that the target project lacks:
160
+
161
+ ${gapList}
162
+
163
+ For each gap, return a markdown table row with these columns:
164
+ | Topic | Covered by | Buyer Intent | Page Type | Why It Matters |
165
+
166
+ Buyer Intent: high, medium, or low
167
+ Page Type: guide, reference, landing, blog, or comparison
168
+ Why It Matters: one sentence on SEO or sales impact
169
+
170
+ Return ONLY the markdown table rows (no header, no explanation).
171
+ Sort by buyer intent (high first).`;
172
+
173
+ try {
174
+ const res = await fetch(`${ollamaUrl}/api/generate`, {
175
+ method: 'POST',
176
+ headers: { 'Content-Type': 'application/json' },
177
+ body: JSON.stringify({
178
+ model: ollamaModel,
179
+ prompt,
180
+ stream: false,
181
+ options: { temperature: 0.2, num_ctx: 8192 },
182
+ }),
183
+ });
184
+
185
+ if (!res.ok) throw new Error(`Ollama ${res.status}`);
186
+ const data = await res.json();
187
+ return (data.response || '').split('\n').filter(l => l.trim().startsWith('|'));
188
+ } catch (e) {
189
+ log(` ⚠️ LLM prioritisation failed: ${e.message}`);
190
+ return null; // Fall back to raw output
191
+ }
192
+ }
193
+
194
+ // ── Generate report ─────────────────────────────────────────────────────────
195
+
196
+ function generateReport(data) {
197
+ const { targetDomain, competitorDomainNames, targetTopics, competitorTopicsMap, gaps, prioritisedRows, pageData } = data;
198
+ const ts = new Date().toISOString().slice(0, 10);
199
+
200
+ let md = `# Gap Intel Report — ${targetDomain} vs ${competitorDomainNames.join(', ')}\n`;
201
+ md += `Generated: ${ts} | Pages analyzed: ${targetDomain}(${pageData.target.length})`;
202
+ for (const [dom, pages] of pageData.competitors) {
203
+ md += ` ${dom}(${pages.length})`;
204
+ }
205
+ md += '\n\n';
206
+
207
+ // Prioritised gaps
208
+ if (prioritisedRows && prioritisedRows.length) {
209
+ const high = prioritisedRows.filter(r => r.toLowerCase().includes('high'));
210
+ const medium = prioritisedRows.filter(r => r.toLowerCase().includes('medium'));
211
+ const low = prioritisedRows.filter(r => !r.toLowerCase().includes('high') && !r.toLowerCase().includes('medium'));
212
+
213
+ if (high.length) {
214
+ md += `## 🔴 High Priority Gaps\n\n`;
215
+ md += `| Topic | Covered by | Buyer Intent | Page Type | Why It Matters |\n`;
216
+ md += `|-------|-----------|--------------|-----------|----------------|\n`;
217
+ md += high.join('\n') + '\n\n';
218
+ }
219
+ if (medium.length) {
220
+ md += `## 🟡 Medium Priority Gaps\n\n`;
221
+ md += `| Topic | Covered by | Buyer Intent | Page Type | Why It Matters |\n`;
222
+ md += `|-------|-----------|--------------|-----------|----------------|\n`;
223
+ md += medium.join('\n') + '\n\n';
224
+ }
225
+ if (low.length) {
226
+ md += `## 🟢 Lower Priority Gaps\n\n`;
227
+ md += `| Topic | Covered by | Buyer Intent | Page Type | Why It Matters |\n`;
228
+ md += `|-------|-----------|--------------|-----------|----------------|\n`;
229
+ md += low.join('\n') + '\n\n';
230
+ }
231
+ } else {
232
+ // Raw gaps (LLM failed or --raw mode)
233
+ if (gaps.length) {
234
+ md += `## Content Gaps\n\n`;
235
+ md += `| Topic | Covered by |\n`;
236
+ md += `|-------|-----------|\n`;
237
+ for (const g of gaps) {
238
+ md += `| ${g.topic} | ${g.coveredBy.join(', ')} |\n`;
239
+ }
240
+ md += '\n';
241
+ } else {
242
+ md += `> No significant gaps found — target covers all competitor topics.\n\n`;
243
+ }
244
+ }
245
+
246
+ // Raw topic matrix
247
+ md += `## Raw Topic Matrix\n\n`;
248
+ md += `### ${targetDomain} (${targetTopics.length} topics)\n`;
249
+ for (const t of targetTopics.slice(0, 50)) md += `- ${t}\n`;
250
+ if (targetTopics.length > 50) md += `- ... and ${targetTopics.length - 50} more\n`;
251
+ md += '\n';
252
+
253
+ for (const [dom, topics] of competitorTopicsMap) {
254
+ md += `### ${dom} (${topics.length} topics)\n`;
255
+ for (const t of topics.slice(0, 50)) md += `- ${t}\n`;
256
+ if (topics.length > 50) md += `- ... and ${topics.length - 50} more\n`;
257
+ md += '\n';
258
+ }
259
+
260
+ return md;
261
+ }
262
+
263
+ // ── Main entry point ─────────────────────────────────────────────────────────
264
+
265
+ /**
266
+ * Run gap-intel analysis.
267
+ *
268
+ * @param {import('node:sqlite').DatabaseSync} db
269
+ * @param {string} project
270
+ * @param {object} config - project config with context
271
+ * @param {object} opts
272
+ * @param {string[]} [opts.vs] - competitor domains to compare (default: all from config)
273
+ * @param {string} [opts.type] - page type filter: docs, blog, landing, all
274
+ * @param {number} [opts.limit] - max pages per domain
275
+ * @param {boolean} [opts.raw] - skip LLM prioritisation
276
+ * @param {string} [opts.ollamaUrl] - Ollama host
277
+ * @param {string} [opts.ollamaModel] - Ollama model
278
+ * @param {function} [opts.log] - logger function
279
+ * @returns {Promise<string>} markdown report
280
+ */
281
+ export async function runGapIntel(db, project, config, opts = {}) {
282
+ const log = opts.log || console.log;
283
+ const ollamaUrl = opts.ollamaUrl || process.env.OLLAMA_URL || 'http://localhost:11434';
284
+ const ollamaModel = opts.ollamaModel || process.env.OLLAMA_MODEL || 'gemma4:e4b';
285
+ const type = opts.type || 'all';
286
+ const limit = opts.limit || 100;
287
+ const raw = opts.raw || false;
288
+ const vsDomains = opts.vs || [];
289
+
290
+ log(' Loading pages from DB...');
291
+ const pageData = loadPages(db, project, { type, limit, vsDomains });
292
+
293
+ if (!pageData.target.length) {
294
+ return `# Gap Intel — ${project}\n\n> ⚠️ No pages with body_text found for target.\n> Run: seo-intel crawl ${project}\n`;
295
+ }
296
+
297
+ if (!pageData.competitors.size) {
298
+ return `# Gap Intel — ${project}\n\n> ⚠️ No competitor pages found in DB.\n> Check project config competitors and run: seo-intel crawl ${project}\n`;
299
+ }
300
+
301
+ log(` Target: ${pageData.targetDomain} (${pageData.target.length} pages)`);
302
+ for (const [dom, pages] of pageData.competitors) {
303
+ log(` Competitor: ${dom} (${pages.length} pages)`);
304
+ }
305
+
306
+ // Step 2 — Extract topics
307
+ log('\n Extracting topics via LLM...');
308
+ const targetTopics = await extractTopics(pageData.target, pageData.targetDomain, ollamaUrl, ollamaModel, log);
309
+
310
+ const competitorTopicsMap = new Map();
311
+ for (const [dom, pages] of pageData.competitors) {
312
+ const topics = await extractTopics(pages, dom, ollamaUrl, ollamaModel, log);
313
+ competitorTopicsMap.set(dom, topics);
314
+ }
315
+
316
+ // Step 3 — Compare coverage
317
+ log('\n Comparing topic coverage...');
318
+ const { gaps } = compareTopics(targetTopics, competitorTopicsMap);
319
+ log(` Found ${gaps.length} topic gaps`);
320
+
321
+ // Step 4 — LLM prioritisation (unless --raw)
322
+ let prioritisedRows = null;
323
+ if (!raw && gaps.length) {
324
+ log('\n Prioritising gaps via LLM...');
325
+ const context = config?.context?.industry || config?.context?.goal || '';
326
+ prioritisedRows = await prioritiseGaps(gaps, pageData.targetDomain, context, ollamaUrl, ollamaModel, log);
327
+ }
328
+
329
+ // Step 5 — Generate report
330
+ return generateReport({
331
+ targetDomain: pageData.targetDomain,
332
+ competitorDomainNames: [...pageData.competitors.keys()],
333
+ targetTopics,
334
+ competitorTopicsMap,
335
+ gaps,
336
+ prioritisedRows,
337
+ pageData,
338
+ });
339
+ }
package/cli.js CHANGED
@@ -68,24 +68,24 @@ function defaultSiteUrl(domain) {
68
68
 
69
69
  function resolveExtractionRuntime(config) {
70
70
  const primaryUrl = config?.crawl?.ollamaHost || process.env.OLLAMA_URL || 'http://localhost:11434';
71
- const primaryModel = config?.crawl?.extractionModel || process.env.OLLAMA_MODEL || 'qwen3:4b';
71
+ const primaryModel = config?.crawl?.extractionModel || process.env.OLLAMA_MODEL || 'gemma4:e4b';
72
72
  const fallbackUrl = process.env.OLLAMA_FALLBACK_URL || '';
73
73
  const fallbackModel = process.env.OLLAMA_FALLBACK_MODEL || primaryModel;
74
74
  const localhost = 'http://localhost:11434';
75
75
 
76
76
  const candidates = [
77
- { host: String(primaryUrl).trim().replace(/\/+$/, ''), model: String(primaryModel).trim() || 'qwen3:4b' },
77
+ { host: String(primaryUrl).trim().replace(/\/+$/, ''), model: String(primaryModel).trim() || 'gemma4:e4b' },
78
78
  ];
79
79
 
80
80
  if (fallbackUrl) {
81
81
  candidates.push({
82
82
  host: String(fallbackUrl).trim().replace(/\/+$/, ''),
83
- model: String(fallbackModel).trim() || String(primaryModel).trim() || 'qwen3:4b',
83
+ model: String(fallbackModel).trim() || String(primaryModel).trim() || 'gemma4:e4b',
84
84
  });
85
85
  }
86
86
 
87
87
  if (!candidates.some(candidate => candidate.host === localhost)) {
88
- candidates.push({ host: localhost, model: String(primaryModel).trim() || 'qwen3:4b' });
88
+ candidates.push({ host: localhost, model: String(primaryModel).trim() || 'gemma4:e4b' });
89
89
  }
90
90
 
91
91
  const seen = new Set();
@@ -134,8 +134,8 @@ async function checkOllamaAvailability(config) {
134
134
 
135
135
  if (sawReachableHost) {
136
136
  const primary = candidates[0];
137
- console.log(chalk.yellow(` ⚠️ Ollama is reachable but model "${primary?.model || 'qwen3:4b'}" was not found on any live host`));
138
- console.log(chalk.dim(` Run: ollama pull ${primary?.model || 'qwen3:4b'}`));
137
+ console.log(chalk.yellow(` ⚠️ Ollama is reachable but model "${primary?.model || 'gemma4:e4b'}" was not found on any live host`));
138
+ console.log(chalk.dim(` Run: ollama pull ${primary?.model || 'gemma4:e4b'}`));
139
139
  }
140
140
 
141
141
  return false;
@@ -474,7 +474,7 @@ program
474
474
  if (!ollamaAvailable) {
475
475
  console.log(chalk.yellow('\n ⚠️ No AI extraction available (Ollama unreachable, no API keys configured)'));
476
476
  console.log(chalk.white(' → Switching to ') + chalk.bold.green('crawl-only mode') + chalk.white(' — raw data will be collected without AI extraction'));
477
- console.log(chalk.dim(' Tip: Install Ollama (ollama.com) + run `ollama pull qwen3:4b` to enable local AI extraction\n'));
477
+ console.log(chalk.dim(' Tip: Install Ollama (ollama.com) + run `ollama pull gemma4:e4b` to enable local AI extraction\n'));
478
478
  opts.extract = false;
479
479
  }
480
480
  }
@@ -4069,6 +4069,54 @@ program
4069
4069
  }
4070
4070
  });
4071
4071
 
4072
+ // ── GAP INTEL ────────────────────────────────────────────────────────────
4073
+
4074
+ program
4075
+ .command('gap-intel <project>')
4076
+ .description('Topic/content gap analysis — find what competitors cover that you don\'t')
4077
+ .option('--vs <domains>', 'Competitor domains to compare (comma-separated)')
4078
+ .option('--type <type>', 'Page type filter: docs, blog, landing, all', 'all')
4079
+ .option('--limit <n>', 'Max pages per domain', '100')
4080
+ .option('--raw', 'Skip LLM prioritisation, output raw topic matrix only')
4081
+ .option('--format <type>', 'Output format: markdown or json', 'markdown')
4082
+ .option('--out <path>', 'Write report to file (default: stdout)')
4083
+ .action(async (project, opts) => {
4084
+ if (!requirePro('gap-intel')) return;
4085
+ const db = getDb();
4086
+ const config = loadConfig(project);
4087
+
4088
+ printAttackHeader('🔍 Gap Intel — Topic Gap Analysis', project);
4089
+
4090
+ const { runGapIntel } = await import('./analyses/gap-intel/index.js');
4091
+
4092
+ const vsDomains = opts.vs ? opts.vs.split(',').map(s => s.trim()) : [];
4093
+
4094
+ const report = await runGapIntel(db, project, config, {
4095
+ vs: vsDomains,
4096
+ type: opts.type,
4097
+ limit: parseInt(opts.limit, 10) || 100,
4098
+ raw: opts.raw || false,
4099
+ log: (msg) => console.log(chalk.gray(msg)),
4100
+ });
4101
+
4102
+ if (opts.format === 'markdown') {
4103
+ console.log(report);
4104
+ } else {
4105
+ console.log(report);
4106
+ }
4107
+
4108
+ if (opts.out) {
4109
+ writeFileSync(opts.out, report, 'utf8');
4110
+ console.log(chalk.green(`\n ✅ Report saved: ${opts.out}\n`));
4111
+ }
4112
+
4113
+ // Regenerate dashboard
4114
+ try {
4115
+ const configs = loadAllConfigs();
4116
+ generateMultiDashboard(db, configs);
4117
+ } catch {}
4118
+ });
4119
+
4072
4120
  // ── AEO BLOG DRAFT GENERATOR ─────────────────────────────────────────────
4073
4121
 
4074
4122
  let _blogDraftModule;
@@ -4266,7 +4314,7 @@ program
4266
4314
  'Optionally saves Gemini/OpenAI API key to .env',
4267
4315
  hasOllama && hasAnalysisKey ? chalk.green(' → You\'re fully set up!') :
4268
4316
  hasOllama ? chalk.yellow(' → Add an API key for analysis: edit .env') :
4269
- chalk.yellow(' → Install Ollama: https://ollama.com then: ollama pull qwen3:4b'),
4317
+ chalk.yellow(' → Install Ollama: https://ollama.com then: ollama pull gemma4:e4b'),
4270
4318
  ].filter(Boolean),
4271
4319
  },
4272
4320
  {
@@ -265,7 +265,7 @@ async function run() {
265
265
  } else {
266
266
  warn('No Ollama available. Extraction will use degraded mode (regex only).');
267
267
  info('Install Ollama (https://ollama.com) and pull a model for better results.');
268
- info('Recommended: ollama pull qwen3.5:9b');
268
+ info('Recommended: ollama pull gemma4:e4b');
269
269
  }
270
270
 
271
271
  // ── Analysis tier ──
package/extractor/qwen.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import fetch from 'node-fetch';
2
2
 
3
3
  const DEFAULT_OLLAMA_URL = 'http://localhost:11434';
4
- const DEFAULT_OLLAMA_MODEL = 'qwen3:4b';
4
+ const DEFAULT_OLLAMA_MODEL = 'gemma4:e4b';
5
5
  const OLLAMA_CTX = parseInt(process.env.OLLAMA_CTX || '8192', 10);
6
6
  const OLLAMA_TIMEOUT_MS = parseInt(process.env.OLLAMA_TIMEOUT_MS || '60000', 10); // BUG-008: was 5000ms, too short for slow machines
7
7
  const OLLAMA_PREFLIGHT_TIMEOUT_MS = parseInt(process.env.OLLAMA_PREFLIGHT_TIMEOUT_MS || '2500', 10);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "seo-intel",
3
- "version": "1.3.1",
3
+ "version": "1.4.0",
4
4
  "description": "Local Ahrefs-style SEO competitor intelligence. Crawl → SQLite → cloud analysis.",
5
5
  "type": "module",
6
6
  "license": "SEE LICENSE IN LICENSE",
package/server.js CHANGED
@@ -596,7 +596,7 @@ async function handleRequest(req, res) {
596
596
  const ALLOWED = ['crawl', 'extract', 'analyze', 'export-actions', 'competitive-actions',
597
597
  'suggest-usecases', 'html', 'status', 'brief', 'keywords', 'report', 'guide',
598
598
  'schemas', 'headings-audit', 'orphans', 'entities', 'friction', 'shallow', 'decay', 'export', 'templates',
599
- 'aeo', 'blog-draft'];
599
+ 'aeo', 'blog-draft', 'gap-intel'];
600
600
 
601
601
  if (!command || !ALLOWED.includes(command)) {
602
602
  json(res, 400, { error: `Invalid command. Allowed: ${ALLOWED.join(', ')}` });
@@ -613,6 +613,11 @@ async function handleRequest(req, res) {
613
613
  if (params.get('lang')) args.push('--lang', params.get('lang'));
614
614
  if (params.get('model')) args.push('--model', params.get('model'));
615
615
  if (params.has('save')) args.push('--save');
616
+ if (params.get('vs')) args.push('--vs', params.get('vs'));
617
+ if (params.get('type')) args.push('--type', params.get('type'));
618
+ if (params.get('limit')) args.push('--limit', params.get('limit'));
619
+ if (params.has('raw')) args.push('--raw');
620
+ if (params.get('out')) args.push('--out', params.get('out'));
616
621
 
617
622
  // Auto-save exports from dashboard to reports/
618
623
  const EXPORT_CMDS = ['export-actions', 'suggest-usecases', 'competitive-actions'];
@@ -148,7 +148,7 @@ export function* createEnvFile(rootDir = ROOT) {
148
148
  '',
149
149
  '# Local Ollama for extraction',
150
150
  'OLLAMA_URL=http://localhost:11434',
151
- 'OLLAMA_MODEL=qwen3.5:9b',
151
+ 'OLLAMA_MODEL=gemma4:e4b',
152
152
  'OLLAMA_CTX=8192',
153
153
  '',
154
154
  '# Crawler settings',
package/setup/models.js CHANGED
@@ -17,6 +17,56 @@
17
17
  // Minimum viable: 4B parameters for reliable JSON output
18
18
 
19
19
  export const EXTRACTION_MODELS = [
20
+ // ── Gemma 4 (Google, MoE) — new recommended default ──
21
+ {
22
+ id: 'gemma4:e2b',
23
+ name: 'Gemma 4 E2B',
24
+ family: 'gemma4',
25
+ tier: 'budget',
26
+ vram: '~5 GB',
27
+ minVramMB: 4000,
28
+ speed: '~1.5s/page',
29
+ quality: 'good',
30
+ description: 'Google Gemma 4 edge model. MoE (5.1B total, 2.3B active) — fast inference with good JSON output. Great for laptops.',
31
+ recommended: false,
32
+ },
33
+ {
34
+ id: 'gemma4:e4b',
35
+ name: 'Gemma 4 E4B',
36
+ family: 'gemma4',
37
+ tier: 'balanced',
38
+ vram: '~7 GB',
39
+ minVramMB: 5500,
40
+ speed: '~2s/page',
41
+ quality: 'great',
42
+ description: 'Default recommendation. MoE (8B total, 4.5B active) — excellent extraction quality at edge-model speed. Best quality/speed ratio.',
43
+ recommended: true,
44
+ },
45
+ {
46
+ id: 'gemma4:26b',
47
+ name: 'Gemma 4 26B',
48
+ family: 'gemma4',
49
+ tier: 'quality',
50
+ vram: '~13 GB',
51
+ minVramMB: 11000,
52
+ speed: '~4s/page',
53
+ quality: 'excellent',
54
+ description: 'MoE (25.2B total, 3.8B active) — frontier quality at efficient compute. Needs RTX 3090+ or M-series with 16GB+.',
55
+ recommended: false,
56
+ },
57
+ {
58
+ id: 'gemma4:31b',
59
+ name: 'Gemma 4 31B (Dense)',
60
+ family: 'gemma4',
61
+ tier: 'power',
62
+ vram: '~20 GB',
63
+ minVramMB: 16000,
64
+ speed: '~7s/page',
65
+ quality: 'excellent',
66
+ description: 'Dense 30.7B model — maximum extraction quality. Needs RTX 3090/4090 or M2 Pro+ with 24GB+.',
67
+ recommended: false,
68
+ },
69
+ // ── Qwen 3.5 (Alibaba) ──
20
70
  {
21
71
  id: 'qwen3.5:4b',
22
72
  name: 'Qwen 3.5 4B',
@@ -26,7 +76,7 @@ export const EXTRACTION_MODELS = [
26
76
  minVramMB: 2500,
27
77
  speed: '~2s/page',
28
78
  quality: 'good',
29
- description: 'Minimum recommended. Reliable JSON extraction, decent keyword detection. Great for laptops and older GPUs.',
79
+ description: 'Reliable JSON extraction, decent keyword detection. Great for laptops and older GPUs.',
30
80
  recommended: false,
31
81
  },
32
82
  {
@@ -38,8 +88,8 @@ export const EXTRACTION_MODELS = [
38
88
  minVramMB: 4500,
39
89
  speed: '~3s/page',
40
90
  quality: 'better',
41
- description: 'Default recommendation. Better entity detection and intent classification. Works on most modern GPUs.',
42
- recommended: true,
91
+ description: 'Good entity detection and intent classification. Works on most modern GPUs.',
92
+ recommended: false,
43
93
  },
44
94
  {
45
95
  id: 'qwen3.5:27b',
@@ -62,10 +112,10 @@ export const EXTRACTION_MODELS = [
62
112
  minVramMB: 18000,
63
113
  speed: '~8s/page',
64
114
  quality: 'excellent',
65
- description: 'Near-cloud quality extraction. Needs RTX 3090/4090 or M2 Ultra. Overkill for most users.',
115
+ description: 'Near-cloud quality extraction. Needs RTX 3090/4090 or M2 Ultra.',
66
116
  recommended: false,
67
117
  },
68
- // Alternative providers
118
+ // ── Alternative providers ──
69
119
  {
70
120
  id: 'nemotron-nano:4b',
71
121
  name: 'Nemotron 3 Nano 4B',
@@ -78,7 +128,7 @@ export const EXTRACTION_MODELS = [
78
128
  description: 'NVIDIA agentic model. Efficient extraction with tool-use training. Good alternative to Qwen 3.5 4B.',
79
129
  recommended: false,
80
130
  },
81
- // Legacy / fallback models (already installed by many users)
131
+ // ── Legacy / fallback models (already installed by many users) ──
82
132
  {
83
133
  id: 'qwen3:4b',
84
134
  name: 'Qwen 3 4B (legacy)',
@@ -88,7 +138,7 @@ export const EXTRACTION_MODELS = [
88
138
  minVramMB: 2500,
89
139
  speed: '~2s/page',
90
140
  quality: 'good',
91
- description: 'Previous generation. Works well but Qwen 3.5 is better if you can upgrade.',
141
+ description: 'Previous generation. Gemma 4 or Qwen 3.5 recommended for new installs.',
92
142
  recommended: false,
93
143
  legacy: true,
94
144
  },
@@ -101,7 +151,7 @@ export const EXTRACTION_MODELS = [
101
151
  minVramMB: 4500,
102
152
  speed: '~3s/page',
103
153
  quality: 'better',
104
- description: 'Previous generation. Solid extraction. Qwen 3.5 recommended for new installs.',
154
+ description: 'Previous generation. Gemma 4 or Qwen 3.5 recommended for new installs.',
105
155
  recommended: false,
106
156
  legacy: true,
107
157
  },
@@ -117,6 +167,32 @@ export const EXTRACTION_MODELS = [
117
167
  // Cloud models (Claude, GPT-5.4, Gemini) available via OpenClaw agent setup
118
168
 
119
169
  export const ANALYSIS_MODELS = [
170
+ {
171
+ id: 'gemma4:26b',
172
+ name: 'Gemma 4 26B (MoE)',
173
+ family: 'gemma4',
174
+ type: 'local',
175
+ vram: '~13 GB',
176
+ minVramMB: 11000,
177
+ context: '128K tokens',
178
+ costNote: 'Free (your GPU)',
179
+ quality: 'great',
180
+ recommended: true,
181
+ description: 'Google Gemma 4 MoE — 25.2B total, 3.8B active. Fast analysis with frontier quality. Best local value.',
182
+ },
183
+ {
184
+ id: 'gemma4:31b',
185
+ name: 'Gemma 4 31B (Dense)',
186
+ family: 'gemma4',
187
+ type: 'local',
188
+ vram: '~20 GB',
189
+ minVramMB: 16000,
190
+ context: '128K tokens',
191
+ costNote: 'Free (your GPU)',
192
+ quality: 'excellent',
193
+ recommended: false,
194
+ description: 'Google Gemma 4 dense model — maximum quality for local analysis. Needs RTX 3090+ or M2 Pro+ with 24GB.',
195
+ },
120
196
  {
121
197
  id: 'qwen3:14b',
122
198
  name: 'Qwen 3 14B',
@@ -140,8 +216,8 @@ export const ANALYSIS_MODELS = [
140
216
  context: '32K tokens',
141
217
  costNote: 'Free (your GPU)',
142
218
  quality: 'good',
143
- recommended: true,
144
- description: 'Sweet spot for local analysis. Strong reasoning with 27.8B params. Needs RTX 3090/4080+ or M-series with 24GB+.',
219
+ recommended: false,
220
+ description: 'Strong reasoning with 27.8B params. Needs RTX 3090/4080+ or M-series with 24GB+.',
145
221
  },
146
222
  {
147
223
  id: 'qwen3.5:35b',
@@ -228,12 +304,13 @@ export const ANALYSIS_MODELS = [
228
304
  // ── VRAM-Based Recommendations ──────────────────────────────────────────────
229
305
 
230
306
  const VRAM_TIERS = [
231
- { maxMB: 2500, extraction: null, note: 'Not enough VRAM for local extraction. Use cloud or CPU mode (slow).' },
232
- { maxMB: 4500, extraction: 'qwen3.5:4b', note: 'Budget tier — Qwen 3.5 4B fits your GPU.' },
233
- { maxMB: 8000, extraction: 'qwen3.5:9b', note: 'Balanced tier — Qwen 3.5 9B recommended for best quality/speed.' },
234
- { maxMB: 18000, extraction: 'qwen3.5:27b', note: 'Quality tier — Qwen 3.5 27B for nuanced extraction.' },
235
- { maxMB: 48000, extraction: 'qwen3.5:35b', note: 'Power tier — Qwen 3.5 35B for near-cloud quality.' },
236
- { maxMB: Infinity, extraction: 'qwen3.5:35b', note: 'Power tier — Qwen 3.5 35B recommended. Your GPU can handle anything.' },
307
+ { maxMB: 2500, extraction: null, note: 'Not enough VRAM for local extraction. Use cloud or CPU mode (slow).' },
308
+ { maxMB: 4500, extraction: 'qwen3.5:4b', note: 'Budget tier — Qwen 3.5 4B fits your GPU.' },
309
+ { maxMB: 6000, extraction: 'gemma4:e2b', note: 'Edge tier — Gemma 4 E2B (MoE, fast).' },
310
+ { maxMB: 12000, extraction: 'gemma4:e4b', note: 'Balanced tier — Gemma 4 E4B recommended. MoE gives best quality/speed.' },
311
+ { maxMB: 18000, extraction: 'gemma4:26b', note: 'Quality tier — Gemma 4 26B MoE for frontier extraction.' },
312
+ { maxMB: 48000, extraction: 'gemma4:31b', note: 'Power tier — Gemma 4 31B Dense for maximum quality.' },
313
+ { maxMB: Infinity, extraction: 'gemma4:31b', note: 'Power tier — Gemma 4 31B Dense recommended. Your GPU can handle anything.' },
237
314
  ];
238
315
 
239
316
  /**
@@ -250,8 +327,9 @@ export function recommendExtractionModel(availableModels = [], vramMB = 0) {
250
327
 
251
328
  // Preferred model order (newest → legacy)
252
329
  const preferenceOrder = [
330
+ 'gemma4:e4b', 'gemma4:26b', 'gemma4:e2b', 'gemma4:31b',
253
331
  'qwen3.5:9b', 'qwen3.5:27b', 'qwen3.5:4b', 'qwen3.5:35b',
254
- 'qwen3:8b', 'qwen3:4b', 'qwen3.5:0.6b',
332
+ 'qwen3:8b', 'qwen3:4b',
255
333
  ];
256
334
 
257
335
  // Filter to models that fit VRAM
@@ -310,7 +388,7 @@ export function recommendExtractionModel(availableModels = [], vramMB = 0) {
310
388
  */
311
389
  export function recommendAnalysisModel(availableModels = [], vramMB = 0) {
312
390
  const preferenceOrder = [
313
- 'qwen3.5:27b', 'qwen3.5:35b', 'qwen3:14b', 'nemotron-3-super:120b',
391
+ 'gemma4:26b', 'gemma4:31b', 'qwen3.5:27b', 'qwen3.5:35b', 'qwen3:14b', 'nemotron-3-super:120b',
314
392
  ];
315
393
 
316
394
  // Filter to models that fit VRAM
@@ -332,8 +410,9 @@ export function recommendAnalysisModel(availableModels = [], vramMB = 0) {
332
410
  // 2. VRAM-based recommendation
333
411
  let recId = 'qwen3:14b'; // default minimum
334
412
  if (vramMB >= 48000) recId = 'nemotron-3-super:120b';
335
- else if (vramMB >= 18000) recId = 'qwen3.5:35b';
336
- else if (vramMB >= 15000) recId = 'qwen3.5:27b';
413
+ else if (vramMB >= 16000) recId = 'gemma4:31b';
414
+ else if (vramMB >= 11000) recId = 'gemma4:26b';
415
+ else if (vramMB >= 8000) recId = 'qwen3:14b';
337
416
 
338
417
  const recModel = ANALYSIS_MODELS.find(m => m.id === recId);
339
418
  if (recModel) {
@@ -188,7 +188,7 @@ AVAILABLE COMMANDS (run these from ${ROOT}):
188
188
  TO INSTALL THINGS:
189
189
  - npm install (in ${ROOT}) → install Node dependencies
190
190
  - npx playwright install chromium → install browser
191
- - ollama pull qwen3.5:9b → install extraction model
191
+ - ollama pull gemma4:e4b → install extraction model
192
192
 
193
193
  TO CONFIGURE:
194
194
  - Edit ${ROOT}/.env for API keys and settings
@@ -201,9 +201,10 @@ ANALYSIS MODELS (user needs at least one API key):
201
201
  - DeepSeek: Budget option (~$0.02-0.08) → DEEPSEEK_API_KEY
202
202
 
203
203
  EXTRACTION MODELS (local, free):
204
- - qwen3.5:9b (recommended, needs 6GB+ VRAM)
205
- - qwen3.5:4b (budget, needs 3GB+ VRAM)
206
- - qwen3.5:27b (quality, needs 16GB+ VRAM)`;
204
+ - gemma4:e4b (recommended, MoE, needs 6GB+ VRAM)
205
+ - gemma4:e2b (budget, MoE, needs 4GB+ VRAM)
206
+ - gemma4:26b (quality, MoE, needs 12GB+ VRAM)
207
+ - qwen3.5:9b (alternative, needs 5GB+ VRAM)`;
207
208
 
208
209
  // ── Agent-Driven Setup Flow ────────────────────────────────────────────────
209
210
 
@@ -21,7 +21,7 @@ const ROOT = join(__dirname, '..');
21
21
  * Test Ollama host + model by sending a tiny prompt.
22
22
  *
23
23
  * @param {string} host - e.g. 'http://localhost:11434'
24
- * @param {string} model - e.g. 'qwen3.5:9b'
24
+ * @param {string} model - e.g. 'gemma4:e4b'
25
25
  * @returns {{ success: boolean, latencyMs: number, response?: string, error?: string }}
26
26
  */
27
27
  export async function testOllamaConnectivity(host, model) {
package/setup/wizard.html CHANGED
@@ -1359,7 +1359,7 @@ Start by checking what's already installed.</div>
1359
1359
  Tasks:
1360
1360
  1. Verify Node.js 22.5+ (install via nvm if needed)
1361
1361
  2. Install Ollama (https://ollama.com) if not present
1362
- 3. Pull qwen3.5:9b or smaller if VRAM < 6GB
1362
+ 3. Pull gemma4:e4b or smaller if VRAM < 6GB
1363
1363
  4. Create ./config/myproject.json — ask me for my target domain and up to 3 competitor domains
1364
1364
  5. Add GEMINI_API_KEY or ANTHROPIC_API_KEY to .env if I have one
1365
1365
  6. Run: seo-intel crawl myproject
@@ -1380,7 +1380,7 @@ It's installed via: npm install -g seo-intel
1380
1380
  Please help me:
1381
1381
  1. Check that Node.js 22.5+ is installed on my machine
1382
1382
  2. Install Ollama (ollama.com) if it's not there
1383
- 3. Download a Qwen AI model that fits my machine's RAM
1383
+ 3. Download a Gemma 4 AI model that fits my machine's RAM
1384
1384
  4. Create a project config file pointing at my website and competitors
1385
1385
  5. Run the first crawl and show me the results
1386
1386
 
@@ -1403,15 +1403,15 @@ Ask me for my website URL before starting.</div>
1403
1403
  <button class="btn btn-sm agent-runtime-tab-min" data-runtime="openclaw" onclick="selectAgentRuntimeMin('openclaw', this)" style="font-size:0.62rem;"><i class="fa-solid fa-wand-magic-sparkles" style="margin-right:3px;"></i>OpenClaw</button>
1404
1404
  </div>
1405
1405
  <div id="minRuntime-claudecode" class="min-runtime-panel" style="position:relative; padding:7px 9px; background:rgba(10,10,10,0.5); border:1px solid var(--border-subtle); border-radius:var(--radius); font-family:var(--font-mono); font-size:0.6rem; color:var(--text-secondary); line-height:1.5;">
1406
- <div>Set up seo-intel: check Node 22.5+, install Ollama, pull qwen3.5:9b, create config, run crawl.</div>
1406
+ <div>Set up seo-intel: check Node 22.5+, install Ollama, pull gemma4:e4b, create config, run crawl.</div>
1407
1407
  <button class="btn btn-sm" style="position:absolute; top:4px; right:4px; padding:2px 5px; font-size:0.48rem;" onclick="navigator.clipboard.writeText(this.previousElementSibling.textContent.trim());this.textContent='Copied!';setTimeout(()=>this.textContent='Copy',1500);">Copy</button>
1408
1408
  </div>
1409
1409
  <div id="minRuntime-codex" class="min-runtime-panel" style="display:none; position:relative; padding:7px 9px; background:rgba(10,10,10,0.5); border:1px solid var(--border-subtle); border-radius:var(--radius); font-family:var(--font-mono); font-size:0.6rem; color:var(--text-secondary); line-height:1.5;">
1410
- <div>Install seo-intel deps (Node 22.5+, Ollama, qwen3.5:9b), create project config, run: seo-intel crawl myproject. Use full disk access.</div>
1410
+ <div>Install seo-intel deps (Node 22.5+, Ollama, gemma4:e4b), create project config, run: seo-intel crawl myproject. Use full disk access.</div>
1411
1411
  <button class="btn btn-sm" style="position:absolute; top:4px; right:4px; padding:2px 5px; font-size:0.48rem;" onclick="navigator.clipboard.writeText(this.previousElementSibling.textContent.trim());this.textContent='Copied!';setTimeout(()=>this.textContent='Copy',1500);">Copy</button>
1412
1412
  </div>
1413
1413
  <div id="minRuntime-perplexity" class="min-runtime-panel" style="display:none; position:relative; padding:7px 9px; background:rgba(10,10,10,0.5); border:1px solid var(--border-subtle); border-radius:var(--radius); font-family:var(--font-mono); font-size:0.6rem; color:var(--text-secondary); line-height:1.5;">
1414
- <div>Help me set up SEO Intel local SEO tool: install Node.js 22.5+, Ollama, download a Qwen model, create config for my website. Ask for my URL.</div>
1414
+ <div>Help me set up SEO Intel local SEO tool: install Node.js 22.5+, Ollama, download Gemma 4 model, create config for my website. Ask for my URL.</div>
1415
1415
  <button class="btn btn-sm" style="position:absolute; top:4px; right:4px; padding:2px 5px; font-size:0.48rem;" onclick="navigator.clipboard.writeText(this.previousElementSibling.textContent.trim());this.textContent='Copied!';setTimeout(()=>this.textContent='Copy',1500);">Copy</button>
1416
1416
  </div>
1417
1417
  <div id="minRuntime-openclaw" class="min-runtime-panel" style="display:none; position:relative; padding:7px 9px; background:rgba(10,10,10,0.5); border:1px solid var(--border-subtle); border-radius:var(--radius); font-family:var(--font-mono); font-size:0.6rem; color:var(--text-secondary); line-height:1.5;">