seo-intel 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -27,9 +27,10 @@ GEMINI_API_KEY=
27
27
  # GOOGLE_CLIENT_SECRET=
28
28
 
29
29
  # ── Extraction Model (local Ollama) ───────────────────────────────────────
30
- # Recommended: qwen3.5:9b (balanced), qwen3.5:4b (budget), qwen3.5:27b (quality)
30
+ # Recommended: gemma4:e4b (default), gemma4:e2b (budget), gemma4:26b (quality)
31
+ # Also supported: qwen3.5:9b, qwen3.5:4b, qwen3.5:27b
31
32
  OLLAMA_URL=http://localhost:11434
32
- OLLAMA_MODEL=qwen3.5:9b
33
+ OLLAMA_MODEL=gemma4:e4b
33
34
  OLLAMA_CTX=8192
34
35
  OLLAMA_TIMEOUT_MS=60000 # 60s default — increase to 120000 on slow hardware (BUG-008)
35
36
 
package/CHANGELOG.md CHANGED
@@ -1,5 +1,52 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.4.1 (2026-04-03)
4
+
5
+ ### Fixes
6
+ - **CLI JSON output** — all 11 commands now produce clean JSON with zero chalk/ANSI leakage
7
+ - **Brief `--format json`** — full rich data (keyword gaps, schema gaps, actions) instead of lean subset
8
+ - **Templates `--format json`** — suppressed chalk header and log output in JSON mode
9
+ - **JS-Delta `--format json`** — suppressed per-page progress chalk in JSON mode
10
+
11
+ ### Agent Integration
12
+ - Model selection hints (`modelHint`, `modelNote`) on extract, gap-intel, blog-draft capabilities
13
+ - AGENT_GUIDE.md — added Model Selection Guidance table (light-local vs cloud-medium per phase)
14
+ - GitHub Releases now auto-created on tag push via CI
15
+
16
+ ## 1.4.0 (2026-04-03)
17
+
18
+ ### New Feature: Gap Intelligence
19
+ - `seo-intel gap-intel <project>` — topic/content gap analysis against competitors
20
+ - Extracts topics from your pages and competitor pages via Ollama
21
+ - Fuzzy set comparison identifies coverage gaps with substring matching
22
+ - LLM-powered prioritisation ranks gaps by traffic potential and difficulty
23
+ - Options: `--vs <domains>`, `--type docs|blog|landing|all`, `--limit <n>`, `--raw`, `--format`, `--out`
24
+ - Available from dashboard terminal and CLI (Pro feature)
25
+
26
+ ### New Default: Gemma 4 Models
27
+ - **Gemma 4 e4b** is now the default extraction model (was Qwen 3 4B)
28
+ - Four extraction tiers: e2b (budget, 46 t/s), e4b (balanced, 23 t/s), 26b (quality), 31b (power)
29
+ - Two analysis tiers: 26b (recommended 11GB+ VRAM), 31b (16GB+ VRAM)
30
+ - Qwen models remain fully supported as alternatives
31
+ - Setup wizard, model recommendations, and VRAM tiers updated for Gemma 4
32
+
33
+ ### Agent-Ready JSON Output
34
+ - All 11 analysis commands support `--format json` for clean, parseable output
35
+ - JSON output is chalk-free — no ANSI escape codes mixed into structured data
36
+ - Commands: shallow, decay, headings-audit, orphans, entities, schemas, friction, brief, velocity, templates, js-delta
37
+
38
+ ### Programmatic API (`seo-intel/froggo`)
39
+ - Unified agent runner: `run(command, project, opts)` returns `{ ok, command, project, timestamp, data }`
40
+ - 18 capabilities with machine-readable manifest (inputs, outputs, dependencies, tier)
41
+ - Pipeline dependency graph for orchestration
42
+ - Model selection hints per capability (light-local vs cloud-medium)
43
+ - Deep imports: `seo-intel/aeo`, `seo-intel/crawler`, `seo-intel/db`, etc.
44
+ - Agent Guide (`AGENT_GUIDE.md`) with orchestration patterns and model guidance
45
+
46
+ ### Server
47
+ - Added `gap-intel` to terminal command whitelist
48
+ - Forward `--vs`, `--type`, `--limit`, `--raw`, `--out` params from dashboard to CLI
49
+
3
50
  ## 1.3.1 (2026-04-02)
4
51
 
5
52
  ### Fixes
package/README.md CHANGED
@@ -157,14 +157,15 @@ SEO Intel uses Ollama for local AI extraction. Edit `.env`:
157
157
 
158
158
  ```bash
159
159
  OLLAMA_URL=http://localhost:11434
160
- OLLAMA_MODEL=qwen3.5:9b # recommended (needs 6GB+ VRAM)
160
+ OLLAMA_MODEL=gemma4:e4b # recommended (MoE, needs 6GB+ VRAM)
161
161
  OLLAMA_CTX=16384
162
162
  ```
163
163
 
164
164
  Model recommendations by VRAM:
165
- - **3-4 GB** → `qwen3.5:4b`
166
- - **6-8 GB** → `qwen3.5:9b` (recommended)
167
- - **16+ GB** → `qwen3.5:27b`
165
+ - **4-5 GB** → `gemma4:e2b` (MoE edge model)
166
+ - **6-10 GB** → `gemma4:e4b` (recommended)
167
+ - **12+ GB** → `gemma4:26b` (MoE, frontier quality)
168
+ - Also supported: `qwen3.5:4b`, `qwen3.5:9b`, `qwen3.5:27b`
168
169
 
169
170
  ### Analysis (cloud, user's API key)
170
171
 
@@ -0,0 +1,339 @@
1
+ /**
2
+ * Gap Intel — Topic/Content Gap Analysis
3
+ *
4
+ * Reads crawled pages for target + competitors from DB,
5
+ * extracts topic clusters via local LLM, compares coverage,
6
+ * and outputs a prioritised gap report.
7
+ *
8
+ * Zero network — reads from SQLite + Ollama only.
9
+ */
10
+
11
+ import { getProjectDomains, getTargetDomains, getCompetitorDomains } from '../../exports/queries.js';
12
+
13
// ── Page type URL patterns ───────────────────────────────────────────────────

// URL substrings that place a page into a content bucket. Matching is
// case-insensitive and purely substring-based (no anchoring).
const PAGE_TYPE_PATTERNS = {
  docs: ['/docs/', '/guide', '/api/', '/reference', '/quickstart', '/tutorial', '/learn'],
  blog: ['/blog/', '/post/', '/article/', '/news/'],
  landing: ['/pricing', '/features', '/product', '/solutions', '/use-case', '/compare'],
};

/**
 * Decide whether a URL belongs to the requested page-type bucket.
 *
 * @param {string} url - absolute or relative page URL
 * @param {string|undefined} type - 'docs' | 'blog' | 'landing' | 'all' | undefined
 * @returns {boolean} true when the URL matches the bucket; a missing type,
 *   'all', or an unrecognised type name accepts every URL (permissive).
 */
function matchesPageType(url, type) {
  // No filter, or the explicit catch-all, accepts everything.
  if (!type || type === 'all') return true;

  const fragments = PAGE_TYPE_PATTERNS[type];
  // Unknown type names are treated permissively rather than excluding all pages.
  if (!fragments) return true;

  const haystack = url.toLowerCase();
  for (const fragment of fragments) {
    if (haystack.includes(fragment)) return true;
  }
  return false;
}
28
+
29
// ── Load pages from DB ───────────────────────────────────────────────────────

/**
 * Load crawled pages for a project's target and competitor domains.
 *
 * @param {object} db - SQLite handle exposing prepare().all() (see runGapIntel)
 * @param {string} project - project name used to resolve configured domains
 * @param {object} [opts]
 * @param {string} [opts.type='all'] - page-type filter: docs|blog|landing|all
 * @param {number} [opts.limit=100] - max pages kept per domain (after filtering)
 * @param {string[]} [opts.vsDomains=[]] - when non-empty, keep only competitor
 *   domains whose name contains one of these substrings
 * @returns {{ target: object[], competitors: Map<string, object[]>,
 *   targetDomain: string|null, competitorDomainNames: string[] }}
 */
function loadPages(db, project, opts = {}) {
  const { type = 'all', limit = 100, vsDomains = [] } = opts;

  const domains = getProjectDomains(db, project);
  const targetDomains = getTargetDomains(domains);
  // --vs narrows competitors by substring match; otherwise use all configured ones.
  const competitorDomains = vsDomains.length
    ? domains.filter(d => d.role === 'competitor' && vsDomains.some(v => d.domain.includes(v)))
    : getCompetitorDomains(domains);

  if (!targetDomains.length) return { target: [], competitors: new Map(), targetDomain: null, competitorDomainNames: [] };

  const loadForDomains = (domainRows) => {
    const allPages = [];
    for (const d of domainRows) {
      // BUGFIX: the per-domain LIMIT used to be applied in SQL *before* the
      // page-type filter, so e.g. `--type docs` could return far fewer than
      // `limit` docs pages (or none) even when more existed in the DB.
      // Fetch ordered by word_count, filter by type, then cap at `limit`.
      const pages = db.prepare(`
        SELECT p.url, p.title, p.meta_desc, p.body_text, p.word_count
        FROM pages p
        WHERE p.domain_id = ?
          AND p.status_code = 200
          AND p.body_text IS NOT NULL AND p.body_text != ''
        ORDER BY p.word_count DESC
      `).all(d.id);
      const matching = pages
        .filter(p => matchesPageType(p.url, type))
        .slice(0, limit)
        .map(p => ({ ...p, domain: d.domain }));
      allPages.push(...matching);
    }
    return allPages;
  };

  const targetPages = loadForDomains(targetDomains);
  const compPages = new Map();
  for (const d of competitorDomains) {
    const pages = loadForDomains([d]);
    // Competitors with nothing crawled are dropped entirely.
    if (pages.length) compPages.set(d.domain, pages);
  }

  return {
    target: targetPages,
    competitors: compPages,
    targetDomain: targetDomains[0]?.domain,
    competitorDomainNames: competitorDomains.map(d => d.domain),
  };
}
73
+
74
// ── Extract topics from pages (LLM) ─────────────────────────────────────────

/**
 * Ask the local Ollama model to label the topics a set of pages covers.
 *
 * Pages are sent in batches of 25 (title + meta description only, not body
 * text) and the returned labels are de-duplicated across batches. A failed
 * batch is logged and skipped — extraction is best-effort.
 *
 * @param {object[]} pages - rows with url/title/meta_desc
 * @param {string} domain - domain name (for the prompt and log lines)
 * @param {string} ollamaUrl - Ollama base URL
 * @param {string} ollamaModel - Ollama model name
 * @param {function} log - logger
 * @returns {Promise<string[]>} unique topic labels (2–119 chars each)
 */
async function extractTopics(pages, domain, ollamaUrl, ollamaModel, log) {
  const BATCH = 25;
  const topics = new Set();

  for (let start = 0; start < pages.length; start += BATCH) {
    const batch = pages.slice(start, start + BATCH);
    const batchNo = Math.floor(start / BATCH) + 1;

    // One numbered line per page: title (or URL path) plus meta description.
    const listing = batch
      .map((p, idx) => {
        const path = p.url.replace(/https?:\/\/[^/]+/, '') || '/';
        return `${idx + 1}. ${p.title || path}\n   ${p.meta_desc || '(no description)'}`;
      })
      .join('\n');

    const prompt = `Given these ${batch.length} pages from ${domain}:\n\n${listing}\n\nExtract the main topics and capabilities this site covers.\nReturn ONLY a flat list of specific topic labels, one per line.\nBe specific: "RPC rate limits" not just "rate limits".\n"WebSocket subscription guide" not just "WebSockets".\nNo numbering, no bullets, no explanations — just topic labels.`;

    try {
      const res = await fetch(`${ollamaUrl}/api/generate`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: ollamaModel,
          prompt,
          stream: false,
          // Low temperature for stable, list-shaped output.
          options: { temperature: 0.2, num_ctx: 8192 },
        }),
      });
      if (!res.ok) throw new Error(`Ollama ${res.status}`);

      const data = await res.json();
      const lines = (data.response || '')
        .split('\n')
        .map(l => l.trim())
        .filter(l => l && !l.startsWith('#'));

      for (const line of lines) {
        // Strip any bullet/number prefix the model added despite instructions.
        const clean = line.replace(/^[-*•\d.)\s]+/, '').trim();
        if (clean.length > 2 && clean.length < 120) topics.add(clean);
      }
      log(`  ${domain}: batch ${batchNo} → ${lines.length} topics`);
    } catch (e) {
      log(`  ⚠️ ${domain} batch ${batchNo} failed: ${e.message}`);
    }
  }

  return [...topics];
}
117
+
118
// ── Compare topic coverage ───────────────────────────────────────────────────

/**
 * Compare the target's topic list against each competitor's.
 *
 * A competitor topic counts as "covered" when it fuzzily matches any target
 * topic: either string contains the other, or — for multi-word competitor
 * topics — any target-topic word longer than 4 characters appears in it.
 *
 * @param {string[]} targetTopics - topics extracted for the target site
 * @param {Map<string, string[]>} competitorTopicsMap - domain → topic labels
 * @returns {{ gaps: {topic: string, coveredBy: string[]}[], depthGaps: Array }}
 *   `gaps` lists competitor topics the target lacks, each with the domains
 *   covering it (first-seen order). `depthGaps` is a reserved placeholder and
 *   is currently always empty.
 */
function compareTopics(targetTopics, competitorTopicsMap) {
  // Normalise once and hoist to a deduped array — the original re-spread the
  // Set on every competitor topic (O(n) per topic, accidental O(n·m)).
  const targetLower = [...new Set(targetTopics.map(t => t.toLowerCase()))];

  // Keyed by lowercase topic for O(1) dedupe across competitors
  // (was a linear gaps.find() per topic). Map preserves insertion order,
  // so the resulting gap order matches the original push order.
  const gapsByTopic = new Map();
  const depthGaps = []; // topics target has but competitors go deeper (not yet implemented)

  for (const [domain, topics] of competitorTopicsMap) {
    for (const topic of topics) {
      const lower = topic.toLowerCase();
      // Fuzzy match — substring either way, or a long shared word.
      const covered = targetLower.some(t =>
        t.includes(lower) || lower.includes(t) ||
        (lower.split(' ').length > 1 && t.split(' ').some(w => lower.includes(w) && w.length > 4))
      );
      if (covered) continue;

      const existing = gapsByTopic.get(lower);
      if (existing) {
        if (!existing.coveredBy.includes(domain)) existing.coveredBy.push(domain);
      } else {
        gapsByTopic.set(lower, { topic, coveredBy: [domain] });
      }
    }
  }

  return { gaps: [...gapsByTopic.values()], depthGaps };
}
148
+
149
// ── LLM gap prioritisation ──────────────────────────────────────────────────

/**
 * Ask the local Ollama model to rank content gaps by buyer intent.
 *
 * @param {{topic: string, coveredBy: string[]}[]} gaps - output of compareTopics()
 * @param {string} targetDomain - site being analysed (prompt context)
 * @param {string} context - short business description; may be '' (falls back
 *   to the literal 'business website' in the prompt)
 * @param {string} ollamaUrl - Ollama base URL
 * @param {string} ollamaModel - Ollama model name
 * @param {function} log - logger
 * @returns {Promise<string[]|null>} markdown table rows (lines starting with
 *   '|'); [] when there are no gaps; null when the LLM call fails so the
 *   caller can fall back to the raw gap table.
 */
async function prioritiseGaps(gaps, targetDomain, context, ollamaUrl, ollamaModel, log) {
  if (!gaps.length) return [];

  // Cap at 40 gaps so the prompt stays well inside the 8192-token context.
  const gapList = gaps.slice(0, 40).map(g =>
    `- ${g.topic} (covered by: ${g.coveredBy.join(', ')})`
  ).join('\n');

  const prompt = `Target site: ${targetDomain} (${context || 'business website'})
Topics competitors cover that the target project lacks:

${gapList}

For each gap, return a markdown table row with these columns:
| Topic | Covered by | Buyer Intent | Page Type | Why It Matters |

Buyer Intent: high, medium, or low
Page Type: guide, reference, landing, blog, or comparison
Why It Matters: one sentence on SEO or sales impact

Return ONLY the markdown table rows (no header, no explanation).
Sort by buyer intent (high first).`;

  try {
    const res = await fetch(`${ollamaUrl}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: ollamaModel,
        prompt,
        stream: false,
        // Low temperature for consistent table formatting.
        options: { temperature: 0.2, num_ctx: 8192 },
      }),
    });

    if (!res.ok) throw new Error(`Ollama ${res.status}`);
    const data = await res.json();
    // Keep only lines that look like markdown table rows; everything else
    // (explanations, headers the model added anyway) is discarded.
    return (data.response || '').split('\n').filter(l => l.trim().startsWith('|'));
  } catch (e) {
    log(`  ⚠️ LLM prioritisation failed: ${e.message}`);
    return null; // Fall back to raw output
  }
}
193
+
194
// ── Generate report ─────────────────────────────────────────────────────────

/**
 * Render the final markdown gap report.
 *
 * Layout: header with per-domain page counts, then either the LLM-prioritised
 * tables (bucketed high/medium/low) or the raw gap table, then the full topic
 * matrix for the target and each competitor (capped at 50 topics per domain).
 *
 * @param {object} data - targetDomain, competitorDomainNames, targetTopics,
 *   competitorTopicsMap, gaps, prioritisedRows (string[]|null), pageData
 * @returns {string} markdown report
 */
function generateReport(data) {
  const { targetDomain, competitorDomainNames, targetTopics, competitorTopicsMap, gaps, prioritisedRows, pageData } = data;

  const TABLE_HEADER =
    `| Topic | Covered by | Buyer Intent | Page Type | Why It Matters |\n` +
    `|-------|-----------|--------------|-----------|----------------|\n`;

  const parts = [];
  const today = new Date().toISOString().slice(0, 10);

  // Header — report title plus per-domain page counts on one line.
  parts.push(`# Gap Intel Report — ${targetDomain} vs ${competitorDomainNames.join(', ')}\n`);
  let countsLine = `Generated: ${today} | Pages analyzed: ${targetDomain}(${pageData.target.length})`;
  for (const [dom, pages] of pageData.competitors) {
    countsLine += ` ${dom}(${pages.length})`;
  }
  parts.push(countsLine + '\n\n');

  if (prioritisedRows && prioritisedRows.length) {
    // Keyword bucketing mirrors the LLM's Buyer Intent column. Note: a row
    // containing both keywords lands in both the high and medium sections —
    // deliberate faithfulness to the original filter-based behaviour.
    const high = prioritisedRows.filter(r => r.toLowerCase().includes('high'));
    const medium = prioritisedRows.filter(r => r.toLowerCase().includes('medium'));
    const low = prioritisedRows.filter(r => {
      const s = r.toLowerCase();
      return !s.includes('high') && !s.includes('medium');
    });

    const sections = [
      ['🔴 High Priority Gaps', high],
      ['🟡 Medium Priority Gaps', medium],
      ['🟢 Lower Priority Gaps', low],
    ];
    for (const [title, rows] of sections) {
      if (!rows.length) continue;
      parts.push(`## ${title}\n\n`);
      parts.push(TABLE_HEADER);
      parts.push(rows.join('\n') + '\n\n');
    }
  } else if (gaps.length) {
    // Raw gaps (LLM failed or --raw mode)
    parts.push(`## Content Gaps\n\n`);
    parts.push(`| Topic | Covered by |\n`);
    parts.push(`|-------|-----------|\n`);
    for (const g of gaps) {
      parts.push(`| ${g.topic} | ${g.coveredBy.join(', ')} |\n`);
    }
    parts.push('\n');
  } else {
    parts.push(`> No significant gaps found — target covers all competitor topics.\n\n`);
  }

  // Raw topic matrix — target first, then each competitor.
  const appendTopicList = (heading, topics) => {
    parts.push(`### ${heading} (${topics.length} topics)\n`);
    for (const t of topics.slice(0, 50)) parts.push(`- ${t}\n`);
    if (topics.length > 50) parts.push(`- ... and ${topics.length - 50} more\n`);
    parts.push('\n');
  };

  parts.push(`## Raw Topic Matrix\n\n`);
  appendTopicList(targetDomain, targetTopics);
  for (const [dom, topics] of competitorTopicsMap) appendTopicList(dom, topics);

  return parts.join('');
}
262
+
263
// ── Main entry point ─────────────────────────────────────────────────────────

/**
 * Run gap-intel analysis: load crawled pages, extract topics via the local
 * LLM, compare coverage against competitors, optionally prioritise the gaps,
 * and render a markdown report.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object} config - project config with context
 * @param {object} opts
 * @param {string[]} [opts.vs] - competitor domains to compare (default: all from config)
 * @param {string} [opts.type] - page type filter: docs, blog, landing, all
 * @param {number} [opts.limit] - max pages per domain
 * @param {boolean} [opts.raw] - skip LLM prioritisation
 * @param {string} [opts.ollamaUrl] - Ollama host
 * @param {string} [opts.ollamaModel] - Ollama model
 * @param {function} [opts.log] - logger function
 * @returns {Promise<string>} markdown report
 */
export async function runGapIntel(db, project, config, opts = {}) {
  const log = opts.log || console.log;
  const ollamaUrl = opts.ollamaUrl || process.env.OLLAMA_URL || 'http://localhost:11434';
  const ollamaModel = opts.ollamaModel || process.env.OLLAMA_MODEL || 'gemma4:e4b';
  const pageOpts = {
    type: opts.type || 'all',
    limit: opts.limit || 100,
    vsDomains: opts.vs || [],
  };
  const skipPrioritisation = opts.raw || false;

  // Step 1 — load crawled pages from SQLite.
  log('  Loading pages from DB...');
  const pageData = loadPages(db, project, pageOpts);

  // Bail early with actionable messages when there is nothing to compare.
  if (!pageData.target.length) {
    return `# Gap Intel — ${project}\n\n> ⚠️ No pages with body_text found for target.\n> Run: seo-intel crawl ${project}\n`;
  }
  if (!pageData.competitors.size) {
    return `# Gap Intel — ${project}\n\n> ⚠️ No competitor pages found in DB.\n> Check project config competitors and run: seo-intel crawl ${project}\n`;
  }

  log(`  Target: ${pageData.targetDomain} (${pageData.target.length} pages)`);
  for (const [dom, pages] of pageData.competitors) {
    log(`  Competitor: ${dom} (${pages.length} pages)`);
  }

  // Step 2 — extract topics, target first, then each competitor sequentially
  // (Ollama is local; parallel requests would only contend for the same GPU).
  log('\n  Extracting topics via LLM...');
  const targetTopics = await extractTopics(pageData.target, pageData.targetDomain, ollamaUrl, ollamaModel, log);

  const competitorTopicsMap = new Map();
  for (const [dom, pages] of pageData.competitors) {
    competitorTopicsMap.set(dom, await extractTopics(pages, dom, ollamaUrl, ollamaModel, log));
  }

  // Step 3 — compare coverage.
  log('\n  Comparing topic coverage...');
  const { gaps } = compareTopics(targetTopics, competitorTopicsMap);
  log(`  Found ${gaps.length} topic gaps`);

  // Step 4 — LLM prioritisation (skipped with --raw; null on failure makes
  // generateReport fall back to the raw gap table).
  let prioritisedRows = null;
  if (!skipPrioritisation && gaps.length) {
    log('\n  Prioritising gaps via LLM...');
    const context = config?.context?.industry || config?.context?.goal || '';
    prioritisedRows = await prioritiseGaps(gaps, pageData.targetDomain, context, ollamaUrl, ollamaModel, log);
  }

  // Step 5 — render the markdown report. Only competitors that actually had
  // pages are named in the header (keys of the competitors Map).
  return generateReport({
    targetDomain: pageData.targetDomain,
    competitorDomainNames: [...pageData.competitors.keys()],
    targetTopics,
    competitorTopicsMap,
    gaps,
    prioritisedRows,
    pageData,
  });
}