seo-intel 1.2.5 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,34 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.3.0 (2026-04-01)
4
+
5
+ ### New Feature: AEO Blog Draft Generator
6
+ - `seo-intel blog-draft <project>` — generate AEO-optimised blog post drafts from Intelligence Ledger data
7
+ - Gathers keyword gaps, long-tails, citability insights, entities, and top citable pages
8
+ - Builds structured prompt with 10 AEO signal rules for maximum AI citability
9
+ - Pre-scores generated draft against AEO signals before publishing
10
+ - Options: `--topic`, `--lang en|fi`, `--model gemini|claude|gpt|deepseek`, `--save`
11
+ - Pro feature gated via Lemon Squeezy license
12
+
13
+ ### Dashboard
14
+ - New "Create" section in export sidebar with interactive draft generator
15
+ - "Create a Draft" dropdown: select type (Blog Post / Documentation), topic, language, then generate
16
+ - "AI Citability Audit" button added to export sidebar — run AEO from dashboard
17
+ - Both `aeo` and `blog-draft` commands now available via dashboard terminal
18
+
19
+ ### Server
20
+ - Added `aeo` and `blog-draft` to terminal command whitelist
21
+ - Forward `--topic`, `--lang`, `--model`, `--save` params from dashboard to CLI
22
+
23
+ ## 1.2.6 (2026-03-31)
24
+
25
+ ### Critical Fix
26
+ - **Ship analysis, extraction, and AEO modules in npm package** — these were gitignored as "proprietary" from the Froggo era but are required for `extract`, `analyze`, `aeo`, `templates`, and dashboard generation
27
+ - npm users can now run the full pipeline without missing module errors
28
+ - Files added to git: `analyses/aeo/`, `analyses/templates/`, `analysis/`, `extractor/`
29
+ - Removed stale "NOT shipped in free npm package" comment from cli.js
30
+ - Deleted local `froggo-package/` directory
31
+
3
32
  ## 1.2.5 (2026-03-31)
4
33
 
5
34
  ### Skill / OpenClaw
@@ -0,0 +1,252 @@
1
+ /**
2
+ * AEO / AI Citability Analysis — Orchestrator
3
+ *
4
+ * Reads crawled pages from DB, scores each for AI citability,
5
+ * stores results, and optionally feeds low-scoring pages into the Intelligence Ledger.
6
+ */
7
+
8
+ import { scorePage } from './scorer.js';
9
+
10
/**
 * Run AEO analysis for a project.
 *
 * Loads every indexable page with body text for the project (competitor
 * domains included unless disabled), scores each page for AI citability
 * via scorePage(), and returns per-page results plus aggregate summary
 * statistics.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object} opts - { includeCompetitors: boolean, log: function }
 * @returns {object} { target: PageScore[], competitors: Map<domain, PageScore[]>, summary }
 */
export function runAeoAnalysis(db, project, opts = {}) {
  const log = opts.log || console.log;
  const includeCompetitors = opts.includeCompetitors ?? true;

  // ── Gather pages with body_text ─────────────────────────────────────────
  const roleFilter = includeCompetitors ? '' : `AND d.role IN ('target', 'owned')`;

  const pages = db.prepare(`
    SELECT
      p.id, p.url, p.title, p.body_text, p.word_count,
      p.published_date, p.modified_date,
      d.domain, d.role,
      e.primary_entities, e.search_intent, e.schema_types
    FROM pages p
    JOIN domains d ON d.id = p.domain_id
    LEFT JOIN extractions e ON e.page_id = p.id
    WHERE d.project = ?
      AND p.body_text IS NOT NULL AND p.body_text != ''
      AND p.is_indexable = 1
      ${roleFilter}
    ORDER BY d.role ASC, p.url ASC
  `).all(project);

  if (pages.length === 0) {
    return { target: [], competitors: new Map(), summary: null };
  }

  // Prepared once, reused for every page.
  const headingsStmt = db.prepare(
    'SELECT level, text FROM headings WHERE page_id = ? ORDER BY id'
  );
  const schemasStmt = db.prepare(
    'SELECT schema_type, date_published, date_modified FROM page_schemas WHERE page_id = ?'
  );

  // ── Score each page ─────────────────────────────────────────────────────
  const own = [];
  const byCompetitor = new Map();
  let scored = 0;

  for (const page of pages) {
    const headings = headingsStmt.all(page.id);
    const pageSchemas = schemasStmt.all(page.id);
    let schemaTypes = pageSchemas.map((s) => s.schema_type);

    // Fall back to extraction-derived schema types when page_schemas is empty.
    if (schemaTypes.length === 0 && page.schema_types) {
      try {
        const fromExtraction = JSON.parse(page.schema_types);
        if (Array.isArray(fromExtraction)) {
          schemaTypes = schemaTypes.concat(fromExtraction);
        }
      } catch {
        // malformed extraction JSON — ignore
      }
    }

    let entities = [];
    try {
      entities = JSON.parse(page.primary_entities || '[]');
    } catch {
      // malformed extraction JSON — ignore
    }

    const result = scorePage(
      page, headings, entities, schemaTypes, pageSchemas, page.search_intent
    );

    const pageScore = {
      pageId: page.id,
      url: page.url,
      title: page.title,
      domain: page.domain,
      role: page.role,
      wordCount: page.word_count,
      ...result,
    };

    if (page.role === 'target' || page.role === 'owned') {
      own.push(pageScore);
    } else {
      const bucket = byCompetitor.get(page.domain) ?? [];
      bucket.push(pageScore);
      byCompetitor.set(page.domain, bucket);
    }

    scored += 1;
  }

  // Worst-first ordering makes the output directly actionable.
  const byScoreAsc = (a, b) => a.score - b.score;
  own.sort(byScoreAsc);
  for (const list of byCompetitor.values()) list.sort(byScoreAsc);

  // ── Summary stats ────────────────────────────────────────────────────────
  const average = (nums) =>
    nums.length ? Math.round(nums.reduce((sum, n) => sum + n, 0) / nums.length) : 0;

  const ownScores = own.map((r) => r.score);
  const competitorScores = [...byCompetitor.values()].flat().map((r) => r.score);
  const avgTarget = average(ownScores);
  const avgComp = average(competitorScores);

  const tierCounts = { excellent: 0, good: 0, needs_work: 0, poor: 0 };
  for (const r of own) tierCounts[r.tier] += 1;

  const summary = {
    totalScored: scored,
    targetPages: own.length,
    competitorPages: competitorScores.length,
    avgTargetScore: avgTarget,
    avgCompetitorScore: avgComp,
    scoreDelta: avgTarget - avgComp,
    tierCounts,
    weakestSignals: getWeakestSignals(own),
  };

  log(`  Scored ${scored} pages (${own.length} target, ${competitorScores.length} competitor)`);
  log(`  Target avg: ${avgTarget}/100 | Competitor avg: ${avgComp}/100 | Delta: ${summary.scoreDelta > 0 ? '+' : ''}${summary.scoreDelta}`);

  return { target: own, competitors: byCompetitor, summary };
}
136
+
137
/**
 * Persist AEO scores to the citability_scores table.
 *
 * Writes target and competitor rows in a single transaction; on any
 * failure the transaction is rolled back and the error is rethrown.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {object} results - { target: PageScore[], competitors: Map<domain, PageScore[]> }
 * @throws rethrows any database error after rolling back
 */
export function persistAeoScores(db, results) {
  const insert = db.prepare(`
    INSERT OR REPLACE INTO citability_scores
    (page_id, score, entity_authority, structured_claims, answer_density,
     qa_proximity, freshness, schema_coverage, ai_intents, tier, scored_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  // Target rows first, then every competitor bucket.
  const rows = results.target.concat(...results.competitors.values());

  db.exec('BEGIN');
  try {
    for (const row of rows) {
      const b = row.breakdown;
      insert.run(
        row.pageId, row.score,
        b.entity_authority, b.structured_claims,
        b.answer_density, b.qa_proximity,
        b.freshness, b.schema_coverage,
        JSON.stringify(row.aiIntents), row.tier, Date.now()
      );
    }
    db.exec('COMMIT');
  } catch (err) {
    db.exec('ROLLBACK');
    throw err;
  }
}
170
+
171
/**
 * Feed low-scoring target pages into the Intelligence Ledger as
 * 'citability_gap' insights. Pages scoring 60 or above are skipped.
 *
 * Best-effort by design: on failure the transaction is rolled back and
 * the error is logged rather than rethrown, so the AEO run still
 * completes.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object[]} targetResults - scored target pages
 */
export function upsertCitabilityInsights(db, project, targetResults) {
  const upsert = db.prepare(`
    INSERT INTO insights (project, type, status, fingerprint, first_seen, last_seen, source_analysis_id, data)
    VALUES (?, 'citability_gap', 'active', ?, ?, ?, NULL, ?)
    ON CONFLICT(project, type, fingerprint) DO UPDATE SET
      last_seen = excluded.last_seen,
      data = excluded.data
  `);

  const now = Date.now();
  db.exec('BEGIN');
  try {
    for (const result of targetResults) {
      // Only pages that need work become insights.
      if (result.score >= 60) continue;

      // Stable fingerprint: lowercased URL stripped to [a-z0-9/].
      const fingerprint = result.url.toLowerCase().replace(/[^a-z0-9/]/g, '').trim();

      // The two lowest-scoring signals drive the recommendation text.
      const weakest = Object.entries(result.breakdown)
        .sort(([, a], [, b]) => a - b)
        .slice(0, 2)
        .map(([name]) => name.replace(/_/g, ' '));

      const payload = JSON.stringify({
        url: result.url,
        title: result.title,
        score: result.score,
        tier: result.tier,
        weakest_signals: weakest,
        ai_intents: result.aiIntents,
        recommendation: `Improve ${weakest.join(' and ')} to boost AI citability from ${result.score}/100`,
      });

      upsert.run(project, fingerprint, now, now, payload);
    }
    db.exec('COMMIT');
  } catch (err) {
    db.exec('ROLLBACK');
    console.error('[aeo] insight upsert failed:', err.message);
  }
}
213
+
214
// ── Helpers ────────────────────────────────────────────────────────────────

/**
 * Average each citability signal across all target pages and return the
 * signals sorted weakest-first (lowest average leads the list).
 *
 * @param {object[]} targetResults - scored pages, each with a `breakdown` object
 * @returns {{signal: string, avg: number}[]}
 */
function getWeakestSignals(targetResults) {
  if (targetResults.length === 0) return [];

  const totals = {
    entity_authority: 0, structured_claims: 0, answer_density: 0,
    qa_proximity: 0, freshness: 0, schema_coverage: 0,
  };

  for (const { breakdown } of targetResults) {
    for (const key of Object.keys(breakdown)) {
      totals[key] += breakdown[key];
    }
  }

  const pageCount = targetResults.length;
  const averaged = Object.entries(totals).map(([signal, total]) => ({
    signal: signal.replace(/_/g, ' '),
    avg: Math.round(total / pageCount),
  }));

  averaged.sort((a, b) => a.avg - b.avg);
  return averaged;
}
237
+
238
/**
 * Read stored citability scores for a project, joined with page and
 * domain metadata. Ordered by domain role then score ascending (worst
 * first) for the dashboard view.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @returns {object[]} citability_scores rows enriched with url/title/domain
 */
export function getCitabilityScores(db, project) {
  const query = `
    SELECT
      cs.*, p.url, p.title, p.word_count,
      d.domain, d.role
    FROM citability_scores cs
    JOIN pages p ON p.id = cs.page_id
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ?
    ORDER BY d.role ASC, cs.score ASC
  `;
  return db.prepare(query).all(project);
}
@@ -0,0 +1,254 @@
1
+ /**
2
+ * AEO Citability Scorer — pure function, zero I/O
3
+ *
4
+ * Scores a page for how well an AI assistant could cite it as a source.
5
+ * All inputs are plain objects from the DB; output is a score breakdown.
6
+ */
7
+
8
// ── Question patterns in headings ──────────────────────────────────────────
// Interrogative heading detector ("What is…", "How do…") — anchored at the
// start of the heading text, case-insensitive.
const QUESTION_RE = /^(what|how|why|when|where|which|who|can|does|is|are|should|do)\b/i;
// Comparison phrasing: "X vs Y", "versus", "compare(d)", "alternative…".
const COMPARISON_RE = /\bvs\.?\b|\bversus\b|\bcompare[d]?\b|\bcomparison\b|\balternative/i;
// Implementation / how-to phrasing: "how to", "step-by-step", "tutorial", etc.
const IMPL_RE = /\bhow to\b|\bstep[- ]by[- ]step\b|\btutorial\b|\bguide\b|\bsetup\b|\binstall/i;
12
+
13
// ── Freshness scoring ──────────────────────────────────────────────────────

/**
 * Score page freshness (0–100) from the most trustworthy date available.
 * Preference order: schema date_modified, schema date_published, then the
 * page's own modified/published dates. Missing or unparseable dates → 0.
 *
 * @param {object} page - { modified_date, published_date, ... }
 * @param {object[]} schemas - page_schemas rows ({ date_modified, date_published })
 * @returns {number} 0–100
 */
function freshnessScore(page, schemas) {
  const schemaModified = schemas.find((s) => s.date_modified)?.date_modified;
  const schemaPublished = schemas.find((s) => s.date_published)?.date_published;
  const dateStr =
    schemaModified || schemaPublished || page.modified_date || page.published_date;
  if (!dateStr) return 0;

  const parsedMs = new Date(dateStr).getTime();
  if (Number.isNaN(parsedMs)) return 0;

  const ageDays = (Date.now() - parsedMs) / (1000 * 60 * 60 * 24);

  // Step function: newer content is dramatically more citable.
  if (ageDays < 90) return 100;  // < 3 months
  if (ageDays < 180) return 80;  // < 6 months
  if (ageDays < 365) return 60;  // < 1 year
  if (ageDays < 730) return 30;  // < 2 years
  return 10;                     // 2+ years
}
35
+
36
// ── Entity authority ───────────────────────────────────────────────────────

/**
 * Score topical/entity authority (0–100).
 * Combines entity count (breadth), entities echoed in headings
 * (structural emphasis, 15 pts each capped at 40), and word count (depth).
 *
 * @param {string[]} entities - extracted primary entities
 * @param {object[]} headings - [{ level, text }]
 * @param {number} wordCount
 * @returns {number} 0–100
 */
function entityAuthorityScore(entities, headings, wordCount) {
  if (!entities.length) return 0;

  // Breadth: more distinct entities means deeper topical coverage.
  let score = entities.length >= 5 ? 30 : entities.length >= 3 ? 20 : 10;

  // Emphasis: entities that appear inside heading text carry extra weight.
  const loweredHeadings = headings.map((h) => h.text.toLowerCase());
  let inHeadingCount = 0;
  for (const entity of entities) {
    const needle = entity.toLowerCase();
    if (loweredHeadings.some((text) => text.includes(needle))) inHeadingCount += 1;
  }
  score += Math.min(inHeadingCount * 15, 40);

  // Depth: longer content suggests fuller coverage of the topic.
  if (wordCount >= 2000) score += 30;
  else if (wordCount >= 1000) score += 20;
  else if (wordCount >= 500) score += 10;

  return Math.min(score, 100);
}
62
+
63
// ── Structured claims ──────────────────────────────────────────────────────

/**
 * Score how many discrete, citable claims the body contains (0–100).
 * Rewards definitional sentences ("X is Y"), list structure, comparison
 * phrasing, and how-to phrasing. `headings` is accepted for interface
 * parity with the other scorers but is not currently used.
 *
 * @param {string} bodyText
 * @param {object[]} headings - unused
 * @returns {number} 0–100
 */
function structuredClaimsScore(bodyText, headings) {
  if (!bodyText) return 0;

  const sentences = bodyText
    .split(/[.!?]+/)
    .filter((s) => s.trim().length > 20);
  if (!sentences.length) return 0;

  let score = 0;

  // Definitional sentences are the most quotable kind of claim.
  let definitional = 0;
  for (const sentence of sentences) {
    if (/\b(?:is|are|means|refers to|defined as|consists of)\b/i.test(sentence)) {
      definitional += 1;
    }
  }
  score += Math.min((definitional / sentences.length) * 200, 40);

  // Numbered or bulleted lines indicate listicle structure.
  const listMarkers = (bodyText.match(/(?:^|\n)\s*(?:\d+[.)]\s|[-•]\s)/gm) || []).length;
  if (listMarkers >= 5) score += 25;
  else if (listMarkers >= 3) score += 15;

  if (COMPARISON_RE.test(bodyText)) score += 15; // comparison framing
  if (IMPL_RE.test(bodyText)) score += 20;       // step-by-step framing

  return Math.min(score, 100);
}
90
+
91
// ── Answer density ─────────────────────────────────────────────────────────

/**
 * Score how easily an AI can lift a direct answer from the body (0–100).
 * Rewards short paragraphs, an early direct answer (inverted pyramid),
 * a citable overall length, and inline/fenced code blocks.
 *
 * @param {string} bodyText
 * @param {number} wordCount
 * @returns {number} 0–100
 */
function answerDensityScore(bodyText, wordCount) {
  if (!bodyText || wordCount < 100) return 0;

  const paragraphs = bodyText.split(/\n\s*\n/).filter((p) => p.trim().length > 0);
  if (!paragraphs.length) return 10;

  let score = 0;

  // Scannability: short paragraphs are easier to extract from.
  const wordsPerParagraph = wordCount / paragraphs.length;
  if (wordsPerParagraph <= 80) score += 30;       // concise
  else if (wordsPerParagraph <= 150) score += 20; // moderate
  else score += 5;                                // wall of text

  // Inverted pyramid: does the opening ~200 words state a direct answer?
  const opening = bodyText.split(/\s+/).slice(0, 200).join(' ');
  if (/\b(?:is|are|means|provides?|offers?|enables?|allows?)\b/i.test(opening)) {
    score += 25;
  }

  // Length sweet spot: long enough to cite, short enough to stay focused.
  if (wordCount >= 300 && wordCount <= 3000) score += 25;
  else if (wordCount > 3000) score += 15; // very long can dilute
  else score += 10;                       // too short to cite well

  // Code blocks are highly citable for technical queries.
  const codeCount = (bodyText.match(/```[\s\S]*?```|`[^`]+`/g) || []).length;
  if (codeCount >= 3) score += 20;
  else if (codeCount >= 1) score += 10;

  return Math.min(score, 100);
}
124
+
125
// ── Q&A proximity ──────────────────────────────────────────────────────────

/**
 * Score how closely the page resembles Q&A content (0–100).
 * Looks at question-style H2/H3 headings, FAQ schema, and heading density
 * (roughly one H2/H3 per 300 words is ideal).
 *
 * Fix: the comment said "FAQ schema present? Huge bonus" but the +30 was
 * granted unconditionally — the function never received schema info. The
 * bonus is now gated on an actual FAQPage schema when `schemaTypes` is
 * supplied; omitting the argument preserves the legacy behavior, so
 * existing callers are unaffected.
 *
 * @param {object[]} headings - [{ level, text }]
 * @param {string} bodyText
 * @param {string[]|null} [schemaTypes] - schema type names on the page;
 *   pass null / omit to keep the legacy unconditional bonus
 * @returns {number} 0–100
 */
function qaProximityScore(headings, bodyText, schemaTypes = null) {
  if (!headings.length || !bodyText) return 0;

  const questionHeadings = headings.filter(h =>
    h.level >= 2 && h.level <= 3 && QUESTION_RE.test(h.text)
  );

  if (!questionHeadings.length) return 10; // no Q&A structure at all

  let score = 0;

  // More question headings = better Q&A structure
  const qRatio = questionHeadings.length / headings.filter(h => h.level >= 2).length;
  score += Math.min(qRatio * 60, 40);

  // FAQ schema present? Huge bonus. (Legacy callers that pass no
  // schemaTypes still receive it unconditionally for compatibility.)
  if (schemaTypes === null || schemaTypes.includes('FAQPage')) score += 30;

  // Heading density (one H2/H3 per ~300 words is ideal)
  const h2h3Count = headings.filter(h => h.level >= 2 && h.level <= 3).length;
  const words = bodyText.split(/\s+/).length;
  const idealHeadings = Math.floor(words / 300);
  const headingRatio = idealHeadings > 0 ? Math.min(h2h3Count / idealHeadings, 2) : 0;
  if (headingRatio >= 0.7 && headingRatio <= 1.5) score += 30;
  else if (headingRatio >= 0.4) score += 15;

  return Math.min(score, 100);
}
154
+
155
// ── Schema coverage ────────────────────────────────────────────────────────

/**
 * Score structured-data coverage (0–100) from the schema.org types on
 * the page. Content schemas (FAQ/HowTo/Article…) count most (30 each),
 * product-ish schemas count 15, site boilerplate counts 5, and having
 * 3+ types earns a +20 richness bonus.
 *
 * @param {string[]} schemaTypes - schema type names present on the page
 * @returns {number} 0–100
 */
function schemaCoverageScore(schemaTypes) {
  if (!schemaTypes.length) return 0;

  // Buckets by citation value.
  const HIGH = new Set(['FAQPage', 'HowTo', 'Article', 'TechArticle', 'BlogPosting']);
  const MED = new Set(['Product', 'Review', 'SoftwareApplication', 'WebApplication']);
  const BASE = new Set(['Organization', 'WebSite', 'WebPage', 'BreadcrumbList']);

  let score = 0;
  for (const type of schemaTypes) {
    if (HIGH.has(type)) score += 30;
    else if (MED.has(type)) score += 15;
    else if (BASE.has(type)) score += 5;
  }

  // Richness bonus for multiple schema types.
  if (schemaTypes.length >= 3) score += 20;

  return Math.min(score, 100);
}
177
+
178
// ── AI Query Intent Classification ─────────────────────────────────────────

/**
 * Classify which AI query intents a page can serve, from heading text and
 * the first 2000 chars of the body. Labels: synthesis, decision_support,
 * implementation, exploration, validation. When no pattern matches, falls
 * back to a mapping from the extraction-phase search intent (defaulting
 * to exploration).
 *
 * @param {object[]} headings - [{ level, text }]
 * @param {string} bodyText
 * @param {string} searchIntent - extraction search intent (may be absent)
 * @returns {string[]} one or more intent labels
 */
function classifyAiIntent(headings, bodyText, searchIntent) {
  const haystack = headings
    .map((h) => h.text)
    .concat((bodyText || '').slice(0, 2000))
    .join(' ')
    .toLowerCase();

  const intents = [];
  if (COMPARISON_RE.test(haystack)) intents.push('synthesis');
  if (/\bshould\b|\brecommend|\bbest\b.*\bfor\b|\bchoose\b/i.test(haystack)) intents.push('decision_support');
  if (IMPL_RE.test(haystack)) intents.push('implementation');
  if (/\bwhat (is|are)\b|\boverview\b|\bintroduc/i.test(haystack)) intents.push('exploration');
  if (/\bbest practice|\bshould you\b|\bis it worth/i.test(haystack)) intents.push('validation');

  if (intents.length) return intents;

  // Nothing matched — translate the coarse extraction intent instead.
  switch (searchIntent) {
    case 'Commercial':
      return ['decision_support'];
    case 'Transactional':
      return ['implementation'];
    case 'Informational':
    default:
      return ['exploration'];
  }
}
203
+
204
// ── Main scorer ────────────────────────────────────────────────────────────

/**
 * Score a single page for AI citability.
 *
 * Computes six signal scores, combines them with fixed weights into a
 * 0–100 composite, classifies which AI query intents the page serves,
 * and assigns a tier label.
 *
 * Fix: schemaTypes is now forwarded to qaProximityScore so its FAQ-schema
 * bonus can be based on an actual FAQPage schema rather than being granted
 * unconditionally. (With the original two-argument qaProximityScore the
 * extra argument is simply ignored, so behavior is unchanged until that
 * function accepts it.)
 *
 * @param {object} page - { url, title, body_text, word_count, published_date, modified_date }
 * @param {object[]} headings - [{ level, text }]
 * @param {string[]} entities - primary_entities array
 * @param {string[]} schemaTypes - schema type strings present on page
 * @param {object[]} schemas - full page_schemas rows
 * @param {string} searchIntent - from extraction
 * @returns {object} { score, breakdown, aiIntents, tier }
 */
export function scorePage(page, headings, entities, schemaTypes, schemas, searchIntent) {
  const bodyText = page.body_text || '';
  // Fall back to a whitespace word count when the crawler didn't store one.
  const wordCount = page.word_count || bodyText.split(/\s+/).length;

  const breakdown = {
    entity_authority: entityAuthorityScore(entities, headings, wordCount),
    structured_claims: structuredClaimsScore(bodyText, headings),
    answer_density: answerDensityScore(bodyText, wordCount),
    qa_proximity: qaProximityScore(headings, bodyText, schemaTypes),
    freshness: freshnessScore(page, schemas),
    schema_coverage: schemaCoverageScore(schemaTypes),
  };

  // Weighted composite — entity authority and structured claims matter most
  // for whether an AI will cite the page.
  const weights = {
    entity_authority: 0.25,
    structured_claims: 0.20,
    answer_density: 0.20,
    qa_proximity: 0.15,
    freshness: 0.10,
    schema_coverage: 0.10,
  };

  const score = Math.round(
    Object.entries(weights).reduce((sum, [k, w]) => sum + breakdown[k] * w, 0)
  );

  const aiIntents = classifyAiIntent(headings, bodyText, searchIntent);

  // Tier thresholds: 75+ excellent, 55+ good, 35+ needs_work, else poor.
  let tier;
  if (score >= 75) tier = 'excellent';
  else if (score >= 55) tier = 'good';
  else if (score >= 35) tier = 'needs_work';
  else tier = 'poor';

  return { score, breakdown, aiIntents, tier };
}