seo-intel 1.2.5 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/analyses/aeo/index.js +252 -0
- package/analyses/aeo/scorer.js +254 -0
- package/analyses/blog-draft/index.js +227 -0
- package/analyses/blog-draft/prescorer.js +60 -0
- package/analyses/templates/cluster.js +209 -0
- package/analyses/templates/gsc-overlay.js +93 -0
- package/analyses/templates/index.js +425 -0
- package/analyses/templates/sampler.js +198 -0
- package/analyses/templates/scorer.js +149 -0
- package/analyses/templates/similarity.js +174 -0
- package/analysis/prompt-builder.js +272 -0
- package/analysis/topic-cluster-mapper.js +427 -0
- package/cli.js +124 -1
- package/extractor/qwen.js +558 -0
- package/lib/gate.js +1 -0
- package/package.json +4 -1
- package/reports/generate-html.js +183 -0
- package/server.js +6 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,34 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.3.0 (2026-04-01)
|
|
4
|
+
|
|
5
|
+
### New Feature: AEO Blog Draft Generator
|
|
6
|
+
- `seo-intel blog-draft <project>` — generate AEO-optimised blog post drafts from Intelligence Ledger data
|
|
7
|
+
- Gathers keyword gaps, long-tails, citability insights, entities, and top citable pages
|
|
8
|
+
- Builds structured prompt with 10 AEO signal rules for maximum AI citability
|
|
9
|
+
- Pre-scores generated draft against AEO signals before publishing
|
|
10
|
+
- Options: `--topic`, `--lang en|fi`, `--model gemini|claude|gpt|deepseek`, `--save`
|
|
11
|
+
- Pro feature gated via Lemon Squeezy license
|
|
12
|
+
|
|
13
|
+
### Dashboard
|
|
14
|
+
- New "Create" section in export sidebar with interactive draft generator
|
|
15
|
+
- "Create a Draft" dropdown: select type (Blog Post / Documentation), topic, language, then generate
|
|
16
|
+
- "AI Citability Audit" button added to export sidebar — run AEO from dashboard
|
|
17
|
+
- Both `aeo` and `blog-draft` commands now available via dashboard terminal
|
|
18
|
+
|
|
19
|
+
### Server
|
|
20
|
+
- Added `aeo` and `blog-draft` to terminal command whitelist
|
|
21
|
+
- Forward `--topic`, `--lang`, `--model`, `--save` params from dashboard to CLI
|
|
22
|
+
|
|
23
|
+
## 1.2.6 (2026-03-31)
|
|
24
|
+
|
|
25
|
+
### Critical Fix
|
|
26
|
+
- **Ship analysis, extraction, and AEO modules in npm package** — these were gitignored as "proprietary" from the Froggo era but are required for `extract`, `analyze`, `aeo`, `templates`, and dashboard generation
|
|
27
|
+
- npm users can now run the full pipeline without missing module errors
|
|
28
|
+
- Files added to git: `analyses/aeo/`, `analyses/templates/`, `analysis/`, `extractor/`
|
|
29
|
+
- Removed stale "NOT shipped in free npm package" comment from cli.js
|
|
30
|
+
- Deleted local `froggo-package/` directory
|
|
31
|
+
|
|
3
32
|
## 1.2.5 (2026-03-31)
|
|
4
33
|
|
|
5
34
|
### Skill / OpenClaw
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AEO / AI Citability Analysis — Orchestrator
|
|
3
|
+
*
|
|
4
|
+
* Reads crawled pages from DB, scores each for AI citability,
|
|
5
|
+
* stores results, and optionally feeds low-scoring pages into the Intelligence Ledger.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { scorePage } from './scorer.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Run AEO analysis for a project.
 *
 * Loads every indexable page of the project that has non-empty body text,
 * scores each one with `scorePage`, and splits the results into target/owned
 * pages and per-domain competitor pages. Nothing is written to the database.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object} opts - { includeCompetitors: boolean (default true), log: function (default console.log) }
 * @returns {object} { target: PageScore[], competitors: Map<domain, PageScore[]>, summary }
 *   Result arrays are sorted by score ascending; `summary` is null when no page qualifies.
 */
export function runAeoAnalysis(db, project, opts = {}) {
  const log = opts.log || console.log;
  const includeCompetitors = opts.includeCompetitors ?? true;

  // ── Gather pages with body_text ─────────────────────────────────────────
  // roleFilter is one of two fixed literals (never caller input), so it is
  // safe to interpolate into the SQL text.
  const roleFilter = includeCompetitors
    ? ''
    : `AND d.role IN ('target', 'owned')`;

  const pages = db.prepare(`
    SELECT
      p.id, p.url, p.title, p.body_text, p.word_count,
      p.published_date, p.modified_date,
      d.domain, d.role,
      e.primary_entities, e.search_intent, e.schema_types
    FROM pages p
    JOIN domains d ON d.id = p.domain_id
    LEFT JOIN extractions e ON e.page_id = p.id
    WHERE d.project = ?
      AND p.body_text IS NOT NULL AND p.body_text != ''
      AND p.is_indexable = 1
      ${roleFilter}
    ORDER BY d.role ASC, p.url ASC
  `).all(project);

  if (!pages.length) {
    return { target: [], competitors: new Map(), summary: null };
  }

  // ── Gather headings + schemas per page ──────────────────────────────────
  const headingsStmt = db.prepare(
    'SELECT level, text FROM headings WHERE page_id = ? ORDER BY id'
  );
  const schemasStmt = db.prepare(
    'SELECT schema_type, date_published, date_modified FROM page_schemas WHERE page_id = ?'
  );

  // ── Score each page ─────────────────────────────────────────────────────
  const targetResults = [];
  const competitorResults = new Map();
  let scored = 0;

  for (const page of pages) {
    const headings = headingsStmt.all(page.id);
    const pageSchemas = schemasStmt.all(page.id);
    const schemaTypes = pageSchemas.map(s => s.schema_type);

    // Fall back to the extraction's schema_types only when page_schemas is empty
    if (!schemaTypes.length) {
      schemaTypes.push(...parseJsonArray(page.schema_types));
    }

    // The scorer requires an array; a stored `null`, object or malformed JSON
    // is treated as "no entities" instead of crashing the whole run.
    const entities = parseJsonArray(page.primary_entities);

    const result = scorePage(
      page, headings, entities, schemaTypes, pageSchemas, page.search_intent
    );

    const pageScore = {
      pageId: page.id,
      url: page.url,
      title: page.title,
      domain: page.domain,
      role: page.role,
      wordCount: page.word_count,
      ...result,
    };

    if (page.role === 'target' || page.role === 'owned') {
      targetResults.push(pageScore);
    } else {
      if (!competitorResults.has(page.domain)) competitorResults.set(page.domain, []);
      competitorResults.get(page.domain).push(pageScore);
    }

    scored++;
  }

  // Sort by score ascending (worst first — actionable)
  targetResults.sort((a, b) => a.score - b.score);
  for (const [, arr] of competitorResults) arr.sort((a, b) => a.score - b.score);

  // ── Summary stats ────────────────────────────────────────────────────────
  const avgTarget = roundedMean(targetResults.map(r => r.score));
  const compScores = [...competitorResults.values()].flat().map(r => r.score);
  const avgComp = roundedMean(compScores);

  const tierCounts = { excellent: 0, good: 0, needs_work: 0, poor: 0 };
  for (const r of targetResults) tierCounts[r.tier]++;

  const summary = {
    totalScored: scored,
    targetPages: targetResults.length,
    competitorPages: compScores.length,
    avgTargetScore: avgTarget,
    avgCompetitorScore: avgComp,
    // With no competitor pages avgComp is 0, so the delta equals avgTarget.
    scoreDelta: avgTarget - avgComp,
    tierCounts,
    weakestSignals: getWeakestSignals(targetResults),
  };

  log(`  Scored ${scored} pages (${targetResults.length} target, ${compScores.length} competitor)`);
  log(`  Target avg: ${avgTarget}/100  |  Competitor avg: ${avgComp}/100  |  Delta: ${summary.scoreDelta > 0 ? '+' : ''}${summary.scoreDelta}`);

  return { target: targetResults, competitors: competitorResults, summary };
}

/** Parse a JSON column expected to hold an array; anything else yields []. */
function parseJsonArray(raw) {
  if (!raw) return [];
  try {
    const parsed = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    return [];
  }
}

/** Rounded arithmetic mean of a list of numbers; 0 for an empty list. */
function roundedMean(values) {
  if (!values.length) return 0;
  return Math.round(values.reduce((a, b) => a + b, 0) / values.length);
}
|
|
136
|
+
|
|
137
|
+
/**
 * Persist AEO scores to citability_scores table
 *
 * Writes one row per scored page (target and competitor) inside a single
 * transaction. All rows of one call share the same `scored_at` timestamp.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {object} results - { target: PageScore[], competitors: Map<domain, PageScore[]> }
 * @throws rethrows the error that aborted the batch, after attempting a rollback
 */
export function persistAeoScores(db, results) {
  const stmt = db.prepare(`
    INSERT OR REPLACE INTO citability_scores
      (page_id, score, entity_authority, structured_claims, answer_density,
       qa_proximity, freshness, schema_coverage, ai_intents, tier, scored_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const allResults = [
    ...results.target,
    ...[...results.competitors.values()].flat(),
  ];

  // One timestamp per batch so every row written by this call is stamped alike.
  const scoredAt = Date.now();

  db.exec('BEGIN');
  try {
    for (const r of allResults) {
      stmt.run(
        r.pageId, r.score,
        r.breakdown.entity_authority, r.breakdown.structured_claims,
        r.breakdown.answer_density, r.breakdown.qa_proximity,
        r.breakdown.freshness, r.breakdown.schema_coverage,
        JSON.stringify(r.aiIntents), r.tier, scoredAt
      );
    }
    db.exec('COMMIT');
  } catch (e) {
    // A failing ROLLBACK must not replace the error that caused the abort.
    try {
      db.exec('ROLLBACK');
    } catch { /* keep the original error */ }
    throw e;
  }
}
|
|
170
|
+
|
|
171
|
+
/**
 * Feed low-scoring pages into Intelligence Ledger as citability_gap insights
 *
 * Best-effort: any failure rolls the batch back, is logged to stderr and is
 * NOT rethrown, so a ledger problem never aborts the surrounding analysis.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object[]} targetResults - PageScore objects; only those scoring below 60 are recorded
 */
export function upsertCitabilityInsights(db, project, targetResults) {
  const upsertStmt = db.prepare(`
    INSERT INTO insights (project, type, status, fingerprint, first_seen, last_seen, source_analysis_id, data)
    VALUES (?, 'citability_gap', 'active', ?, ?, ?, NULL, ?)
    ON CONFLICT(project, type, fingerprint) DO UPDATE SET
      last_seen = excluded.last_seen,
      data = excluded.data
  `);

  const ts = Date.now();
  db.exec('BEGIN');
  try {
    for (const r of targetResults) {
      if (r.score >= 60) continue; // only flag pages that need work

      // Fingerprint = lower-cased URL reduced to [a-z0-9/]; it is the upsert
      // key, so its derivation must stay stable across releases.
      const fp = r.url.toLowerCase().replace(/[^a-z0-9/]/g, '').trim();

      // Two lowest-scoring signals, rendered with spaces for display
      const weakest = Object.entries(r.breakdown)
        .sort(([, a], [, b]) => a - b)
        .slice(0, 2)
        .map(([k]) => k.replace(/_/g, ' '));

      const data = {
        url: r.url,
        title: r.title,
        score: r.score,
        tier: r.tier,
        weakest_signals: weakest,
        ai_intents: r.aiIntents,
        recommendation: `Improve ${weakest.join(' and ')} to boost AI citability from ${r.score}/100`,
      };

      upsertStmt.run(project, fp, ts, ts, JSON.stringify(data));
    }
    db.exec('COMMIT');
  } catch (e) {
    // Keep the best-effort contract even if the rollback itself fails.
    try {
      db.exec('ROLLBACK');
    } catch { /* nothing further to undo */ }
    console.error('[aeo] insight upsert failed:', e.message);
  }
}
|
|
213
|
+
|
|
214
|
+
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
215
|
+
|
|
216
|
+
/**
 * Average each breakdown signal across the target pages and rank them
 * weakest-first.
 *
 * @param {object[]} targetResults - PageScore objects carrying a numeric `breakdown` map
 * @returns {{signal: string, avg: number}[]} one entry per signal, ascending by average;
 *   [] when there are no target pages. Signal names have underscores replaced by spaces.
 */
function getWeakestSignals(targetResults) {
  if (!targetResults.length) return [];

  // Seed the known signals so they are always reported, even when every
  // page scored 0 on them.
  const signalTotals = {
    entity_authority: 0, structured_claims: 0, answer_density: 0,
    qa_proximity: 0, freshness: 0, schema_coverage: 0,
  };

  for (const r of targetResults) {
    for (const [k, v] of Object.entries(r.breakdown)) {
      // A signal not listed above starts from 0 instead of undefined,
      // which would otherwise turn its total into NaN.
      signalTotals[k] = (signalTotals[k] ?? 0) + v;
    }
  }

  return Object.entries(signalTotals)
    .map(([signal, total]) => ({
      signal: signal.replace(/_/g, ' '),
      avg: Math.round(total / targetResults.length),
    }))
    .sort((a, b) => a.avg - b.avg);
}
|
|
237
|
+
|
|
238
|
+
/**
 * Read stored citability scores for dashboard
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @returns {object[]} every citability_scores row of the project joined with its
 *   page (url, title, word_count) and domain (domain, role), ordered by role
 *   and then by score ascending
 */
export function getCitabilityScores(db, project) {
  const query = db.prepare(`
    SELECT
      cs.*, p.url, p.title, p.word_count,
      d.domain, d.role
    FROM citability_scores cs
    JOIN pages p ON p.id = cs.page_id
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ?
    ORDER BY d.role ASC, cs.score ASC
  `);
  const rows = query.all(project);
  return rows;
}
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AEO Citability Scorer — pure function, zero I/O
|
|
3
|
+
*
|
|
4
|
+
* Scores a page for how well an AI assistant could cite it as a source.
|
|
5
|
+
* All inputs are plain objects from the DB; output is a score breakdown.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// ── Question patterns in headings ──────────────────────────────────────────
|
|
9
|
+
// Matches text that BEGINS with an interrogative or auxiliary word
// (what/how/why/…/should/do) followed by a word boundary; case-insensitive.
const QUESTION_RE = /^(what|how|why|when|where|which|who|can|does|is|are|should|do)\b/i;
|
|
10
|
+
// Matches comparison vocabulary anywhere in the text: "vs"/"vs.", "versus",
// "compare"/"compared", "comparison", or any word starting with "alternative".
const COMPARISON_RE = /\bvs\.?\b|\bversus\b|\bcompare[d]?\b|\bcomparison\b|\balternative/i;
|
|
11
|
+
// Matches implementation vocabulary anywhere in the text: "how to",
// "step-by-step" (hyphen or space separated), "tutorial", "guide", "setup",
// or any word starting with "install".
const IMPL_RE = /\bhow to\b|\bstep[- ]by[- ]step\b|\btutorial\b|\bguide\b|\bsetup\b|\binstall/i;
|
|
12
|
+
|
|
13
|
+
// ── Freshness scoring ──────────────────────────────────────────────────────
|
|
14
|
+
/**
 * Score how recently a page was published or updated.
 *
 * Date sources are tried in priority order — schema dateModified, schema
 * datePublished, page modified_date, page published_date — and the first one
 * that parses to a valid date is used. An unparseable value therefore no
 * longer hides a valid lower-priority date.
 *
 * @param {object} page - { modified_date, published_date }
 * @param {object[]} schemas - page_schemas rows ({ date_modified, date_published })
 * @returns {number} 100 / 80 / 60 / 30 / 10 by age bucket, or 0 when no usable date exists
 */
function freshnessScore(page, schemas) {
  const rows = schemas ?? [];

  const candidates = [
    rows.find(s => s.date_modified)?.date_modified,
    rows.find(s => s.date_published)?.date_published,
    page.modified_date,
    page.published_date,
  ];

  let timestamp = NaN;
  for (const value of candidates) {
    if (!value) continue;
    const parsed = new Date(value).getTime();
    if (!Number.isNaN(parsed)) {
      timestamp = parsed;
      break;
    }
  }

  if (Number.isNaN(timestamp)) return 0;

  // Dates in the future yield a negative age and land in the freshest bucket.
  const ageDays = (Date.now() - timestamp) / (1000 * 60 * 60 * 24);

  if (ageDays < 90) return 100;   // < 3 months
  if (ageDays < 180) return 80;   // < 6 months
  if (ageDays < 365) return 60;   // < 1 year
  if (ageDays < 730) return 30;   // < 2 years
  return 10;                      // 2+ years
}
|
|
35
|
+
|
|
36
|
+
// ── Entity authority ───────────────────────────────────────────────────────
|
|
37
|
+
/**
 * Score topical authority from the page's extracted entities.
 *
 * Only non-empty string entities count: an empty string would match every
 * heading via `includes('')`, and a non-string value has no text to match.
 *
 * @param {Array} entities - primary_entities array (expected to hold strings)
 * @param {object[]} headings - [{ level, text }]
 * @param {number} wordCount
 * @returns {number} 0-100: up to 30 for entity count, up to 40 for entities that
 *   appear in headings, up to 30 for word count
 */
function entityAuthorityScore(entities, headings, wordCount) {
  const names = entities
    .filter(e => typeof e === 'string' && e.trim() !== '')
    .map(e => e.trim().toLowerCase());

  if (!names.length) return 0;

  let score = 0;

  // More entities = deeper coverage
  if (names.length >= 5) score += 30;
  else if (names.length >= 3) score += 20;
  else score += 10;

  // Entities appearing in headings = stronger authority signal.
  // Headings with a missing text value are treated as empty.
  const headingTexts = headings.map(h => String(h?.text ?? '').toLowerCase());
  const entityInHeading = names.filter(name =>
    headingTexts.some(ht => ht.includes(name))
  ).length;

  score += Math.min(entityInHeading * 15, 40);

  // Word count indicates depth of coverage
  if (wordCount >= 2000) score += 30;
  else if (wordCount >= 1000) score += 20;
  else if (wordCount >= 500) score += 10;

  return Math.min(score, 100);
}
|
|
62
|
+
|
|
63
|
+
// ── Structured claims ──────────────────────────────────────────────────────
|
|
64
|
+
/**
 * Score how much of the body is written as extractable, structured claims.
 *
 * @param {string} bodyText
 * @param {object[]} headings - accepted for signature parity; not read here
 * @returns {number} 0-100: up to 40 for definitional sentences, up to 25 for
 *   list markers, 15 for comparison wording, 20 for how-to wording
 */
function structuredClaimsScore(bodyText, headings) {
  if (!bodyText) return 0;

  // Only fragments longer than 20 characters count as sentences
  const sentences = bodyText
    .split(/[.!?]+/)
    .filter(fragment => fragment.trim().length > 20);
  if (sentences.length === 0) return 0;

  // "X is Y" definitional patterns — highly citable
  let definitionalCount = 0;
  for (const sentence of sentences) {
    if (/\b(?:is|are|means|refers to|defined as|consists of)\b/i.test(sentence)) {
      definitionalCount += 1;
    }
  }
  let total = Math.min((definitionalCount / sentences.length) * 200, 40);

  // Numbered/bulleted line starts in the body (listicle structure)
  const listMarkers = bodyText.match(/(?:^|\n)\s*(?:\d+[.)]\s|[-•]\s)/gm) || [];
  if (listMarkers.length >= 5) {
    total += 25;
  } else if (listMarkers.length >= 3) {
    total += 15;
  }

  // Comparison wording
  total += COMPARISON_RE.test(bodyText) ? 15 : 0;

  // Step-by-step / how-to wording
  total += IMPL_RE.test(bodyText) ? 20 : 0;

  return Math.min(total, 100);
}
|
|
90
|
+
|
|
91
|
+
// ── Answer density ─────────────────────────────────────────────────────────
|
|
92
|
+
/**
 * Score how densely and scannably the body delivers answers.
 *
 * @param {string} bodyText
 * @param {number} wordCount
 * @returns {number} 0 for a missing body or fewer than 100 words; 10 when the body
 *   has no non-blank paragraph; otherwise the capped (100) sum of the
 *   paragraph-length, early-answer, length-band and code-snippet components
 */
function answerDensityScore(bodyText, wordCount) {
  if (!bodyText) return 0;
  if (wordCount < 100) return 0;

  // Paragraphs are separated by blank lines
  const blocks = bodyText
    .split(/\n\s*\n/)
    .filter(block => block.trim().length > 0);
  if (blocks.length === 0) return 10;

  // Short paragraphs = more scannable = better for AI extraction
  const wordsPerBlock = wordCount / blocks.length;
  let paragraphPoints;
  if (wordsPerBlock <= 80) paragraphPoints = 30;        // concise
  else if (wordsPerBlock <= 150) paragraphPoints = 20;  // moderate
  else paragraphPoints = 5;                             // wall of text

  // Do the first 200 words use answer-style verbs? (inverted pyramid)
  const opening = bodyText.split(/\s+/).slice(0, 200).join(' ');
  const opensWithAnswer = /\b(?:is|are|means|provides?|offers?|enables?|allows?)\b/i.test(opening);
  const answerPoints = opensWithAnswer ? 25 : 0;

  // Length band: 300-3000 words is the sweet spot
  let lengthPoints;
  if (wordCount > 3000) lengthPoints = 15;        // very long can dilute
  else if (wordCount >= 300) lengthPoints = 25;
  else lengthPoints = 10;                         // too short to cite well

  // Fenced or inline code snippets are highly citable for technical content
  const snippets = bodyText.match(/```[\s\S]*?```|`[^`]+`/g) || [];
  let codePoints = 0;
  if (snippets.length >= 3) codePoints = 20;
  else if (snippets.length >= 1) codePoints = 10;

  return Math.min(paragraphPoints + answerPoints + lengthPoints + codePoints, 100);
}
|
|
124
|
+
|
|
125
|
+
// ── Q&A proximity ──────────────────────────────────────────────────────────
|
|
126
|
+
/**
 * Score how closely the page follows a question → answer layout.
 *
 * @param {object[]} headings - [{ level, text }]
 * @param {string} bodyText
 * @param {string[]} [schemaTypes] - schema types present on the page. When given,
 *   the 30-point FAQ bonus is awarded only if it contains 'FAQPage'. When omitted,
 *   the bonus is awarded to any page with question headings, which is the
 *   behaviour two-argument callers have always received.
 * @returns {number} 0 without headings or body; 10 when no H2/H3 is phrased as a
 *   question; otherwise up to 100 (question ratio ≤ 40, FAQ bonus 30,
 *   heading density ≤ 30)
 */
function qaProximityScore(headings, bodyText, schemaTypes) {
  if (!headings.length || !bodyText) return 0;

  const sectionHeadings = headings.filter(h => h.level >= 2 && h.level <= 3);
  const questionHeadings = sectionHeadings.filter(h => QUESTION_RE.test(h.text));

  if (!questionHeadings.length) return 10; // no Q&A structure at all

  let score = 0;

  // More question headings = better Q&A structure.
  // The ratio is taken against every heading of level 2 or deeper.
  const qRatio = questionHeadings.length / headings.filter(h => h.level >= 2).length;
  score += Math.min(qRatio * 60, 40);

  // FAQ schema present? Huge bonus — conditional only when the caller
  // supplies schemaTypes; without it there is nothing to check against.
  const hasFaqSchema = schemaTypes === undefined
    || (Array.isArray(schemaTypes) && schemaTypes.includes('FAQPage'));
  if (hasFaqSchema) score += 30;

  // Heading density (one H2/H3 per ~300 words is ideal)
  const h2h3Count = sectionHeadings.length;
  const words = bodyText.split(/\s+/).length;
  const idealHeadings = Math.floor(words / 300);
  const headingRatio = idealHeadings > 0 ? Math.min(h2h3Count / idealHeadings, 2) : 0;
  if (headingRatio >= 0.7 && headingRatio <= 1.5) score += 30;
  else if (headingRatio >= 0.4) score += 15;

  return Math.min(score, 100);
}
|
|
154
|
+
|
|
155
|
+
// ── Schema coverage ────────────────────────────────────────────────────────
|
|
156
|
+
/**
 * Score the structured-data (schema.org) coverage of a page.
 *
 * Each DISTINCT schema type is counted once, so a type repeated across several
 * markup blocks neither multiplies its points nor triggers the
 * "multiple types" bonus on its own.
 *
 * @param {string[]} schemaTypes - schema type strings present on page
 * @returns {number} 0-100: 30 per high-value type, 15 per medium, 5 per base,
 *   plus 20 when at least three distinct types are present
 */
function schemaCoverageScore(schemaTypes) {
  if (!schemaTypes.length) return 0;

  const distinctTypes = new Set(schemaTypes);

  let score = 0;

  // High-value schema types for AI citation
  const highValue = ['FAQPage', 'HowTo', 'Article', 'TechArticle', 'BlogPosting'];
  const medValue = ['Product', 'Review', 'SoftwareApplication', 'WebApplication'];
  const baseValue = ['Organization', 'WebSite', 'WebPage', 'BreadcrumbList'];

  for (const t of distinctTypes) {
    if (highValue.includes(t)) score += 30;
    else if (medValue.includes(t)) score += 15;
    else if (baseValue.includes(t)) score += 5;
  }

  // Multiple schema types = richer structured data
  if (distinctTypes.size >= 3) score += 20;

  return Math.min(score, 100);
}
|
|
177
|
+
|
|
178
|
+
// ── AI Query Intent Classification ─────────────────────────────────────────
|
|
179
|
+
/**
 * Classify which kinds of AI query a page could answer.
 *
 * The heading texts plus the first 2000 characters of the body are
 * lower-cased and checked against one pattern per intent. If nothing matches,
 * the extraction's search intent picks a single fallback label.
 *
 * @param {object[]} headings - [{ level, text }]
 * @param {string} bodyText
 * @param {string} searchIntent - 'Informational' | 'Commercial' | 'Transactional' | other
 * @returns {string[]} one or more of 'synthesis', 'decision_support',
 *   'implementation', 'exploration', 'validation' (never empty)
 */
function classifyAiIntent(headings, bodyText, searchIntent) {
  const bodySample = (bodyText || '').slice(0, 2000);
  const haystack = [...headings.map(h => h.text), bodySample]
    .join(' ')
    .toLowerCase();

  // Evaluated in this order; the order determines the order of the result.
  const rules = [
    ['synthesis', COMPARISON_RE],
    ['decision_support', /\bshould\b|\brecommend|\bbest\b.*\bfor\b|\bchoose\b/i],
    ['implementation', IMPL_RE],
    ['exploration', /\bwhat (is|are)\b|\boverview\b|\bintroduc/i],
    ['validation', /\bbest practice|\bshould you\b|\bis it worth/i],
  ];

  const matched = rules
    .filter(([, pattern]) => pattern.test(haystack))
    .map(([label]) => label);

  if (matched.length > 0) return matched;

  // Fallback from extraction intent
  const fallbackByIntent = new Map([
    ['Informational', 'exploration'],
    ['Commercial', 'decision_support'],
    ['Transactional', 'implementation'],
  ]);
  return [fallbackByIntent.get(searchIntent) ?? 'exploration'];
}
|
|
203
|
+
|
|
204
|
+
// ── Main scorer ────────────────────────────────────────────────────────────
|
|
205
|
+
|
|
206
|
+
/**
 * Score a single page for AI citability.
 *
 * Computes six 0-100 signals, combines them into a weighted composite
 * (weights sum to 1.0), classifies the AI query intents the page serves and
 * assigns a tier: excellent ≥ 75, good ≥ 55, needs_work ≥ 35, otherwise poor.
 *
 * @param {object} page - { url, title, body_text, word_count, published_date, modified_date }
 * @param {object[]} headings - [{ level, text }]
 * @param {string[]} entities - primary_entities array
 * @param {string[]} schemaTypes - schema type strings present on page
 * @param {object[]} schemas - full page_schemas rows
 * @param {string} searchIntent - from extraction
 * @returns {object} { score, breakdown, aiIntents, tier }
 */
export function scorePage(page, headings, entities, schemaTypes, schemas, searchIntent) {
  const bodyText = page.body_text || '';

  // Fall back to counting words only when the stored count is missing or 0.
  // Trim first: splitting '' or padded text on whitespace yields empty
  // tokens, which would report 1 word for an empty body.
  const trimmedBody = bodyText.trim();
  const wordCount = page.word_count
    || (trimmedBody ? trimmedBody.split(/\s+/).length : 0);

  const breakdown = {
    entity_authority: entityAuthorityScore(entities, headings, wordCount),
    structured_claims: structuredClaimsScore(bodyText, headings),
    answer_density: answerDensityScore(bodyText, wordCount),
    // schemaTypes is passed along so FAQPage markup can be taken into
    // account by the Q&A signal.
    qa_proximity: qaProximityScore(headings, bodyText, schemaTypes),
    freshness: freshnessScore(page, schemas),
    schema_coverage: schemaCoverageScore(schemaTypes),
  };

  // Weighted composite — entity authority carries the most weight
  const weights = {
    entity_authority: 0.25,
    structured_claims: 0.20,
    answer_density: 0.20,
    qa_proximity: 0.15,
    freshness: 0.10,
    schema_coverage: 0.10,
  };

  const score = Math.round(
    Object.entries(weights).reduce((sum, [k, w]) => sum + breakdown[k] * w, 0)
  );

  const aiIntents = classifyAiIntent(headings, bodyText, searchIntent);

  // Tier classification
  let tier;
  if (score >= 75) tier = 'excellent';
  else if (score >= 55) tier = 'good';
  else if (score >= 35) tier = 'needs_work';
  else tier = 'poor';

  return { score, breakdown, aiIntents, tier };
}
|