seo-intel 1.2.5 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,227 @@
1
+ /**
2
+ * AEO Blog Draft Generator — Data Gathering & Prompt Builder
3
+ *
4
+ * Pulls intelligence from the Ledger (keyword gaps, long-tails, citability gaps,
5
+ * entities, positioning) and builds a prompt that produces a publish-ready,
6
+ * AEO-optimised blog post in .md format with YAML frontmatter.
7
+ */
8
+
9
+ import { getActiveInsights } from '../../db/db.js';
10
+
11
+ // ── Data Gathering ──────────────────────────────────────────────────────────
12
+
13
/**
 * Gather all Ledger intelligence needed to draft an AEO blog post.
 *
 * @param {object} db - SQLite handle with a `.prepare(sql).all(...)` API.
 * @param {string} project - project key used to scope every query.
 * @param {string|null} [topic] - optional topic filter; when set, keyword,
 *   long-tail and gap lists are biased toward phrases containing the topic.
 * @returns {object} context bundle consumed by buildBlogDraftPrompt().
 */
export function gatherBlogDraftContext(db, project, topic = null) {
  const insights = getActiveInsights(db, project);

  // Guard every grouped insight array. The original code guarded only
  // content_gaps with `|| []`; the unguarded keyword_inventor / long_tails /
  // keyword_gaps accesses below would throw if a group is absent from the
  // getActiveInsights return. Apply the same defense consistently.
  const inventorAll = insights.keyword_inventor ?? [];
  const longTailsAll = insights.long_tails ?? [];
  const keywordGapsAll = insights.keyword_gaps ?? [];

  // citability_gap insights — not in getActiveInsights grouped return
  let citabilityGaps = [];
  try {
    citabilityGaps = db.prepare(
      `SELECT data FROM insights WHERE project = ? AND type = 'citability_gap' AND status = 'active' ORDER BY last_seen DESC LIMIT 15`
    ).all(project).map(r => JSON.parse(r.data));
  } catch { /* table may not exist yet */ }

  // Top entities across target pages (largest pages first)
  let entityRows = [];
  try {
    entityRows = db.prepare(`
      SELECT e.primary_entities, p.title, p.url
      FROM extractions e
      JOIN pages p ON p.id = e.page_id
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND (d.role = 'target' OR d.role = 'owned')
      AND e.primary_entities IS NOT NULL AND e.primary_entities != '[]'
      ORDER BY p.word_count DESC LIMIT 20
    `).all(project);
  } catch { /* extraction may not have run */ }

  // Best AEO-scoring pages (content to emulate)
  let topCitablePages = [];
  try {
    topCitablePages = db.prepare(`
      SELECT p.url, p.title, cs.total_score as score, cs.ai_intents, cs.tier
      FROM citability_scores cs
      JOIN pages p ON p.id = cs.page_id
      JOIN domains d ON d.id = p.domain_id
      WHERE d.project = ? AND (d.role = 'target' OR d.role = 'owned') AND cs.total_score >= 55
      ORDER BY cs.total_score DESC LIMIT 5
    `).all(project);
  } catch { /* AEO may not have run */ }

  // Case-insensitive substring match. Items with no text always pass, so a
  // sparse record is never dropped just because a field is missing.
  const matchesTopic = (text) => {
    if (!topic || !text) return true;
    return text.toLowerCase().includes(topic.toLowerCase());
  };

  const kwInventor = inventorAll
    .filter(k => matchesTopic(k.phrase) || matchesTopic(k.cluster))
    .slice(0, 30);

  // With a topic: prefer on-topic phrases but keep a few off-topic ones for
  // breadth. Without a topic: just take the top slice.
  const longTails = topic
    ? [
        ...longTailsAll.filter(lt => matchesTopic(lt.phrase)).slice(0, 20),
        ...longTailsAll.filter(lt => !matchesTopic(lt.phrase)).slice(0, 10),
      ]
    : longTailsAll.slice(0, 30);

  const keywordGaps = topic
    ? [
        ...keywordGapsAll.filter(kg => matchesTopic(kg.keyword)).slice(0, 15),
        ...keywordGapsAll.filter(kg => !matchesTopic(kg.keyword)).slice(0, 10),
      ]
    : keywordGapsAll.filter(kg => kg.priority === 'high').slice(0, 25);

  const contentGaps = (insights.content_gaps || []).slice(0, 8);

  return {
    insights,
    citabilityGaps,
    entityRows,
    topCitablePages,
    kwInventor,
    longTails,
    keywordGaps,
    contentGaps,
    topic,
  };
}
89
+
90
+ // ── Prompt Builder ──────────────────────────────────────────────────────────
91
+
92
/**
 * Build the LLM prompt for generating an AEO-optimised blog post draft.
 *
 * @param {object} context - bundle from gatherBlogDraftContext() (longTails,
 *   keywordGaps, citabilityGaps, entityRows, topCitablePages, kwInventor,
 *   contentGaps, insights).
 * @param {object} options
 * @param {object} options.config - project config; reads config.context.* and
 *   config.target.* for site identity fields.
 * @param {string} [options.lang='en'] - output language; 'fi' switches to the
 *   Finnish instruction block.
 * @param {string|null} [options.topic=null] - when set, instructs the model to
 *   build the post around this topic; when null, the model picks the topic and
 *   must justify it in the frontmatter.
 * @returns {string} the complete prompt text.
 */
export function buildBlogDraftPrompt(context, { config, lang = 'en', topic = null }) {
  const { longTails, keywordGaps, citabilityGaps, entityRows, topCitablePages, kwInventor, contentGaps, insights } = context;
  const isFi = lang === 'fi';

  // Extract unique entities from extraction data (primary_entities is a JSON
  // array of either strings or { name } objects; bad JSON rows are skipped).
  const allEntities = new Set();
  for (const row of entityRows) {
    try {
      const ents = JSON.parse(row.primary_entities);
      if (Array.isArray(ents)) ents.forEach(e => allEntities.add(typeof e === 'string' ? e : e.name || e));
    } catch { /* skip */ }
  }
  const topEntities = [...allEntities].slice(0, 15);

  // ── Section 1: Role ──
  let prompt = `You are an expert content strategist and copywriter specialising in AEO (Answer Engine Optimisation).

Your task: write a complete, publish-ready blog post draft in ${isFi ? 'Finnish' : 'English'}.
The post must score 70+ on the AEO citability scale (entity authority, structured claims, answer density, Q&A proximity, freshness signals, schema coverage).

`;

  // ── Section 2: Site intelligence ──
  prompt += `## Site Context

- **Site:** ${config.context?.siteName || config.target?.domain} (${config.target?.url})
- **Industry:** ${config.context?.industry || 'N/A'}
- **Audience:** ${config.context?.audience || 'N/A'}
- **Goal:** ${config.context?.goal || 'N/A'}
`;

  if (insights.positioning) {
    prompt += `- **Positioning:** ${typeof insights.positioning === 'string' ? insights.positioning : JSON.stringify(insights.positioning)}\n`;
  }

  if (topEntities.length) {
    prompt += `- **Core entities:** ${topEntities.join(', ')}\n`;
  }

  if (topCitablePages.length) {
    prompt += `\n### Highest-scoring pages on the site (emulate their structure)\n`;
    for (const p of topCitablePages) {
      prompt += `- ${p.url} — AEO score: ${p.score}/100 (${p.tier})\n`;
    }
  }

  // ── Section 3: Topic focus ──
  prompt += `\n## Topic\n\n`;
  if (topic) {
    prompt += `Primary focus: **${topic}**. All keyword and gap data below has been filtered to this topic. Build the entire post around this subject.\n`;
  } else {
    prompt += `Select the highest-opportunity topic from the gaps below. Choose the gap that: (a) has the most keyword_gap entries or (b) is flagged as a high priority long-tail. Explain your topic choice in the frontmatter \`topic_selection_rationale\` field.\n`;
  }

  // ── Section 4: Intelligence data ──
  // Each list renders only when non-empty, as a markdown table or bullet list.
  if (keywordGaps.length) {
    prompt += `\n## Keyword Gaps to Target (include these as primary/secondary keywords)\n\n`;
    prompt += `| Keyword | Priority | Notes |\n|---|---|---|\n`;
    for (const kg of keywordGaps) {
      prompt += `| ${kg.keyword || kg.phrase || '—'} | ${kg.priority || 'medium'} | ${(kg.notes || '').slice(0, 80)} |\n`;
    }
  }

  if (longTails.length) {
    prompt += `\n## Long-tail Phrases to Answer (each should have a direct answer in the post)\n\n`;
    prompt += `| Phrase | Intent | Priority |\n|---|---|---|\n`;
    for (const lt of longTails) {
      prompt += `| ${lt.phrase || '—'} | ${lt.intent || '—'} | ${lt.priority || 'medium'} |\n`;
    }
  }

  if (kwInventor.length) {
    prompt += `\n## Keyword Inventor Phrases (weave these naturally into headings/body)\n\n`;
    for (const kw of kwInventor.slice(0, 20)) {
      prompt += `- "${kw.phrase}" (${kw.type || 'traditional'}, ${kw.intent || '—'})\n`;
    }
  }

  if (citabilityGaps.length) {
    prompt += `\n## Citability Gaps (pages scoring <60 on AEO — model the fix in this post)\n\n`;
    prompt += `| URL | Score | Weakest Signals |\n|---|---|---|\n`;
    for (const cg of citabilityGaps) {
      prompt += `| ${cg.url || '—'} | ${cg.score || '—'} | ${cg.weakest || cg.weakest_signal || '—'} |\n`;
    }
  }

  if (contentGaps.length) {
    prompt += `\n## Content Gaps (topics competitors cover that you don't)\n\n`;
    for (const cg of contentGaps) {
      // Gap records arrive in several shapes; fall back to raw JSON.
      const desc = typeof cg === 'string' ? cg : (cg.topic || cg.description || cg.gap || JSON.stringify(cg));
      prompt += `- ${desc}\n`;
    }
  }

  // ── Section 5: AEO structural requirements ──
  prompt += `
## AEO Structural Requirements

The draft MUST include:
1. YAML frontmatter with: title, slug, description (155 chars max), primary_keyword, secondary_keywords[], date (${new Date().toISOString().slice(0, 10)}), updated (same), lang (${lang}), tags[]${!topic ? ', topic_selection_rationale' : ''}
2. An H1 that contains the primary keyword
3. A 2-3 sentence summary immediately after the H1 (answer-first structure — inverted pyramid). This paragraph will be cited by AI assistants.
4. Minimum 6 H2 subheadings
5. At least 3 H2s phrased as direct questions (What is / How to / Why / When)
6. At least one numbered or bulleted list with 4+ items
7. At least one "X is Y because Z" definitional sentence per major concept
8. A FAQ section at the end with minimum 4 Q&A pairs (### H3 questions, 2-4 sentence answers)
9. A closing CTA paragraph referencing ${config.context?.siteName || config.target?.domain}
10. Word count: 1,200-2,000 words
11. Internal link suggestions: include 2-3 \`[anchor text](URL)\` links back to the site where natural
`;

  // ── Section 6: Language ──
  if (isFi) {
    prompt += `
## Language: Finnish

Write in Finnish. Use informal, direct register (sinuttelu where natural). Avoid marketing clichés common in Finnish B2B copy. Prefer short sentences. Finnish SEO keywords must appear in their exact searched base form in headings — Finnish inflection reduces exact-match keyword presence.
`;
  } else {
    prompt += `
## Language: English

Write in clear, direct international English. No filler phrases. No "in today's digital landscape" or "it's no secret that" openers. Every sentence should contain a fact, insight, or actionable point.
`;
  }

  // ── Section 7: Output format ──
  prompt += `
## Output Format

Respond with ONLY the complete markdown document. Start with --- (YAML frontmatter open fence). End with the FAQ section and CTA. No explanation before or after. No triple backticks wrapping the response.
`;

  return prompt;
}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * AEO Pre-Scorer — scores a generated markdown draft against citability signals
3
+ *
4
+ * Uses the same scorePage() function as the full AEO audit, but constructs
5
+ * synthetic inputs from the markdown text instead of reading from the DB.
6
+ *
7
+ * Freshness always scores 0 (no publish date yet) — the reported score
8
+ * accounts for this by adding +10 for "what it will score once published."
9
+ */
10
+
11
+ import { scorePage } from '../aeo/scorer.js';
12
+
13
/**
 * Score a generated markdown draft against AEO citability signals.
 *
 * Builds synthetic scorePage() inputs from the markdown text: the body with
 * YAML frontmatter stripped, the ATX headings, capitalised entity candidates
 * from H1-H3 text, and any `schema_type` declared in the frontmatter.
 *
 * @param {string} markdownText - complete draft, optionally with frontmatter.
 * @returns {object} scorePage() result plus { wordCount, headingCount }.
 */
export function prescore(markdownText) {
  // Body = everything after the closing frontmatter fence (or the whole text
  // when no frontmatter is present).
  const afterFm = markdownText.match(/^---[\s\S]*?---\n([\s\S]*)$/);
  const body = afterFm ? afterFm[1] : markdownText;

  // Collect ATX headings line by line.
  const headings = body
    .split('\n')
    .map(line => line.match(/^(#{1,6})\s+(.+)$/))
    .filter(Boolean)
    .map(m => ({ level: m[1].length, text: m[2].trim() }));

  const wordCount = body.split(/\s+/).filter(Boolean).length;

  // Optional schema_type declared in the frontmatter block.
  let fmSchemaType = null;
  const fm = markdownText.match(/^---([\s\S]*?)---/);
  const schemaDecl = fm?.[1].match(/schema_type:\s*(.+)/);
  if (schemaDecl) fmSchemaType = schemaDecl[1].trim();

  // Synthetic page record: no publish date yet, so freshness scores 0.
  // NOTE(review): the file header mentions a "+10 once published" adjustment,
  // but no such adjustment is applied in this function — presumably a caller
  // does it. Verify before relying on the raw score.
  const syntheticPage = {
    body_text: body,
    word_count: wordCount,
    published_date: null,
    modified_date: null,
  };

  // Entity candidates: capitalised noun phrases pulled from H1-H3 text.
  const candidates = [];
  for (const h of headings) {
    if (h.level > 3) continue;
    const found = h.text.match(/\b[A-ZÄÖÅ][a-zäöå]+(?:\s+[A-ZÄÖÅ][a-zäöå]+)*/g);
    if (found) candidates.push(...found);
  }
  const entities = [...new Set(candidates)].slice(0, 8);

  const schemaTypes = fmSchemaType ? [fmSchemaType] : [];
  const result = scorePage(syntheticPage, headings, entities, schemaTypes, [], 'Informational');

  return {
    ...result,
    wordCount,
    headingCount: headings.length,
  };
}
@@ -0,0 +1,209 @@
1
+ /**
2
+ * URL Pattern Clustering — Phase 1
3
+ *
4
+ * Takes sitemap URLs, detects parametric patterns, groups them.
5
+ * Pure function — no I/O, no side effects.
6
+ */
7
+
8
/**
 * Decide whether a path segment is a "variable" (one of N interchangeable
 * values: tickers, IDs, slugs, hashes) or a "constant" (a structural path
 * word like 'swap', 'docs', 'blog').
 *
 * @param {string} segment - one URL path segment (no slashes).
 * @returns {boolean} true when the segment looks like a variable value.
 */
function isVariable(segment) {
  // Version prefixes (v1, v2, ...) are structural, not variable.
  if (/^v\d+$/.test(segment)) return false;

  // Well-known structural words stay constant regardless of case.
  const structuralWords = [
    'api', 'docs', 'blog', 'news', 'about', 'pricing', 'features',
    'help', 'support', 'contact', 'legal', 'terms', 'privacy',
    'login', 'signup', 'register', 'dashboard', 'settings',
    'token', 'tokens', 'swap', 'trade', 'perps', 'perpetuals',
    'pool', 'pools', 'stake', 'staking', 'bridge', 'earn',
    'governance', 'vote', 'proposals', 'stats', 'analytics',
    'markets', 'pairs', 'explorer', 'episodes', 'categories',
    'tags', 'products', 'collections', 'pages', 'posts',
  ];
  if (structuralWords.includes(segment.toLowerCase())) return false;

  // Identifier-shaped values: numeric IDs/dates, UUID/hash-like strings,
  // and 0x-prefixed hex addresses.
  const idLikePatterns = [/^\d+$/, /^[0-9a-f-]{8,}$/i, /^0x[0-9a-fA-F]+$/];
  if (idLikePatterns.some(re => re.test(segment))) return true;

  // Separator characters typical of slugs/pairs: SOL-USDC, my-blog-post.
  if (segment.length > 2 && /[-_.]/.test(segment)) return true;

  // Short all-uppercase strings look like tickers: SOL, BONK, USDT, ETH.
  if (/^[A-Z0-9]{2,10}$/.test(segment)) return true;

  // Mixed case with digits suggests product codes or opaque IDs.
  if (/[A-Z]/.test(segment) && /\d/.test(segment)) return true;

  // Very long segments are likely slugs or IDs.
  return segment.length > 30;
}
52
+
53
+ /**
54
+ * Infer a semantic name for a param position based on observed values.
55
+ */
56
+ function inferParamName(values, position) {
57
+ const sample = values.slice(0, 100);
58
+
59
+ // Crypto pairs: X-Y format where both parts are short uppercase
60
+ const pairCount = sample.filter(v => /^[A-Za-z0-9]+-[A-Za-z0-9]+$/.test(v)).length;
61
+ if (pairCount > sample.length * 0.6) return 'pair';
62
+
63
+ // Token tickers: 2-10 uppercase chars
64
+ const tickerCount = sample.filter(v => /^[A-Z0-9]{2,10}$/.test(v)).length;
65
+ if (tickerCount > sample.length * 0.6) return 'symbol';
66
+
67
+ // Slugs: lowercase with hyphens
68
+ const slugCount = sample.filter(v => /^[a-z0-9]+(-[a-z0-9]+)+$/.test(v)).length;
69
+ if (slugCount > sample.length * 0.6) return 'slug';
70
+
71
+ // Numeric IDs
72
+ const numCount = sample.filter(v => /^\d+$/.test(v)).length;
73
+ if (numCount > sample.length * 0.6) return 'id';
74
+
75
+ // Hex hashes/addresses
76
+ const hexCount = sample.filter(v => /^(0x)?[0-9a-f]{8,}$/i.test(v)).length;
77
+ if (hexCount > sample.length * 0.6) return 'hash';
78
+
79
+ return `param${position}`;
80
+ }
81
+
82
/**
 * Cluster sitemap URLs into template groups.
 *
 * Each URL's path is decomposed into constant segments and variable slots
 * ({p0}, {p1}, ...); URLs sharing the same decomposed pattern are bucketed
 * together. Buckets with enough members become template groups whose slots
 * get semantic names from inferParamName(); the rest land in `ungrouped`.
 *
 * @param {Array<{url: string, lastmod?: string}>} sitemapEntries
 * @param {object} opts
 * @param {number} opts.minGroupSize — min URLs to qualify as template (default 10)
 * @param {number} opts.maxSegments — max path depth to consider (default 8)
 * @returns {{ groups: TemplateGroup[], ungrouped: string[], stats: object }}
 */
export function clusterUrls(sitemapEntries, opts = {}) {
  const minGroupSize = opts.minGroupSize || 10;
  const maxSegments = opts.maxSegments || 8;

  // Turn one pathname into pattern parts plus the variable values observed
  // at each slot (keyed p0, p1, ... in order of appearance).
  const decompose = (pathname) => {
    const segs = pathname.split('/').filter(Boolean).slice(0, maxSegments);
    const parts = [];
    const values = {};
    let slot = 0;
    for (const seg of segs) {
      if (isVariable(seg)) {
        const key = `p${slot}`;
        parts.push(`{${key}}`);
        if (!values[key]) values[key] = [];
        values[key].push(seg);
        slot++;
      } else {
        parts.push(seg.toLowerCase());
      }
    }
    return { parts, values };
  };

  // patternKey → bucket of urls / observed param values / lastmods.
  const byPattern = new Map();

  for (const entry of sitemapEntries) {
    let path;
    try {
      path = new URL(entry.url).pathname;
    } catch { continue; } // unparseable URL — skip silently

    // Normalize trailing slashes; the homepage is always unique.
    path = path.replace(/\/+$/, '') || '/';
    if (path === '/') continue;

    const { parts, values } = decompose(path);
    const key = '/' + parts.join('/');

    let bucket = byPattern.get(key);
    if (!bucket) {
      bucket = {
        patternKey: key,
        patternParts: parts,
        urls: [],
        paramPositions: {},
        lastmods: [],
      };
      byPattern.set(key, bucket);
    }

    bucket.urls.push(entry.url);
    if (entry.lastmod) bucket.lastmods.push(entry.lastmod);

    // Accumulate param values, capped near 200 per slot for memory.
    for (const [slot, vals] of Object.entries(values)) {
      const store = bucket.paramPositions[slot] || (bucket.paramPositions[slot] = []);
      if (store.length < 200) store.push(...vals);
    }
  }

  // Split buckets into template groups (>= minGroupSize) and ungrouped URLs.
  const groups = [];
  const ungrouped = [];

  for (const bucket of byPattern.values()) {
    if (bucket.urls.length < minGroupSize) {
      ungrouped.push(...bucket.urls);
      continue;
    }

    // Rename positional slots ({p0}...) to semantic names.
    const params = {};
    let slotIdx = 0;
    const renamed = bucket.patternParts.map(part => {
      const m = part.match(/^\{(p\d+)\}$/);
      if (!m) return part;
      const observed = bucket.paramPositions[m[1]] || [];
      const name = inferParamName(observed, slotIdx);
      params[name] = observed.slice(0, 50);
      slotIdx++;
      return `{${name}}`;
    });

    const sortedLastmods = bucket.lastmods.sort();
    groups.push({
      pattern: '/' + renamed.join('/'),
      patternKey: bucket.patternKey,
      params,
      urls: bucket.urls,
      urlCount: bucket.urls.length,
      depth: bucket.patternParts.length,
      firstSeen: sortedLastmods[0] || null,
      lastSeen: sortedLastmods[sortedLastmods.length - 1] || null,
    });
  }

  // Biggest templates first.
  groups.sort((a, b) => b.urlCount - a.urlCount);

  const totalGrouped = groups.reduce((acc, g) => acc + g.urlCount, 0);
  const totalUrls = sitemapEntries.length;

  return {
    groups,
    ungrouped,
    stats: {
      totalUrls,
      totalGroups: groups.length,
      totalGrouped,
      largestGroup: groups[0]?.urlCount || 0,
      coverage: totalUrls > 0 ? totalGrouped / totalUrls : 0,
    },
  };
}
@@ -0,0 +1,93 @@
1
+ /**
2
+ * GSC Overlay — Phase 3
3
+ *
4
+ * Cross-references template groups against Google Search Console per-URL data.
5
+ * Pure computation — no I/O.
6
+ */
7
+
8
/**
 * Canonicalise a URL for GSC lookup: scheme + host + path, lowercased, with
 * trailing slashes removed (query string and fragment dropped). GSC reports
 * URLs inconsistently — trailing slashes, www, http vs https. Unparseable
 * input falls back to lowercasing the raw string.
 */
function normalizeUrl(url) {
  let base;
  try {
    const parsed = new URL(url);
    base = `${parsed.protocol}//${parsed.hostname}${parsed.pathname}`;
  } catch {
    base = url;
  }
  return base.toLowerCase().replace(/\/+$/, '');
}
20
+
21
/**
 * Cross-reference template groups with GSC pages data.
 *
 * @param {TemplateGroup[]} groups — from cluster.js
 * @param {Array<{url: string, clicks: number, impressions: number, ctr: number, position: number}>|null} gscPages
 * @returns {GscOverlayResult[]} groups annotated with aggregated GSC metrics;
 *   all GSC fields are null when no GSC data is provided.
 */
export function overlayGsc(groups, gscPages) {
  // No GSC data at all — annotate every group with null metrics.
  if (!gscPages?.length) {
    return groups.map(g => ({
      ...g,
      gscUrlsWithImpressions: null,
      gscTotalClicks: null,
      gscTotalImpressions: null,
      gscAvgPosition: null,
      indexationEfficiency: null,
      topGscUrls: [],
    }));
  }

  // Index GSC rows by normalized URL; on duplicates keep the row with the
  // higher impression count.
  const rowByUrl = new Map();
  for (const row of gscPages) {
    const key = normalizeUrl(row.url);
    const prior = rowByUrl.get(key);
    if (!prior || row.impressions > prior.impressions) {
      rowByUrl.set(key, row);
    }
  }

  return groups.map(group => {
    const matched = [];
    let clicks = 0;
    let impressions = 0;
    let positionSum = 0;
    let positionCount = 0;

    // Only rows with at least one impression count as "seen by Google".
    for (const url of group.urls) {
      const row = rowByUrl.get(normalizeUrl(url));
      if (!(row && row.impressions > 0)) continue;

      clicks += row.clicks || 0;
      impressions += row.impressions || 0;
      if (row.position > 0) {
        positionSum += row.position;
        positionCount++;
      }
      matched.push({
        url: row.url,
        clicks: row.clicks,
        impressions: row.impressions,
        position: row.position,
      });
    }

    // Highest-impression URLs first; report the top 10.
    matched.sort((a, b) => b.impressions - a.impressions);

    const avgPosition = positionCount > 0
      ? Math.round((positionSum / positionCount) * 10) / 10
      : null;

    return {
      ...group,
      gscUrlsWithImpressions: matched.length,
      gscTotalClicks: clicks,
      gscTotalImpressions: impressions,
      gscAvgPosition: avgPosition,
      indexationEfficiency: group.urlCount > 0 ? matched.length / group.urlCount : 0,
      topGscUrls: matched.slice(0, 10),
    };
  });
}