seo-intel 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/.env.example +41 -0
  2. package/LICENSE +75 -0
  3. package/README.md +243 -0
  4. package/Start SEO Intel.bat +9 -0
  5. package/Start SEO Intel.command +8 -0
  6. package/cli.js +3727 -0
  7. package/config/example.json +29 -0
  8. package/config/setup-wizard.js +522 -0
  9. package/crawler/index.js +566 -0
  10. package/crawler/robots.js +103 -0
  11. package/crawler/sanitize.js +124 -0
  12. package/crawler/schema-parser.js +168 -0
  13. package/crawler/sitemap.js +103 -0
  14. package/crawler/stealth.js +393 -0
  15. package/crawler/subdomain-discovery.js +341 -0
  16. package/db/db.js +213 -0
  17. package/db/schema.sql +120 -0
  18. package/exports/competitive.js +186 -0
  19. package/exports/heuristics.js +67 -0
  20. package/exports/queries.js +197 -0
  21. package/exports/suggestive.js +230 -0
  22. package/exports/technical.js +180 -0
  23. package/exports/templates.js +77 -0
  24. package/lib/gate.js +204 -0
  25. package/lib/license.js +369 -0
  26. package/lib/oauth.js +432 -0
  27. package/lib/updater.js +324 -0
  28. package/package.json +68 -0
  29. package/reports/generate-html.js +6194 -0
  30. package/reports/generate-site-graph.js +949 -0
  31. package/reports/gsc-loader.js +190 -0
  32. package/scheduler.js +142 -0
  33. package/seo-audit.js +619 -0
  34. package/seo-intel.png +0 -0
  35. package/server.js +602 -0
  36. package/setup/ROADMAP.md +109 -0
  37. package/setup/checks.js +483 -0
  38. package/setup/config-builder.js +227 -0
  39. package/setup/engine.js +65 -0
  40. package/setup/installers.js +197 -0
  41. package/setup/models.js +328 -0
  42. package/setup/openclaw-bridge.js +329 -0
  43. package/setup/validator.js +395 -0
  44. package/setup/web-routes.js +688 -0
  45. package/setup/wizard.html +2920 -0
  46. package/start-seo-intel.sh +8 -0
@@ -0,0 +1,949 @@
1
+ /**
2
+ * SEO Intel — Site Graph Visualization Generator
3
+ *
4
+ * Generates an Obsidian-style force-directed graph of internal links.
5
+ * Self-contained HTML file with D3.js inlined.
6
+ *
7
+ * Usage:
8
+ * import { generateSiteGraphHtml } from './generate-site-graph.js';
9
+ * const outPath = await generateSiteGraphHtml(db, project, config);
10
+ */
11
+
12
+ import { writeFileSync, readFileSync, existsSync } from 'fs';
13
+ import { dirname, join } from 'path';
14
+ import { fileURLToPath } from 'url';
15
+
16
+ const __dirname = dirname(fileURLToPath(import.meta.url));
17
+ const D3_CACHE = join(__dirname, 'd3.v7.min.js');
18
+ const D3_CDN = 'https://cdn.jsdelivr.net/npm/d3@7/dist/d3.min.js';
19
+
20
+ // ── Data Queries ────────────────────────────────────────────────────────────
21
+
22
/**
 * Load the internal-link graph for the target-role domains of a project.
 *
 * Two queries are issued through `db.prepare(sql).all(...params)`:
 *   1. pages joined with their domain, extraction and keyword rows (nodes);
 *   2. internal links whose source page belongs to a target domain (edges).
 *
 * Link targets are stored as URLs, so they are resolved to page ids with a
 * URL map that tolerates a trailing-slash difference. Inbound counts are
 * derived from the same resolved rows, so a page that is linked as
 * ".../about" but stored as ".../about/" is counted consistently for both
 * its edges and its inbound total.
 *
 * @param {object} db - Handle exposing `prepare(sql)` whose result has `all(...params)`.
 * @param {string} project - Project name matched against `domains.project`.
 * @param {number} [maxDepth=99] - Maximum click depth; 99 or more disables the depth filter.
 * @returns {{nodes: object[], links: {source: *, target: *, anchor: string}[], inboundMap: Map}}
 *   `nodes` are the raw page rows (at most 500), `links` are de-duplicated
 *   edges between those nodes without self-links, and `inboundMap` maps a
 *   node id to the number of internal link rows that point at it.
 */
function querySiteGraphData(db, project, maxDepth = 99) {
  // Query 1: Nodes — pages with extraction data and keyword counts
  const nodes = db.prepare(`
    SELECT
      p.id,
      p.url,
      p.status_code,
      p.word_count,
      p.is_indexable,
      p.click_depth,
      d.domain,
      d.role,
      e.title,
      e.meta_desc,
      e.h1,
      e.search_intent,
      e.primary_entities,
      COUNT(DISTINCT k.id) AS keyword_count
    FROM pages p
    JOIN domains d ON d.id = p.domain_id
    LEFT JOIN extractions e ON e.page_id = p.id
    LEFT JOIN keywords k ON k.page_id = p.id
    WHERE d.project = ?
      AND d.role = 'target'
      AND (? >= 99 OR p.click_depth <= ?)
    GROUP BY p.id
    ORDER BY p.click_depth ASC, p.word_count DESC
    LIMIT 500
  `).all(project, maxDepth, maxDepth);

  // Lookup structures: node ids for source filtering, URL → id for targets.
  const nodeIds = new Set(nodes.map((n) => n.id));
  const urlToId = new Map(nodes.map((n) => [n.url, n.id]));
  // Also map normalized URLs (without trailing slash)
  for (const n of nodes) {
    const norm = n.url.replace(/\/$/, '');
    if (!urlToId.has(norm)) urlToId.set(norm, n.id);
  }

  // Exact match first, then the trailing-slash-stripped form. `??` (rather
  // than a truthiness test) keeps a legitimate id of 0 from being discarded.
  const resolveTargetId = (targetUrl) => {
    if (typeof targetUrl !== 'string') return undefined;
    return urlToId.get(targetUrl) ?? urlToId.get(targetUrl.replace(/\/$/, ''));
  };

  // Query 2: Edges — internal links between crawled pages in this project
  const rawEdges = db.prepare(`
    SELECT
      l.source_id,
      l.target_url,
      l.anchor_text
    FROM links l
    JOIN pages p_src ON p_src.id = l.source_id
    JOIN domains d ON d.id = p_src.domain_id
    WHERE l.is_internal = 1
      AND d.project = ?
      AND d.role = 'target'
  `).all(project);

  const links = [];
  const seen = new Set();
  const inboundMap = new Map();

  for (const e of rawEdges) {
    const targetId = resolveTargetId(e.target_url);
    if (targetId === undefined) continue;

    // Inbound total: every internal link row that lands on this node counts,
    // including rows whose source page is outside the (depth/limit-trimmed)
    // node set, repeated links and self-links.
    inboundMap.set(targetId, (inboundMap.get(targetId) ?? 0) + 1);

    // Edges are only drawn between two nodes that are both in the graph.
    if (!nodeIds.has(e.source_id)) continue;
    if (targetId === e.source_id) continue; // skip self-links

    const key = `${e.source_id}-${targetId}`;
    if (seen.has(key)) continue; // deduplicate
    seen.add(key);

    links.push({
      source: e.source_id,
      target: targetId,
      anchor: e.anchor_text || '',
    });
  }

  return { nodes, links, inboundMap };
}
117
+
118
+ // ── Node Enrichment ─────────────────────────────────────────────────────────
119
+
120
/**
 * Decorate page rows in place with display, clustering and audit fields.
 *
 * Each node receives: `inbound_count`, URL-derived `path` / `hostname` /
 * `subdomain` / `pathGroup` / `clusterKey`, a `primary_entities` array,
 * an `issues` list, a `color_category` and a `radius`.
 *
 * @param {object[]} nodes - Page rows; mutated in place.
 * @param {Map} inboundMap - Node id → inbound link count.
 * @returns {{stats: object}} Aggregate counters; `total_edges` is left at 0
 *   for the caller to fill in.
 */
function enrichNodes(nodes, inboundMap) {
  const tally = { noindex: 0, issue: 0, opportunity: 0, normal: 0 };
  let orphanTotal = 0;

  // URL → display/cluster fields; unparseable URLs fall back to fixed values.
  const locate = (rawUrl) => {
    try {
      const parsed = new URL(rawUrl);
      const hostParts = parsed.hostname.split('.');
      const firstSegment = parsed.pathname.split('/').filter(Boolean)[0];
      const subdomain = hostParts.length > 2 ? hostParts[0] : 'www';
      const pathGroup = firstSegment || '(root)';
      return {
        path: parsed.pathname,
        hostname: parsed.hostname,
        subdomain,
        pathGroup,
        // Group: subdomain + first path segment
        clusterKey: subdomain + '/' + pathGroup,
      };
    } catch {
      return {
        path: rawUrl,
        hostname: '',
        subdomain: 'www',
        pathGroup: '(root)',
        clusterKey: 'unknown',
      };
    }
  };

  // Entities may arrive as a JSON string; anything that is not an array ends up [].
  const toEntityList = (value) => {
    let candidate = value;
    if (typeof candidate === 'string') {
      try {
        candidate = JSON.parse(candidate);
      } catch {
        candidate = [];
      }
    }
    return Array.isArray(candidate) ? candidate : [];
  };

  const pickCategory = (page) => {
    if (!page.is_indexable) return 'noindex';
    if (page.issues.length > 0) return 'issue';
    if (page.keyword_count > 3 && page.inbound_count < 2) return 'opportunity';
    return 'normal';
  };

  nodes.forEach((page) => {
    page.inbound_count = inboundMap.get(page.id) || 0;

    Object.assign(page, locate(page.url));

    page.primary_entities = toEntityList(page.primary_entities);

    // Audit findings, in a fixed order.
    page.issues = [
      !page.title && 'Missing title',
      !page.h1 && 'Missing H1',
      !page.meta_desc && 'Missing meta description',
      page.status_code && page.status_code >= 400 && `HTTP ${page.status_code}`,
      page.word_count !== null && page.word_count < 100 && 'Thin content',
    ].filter((finding) => typeof finding === 'string');

    page.color_category = pickCategory(page);
    tally[page.color_category] += 1;

    // Orphan: nothing links here and it is not the depth-0 entry page.
    if (page.inbound_count === 0 && page.click_depth > 0) {
      orphanTotal += 1;
    }

    // Radius: sqrt(inbound + 1) * 4, clamped 4–24
    const rawRadius = Math.sqrt(page.inbound_count + 1) * 4;
    page.radius = Math.max(4, Math.min(24, rawRadius));
  });

  return {
    stats: {
      total_nodes: nodes.length,
      total_edges: 0, // filled in caller
      issues: tally.issue,
      opportunities: tally.opportunity,
      noindex: tally.noindex,
      orphans: orphanTotal,
    },
  };
}
194
+
195
+ // ── D3 Bundle ───────────────────────────────────────────────────────────────
196
+
197
/**
 * Return the D3.js v7 bundle source as a string.
 *
 * When the file at D3_CACHE exists it is read and returned. Otherwise the
 * bundle is fetched from D3_CDN, written to D3_CACHE, and returned.
 *
 * @returns {Promise<string>} JavaScript source of the D3 bundle.
 * @throws {Error} When the download responds with a non-OK status.
 */
async function fetchOrReadD3() {
  const haveLocalCopy = existsSync(D3_CACHE);

  if (!haveLocalCopy) {
    console.log(' Downloading D3.js v7 (one-time, ~280KB)...');
    const response = await fetch(D3_CDN);
    if (!response.ok) {
      throw new Error(`Failed to download D3: ${response.status}`);
    }
    const bundle = await response.text();
    // Persist next to this module so later runs skip the download.
    writeFileSync(D3_CACHE, bundle, 'utf8');
    return bundle;
  }

  return readFileSync(D3_CACHE, 'utf8');
}
209
+
210
+ // ── HTML Template ───────────────────────────────────────────────────────────
211
+
212
/**
 * Render the self-contained site-graph HTML document.
 *
 * The page inlines the D3 bundle, the serialized graph data and a client
 * script that draws a force-directed graph with filter pills, a depth
 * slider, a search box and a details sidebar.
 *
 * Values that originate outside this module are neutralised before being
 * placed in the document:
 *   - `data.project` is HTML-escaped wherever it appears in markup;
 *   - the JSON payload has every `<` written as `\u003c`, so crawled text
 *     containing `</script>` cannot terminate the inline script;
 *   - the client-side `esc()` helper is applied to every node field that is
 *     written through `innerHTML`.
 *
 * @param {{project: string, nodes: object[], links: object[], stats: object}} data
 *   Graph payload; `stats` supplies `total_nodes`, `total_edges`, `issues`,
 *   `opportunities`, `noindex` and `orphans` for the toolbar.
 * @param {string} d3src - Source text of the D3 bundle, inlined verbatim.
 * @returns {string} Complete HTML document.
 */
function buildSiteGraphHtml(data, d3src) {
  // Server-side escaping for text placed directly into markup.
  const escapeHtml = (value) => String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  const projectHtml = escapeHtml(data.project);

  // JSON is not safe inside <script> as-is: "</script>" or "<!--" inside a
  // string would be interpreted by the HTML parser. The \u escapes decode to
  // the same characters once the script is parsed as JavaScript.
  const graphJson = JSON.stringify(data)
    .replace(/</g, '\\u003c')
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029');

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Site Graph — ${projectHtml}</title>
<style>
  :root {
    --bg-primary: #0a0a0a;
    --bg-card: #111111;
    --bg-elevated: #161616;
    --text-primary: #e8e8e8;
    --text-muted: #888888;
    --text-dim: #666666;
    --accent-gold: #e8d5a3;
    --color-normal: #6ba3c7;
    --color-opportunity: #8ecba8;
    --color-issue: #d98e8e;
    --color-noindex: #444444;
    --color-orphan: #c79b6b;
    --sidebar-width: 340px;
    /* Subdomain cluster ring colors */
    --cluster-www: #6ba3c7;
    --cluster-docs: #a78bfa;
    --cluster-blog: #8ecba8;
    --cluster-app: #f59e0b;
    --cluster-api: #ec4899;
    --cluster-other: #888;
  }

  * { margin: 0; padding: 0; box-sizing: border-box; }

  body {
    background: var(--bg-primary);
    color: var(--text-primary);
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    overflow: hidden;
    height: 100vh;
  }

  /* ── Toolbar ── */
  .toolbar {
    position: fixed;
    top: 0; left: 0; right: 0;
    height: 48px;
    background: var(--bg-card);
    border-bottom: 1px solid #222;
    display: flex;
    align-items: center;
    padding: 0 16px;
    gap: 12px;
    z-index: 100;
  }

  .toolbar .project-label {
    font-weight: 600;
    font-size: 14px;
    color: var(--accent-gold);
    white-space: nowrap;
  }

  .toolbar .divider {
    width: 1px;
    height: 24px;
    background: #333;
  }

  .filter-pills {
    display: flex;
    gap: 6px;
  }

  .pill {
    padding: 4px 10px;
    border-radius: 12px;
    font-size: 11px;
    cursor: pointer;
    border: 1px solid #333;
    background: transparent;
    color: var(--text-muted);
    transition: all 0.15s;
    white-space: nowrap;
  }
  .pill:hover { border-color: #555; color: var(--text-primary); }
  .pill.active { background: #222; color: var(--text-primary); border-color: #555; }
  .pill .count { opacity: 0.5; margin-left: 3px; }

  .search-box {
    margin-left: auto;
    padding: 5px 10px;
    border-radius: 6px;
    border: 1px solid #333;
    background: var(--bg-primary);
    color: var(--text-primary);
    font-size: 12px;
    width: 180px;
    outline: none;
  }
  .search-box:focus { border-color: var(--accent-gold); }
  .search-box::placeholder { color: #555; }

  .depth-control {
    display: flex;
    align-items: center;
    gap: 6px;
    font-size: 11px;
    color: var(--text-muted);
  }
  .depth-control input[type="range"] {
    width: 60px;
    accent-color: var(--accent-gold);
  }

  .stats-bar {
    font-size: 11px;
    color: var(--text-muted);
    white-space: nowrap;
  }

  /* ── SVG Canvas ── */
  .graph-container {
    position: fixed;
    top: 48px; left: 0; right: 0; bottom: 0;
  }

  svg {
    width: 100%;
    height: 100%;
    cursor: grab;
  }
  svg:active { cursor: grabbing; }

  .link {
    stroke: #1e1e1e;
    stroke-width: 0.5;
    stroke-opacity: 0.6;
  }
  .link.highlighted {
    stroke: var(--accent-gold);
    stroke-width: 1.5;
    stroke-opacity: 1;
  }
  .link.dimmed { stroke-opacity: 0.08; }
  /* Applied by applyFilters(); a class is required because the .link rule
     above takes precedence over a stroke-opacity presentation attribute. */
  .link.filtered-out { stroke-opacity: 0.02; }

  .node {
    cursor: pointer;
    stroke: #000;
    stroke-width: 0.5;
    transition: opacity 0.2s;
  }
  .node:hover {
    stroke: var(--accent-gold);
    stroke-width: 2;
  }
  .node.selected {
    stroke: #fff;
    stroke-width: 2.5;
  }
  .node.dimmed { opacity: 0.1; }
  .node.filtered-out { opacity: 0.05; pointer-events: none; }

  .node-ring {
    fill: none;
    stroke-width: 1.5;
    stroke-opacity: 0.4;
    pointer-events: none;
  }

  .node-label {
    font-size: 10px;
    fill: var(--text-dim);
    pointer-events: none;
    text-anchor: middle;
    dominant-baseline: central;
    opacity: 0.55;
    transition: opacity 0.3s;
    font-weight: 400;
  }
  .node-label.zoomed-far { opacity: 0; font-size: 10px; }
  .node-label.zoomed-mid { opacity: 0.45; font-size: 10px; }
  .node-label.zoomed-close { opacity: 0.75; font-size: 10px; }
  .node-label.hub-label { font-weight: 600; opacity: 0.7; }

  /* ── Sidebar ── */
  .sidebar {
    position: fixed;
    top: 48px;
    right: 0;
    bottom: 0;
    width: var(--sidebar-width);
    background: var(--bg-card);
    border-left: 1px solid #222;
    transform: translateX(100%);
    transition: transform 0.2s ease;
    overflow-y: auto;
    z-index: 50;
    padding: 20px 16px;
  }
  .sidebar.open { transform: translateX(0); }

  .sidebar .close-btn {
    position: absolute;
    top: 12px; right: 12px;
    background: none; border: none;
    color: var(--text-muted);
    cursor: pointer;
    font-size: 18px;
  }

  .sidebar h3 {
    font-size: 13px;
    font-weight: 600;
    margin-bottom: 4px;
    word-break: break-all;
  }

  .sidebar .url-display {
    font-size: 11px;
    color: var(--text-muted);
    word-break: break-all;
    margin-bottom: 12px;
  }

  .sidebar .role-badge {
    display: inline-block;
    padding: 2px 8px;
    border-radius: 8px;
    font-size: 10px;
    font-weight: 600;
    margin-bottom: 12px;
  }
  .role-badge.target { background: #1a3a2a; color: var(--color-opportunity); }
  .role-badge.competitor { background: #3a1a1a; color: var(--color-issue); }

  .sidebar .section {
    border-top: 1px solid #222;
    padding: 10px 0;
  }

  .sidebar .meta-grid {
    display: grid;
    grid-template-columns: 1fr 1fr;
    gap: 6px;
    font-size: 11px;
  }
  .meta-grid .label { color: var(--text-muted); }
  .meta-grid .value { color: var(--text-primary); }

  .sidebar .entity-tags {
    display: flex;
    flex-wrap: wrap;
    gap: 4px;
    margin-top: 4px;
  }
  .entity-tag {
    padding: 2px 8px;
    background: #1a1a2a;
    border-radius: 6px;
    font-size: 10px;
    color: var(--color-normal);
  }

  .sidebar .issue-list {
    list-style: none;
    padding: 0;
  }
  .issue-list li {
    padding: 3px 0;
    font-size: 11px;
    color: var(--color-issue);
  }
  .issue-list li::before { content: '\\26A0 '; }

  .sidebar .open-link {
    display: inline-block;
    margin-top: 8px;
    color: var(--accent-gold);
    font-size: 11px;
    text-decoration: none;
  }

  /* ── Legend ── */
  .legend {
    position: fixed;
    bottom: 16px;
    left: 16px;
    display: flex;
    gap: 14px;
    font-size: 10px;
    color: var(--text-muted);
    z-index: 10;
  }
  .legend-item {
    display: flex;
    align-items: center;
    gap: 4px;
  }
  .legend-dot {
    width: 8px;
    height: 8px;
    border-radius: 50%;
  }

  /* ── Empty state ── */
  .empty-state {
    position: fixed;
    top: 50%; left: 50%;
    transform: translate(-50%, -50%);
    text-align: center;
    color: var(--text-muted);
    font-size: 14px;
  }
  .empty-state h2 { font-size: 18px; margin-bottom: 8px; color: var(--text-primary); }
</style>
</head>
<body>

<div class="toolbar">
  <span class="project-label">${projectHtml}</span>
  <div class="divider"></div>
  <div class="filter-pills">
    <button class="pill active" data-filter="all">All <span class="count">${data.stats.total_nodes}</span></button>
    <button class="pill" data-filter="issue" style="border-color: var(--color-issue)">Issues <span class="count">${data.stats.issues}</span></button>
    <button class="pill" data-filter="opportunity" style="border-color: var(--color-opportunity)">Opportunities <span class="count">${data.stats.opportunities}</span></button>
    <button class="pill" data-filter="noindex">No-index <span class="count">${data.stats.noindex}</span></button>
    <button class="pill" data-filter="orphan" style="border-color: var(--color-orphan)">Orphans <span class="count">${data.stats.orphans}</span></button>
  </div>
  <div class="depth-control">
    <label>Depth</label>
    <input type="range" id="depthSlider" min="0" max="8" value="8">
    <span id="depthValue">all</span>
  </div>
  <input class="search-box" type="text" placeholder="Search pages..." id="searchBox">
  <div class="stats-bar">
    <span id="visibleCount">${data.stats.total_nodes}</span> nodes · <span id="edgeCount">${data.stats.total_edges}</span> edges
  </div>
</div>

<div class="graph-container">
  <svg id="graph"></svg>
</div>

<div class="sidebar" id="sidebar">
  <button class="close-btn" onclick="closeSidebar()">&times;</button>
  <div id="sidebarContent"></div>
</div>

<div class="legend">
  <div class="legend-item"><div class="legend-dot" style="background:var(--color-normal)"></div> Normal</div>
  <div class="legend-item"><div class="legend-dot" style="background:var(--color-opportunity)"></div> Opportunity</div>
  <div class="legend-item"><div class="legend-dot" style="background:var(--color-issue)"></div> Issue</div>
  <div class="legend-item"><div class="legend-dot" style="background:var(--color-noindex)"></div> No-index</div>
  <span style="color:#333;margin:0 6px">│</span>
  <div class="legend-item"><div class="legend-dot" style="background:var(--cluster-www);opacity:0.5;border:1.5px solid var(--cluster-www)"></div> www</div>
  <div class="legend-item"><div class="legend-dot" style="background:var(--cluster-docs);opacity:0.5;border:1.5px solid var(--cluster-docs)"></div> docs</div>
  <div class="legend-item"><div class="legend-dot" style="background:var(--cluster-blog);opacity:0.5;border:1.5px solid var(--cluster-blog)"></div> blog</div>
  <div class="legend-item"><div class="legend-dot" style="background:var(--cluster-app);opacity:0.5;border:1.5px solid var(--cluster-app)"></div> app</div>
  <div class="legend-item"><div class="legend-dot" style="background:var(--cluster-api);opacity:0.5;border:1.5px solid var(--cluster-api)"></div> api</div>
</div>

${data.stats.total_edges === 0 ? `
<div class="empty-state">
  <h2>No internal links found</h2>
  <p>Run a crawl with link extraction first:<br><code>seo-intel crawl ${projectHtml}</code></p>
</div>
` : ''}

<script>
// ── D3.js v7 (inlined) ──
${d3src}
</script>

<script>
// ── Graph Data ──
const GRAPH_DATA = ${graphJson};

// ── Color Map ──
const COLOR_MAP = {
  normal: '${cssVar('--color-normal', '#6ba3c7')}',
  opportunity: '${cssVar('--color-opportunity', '#8ecba8')}',
  issue: '${cssVar('--color-issue', '#d98e8e')}',
  noindex: '${cssVar('--color-noindex', '#444444')}',
};

// ── Subdomain cluster colors ──
// Declared at script level because both initGraph() and selectNode() use them.
const CLUSTER_COLORS = {
  www: 'var(--cluster-www)',
  docs: 'var(--cluster-docs)',
  blog: 'var(--cluster-blog)',
  app: 'var(--cluster-app)',
  api: 'var(--cluster-api)',
};
function clusterColor(subdomain) {
  return CLUSTER_COLORS[subdomain] || 'var(--cluster-other)';
}

// ── State ──
let currentFilter = 'all';
let currentSearch = '';
let currentDepth = 99;
let selectedNodeId = null;
let simulation, svgG, nodeEls, linkEls, labelEls;

// ── Init ──
function initGraph() {
  const svg = d3.select('#graph');
  const container = document.querySelector('.graph-container');
  const width = container.clientWidth;
  const height = container.clientHeight;

  const nodes = GRAPH_DATA.nodes.map(d => ({ ...d }));
  const links = GRAPH_DATA.links.map(d => ({ ...d }));

  if (nodes.length === 0) return;

  // Compute cluster centers for subdomain grouping
  const clusterKeys = [...new Set(nodes.map(n => n.subdomain))];
  const clusterCenters = {};
  const angleStep = (2 * Math.PI) / Math.max(clusterKeys.length, 1);
  const clusterRadius = Math.min(width, height) * 0.2;
  clusterKeys.forEach((k, i) => {
    clusterCenters[k] = {
      x: width / 2 + Math.cos(angleStep * i) * clusterRadius,
      y: height / 2 + Math.sin(angleStep * i) * clusterRadius,
    };
  });

  // Zoom — Obsidian-style: labels appear progressively
  let currentZoomK = 1;
  const zoom = d3.zoom()
    .scaleExtent([0.05, 12])
    .on('zoom', (event) => {
      svgG.attr('transform', event.transform);
      currentZoomK = event.transform.k;

      // Progressive label visibility based on zoom + node importance
      labelEls
        .classed('zoomed-far', currentZoomK < 0.5)
        .classed('zoomed-mid', currentZoomK >= 0.5 && currentZoomK < 1.5)
        .classed('zoomed-close', currentZoomK >= 1.5);

      // Hub labels (high link count) visible earlier
      labelEls.classed('hub-label', d => d.inbound_count >= 5 || d.radius >= 8);

      // Scale label font inversely so they stay readable at any zoom
      const labelScale = Math.max(0.6, Math.min(2.0, 1.0 / currentZoomK));
      labelEls.style('font-size', (10 * labelScale) + 'px');
    });
  svg.call(zoom);

  svgG = svg.append('g');

  // Links
  linkEls = svgG.append('g')
    .selectAll('line')
    .data(links)
    .join('line')
    .attr('class', 'link');

  // Subdomain rings (outer ring showing cluster membership)
  const ringEls = svgG.append('g')
    .selectAll('circle')
    .data(nodes)
    .join('circle')
    .attr('class', 'node-ring')
    .attr('r', d => d.radius + 3)
    .attr('stroke', d => clusterColor(d.subdomain));

  // Nodes
  nodeEls = svgG.append('g')
    .selectAll('circle')
    .data(nodes)
    .join('circle')
    .attr('class', 'node')
    .attr('r', d => d.radius)
    .attr('fill', d => COLOR_MAP[d.color_category] || COLOR_MAP.normal)
    .call(drag())
    .on('click', (event, d) => selectNode(d));

  // Labels — show short title or path slug, Obsidian-style
  function nodeLabel(d) {
    // Prefer a short title if extracted
    if (d.title && d.title.length > 0) {
      const t = d.title.split('|')[0].split('—')[0].split('-')[0].trim();
      return t.length > 30 ? t.slice(0, 28) + '…' : t;
    }
    // Fall back to last path segment
    const slug = d.path.replace(/\\/$/, '').split('/').pop() || d.path;
    return slug.length > 25 ? slug.slice(0, 23) + '…' : slug;
  }
  labelEls = svgG.append('g')
    .selectAll('text')
    .data(nodes)
    .join('text')
    .attr('class', 'node-label')
    .text(d => nodeLabel(d));

  // Simulation — heavy nodes, subdomain clustering, settles fast
  simulation = d3.forceSimulation(nodes)
    .force('link', d3.forceLink(links).id(d => d.id).distance(90).strength(0.08))
    .force('charge', d3.forceManyBody().strength(-180).distanceMax(500))
    .force('center', d3.forceCenter(width / 2, height / 2).strength(0.08))
    .force('collide', d3.forceCollide().radius(d => d.radius + 12).strength(0.8).iterations(2))
    // Cluster pull: nodes drift toward their subdomain's center
    .force('clusterX', d3.forceX(d => clusterCenters[d.subdomain]?.x || width / 2).strength(0.04))
    .force('clusterY', d3.forceY(d => clusterCenters[d.subdomain]?.y || height / 2).strength(0.04))
    .alphaDecay(0.04)
    .velocityDecay(0.78)
    .on('tick', () => {
      linkEls
        .attr('x1', d => d.source.x).attr('y1', d => d.source.y)
        .attr('x2', d => d.target.x).attr('y2', d => d.target.y);
      nodeEls.attr('cx', d => d.x).attr('cy', d => d.y);
      ringEls.attr('cx', d => d.x).attr('cy', d => d.y);
      labelEls.attr('x', d => d.x).attr('y', d => d.y - d.radius - 6);
    });

  // Initial zoom to fit — wait for simulation to settle
  setTimeout(() => {
    const bounds = svgG.node().getBBox();
    if (bounds.width === 0) return;
    const pad = 80;
    const scale = Math.min(
      width / (bounds.width + pad * 2),
      height / (bounds.height + pad * 2),
      1.2
    );
    const tx = (width - bounds.width * scale) / 2 - bounds.x * scale;
    const ty = (height - bounds.height * scale) / 2 - bounds.y * scale;
    svg.transition().duration(1200).ease(d3.easeCubicOut)
      .call(zoom.transform, d3.zoomIdentity.translate(tx, ty).scale(scale));
  }, 1500);
}

// ── Drag — gentle reheat so neighbors don't go flying ──
function drag() {
  return d3.drag()
    .on('start', (event, d) => {
      if (!event.active) simulation.alphaTarget(0.08).restart();
      d.fx = d.x; d.fy = d.y;
    })
    .on('drag', (event, d) => {
      d.fx = event.x; d.fy = event.y;
    })
    .on('end', (event, d) => {
      if (!event.active) simulation.alphaTarget(0);
      d.fx = null; d.fy = null;
    });
}

// ── Node Selection ──
function selectNode(node) {
  selectedNodeId = node.id;

  // Highlight
  nodeEls.classed('selected', d => d.id === node.id);
  nodeEls.classed('dimmed', d => {
    if (d.id === node.id) return false;
    const connected = GRAPH_DATA.links.some(l =>
      (l.source === node.id && l.target === d.id) ||
      (l.target === node.id && l.source === d.id) ||
      (l.source.id === node.id && l.target.id === d.id) ||
      (l.target.id === node.id && l.source.id === d.id)
    );
    return !connected;
  });
  linkEls.classed('highlighted', d => {
    const sid = typeof d.source === 'object' ? d.source.id : d.source;
    const tid = typeof d.target === 'object' ? d.target.id : d.target;
    return sid === node.id || tid === node.id;
  });
  linkEls.classed('dimmed', d => {
    const sid = typeof d.source === 'object' ? d.source.id : d.source;
    const tid = typeof d.target === 'object' ? d.target.id : d.target;
    return sid !== node.id && tid !== node.id;
  });

  // Sidebar — every node field goes through esc() before reaching innerHTML
  const sb = document.getElementById('sidebarContent');
  const roleClass = node.role === 'target' ? 'target' : 'competitor';
  const entities = (node.primary_entities || []).map(e =>
    '<span class="entity-tag">' + esc(e) + '</span>'
  ).join('');
  const issues = (node.issues || []).map(i =>
    '<li>' + esc(i) + '</li>'
  ).join('');

  sb.innerHTML = \`
    <h3>\${esc(node.title || node.path)}</h3>
    <div class="url-display">\${esc(node.url)}</div>
    <span class="role-badge \${roleClass}">\${esc(node.role)}</span>
    <span class="role-badge" style="background:#1a1a2a;color:var(--color-normal);margin-left:4px">depth \${esc(node.click_depth)}</span>
    <span class="role-badge" style="background:#1a1a2a;color:\${clusterColor(node.subdomain)};margin-left:4px">\${esc(node.hostname || '—')}</span>

    <div class="section">
      <div class="meta-grid">
        <span class="label">Status</span><span class="value">\${esc(node.status_code || '—')}</span>
        <span class="label">Words</span><span class="value">\${esc(node.word_count || '—')}</span>
        <span class="label">Links in</span><span class="value">\${esc(node.inbound_count)}</span>
        <span class="label">Keywords</span><span class="value">\${esc(node.keyword_count)}</span>
        <span class="label">Indexable</span><span class="value">\${node.is_indexable ? 'Yes' : 'No'}</span>
        <span class="label">Intent</span><span class="value">\${esc(node.search_intent || '—')}</span>
        <span class="label">Subdomain</span><span class="value" style="color:\${clusterColor(node.subdomain)}">\${esc(node.subdomain || '—')}</span>
        <span class="label">Path group</span><span class="value">\${esc(node.pathGroup || '—')}</span>
      </div>
    </div>

    \${node.h1 ? '<div class="section"><div class="label" style="font-size:10px;color:var(--text-muted);margin-bottom:2px">H1</div><div style="font-size:12px">' + esc(node.h1) + '</div></div>' : ''}
    \${node.meta_desc ? '<div class="section"><div class="label" style="font-size:10px;color:var(--text-muted);margin-bottom:2px">Meta</div><div style="font-size:11px;color:var(--text-muted)">' + esc(String(node.meta_desc).slice(0, 160)) + '</div></div>' : ''}

    \${entities ? '<div class="section"><div class="label" style="font-size:10px;color:var(--text-muted);margin-bottom:4px">Entities</div><div class="entity-tags">' + entities + '</div></div>' : ''}

    \${issues ? '<div class="section"><div class="label" style="font-size:10px;color:var(--text-muted);margin-bottom:4px">Issues</div><ul class="issue-list">' + issues + '</ul></div>' : ''}

    <a class="open-link" href="\${esc(node.url)}" target="_blank" rel="noopener noreferrer">Open in browser &rarr;</a>
  \`;

  document.getElementById('sidebar').classList.add('open');
}

function closeSidebar() {
  selectedNodeId = null;
  document.getElementById('sidebar').classList.remove('open');
  // Selections only exist once initGraph() has run (i.e. the graph is non-empty).
  if (nodeEls) nodeEls.classed('selected', false).classed('dimmed', false);
  if (linkEls) linkEls.classed('highlighted', false).classed('dimmed', false);
}

// HTML-escape any value for use in element content or a quoted attribute.
// Non-strings (numbers, parsed entity values) are stringified first.
function esc(s) {
  if (s === null || s === undefined) return '';
  return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
}

// ── Filters ──
document.querySelectorAll('.pill').forEach(pill => {
  pill.addEventListener('click', () => {
    document.querySelectorAll('.pill').forEach(p => p.classList.remove('active'));
    pill.classList.add('active');
    currentFilter = pill.dataset.filter;
    applyFilters();
  });
});

document.getElementById('searchBox').addEventListener('input', (e) => {
  currentSearch = e.target.value.toLowerCase();
  applyFilters();
});

document.getElementById('depthSlider').addEventListener('input', (e) => {
  const val = parseInt(e.target.value, 10);
  currentDepth = val >= 8 ? 99 : val;
  document.getElementById('depthValue').textContent = val >= 8 ? 'all' : val;
  applyFilters();
});

function applyFilters() {
  // The toolbar is live even when there is nothing to draw.
  if (!nodeEls || !linkEls) return;

  let visible = 0;

  nodeEls.classed('filtered-out', d => {
    let show = true;

    // Category filter
    if (currentFilter !== 'all') {
      if (currentFilter === 'orphan') {
        show = d.inbound_count === 0 && d.click_depth > 0;
      } else {
        show = d.color_category === currentFilter;
      }
    }

    // Depth filter
    if (show && currentDepth < 99) {
      show = d.click_depth <= currentDepth;
    }

    // Search filter
    if (show && currentSearch) {
      const hay = (d.path + ' ' + (d.title || '') + ' ' + (d.h1 || '')).toLowerCase();
      show = hay.includes(currentSearch);
    }

    if (show) visible++;
    return !show;
  });

  // Fade links whose source or target is filtered out
  const visibleIds = new Set();
  nodeEls.each(function(d) {
    if (!d3.select(this).classed('filtered-out')) visibleIds.add(d.id);
  });
  linkEls.classed('filtered-out', d => {
    const sid = typeof d.source === 'object' ? d.source.id : d.source;
    const tid = typeof d.target === 'object' ? d.target.id : d.target;
    return !(visibleIds.has(sid) && visibleIds.has(tid));
  });

  document.getElementById('visibleCount').textContent = visible;
}

// ── Start ──
if (GRAPH_DATA.nodes.length > 0) initGraph();
</script>
</body>
</html>`;
}
917
+
918
/**
 * Resolve a CSS custom property to a concrete value for the generated page.
 *
 * Currently a pass-through stub: `name` is ignored and `fallback` is always
 * returned.
 *
 * @param {string} name - Custom property name, e.g. `--color-normal` (unused).
 * @param {string} fallback - Value to return.
 * @returns {string} The `fallback` argument, unchanged.
 */
function cssVar(name, fallback) {
  const resolved = fallback;
  return resolved;
}
919
+
920
+ // ── Main Export ─────────────────────────────────────────────────────────────
921
+
922
/**
 * Generate the site-graph HTML report for a project and write it to disk.
 *
 * Steps: query pages and internal links, enrich the nodes with audit and
 * layout fields, obtain the D3 bundle, render the HTML, and write it to
 * `<project>-site-graph.html` in this module's directory.
 *
 * @param {object} db - Database handle passed through to querySiteGraphData.
 * @param {string} project - Project name; also used in the output file name.
 * @param {{maxDepth?: number}} [config={}] - Options. `maxDepth` limits nodes
 *   by click depth; 0 is honoured (depth-0 pages only). A missing, negative
 *   or non-numeric value means no limit (99).
 * @returns {Promise<string>} Absolute path of the written HTML file.
 */
export async function generateSiteGraphHtml(db, project, config = {}) {
  // `||` would turn a legitimate depth of 0 into "unlimited", so validate
  // explicitly and fall back to 99 only when no usable number was supplied.
  const requestedDepth = config?.maxDepth;
  const numericDepth = Number(requestedDepth);
  const hasUsableDepth = requestedDepth !== null
    && requestedDepth !== undefined
    && requestedDepth !== ''
    && Number.isFinite(numericDepth)
    && numericDepth >= 0;
  const maxDepth = hasUsableDepth ? numericDepth : 99;

  // Query data
  const { nodes, links, inboundMap } = querySiteGraphData(db, project, maxDepth);
  const { stats } = enrichNodes(nodes, inboundMap);
  stats.total_edges = links.length;

  // Get D3
  const d3src = await fetchOrReadD3();

  // Build HTML
  const data = {
    project,
    generated: Date.now(),
    nodes,
    links,
    stats,
  };

  const html = buildSiteGraphHtml(data, d3src);

  // Write file
  const outPath = join(__dirname, `${project}-site-graph.html`);
  writeFileSync(outPath, html, 'utf8');

  return outPath;
}