seo-intel 1.4.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.4.3 (2026-04-07)
4
+
5
+ ### Dashboard: Export & Download
6
+ - Per-card download buttons (Markdown, JSON, CSV) on every dashboard card
7
+ - "Download All Reports (ZIP)" in export sidebar
8
+ - New `/api/export/download` endpoint with section filtering
9
+
10
+ ### Improvements
11
+ - GSC data loader picks most recently modified folder (fixes stale folder selection)
12
+ - Report filenames use `YYYY-MM-DD` dates instead of Unix timestamps
13
+ - Setup wizard: multi-host Ollama support (`OLLAMA_HOSTS` env var)
14
+ - Skill file and Agent Guide updated with `watch`, `blog-draft`, and export features
15
+
16
+ ### Cleanup
17
+ - Removed deprecated agentic setup banner from wizard
18
+ - Consolidated Agent Guide into `skill/` directory
19
+
20
+ ## 1.4.2 (2026-04-05)
21
+
22
+ ### New Feature: Site Watch
23
+ - `seo-intel watch <project>` — detect changes between crawl runs and track site health
24
+ - Health Score (0-100) based on page errors, missing titles, missing H1s
25
+ - Diff engine detects 10 event types: new/removed pages, status changes, title/H1/meta changes, word count shifts, indexability flips, content updates
26
+ - Events classified by severity: critical, warning, notice — with trend arrows
27
+ - Auto-runs after every crawl with a one-liner summary
28
+ - Dashboard card: health score gauge, severity counts with deltas, "What's New" event table
29
+ - Significant changes (critical/warning) feed into Intelligence Ledger as `site_watch` insights
30
+ - Available via CLI, dashboard terminal, and froggo.js API
31
+ - Free tier — no license required
32
+
3
33
  ## 1.4.1 (2026-04-03)
4
34
 
5
35
  ### Fixes
@@ -0,0 +1,158 @@
1
+ /**
2
+ * Site Watch — Diff Engine
3
+ *
4
+ * Pure function: compares two page snapshots and returns change events.
5
+ * Zero I/O, zero side effects — deterministic and testable.
6
+ */
7
+
8
/**
 * Compare two page snapshots and describe every difference as a change event.
 *
 * Pages are matched by `url`. Events are produced for pages that only exist in
 * one snapshot (`page_added`, `page_removed`) and, for pages present in both,
 * for changes in status code, title, H1, meta description, word count,
 * indexability and content hash. Text fields are compared after normalisation
 * (trimmed, lowercased), so whitespace-only or case-only edits are not reported.
 *
 * A missing list (null/undefined) is treated as an empty snapshot.
 *
 * @param {object[]} currentPages - { url, status_code, title, h1, meta_desc, word_count, is_indexable, content_hash }
 * @param {object[]} previousPages - same shape, from previous snapshot
 * @returns {object[]} Array of { event_type, severity, url, old_value, new_value, details },
 *   sorted critical → warning → notice (detection order is kept within a severity).
 */
export function diffPages(currentPages, previousPages) {
  // A word count must move by more than this fraction of its previous value to be reported.
  const WORD_COUNT_THRESHOLD = 0.2;
  const SEVERITY_RANK = { critical: 0, warning: 1, notice: 2 };
  // [page field, event type] pairs compared through normalise().
  const TEXT_FIELDS = [
    ['title', 'title_changed'],
    ['h1', 'h1_changed'],
    ['meta_desc', 'meta_desc_changed'],
  ];

  const currentMap = new Map((currentPages ?? []).map((p) => [p.url, p]));
  const previousMap = new Map((previousPages ?? []).map((p) => [p.url, p]));
  const events = [];

  // A missing/zero status code is reported as 200 for added/removed pages.
  const statusLabel = (page) => String(page.status_code || 200);
  const pageSummary = (page) => JSON.stringify({ title: page.title, word_count: page.word_count });

  // ── Pages added ──────────────────────────────────────────────────────────
  for (const [url, page] of currentMap) {
    if (previousMap.has(url)) continue;
    events.push({
      event_type: 'page_added',
      severity: 'notice',
      url,
      old_value: null,
      new_value: statusLabel(page),
      details: pageSummary(page),
    });
  }

  // ── Pages removed ────────────────────────────────────────────────────────
  for (const [url, page] of previousMap) {
    if (currentMap.has(url)) continue;
    events.push({
      event_type: 'page_removed',
      severity: 'warning',
      url,
      old_value: statusLabel(page),
      new_value: null,
      details: pageSummary(page),
    });
  }

  // ── Per-page field comparisons ───────────────────────────────────────────
  for (const [url, curr] of currentMap) {
    const prev = previousMap.get(url);
    if (!prev) continue;

    // Status code change: any transition that ends in 4xx/5xx is critical,
    // a recovery from an error is a notice, everything else is a warning.
    if (curr.status_code !== prev.status_code) {
      const isNewError = prev.status_code < 400 && curr.status_code >= 400;
      const isRecovery = prev.status_code >= 400 && curr.status_code < 400;
      let severity = 'warning';
      if (curr.status_code >= 400) {
        severity = 'critical';
      } else if (isRecovery) {
        severity = 'notice';
      }

      events.push({
        event_type: isNewError ? 'new_error' : 'status_changed',
        severity,
        url,
        old_value: String(prev.status_code),
        new_value: String(curr.status_code),
        details: null,
      });
    }

    // Title / H1 / meta description changes
    for (const [field, eventType] of TEXT_FIELDS) {
      if (normalise(curr[field]) === normalise(prev[field])) continue;
      events.push({
        event_type: eventType,
        severity: 'notice',
        url,
        old_value: prev[field] || '',
        new_value: curr[field] || '',
        details: null,
      });
    }

    // Word count significant change. Requires a non-zero previous count so the
    // relative delta is well defined.
    const prevWc = prev.word_count || 0;
    const currWc = curr.word_count || 0;
    if (prevWc > 0 && Math.abs(currWc - prevWc) / prevWc > WORD_COUNT_THRESHOLD) {
      events.push({
        event_type: 'word_count_changed',
        severity: 'notice',
        url,
        old_value: String(prevWc),
        new_value: String(currWc),
        details: JSON.stringify({ delta_pct: Math.round(((currWc - prevWc) / prevWc) * 100) }),
      });
    }

    // Indexability change: losing indexability is critical, gaining it is a notice.
    const wasIndexable = Boolean(prev.is_indexable);
    const isIndexable = Boolean(curr.is_indexable);
    if (isIndexable !== wasIndexable) {
      events.push({
        event_type: 'indexability_changed',
        severity: isIndexable ? 'notice' : 'critical',
        url,
        old_value: wasIndexable ? 'indexable' : 'non-indexable',
        new_value: isIndexable ? 'indexable' : 'non-indexable',
        details: null,
      });
    }

    // Content hash change (body text changed). Only compared when both
    // snapshots carry a hash; values are shortened to 8 characters.
    if (curr.content_hash && prev.content_hash && curr.content_hash !== prev.content_hash) {
      events.push({
        event_type: 'content_changed',
        severity: 'notice',
        url,
        old_value: prev.content_hash.slice(0, 8),
        new_value: curr.content_hash.slice(0, 8),
        details: null,
      });
    }
  }

  // Sort: critical first, then warning, then notice (unknown severities last).
  events.sort((a, b) => (SEVERITY_RANK[a.severity] ?? 9) - (SEVERITY_RANK[b.severity] ?? 9));

  return events;
}
154
+
155
/**
 * Normalise a value for comparison: null-safe, trimmed, lowercased.
 *
 * Falsy input (null, undefined, '', 0) becomes the empty string. Any other
 * non-string value is converted with String() first, so a numeric field does
 * not throw on `.trim()`.
 *
 * @param {*} s - value to normalise, normally a string or null
 * @returns {string} trimmed, lowercased text
 */
function normalise(s) {
  return String(s || '').trim().toLowerCase();
}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Site Watch — Health Score Calculator
3
+ *
4
+ * Pure function: evaluates site health from current page data.
5
+ * Zero I/O, zero side effects.
6
+ */
7
+
8
/**
 * Evaluate site health from the current page data.
 *
 * Each page is checked for:
 *   - errors   (reduce the score): HTTP status >= 400, missing title, missing H1
 *   - warnings (tracked only):     3xx redirect, missing meta description, duplicate title
 *   - notices  (tracked only):     thin content (1–99 words)
 *
 * Title, H1, meta description, duplicate-title and thin-content checks only
 * apply to pages whose status code is below 300. Redirect rows are therefore
 * reported as a single redirect warning and never as "missing title/H1"
 * errors, which keeps redirects from lowering the score.
 *
 * The score is the percentage of pages without any error, rounded to an
 * integer. An empty or missing page list yields a score of 100.
 *
 * @param {object[]} pages - { url, status_code, title, h1, meta_desc, word_count, is_indexable }
 * @returns {object} { score, errors, warnings, notices, details } where details
 *   is an array of { url, severity, issue } in detection order.
 */
export function calculateHealthScore(pages) {
  if (!pages?.length) {
    return { score: 100, errors: 0, warnings: 0, notices: 0, details: [] };
  }

  const hasContent = (p) => p.status_code < 300;
  const isBlank = (value) => !value || !String(value).trim();
  const titleKey = (p) => String(p.title).trim().toLowerCase();

  // Count normalised titles up front so duplicates can be flagged per page.
  const titleCounts = new Map();
  for (const p of pages) {
    if (p.title && hasContent(p)) {
      const key = titleKey(p);
      titleCounts.set(key, (titleCounts.get(key) ?? 0) + 1);
    }
  }

  const details = [];
  const tally = { error: 0, warning: 0, notice: 0 };
  const report = (url, severity, issue) => {
    details.push({ url, severity, issue });
    tally[severity] += 1;
  };

  let errorPages = 0;
  for (const p of pages) {
    const contentPage = hasContent(p);
    const errorsBefore = tally.error;

    // ── Errors (reduce health score) ───────────────────────────────────────
    if (p.status_code >= 400) {
      report(p.url, 'error', `${p.status_code} error`);
    }
    if (contentPage && isBlank(p.title)) {
      report(p.url, 'error', 'Missing title');
    }
    if (contentPage && isBlank(p.h1)) {
      report(p.url, 'error', 'Missing H1');
    }

    // A page counts once towards the score no matter how many errors it has.
    if (tally.error > errorsBefore) errorPages += 1;

    // ── Warnings (tracked, don't reduce score) ────────────────────────────
    if (p.status_code >= 300 && p.status_code < 400) {
      report(p.url, 'warning', `${p.status_code} redirect`);
    }
    if (contentPage && isBlank(p.meta_desc)) {
      report(p.url, 'warning', 'Missing meta description');
    }
    if (contentPage && p.title && titleCounts.get(titleKey(p)) > 1) {
      report(p.url, 'warning', 'Duplicate title');
    }

    // ── Notices ────────────────────────────────────────────────────────────
    // A word count of 0 is skipped: only pages with some text can be "thin".
    const wordCount = p.word_count || 0;
    if (contentPage && wordCount > 0 && wordCount < 100) {
      report(p.url, 'notice', 'Thin content (<100 words)');
    }
  }

  // Health score = % of pages without errors
  const score = Math.round(((pages.length - errorPages) / pages.length) * 100);

  return { score, errors: tally.error, warnings: tally.warning, notices: tally.notice, details };
}
@@ -0,0 +1,215 @@
1
+ /**
2
+ * Site Watch — Orchestrator
3
+ *
4
+ * Gathers current page state, diffs against previous snapshot,
5
+ * persists results, and feeds significant changes into the Intelligence Ledger.
6
+ */
7
+
8
+ import { diffPages } from './diff.js';
9
+ import { calculateHealthScore } from './health.js';
10
+ import {
11
+ getLatestWatchSnapshot,
12
+ getWatchPageStates,
13
+ getWatchEvents,
14
+ getWatchHistory,
15
+ } from '../../db/db.js';
16
+
17
+ // ═══════════════════════════════════════════════════════════════════════════
18
+ // MAIN RUNNER
19
+ // ═══════════════════════════════════════════════════════════════════════════
20
+
21
/**
 * Run site watch analysis for a project.
 *
 * Steps:
 *   1. Load the current state of every page on the project's 'target' and
 *      'owned' domains (first level-1 heading is used as the H1).
 *   2. Compute the health score for those pages.
 *   3. Diff against the latest stored snapshot, if any; otherwise this run is
 *      the baseline and produces no events.
 *   4. Persist the new snapshot, its page states and its events in ONE
 *      transaction, so a failure leaves no partial snapshot behind to be
 *      picked up as the baseline of the next run.
 *   5. Pass critical and warning events to the Intelligence Ledger upsert.
 *
 * Note: the snapshot's errors/warnings/notices counts are counts of change
 * events by severity (critical/warning/notice), not of health-check findings.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object} [opts] - { log: function }
 * @returns {object} { snapshot, events, healthScore, healthDetails, previousHealthScore, trend, isBaseline }
 *   When no pages have been crawled: snapshot and healthDetails are null,
 *   healthScore is 0 and nothing is written.
 * @throws Re-throws any database error raised while persisting, after rolling back.
 */
export function runWatch(db, project, opts = {}) {
  const log = opts.log || console.log;

  // ── Gather current page state ──────────────────────────────────────────
  const currentPages = db.prepare(`
    SELECT
      p.url, p.status_code, p.title, p.meta_desc, p.word_count,
      p.is_indexable, p.content_hash,
      (SELECT text FROM headings WHERE page_id = p.id AND level = 1 ORDER BY id LIMIT 1) as h1
    FROM pages p
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ? AND d.role IN ('target', 'owned')
    ORDER BY p.url
  `).all(project);

  if (!currentPages.length) {
    log(' No crawled pages found. Run crawl first.');
    return {
      snapshot: null,
      events: [],
      healthScore: 0,
      healthDetails: null,
      previousHealthScore: null,
      trend: 0,
      isBaseline: false,
    };
  }

  // ── Health score ───────────────────────────────────────────────────────
  const health = calculateHealthScore(currentPages);

  // ── Load previous snapshot ─────────────────────────────────────────────
  const prevSnapshot = getLatestWatchSnapshot(db, project);
  let events = [];
  let isBaseline = false;

  if (prevSnapshot) {
    const prevPages = getWatchPageStates(db, prevSnapshot.id);
    events = diffPages(currentPages, prevPages);
    log(` Compared ${currentPages.length} pages against snapshot from ${new Date(prevSnapshot.created_at).toLocaleDateString()}`);
  } else {
    isBaseline = true;
    log(` Baseline snapshot — ${currentPages.length} pages captured`);
  }

  // ── Persist snapshot, page states and events atomically ────────────────
  const now = Date.now();
  const countBySeverity = (severity) => events.filter((e) => e.severity === severity).length;
  const criticalCount = countBySeverity('critical');
  const warningCount = countBySeverity('warning');
  const noticeCount = countBySeverity('notice');

  let snapshotId;
  db.exec('BEGIN');
  try {
    const inserted = db.prepare(`
      INSERT INTO watch_snapshots (project, created_at, total_pages, health_score, errors_count, warnings_count, notices_count)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `).run(project, now, currentPages.length, health.score, criticalCount, warningCount, noticeCount);

    // run() reports the new row id directly; it may be a number or a bigint.
    snapshotId = Number(inserted.lastInsertRowid);

    const stateStmt = db.prepare(`
      INSERT INTO watch_page_states (snapshot_id, url, status_code, title, h1, meta_desc, word_count, is_indexable, content_hash)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    for (const p of currentPages) {
      stateStmt.run(snapshotId, p.url, p.status_code, p.title, p.h1, p.meta_desc, p.word_count, p.is_indexable ? 1 : 0, p.content_hash);
    }

    if (events.length) {
      const eventStmt = db.prepare(`
        INSERT INTO watch_events (snapshot_id, event_type, severity, url, old_value, new_value, details)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      `);
      for (const e of events) {
        eventStmt.run(snapshotId, e.event_type, e.severity, e.url, e.old_value, e.new_value, e.details);
      }
    }

    db.exec('COMMIT');
  } catch (err) {
    db.exec('ROLLBACK');
    throw err;
  }

  // ── Feed Intelligence Ledger (critical + warning only) ─────────────────
  // Runs after COMMIT because the upsert helper opens its own transaction.
  const significant = events.filter((e) => e.severity === 'critical' || e.severity === 'warning');
  if (significant.length) {
    _upsertWatchInsights(db, project, significant, now);
  }

  const previousHealthScore = prevSnapshot?.health_score ?? null;
  const trend = previousHealthScore !== null ? health.score - previousHealthScore : 0;

  const snapshot = {
    id: snapshotId,
    project,
    created_at: now,
    total_pages: currentPages.length,
    health_score: health.score,
    errors_count: criticalCount,
    warnings_count: warningCount,
    notices_count: noticeCount,
  };

  return {
    snapshot,
    events,
    healthScore: health.score,
    healthDetails: health,
    previousHealthScore,
    trend,
    isBaseline,
  };
}
145
+
146
+ // ═══════════════════════════════════════════════════════════════════════════
147
+ // DASHBOARD DATA
148
+ // ═══════════════════════════════════════════════════════════════════════════
149
+
150
/**
 * Collect what the dashboard needs to render the Site Watch card.
 *
 * Reads the two most recent snapshots for the project, loads the events that
 * belong to the newest one and derives the health-score trend between them.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @returns {object|null} { current, previous, events, trend }, or null when the
 *   project has no snapshot yet. `previous` is null and `trend` is 0 when only
 *   one snapshot exists.
 */
export function getWatchData(db, project) {
  const snapshots = getWatchHistory(db, project, 2);
  if (!snapshots.length) {
    return null;
  }

  const [current, older] = snapshots;
  const previous = older || null;
  const events = getWatchEvents(db, current.id);

  // Trend is the change in health score versus the older snapshot, if any.
  let trend = 0;
  if (previous) {
    trend = current.health_score - previous.health_score;
  }

  return { current, previous, events, trend };
}
168
+
169
+ // ═══════════════════════════════════════════════════════════════════════════
170
+ // INTERNAL HELPERS
171
+ // ═══════════════════════════════════════════════════════════════════════════
172
+
173
/**
 * Record watch events as `site_watch` insights, one row per fingerprint.
 *
 * The fingerprint is "<url>::<event_type>", lowercased, with every character
 * other than a–z, 0–9 and whitespace removed. An existing row with the same
 * (project, type, fingerprint) only has `last_seen` and `data` refreshed.
 * Events whose fingerprint ends up empty are skipped.
 *
 * Best effort: if any write fails, the whole batch is rolled back and the
 * error message is logged instead of being thrown.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object[]} events - change events ({ url, event_type, severity, old_value, new_value })
 * @param {number} timestamp - stored as first_seen (new rows) and last_seen
 */
function _upsertWatchInsights(db, project, events, timestamp) {
  const statement = db.prepare(`
    INSERT INTO insights (project, type, status, fingerprint, first_seen, last_seen, source_analysis_id, data)
    VALUES (?, 'site_watch', 'active', ?, ?, ?, NULL, ?)
    ON CONFLICT(project, type, fingerprint) DO UPDATE SET
      last_seen = excluded.last_seen,
      data = excluded.data
  `);

  const fingerprintOf = (event) =>
    `${event.url || ''}::${event.event_type || ''}`
      .toLowerCase()
      .replace(/[^a-z0-9\s]/g, '')
      .replace(/\s+/g, ' ')
      .trim();

  db.exec('BEGIN');
  try {
    events.forEach((event) => {
      const fingerprint = fingerprintOf(event);
      if (!fingerprint) return;

      const { url, event_type, severity, old_value, new_value } = event;
      const payload = JSON.stringify({
        url,
        event_type,
        severity,
        old_value,
        new_value,
        summary: _eventSummary(event),
      });

      statement.run(project, fingerprint, timestamp, timestamp, payload);
    });
    db.exec('COMMIT');
  } catch (err) {
    db.exec('ROLLBACK');
    console.error('[watch] insight upsert failed:', err.message);
  }
}
206
+
207
/**
 * Build the one-line, human-readable summary stored with a watch insight.
 *
 * Four event types have dedicated wording; every other type falls back to
 * "<event type with underscores as spaces> on <url>".
 *
 * @param {object} e - change event ({ url, event_type, old_value, new_value })
 * @returns {string} summary sentence
 */
function _eventSummary(e) {
  const kind = e.event_type;

  if (kind === 'new_error') {
    return `${e.url} returned ${e.new_value} (was ${e.old_value})`;
  }
  if (kind === 'status_changed') {
    return `${e.url} status ${e.old_value} → ${e.new_value}`;
  }
  if (kind === 'page_removed') {
    return `${e.url} disappeared from crawl`;
  }
  if (kind === 'indexability_changed') {
    return `${e.url} became ${e.new_value}`;
  }

  return `${kind.replace(/_/g, ' ')} on ${e.url}`;
}