seo-intel 1.4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,31 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.4.2 (2026-04-05)
4
+
5
+ ### New Feature: Site Watch
6
+ - `seo-intel watch <project>` — detect changes between crawl runs and track site health
7
+ - Health Score (0-100) based on page errors, missing titles, missing H1s
8
+ - Diff engine detects 10 event types: new/removed pages, status changes, title/H1/meta changes, word count shifts, indexability flips, content updates
9
+ - Events classified by severity: critical, warning, notice — with trend arrows
10
+ - Auto-runs after every crawl with a one-liner summary
11
+ - Dashboard card: health score gauge, severity counts with deltas, "What's New" event table
12
+ - Significant changes (critical/warning) feed into Intelligence Ledger as `site_watch` insights
13
+ - Available via CLI, dashboard terminal, and froggo.js API
14
+ - Free tier — no license required
15
+
16
+ ## 1.4.1 (2026-04-03)
17
+
18
+ ### Fixes
19
+ - **CLI JSON output** — all 11 commands now produce clean JSON with zero chalk/ANSI leakage
20
+ - **Brief `--format json`** — full rich data (keyword gaps, schema gaps, actions) instead of lean subset
21
+ - **Templates `--format json`** — suppressed chalk header and log output in JSON mode
22
+ - **JS-Delta `--format json`** — suppressed per-page progress chalk in JSON mode
23
+
24
+ ### Agent Integration
25
+ - Model selection hints (`modelHint`, `modelNote`) on extract, gap-intel, blog-draft capabilities
26
+ - AGENT_GUIDE.md — added Model Selection Guidance table (light-local vs cloud-medium per phase)
27
+ - GitHub Releases now auto-created on tag push via CI
28
+
3
29
  ## 1.4.0 (2026-04-03)
4
30
 
5
31
  ### New Feature: Gap Intelligence
@@ -17,6 +43,19 @@
17
43
  - Qwen models remain fully supported as alternatives
18
44
  - Setup wizard, model recommendations, and VRAM tiers updated for Gemma 4
19
45
 
46
+ ### Agent-Ready JSON Output
47
+ - All 11 analysis commands support `--format json` for clean, parseable output
48
+ - JSON output is chalk-free — no ANSI escape codes mixed into structured data
49
+ - Commands: shallow, decay, headings-audit, orphans, entities, schemas, friction, brief, velocity, templates, js-delta
50
+
51
+ ### Programmatic API (`seo-intel/froggo`)
52
+ - Unified agent runner: `run(command, project, opts)` returns `{ ok, command, project, timestamp, data }`
53
+ - 18 capabilities with machine-readable manifest (inputs, outputs, dependencies, tier)
54
+ - Pipeline dependency graph for orchestration
55
+ - Model selection hints per capability (light-local vs cloud-medium)
56
+ - Deep imports: `seo-intel/aeo`, `seo-intel/crawler`, `seo-intel/db`, etc.
57
+ - Agent Guide (`AGENT_GUIDE.md`) with orchestration patterns and model guidance
58
+
20
59
  ### Server
21
60
  - Added `gap-intel` to terminal command whitelist
22
61
  - Forward `--vs`, `--type`, `--limit`, `--raw`, `--out` params from dashboard to CLI
@@ -0,0 +1,158 @@
1
+ /**
2
+ * Site Watch — Diff Engine
3
+ *
4
+ * Pure function: compares two page snapshots and returns change events.
5
+ * Zero I/O, zero side effects — deterministic and testable.
6
+ */
7
+
8
/**
 * Diff two crawl snapshots of a site and report every detected change.
 *
 * Pages are matched by `url`. The result contains, in this order before
 * sorting: pages only present in the current snapshot, pages only present in
 * the previous snapshot, then per-page field changes for pages in both.
 * The final array is sorted by severity (critical, warning, notice); the sort
 * is stable, so events of equal severity keep their detection order.
 *
 * @param {object[]} [currentPages] - { url, status_code, title, h1, meta_desc, word_count, is_indexable, content_hash }.
 *   A null/undefined value is treated as an empty snapshot.
 * @param {object[]} [previousPages] - same shape, from previous snapshot.
 *   A null/undefined value is treated as an empty snapshot.
 * @returns {object[]} Array of { event_type, severity, url, old_value, new_value, details }
 */
export function diffPages(currentPages, previousPages) {
  // Relative word-count change above which a shift is reported (20%).
  const wordCountThreshold = 0.2;
  const severityRank = { critical: 0, warning: 1, notice: 2 };
  // Text fields compared after normalisation, paired with the event they raise.
  const textFieldEvents = [
    ['title', 'title_changed'],
    ['h1', 'h1_changed'],
    ['meta_desc', 'meta_desc_changed'],
  ];

  const events = [];
  const emit = (event_type, severity, url, old_value, new_value, details = null) => {
    events.push({ event_type, severity, url, old_value, new_value, details });
  };
  const pageSummary = (page) => JSON.stringify({ title: page.title, word_count: page.word_count });

  // If a URL occurs twice in one snapshot the later entry wins (Map semantics).
  const currentMap = new Map((currentPages ?? []).map((p) => [p.url, p]));
  const previousMap = new Map((previousPages ?? []).map((p) => [p.url, p]));

  // ── Pages added ──────────────────────────────────────────────────────────
  for (const [url, page] of currentMap) {
    if (previousMap.has(url)) continue;
    // `??` rather than `||`: only a missing status falls back to 200, so a
    // recorded status of 0 is reported as-is instead of being shown as "200".
    emit('page_added', 'notice', url, null, String(page.status_code ?? 200), pageSummary(page));
  }

  // ── Pages removed ────────────────────────────────────────────────────────
  for (const [url, page] of previousMap) {
    if (currentMap.has(url)) continue;
    emit('page_removed', 'warning', url, String(page.status_code ?? 200), null, pageSummary(page));
  }

  // ── Per-page field comparisons ───────────────────────────────────────────
  for (const [url, curr] of currentMap) {
    const prev = previousMap.get(url);
    if (!prev) continue;

    // Status code change
    if (curr.status_code !== prev.status_code) {
      const isNewError = prev.status_code < 400 && curr.status_code >= 400;
      const isRecovery = prev.status_code >= 400 && curr.status_code < 400;

      // Any page that is currently failing is critical (this covers new errors
      // as well as error → different error); a recovery is only a notice.
      let severity = 'warning';
      if (curr.status_code >= 400) severity = 'critical';
      else if (isRecovery) severity = 'notice';

      emit(
        isNewError ? 'new_error' : 'status_changed',
        severity,
        url,
        String(prev.status_code),
        String(curr.status_code),
      );
    }

    // Title / H1 / meta description: compared case- and whitespace-insensitively,
    // but reported with their raw values.
    for (const [field, eventType] of textFieldEvents) {
      if (normalise(curr[field]) !== normalise(prev[field])) {
        emit(eventType, 'notice', url, prev[field] || '', curr[field] || '');
      }
    }

    // Word count significant change (>20%). A previous count of 0 is skipped
    // because no relative change can be computed from it.
    const prevWc = prev.word_count || 0;
    const currWc = curr.word_count || 0;
    if (prevWc > 0 && Math.abs(currWc - prevWc) / prevWc > wordCountThreshold) {
      const deltaPct = Math.round(((currWc - prevWc) / prevWc) * 100);
      emit(
        'word_count_changed',
        'notice',
        url,
        String(prevWc),
        String(currWc),
        JSON.stringify({ delta_pct: deltaPct }),
      );
    }

    // Indexability change: losing indexability is critical, gaining it is a notice.
    const prevIdx = prev.is_indexable ? 1 : 0;
    const currIdx = curr.is_indexable ? 1 : 0;
    if (currIdx !== prevIdx) {
      emit(
        'indexability_changed',
        currIdx === 0 ? 'critical' : 'notice',
        url,
        prevIdx ? 'indexable' : 'non-indexable',
        currIdx ? 'indexable' : 'non-indexable',
      );
    }

    // Content hash change (body text changed). Only reported when both
    // snapshots have a hash; values are shortened to their first 8 characters.
    if (curr.content_hash && prev.content_hash && curr.content_hash !== prev.content_hash) {
      emit(
        'content_changed',
        'notice',
        url,
        prev.content_hash.slice(0, 8) || '',
        curr.content_hash.slice(0, 8) || '',
      );
    }
  }

  // Sort: critical first, then warning, then notice (unknown severities last).
  events.sort((a, b) => (severityRank[a.severity] ?? 9) - (severityRank[b.severity] ?? 9));

  return events;
}

/** Normalise a string for comparison (null-safe, trimmed, lowercased). */
function normalise(s) {
  return (s || '').trim().toLowerCase();
}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Site Watch — Health Score Calculator
3
+ *
4
+ * Pure function: evaluates site health from current page data.
5
+ * Zero I/O, zero side effects.
6
+ */
7
+
8
/**
 * Evaluate site health from the current page data.
 *
 * Issues are collected per page in three severities:
 *  - error   — 4xx/5xx status, missing title, missing H1 (these reduce the score)
 *  - warning — 3xx redirect, missing meta description, duplicate title
 *  - notice  — thin content (1–99 words)
 *
 * Content checks (title, H1, meta description, duplicate title, thin content)
 * only apply to pages whose status is below 300. A redirect row carries no
 * document of its own, so it is reported once as a redirect warning instead of
 * also being counted as a page with a missing title/H1.
 *
 * The score is the percentage of pages without any error, rounded to an integer.
 *
 * @param {object[]} [pages] - { url, status_code, title, h1, meta_desc, word_count, is_indexable }.
 *   A null/undefined or empty list yields a perfect score with no details.
 * @returns {object} { score, errors, warnings, notices, details } where
 *   `details` is an array of { url, severity, issue }.
 */
export function calculateHealthScore(pages) {
  const pageList = pages ?? [];
  if (!pageList.length) {
    return { score: 100, errors: 0, warnings: 0, notices: 0, details: [] };
  }

  const details = [];
  const tally = { error: 0, warning: 0, notice: 0 };
  const report = (url, severity, issue) => {
    details.push({ url, severity, issue });
    tally[severity] += 1;
  };

  const isBlank = (text) => !text || !text.trim();
  // True for pages that actually served a document (not a redirect, not an error).
  const servesContent = (p) => p.status_code < 300;
  const titleKey = (p) => p.title.trim().toLowerCase();

  // First pass: count titles (case/whitespace-insensitive) to detect duplicates.
  const titleCounts = new Map();
  for (const p of pageList) {
    if (p.title && servesContent(p)) {
      const key = titleKey(p);
      titleCounts.set(key, (titleCounts.get(key) || 0) + 1);
    }
  }

  let errorPages = 0;
  for (const p of pageList) {
    const hasContent = servesContent(p);
    const errorsBefore = tally.error;

    // ── Errors (reduce health score) ───────────────────────────────────────
    if (p.status_code >= 400) report(p.url, 'error', `${p.status_code} error`);
    if (hasContent && isBlank(p.title)) report(p.url, 'error', 'Missing title');
    if (hasContent && isBlank(p.h1)) report(p.url, 'error', 'Missing H1');

    // A page counts once towards the score no matter how many errors it has.
    if (tally.error > errorsBefore) errorPages += 1;

    // ── Warnings (tracked, don't reduce score) ────────────────────────────
    if (p.status_code >= 300 && p.status_code < 400) {
      report(p.url, 'warning', `${p.status_code} redirect`);
    }
    if (hasContent && isBlank(p.meta_desc)) {
      report(p.url, 'warning', 'Missing meta description');
    }
    if (p.title && hasContent && titleCounts.get(titleKey(p)) > 1) {
      report(p.url, 'warning', 'Duplicate title');
    }

    // ── Notices ────────────────────────────────────────────────────────────
    // A word count of 0/unknown is not reported as thin content.
    const wordCount = p.word_count || 0;
    if (hasContent && wordCount > 0 && wordCount < 100) {
      report(p.url, 'notice', 'Thin content (<100 words)');
    }
  }

  // Health score = % of pages without errors
  const score = Math.round(((pageList.length - errorPages) / pageList.length) * 100);

  return {
    score,
    errors: tally.error,
    warnings: tally.warning,
    notices: tally.notice,
    details,
  };
}
@@ -0,0 +1,215 @@
1
+ /**
2
+ * Site Watch — Orchestrator
3
+ *
4
+ * Gathers current page state, diffs against previous snapshot,
5
+ * persists results, and feeds significant changes into the Intelligence Ledger.
6
+ */
7
+
8
+ import { diffPages } from './diff.js';
9
+ import { calculateHealthScore } from './health.js';
10
+ import {
11
+ getLatestWatchSnapshot,
12
+ getWatchPageStates,
13
+ getWatchEvents,
14
+ getWatchHistory,
15
+ } from '../../db/db.js';
16
+
17
+ // ═══════════════════════════════════════════════════════════════════════════
18
+ // MAIN RUNNER
19
+ // ═══════════════════════════════════════════════════════════════════════════
20
+
21
/**
 * Run site watch analysis for a project.
 *
 * Steps:
 *  1. Read the current state of every page on the project's `target`/`owned`
 *     domains (including the first H1 heading of each page).
 *  2. Compute the health score for that state.
 *  3. Diff against the most recent stored snapshot, if any; without one this
 *     run becomes the baseline and produces no events.
 *  4. Persist the new snapshot, its page states and its events in a single
 *     transaction, so a failure cannot leave a snapshot row without page
 *     states (which the next run would otherwise diff against).
 *  5. Feed critical/warning events into the insights table.
 *
 * Note: the snapshot's `errors_count` / `warnings_count` / `notices_count`
 * hold the number of change *events* per severity, not the health-check issue
 * counts (those are returned separately in `healthDetails`).
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object} [opts] - { log: function } (defaults to console.log)
 * @returns {object} { snapshot, events, healthScore, healthDetails, previousHealthScore, trend, isBaseline }.
 *   When no crawled pages exist, `snapshot` is null, `healthScore` is 0 and
 *   `healthDetails` is absent.
 * @throws Re-throws any database error raised while persisting the snapshot
 *   (after rolling the transaction back).
 */
export function runWatch(db, project, opts = {}) {
  const log = opts.log || console.log;

  // ── Gather current page state ──────────────────────────────────────────
  const currentPages = db.prepare(`
    SELECT
      p.url, p.status_code, p.title, p.meta_desc, p.word_count,
      p.is_indexable, p.content_hash,
      (SELECT text FROM headings WHERE page_id = p.id AND level = 1 ORDER BY id LIMIT 1) as h1
    FROM pages p
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ? AND d.role IN ('target', 'owned')
    ORDER BY p.url
  `).all(project);

  if (!currentPages.length) {
    log(' No crawled pages found. Run crawl first.');
    return { snapshot: null, events: [], healthScore: 0, previousHealthScore: null, trend: 0, isBaseline: false };
  }

  // ── Health score ───────────────────────────────────────────────────────
  const health = calculateHealthScore(currentPages);

  // ── Load previous snapshot ─────────────────────────────────────────────
  const prevSnapshot = getLatestWatchSnapshot(db, project);
  let events = [];
  let isBaseline = false;

  if (prevSnapshot) {
    const prevPages = getWatchPageStates(db, prevSnapshot.id);
    events = diffPages(currentPages, prevPages);
    log(` Compared ${currentPages.length} pages against snapshot from ${new Date(prevSnapshot.created_at).toLocaleDateString()}`);
  } else {
    isBaseline = true;
    log(` Baseline snapshot — ${currentPages.length} pages captured`);
  }

  // ── Persist snapshot + page states + events atomically ─────────────────
  const now = Date.now();
  const countWithSeverity = (severity) => events.filter((e) => e.severity === severity).length;
  const criticalCount = countWithSeverity('critical');
  const warningCount = countWithSeverity('warning');
  const noticeCount = countWithSeverity('notice');

  let snapshotId;
  db.exec('BEGIN');
  try {
    const snapshotResult = db.prepare(`
      INSERT INTO watch_snapshots (project, created_at, total_pages, health_score, errors_count, warnings_count, notices_count)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `).run(project, now, currentPages.length, health.score, criticalCount, warningCount, noticeCount);

    // run() reports the rowid of the row it just inserted (number or bigint).
    snapshotId = Number(snapshotResult.lastInsertRowid);

    const stateStmt = db.prepare(`
      INSERT INTO watch_page_states (snapshot_id, url, status_code, title, h1, meta_desc, word_count, is_indexable, content_hash)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);
    for (const p of currentPages) {
      stateStmt.run(snapshotId, p.url, p.status_code, p.title, p.h1, p.meta_desc, p.word_count, p.is_indexable ? 1 : 0, p.content_hash);
    }

    if (events.length) {
      const eventStmt = db.prepare(`
        INSERT INTO watch_events (snapshot_id, event_type, severity, url, old_value, new_value, details)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      `);
      for (const e of events) {
        eventStmt.run(snapshotId, e.event_type, e.severity, e.url, e.old_value, e.new_value, e.details);
      }
    }

    db.exec('COMMIT');
  } catch (err) {
    try {
      db.exec('ROLLBACK');
    } catch {
      // The rollback failure is secondary; surface the original error below.
    }
    throw err;
  }

  // ── Feed Intelligence Ledger (critical + warning only) ─────────────────
  // Runs after COMMIT because the helper opens its own transaction.
  const significant = events.filter((e) => e.severity === 'critical' || e.severity === 'warning');
  if (significant.length) {
    _upsertWatchInsights(db, project, significant, now);
  }

  const previousHealthScore = prevSnapshot?.health_score ?? null;
  const trend = previousHealthScore !== null ? health.score - previousHealthScore : 0;

  const snapshot = {
    id: snapshotId,
    project,
    created_at: now,
    total_pages: currentPages.length,
    health_score: health.score,
    errors_count: criticalCount,
    warnings_count: warningCount,
    notices_count: noticeCount,
  };

  return {
    snapshot,
    events,
    healthScore: health.score,
    healthDetails: health,
    previousHealthScore,
    trend,
    isBaseline,
  };
}
145
+
146
+ // ═══════════════════════════════════════════════════════════════════════════
147
+ // DASHBOARD DATA
148
+ // ═══════════════════════════════════════════════════════════════════════════
149
+
150
/**
 * Load what the dashboard needs to render the Site Watch card.
 *
 * Reads the two most recent snapshots via `getWatchHistory` (the first entry
 * is used as the current snapshot, the second as the previous one), plus the
 * events stored for the current snapshot.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @returns {object|null} { current, previous, events, trend }, or null when
 *   the project has no snapshots. `previous` is null and `trend` is 0 when
 *   only one snapshot exists; otherwise `trend` is the health-score difference
 *   (current minus previous).
 */
export function getWatchData(db, project) {
  const snapshots = getWatchHistory(db, project, 2);
  if (!snapshots.length) {
    return null;
  }

  const current = snapshots[0];
  const previous = snapshots[1] || null;

  let trend = 0;
  if (previous) {
    trend = current.health_score - previous.health_score;
  }

  return {
    current,
    previous,
    events: getWatchEvents(db, current.id),
    trend,
  };
}
168
+
169
+ // ═══════════════════════════════════════════════════════════════════════════
170
+ // INTERNAL HELPERS
171
+ // ═══════════════════════════════════════════════════════════════════════════
172
+
173
/**
 * Record significant watch events as `site_watch` rows in the insights table.
 *
 * Each event is keyed by a fingerprint derived from its URL and event type.
 * A new fingerprint inserts an `active` insight; an existing one only has its
 * `last_seen` and `data` refreshed (status and `first_seen` are left alone).
 *
 * This is best-effort: any database failure (statement preparation, BEGIN,
 * insert, COMMIT or ROLLBACK) is logged to stderr and swallowed so that it
 * never fails the watch run that called it.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object[]} events - { event_type, severity, url, old_value, new_value }
 * @param {number} timestamp - stored as both first_seen and last_seen on insert
 * @returns {void}
 */
function _upsertWatchInsights(db, project, events, timestamp) {
  let inTransaction = false;
  try {
    const upsertStmt = db.prepare(`
      INSERT INTO insights (project, type, status, fingerprint, first_seen, last_seen, source_analysis_id, data)
      VALUES (?, 'site_watch', 'active', ?, ?, ?, NULL, ?)
      ON CONFLICT(project, type, fingerprint) DO UPDATE SET
        last_seen = excluded.last_seen,
        data = excluded.data
    `);

    db.exec('BEGIN');
    inTransaction = true;

    for (const e of events) {
      // NOTE(review): the fingerprint drops every character except a-z, 0-9 and
      // whitespace, so distinct URLs such as "/a-b" and "/ab" share one
      // fingerprint and overwrite each other. Left unchanged here because
      // changing it would alter the keys of already-stored insights — confirm
      // whether this matches the ledger's fingerprint convention.
      const raw = `${e.url || ''}::${e.event_type || ''}`;
      const fp = raw.toLowerCase().replace(/[^a-z0-9\s]/g, '').replace(/\s+/g, ' ').trim();
      if (!fp) continue;

      const data = {
        url: e.url,
        event_type: e.event_type,
        severity: e.severity,
        old_value: e.old_value,
        new_value: e.new_value,
        summary: _eventSummary(e),
      };

      upsertStmt.run(project, fp, timestamp, timestamp, JSON.stringify(data));
    }

    db.exec('COMMIT');
    inTransaction = false;
  } catch (err) {
    // Only roll back a transaction this function actually opened, and never
    // let a failing ROLLBACK escape — the caller has already persisted its
    // snapshot and must not crash because the ledger update failed.
    if (inTransaction) {
      try {
        db.exec('ROLLBACK');
      } catch (rollbackErr) {
        console.error('[watch] insight rollback failed:', rollbackErr.message);
      }
    }
    console.error('[watch] insight upsert failed:', err.message);
  }
}

/**
 * Build a one-line human-readable summary for a watch event.
 *
 * @param {object} e - { event_type, url, old_value, new_value }
 * @returns {string} A dedicated sentence for new_error, status_changed,
 *   page_removed and indexability_changed; otherwise "<event type> on <url>"
 *   with underscores replaced by spaces ("change on <url>" when the event has
 *   no type, so one malformed event cannot abort the whole batch).
 */
function _eventSummary(e) {
  switch (e.event_type) {
    case 'new_error': return `${e.url} returned ${e.new_value} (was ${e.old_value})`;
    case 'status_changed': return `${e.url} status ${e.old_value} → ${e.new_value}`;
    case 'page_removed': return `${e.url} disappeared from crawl`;
    case 'indexability_changed': return `${e.url} became ${e.new_value}`;
    default: return `${String(e.event_type ?? 'change').replace(/_/g, ' ')} on ${e.url}`;
  }
}