seo-intel 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/analyses/watch/diff.js +158 -0
- package/analyses/watch/health.js +78 -0
- package/analyses/watch/index.js +215 -0
- package/cli.js +141 -0
- package/db/db.js +73 -0
- package/package.json +1 -1
- package/reports/generate-html.js +150 -2
- package/reports/gsc-loader.js +14 -4
- package/server.js +1 -1
- package/setup/checks.js +9 -2
- package/setup/web-routes.js +26 -1
- package/setup/wizard.html +480 -323
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.4.2 (2026-04-05)
|
|
4
|
+
|
|
5
|
+
### New Feature: Site Watch
|
|
6
|
+
- `seo-intel watch <project>` — detect changes between crawl runs and track site health
|
|
7
|
+
- Health Score (0-100) based on page errors, missing titles, missing H1s
|
|
8
|
+
- Diff engine detects 10 event types: new/removed pages, status changes, title/H1/meta changes, word count shifts, indexability flips, content updates
|
|
9
|
+
- Events classified by severity: critical, warning, notice — with trend arrows
|
|
10
|
+
- Auto-runs after every crawl with a one-liner summary
|
|
11
|
+
- Dashboard card: health score gauge, severity counts with deltas, "What's New" event table
|
|
12
|
+
- Significant changes (critical/warning) feed into Intelligence Ledger as `site_watch` insights
|
|
13
|
+
- Available via CLI, dashboard terminal, and froggo.js API
|
|
14
|
+
- Free tier — no license required
|
|
15
|
+
|
|
3
16
|
## 1.4.1 (2026-04-03)
|
|
4
17
|
|
|
5
18
|
### Fixes
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Site Watch — Diff Engine
|
|
3
|
+
*
|
|
4
|
+
* Pure function: compares two page snapshots and returns change events.
|
|
5
|
+
* Zero I/O, zero side effects — deterministic and testable.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** Severity ranking used to order the returned events (lower sorts first). */
const SEVERITY_RANK = { critical: 0, warning: 1, notice: 2 };

/** Status assumed when a page row carries no usable status code. */
const DEFAULT_STATUS = 200;

/** Text fields compared case/whitespace-insensitively, with the event each one emits. */
const TEXT_FIELDS = [
  ['title', 'title_changed'],
  ['h1', 'h1_changed'],
  ['meta_desc', 'meta_desc_changed'],
];

/**
 * Compare two page snapshots and return the change events between them.
 *
 * Events are emitted in this order before sorting: added pages, removed pages,
 * then per-page field changes (status, title, H1, meta description, word count,
 * indexability, content hash). The final list is stably sorted by severity
 * (critical, warning, notice).
 *
 * A missing or non-numeric status code is treated as 200 everywhere, so the
 * comparison path agrees with the added/removed path and never emits the
 * strings "null" or "undefined" as a status value.
 *
 * @param {object[]} currentPages - { url, status_code, title, h1, meta_desc, word_count, is_indexable, content_hash }
 * @param {object[]} previousPages - same shape, from previous snapshot
 * @returns {object[]} Array of { event_type, severity, url, old_value, new_value, details }
 */
export function diffPages(currentPages, previousPages) {
  const currentMap = indexByUrl(currentPages);
  const previousMap = indexByUrl(previousPages);
  const events = [];

  // ── Pages added ──────────────────────────────────────────────────────────
  for (const [url, page] of currentMap) {
    if (previousMap.has(url)) continue;
    events.push(makeEvent('page_added', 'notice', url, null, String(statusOf(page)), pageSummary(page)));
  }

  // ── Pages removed ────────────────────────────────────────────────────────
  for (const [url, page] of previousMap) {
    if (currentMap.has(url)) continue;
    events.push(makeEvent('page_removed', 'warning', url, String(statusOf(page)), null, pageSummary(page)));
  }

  // ── Per-page field comparisons ───────────────────────────────────────────
  for (const [url, curr] of currentMap) {
    const prev = previousMap.get(url);
    if (!prev) continue;

    // Status code change
    const prevStatus = statusOf(prev);
    const currStatus = statusOf(curr);
    if (currStatus !== prevStatus) {
      const isNewError = prevStatus < 400 && currStatus >= 400;
      const isRecovery = prevStatus >= 400 && currStatus < 400;
      let severity = 'warning';
      if (currStatus >= 400) {
        // Covers both a fresh error and one error code replacing another.
        severity = 'critical';
      } else if (isRecovery) {
        severity = 'notice';
      }
      events.push(makeEvent(
        isNewError ? 'new_error' : 'status_changed',
        severity,
        url,
        String(prevStatus),
        String(currStatus),
        null,
      ));
    }

    // Title / H1 / meta description changes (case- and whitespace-insensitive)
    for (const [field, eventType] of TEXT_FIELDS) {
      if (normalise(curr[field]) !== normalise(prev[field])) {
        events.push(makeEvent(eventType, 'notice', url, prev[field] || '', curr[field] || '', null));
      }
    }

    // Word count significant change (>20%). A previous count of 0 is skipped
    // because a relative change cannot be computed from it.
    const prevWc = prev.word_count || 0;
    const currWc = curr.word_count || 0;
    if (prevWc > 0 && Math.abs(currWc - prevWc) / prevWc > 0.2) {
      const deltaPct = Math.round(((currWc - prevWc) / prevWc) * 100);
      events.push(makeEvent(
        'word_count_changed',
        'notice',
        url,
        String(prevWc),
        String(currWc),
        JSON.stringify({ delta_pct: deltaPct }),
      ));
    }

    // Indexability change — losing indexability is critical, gaining it is a notice
    const wasIndexable = Boolean(prev.is_indexable);
    const isIndexable = Boolean(curr.is_indexable);
    if (isIndexable !== wasIndexable) {
      events.push(makeEvent(
        'indexability_changed',
        isIndexable ? 'notice' : 'critical',
        url,
        wasIndexable ? 'indexable' : 'non-indexable',
        isIndexable ? 'indexable' : 'non-indexable',
        null,
      ));
    }

    // Content hash change (only when both snapshots actually have a hash)
    if (curr.content_hash && prev.content_hash && curr.content_hash !== prev.content_hash) {
      events.push(makeEvent(
        'content_changed',
        'notice',
        url,
        String(prev.content_hash).slice(0, 8),
        String(curr.content_hash).slice(0, 8),
        null,
      ));
    }
  }

  // Sort: critical first, then warning, then notice (unknown severities last)
  events.sort((a, b) => (SEVERITY_RANK[a.severity] ?? 9) - (SEVERITY_RANK[b.severity] ?? 9));

  return events;
}

/** Build a url → page lookup; a nullish list is treated as empty. */
function indexByUrl(pages) {
  return new Map((pages ?? []).map((page) => [page.url, page]));
}

/** Numeric status code of a page, falling back to DEFAULT_STATUS when absent. */
function statusOf(page) {
  return Number(page.status_code) || DEFAULT_STATUS;
}

/** JSON details payload attached to added/removed page events. */
function pageSummary(page) {
  return JSON.stringify({ title: page.title, word_count: page.word_count });
}

/** Assemble an event record with a fixed key order. */
function makeEvent(eventType, severity, url, oldValue, newValue, details) {
  return {
    event_type: eventType,
    severity,
    url,
    old_value: oldValue,
    new_value: newValue,
    details,
  };
}

/** Normalise a value for comparison (null-safe, non-string-safe, trimmed, lowercased). */
function normalise(s) {
  return s ? String(s).trim().toLowerCase() : '';
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Site Watch — Health Score Calculator
|
|
3
|
+
*
|
|
4
|
+
* Pure function: evaluates site health from current page data.
|
|
5
|
+
* Zero I/O, zero side effects.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Evaluate site health from the current page data.
 *
 * Errors (HTTP status >= 400, missing title, missing H1) lower the score;
 * warnings (3xx redirect, missing meta description, duplicate title) and
 * notices (1-99 words of content) are only reported. The score is the
 * percentage of pages that have no error, rounded to an integer.
 *
 * A blank (empty or whitespace-only) title counts as missing and is never
 * reported as a duplicate. A nullish or empty page list yields a perfect score.
 *
 * @param {object[]} pages - { url, status_code, title, h1, meta_desc, word_count, is_indexable }
 * @returns {object} { score, errors, warnings, notices, details }
 */
export function calculateHealthScore(pages) {
  if (!Array.isArray(pages) || pages.length === 0) {
    return { score: 100, errors: 0, warnings: 0, notices: 0, details: [] };
  }

  // Null-safe trimmed text; falsy values count as empty.
  const textOf = (value) => (value ? String(value).trim() : '');
  // Missing / non-numeric status is treated as a non-error response.
  const statusOf = (page) => Number(page.status_code) || 0;
  const titleKeyOf = (page) => textOf(page.title).toLowerCase();

  // First pass: tally non-blank titles of non-error pages to detect duplicates.
  const titleCounts = new Map();
  for (const page of pages) {
    const key = titleKeyOf(page);
    if (key && statusOf(page) < 400) {
      titleCounts.set(key, (titleCounts.get(key) || 0) + 1);
    }
  }

  const details = [];
  const counts = { error: 0, warning: 0, notice: 0 };
  const report = (page, severity, issue) => {
    details.push({ url: page.url, severity, issue });
    counts[severity] += 1;
  };

  let errorPages = 0;

  for (const page of pages) {
    const status = statusOf(page);
    const isLive = status < 400;
    const errorsBefore = counts.error;

    // ── Errors (reduce health score) ───────────────────────────────────────
    if (!isLive) report(page, 'error', `${page.status_code} error`);
    if (isLive && !textOf(page.title)) report(page, 'error', 'Missing title');
    if (isLive && !textOf(page.h1)) report(page, 'error', 'Missing H1');

    // A page counts once towards the score no matter how many errors it has.
    if (counts.error > errorsBefore) errorPages += 1;

    // ── Warnings (tracked, don't reduce score) ────────────────────────────
    if (status >= 300 && status < 400) report(page, 'warning', `${page.status_code} redirect`);
    if (isLive && !textOf(page.meta_desc)) report(page, 'warning', 'Missing meta description');

    const key = titleKeyOf(page);
    if (isLive && key && titleCounts.get(key) > 1) report(page, 'warning', 'Duplicate title');

    // ── Notices ────────────────────────────────────────────────────────────
    const words = page.word_count || 0;
    if (isLive && words > 0 && words < 100) report(page, 'notice', 'Thin content (<100 words)');
  }

  // Health score = % of pages without errors
  const score = Math.round(((pages.length - errorPages) / pages.length) * 100);

  return {
    score,
    errors: counts.error,
    warnings: counts.warning,
    notices: counts.notice,
    details,
  };
}
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Site Watch — Orchestrator
|
|
3
|
+
*
|
|
4
|
+
* Gathers current page state, diffs against previous snapshot,
|
|
5
|
+
* persists results, and feeds significant changes into the Intelligence Ledger.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { diffPages } from './diff.js';
|
|
9
|
+
import { calculateHealthScore } from './health.js';
|
|
10
|
+
import {
|
|
11
|
+
getLatestWatchSnapshot,
|
|
12
|
+
getWatchPageStates,
|
|
13
|
+
getWatchEvents,
|
|
14
|
+
getWatchHistory,
|
|
15
|
+
} from '../../db/db.js';
|
|
16
|
+
|
|
17
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
18
|
+
// MAIN RUNNER
|
|
19
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
20
|
+
|
|
21
|
+
/**
 * Run site watch analysis for a project.
 *
 * Reads the current state of the project's target/owned pages, scores site
 * health, diffs the pages against the most recent stored snapshot (if any),
 * then stores a new snapshot with its page states and events. Critical and
 * warning events are additionally passed to the Intelligence Ledger upsert.
 *
 * The snapshot row, its page states and its events are written in a single
 * transaction, so a failure cannot leave a half-written snapshot behind to be
 * used as the next comparison baseline.
 *
 * Note: the snapshot's errors_count / warnings_count / notices_count columns
 * hold the number of change events per severity (critical / warning / notice),
 * not the issue counts from the health calculation.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object} [opts] - { log: function }
 * @returns {object} { snapshot, events, healthScore, healthDetails, previousHealthScore, trend, isBaseline }
 * @throws Rethrows any database error raised while persisting the snapshot.
 */
export function runWatch(db, project, opts = {}) {
  const log = opts.log || console.log;

  // ── Gather current page state ──────────────────────────────────────────
  const currentPages = db.prepare(`
    SELECT
      p.url, p.status_code, p.title, p.meta_desc, p.word_count,
      p.is_indexable, p.content_hash,
      (SELECT text FROM headings WHERE page_id = p.id AND level = 1 ORDER BY id LIMIT 1) as h1
    FROM pages p
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ? AND d.role IN ('target', 'owned')
    ORDER BY p.url
  `).all(project);

  if (!currentPages.length) {
    log(' No crawled pages found. Run crawl first.');
    return {
      snapshot: null,
      events: [],
      healthScore: 0,
      healthDetails: null,
      previousHealthScore: null,
      trend: 0,
      isBaseline: false,
    };
  }

  // ── Health score ───────────────────────────────────────────────────────
  const health = calculateHealthScore(currentPages);

  // ── Load previous snapshot ─────────────────────────────────────────────
  const prevSnapshot = getLatestWatchSnapshot(db, project);
  let events = [];
  let isBaseline = false;

  if (prevSnapshot) {
    const prevPages = getWatchPageStates(db, prevSnapshot.id);
    events = diffPages(currentPages, prevPages);
    log(` Compared ${currentPages.length} pages against snapshot from ${new Date(prevSnapshot.created_at).toLocaleDateString()}`);
  } else {
    isBaseline = true;
    log(` Baseline snapshot — ${currentPages.length} pages captured`);
  }

  // ── Persist snapshot, page states and events atomically ────────────────
  const now = Date.now();
  const countBySeverity = (severity) => events.filter((e) => e.severity === severity).length;
  const criticalCount = countBySeverity('critical');
  const warningCount = countBySeverity('warning');
  const noticeCount = countBySeverity('notice');

  // Prepare every statement up front so a schema problem surfaces before
  // anything has been written.
  const snapshotStmt = db.prepare(`
    INSERT INTO watch_snapshots (project, created_at, total_pages, health_score, errors_count, warnings_count, notices_count)
    VALUES (?, ?, ?, ?, ?, ?, ?)
  `);
  const stateStmt = db.prepare(`
    INSERT INTO watch_page_states (snapshot_id, url, status_code, title, h1, meta_desc, word_count, is_indexable, content_hash)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const eventStmt = events.length
    ? db.prepare(`
      INSERT INTO watch_events (snapshot_id, event_type, severity, url, old_value, new_value, details)
      VALUES (?, ?, ?, ?, ?, ?, ?)
    `)
    : null;

  let snapshotId;
  db.exec('BEGIN');
  try {
    const inserted = snapshotStmt.run(
      project, now, currentPages.length, health.score, criticalCount, warningCount, noticeCount,
    );
    // Prefer the row id reported by run(); fall back to asking SQLite directly.
    const rowId = inserted?.lastInsertRowid
      ?? db.prepare('SELECT last_insert_rowid() as id').get().id;
    snapshotId = Number(rowId);

    for (const p of currentPages) {
      stateStmt.run(
        snapshotId, p.url, p.status_code, p.title, p.h1, p.meta_desc, p.word_count,
        p.is_indexable ? 1 : 0, p.content_hash,
      );
    }

    for (const e of events) {
      eventStmt.run(snapshotId, e.event_type, e.severity, e.url, e.old_value, e.new_value, e.details);
    }

    db.exec('COMMIT');
  } catch (err) {
    try {
      db.exec('ROLLBACK');
    } catch {
      // Ignore: the original failure below is the one worth reporting.
    }
    throw err;
  }

  // ── Feed Intelligence Ledger (critical + warning only) ─────────────────
  const significant = events.filter((e) => e.severity === 'critical' || e.severity === 'warning');
  if (significant.length) {
    _upsertWatchInsights(db, project, significant, now);
  }

  const previousHealthScore = prevSnapshot?.health_score ?? null;
  const trend = previousHealthScore !== null ? health.score - previousHealthScore : 0;

  const snapshot = {
    id: snapshotId,
    project,
    created_at: now,
    total_pages: currentPages.length,
    health_score: health.score,
    errors_count: criticalCount,
    warnings_count: warningCount,
    notices_count: noticeCount,
  };

  return {
    snapshot,
    events,
    healthScore: health.score,
    healthDetails: health,
    previousHealthScore,
    trend,
    isBaseline,
  };
}
|
|
145
|
+
|
|
146
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
147
|
+
// DASHBOARD DATA
|
|
148
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
149
|
+
|
|
150
|
+
/**
 * Load what the dashboard needs to render the Site Watch card.
 *
 * Reads the two most recent snapshots from the watch history, plus the events
 * recorded for the newest one. The trend is the health score difference between
 * the newest and the previous snapshot, or 0 when there is no previous snapshot.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @returns {object|null} { current, previous, events, trend }, or null when no snapshot exists yet
 */
export function getWatchData(db, project) {
  const snapshots = getWatchHistory(db, project, 2);
  if (!snapshots.length) {
    return null;
  }

  const latest = snapshots[0];
  const earlier = snapshots[1] || null;
  const latestEvents = getWatchEvents(db, latest.id);

  let scoreDelta = 0;
  if (earlier) {
    scoreDelta = latest.health_score - earlier.health_score;
  }

  return {
    current: latest,
    previous: earlier,
    events: latestEvents,
    trend: scoreDelta,
  };
}
|
|
168
|
+
|
|
169
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
170
|
+
// INTERNAL HELPERS
|
|
171
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
172
|
+
|
|
173
|
+
/**
 * Best-effort upsert of watch events into the `insights` table as
 * `site_watch` rows, keyed by (project, type, fingerprint).
 *
 * An existing row with the same key gets its `last_seen` and `data` refreshed;
 * `first_seen` is left untouched. Any database failure (prepare, BEGIN, insert,
 * COMMIT or ROLLBACK) is logged to stderr and swallowed, so this helper never
 * throws into the caller.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {string} project
 * @param {object[]} events - { url, event_type, severity, old_value, new_value }
 * @param {number} timestamp - value written to first_seen / last_seen
 * @returns {void}
 */
function _upsertWatchInsights(db, project, events, timestamp) {
  let inTransaction = false;

  try {
    const upsertStmt = db.prepare(`
      INSERT INTO insights (project, type, status, fingerprint, first_seen, last_seen, source_analysis_id, data)
      VALUES (?, 'site_watch', 'active', ?, ?, ?, NULL, ?)
      ON CONFLICT(project, type, fingerprint) DO UPDATE SET
        last_seen = excluded.last_seen,
        data = excluded.data
    `);

    db.exec('BEGIN');
    inTransaction = true;

    for (const e of events) {
      // NOTE(review): stripping every non-alphanumeric character means URLs
      // that differ only in punctuation (e.g. /foo-bar vs /foobar) share a
      // fingerprint. Left as-is because other code may depend on this exact
      // format — confirm before changing it.
      const raw = `${e.url || ''}::${e.event_type || ''}`;
      const fp = raw.toLowerCase().replace(/[^a-z0-9\s]/g, '').replace(/\s+/g, ' ').trim();
      if (!fp) continue;

      const data = {
        url: e.url,
        event_type: e.event_type,
        severity: e.severity,
        old_value: e.old_value,
        new_value: e.new_value,
        summary: _eventSummary(e),
      };

      upsertStmt.run(project, fp, timestamp, timestamp, JSON.stringify(data));
    }

    db.exec('COMMIT');
    inTransaction = false;
  } catch (err) {
    if (inTransaction) {
      try {
        db.exec('ROLLBACK');
      } catch {
        // Nothing left to undo; report the original failure below.
      }
    }
    console.error('[watch] insight upsert failed:', err.message);
  }
}

/**
 * Build the one-line human-readable summary stored with an insight.
 *
 * @param {object} e - { url, event_type, old_value, new_value }
 * @returns {string}
 */
function _eventSummary(e) {
  switch (e.event_type) {
    case 'new_error': return `${e.url} returned ${e.new_value} (was ${e.old_value})`;
    case 'status_changed': return `${e.url} status ${e.old_value} → ${e.new_value}`;
    case 'page_removed': return `${e.url} disappeared from crawl`;
    case 'indexability_changed': return `${e.url} became ${e.new_value}`;
    // Null-safe so an event without a type cannot abort the whole batch.
    default: return `${String(e.event_type ?? '').replace(/_/g, ' ')} on ${e.url}`;
  }
}
|
package/cli.js
CHANGED
|
@@ -660,6 +660,25 @@ program
|
|
|
660
660
|
console.log(chalk.dim(` ⚠ Dashboard refresh skipped: ${dashErr.message}`));
|
|
661
661
|
}
|
|
662
662
|
|
|
663
|
+
// Auto-run site watch after crawl
|
|
664
|
+
try {
|
|
665
|
+
const { runWatch } = await import('./analyses/watch/index.js');
|
|
666
|
+
const watchResult = runWatch(db, project, { log: () => {} });
|
|
667
|
+
if (watchResult.snapshot) {
|
|
668
|
+
if (watchResult.isBaseline) {
|
|
669
|
+
console.log(chalk.dim(` 👁 Site Watch: baseline snapshot saved (health: ${watchResult.healthScore}/100)`));
|
|
670
|
+
} else if (watchResult.events.length) {
|
|
671
|
+
const t = watchResult.trend;
|
|
672
|
+
const trendStr = t > 0 ? `▲ +${t}` : t < 0 ? `▼ ${t}` : '';
|
|
673
|
+
console.log(chalk.dim(` 👁 Site Watch: ${watchResult.events.length} changes detected (health: ${watchResult.healthScore}/100${trendStr ? ' ' + trendStr : ''})`));
|
|
674
|
+
} else {
|
|
675
|
+
console.log(chalk.dim(` 👁 Site Watch: no changes (health: ${watchResult.healthScore}/100)`));
|
|
676
|
+
}
|
|
677
|
+
}
|
|
678
|
+
} catch (e) {
|
|
679
|
+
console.log(chalk.dim(` ⚠ Site Watch skipped: ${e.message}`));
|
|
680
|
+
}
|
|
681
|
+
|
|
663
682
|
if (opts.extract === false && totalExtracted === 0) {
|
|
664
683
|
console.log(chalk.bold.green(`\n✅ Crawl complete (${elapsed}s) — raw data collected.`));
|
|
665
684
|
console.log(chalk.white(' Next steps:'));
|
|
@@ -4224,6 +4243,128 @@ program
|
|
|
4224
4243
|
}
|
|
4225
4244
|
});
|
|
4226
4245
|
|
|
4246
|
+
// ── SITE WATCH ──────────────────────────────────────────────────────────
// `watch <project>`: runs the Site Watch analysis and prints either a
// human-readable report (default, `--format brief`) or the raw result as JSON
// (`--format json`). In brief mode the dashboard is regenerated afterwards.
program
  .command('watch <project>')
  .description('Site health monitor — detect changes between crawl runs')
  .option('--format <type>', 'Output format: brief or json', 'brief')
  .action(async (project, opts) => {
    // Upper bound on events listed in brief mode; the rest are summarised.
    const MAX_EVENTS_SHOWN = 25;

    const db = getDb();
    const config = loadConfig(project);
    if (!config) return;
    const isJson = opts.format === 'json';

    if (!isJson) {
      printAttackHeader('Site Watch', project);
    }

    const { runWatch } = await import('./analyses/watch/index.js');

    // JSON mode must emit nothing but the JSON document, so progress logging is muted.
    const result = runWatch(db, project, {
      log: isJson ? () => {} : (msg) => console.log(chalk.gray(msg)),
    });

    if (!result.snapshot) {
      if (isJson) console.log(JSON.stringify({ ok: false, error: 'No crawled pages found' }));
      return;
    }

    if (isJson) {
      console.log(JSON.stringify({
        snapshot: result.snapshot,
        events: result.events,
        healthScore: result.healthScore,
        previousHealthScore: result.previousHealthScore,
        trend: result.trend,
        isBaseline: result.isBaseline,
      }, null, 2));
      return;
    }

    // One-line description of a change event for the brief report.
    const describeEvent = (ev, pagePath) => {
      switch (ev.event_type) {
        case 'page_added': return `${pagePath} — new page`;
        case 'page_removed': return `${pagePath} — removed`;
        case 'new_error': return `${pagePath} → ${ev.new_value} (was ${ev.old_value})`;
        case 'status_changed': return `${pagePath} status ${ev.old_value} → ${ev.new_value}`;
        case 'title_changed': return `${pagePath} title: "${(ev.old_value || '').slice(0, 30)}" → "${(ev.new_value || '').slice(0, 30)}"`;
        case 'h1_changed': return `${pagePath} H1 changed`;
        case 'meta_desc_changed': return `${pagePath} meta description changed`;
        case 'word_count_changed': return `${pagePath} word count ${ev.old_value} → ${ev.new_value}`;
        case 'indexability_changed': return `${pagePath} became ${ev.new_value}`;
        case 'content_changed': return `${pagePath} content updated`;
        default: return `${pagePath} — ${ev.event_type.replace(/_/g, ' ')}`;
      }
    };

    // ── Health Score ──
    const score = result.healthScore;
    const scoreFmt = score >= 80 ? chalk.bold.green(score + '/100')
      : score >= 60 ? chalk.bold.yellow(score + '/100')
      : chalk.bold.red(score + '/100');

    console.log('');
    console.log(` Health Score: ${scoreFmt}`);

    if (result.previousHealthScore !== null) {
      const t = result.trend;
      const trendStr = t > 0 ? chalk.green(`▲ +${t}`)
        : t < 0 ? chalk.red(`▼ ${t}`)
        : chalk.gray('— unchanged');
      console.log(` ${trendStr} (was ${result.previousHealthScore}/100)`);
    }
    console.log('');

    if (result.isBaseline) {
      console.log(chalk.bold.green(` ✅ Baseline captured — ${result.snapshot.total_pages} pages`));
      console.log(chalk.gray(' Run another crawl to see changes.\n'));
    } else {
      // ── Severity summary ──
      const { errors_count: e, warnings_count: w, notices_count: n } = result.snapshot;

      console.log(` ${chalk.red('●')} Critical: ${e}`);
      console.log(` ${chalk.yellow('●')} Warning: ${w}`);
      console.log(` ${chalk.gray('●')} Notice: ${n}`);
      console.log('');

      // ── Event list ──
      if (result.events.length) {
        console.log(chalk.bold(' What\'s New:'));
        console.log('');

        for (const ev of result.events.slice(0, MAX_EVENTS_SHOWN)) {
          const icon = ev.severity === 'critical' ? chalk.red('●')
            : ev.severity === 'warning' ? chalk.yellow('▲')
            : chalk.gray('○');
          const sev = ev.severity.toUpperCase().padEnd(8);
          // Strip only a leading scheme + host so the path column stays short.
          const pagePath = String(ev.url ?? '').replace(/^https?:\/\/[^/]+/, '') || '/';

          console.log(` ${icon} ${chalk.dim(sev)} ${describeEvent(ev, pagePath)}`);
        }

        if (result.events.length > MAX_EVENTS_SHOWN) {
          console.log(chalk.gray(` ... and ${result.events.length - MAX_EVENTS_SHOWN} more`));
        }
        console.log('');
      } else {
        console.log(chalk.green(' ✅ No changes detected since last crawl.\n'));
      }
    }

    // ── Regenerate dashboard ──
    // Best-effort: a dashboard failure is reported but does not fail the command.
    try {
      const configs = loadAllConfigs();
      generateMultiDashboard(db, configs);
      console.log(chalk.green(' ✅ Dashboard updated with Site Watch card\n'));
    } catch (e) {
      console.log(chalk.gray(` (Dashboard not updated: ${e.message})\n`));
    }
  });
|
|
4367
|
+
|
|
4227
4368
|
// ── GAP INTEL ────────────────────────────────────────────────────────────
|
|
4228
4369
|
|
|
4229
4370
|
program
|