seo-intel 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/.env.example +41 -0
  2. package/LICENSE +75 -0
  3. package/README.md +243 -0
  4. package/Start SEO Intel.bat +9 -0
  5. package/Start SEO Intel.command +8 -0
  6. package/cli.js +3727 -0
  7. package/config/example.json +29 -0
  8. package/config/setup-wizard.js +522 -0
  9. package/crawler/index.js +566 -0
  10. package/crawler/robots.js +103 -0
  11. package/crawler/sanitize.js +124 -0
  12. package/crawler/schema-parser.js +168 -0
  13. package/crawler/sitemap.js +103 -0
  14. package/crawler/stealth.js +393 -0
  15. package/crawler/subdomain-discovery.js +341 -0
  16. package/db/db.js +213 -0
  17. package/db/schema.sql +120 -0
  18. package/exports/competitive.js +186 -0
  19. package/exports/heuristics.js +67 -0
  20. package/exports/queries.js +197 -0
  21. package/exports/suggestive.js +230 -0
  22. package/exports/technical.js +180 -0
  23. package/exports/templates.js +77 -0
  24. package/lib/gate.js +204 -0
  25. package/lib/license.js +369 -0
  26. package/lib/oauth.js +432 -0
  27. package/lib/updater.js +324 -0
  28. package/package.json +68 -0
  29. package/reports/generate-html.js +6194 -0
  30. package/reports/generate-site-graph.js +949 -0
  31. package/reports/gsc-loader.js +190 -0
  32. package/scheduler.js +142 -0
  33. package/seo-audit.js +619 -0
  34. package/seo-intel.png +0 -0
  35. package/server.js +602 -0
  36. package/setup/ROADMAP.md +109 -0
  37. package/setup/checks.js +483 -0
  38. package/setup/config-builder.js +227 -0
  39. package/setup/engine.js +65 -0
  40. package/setup/installers.js +197 -0
  41. package/setup/models.js +328 -0
  42. package/setup/openclaw-bridge.js +329 -0
  43. package/setup/validator.js +395 -0
  44. package/setup/web-routes.js +688 -0
  45. package/setup/wizard.html +2920 -0
  46. package/start-seo-intel.sh +8 -0
@@ -0,0 +1,190 @@
1
/**
 * Google Search Console CSV data loader.
 * Reads GSC export folders from seo-intel/gsc/<project>*
 *
 * NOTE(review): the original header comment was only terminated by the `* /`
 * at the end of the glob pattern above — closed explicitly here so the glob
 * text can't accidentally un-close the comment when edited.
 */
import { readdirSync, readFileSync, existsSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';

// ESM modules have no __dirname; reconstruct it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// GSC export folders live one level above this module, in <package>/gsc.
const GSC_DIR = join(__dirname, '..', 'gsc');
10
+
11
// ── Robust CSV parser (handles quoted fields with commas/newlines) ──────────
/**
 * Parse a full CSV document into an array of header-keyed row objects.
 * Quoted fields may contain commas, doubled quotes (""), and raw newlines.
 * Blank lines are skipped; returns [] when there is no data row.
 */
function parseCSVContent(content) {
  // Pass 1: split the raw text into logical rows, honoring quoted newlines.
  const lines = [];
  let buf = '';
  let quoted = false;
  let i = 0;
  while (i < content.length) {
    const ch = content[i];
    if (ch === '"') {
      if (quoted && content[i + 1] === '"') {
        // Doubled quote inside a quoted field → literal quote character.
        buf += '"';
        i += 1;
      } else {
        quoted = !quoted;
      }
    } else if (ch === '\n' && !quoted) {
      const line = buf.replace(/\r$/, '');
      if (line) lines.push(line);
      buf = '';
    } else {
      buf += ch;
    }
    i += 1;
  }
  if (buf.trim()) lines.push(buf.replace(/\r$/, ''));

  // Need at least a header row plus one data row.
  if (lines.length < 2) return [];

  // Pass 2: split each row into cells and zip with the (trimmed) headers.
  const headers = splitCSVRow(lines[0]);
  return lines.slice(1).map((line) => {
    const cells = splitCSVRow(line);
    return Object.fromEntries(
      headers.map((h, idx) => [h.trim(), (cells[idx] || '').trim()])
    );
  });
}
48
+
49
/**
 * Split one CSV row into its fields.
 * Commas inside double-quoted fields are literal; a doubled quote ("")
 * inside a quoted field yields a single quote character. Quote characters
 * themselves are stripped from the output.
 */
function splitCSVRow(row) {
  const fields = [];
  let field = '';
  let quoted = false;
  let i = 0;

  while (i < row.length) {
    const c = row[i];
    if (c === '"') {
      if (quoted && row[i + 1] === '"') {
        // Escaped quote → emit one literal quote, skip its twin.
        field += '"';
        i += 1;
      } else {
        quoted = !quoted;
      }
    } else if (c === ',' && !quoted) {
      fields.push(field);
      field = '';
    } else {
      field += c;
    }
    i += 1;
  }

  // The final field has no trailing comma; push whatever remains.
  fields.push(field);
  return fields;
}
73
+
74
// ── Parse numeric GSC fields ────────────────────────────────────────────────
/**
 * Parse a GSC numeric cell ("1,234,567", "12.5%", "") into a number.
 * Returns 0 for empty/missing/unparseable values.
 *
 * Bug fix: the original used val.replace(',', ''), which removes only the
 * FIRST comma — "1,234,567" became "1234,567" and parseFloat truncated it
 * to 1234. Strip ALL '%' and ',' characters with a global regex instead.
 */
function parseNum(val) {
  if (!val || val === '') return 0;
  return parseFloat(val.replace(/[%,]/g, '')) || 0;
}
79
+
80
// ── Load GSC data for a project ─────────────────────────────────────────────
/**
 * Load a project's Google Search Console CSV export from the most recent
 * matching folder under GSC_DIR.
 *
 * @param {string} project - folder-name prefix to match (case-insensitive)
 * @returns {object|null} { chart, queries, pages, countries, devices,
 *   summary, folder } or null when no GSC data exists for the project.
 */
export function loadGscData(project) {
  if (!existsSync(GSC_DIR)) return null;

  const folders = readdirSync(GSC_DIR).filter(f =>
    f.toLowerCase().startsWith(project.toLowerCase()) &&
    !f.startsWith('.')
  );
  if (!folders.length) return null;

  // Use most recent folder (alphabetically last). Compute the name ONCE:
  // the original called folders.sort().pop() here AND again for the returned
  // `folder` field — sort() sorts in place and pop() mutates, so the second
  // call reported the wrong folder (or undefined with a single match).
  const folderName = [...folders].sort().at(-1);
  const folder = join(GSC_DIR, folderName);

  // Read and parse one CSV from the export folder; [] when absent.
  function loadCSV(filename) {
    const filepath = join(folder, filename);
    if (!existsSync(filepath)) return [];
    return parseCSVContent(readFileSync(filepath, 'utf8'));
  }

  // ── Chart (daily time series) ──
  const chartRaw = loadCSV('Chart.csv');
  // GSC exports use 'Date' for daily exports and 'Time (UTC...)' for hourly exports
  // Normalize: extract YYYY-MM-DD from whatever the date/time column is
  const dateKey = Object.keys(chartRaw[0] || {}).find(k =>
    k === 'Date' || k.startsWith('Time')
  ) || 'Date';
  // Aggregate hourly rows to daily. CTR/position are averaged per-day;
  // clicks/impressions are summed.
  const dailyMap = new Map();
  for (const r of chartRaw) {
    const rawDate = r[dateKey] || '';
    const date = rawDate.includes('T') ? rawDate.slice(0, 10) : rawDate; // trim to YYYY-MM-DD
    if (!date) continue;
    const existing = dailyMap.get(date) || { clicks: 0, impressions: 0, ctrSum: 0, posSum: 0, count: 0 };
    existing.clicks += parseNum(r.Clicks);
    existing.impressions += parseNum(r.Impressions);
    existing.ctrSum += parseNum(r.CTR);
    existing.posSum += parseNum(r.Position);
    existing.count += 1;
    dailyMap.set(date, existing);
  }
  const chart = Array.from(dailyMap.entries()).map(([date, v]) => ({
    date,
    clicks: v.clicks,
    impressions: v.impressions,
    ctr: v.count > 0 ? v.ctrSum / v.count : 0,
    position: v.count > 0 ? v.posSum / v.count : 0,
  })).sort((a, b) => a.date.localeCompare(b.date));

  // ── Queries ── (column is 'Top queries' in UI exports, 'Query' in API-ish ones)
  const queriesRaw = loadCSV('Queries.csv');
  const queries = queriesRaw.map(r => ({
    query: r['Top queries'] || r.Query || '',
    clicks: parseNum(r.Clicks),
    impressions: parseNum(r.Impressions),
    ctr: parseNum(r.CTR),
    position: parseNum(r.Position),
  })).sort((a, b) => b.impressions - a.impressions);

  // ── Pages ──
  const pagesRaw = loadCSV('Pages.csv');
  const pages = pagesRaw.map(r => ({
    url: r['Top pages'] || r.Page || '',
    clicks: parseNum(r.Clicks),
    impressions: parseNum(r.Impressions),
    ctr: parseNum(r.CTR),
    position: parseNum(r.Position),
  })).sort((a, b) => b.impressions - a.impressions);

  // ── Countries ──
  const countriesRaw = loadCSV('Countries.csv');
  const countries = countriesRaw.map(r => ({
    country: r.Country || '',
    clicks: parseNum(r.Clicks),
    impressions: parseNum(r.Impressions),
    ctr: parseNum(r.CTR),
    position: parseNum(r.Position),
  })).sort((a, b) => b.impressions - a.impressions);

  // ── Devices ── (small fixed set; left in export order)
  const devicesRaw = loadCSV('Devices.csv');
  const devices = devicesRaw.map(r => ({
    device: r.Device || '',
    clicks: parseNum(r.Clicks),
    impressions: parseNum(r.Impressions),
    ctr: parseNum(r.CTR),
    position: parseNum(r.Position),
  }));

  // ── Summary stats ── (avgCtr recomputed from totals, not averaged CTRs)
  const totalClicks = chart.reduce((s, d) => s + d.clicks, 0);
  const totalImpressions = chart.reduce((s, d) => s + d.impressions, 0);
  const avgPosition = chart.length
    ? (chart.reduce((s, d) => s + d.position, 0) / chart.length).toFixed(1)
    : 0;
  const avgCtr = totalImpressions > 0
    ? ((totalClicks / totalImpressions) * 100).toFixed(2)
    : 0;
  const dateRange = chart.length
    ? `${chart[0].date} → ${chart[chart.length - 1].date}`
    : '';

  return {
    chart,
    queries,
    pages,
    countries,
    devices,
    summary: { totalClicks, totalImpressions, avgPosition, avgCtr, dateRange },
    folder: folderName,
  };
}
package/scheduler.js ADDED
@@ -0,0 +1,142 @@
1
/**
 * Crawl Scheduler
 *
 * Decides what to crawl next based on freshness windows.
 * Returns ONE domain per run — never hammers multiple sites at once.
 *
 * Freshness windows:
 * target site: 7 days (your own site changes frequently)
 * competitors: 14 days (they don't change that often)
 *
 * Priority order:
 * 1. Target site (always first if stale)
 * 2. Competitor sites (round-robin, oldest-crawled-first)
 * 3. Nothing → exit cleanly (DONE signal)
 */

import { readFileSync, readdirSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';

// ESM modules have no __dirname; reconstruct it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Staleness thresholds in milliseconds, keyed by site role.
const FRESHNESS = {
  target: 7 * 24 * 60 * 60 * 1000, // 7 days
  competitor: 14 * 24 * 60 * 60 * 1000, // 14 days
};
27
+
28
/**
 * Load all project configs from <module-dir>/config.
 *
 * Skips example.json, underscore-prefixed files, and anything that fails
 * to parse as JSON.
 *
 * @returns {object[]} parsed config objects ([] when the config dir is
 *   missing — the original threw from readdirSync on a fresh install).
 */
export function loadAllConfigs() {
  const configDir = join(__dirname, 'config');

  let files;
  try {
    files = readdirSync(configDir);
  } catch {
    // No config directory yet (fresh install) → no projects, not a crash.
    return [];
  }

  return files
    .filter(f => f.endsWith('.json') && !f.startsWith('_') && f !== 'example.json')
    .map(f => {
      // Malformed JSON is tolerated: map to null, filtered out below.
      try { return JSON.parse(readFileSync(join(configDir, f), 'utf8')); }
      catch { return null; }
    })
    .filter(Boolean);
}
41
+
42
/**
 * Get the next domain to crawl across all projects.
 * Returns null if everything is fresh.
 *
 * @param {object} db - DatabaseSync instance
 * @returns {{ project, domain, url, role } | null}
 */
export function getNextCrawlTarget(db) {
  const configs = loadAllConfigs();
  const now = Date.now();

  // Build a flat list of all stale sites with their last crawl time
  const candidates = [];

  for (const config of configs) {
    // Guard every list — the original spread config.competitors unguarded,
    // so a config without a competitors array crashed the scheduler; also
    // drop a missing config.target instead of dereferencing undefined.
    const allSites = [
      config.target,
      ...(config.owned || []),
      ...(config.competitors || []),
    ].filter(Boolean);

    for (const site of allSites) {
      const row = db.prepare(
        'SELECT last_crawled FROM domains WHERE domain = ? AND project = ?'
      ).get(site.domain, config.project);

      // Never-crawled domains get lastCrawled = 0 → maximally stale.
      const lastCrawled = row?.last_crawled || 0;
      const window = site.role === 'competitor' ? FRESHNESS.competitor : FRESHNESS.target;
      const staleSince = now - lastCrawled;
      const isStale = staleSince > window;

      if (isStale) {
        candidates.push({
          project: config.project,
          domain: site.domain,
          url: site.url,
          role: site.role,
          lastCrawled,
          staleSince,
        });
      }
    }
  }

  if (!candidates.length) return null;

  // Owned properties + target site first, then oldest competitor
  const targets = candidates.filter(c => c.role !== 'competitor');
  if (targets.length) return targets[0];

  // Oldest competitor (most stale first)
  candidates.sort((a, b) => a.lastCrawled - b.lastCrawled);
  return candidates[0];
}
92
+
93
/**
 * Check if analysis is needed for a project.
 * True when the most recent crawl of any of the project's domains is newer
 * than the most recent recorded analysis.
 *
 * @param {object} db - DatabaseSync instance
 * @param {string} project - project identifier
 * @returns {boolean}
 */
export function needsAnalysis(db, project) {
  // Newest analysis timestamp for this project (0 when none recorded).
  const analysisRow = db.prepare(
    'SELECT MAX(generated_at) as t FROM analyses WHERE project = ?'
  ).get(project);
  const latestAnalysis = analysisRow?.t || 0;

  // Newest crawl timestamp across the project's domains (0 when none).
  const crawlRow = db.prepare(`
    SELECT MAX(last_crawled) as t FROM domains WHERE project = ?
  `).get(project);
  const latestCrawl = crawlRow?.t || 0;

  return latestCrawl > latestAnalysis;
}
108
+
109
/**
 * Human-readable status of all domains.
 *
 * @param {object} db - DatabaseSync instance
 * @returns {object[]} one row per configured site: project, domain, role,
 *   lastCrawled (ISO date or 'never'), daysAgo, status, freshnessWindow
 */
export function getCrawlStatus(db) {
  const configs = loadAllConfigs();
  const now = Date.now();
  const rows = [];

  for (const config of configs) {
    // Guard every list — same unguarded-competitors defect as in
    // getNextCrawlTarget: a config without a competitors array threw here.
    const allSites = [
      config.target,
      ...(config.owned || []),
      ...(config.competitors || []),
    ].filter(Boolean);

    for (const site of allSites) {
      const row = db.prepare(
        'SELECT last_crawled FROM domains WHERE domain = ? AND project = ?'
      ).get(site.domain, config.project);

      const lastCrawled = row?.last_crawled;
      const window = site.role === 'competitor' ? FRESHNESS.competitor : FRESHNESS.target;
      // Never-crawled counts as stale.
      const isStale = !lastCrawled || (now - lastCrawled) > window;
      const daysAgo = lastCrawled ? Math.round((now - lastCrawled) / 86400000) : null;

      rows.push({
        project: config.project,
        domain: site.domain,
        role: site.role,
        lastCrawled: lastCrawled ? new Date(lastCrawled).toISOString().split('T')[0] : 'never',
        daysAgo: daysAgo ?? '—',
        status: isStale ? '🔴 stale' : '✅ fresh',
        freshnessWindow: site.role === 'competitor' ? '14d' : '7d',
      });
    }
  }

  return rows;
}