seo-intel 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/.env.example +41 -0
  2. package/LICENSE +75 -0
  3. package/README.md +243 -0
  4. package/Start SEO Intel.bat +9 -0
  5. package/Start SEO Intel.command +8 -0
  6. package/cli.js +3727 -0
  7. package/config/example.json +29 -0
  8. package/config/setup-wizard.js +522 -0
  9. package/crawler/index.js +566 -0
  10. package/crawler/robots.js +103 -0
  11. package/crawler/sanitize.js +124 -0
  12. package/crawler/schema-parser.js +168 -0
  13. package/crawler/sitemap.js +103 -0
  14. package/crawler/stealth.js +393 -0
  15. package/crawler/subdomain-discovery.js +341 -0
  16. package/db/db.js +213 -0
  17. package/db/schema.sql +120 -0
  18. package/exports/competitive.js +186 -0
  19. package/exports/heuristics.js +67 -0
  20. package/exports/queries.js +197 -0
  21. package/exports/suggestive.js +230 -0
  22. package/exports/technical.js +180 -0
  23. package/exports/templates.js +77 -0
  24. package/lib/gate.js +204 -0
  25. package/lib/license.js +369 -0
  26. package/lib/oauth.js +432 -0
  27. package/lib/updater.js +324 -0
  28. package/package.json +68 -0
  29. package/reports/generate-html.js +6194 -0
  30. package/reports/generate-site-graph.js +949 -0
  31. package/reports/gsc-loader.js +190 -0
  32. package/scheduler.js +142 -0
  33. package/seo-audit.js +619 -0
  34. package/seo-intel.png +0 -0
  35. package/server.js +602 -0
  36. package/setup/ROADMAP.md +109 -0
  37. package/setup/checks.js +483 -0
  38. package/setup/config-builder.js +227 -0
  39. package/setup/engine.js +65 -0
  40. package/setup/installers.js +197 -0
  41. package/setup/models.js +328 -0
  42. package/setup/openclaw-bridge.js +329 -0
  43. package/setup/validator.js +395 -0
  44. package/setup/web-routes.js +688 -0
  45. package/setup/wizard.html +2920 -0
  46. package/start-seo-intel.sh +8 -0
@@ -0,0 +1,227 @@
1
+ /**
2
+ * SEO Intel — Config Builder
3
+ *
4
+ * Generates project configuration files and manages .env updates.
5
+ * Extracted from config/setup-wizard.js for reuse by both CLI and web wizard.
6
+ */
7
+
8
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';
11
+
12
+ const __dirname = dirname(fileURLToPath(import.meta.url));
13
+ const ROOT = join(__dirname, '..');
14
+
15
+ // ── Helpers ────────────────────────────────────────────────────────────────
16
+
17
/**
 * Convert a free-form label into a lowercase, hyphen-separated slug.
 *
 * Every run of characters outside `a-z0-9` collapses into a single `-`, and a
 * leading or trailing `-` is stripped.
 *
 * @param {*} s - label to slugify; `null`/`undefined` are treated as an empty
 *   string and other non-strings are coerced with `String()`
 * @returns {string} the slug (possibly empty)
 */
export function slugify(s) {
  // Coerce first so a missing project name yields '' instead of a TypeError.
  return String(s ?? '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '');
}
20
+
21
/**
 * Extract the hostname (without a leading `www.`) from a URL or bare domain.
 *
 * Input that does not begin with an `http://` or `https://` scheme
 * (case-insensitive) is parsed as if prefixed with `https://`. The scheme test
 * requires the `://` separator so that hosts which merely start with "http"
 * (e.g. `httpbin.org/get`) are still treated as bare domains.
 *
 * @param {string} url - full URL or bare domain, optionally with a path
 * @returns {string} the hostname, or the input unchanged when it is not a
 *   string or cannot be parsed as a URL
 */
export function domainFromUrl(url) {
  if (typeof url !== 'string') return url;
  try {
    const candidate = url.trim();
    const absolute = /^https?:\/\//i.test(candidate) ? candidate : `https://${candidate}`;
    return new URL(absolute).hostname.replace(/^www\./, '');
  } catch {
    // Unparseable input is handed back untouched so callers can report it.
    return url;
  }
}
28
+
29
+ // ── Build Project Config ────────────────────────────────────────────────────
30
+
31
/**
 * Build a complete project config object.
 *
 * URLs (target, competitors, owned) are trimmed and given an `https://` scheme
 * when they do not already start with `http://` or `https://`. The `owned`
 * section is only added when at least one owned site is supplied, and Ollama
 * settings are only added to `crawl` when provided.
 *
 * @param {object} params
 * @param {string} params.projectName - human-readable name; slugified into `project`
 * @param {string} params.targetUrl - URL or bare domain of the site being analysed
 * @param {string} params.siteName - display name; falls back to the project slug
 * @param {string} params.industry
 * @param {string} params.audience
 * @param {string} params.goal
 * @param {string} [params.maturity='early stage']
 * @param {Array<{url: string}>} [params.competitors=[]]
 * @param {Array<{url: string}>} [params.owned=[]]
 * @param {string} [params.crawlMode='standard']
 * @param {number} [params.pagesPerDomain=50]
 * @param {string} [params.ollamaHost]
 * @param {string} [params.extractionModel]
 * @returns {object} Full config JSON
 */
export function buildProjectConfig({
  projectName,
  targetUrl,
  siteName,
  industry,
  audience,
  goal,
  maturity = 'early stage',
  competitors = [],
  owned = [],
  crawlMode = 'standard',
  pagesPerDomain = 50,
  ollamaHost,
  extractionModel,
}) {
  // Require the "://" separator: a plain startsWith('http') check would treat
  // hosts such as "httpbin.org" as already having a scheme. An existing scheme
  // is lower-cased so downstream parsing sees a canonical prefix.
  const withScheme = (raw) => {
    const trimmed = raw.trim();
    return /^https?:\/\//i.test(trimmed)
      ? trimmed.replace(/^https?/i, (scheme) => scheme.toLowerCase())
      : `https://${trimmed}`;
  };

  // Shared shape for competitor and owned entries.
  const toSiteEntry = (site, role) => {
    const url = withScheme(site.url);
    return { url, domain: domainFromUrl(url), role };
  };

  const slug = slugify(projectName);
  const normalizedUrl = withScheme(targetUrl);

  const config = {
    project: slug,
    crawl: {
      mode: crawlMode,
      pagesPerDomain,
      depth: 3,
    },
    context: {
      siteName: siteName || slug,
      url: normalizedUrl,
      industry: industry || '',
      audience: audience || '',
      goal: goal || '',
      maturity,
    },
    target: {
      url: normalizedUrl,
      // Derived from the normalized URL so url and domain always agree.
      domain: domainFromUrl(normalizedUrl),
      role: 'target',
    },
    competitors: competitors.map((site) => toSiteEntry(site, 'competitor')),
  };

  // Optional: owned subdomains
  if (owned.length > 0) {
    config.owned = owned.map((site) => toSiteEntry(site, 'owned'));
  }

  // Optional: Ollama settings — only write the keys that were actually given,
  // so the config never carries an `undefined` placeholder.
  if (ollamaHost || extractionModel) {
    if (ollamaHost !== undefined) config.crawl.ollamaHost = ollamaHost;
    if (extractionModel !== undefined) config.crawl.extractionModel = extractionModel;
  }

  return config;
}
119
+
120
+ // ── Write Project Config ────────────────────────────────────────────────────
121
+
122
/**
 * Write a project config to disk as `<rootDir>/config/<project>.json`.
 *
 * The `config` directory is created when it does not exist yet, so the call
 * also works against a fresh or overridden root directory.
 *
 * @param {object} config - full config object from buildProjectConfig()
 * @param {string} [rootDir] - override root directory
 * @returns {{ path: string, overwritten: boolean }} the written path, and
 *   whether a file already existed at that path before the write
 */
export function writeProjectConfig(config, rootDir = ROOT) {
  const configDir = join(rootDir, 'config');
  const configPath = join(configDir, `${config.project}.json`);
  const overwritten = existsSync(configPath);

  // writeFileSync does not create parent directories.
  mkdirSync(configDir, { recursive: true });
  writeFileSync(configPath, JSON.stringify(config, null, 2) + '\n');

  return { path: configPath, overwritten };
}
135
+
136
+ // ── .env Management ─────────────────────────────────────────────────────────
137
+
138
+ /**
139
+ * Write a single key to .env (create or update).
140
+ */
141
+ export function writeEnvKey(key, value, rootDir = ROOT) {
142
+ const envPath = join(rootDir, '.env');
143
+ let content = existsSync(envPath) ? readFileSync(envPath, 'utf8') : '';
144
+ const regex = new RegExp(`^${key}=.*$`, 'm');
145
+ if (regex.test(content)) {
146
+ content = content.replace(regex, `${key}=${value}`);
147
+ } else {
148
+ content += `\n${key}=${value}`;
149
+ }
150
+ writeFileSync(envPath, content.trim() + '\n');
151
+ }
152
+
153
/**
 * Apply a batch of setup choices to the .env file.
 *
 * When .env is missing it is first seeded from .env.example, or from a
 * one-line header comment if no template exists. Each recognised option that
 * is not undefined, null or the empty string is then written through
 * writeEnvKey() under its environment-variable name.
 *
 * @param {object} [values] - setup choices keyed by option name (e.g. `ollamaUrl`)
 * @param {string} [rootDir] - directory containing the .env file
 * @returns {{ path: string }} location of the .env file
 */
export function updateEnvForSetup(values = {}, rootDir = ROOT) {
  const envPath = join(rootDir, '.env');
  const examplePath = join(rootDir, '.env.example');

  // Seed the file so later key writes always have something to update.
  if (!existsSync(envPath)) {
    const seed = existsSync(examplePath)
      ? readFileSync(examplePath, 'utf8')
      : '# SEO Intel Configuration\n';
    writeFileSync(envPath, seed);
  }

  // Option name -> environment variable name, in the order they are written.
  const optionToEnvName = [
    ['ollamaUrl', 'OLLAMA_URL'],
    ['ollamaModel', 'OLLAMA_MODEL'],
    ['ollamaCtx', 'OLLAMA_CTX'],
    ['ollamaTimeout', 'OLLAMA_TIMEOUT_MS'],
    ['geminiKey', 'GEMINI_API_KEY'],
    ['anthropicKey', 'ANTHROPIC_API_KEY'],
    ['openaiKey', 'OPENAI_API_KEY'],
    ['deepseekKey', 'DEEPSEEK_API_KEY'],
    ['crawlDelay', 'CRAWL_DELAY_MS'],
    ['crawlMaxPages', 'CRAWL_MAX_PAGES'],
    ['crawlTimeout', 'CRAWL_TIMEOUT_MS'],
  ];

  for (const [option, envName] of optionToEnvName) {
    const chosen = values[option];
    const isBlank = chosen === undefined || chosen === null || chosen === '';
    if (isBlank) continue;
    writeEnvKey(envName, String(chosen), rootDir);
  }

  return { path: envPath };
}
193
+
194
/**
 * Validate a project config for completeness.
 *
 * Checks that the project name, target URL/domain and site name are present,
 * that the target URL parses, and that every competitor entry carries both a
 * URL and a domain. Malformed input (a non-object config, a non-array
 * `competitors`, or a null competitor entry) is reported as a validation error
 * rather than thrown.
 *
 * @param {object} config
 * @returns {{ valid: boolean, errors: string[] }}
 */
export function validateConfig(config) {
  if (config === null || typeof config !== 'object') {
    return { valid: false, errors: ['Config must be an object'] };
  }

  const errors = [];

  if (!config.project) errors.push('Missing project name');
  if (!config.target?.url) errors.push('Missing target URL');
  if (!config.target?.domain) errors.push('Missing target domain');
  if (!config.context?.siteName) errors.push('Missing site name');

  // Validate URL format
  if (config.target?.url) {
    try {
      new URL(config.target.url);
    } catch {
      errors.push(`Invalid target URL: ${config.target.url}`);
    }
  }

  // Validate competitors
  if (config.competitors) {
    if (!Array.isArray(config.competitors)) {
      errors.push('Competitors must be an array');
    } else {
      for (const competitor of config.competitors) {
        // Optional chaining keeps a null/primitive entry from throwing.
        if (!competitor?.url || !competitor?.domain) {
          errors.push(`Competitor missing URL or domain: ${JSON.stringify(competitor)}`);
        }
      }
    }
  }

  return { valid: errors.length === 0, errors };
}
@@ -0,0 +1,65 @@
1
+ /**
2
+ * SEO Intel — Setup Engine
3
+ *
4
+ * Facade that re-exports all setup modules.
5
+ * Used by both CLI wizard (config/setup-wizard.js) and web wizard (setup/web-routes.js).
6
+ *
7
+ * Usage:
8
+ * import { fullSystemCheck, getModelRecommendations, ... } from './setup/engine.js';
9
+ */
10
+
11
+ // System detection
12
+ export {
13
+ checkNodeVersion,
14
+ checkNpm,
15
+ checkOllamaLocal,
16
+ checkOllamaRemote,
17
+ checkOllamaAuto,
18
+ checkPlaywright,
19
+ checkNpmDeps,
20
+ checkEnvFile,
21
+ checkExistingConfigs,
22
+ checkGscData,
23
+ checkOpenClaw,
24
+ detectOS,
25
+ detectVRAM,
26
+ fullSystemCheck,
27
+ parseEnvFile,
28
+ } from './checks.js';
29
+
30
+ // Model recommendations
31
+ export {
32
+ EXTRACTION_MODELS,
33
+ ANALYSIS_MODELS,
34
+ recommendExtractionModel,
35
+ recommendAnalysisModel,
36
+ getModelRecommendations,
37
+ } from './models.js';
38
+
39
+ // Auto-installers
40
+ export {
41
+ installNpmDeps,
42
+ installPlaywright,
43
+ pullOllamaModel,
44
+ createEnvFile,
45
+ } from './installers.js';
46
+
47
+ // Pipeline validation
48
+ export {
49
+ testOllamaConnectivity,
50
+ testApiKey,
51
+ testCrawl,
52
+ testExtraction,
53
+ runFullValidation,
54
+ } from './validator.js';
55
+
56
+ // Config generation
57
+ export {
58
+ slugify,
59
+ domainFromUrl,
60
+ buildProjectConfig,
61
+ writeProjectConfig,
62
+ writeEnvKey,
63
+ updateEnvForSetup,
64
+ validateConfig,
65
+ } from './config-builder.js';
@@ -0,0 +1,197 @@
1
+ /**
2
+ * SEO Intel — Auto-Installers
3
+ *
4
+ * Async generator functions that install dependencies and yield progress events.
5
+ * Both CLI and web wizard consume the same generators — CLI prints, web streams via SSE.
6
+ *
7
+ * Usage:
8
+ * for await (const ev of installNpmDeps()) console.log(ev.message);
9
+ */
10
+
11
+ import { spawn } from 'child_process';
12
+ import { existsSync, readFileSync, writeFileSync } from 'fs';
13
+ import { join, dirname } from 'path';
14
+ import { fileURLToPath } from 'url';
15
+
16
+ const __dirname = dirname(fileURLToPath(import.meta.url));
17
+ const ROOT = join(__dirname, '..');
18
+
19
+ // ── Event helpers ───────────────────────────────────────────────────────────
20
+
21
/**
 * Build a progress event object.
 *
 * @param {string} phase - identifier of the step emitting the event
 * @param {string} status - state of that step (callers in this file use
 *   'start', 'progress', 'done' and 'error')
 * @param {string} message - human-readable description
 * @param {object} [extra] - additional fields copied onto the event; they
 *   override the base fields on a name clash
 * @returns {object} event with `phase`, `status`, `message`, `ts` (Date.now()
 *   at creation) plus any extra fields
 */
function ev(phase, status, message, extra = {}) {
  const event = { phase, status, message, ts: Date.now() };
  return Object.assign(event, extra);
}
24
+
25
+ // ── npm install ─────────────────────────────────────────────────────────────
26
+
27
/**
 * Run `npm install --no-audit --no-fund` and report the outcome as events.
 *
 * Yields a 'start' event, then exactly one 'done' or 'error' event. A non-zero
 * exit code is reported with the first 500 characters of stderr; a failure to
 * run the command at all is reported with the error message.
 *
 * @param {string} [rootDir] - directory in which to run npm
 * @yields {object} progress events for the 'npm-install' phase
 */
export async function* installNpmDeps(rootDir = ROOT) {
  const phase = 'npm-install';
  yield ev(phase, 'start', 'Installing npm dependencies...');

  try {
    const outcome = await runCommand('npm', ['install', '--no-audit', '--no-fund'], rootDir);
    const succeeded = outcome.exitCode === 0;

    yield succeeded
      ? ev(phase, 'done', 'npm dependencies installed successfully.')
      : ev(phase, 'error', `npm install failed (exit ${outcome.exitCode}): ${outcome.stderr.slice(0, 500)}`);
  } catch (failure) {
    yield ev(phase, 'error', `npm install error: ${failure.message}`);
  }
}
42
+
43
+ // ── Playwright Chromium ─────────────────────────────────────────────────────
44
+
45
/**
 * Run `npx playwright install chromium` and report the outcome as events.
 *
 * Yields a 'start' event, then exactly one 'done' or 'error' event. A non-zero
 * exit code is reported with the first 500 characters of stderr; a failure to
 * run the command at all is reported with the error message.
 *
 * @param {string} [rootDir] - directory in which to run npx
 * @yields {object} progress events for the 'playwright' phase
 */
export async function* installPlaywright(rootDir = ROOT) {
  const phase = 'playwright';
  yield ev(phase, 'start', 'Installing Playwright Chromium browser (~150MB)...');

  try {
    const outcome = await runCommand('npx', ['playwright', 'install', 'chromium'], rootDir);
    const succeeded = outcome.exitCode === 0;

    yield succeeded
      ? ev(phase, 'done', 'Playwright Chromium installed successfully.')
      : ev(phase, 'error', `Playwright install failed (exit ${outcome.exitCode}): ${outcome.stderr.slice(0, 500)}`);
  } catch (failure) {
    yield ev(phase, 'error', `Playwright install error: ${failure.message}`);
  }
}
60
+
61
+ // ── Ollama model pull ───────────────────────────────────────────────────────
62
+
63
/**
 * Pull a model through Ollama's `/api/pull` endpoint, streaming progress events.
 *
 * Yields a 'start' event, then 'progress' events parsed from the NDJSON
 * response (download percentage in 5% steps, or Ollama's own status text), and
 * finally a 'done' or 'error' event. Errors are reported as events; nothing is
 * thrown to the consumer.
 *
 * @param {string} model - model name to pull
 * @param {string} [host='http://localhost:11434'] - base URL of the Ollama server
 * @yields {object} progress events for the 'ollama-pull' phase
 */
export async function* pullOllamaModel(model, host = 'http://localhost:11434') {
  yield ev('ollama-pull', 'start', `Pulling model ${model} from Ollama...`);

  try {
    const controller = new AbortController();
    // Abort if the server has not answered within 10 minutes.
    const timeout = setTimeout(() => controller.abort(), 600000);

    // Tolerate a trailing slash on the configured host.
    const baseUrl = host.replace(/\/+$/, '');

    let res;
    try {
      res = await fetch(`${baseUrl}/api/pull`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ name: model, stream: true }),
        signal: controller.signal,
      });
    } finally {
      // Always clear the timer — including when fetch rejects — otherwise the
      // pending timeout keeps the process alive for the full 10 minutes.
      clearTimeout(timeout);
    }

    if (!res.ok) {
      const text = await res.text().catch(() => '');
      yield ev('ollama-pull', 'error', `Ollama pull failed: HTTP ${res.status} ${text.slice(0, 200)}`);
      return;
    }

    let lastPercent = -1;

    // Turn one NDJSON line into zero or one events; returns true when the line
    // carried a fatal error and streaming must stop.
    const emitForLine = function* (line) {
      let data;
      try {
        data = JSON.parse(line);
      } catch {
        return false; // Skip unparseable lines
      }
      if (data === null || typeof data !== 'object') return false;

      if (data.error) {
        yield ev('ollama-pull', 'error', `Ollama error: ${data.error}`);
        return true;
      }

      if (data.total && data.completed) {
        const percent = Math.round((data.completed / data.total) * 100);
        if (percent !== lastPercent && percent % 5 === 0) {
          lastPercent = percent;
          yield ev('ollama-pull', 'progress', `Downloading ${model}... ${percent}%`, { progress: percent });
        }
      } else if (data.status) {
        // Status messages like "pulling manifest", "verifying sha256 digest"
        yield ev('ollama-pull', 'progress', data.status);
      }
      return false;
    };

    // Network chunks do not align with line boundaries: keep the unfinished
    // tail in `pending` and use one streaming decoder so multi-byte characters
    // split across chunks are decoded correctly.
    const decoder = new TextDecoder();
    let pending = '';

    for await (const chunk of res.body) {
      pending += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      const lines = pending.split('\n');
      pending = lines.pop();

      for (const line of lines) {
        if (!line) continue;
        const fatal = yield* emitForLine(line);
        if (fatal) return;
      }
    }

    // Flush whatever remains when the stream ends without a final newline.
    pending += decoder.decode();
    if (pending) {
      const fatal = yield* emitForLine(pending);
      if (fatal) return;
    }

    yield ev('ollama-pull', 'done', `Model ${model} pulled successfully.`);
  } catch (err) {
    if (err.name === 'AbortError') {
      yield ev('ollama-pull', 'error', `Ollama pull timed out after 10 minutes.`);
    } else {
      yield ev('ollama-pull', 'error', `Ollama pull error: ${err.message}`);
    }
  }
}
127
+
128
+ // ── Create .env from template ───────────────────────────────────────────────
129
+
130
/**
 * Ensure a .env file exists in `rootDir`, yielding a single 'done' event.
 *
 * An existing .env is left untouched. Otherwise the file is copied from
 * .env.example when that template exists, or written with a small set of
 * built-in defaults when it does not.
 *
 * @param {string} [rootDir] - directory that should contain the .env file
 * @yields {object} one 'env-create' event describing what happened
 */
export function* createEnvFile(rootDir = ROOT) {
  const envPath = join(rootDir, '.env');
  const examplePath = join(rootDir, '.env.example');

  if (existsSync(envPath)) {
    yield ev('env-create', 'done', '.env file already exists — keeping it.');
    return;
  }

  const hasTemplate = existsSync(examplePath);

  let contents;
  if (hasTemplate) {
    contents = readFileSync(examplePath, 'utf8');
  } else {
    // Built-in fallback used when the package ships without a template.
    const defaultLines = [
      '# SEO Intel Configuration',
      '',
      '# Cloud model for analysis (pick one)',
      'GEMINI_API_KEY=',
      '# ANTHROPIC_API_KEY=',
      '# OPENAI_API_KEY=',
      '',
      '# Local Ollama for extraction',
      'OLLAMA_URL=http://localhost:11434',
      'OLLAMA_MODEL=qwen3.5:9b',
      'OLLAMA_CTX=8192',
      '',
      '# Crawler settings',
      'CRAWL_DELAY_MS=1500',
      'CRAWL_MAX_PAGES=50',
      'CRAWL_TIMEOUT_MS=15000',
      '',
    ];
    contents = defaultLines.join('\n');
  }

  writeFileSync(envPath, contents);

  const summary = hasTemplate
    ? 'Created .env from .env.example template.'
    : 'Created .env with default values.';
  yield ev('env-create', 'done', summary);
}
169
+
170
+ // ── Spawn helper ────────────────────────────────────────────────────────────
171
+
172
/**
 * Run a command through the shell and collect its output.
 *
 * @param {string} cmd - executable name
 * @param {string[]} args - command-line arguments
 * @param {string} [cwd] - working directory for the child process
 * @returns {Promise<{ exitCode: number|null, stdout: string, stderr: string }>}
 *   resolves once the process closes, whatever its exit code; rejects if the
 *   process cannot be spawned or does not finish within 5 minutes
 */
function runCommand(cmd, args, cwd = ROOT) {
  return new Promise((resolve, reject) => {
    let stdout = '';
    let stderr = '';

    const proc = spawn(cmd, args, {
      cwd,
      shell: true,
      env: { ...process.env, FORCE_COLOR: '0' },
    });

    // Timeout after 5 minutes for npm/playwright
    const timer = setTimeout(() => {
      proc.kill('SIGTERM');
      reject(new Error('Command timed out after 5 minutes'));
    }, 300000);

    proc.stdout?.on('data', (d) => { stdout += d.toString(); });
    proc.stderr?.on('data', (d) => { stderr += d.toString(); });

    // Clear the timer on every exit path: a pending timer would otherwise keep
    // the event loop alive for 5 minutes and then signal a finished process.
    proc.on('error', (err) => {
      clearTimeout(timer);
      reject(err);
    });
    proc.on('close', (exitCode) => {
      clearTimeout(timer);
      resolve({ exitCode, stdout, stderr });
    });
  });
}