seo-intel 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/.env.example +41 -0
  2. package/LICENSE +75 -0
  3. package/README.md +243 -0
  4. package/Start SEO Intel.bat +9 -0
  5. package/Start SEO Intel.command +8 -0
  6. package/cli.js +3727 -0
  7. package/config/example.json +29 -0
  8. package/config/setup-wizard.js +522 -0
  9. package/crawler/index.js +566 -0
  10. package/crawler/robots.js +103 -0
  11. package/crawler/sanitize.js +124 -0
  12. package/crawler/schema-parser.js +168 -0
  13. package/crawler/sitemap.js +103 -0
  14. package/crawler/stealth.js +393 -0
  15. package/crawler/subdomain-discovery.js +341 -0
  16. package/db/db.js +213 -0
  17. package/db/schema.sql +120 -0
  18. package/exports/competitive.js +186 -0
  19. package/exports/heuristics.js +67 -0
  20. package/exports/queries.js +197 -0
  21. package/exports/suggestive.js +230 -0
  22. package/exports/technical.js +180 -0
  23. package/exports/templates.js +77 -0
  24. package/lib/gate.js +204 -0
  25. package/lib/license.js +369 -0
  26. package/lib/oauth.js +432 -0
  27. package/lib/updater.js +324 -0
  28. package/package.json +68 -0
  29. package/reports/generate-html.js +6194 -0
  30. package/reports/generate-site-graph.js +949 -0
  31. package/reports/gsc-loader.js +190 -0
  32. package/scheduler.js +142 -0
  33. package/seo-audit.js +619 -0
  34. package/seo-intel.png +0 -0
  35. package/server.js +602 -0
  36. package/setup/ROADMAP.md +109 -0
  37. package/setup/checks.js +483 -0
  38. package/setup/config-builder.js +227 -0
  39. package/setup/engine.js +65 -0
  40. package/setup/installers.js +197 -0
  41. package/setup/models.js +328 -0
  42. package/setup/openclaw-bridge.js +329 -0
  43. package/setup/validator.js +395 -0
  44. package/setup/web-routes.js +688 -0
  45. package/setup/wizard.html +2920 -0
  46. package/start-seo-intel.sh +8 -0
@@ -0,0 +1,395 @@
1
+ /**
2
+ * SEO Intel — Pipeline Validator
3
+ *
4
+ * End-to-end tests that prove each component works:
5
+ * 1. Ollama connectivity (POST tiny prompt)
6
+ * 2. Analysis API key validity (minimal API call)
7
+ * 3. Test crawl (fetch 1 page with Playwright)
8
+ * 4. Test extraction (run Qwen on crawled content)
9
+ */
10
+
11
+ import { dirname, join } from 'path';
12
+ import { fileURLToPath } from 'url';
13
+
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Parent directory of the folder that contains this module.
const ROOT = dirname(__dirname);
16
+
17
+ // ── Test 1: Ollama Connectivity ─────────────────────────────────────────────
18
+
/**
 * Test an Ollama host + model by sending a tiny prompt to `/api/generate`.
 *
 * The whole exchange (connect, headers and body read) is bounded by a single
 * 15 second abort timer, which is always cleared before returning so that a
 * failed connection does not leave a pending timer holding the event loop open.
 *
 * Never throws: every failure is reported through the returned object.
 *
 * @param {string} host - e.g. 'http://localhost:11434' (a trailing slash is tolerated)
 * @param {string} model - e.g. 'qwen3.5:9b'
 * @returns {Promise<{ success: boolean, latencyMs: number, response?: string, error?: string }>}
 */
export async function testOllamaConnectivity(host, model) {
  const start = Date.now();
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 15000);

  try {
    // Strip trailing slashes so 'http://host:11434/' does not become '//api/generate'.
    const baseUrl = String(host).replace(/\/+$/, '');

    const res = await fetch(`${baseUrl}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        prompt: '/no_think\nRespond with exactly this JSON: {"status":"ok"}',
        format: 'json',
        stream: false,
        options: { num_predict: 20, temperature: 0.0 },
      }),
      signal: controller.signal,
    });

    // Latency is measured up to the arrival of the response headers.
    const latencyMs = Date.now() - start;

    if (!res.ok) {
      const text = await res.text().catch(() => '');
      return { success: false, latencyMs, error: `HTTP ${res.status}: ${text.slice(0, 200)}` };
    }

    const data = await res.json();
    if (data.error) {
      return { success: false, latencyMs, error: data.error };
    }

    // Fall back to the `thinking` field when `response` is empty.
    const response = (data.response || data.thinking || '').trim();
    return { success: true, latencyMs, response: response.slice(0, 100) };
  } catch (err) {
    return {
      success: false,
      latencyMs: Date.now() - start,
      error: err.name === 'AbortError' ? 'Timed out after 15s' : err.message,
    };
  } finally {
    // Runs on every path, including a rejected fetch, so the timer never leaks.
    clearTimeout(timeout);
  }
}
69
+
70
+ // ── Test 2: API Key Validity ────────────────────────────────────────────────
71
+
/**
 * Validate an analysis-provider API key by issuing one minimal request.
 *
 * Dispatches to the provider-specific probe and converts anything the probe
 * throws into a `{ valid: false }` result, so this function does not reject.
 *
 * @param {'gemini'|'claude'|'openai'|'deepseek'} provider
 * @param {string} key
 * @returns {Promise<{ valid: boolean, error?: string, latencyMs: number }>}
 */
export async function testApiKey(provider, key) {
  const start = Date.now();

  // Thunks keep each probe lazy: only the selected provider's function is touched.
  const probes = new Map([
    ['gemini', () => testGeminiKey(key, start)],
    ['claude', () => testAnthropicKey(key, start)],
    ['openai', () => testOpenAIKey(key, start)],
    ['deepseek', () => testDeepSeekKey(key, start)],
  ]);

  try {
    const probe = probes.get(provider);
    if (!probe) {
      return { valid: false, error: `Unknown provider: ${provider}`, latencyMs: 0 };
    }
    return await probe();
  } catch (err) {
    return { valid: false, error: err.message, latencyMs: Date.now() - start };
  }
}
99
+
/**
 * Probe a Google Gemini API key with a minimal `generateContent` request.
 *
 * The key is attached through `URLSearchParams`, so reserved characters in it
 * are percent-encoded instead of corrupting the query string. The request is
 * aborted after 15 seconds, matching the bound used by the other tests.
 *
 * @param {string} key - Gemini API key
 * @param {number} start - `Date.now()` timestamp taken by the caller, used for latency
 * @returns {Promise<{ valid: boolean, latencyMs: number, error?: string }>}
 * @throws Rejects on network failure or timeout; the caller is expected to catch.
 */
async function testGeminiKey(key, start) {
  const endpoint = new URL(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent',
  );
  endpoint.searchParams.set('key', key);

  const res = await fetch(endpoint.href, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      contents: [{ parts: [{ text: 'Respond with: ok' }] }],
      generationConfig: { maxOutputTokens: 5 },
    }),
    signal: AbortSignal.timeout(15000),
  });

  const latencyMs = Date.now() - start;
  if (res.ok) return { valid: true, latencyMs };

  // Error bodies are usually JSON, but fall back to the status code when the
  // body is not JSON or is a JSON `null`.
  const data = await res.json().catch(() => ({}));
  return { valid: false, latencyMs, error: data?.error?.message || `HTTP ${res.status}` };
}
116
+
/**
 * Probe an Anthropic API key with a minimal Messages API request.
 *
 * The request is aborted after 15 seconds so a stalled connection cannot hold
 * up validation, matching the bound used by the Ollama and crawl tests.
 *
 * @param {string} key - Anthropic API key, sent in the `x-api-key` header
 * @param {number} start - `Date.now()` timestamp taken by the caller, used for latency
 * @returns {Promise<{ valid: boolean, latencyMs: number, error?: string }>}
 * @throws Rejects on network failure or timeout; the caller is expected to catch.
 */
async function testAnthropicKey(key, start) {
  const res = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': key,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model: 'claude-sonnet-4-20250514',
      max_tokens: 5,
      messages: [{ role: 'user', content: 'Respond with: ok' }],
    }),
    signal: AbortSignal.timeout(15000),
  });

  const latencyMs = Date.now() - start;
  if (res.ok) return { valid: true, latencyMs };

  // Fall back to the status code when the body is not JSON or is a JSON `null`.
  const data = await res.json().catch(() => ({}));
  return { valid: false, latencyMs, error: data?.error?.message || `HTTP ${res.status}` };
}
137
+
/**
 * Probe an OpenAI API key with a minimal chat-completions request.
 *
 * The request is aborted after 15 seconds so a stalled connection cannot hold
 * up validation, matching the bound used by the Ollama and crawl tests.
 *
 * @param {string} key - OpenAI API key, sent as a Bearer token
 * @param {number} start - `Date.now()` timestamp taken by the caller, used for latency
 * @returns {Promise<{ valid: boolean, latencyMs: number, error?: string }>}
 * @throws Rejects on network failure or timeout; the caller is expected to catch.
 */
async function testOpenAIKey(key, start) {
  const res = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${key}`,
    },
    body: JSON.stringify({
      model: 'gpt-4o-mini',
      max_tokens: 5,
      messages: [{ role: 'user', content: 'Respond with: ok' }],
    }),
    signal: AbortSignal.timeout(15000),
  });

  const latencyMs = Date.now() - start;
  if (res.ok) return { valid: true, latencyMs };

  // Fall back to the status code when the body is not JSON or is a JSON `null`.
  const data = await res.json().catch(() => ({}));
  return { valid: false, latencyMs, error: data?.error?.message || `HTTP ${res.status}` };
}
157
+
/**
 * Probe a DeepSeek API key with a minimal chat-completions request.
 *
 * The request is aborted after 15 seconds so a stalled connection cannot hold
 * up validation, matching the bound used by the Ollama and crawl tests.
 *
 * @param {string} key - DeepSeek API key, sent as a Bearer token
 * @param {number} start - `Date.now()` timestamp taken by the caller, used for latency
 * @returns {Promise<{ valid: boolean, latencyMs: number, error?: string }>}
 * @throws Rejects on network failure or timeout; the caller is expected to catch.
 */
async function testDeepSeekKey(key, start) {
  const res = await fetch('https://api.deepseek.com/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${key}`,
    },
    body: JSON.stringify({
      model: 'deepseek-chat',
      max_tokens: 5,
      messages: [{ role: 'user', content: 'Respond with: ok' }],
    }),
    signal: AbortSignal.timeout(15000),
  });

  const latencyMs = Date.now() - start;
  if (res.ok) return { valid: true, latencyMs };

  // Fall back to the status code when the body is not JSON or is a JSON `null`.
  const data = await res.json().catch(() => ({}));
  return { valid: false, latencyMs, error: data?.error?.message || `HTTP ${res.status}` };
}
177
+
178
+ // ── Test 3: Crawl Test ──────────────────────────────────────────────────────
179
+
/**
 * Crawl a single page to verify that Playwright and Chromium work.
 *
 * The browser is closed on every path. Without that, a navigation failure
 * (timeout, DNS error, bad URL) would leave a headless Chromium process behind.
 *
 * Never throws: every failure is reported through the returned object.
 *
 * @param {string} url - page to crawl
 * @returns {Promise<{ success: boolean, title?: string, wordCount?: number, latencyMs: number, error?: string }>}
 */
export async function testCrawl(url) {
  const start = Date.now();
  let browser = null;

  try {
    // Dynamic import to avoid requiring playwright if just checking config
    const { chromium } = await import('playwright');

    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();

    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 15000 });

    const title = await page.title();
    const bodyText = await page.evaluate(() => document.body?.innerText || '');
    const wordCount = bodyText.split(/\s+/).filter(Boolean).length;

    // Close before measuring latency; clear the handle only once close succeeded.
    await browser.close();
    browser = null;

    return {
      success: true,
      title: title.slice(0, 100),
      wordCount,
      latencyMs: Date.now() - start,
    };
  } catch (err) {
    return {
      success: false,
      latencyMs: Date.now() - start,
      // Tolerate non-Error throwables, which have no `.message`.
      error: String(err?.message ?? err).slice(0, 300),
    };
  } finally {
    // Only reached with a live handle on failure paths; shutdown is best-effort
    // because the original error has already been captured above.
    if (browser) {
      await browser.close().catch(() => {});
    }
  }
}
218
+
219
+ // ── Test 4: Extraction Test ─────────────────────────────────────────────────
220
+
/**
 * Run a real extraction on sample content to verify Ollama + Qwen works end-to-end.
 *
 * Loads `../extractor/qwen.js` relative to this module and calls its
 * `extractPage` export. OLLAMA_URL, OLLAMA_MODEL and OLLAMA_TIMEOUT_MS are
 * overridden for the duration of the call and restored afterwards.
 *
 * The extractor is resolved as a `file:` URL from `import.meta.url` rather than
 * as a filesystem path, because `import()` rejects bare absolute paths on
 * Windows (a drive letter is parsed as a URL scheme).
 *
 * Never throws: every failure is reported through the returned object.
 *
 * @param {string} host - Ollama host
 * @param {string} model - Ollama model
 * @param {{ title: string, bodyText: string, url: string }} samplePage - crawled page data
 * @returns {Promise<{ success: boolean, keywordsFound?: number, latencyMs: number, preview?: object, error?: string }>}
 */
export async function testExtraction(host, model, samplePage) {
  const start = Date.now();

  try {
    const extractorUrl = new URL('../extractor/qwen.js', import.meta.url);
    const { extractPage } = await import(extractorUrl.href);

    // Override env vars temporarily for this test
    const origUrl = process.env.OLLAMA_URL;
    const origModel = process.env.OLLAMA_MODEL;
    const origTimeout = process.env.OLLAMA_TIMEOUT_MS;

    process.env.OLLAMA_URL = host;
    process.env.OLLAMA_MODEL = model;
    process.env.OLLAMA_TIMEOUT_MS = '30000'; // generous timeout for test

    try {
      const result = await extractPage({
        url: samplePage.url || 'https://example.com',
        title: samplePage.title || 'Test Page',
        metaDesc: samplePage.metaDesc || '',
        headings: samplePage.headings || [{ level: 1, text: samplePage.title || 'Test' }],
        // An empty bodyText falls back to the built-in sample sentence.
        bodyText: (samplePage.bodyText || 'This is a test page for SEO Intel extraction validation.').slice(0, 2000),
        schemaTypes: [],
        publishedDate: null,
        modifiedDate: null,
      });

      const keywords = result.keywords || [];
      return {
        // The extractor flags its fallback output with extraction_source 'degraded'.
        success: result.extraction_source !== 'degraded',
        keywordsFound: keywords.length,
        latencyMs: Date.now() - start,
        preview: {
          title: result.title?.slice(0, 60),
          intent: result.search_intent,
          keywords: keywords.slice(0, 5).map((k) => k.keyword),
          source: result.extraction_source,
        },
      };
    } finally {
      // Restore env vars; delete any that were unset, because assigning
      // `undefined` would store the string 'undefined'.
      if (origUrl !== undefined) process.env.OLLAMA_URL = origUrl;
      else delete process.env.OLLAMA_URL;
      if (origModel !== undefined) process.env.OLLAMA_MODEL = origModel;
      else delete process.env.OLLAMA_MODEL;
      if (origTimeout !== undefined) process.env.OLLAMA_TIMEOUT_MS = origTimeout;
      else delete process.env.OLLAMA_TIMEOUT_MS;
    }
  } catch (err) {
    return {
      success: false,
      latencyMs: Date.now() - start,
      // Tolerate non-Error throwables, which have no `.message`.
      error: String(err?.message ?? err).slice(0, 300),
    };
  }
}
285
+
286
+ // ── Full Validation Pipeline ────────────────────────────────────────────────
287
+
/**
 * Run all 4 tests sequentially, returning aggregate results.
 * Yields progress events for real-time feedback: a 'running' event before each
 * executed test, a 'pass' / 'fail' / 'skip' event for each step, and a final
 * 'summary' event carrying the collected `steps` array.
 *
 * Extraction only runs when the crawl succeeded and the Ollama connectivity
 * test passed. When it is skipped, the reported reason distinguishes "Ollama
 * not (fully) configured" from "Ollama configured but unreachable".
 *
 * @param {object} config
 * @param {string} config.ollamaHost
 * @param {string} config.ollamaModel
 * @param {string} [config.apiProvider] - 'gemini'|'claude'|'openai'|'deepseek'
 * @param {string} [config.apiKey]
 * @param {string} config.targetUrl
 * @returns {AsyncGenerator<{ step: string, status: string, detail: string, latencyMs?: number }>}
 */
export async function* runFullValidation(config) {
  const steps = [];

  // Tracked explicitly instead of inspecting steps[0], so the extraction
  // decision does not depend on the order in which steps were pushed.
  const ollamaConfigured = Boolean(config.ollamaHost && config.ollamaModel);
  let ollamaPassed = false;

  // Step 1: Ollama
  if (ollamaConfigured) {
    yield { step: 'ollama', status: 'running', detail: `Testing ${config.ollamaModel} at ${config.ollamaHost}...` };
    const result = await testOllamaConnectivity(config.ollamaHost, config.ollamaModel);
    ollamaPassed = Boolean(result.success);
    steps.push({ name: 'Ollama Connectivity', ...result, status: ollamaPassed ? 'pass' : 'fail' });
    yield {
      step: 'ollama',
      status: ollamaPassed ? 'pass' : 'fail',
      detail: ollamaPassed ? `Connected (${result.latencyMs}ms)` : `Failed: ${result.error}`,
      latencyMs: result.latencyMs,
    };
  } else {
    steps.push({ name: 'Ollama Connectivity', status: 'skip' });
    yield { step: 'ollama', status: 'skip', detail: 'No Ollama configured — extraction will use degraded mode' };
  }

  // Step 2: API Key
  if (config.apiProvider && config.apiKey) {
    yield { step: 'api-key', status: 'running', detail: `Validating ${config.apiProvider} API key...` };
    const result = await testApiKey(config.apiProvider, config.apiKey);
    steps.push({ name: 'API Key', ...result, status: result.valid ? 'pass' : 'fail' });
    yield {
      step: 'api-key',
      status: result.valid ? 'pass' : 'fail',
      detail: result.valid ? `${config.apiProvider} key valid (${result.latencyMs}ms)` : `Invalid: ${result.error}`,
      latencyMs: result.latencyMs,
    };
  } else {
    steps.push({ name: 'API Key', status: 'skip' });
    yield { step: 'api-key', status: 'skip', detail: 'No API key configured — analysis unavailable' };
  }

  // Step 3: Test Crawl
  if (config.targetUrl) {
    yield { step: 'crawl', status: 'running', detail: `Crawling ${config.targetUrl}...` };
    const result = await testCrawl(config.targetUrl);
    steps.push({ name: 'Test Crawl', ...result, status: result.success ? 'pass' : 'fail' });
    yield {
      step: 'crawl',
      status: result.success ? 'pass' : 'fail',
      detail: result.success
        ? `"${result.title}" — ${result.wordCount} words (${result.latencyMs}ms)`
        : `Failed: ${result.error}`,
      latencyMs: result.latencyMs,
      title: result.title,
      wordCount: result.wordCount,
    };

    // Step 4: Test Extraction (only if crawl succeeded AND Ollama available)
    if (result.success && ollamaPassed) {
      yield { step: 'extraction', status: 'running', detail: `Extracting with ${config.ollamaModel}...` };
      const extractResult = await testExtraction(config.ollamaHost, config.ollamaModel, {
        url: config.targetUrl,
        title: result.title,
        bodyText: '', // Will use the default sample text
      });
      steps.push({ name: 'Test Extraction', ...extractResult, status: extractResult.success ? 'pass' : 'fail' });
      yield {
        step: 'extraction',
        status: extractResult.success ? 'pass' : 'fail',
        detail: extractResult.success
          ? `${extractResult.keywordsFound} keywords extracted (${extractResult.latencyMs}ms)`
          : `Failed: ${extractResult.error}`,
        latencyMs: extractResult.latencyMs,
        preview: extractResult.preview,
      };
    } else if (!ollamaConfigured) {
      // Covers a missing host as well as a host without a model: in both cases
      // connectivity was never tested, so "failed" would be misleading.
      steps.push({ name: 'Test Extraction', status: 'skip' });
      yield { step: 'extraction', status: 'skip', detail: 'Skipped — no Ollama configured' };
    } else if (!ollamaPassed) {
      steps.push({ name: 'Test Extraction', status: 'skip' });
      yield { step: 'extraction', status: 'skip', detail: 'Skipped — Ollama connectivity failed' };
    } else {
      steps.push({ name: 'Test Extraction', status: 'skip' });
      yield { step: 'extraction', status: 'skip', detail: 'Skipped — crawl test failed' };
    }
  } else {
    steps.push({ name: 'Test Crawl', status: 'skip' });
    steps.push({ name: 'Test Extraction', status: 'skip' });
    yield { step: 'crawl', status: 'skip', detail: 'No target URL configured' };
    yield { step: 'extraction', status: 'skip', detail: 'Skipped — no target URL' };
  }

  // Final summary: skipped steps count neither as passed nor towards the total.
  const passed = steps.filter((s) => s.status === 'pass').length;
  const total = steps.filter((s) => s.status !== 'skip').length;
  yield {
    step: 'summary',
    status: passed === total ? 'pass' : 'partial',
    detail: `${passed}/${total} tests passed`,
    steps,
  };
}