seo-intel 1.5.45 → 1.5.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +60 -0
- package/analyses/aeo/ai-access.js +210 -0
- package/analyses/aeo/index.js +52 -9
- package/analyses/aeo/scorer.js +36 -13
- package/cli.js +175 -18
- package/lib/license.js +26 -15
- package/lib/updater.js +17 -6
- package/mcp/server.js +250 -6
- package/package.json +1 -1
- package/seo-intel.png +0 -0
- package/server.js +47 -2
- package/setup/engine.js +3 -0
- package/setup/models.js +90 -2
package/mcp/server.js
CHANGED
|
@@ -32,8 +32,17 @@ import { readProgress } from '../lib/progress.js';
|
|
|
32
32
|
import { getProblems, getProblemCounts, markProblemStatus, getActiveStatusMap, PROBLEM_CATEGORIES, PROBLEM_STATUSES } from '../lib/problems.js';
|
|
33
33
|
|
|
34
34
|
import { runAeoAnalysis, persistAeoScores, upsertCitabilityInsights } from '../analyses/aeo/index.js';
|
|
35
|
+
import { fetchAiAccessForDomains } from '../analyses/aeo/ai-access.js';
|
|
36
|
+
import { runTechnicalAudit } from '../analysis/technical-audit.js';
|
|
37
|
+
// NOTE: model-suggestion helpers (setup/models.js, setup/checks.js) are loaded
|
|
38
|
+
// lazily inside the suggest_models handler, NOT imported at top level — to keep
|
|
39
|
+
// the setup subtree (and anything it transitively pulls) off the MCP boot path.
|
|
35
40
|
import { prescore, extractDraftTopic } from '../analyses/blog-draft/prescorer.js';
|
|
36
|
-
|
|
41
|
+
// NOTE: lightCrawl (crawler/light.js) is loaded lazily inside the crawl_site
|
|
42
|
+
// handler, NOT imported at top level. Its chain pulls turndown
|
|
43
|
+
// (light.js → html-extract.js → sanitize.js → turndown), and a slow/hanging
|
|
44
|
+
// turndown import would otherwise block the entire MCP stdio boot — no tools,
|
|
45
|
+
// no banner, no handshake. Keep the crawler subtree off the boot path.
|
|
37
46
|
import { runContentLoop } from '../analyses/loop/orchestrator.js';
|
|
38
47
|
import { gatherBlogDraftContext, buildBlogDraftPrompt } from '../analyses/blog-draft/index.js';
|
|
39
48
|
|
|
@@ -383,6 +392,9 @@ server.registerTool(
|
|
|
383
392
|
},
|
|
384
393
|
async ({ url, max_pages, include_citability, same_origin }) => {
|
|
385
394
|
try {
|
|
395
|
+
// Lazy-load the crawler subtree (pulls turndown) only when crawl_site is
|
|
396
|
+
// actually invoked — keeps it off the MCP boot path. See note at top.
|
|
397
|
+
const { lightCrawl } = await import('../crawler/light.js');
|
|
386
398
|
const r = await lightCrawl(url, {
|
|
387
399
|
maxPages: max_pages ?? 10,
|
|
388
400
|
includeCitability: include_citability ?? false,
|
|
@@ -502,21 +514,31 @@ server.registerTool(
|
|
|
502
514
|
server.registerTool(
|
|
503
515
|
'run_citability_audit',
|
|
504
516
|
{
|
|
505
|
-
description: 'Run AEO citability scoring across all crawled pages (
|
|
517
|
+
description: 'Run AEO citability scoring across all crawled pages (7 signals: entity authority, structured claims, answer density, Q&A proximity, freshness, schema coverage, and AI-crawler access). Also checks robots.txt per target domain — if it blocks answer-engine crawlers (ClaudeBot / GPTBot / PerplexityBot / Google-Extended), affected pages are gated low because AI assistants literally cannot read them. Persists scores to citability_scores and upserts citability_gap insights into the ledger. Free tier — analysis of your own site is free.',
|
|
506
518
|
inputSchema: {
|
|
507
519
|
project: z.string(),
|
|
508
520
|
include_competitors: z.boolean().optional().describe('Score competitor pages too (default true)'),
|
|
521
|
+
check_ai_access: z.boolean().optional().describe('Fetch robots.txt per target domain to score AI-crawler access (default true). The only network call this tool makes; set false to keep it fully offline.'),
|
|
509
522
|
},
|
|
510
523
|
},
|
|
511
|
-
async ({ project, include_competitors = true }) => {
|
|
524
|
+
async ({ project, include_competitors = true, check_ai_access = true }) => {
|
|
512
525
|
if (!loadProjectConfig(project)) {
|
|
513
526
|
return { content: [{ type: 'text', text: `Project "${project}" not found. Use list_projects to discover.` }], isError: true };
|
|
514
527
|
}
|
|
515
528
|
try {
|
|
516
529
|
const db = getDb();
|
|
517
|
-
|
|
530
|
+
let aiAccessByDomain = null;
|
|
531
|
+
if (check_ai_access) {
|
|
532
|
+
const targetDomains = db
|
|
533
|
+
.prepare("SELECT DISTINCT domain FROM domains WHERE project = ? AND role IN ('target','owned')")
|
|
534
|
+
.all(project).map(r => r.domain);
|
|
535
|
+
if (targetDomains.length) {
|
|
536
|
+
try { aiAccessByDomain = await fetchAiAccessForDomains(targetDomains); } catch { /* best-effort */ }
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
const results = runAeoAnalysis(db, project, { includeCompetitors: include_competitors, aiAccessByDomain, log: () => {} });
|
|
518
540
|
persistAeoScores(db, results);
|
|
519
|
-
upsertCitabilityInsights(db, project, results.target);
|
|
541
|
+
upsertCitabilityInsights(db, project, results.target, results.summary.aiAccess);
|
|
520
542
|
const competitorPageCount = [...results.competitors.values()].reduce((a, list) => a + list.length, 0);
|
|
521
543
|
const avgTargetScore = results.target.length
|
|
522
544
|
? Math.round(results.target.reduce((s, p) => s + p.score, 0) / results.target.length)
|
|
@@ -532,6 +554,8 @@ server.registerTool(
|
|
|
532
554
|
target_pages_scored: results.target.length,
|
|
533
555
|
competitor_pages_scored: competitorPageCount,
|
|
534
556
|
avg_target_score: avgTargetScore,
|
|
557
|
+
ai_access: results.summary.aiAccess,
|
|
558
|
+
ai_access_gated_pages: results.summary.gatedPages,
|
|
535
559
|
low_score_target_pages: lowScorePages,
|
|
536
560
|
hint: 'Scores persisted to DB. Call get_intel(project, for=audit) to see the full citability matrix + insights ledger.',
|
|
537
561
|
};
|
|
@@ -545,6 +569,226 @@ server.registerTool(
|
|
|
545
569
|
}
|
|
546
570
|
);
|
|
547
571
|
|
|
572
|
+
// ── Tool: tech_audit (FREE) ───────────────────────────────────────────────
|
|
573
|
+
server.registerTool(
  'tech_audit',
  {
    description: [
      'Run the technical SEO audit on already-crawled data for a project — titles, meta descriptions, noindex/robots conflicts, redirect chains, canonical issues, and sitemap-vs-crawl diff. Returns severity-sorted findings (error / warn / info) with the affected URL and a description each.',
      '',
      'Reads from the local DB (no re-crawl). Optionally runs live HEAD checks against sitemap URLs (network) to catch broken/redirected entries. Free tier — covers your own target/owned domains.',
    ].join('\n'),
    inputSchema: {
      project: z.string().describe('Project slug. Use list_projects to discover.'),
      domain: z.string().optional().describe('Audit a single domain. Omit to audit all target/owned domains in the project.'),
      sitemap_head: z.boolean().optional().describe('Also run live HEAD checks against sitemap URLs (network-heavy). Default false.'),
      limit: z.number().int().positive().max(200).optional().describe('Max findings to return per domain (default 60).'),
    },
  },
  // Handler: audits either the single `domain` passed in, or every target/owned
  // domain registered for `project`, and returns per-domain findings sorted
  // error → warn → info and capped at `limit` entries each.
  async ({ project, domain, sitemap_head, limit = 60 }) => {
    if (!loadProjectConfig(project)) {
      return { content: [{ type: 'text', text: `Project "${project}" not found. Use list_projects to discover.` }], isError: true };
    }
    try {
      const db = getDb();
      // DISTINCT mirrors the run_citability_audit query: a domain that appears
      // in more than one row must only be audited (and reported) once.
      const domainRows = domain
        ? [{ domain }]
        : db.prepare("SELECT DISTINCT domain FROM domains WHERE project = ? AND role IN ('target','owned')").all(project);
      if (!domainRows.length) {
        return { content: [{ type: 'text', text: `No target/owned domains found for project "${project}".` }], isError: true };
      }

      // Sort rank per severity; unknown severities sort after `info`.
      const order = { error: 0, warn: 1, info: 2 };
      const domains = [];
      for (const { domain: d } of domainRows) {
        const res = await runTechnicalAudit(db, { project, domain: d, runSitemapHead: !!sitemap_head });
        if (res.error) {
          // Report the failure for this domain but keep auditing the others.
          domains.push({ domain: d, error: res.error });
          continue;
        }
        const allFindings = res.findings || [];
        const findings = [...allFindings] // copy first — sort() mutates in place
          .sort((a, b) => (order[a.severity] ?? 3) - (order[b.severity] ?? 3))
          .slice(0, limit)
          .map(f => ({ severity: f.severity, type: f.type, url: f.url || null, details: f.details }));
        domains.push({ domain: d, stats: res.stats, findings, findings_truncated: allFindings.length > limit });
      }

      const out = {
        ok: true,
        project,
        domains,
        hint: 'Findings read from the local crawl DB. Re-run `run_crawl` then this tool to verify fixes cleared. For AI-citability gaps, use run_citability_audit; for the prioritized fix queue, use list_problems.',
      };
      return { content: [{ type: 'text', text: JSON.stringify(out, null, 2) }], structuredContent: out };
    } catch (err) {
      return { content: [{ type: 'text', text: `seo-intel error: ${err.message}` }], isError: true };
    }
  }
);
|
|
623
|
+
|
|
624
|
+
// ── Tool: suggest_models (FREE) ───────────────────────────────────────────
|
|
625
|
+
server.registerTool(
  'suggest_models',
  {
    description: [
      'Suggest LOCAL extraction models for the user\'s machine — the small models seo-intel runs once per crawled page to pull structured SEO data. Detects GPU/VRAM and which models are already in Ollama, then recommends from the curated set (Gemma 4 E2B / E4B / 12B, Qwen 3.5 4B / 9B).',
      '',
      'IMPORTANT: extraction should be done with a LOCAL model. The response always includes a cloud disclaimer — surface it to the user. Cloud extraction sends every page off-machine, costs money at scale, and rate-limits; a 4–8B local model handles this task well, offline. Free tier.',
    ].join('\n'),
    inputSchema: {
      vram_gb: z.number().positive().optional().describe('Override detected VRAM (GB). Omit to auto-detect the host GPU/unified memory.'),
    },
  },
  // Handler: resolves VRAM (explicit override, else best-effort detection),
  // asks the local Ollama daemon which models are installed, and returns the
  // curated suggestions together with the cloud disclaimer.
  async ({ vram_gb }) => {
    try {
      // Lazy import keeps the setup subtree off the MCP boot path.
      const { suggestExtractionModels, CLOUD_EXTRACTION_DISCLAIMER } = await import('../setup/models.js');

      let vramMB = 0;
      let gpuName = null;
      if (vram_gb) {
        vramMB = Math.round(vram_gb * 1024);
        gpuName = 'user-specified';
      } else {
        try {
          const { detectVRAM } = await import('../setup/checks.js');
          const v = detectVRAM();
          vramMB = v.vramMB || 0;
          gpuName = v.gpuName || null;
        } catch { /* unknown */ }
      }

      // Best-effort probe of the local Ollama daemon. The timer stays armed
      // until the response body has been read, so a stalled body cannot hang
      // the tool, and `finally` guarantees it is cleared on every path
      // (including a failed fetch).
      let installed = [];
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), 1500);
      try {
        const r = await fetch('http://localhost:11434/api/tags', { signal: controller.signal });
        if (r.ok) {
          const d = await r.json();
          // Keep only string tags: suggestExtractionModels calls string
          // methods on every entry, so a malformed record must not reach it.
          installed = (d.models || []).map(m => m?.name).filter(name => typeof name === 'string');
        }
      } catch {
        /* Ollama not reachable */
      } finally {
        clearTimeout(timer);
      }

      const { suggestions, recommendedId } = suggestExtractionModels(vramMB, installed);
      const out = {
        hardware: { gpu: gpuName, vram_gb: vramMB ? +(vramMB / 1024).toFixed(1) : null },
        recommended: recommendedId,
        install_hint: recommendedId ? `ollama pull ${recommendedId}` : null,
        suggestions,
        cloud_disclaimer: CLOUD_EXTRACTION_DISCLAIMER,
        note: 'Extraction should be done with a LOCAL model — show cloud_disclaimer to the user before suggesting any cloud option.',
      };
      return { content: [{ type: 'text', text: JSON.stringify(out, null, 2) }], structuredContent: out };
    } catch (err) {
      return { content: [{ type: 'text', text: `seo-intel error: ${err.message}` }], isError: true };
    }
  }
);
|
|
668
|
+
|
|
669
|
+
// ── Tool: setup_project (FREE — project creation from chat) ───────────────
|
|
670
|
+
// Closes the "setting up" gap: before this, projects could only be created via
|
|
671
|
+
// the CLI/web wizard. An agent can now take a user from zero → configured →
|
|
672
|
+
// crawled → audited entirely in chat.
|
|
673
|
+
server.registerTool(
  'setup_project',
  {
    description: [
      'Create (or update) a SEO Intel project from chat — no CLI wizard needed. Writes the project config that run_crawl / run_citability_audit / tech_audit / get_intel operate on.',
      '',
      'Minimum: project_name + target_url. Add competitors to unlock the Solo competitive surface later. Industry/audience/goal feed the analysis prompts — better context, better insights. Use suggest_models first to pick a local extraction model for the user\'s hardware.',
      '',
      'Refuses to overwrite an existing project unless overwrite=true. Free tier.',
    ].join('\n'),
    inputSchema: {
      project_name: z.string().describe('Human name — slugified for the project id (e.g. "Carbium Docs" → carbium-docs).'),
      target_url: z.string().describe('The site to optimize (scheme optional).'),
      site_name: z.string().optional().describe('Brand/site display name (defaults to project_name).'),
      industry: z.string().optional().describe('What the site/business does — feeds analysis context.'),
      audience: z.string().optional().describe('Who the site serves — feeds analysis context.'),
      goal: z.string().optional().describe('What success looks like — feeds analysis context.'),
      competitors: z.array(z.string()).optional().describe('Competitor URLs/domains to track (Solo features use these).'),
      owned: z.array(z.string()).optional().describe('Other owned domains/subdomains to include.'),
      pages_per_domain: z.number().int().positive().optional().describe('Max pages per domain per crawl (default 50).'),
      extraction_model: z.string().optional().describe('Local extraction model tag (e.g. gemma4:e4b). Get a recommendation from suggest_models.'),
      overwrite: z.boolean().optional().describe('Allow overwriting an existing project config (default false).'),
    },
  },
  // Handler: slugifies the name, refuses to clobber an existing config unless
  // overwrite=true, builds + validates the config, then writes it to disk.
  async ({ project_name, target_url, site_name, industry, audience, goal, competitors = [], owned = [], pages_per_domain, extraction_model, overwrite = false }) => {
    try {
      // Lazy import keeps the setup subtree off the MCP boot path.
      const { buildProjectConfig, writeProjectConfig, validateConfig, slugify } = await import('../setup/config-builder.js');
      const slug = slugify(project_name);
      // A name with no usable characters would otherwise target ".json" and
      // produce a project that cannot be addressed by id.
      if (!slug) {
        return { content: [{ type: 'text', text: `Could not derive a project id from project_name "${project_name}". Use a name containing letters or digits.` }], isError: true };
      }
      const existing = join(CONFIG_DIR, `${slug}.json`);
      if (existsSync(existing) && !overwrite) {
        return { content: [{ type: 'text', text: `Project "${slug}" already exists. Pass overwrite=true to replace it, or use list_projects to see what's configured.` }], isError: true };
      }

      const config = buildProjectConfig({
        projectName: project_name,
        targetUrl: target_url,
        siteName: site_name || project_name,
        industry: industry || '',
        audience: audience || '',
        goal: goal || '',
        competitors: competitors.map(u => ({ url: u })),
        owned: owned.map(u => ({ url: u })),
        pagesPerDomain: pages_per_domain || 50,
        extractionModel: extraction_model,
      });

      // Validate before touching disk so a bad config never replaces a good one.
      const validation = validateConfig(config);
      if (!validation.valid) {
        return { content: [{ type: 'text', text: `Config validation failed: ${validation.errors.join('; ')}` }], isError: true };
      }

      const written = writeProjectConfig(config, ROOT);
      const out = {
        ok: true,
        project: config.project,
        config_path: written.path,
        overwritten: written.overwritten,
        target: config.target?.domain,
        competitors: (config.competitors || []).map(c => c.domain),
        owned: (config.owned || []).map(o => o.domain),
        extraction_model: config.crawl?.extractionModel || '(default)',
        hint: `Project ready. Next: run_crawl("${config.project}") to crawl, then run_citability_audit + tech_audit + list_problems. For a local extraction model, see suggest_models.`,
      };
      return { content: [{ type: 'text', text: JSON.stringify(out, null, 2) }], structuredContent: out };
    } catch (err) {
      return { content: [{ type: 'text', text: `seo-intel error: ${err.message}` }], isError: true };
    }
  }
);
|
|
742
|
+
|
|
743
|
+
// ── Tool: scan_site (PAID — one-shot full audit, no config) ───────────────
|
|
744
|
+
// Mirrors `seo-intel scan <domain>`: crawl → extract → analyze → export. It is
|
|
745
|
+
// heavyweight (browser crawl + extraction + cloud analysis), so it runs as a
|
|
746
|
+
// detached subprocess like run_crawl and returns the report path to poll.
|
|
747
|
+
server.registerTool(
  'scan_site',
  {
    description: [
      'One-shot full SEO audit of any domain with no project setup — crawl → extract → analyze → export. Spawns a detached background job (like run_crawl) and returns immediately with the report path; poll get_crawl_status for progress.',
      '',
      'Heavyweight: full browser crawl, local extraction, and cloud analysis. For a fast, ephemeral, offline read of a single URL use crawl_site instead. Paid tier (Solo).',
    ].join('\n'),
    inputSchema: {
      domain: z.string().describe('Domain or URL to audit (e.g. "docs.carbium.sh").'),
      pages: z.number().int().positive().max(500).optional().describe('Max pages to crawl (default 100).'),
      stealth: z.boolean().optional().describe('Enable stealth browser mode for JS-heavy / anti-bot sites.'),
      no_ai: z.boolean().optional().describe('Skip the AI-enriched export (deterministic markdown only).'),
      model: z.enum(['gemini', 'claude', 'gpt']).optional().describe('Model for analysis + AI export (default gemini).'),
    },
  },
  // Handler: gates on the paid tier, refuses to start while another job is
  // running, then launches `cli.js scan <domain>` as a detached subprocess and
  // returns the path where the report is expected to be written.
  async ({ domain, pages, stealth, no_ai, model }) => {
    if (!isPro()) return paidGate('scan_site');
    const progress = readProgress();
    if (progress?.status === 'running') {
      return { content: [{ type: 'text', text: `A seo-intel job is already running (command="${progress.command}", pid=${progress.pid}). Wait or call get_crawl_status.` }], isError: true };
    }

    // Reduce URL-ish input to a bare host: drop scheme, path and leading "www."
    // (case-insensitively, so "HTTPS://WWW.Example.com/x" works too).
    const bare = domain.trim().replace(/^https?:\/\//i, '').replace(/\/.*$/, '').replace(/^www\./i, '');
    // The value becomes a positional CLI argument. Reject anything empty,
    // containing whitespace, or starting with "-" — the latter would be parsed
    // by the CLI as an option instead of a domain (argument injection).
    if (!bare || bare.startsWith('-') || /\s/.test(bare)) {
      return { content: [{ type: 'text', text: `Invalid domain "${domain}". Pass a bare domain or URL, e.g. "docs.example.com".` }], isError: true };
    }

    try {
      const args = ['cli.js', 'scan', bare];
      if (pages) args.push('--pages', String(pages));
      if (stealth) args.push('--stealth');
      if (no_ai) args.push('--no-ai');
      if (model) args.push('--model', model);

      const child = spawn(process.execPath, args, { cwd: ROOT, detached: true, stdio: 'ignore' });
      // Without a listener, an asynchronous spawn failure is emitted as an
      // unhandled 'error' event and would take down the whole MCP server.
      child.on('error', (err) => {
        console.error(`[seo-intel-mcp] scan_site subprocess error: ${err.message}`);
      });
      // No pid means the process never started; do not claim a running job.
      if (child.pid === undefined) {
        return { content: [{ type: 'text', text: 'seo-intel error: failed to start the scan subprocess.' }], isError: true };
      }
      child.unref();

      const reportPath = join(ROOT, 'reports', `scan-${bare.replace(/[^a-z0-9]/gi, '-').toLowerCase()}-${new Date().toISOString().slice(0, 10)}.md`);
      const result = {
        started: true,
        pid: child.pid,
        domain: bare,
        command: `node ${args.join(' ')}`,
        report_path: reportPath,
        hint: 'Scan is running detached (crawl → extract → analyze → export). Poll get_crawl_status; when status="completed" read the markdown at report_path. The ephemeral project is "_scan-<domain>" — tech_audit/run_citability_audit can be run against it once the crawl lands.',
      };
      return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }], structuredContent: result };
    } catch (err) {
      return { content: [{ type: 'text', text: `seo-intel error: ${err.message}` }], isError: true };
    }
  }
);
|
|
791
|
+
|
|
548
792
|
// ── Tool: get_competitor_positioning (PAID) ───────────────────────────────
|
|
549
793
|
server.registerTool(
|
|
550
794
|
'get_competitor_positioning',
|
|
@@ -989,7 +1233,7 @@ async function main() {
|
|
|
989
1233
|
const transport = new StdioServerTransport();
|
|
990
1234
|
await server.connect(transport);
|
|
991
1235
|
// stderr is fine; the host typically surfaces this in its MCP logs panel.
|
|
992
|
-
console.error(`[seo-intel-mcp] v${VERSION} ready on stdio.
|
|
1236
|
+
console.error(`[seo-intel-mcp] v${VERSION} ready on stdio. 21 tools — free: setup_project (zero→configured from chat), crawl_site (ad-hoc, any URL, no config), run_content_loop (gap→draft→close), list_projects, list_problems, mark_problem_status, get_intel(raw/audit/blog), get_pages, list_keywords, get_headings, run_crawl, get_crawl_status, ingest_insight, run_citability_audit (now with AI-crawler access), tech_audit, suggest_models (local-first), prescore_draft, draft_blog_prompt, export_intel (own-site tables); Solo: scan_site (one-shot full audit), get_competitor_positioning, get_intel(competitor), export_intel (analyses table).`);
|
|
993
1237
|
}
|
|
994
1238
|
|
|
995
1239
|
main().catch(err => {
|
package/package.json
CHANGED
package/seo-intel.png
CHANGED
|
Binary file
|
package/server.js
CHANGED
|
@@ -140,11 +140,55 @@ function getProjects() {
|
|
|
140
140
|
.filter(Boolean);
|
|
141
141
|
}
|
|
142
142
|
|
|
143
|
+
// ── Security: loopback-only gate (anti DNS-rebinding + cross-origin/CSRF) ──
//
// This server binds 127.0.0.1, but any web page you visit can still fire
// requests at localhost. Three checks narrow that class:
//   • Host   — a DNS-rebinding request arrives carrying the ATTACKER's domain as
//              Host (not localhost), so requiring a loopback Host defeats it.
//   • Origin — a cross-origin page sends its own Origin; requiring a loopback
//              Origin (when a real one is present) blocks cross-origin reads
//              and CSRF.
//   • Fetch Metadata — used only when Origin carries no information (absent, or
//              the opaque value "null" that sandboxed iframes send). If the
//              browser itself labels the request `Sec-Fetch-Site: cross-site`
//              it is rejected, except for a plain top-level navigation (a user
//              following a link to the dashboard).
// Same-origin dashboard use is unaffected: same-origin GET/SSE either sends no
// Origin or sends our own loopback Origin, and is labelled same-origin.
const LOCAL_HOSTS = new Set(['localhost', '127.0.0.1', '::1']);

/**
 * Reduce a Host-style value to a lower-cased bare hostname.
 *
 * @param {string|undefined|null} h - e.g. "localhost:3000" or "[::1]:3000"
 * @returns {string} hostname without port/brackets; '' for an empty input
 */
function normHost(h) {
  if (!h) return '';
  const value = String(h).trim().toLowerCase();
  if (value.startsWith('[')) {
    // [::1]:port → ::1 (a missing "]" keeps everything after the "[")
    const close = value.indexOf(']');
    return close > 0 ? value.slice(1, close) : value.slice(1);
  }
  return value.split(':')[0]; // host:port → host
}

/**
 * Decide whether an incoming HTTP request may be served by this loopback-only
 * server.
 *
 * @param {{ headers: Record<string, string|undefined> }} req - object exposing
 *   lower-cased request headers (as Node's http.IncomingMessage does)
 * @returns {boolean} true when the request passes every check described above
 */
function isLocalRequest(req) {
  const host = normHost(req.headers.host);
  if (!host || !LOCAL_HOSTS.has(host)) return false; // defeats DNS rebinding

  const origin = req.headers.origin;
  if (origin && origin !== 'null') { // defeats cross-origin / CSRF
    try {
      return LOCAL_HOSTS.has(normHost(new URL(origin).host));
    } catch {
      return false; // unparseable Origin — fail closed
    }
  }

  // Origin is absent or opaque ("null"), so it proves nothing either way.
  // Fall back to the browser's own classification of the request. Clients that
  // do not send Fetch Metadata (curl, older browsers) are treated as before.
  const fetchSite = String(req.headers['sec-fetch-site'] ?? '').trim().toLowerCase();
  if (fetchSite === 'cross-site') {
    const fetchMode = String(req.headers['sec-fetch-mode'] ?? '').trim().toLowerCase();
    // A cross-site request with an opaque Origin (sandboxed iframe, data: URL)
    // or a cross-site subresource/fetch (img, script, EventSource…) is never
    // the dashboard talking to itself.
    if (origin === 'null' || fetchMode !== 'navigate') return false;
  }
  return true;
}
|
|
172
|
+
|
|
143
173
|
// ── Request handler ──
|
|
144
174
|
async function handleRequest(req, res) {
|
|
145
175
|
const url = new URL(req.url, `http://localhost:${PORT}`);
|
|
146
176
|
const path = url.pathname;
|
|
147
177
|
|
|
178
|
+
// Security headers on every response (clickjacking + MIME-sniffing). The
|
|
179
|
+
// frame-ancestors directive only governs who may iframe us — it does NOT
|
|
180
|
+
// restrict the dashboard's own CDN resources, so it is safe to set globally.
|
|
181
|
+
res.setHeader('X-Frame-Options', 'DENY');
|
|
182
|
+
res.setHeader('X-Content-Type-Options', 'nosniff');
|
|
183
|
+
res.setHeader('Content-Security-Policy', "frame-ancestors 'none'");
|
|
184
|
+
|
|
185
|
+
// Loopback-only gate — reject anything not local before any routing happens.
|
|
186
|
+
if (!isLocalRequest(req)) {
|
|
187
|
+
res.writeHead(403, { 'Content-Type': 'text/plain' });
|
|
188
|
+
res.end('Forbidden: SEO Intel only accepts requests from localhost.');
|
|
189
|
+
return;
|
|
190
|
+
}
|
|
191
|
+
|
|
148
192
|
// ─── Setup wizard routes ───
|
|
149
193
|
if (path.startsWith('/setup') || path.startsWith('/api/setup/')) {
|
|
150
194
|
try {
|
|
@@ -1162,12 +1206,13 @@ ${md}`;
|
|
|
1162
1206
|
args.push('--save');
|
|
1163
1207
|
}
|
|
1164
1208
|
|
|
1165
|
-
// SSE headers
|
|
1209
|
+
// SSE headers — no CORS: the dashboard is same-origin, and the loopback
|
|
1210
|
+
// gate already blocks cross-origin callers. (Removed Access-Control-Allow-Origin:*
|
|
1211
|
+
// which previously let any website read this command-execution stream.)
|
|
1166
1212
|
res.writeHead(200, {
|
|
1167
1213
|
'Content-Type': 'text/event-stream',
|
|
1168
1214
|
'Cache-Control': 'no-cache',
|
|
1169
1215
|
'Connection': 'keep-alive',
|
|
1170
|
-
'Access-Control-Allow-Origin': '*',
|
|
1171
1216
|
});
|
|
1172
1217
|
|
|
1173
1218
|
const send = (type, data) => {
|
package/setup/engine.js
CHANGED
package/setup/models.js
CHANGED
|
@@ -42,6 +42,18 @@ export const EXTRACTION_MODELS = [
|
|
|
42
42
|
description: 'Default recommendation. MoE (8B total, 4.5B active) — excellent extraction quality at edge-model speed. Best quality/speed ratio.',
|
|
43
43
|
recommended: true,
|
|
44
44
|
},
|
|
45
|
+
{
|
|
46
|
+
id: 'gemma4:12b',
|
|
47
|
+
name: 'Gemma 4 12B',
|
|
48
|
+
family: 'gemma4',
|
|
49
|
+
tier: 'quality',
|
|
50
|
+
vram: '~10 GB',
|
|
51
|
+
minVramMB: 8500,
|
|
52
|
+
speed: '~3s/page',
|
|
53
|
+
quality: 'excellent',
|
|
54
|
+
description: 'Dense 12B — a clear quality step up from E4B for tricky pages, still fast. Needs RTX 3080+/M-series 16GB+.',
|
|
55
|
+
recommended: false,
|
|
56
|
+
},
|
|
45
57
|
{
|
|
46
58
|
id: 'gemma4:26b',
|
|
47
59
|
name: 'Gemma 4 26B',
|
|
@@ -261,8 +273,8 @@ export const ANALYSIS_MODELS = [
|
|
|
261
273
|
description: 'Google\'s latest frontier model. Massive 2M context handles the largest competitive datasets. Best value for cloud analysis.',
|
|
262
274
|
},
|
|
263
275
|
{
|
|
264
|
-
id: 'claude-opus-4
|
|
265
|
-
name: 'Claude Opus 4.
|
|
276
|
+
id: 'claude-opus-4-8',
|
|
277
|
+
name: 'Claude Opus 4.8',
|
|
266
278
|
family: 'claude',
|
|
267
279
|
type: 'cloud',
|
|
268
280
|
provider: 'anthropic',
|
|
@@ -430,6 +442,82 @@ export function recommendAnalysisModel(availableModels = [], vramMB = 0) {
|
|
|
430
442
|
};
|
|
431
443
|
}
|
|
432
444
|
|
|
445
|
+
// ── Cloud-extraction disclaimer ─────────────────────────────────────────────
|
|
446
|
+
//
|
|
447
|
+
// Extraction runs once per crawled page. At scale (thousands of pages) a cloud
|
|
448
|
+
// provider means every page's content leaves the machine, costs real money, and
|
|
449
|
+
// hits rate limits — for a task a small local model handles well. Surface this
|
|
450
|
+
// EVERYWHERE a cloud model is offered/selected for extraction. Non-negotiable.
|
|
451
|
+
|
|
452
|
+
export const CLOUD_EXTRACTION_DISCLAIMER =
|
|
453
|
+
'Extraction should be done with a LOCAL model. Cloud is a fallback, not the default: ' +
|
|
454
|
+
'it sends every page\'s content to a third-party API, costs money at scale (a 10k-page ' +
|
|
455
|
+
'site is real spend), and hits rate limits — all for a task a 4–8B local model does well, ' +
|
|
456
|
+
'offline, with your data never leaving the machine. Use cloud only if you have no local ' +
|
|
457
|
+
'GPU/Ollama and accept those tradeoffs.';
|
|
458
|
+
|
|
459
|
+
// Short one-liner for tight UIs (status bars, JSON `notice` fields).
|
|
460
|
+
export const CLOUD_EXTRACTION_DISCLAIMER_SHORT =
|
|
461
|
+
'⚠ Use a LOCAL model for extraction — cloud sends page content off-machine, costs money at scale, and rate-limits. Local is private, free, and offline.';
|
|
462
|
+
|
|
463
|
+
// ── Curated extraction suggestions ──────────────────────────────────────────
|
|
464
|
+
//
|
|
465
|
+
// The headline "what should I run locally" set — the families we actually
|
|
466
|
+
// recommend, smallest → largest. Drawn from EXTRACTION_MODELS so VRAM/speed/
|
|
467
|
+
// quality stay in one place. Used by `seo-intel models` and the suggest_models
|
|
468
|
+
// MCP tool so an agent in chat can recommend a model without the full wizard.
|
|
469
|
+
|
|
470
|
+
const SUGGESTED_EXTRACTION_IDS = [
|
|
471
|
+
'gemma4:e2b', // laptop / low VRAM
|
|
472
|
+
'qwen3.5:4b', // budget alt
|
|
473
|
+
'gemma4:e4b', // default — best quality/speed
|
|
474
|
+
'qwen3.5:9b', // ~8B-class alt
|
|
475
|
+
'gemma4:12b', // step-up quality, still fast
|
|
476
|
+
];
|
|
477
|
+
|
|
478
|
+
/**
 * Curated local extraction-model suggestions, annotated for the detected
 * hardware. Always returns the local set; the cloud disclaimer is attached so
 * callers can surface it alongside.
 *
 * Recommendation rule: the largest suggested model that fits VRAM (or the
 * default when VRAM is unknown), overridden by the largest model that is both
 * already installed and fits.
 *
 * @param {number} [vramMB] - detected VRAM in MB (0/unknown = show all, no fit filter)
 * @param {string[]} [installed] - model tags currently in Ollama; non-string
 *   entries are ignored
 * @returns {{ suggestions: object[], recommendedId: string|null, vramMB: number, disclaimer: string }}
 */
export function suggestExtractionModels(vramMB = 0, installed = []) {
  // Normalise once: tolerate a non-array / malformed entries and compare
  // case-insensitively.
  const installedTags = (Array.isArray(installed) ? installed : [])
    .filter(tag => typeof tag === 'string')
    .map(tag => tag.toLowerCase());

  // A suggestion counts as installed when some tag starts with its family and
  // carries its size as a WHOLE token. Plain substring matching would report
  // "qwen3.5:4b" as installed when only "qwen3.5:14b" is present.
  const isInstalled = (id) => {
    const [fam, size] = id.toLowerCase().split(':');
    return installedTags.some(tag =>
      tag.startsWith(fam) && (!size || tag.split(/[^a-z0-9.]+/).includes(size)));
  };

  // SUGGESTED_EXTRACTION_IDS is ordered smallest → largest; that order is
  // preserved here and relied on below.
  const suggestions = SUGGESTED_EXTRACTION_IDS
    .map(id => EXTRACTION_MODELS.find(m => m.id === id))
    .filter(Boolean)
    .map(m => ({
      id: m.id,
      name: m.name,
      tier: m.tier,
      vram: m.vram,
      speed: m.speed,
      quality: m.quality,
      fitsVram: !vramMB || vramMB >= m.minVramMB,
      installed: isInstalled(m.id),
      note: m.description,
    }));

  // Recommend the largest suggested model that fits VRAM (or the default if VRAM unknown).
  let recommendedId = 'gemma4:e4b';
  if (vramMB) {
    const fitting = suggestions.filter(s => s.fitsVram);
    recommendedId = fitting.length ? fitting[fitting.length - 1].id : (suggestions[0]?.id ?? null);
  }

  // Prefer an already-installed fitting model if there is one — the largest,
  // so a user who has both a small and a better model is not steered to the
  // weaker one.
  const installedFitting = suggestions.filter(s => s.installed && s.fitsVram);
  if (installedFitting.length) recommendedId = installedFitting[installedFitting.length - 1].id;

  return { suggestions, recommendedId, vramMB, disclaimer: CLOUD_EXTRACTION_DISCLAIMER };
}
|
|
520
|
+
|
|
433
521
|
/**
|
|
434
522
|
* Get all model recommendations for display.
|
|
435
523
|
*
|