seo-intel 1.5.30 → 1.5.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/mcp/server.js +137 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.5.31 (2026-05-17)
|
|
4
|
+
|
|
5
|
+
### MCP — `export_intel` ships the full data layer to AI agents
|
|
6
|
+
The biggest gap closed: agents can now grab seo-intel's entire structured intelligence in a single call. Mirrors `seo-intel export --full <project>` as an MCP tool, with a sharp safety valve and an explicit "do not blind-ingest" notice.
|
|
7
|
+
|
|
8
|
+
- **`export_intel(project, tables?, max_rows_per_table?)`** — bulk JSON export. Free tables: `pages, keywords, headings, links, technical, sitemap_urls`. Paid (Solo) tables: `extractions, analyses, page_schemas, citability_scores, insights`. Per-table row cap (default 1000, max 50000) so big projects can't OOM Node on `JSON.stringify`.
|
|
9
|
+
- **The notice field is the design point.** Every response includes a top-level `notice` with `level: important|critical`, token estimate, byte size, and a clear instruction set: *"🛑 DO NOT INGEST THIS RESPONSE WHOLESALE. (1) write to file and query with jq/sqlite-utils, (2) use get_intel(for=audit|blog|competitor) for digests, (3) for pre-parsed analysis upgrade to Solo."* Free users see the list of paid tables they're missing + the Solo tool names that return digests instead of raw rows.
|
|
10
|
+
- Truncation is first-class: `counts: { pages: { total: 3422, returned: 1000, truncated: true } }`. Notice flips to `critical` whenever any table truncates, with the explicit "re-call with `max_rows_per_table: <N>` or `tables: ['specific_one']`" guidance.
|
|
11
|
+
- Verified: carbium full free export = 1.2 MB / 314k tokens with 6 tables truncated — still fits the safety valve, won't crash Node. Free-tier `analyses` request → clean paid gate. Small slices (e.g. `tables: ['technical']` on risunouto) → tiny notice, no truncation.
|
|
12
|
+
|
|
13
|
+
**The strategy this lands:** free tier offers the firehose with explicit guardrails ("hiccup with tokens or pay €20"). Paid tools (`run_citability_audit`, `get_competitor_positioning`, `prescore_draft`, `draft_blog_prompt`, `get_intel(for=audit|blog|competitor)`) return *digested, AI-ready* output — the value-add for Solo subscribers vs raw-data parsing on the client side.
|
|
14
|
+
|
|
15
|
+
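**MCP surface: 13 tools total** — 9 with free functionality (including `export_intel` for the free-table subset) and 6 with paid functionality (including `export_intel` for paid tables); `get_intel` and `export_intel` span both tiers, so they are counted once in the total.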
|
|
16
|
+
|
|
3
17
|
## 1.5.30 (2026-05-17)
|
|
4
18
|
|
|
5
19
|
### MCP — paid analysis tools (the full Solo surface for AI agents)
|
package/mcp/server.js
CHANGED
|
@@ -539,11 +539,147 @@ server.registerTool(
|
|
|
539
539
|
}
|
|
540
540
|
);
|
|
541
541
|
|
|
542
|
+
// ── Tool: export_intel (firehose; free tables + paid tables) ──────────────

// Tables any tier may export.
const FREE_EXPORT_TABLES = [
  'pages',
  'keywords',
  'headings',
  'links',
  'technical',
  'sitemap_urls',
];

// Tables gated behind the paid tier (the handler checks isPro() before serving them).
const PAID_EXPORT_TABLES = [
  'extractions',
  'analyses',
  'page_schemas',
  'citability_scores',
  'insights',
];

// Every exportable table, free first; used as the enum for the `tables` input.
const ALL_EXPORT_TABLES = FREE_EXPORT_TABLES.concat(PAID_EXPORT_TABLES);

// One SELECT per exportable table. Each statement takes exactly one bound
// parameter (the project name); the handler appends `LIMIT ?` when fetching rows.
const EXPORT_TABLE_QUERIES = {
  // Free tables.
  pages: `SELECT p.url, d.domain, d.role, p.status_code, p.word_count, p.load_ms, p.is_indexable, p.click_depth, p.published_date, p.modified_date, p.title, p.meta_desc, p.final_url, p.x_robots_tag
    FROM pages p JOIN domains d ON d.id = p.domain_id WHERE d.project = ? ORDER BY d.role, d.domain, p.click_depth`,
  keywords: `SELECT k.keyword, k.location, p.url, d.domain, d.role FROM keywords k JOIN pages p ON p.id = k.page_id JOIN domains d ON d.id = p.domain_id WHERE d.project = ? ORDER BY k.keyword`,
  headings: `SELECT h.level, h.text, p.url, d.domain FROM headings h JOIN pages p ON p.id = h.page_id JOIN domains d ON d.id = p.domain_id WHERE d.project = ? ORDER BY p.url, h.level`,
  links: `SELECT l.target_url, l.anchor_text, l.is_internal, p.url as source_url, d.domain FROM links l JOIN pages p ON p.id = l.source_id JOIN domains d ON d.id = p.domain_id WHERE d.project = ? ORDER BY l.is_internal DESC, d.domain`,
  technical: `SELECT t.has_canonical, t.has_og_tags, t.has_schema, t.is_mobile_ok, t.has_sitemap, t.has_robots, t.core_web_vitals, p.url, d.domain FROM technical t JOIN pages p ON p.id = t.page_id JOIN domains d ON d.id = p.domain_id WHERE d.project = ?`,
  sitemap_urls: `SELECT s.url, s.sitemap_source, s.head_status, s.head_location, s.discovered_at, d.domain FROM sitemap_urls s JOIN domains d ON d.id = s.domain_id WHERE d.project = ?`,

  // Paid tables.
  extractions: `SELECT e.title, e.meta_desc, e.h1, e.product_type, e.pricing_tier, e.cta_primary, e.tech_stack, e.schema_types, e.search_intent, e.primary_entities, e.intent_scores, p.url, d.domain FROM extractions e JOIN pages p ON p.id = e.page_id JOIN domains d ON d.id = p.domain_id WHERE d.project = ?`,
  analyses: `SELECT generated_at, model, keyword_gaps, long_tails, quick_wins, new_pages, content_gaps, positioning, technical_gaps FROM analyses WHERE project = ? ORDER BY generated_at DESC`,
  page_schemas: `SELECT ps.schema_type, ps.name, ps.description, ps.rating, ps.rating_count, ps.price, ps.currency, ps.author, ps.date_published, p.url, d.domain FROM page_schemas ps JOIN pages p ON p.id = ps.page_id JOIN domains d ON d.id = p.domain_id WHERE d.project = ? ORDER BY ps.schema_type`,
  citability_scores: `SELECT cs.url, cs.score, cs.tier, cs.entity_authority, cs.structured_claims, cs.answer_density, cs.qa_proximity, cs.freshness, cs.schema_coverage, cs.ai_intents, cs.scored_at, p.title, d.domain, d.role FROM citability_scores cs JOIN pages p ON p.id = cs.page_id JOIN domains d ON d.id = p.domain_id WHERE d.project = ? ORDER BY cs.score`,
  insights: `SELECT id, type, status, fingerprint, first_seen, last_seen, source, data FROM insights WHERE project = ? ORDER BY last_seen DESC`,
};

// Per-table row cap applied when the caller does not pass max_rows_per_table.
const DEFAULT_MAX_ROWS_PER_TABLE = 1000;

// Upper bound the input schema accepts for max_rows_per_table.
const MAX_MAX_ROWS_PER_TABLE = 50000;
|
|
564
|
+
|
|
565
|
+
/**
 * Build the `notice` object that accompanies an export_intel response.
 *
 * The notice tells the consuming agent how large the payload is, whether any
 * table was cut off by the row cap, and (for free-tier callers) which paid
 * tables are not available.
 *
 * @param {object} args
 * @param {number} args.tokens - Estimated token count of the exported data.
 * @param {number} args.bytes - Size of the exported data in bytes.
 * @param {boolean} args.free - True when the caller is on the free tier.
 * @param {string[]} [args.paidRequested] - Paid tables included in this export.
 * @param {string[]} [args.paidExcluded] - Paid tables withheld from this export; passed through unchanged.
 * @param {boolean} args.anyTruncated - True when at least one table hit the row cap.
 * @param {number} args.maxRowsPerTable - Row cap that was applied to each table.
 * @returns {{level: string, message: string, token_estimate: number, size_bytes: number, max_rows_per_table: number, truncated: boolean, tables_paid_excluded: (string[]|undefined)}}
 */
function buildExportNotice({ tokens, bytes, free, paidRequested, paidExcluded, anyTruncated, maxRowsPerTable }) {
  // Pin the locale. A bare toLocaleString() follows the host's locale, so the
  // same count could render as "314,000" on one machine and "314.000" on
  // another — ambiguous in a message whose whole purpose is to convey size.
  const formatCount = (value) => value.toLocaleString('en-US');

  // Above this many estimated tokens the export is reported as "heavy".
  const tooBig = tokens > 50000;

  const upgradeBlurb = free
    ? `\n\n📦 Tables NOT in this response (require SEO Intel Solo, €19.99/mo — vs Ahrefs ~$129/mo): ${PAID_EXPORT_TABLES.join(', ')}.\n These are the AI-derived layers: per-page entity/intent/schema extraction, full analysis history, structured @type inventory, citability scores, and the Intelligence Ledger.\n For pre-parsed digests instead of raw rows, the Solo tools return ready-to-use analysis: run_citability_audit, get_competitor_positioning, prescore_draft, draft_blog_prompt.`
    : `\n\nYou have Solo. Paid tables in this export: ${(paidRequested || []).join(', ') || '(none requested)'}.`;

  // Heavy exports are reported in MB, everything else in KB.
  const sizeLine = tooBig
    ? `\n\n⚠️ HEAVY EXPORT: ${formatCount(tokens)} estimated tokens (~${(bytes / 1024 / 1024).toFixed(1)} MB). This WILL blow up a typical agent's context budget.`
    : `\n\nSize: ${formatCount(tokens)} estimated tokens (~${(bytes / 1024).toFixed(0)} KB).`;

  const truncLine = anyTruncated
    ? `\n\n✂️ TRUNCATED: some tables hit the per-table row cap (currently ${formatCount(maxRowsPerTable)}). Check the per-table \`counts\` map — \`truncated: true\` means there are more rows. To pull more, re-call with \`max_rows_per_table: <N up to ${formatCount(MAX_MAX_ROWS_PER_TABLE)}>\` or \`tables: ["specific_one"]\`.`
    : '';

  return {
    // Either condition alone escalates the notice to "critical".
    level: tooBig || anyTruncated ? 'critical' : 'important',
    message: [
      '🛑 DO NOT INGEST THIS RESPONSE WHOLESALE INTO YOUR CONTEXT.',
      '',
      'This is a raw structured-data firehose — designed for tooling, not direct LLM consumption. Recommended ways to handle it:',
      ' 1. Write it to a file via your shell tool (e.g. `... > intel.json`), then query selectively with jq / sqlite-utils / a small Python script.',
      ' 2. For pre-digested intelligence, call get_intel(for=audit|blog|competitor) — same data, summarized.',
      ' 3. For specific record lookups, use the targeted tools: get_pages, get_headings, list_keywords, get_competitor_positioning.',
    ].join('\n') + sizeLine + truncLine + upgradeBlurb,
    token_estimate: tokens,
    size_bytes: bytes,
    max_rows_per_table: maxRowsPerTable,
    truncated: anyTruncated,
    tables_paid_excluded: paidExcluded,
  };
}
|
|
596
|
+
|
|
597
|
+
// Registers the `export_intel` MCP tool: a bulk export of the per-project
// tables listed in EXPORT_TABLE_QUERIES, capped per table, wrapped in an
// envelope that carries row counts and a handling `notice`.
//
// Input:  project (required), tables (optional; defaults to the free subset),
//         max_rows_per_table (optional; defaults to DEFAULT_MAX_ROWS_PER_TABLE).
// Output: an MCP result whose text content and structuredContent both hold
//         the envelope; unknown projects and unexpected failures return
//         `isError: true`, and paid tables requested without Pro return paidGate().
server.registerTool(
  'export_intel',
  {
    description: [
      'Bulk export of raw structured intelligence — pages, keywords, headings, links, technical, sitemap URLs (free), plus extractions, analyses, schemas, citability scores, and insights (Solo). Mirrors `seo-intel export --full <project>` as a single MCP call.',
      '',
      '⚠️ FIREHOSE WARNING: this is raw rows, not summaries. For carbium-sized projects it can be 5–10 MB / 200k+ tokens. The response includes a `notice` field telling the agent how to handle it (pipe to file, use other tools, or upgrade). Agents SHOULD NOT paste the response wholesale into their context — read the `notice` first, then either query selectively or save to a file.',
      '',
      'For pre-parsed AI-ready intel, prefer: get_intel(for=audit|blog|competitor), run_citability_audit, get_competitor_positioning, draft_blog_prompt.',
    ].join('\n'),
    inputSchema: {
      project: z.string(),
      tables: z.array(z.enum(ALL_EXPORT_TABLES)).optional().describe(`Tables to include. Free: ${FREE_EXPORT_TABLES.join(', ')}. Paid (Solo only): ${PAID_EXPORT_TABLES.join(', ')}. Omit to get the free subset.`),
      max_rows_per_table: z.number().int().positive().max(MAX_MAX_ROWS_PER_TABLE).optional().describe(`Cap on rows returned per table — safety valve against OOM on large projects (default ${DEFAULT_MAX_ROWS_PER_TABLE}, max ${MAX_MAX_ROWS_PER_TABLE}). When truncated, the per-table counts map shows total + returned so you know what's missing.`),
    },
  },
  async ({ project, tables, max_rows_per_table }) => {
    if (!loadProjectConfig(project)) {
      return { content: [{ type: 'text', text: `Project "${project}" not found. Use list_projects to discover.` }], isError: true };
    }

    // De-duplicate the caller's list: a repeated table name would otherwise
    // run the same COUNT + SELECT twice and show up twice in tables_included.
    const requested = tables?.length ? [...new Set(tables)] : FREE_EXPORT_TABLES;
    const paidRequested = requested.filter((t) => PAID_EXPORT_TABLES.includes(t));
    if (paidRequested.length && !isPro()) {
      return paidGate(`export_intel (paid tables: ${paidRequested.join(', ')})`);
    }

    // The schema only admits positive integers, so only an omitted value falls back.
    const maxRows = max_rows_per_table ?? DEFAULT_MAX_ROWS_PER_TABLE;

    try {
      const db = getDb();
      const domains = db.prepare('SELECT domain, role, last_crawled FROM domains WHERE project=? ORDER BY role, domain').all(project);
      const data = {};
      const counts = {};
      let anyTruncated = false;

      for (const table of requested) {
        // A failure in one table is recorded in place; the remaining tables still export.
        try {
          // Two-step: count first, then fetch with LIMIT. Cheaper than .all() then .slice() for huge tables.
          const countRow = db.prepare(`SELECT COUNT(*) AS n FROM (${EXPORT_TABLE_QUERIES[table]}) AS sub`).get(project);
          const total = countRow?.n ?? 0;
          const rows = db.prepare(`${EXPORT_TABLE_QUERIES[table]} LIMIT ?`).all(project, maxRows);
          const truncated = total > rows.length;
          if (truncated) anyTruncated = true;
          data[table] = rows;
          counts[table] = { total, returned: rows.length, truncated };
        } catch (e) {
          data[table] = { error: e.message };
          counts[table] = { total: 0, returned: 0, truncated: false, error: e.message };
        }
      }

      const free = !isPro();

      // Size figures are measured on the compact serialization of `data`.
      const dataJson = JSON.stringify(data);
      const tokenEstimate = Math.ceil(dataJson.length / 4); // rough heuristic: ~4 characters per token
      const sizeBytes = Buffer.byteLength(dataJson, 'utf8');

      const notice = buildExportNotice({
        tokens: tokenEstimate,
        bytes: sizeBytes,
        free,
        paidRequested,
        paidExcluded: free ? PAID_EXPORT_TABLES : undefined,
        anyTruncated,
        maxRowsPerTable: maxRows,
      });

      const envelope = {
        project,
        exported_at: new Date().toISOString(),
        seo_intel_version: VERSION,
        tier: free ? 'free' : 'paid',
        tables_included: requested,
        counts,
        domains,
        notice,
        data,
      };

      return {
        // Serialize compactly so the text payload matches what the notice
        // measured. Pretty-printing with an indent adds whitespace for every
        // field of every row, making the real response larger than the
        // size_bytes / token_estimate reported to the agent.
        content: [{ type: 'text', text: JSON.stringify(envelope) }],
        structuredContent: envelope,
      };
    } catch (err) {
      return { content: [{ type: 'text', text: `seo-intel error: ${err.message}` }], isError: true };
    }
  }
);
|
|
677
|
+
|
|
542
678
|
/**
 * Entry point: connect the MCP server to a stdio transport, then log a
 * readiness banner listing the registered tools.
 */
async function main() {
  const stdioTransport = new StdioServerTransport();
  await server.connect(stdioTransport);

  const readyBanner = `[seo-intel-mcp] v${VERSION} ready on stdio. 13 tools — free: list_projects, get_intel(raw), get_pages, list_keywords, get_headings, run_crawl, get_crawl_status, ingest_insight, export_intel (free-tier subset); paid: get_intel(audit/blog/competitor), run_citability_audit, get_competitor_positioning, prescore_draft, draft_blog_prompt, export_intel (paid tables).`;

  // Logged to stderr; the host typically surfaces this in its MCP logs panel.
  console.error(readyBanner);
}
|
|
548
684
|
|
|
549
685
|
main().catch(err => {
|