seo-intel 1.5.27 → 1.5.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/db/db.js +54 -0
- package/lib/progress.js +37 -0
- package/mcp/server.js +133 -2
- package/package.json +1 -1
- package/seo-intel.png +0 -0
- package/server.js +1 -18
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.5.29 (2026-05-17)
|
|
4
|
+
|
|
5
|
+
### MCP — `ingest_insight` closes the loop (agents become collaborators, not consumers)
|
|
6
|
+
The MCP server now accepts write-back. An agent can read your raw data, do its own analysis with its own flagship LLM, and persist findings into the Intelligence Ledger — surviving across sessions, surfacing in the dashboard, deduplicating against future runs.
|
|
7
|
+
|
|
8
|
+
- **`ingest_insight(project, type, data, agent_name?)`** — **free tier**. The agent's LLM did the analysis; we just provide storage. Allowed types mirror what `analyze` writes: `keyword_gap`, `long_tail`, `quick_win`, `new_page`, `content_gap`, `technical_gap`, `positioning`.
|
|
9
|
+
- **Dedup contract**: same `(project, type, fingerprint)` reuses the existing row and returns `deduped: true`, bumping `last_seen` and overwriting the stored `data` and `source` with the latest payload — no duplicate accumulation across sessions.
|
|
10
|
+
- **Provenance**: source is stored as `agent:<name>` (e.g. `agent:claude-opus-4-7`) when `agent_name` is supplied, else just `agent`. Also stamped into the `data` JSON blob as `_source` for downstream consumers that only read `data`.
|
|
11
|
+
- **Schema**: idempotent `ALTER TABLE insights ADD COLUMN source TEXT DEFAULT 'cli'` — existing rows backfill to `'cli'`; analyze-time writes stay as `'cli'`; agent writes flip to `'agent:*'`. Safe on existing DBs.
|
|
12
|
+
|
|
13
|
+
### Logo
|
|
14
|
+
- Updated product logo to the sharp / soft-corners v1 variant. Size dropped 1.46 MB → 953 KB. Dashboard favicon + npm package both pick up the new asset.
|
|
15
|
+
|
|
16
|
+
## 1.5.28 (2026-05-17)
|
|
17
|
+
|
|
18
|
+
### MCP — agents can now trigger crawls and watch progress
|
|
19
|
+
The MCP server gains its first **active** tools — agents move from read-only to actually doing work on the user's machine.
|
|
20
|
+
|
|
21
|
+
- **`run_crawl(project, stealth?, max_pages?)`** — spawn a crawl as a detached subprocess. Returns immediately with `{ started, pid, command, hint }`. Free tier — crawl page limits still apply (Solo unlocks unlimited). Refuses to start if any seo-intel job is already running (conflict guard mirrors the existing HTTP `/api/crawl` behaviour).
|
|
22
|
+
- **`get_crawl_status()`** — read the most recent job's progress: status (`running` / `completed` / `crashed` / `stopped` / `idle`), command, project, pid, timestamps. PID liveness is verified — a "running" job whose process died gets re-tagged as `crashed`.
|
|
23
|
+
|
|
24
|
+
A natural session now looks like: agent calls `run_crawl(carbium)` → polls `get_crawl_status()` every minute → once `completed`, calls `get_intel(carbium, for=raw)` and `get_pages(carbium)` to see new data. Free tier, end to end.
|
|
25
|
+
|
|
26
|
+
### Internal — shared progress reader
|
|
27
|
+
`server.js` and `mcp/server.js` now both read job state from `lib/progress.js` (the canonical implementation, with PID liveness detection). Eliminates a duplicate `readProgress()` and ensures any future progress-file schema changes propagate automatically.
|
|
28
|
+
|
|
3
29
|
## 1.5.27 (2026-05-16)
|
|
4
30
|
|
|
5
31
|
### MCP — three new free-tier read tools
|
package/db/db.js
CHANGED
|
@@ -29,6 +29,7 @@ export function getDb(dbPath = './seo-intel.db') {
|
|
|
29
29
|
try { _db.exec('ALTER TABLE pages ADD COLUMN x_robots_tag TEXT'); } catch { /* already exists */ }
|
|
30
30
|
try { _db.exec('ALTER TABLE analyses ADD COLUMN technical_gaps TEXT'); } catch { /* already exists */ }
|
|
31
31
|
try { _db.exec('ALTER TABLE extractions ADD COLUMN intent_scores TEXT'); } catch { /* already exists */ }
|
|
32
|
+
try { _db.exec("ALTER TABLE insights ADD COLUMN source TEXT DEFAULT 'cli'"); } catch { /* already exists */ }
|
|
32
33
|
|
|
33
34
|
// Backfill first_seen_at from crawled_at for existing rows
|
|
34
35
|
_db.exec('UPDATE pages SET first_seen_at = crawled_at WHERE first_seen_at IS NULL');
|
|
@@ -225,6 +226,59 @@ export function upsertInsightsFromKeywords(db, project, keywordsReport) {
|
|
|
225
226
|
}
|
|
226
227
|
}
|
|
227
228
|
|
|
229
|
+
// ── Agent-ingested insight (write-back from MCP) ────────────────────────────
|
|
230
|
+
|
|
231
|
+
export const AGENT_INSIGHT_TYPES = ['keyword_gap', 'long_tail', 'quick_win', 'new_page', 'content_gap', 'technical_gap', 'positioning'];

/**
 * Insert a single insight on behalf of an external agent (e.g. via MCP).
 *
 * Rows are keyed on (project, type, fingerprint). When a row with the same
 * key already exists, the upsert does NOT create a duplicate: it overwrites
 * `last_seen`, `data` and `source` with the values from this call and leaves
 * the other columns (e.g. `first_seen`, `status`) untouched.
 *
 * Validation failures are reported through the return value, not thrown.
 * Errors raised by the database driver itself propagate to the caller.
 *
 * @param {object} db - Database handle exposing `prepare(sql)` with `.get()` / `.run()`.
 * @param {object} input
 * @param {string} input.project - Project slug; must be truthy.
 * @param {string} input.type - One of AGENT_INSIGHT_TYPES.
 * @param {object} input.data - Plain-object payload. Arrays are rejected. Must carry
 *   whatever identifier field `_insightFingerprint` needs for `type`.
 * @param {string} [input.agentName] - Optional provenance tag; stored as `agent:<name>`.
 * @returns {{ok: false, error: string} |
 *           {ok: true, id: *, fingerprint: *, deduped: boolean, source: string, last_seen: number}}
 *   `deduped` is true when the row existed before this call. `last_seen` is epoch milliseconds.
 */
export function insertAgentInsight(db, { project, type, data, agentName }) {
  if (!AGENT_INSIGHT_TYPES.includes(type)) {
    return { ok: false, error: `Unsupported type "${type}". Allowed: ${AGENT_INSIGHT_TYPES.join(', ')}` };
  }
  if (!project) return { ok: false, error: 'project is required' };
  // `typeof [] === 'object'`, so arrays need an explicit check — otherwise they
  // would be spread into an index-keyed object ({ "0": ... }) and persisted.
  if (!data || typeof data !== 'object' || Array.isArray(data)) {
    return { ok: false, error: 'data must be an object' };
  }

  // A falsy fingerprint is treated as "payload lacks the identifier for this type".
  const fingerprint = _insightFingerprint(type, data);
  if (!fingerprint) {
    return { ok: false, error: `data is missing the identifier field this type needs (see _insightFingerprint in db/db.js for the per-type contract)` };
  }

  const source = agentName ? `agent:${agentName}` : 'agent';
  const ts = Date.now();

  // Provenance is stored both in the `source` column and inside the JSON blob,
  // so consumers that only read `data` can still see where the row came from.
  const enriched = { ...data, _source: source, _ingested_at: new Date(ts).toISOString() };

  const findId = db.prepare(
    'SELECT id FROM insights WHERE project = ? AND type = ? AND fingerprint = ?'
  );

  // Looked up before the upsert purely so the caller can be told whether this
  // call created the row or refreshed an existing one.
  const existing = findId.get(project, type, fingerprint);

  db.prepare(`
    INSERT INTO insights (project, type, status, fingerprint, first_seen, last_seen, source_analysis_id, data, source)
    VALUES (?, ?, 'active', ?, ?, ?, NULL, ?, ?)
    ON CONFLICT(project, type, fingerprint) DO UPDATE SET
      last_seen = excluded.last_seen,
      data = excluded.data,
      source = excluded.source
  `).run(project, type, fingerprint, ts, ts, JSON.stringify(enriched), source);

  // The upsert updates in place, so a pre-existing row keeps its id; only a
  // fresh insert needs a second lookup.
  const row = existing || findId.get(project, type, fingerprint);

  return { ok: true, id: row.id, fingerprint, deduped: !!existing, source, last_seen: ts };
}
|
|
281
|
+
|
|
228
282
|
// ── Read active insights (accumulated across all runs) ──────────────────────
|
|
229
283
|
|
|
230
284
|
export function getActiveInsights(db, project) {
|
package/lib/progress.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/progress.js — Single source of truth for the seo-intel job progress file.
|
|
3
|
+
*
|
|
4
|
+
* The CLI's crawl/extract/analyze/aeo/... commands all write their state to
|
|
5
|
+
* `.extraction-progress.json` in the project root. Server.js, mcp/server.js,
|
|
6
|
+
* and any future consumer can read job status from here without spawning a
|
|
7
|
+
* subprocess.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { readFileSync, existsSync } from 'fs';
|
|
11
|
+
import { dirname, join } from 'path';
|
|
12
|
+
import { fileURLToPath } from 'url';
|
|
13
|
+
|
|
14
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
export const PROGRESS_FILE = join(__dirname, '..', '.extraction-progress.json');

/**
 * Load the job-progress record from PROGRESS_FILE.
 *
 * If the record claims `status === 'running'` and carries a truthy `pid`, the
 * pid is probed with signal 0. When the probe fails with ESRCH (no such
 * process), the returned record is re-labelled `crashed` and `crashed_at` is
 * set to the record's `updated_at`. Other probe failures leave the record
 * unchanged. The file on disk is never modified.
 *
 * @returns {object|null} The parsed record, or null when the file is missing,
 *   unreadable, or not valid JSON.
 */
export function readProgress() {
  try {
    if (!existsSync(PROGRESS_FILE)) {
      return null;
    }

    const job = JSON.parse(readFileSync(PROGRESS_FILE, 'utf8'));

    // Only a job that still claims to be running (and names a pid) needs probing.
    const needsProbe = job.status === 'running' && job.pid;
    if (!needsProbe) {
      return job;
    }

    try {
      // Signal 0 delivers nothing; it only checks that the pid can be signalled.
      process.kill(job.pid, 0);
    } catch (probeError) {
      if (probeError.code === 'ESRCH') {
        job.status = 'crashed';
        job.crashed_at = job.updated_at;
      }
    }

    return job;
  } catch {
    return null;
  }
}
|
package/mcp/server.js
CHANGED
|
@@ -21,12 +21,14 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
|
21
21
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
22
22
|
import * as z from 'zod/v4';
|
|
23
23
|
import { readFileSync, readdirSync, existsSync } from 'fs';
|
|
24
|
+
import { spawn } from 'child_process';
|
|
24
25
|
import { dirname, join } from 'path';
|
|
25
26
|
import { fileURLToPath } from 'url';
|
|
26
27
|
|
|
27
|
-
import { getDb } from '../db/db.js';
|
|
28
|
+
import { getDb, insertAgentInsight, AGENT_INSIGHT_TYPES } from '../db/db.js';
|
|
28
29
|
import { getIntel, INTEL_SLICES, FREE_SLICES } from '../lib/intel.js';
|
|
29
30
|
import { isPro } from '../lib/license.js';
|
|
31
|
+
import { readProgress } from '../lib/progress.js';
|
|
30
32
|
|
|
31
33
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
32
34
|
const ROOT = join(__dirname, '..');
|
|
@@ -233,11 +235,140 @@ server.registerTool(
|
|
|
233
235
|
}
|
|
234
236
|
);
|
|
235
237
|
|
|
238
|
+
// ── Tool: run_crawl (free) ────────────────────────────────────────────────
|
|
239
|
+
server.registerTool(
  'run_crawl',
  {
    description: [
      'Trigger a background crawl for an existing project. Spawns the crawl as a detached subprocess and returns immediately — the crawl will keep running even if this MCP server exits. Use get_crawl_status to monitor progress, or call get_intel/get_pages once the crawl completes to see results.',
      '',
      'Conflict guard: refuses to start if any seo-intel job is already running. Free tier — crawl page limits still apply (configurable via setup / Solo license unlocks unlimited).',
    ].join('\n'),
    inputSchema: {
      project: z.string().describe('Existing project slug. Use list_projects to discover.'),
      stealth: z.boolean().optional().describe('Enable stealth browser mode for JS-heavy or anti-bot sites'),
      max_pages: z.number().int().positive().optional().describe('Override max pages per domain'),
    },
  },
  /**
   * Handler for run_crawl.
   *
   * Steps: (1) require a `<project>.json` config under CONFIG_DIR, (2) refuse
   * when readProgress() reports a job with status "running", (3) spawn
   * `cli.js crawl <project>` with the current Node binary as a detached child,
   * (4) report `{ started, pid, project, command, hint }`.
   *
   * Every failure is returned as an `isError` tool result rather than thrown.
   */
  async ({ project, stealth, max_pages }) => {
    const configPath = join(CONFIG_DIR, `${project}.json`);
    if (!existsSync(configPath)) {
      const available = listConfigProjects().map(p => p.project).join(', ') || '(none configured)';
      return {
        content: [{ type: 'text', text: `Project "${project}" not found. Available: ${available}. Use list_projects to discover, or run \`seo-intel setup\` to add a new project.` }],
        isError: true,
      };
    }
    const progress = readProgress();
    if (progress?.status === 'running') {
      return {
        content: [{ type: 'text', text: `A seo-intel job is already running (command="${progress.command}", project="${progress.project}", pid=${progress.pid}). Call get_crawl_status to monitor, or wait for it to finish before starting another.` }],
        isError: true,
      };
    }

    const args = ['cli.js', 'crawl', project];
    if (stealth) args.push('--stealth');
    if (max_pages) args.push('--max-pages', String(max_pages));

    const child = spawn(process.execPath, args, {
      cwd: ROOT,
      detached: true,
      stdio: 'ignore',
    });

    // spawn() reports launch failures asynchronously through an 'error' event.
    // With no listener that event is thrown as an uncaught exception, which
    // would take down the whole MCP server, and `child.pid` would be undefined
    // while we claimed `started: true`. Wait for whichever of 'spawn' / 'error'
    // arrives first so the caller gets an honest answer.
    const spawnError = await new Promise((resolve) => {
      child.on('error', (err) => {
        // stderr only: stdout carries the MCP stdio protocol.
        console.error(`[seo-intel-mcp] crawl subprocess error: ${err.message}`);
        resolve(err);
      });
      child.once('spawn', () => resolve(null));
    });
    if (spawnError) {
      return {
        content: [{ type: 'text', text: `Failed to start crawl for "${project}": ${spawnError.message}` }],
        isError: true,
      };
    }

    // Detach from our event loop so this server can exit while the crawl continues.
    child.unref();

    const result = {
      started: true,
      pid: child.pid,
      project,
      command: `node ${args.join(' ')}`,
      hint: 'Crawl is running detached. Call get_crawl_status to check progress (updates every few seconds), or call get_intel(project, for=raw) in a minute or two to see new data.',
    };
    return {
      content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
      structuredContent: result,
    };
  }
);
|
|
294
|
+
|
|
295
|
+
// ── Tool: get_crawl_status (free) ─────────────────────────────────────────
|
|
296
|
+
server.registerTool(
  'get_crawl_status',
  {
    description: 'Read the current state of the most recent seo-intel job (crawl/extract/analyze/etc). Returns status: running | completed | crashed | stopped | idle, plus project/command/pid/timestamps when available. Use this after run_crawl to monitor progress. Free tier.',
  },
  // Takes no input. Returns whatever readProgress() yields, or a synthetic
  // "idle" record when it yields nothing.
  async () => {
    let snapshot = readProgress();
    if (!snapshot) {
      snapshot = { status: 'idle', note: 'No seo-intel job has been recorded since startup. Use run_crawl to start one.' };
    }

    // The same record is returned twice: pretty-printed text for display and
    // the raw object as structured content.
    const text = JSON.stringify(snapshot, null, 2);
    return {
      content: [{ type: 'text', text }],
      structuredContent: snapshot,
    };
  }
);
|
|
309
|
+
|
|
310
|
+
// ── Tool: ingest_insight (free — write-back closes the loop) ──────────────
|
|
311
|
+
server.registerTool(
  'ingest_insight',
  {
    description: [
      'Persist an agent-generated insight into the SEO Intel Intelligence Ledger so it shows up in the dashboard and survives across sessions. Free tier — the agent\'s own LLM did the analysis; we just provide storage.',
      '',
      'Dedup contract: same (project, type, fingerprint) updates `last_seen` instead of creating a duplicate row. So an agent rediscovering the same finding across sessions cleanly bumps the timestamp.',
      '',
      'Allowed types (mirror what the cloud `analyze` command writes):',
      ' keyword_gap data: { keyword, ... } fingerprint = keyword',
      ' long_tail data: { phrase, ... } fingerprint = phrase',
      ' quick_win data: { page, issue, ... } fingerprint = page::issue',
      ' new_page data: { target_keyword | title, ... }',
      ' content_gap data: { topic, ... } fingerprint = topic',
      ' technical_gap data: { gap, ... } fingerprint = gap',
      ' positioning data: { ...free-form... } one slot per project',
      '',
      'data must include the identifier field above; otherwise the tool returns an error.',
    ].join('\n'),
    inputSchema: {
      project: z.string().describe('Project slug'),
      type: z.enum(AGENT_INSIGHT_TYPES).describe('Insight type from the allowed set'),
      // zod/v4 dropped the single-argument form of z.record(); the key schema
      // must be given explicitly or the record has no value schema at all.
      data: z.record(z.string(), z.any()).describe('Insight payload — JSON object. Must include the identifier field for the chosen type.'),
      agent_name: z.string().optional().describe('Optional provenance tag (e.g. "claude-opus-4-7"). Stored as source="agent:<name>".'),
    },
  },
  /**
   * Handler for ingest_insight.
   *
   * Delegates validation and the upsert to insertAgentInsight(). A rejected
   * payload (`ok: false`) and any thrown error are both returned as `isError`
   * tool results, so the handler itself never throws.
   */
  async ({ project, type, data, agent_name }) => {
    try {
      const db = getDb();
      const result = insertAgentInsight(db, { project, type, data, agentName: agent_name });
      if (!result.ok) {
        return { content: [{ type: 'text', text: `seo-intel ingest error: ${result.error}` }], isError: true };
      }
      const payload = {
        ok: true,
        project,
        type,
        insight_id: result.id,
        fingerprint: result.fingerprint,
        deduped: result.deduped,
        source: result.source,
        // insertAgentInsight reports last_seen as epoch ms; expose it as ISO-8601.
        last_seen: new Date(result.last_seen).toISOString(),
        hint: result.deduped
          ? 'Insight already existed; last_seen refreshed.'
          : 'New insight persisted. It will appear in the dashboard ledger and in get_intel(for=audit).',
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }],
        structuredContent: payload,
      };
    } catch (err) {
      return { content: [{ type: 'text', text: `seo-intel error: ${err.message}` }], isError: true };
    }
  }
);
|
|
366
|
+
|
|
236
367
|
/**
 * Connect the MCP server to a stdio transport, then announce readiness.
 */
async function main() {
  await server.connect(new StdioServerTransport());

  // The banner goes to stderr; the host typically surfaces this in its MCP logs panel.
  console.error(`[seo-intel-mcp] v${VERSION} ready on stdio. Tools: list_projects, get_intel, get_pages, list_keywords, get_headings, run_crawl, get_crawl_status, ingest_insight.`);
}
|
|
242
373
|
|
|
243
374
|
main().catch(err => {
|
package/package.json
CHANGED
package/seo-intel.png
CHANGED
|
Binary file
|
package/server.js
CHANGED
|
@@ -4,10 +4,10 @@ import { spawn } from 'child_process';
|
|
|
4
4
|
import { dirname, join, extname } from 'path';
|
|
5
5
|
import { fileURLToPath } from 'url';
|
|
6
6
|
import { checkForUpdates, getUpdateInfo } from './lib/updater.js';
|
|
7
|
+
import { readProgress, PROGRESS_FILE } from './lib/progress.js';
|
|
7
8
|
|
|
8
9
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
9
10
|
const PORT = parseInt(process.env.PORT || '3000', 10);
|
|
10
|
-
const PROGRESS_FILE = join(__dirname, '.extraction-progress.json');
|
|
11
11
|
const REPORTS_DIR = join(__dirname, 'reports');
|
|
12
12
|
|
|
13
13
|
|
|
@@ -100,23 +100,6 @@ const MIME = {
|
|
|
100
100
|
'.zip': 'application/zip',
|
|
101
101
|
};
|
|
102
102
|
|
|
103
|
-
// ── Read progress with PID liveness check (mirrors cli.js) ──
|
|
104
|
-
function readProgress() {
|
|
105
|
-
try {
|
|
106
|
-
if (!existsSync(PROGRESS_FILE)) return null;
|
|
107
|
-
const data = JSON.parse(readFileSync(PROGRESS_FILE, 'utf8'));
|
|
108
|
-
if (data.status === 'running' && data.pid) {
|
|
109
|
-
try { process.kill(data.pid, 0); } catch (e) {
|
|
110
|
-
if (e.code === 'ESRCH') {
|
|
111
|
-
data.status = 'crashed';
|
|
112
|
-
data.crashed_at = data.updated_at;
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
return data;
|
|
117
|
-
} catch { return null; }
|
|
118
|
-
}
|
|
119
|
-
|
|
120
103
|
// ── Parse JSON body from request ──
|
|
121
104
|
function readBody(req) {
|
|
122
105
|
return new Promise((resolve, reject) => {
|