seo-intel 1.5.27 → 1.5.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/lib/progress.js +37 -0
- package/mcp/server.js +75 -1
- package/package.json +1 -1
- package/server.js +1 -18
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.5.28 (2026-05-17)
|
|
4
|
+
|
|
5
|
+
### MCP — agents can now trigger crawls and watch progress
|
|
6
|
+
The MCP server gains its first **active** tools — agents move from read-only to actually doing work on the user's machine.
|
|
7
|
+
|
|
8
|
+
- **`run_crawl(project, stealth?, max_pages?)`** — spawn a crawl as a detached subprocess. Returns immediately with `{ started, pid, command, hint }`. Free tier — crawl page limits still apply (Solo unlocks unlimited). Refuses to start if any seo-intel job is already running (conflict guard mirrors the existing HTTP `/api/crawl` behaviour).
|
|
9
|
+
- **`get_crawl_status()`** — read the most recent job's progress: status (`running` / `completed` / `crashed` / `stopped` / `idle`), command, project, pid, timestamps. PID liveness is verified — a "running" job whose process died gets re-tagged as `crashed`.
|
|
10
|
+
|
|
11
|
+
A natural session now looks like: agent calls `run_crawl(carbium)` → polls `get_crawl_status()` every minute → once `completed`, calls `get_intel(carbium, for=raw)` and `get_pages(carbium)` to see new data. Free tier, end to end.
|
|
12
|
+
|
|
13
|
+
### Internal — shared progress reader
|
|
14
|
+
`server.js` and `mcp/server.js` now both read job state from `lib/progress.js` (the canonical implementation, with PID liveness detection). Eliminates a duplicate `readProgress()` and ensures any future progress-file schema changes propagate automatically.
|
|
15
|
+
|
|
3
16
|
## 1.5.27 (2026-05-16)
|
|
4
17
|
|
|
5
18
|
### MCP — three new free-tier read tools
|
package/lib/progress.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/progress.js — Single source of truth for the seo-intel job progress file.
|
|
3
|
+
*
|
|
4
|
+
* The CLI's crawl/extract/analyze/aeo/... commands all write their state to
|
|
5
|
+
* `.extraction-progress.json` in the package root. `server.js`, `mcp/server.js`,
|
|
6
|
+
* and any future consumer can read job status from here without spawning a
|
|
7
|
+
* subprocess.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { readFileSync, existsSync } from 'fs';
|
|
11
|
+
import { dirname, join } from 'path';
|
|
12
|
+
import { fileURLToPath } from 'url';
|
|
13
|
+
|
|
14
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
15
|
+
export const PROGRESS_FILE = join(__dirname, '..', '.extraction-progress.json');
|
|
16
|
+
|
|
17
|
+
/**
 * Read the current job progress, with PID liveness detection so a "running"
 * job whose process died gets re-tagged as "crashed".
 *
 * Best-effort by design: a missing, unreadable or malformed progress file
 * yields `null` instead of throwing, so every caller can treat "no usable
 * state" the same way.
 *
 * @param {string} [file=PROGRESS_FILE] Path of the progress file to read.
 *   Defaults to the shared job progress file.
 * @returns {object|null} The parsed progress record (possibly re-tagged as
 *   "crashed"), or `null` when the file is absent, cannot be read/parsed, or
 *   does not contain a JSON object.
 */
export function readProgress(file = PROGRESS_FILE) {
  try {
    if (!existsSync(file)) return null;

    const data = JSON.parse(readFileSync(file, 'utf8'));

    // Valid JSON is not necessarily a progress record: a truncated or foreign
    // file can parse to an array, string or number. Honour the object|null
    // contract rather than handing such a value to callers.
    if (data === null || typeof data !== 'object' || Array.isArray(data)) return null;

    if (data.status === 'running' && data.pid) {
      try {
        // Signal 0 only probes the process; nothing is delivered to it.
        process.kill(data.pid, 0);
      } catch (e) {
        // Only ESRCH (no such process) proves the job is gone. Any other
        // failure leaves the recorded status untouched.
        if (e.code === 'ESRCH') {
          data.status = 'crashed';
          data.crashed_at = data.updated_at;
        }
      }
    }

    return data;
  } catch {
    // Deliberate best-effort: I/O and parse errors are reported as "no state".
    return null;
  }
}
|
package/mcp/server.js
CHANGED
|
@@ -21,12 +21,14 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
|
21
21
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
22
22
|
import * as z from 'zod/v4';
|
|
23
23
|
import { readFileSync, readdirSync, existsSync } from 'fs';
|
|
24
|
+
import { spawn } from 'child_process';
|
|
24
25
|
import { dirname, join } from 'path';
|
|
25
26
|
import { fileURLToPath } from 'url';
|
|
26
27
|
|
|
27
28
|
import { getDb } from '../db/db.js';
|
|
28
29
|
import { getIntel, INTEL_SLICES, FREE_SLICES } from '../lib/intel.js';
|
|
29
30
|
import { isPro } from '../lib/license.js';
|
|
31
|
+
import { readProgress } from '../lib/progress.js';
|
|
30
32
|
|
|
31
33
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
32
34
|
const ROOT = join(__dirname, '..');
|
|
@@ -233,11 +235,83 @@ server.registerTool(
|
|
|
233
235
|
}
|
|
234
236
|
);
|
|
235
237
|
|
|
238
|
+
// ── Tool: run_crawl (free) ────────────────────────────────────────────────
|
|
239
|
+
server.registerTool(
  'run_crawl',
  {
    description: [
      'Trigger a background crawl for an existing project. Spawns the crawl as a detached subprocess and returns immediately — the crawl will keep running even if this MCP server exits. Use get_crawl_status to monitor progress, or call get_intel/get_pages once the crawl completes to see results.',
      '',
      'Conflict guard: refuses to start if any seo-intel job is already running. Free tier — crawl page limits still apply (configurable via setup / Solo license unlocks unlimited).',
    ].join('\n'),
    inputSchema: {
      project: z.string().describe('Existing project slug. Use list_projects to discover.'),
      stealth: z.boolean().optional().describe('Enable stealth browser mode for JS-heavy or anti-bot sites'),
      max_pages: z.number().int().positive().optional().describe('Override max pages per domain'),
    },
  },
  /**
   * Start `cli.js crawl <project>` as a detached child process.
   *
   * Returns an error result (isError: true) when the slug is malformed, the
   * project has no config file, another job is recorded as running, or the
   * child process could not be spawned. On success the result carries
   * `{ started, pid, project, command, hint }`.
   */
  async ({ project, stealth, max_pages }) => {
    // `project` is used both as a file-name component and as a CLI positional
    // argument, so refuse anything that could leave CONFIG_DIR or be parsed as
    // an option by cli.js.
    if (/[\\/]/.test(project) || project.startsWith('-')) {
      return {
        content: [{ type: 'text', text: `Invalid project slug "${project}": slugs must not contain path separators or start with "-". Use list_projects to discover valid slugs.` }],
        isError: true,
      };
    }

    const configPath = join(CONFIG_DIR, `${project}.json`);
    if (!existsSync(configPath)) {
      const available = listConfigProjects().map(p => p.project).join(', ') || '(none configured)';
      return {
        content: [{ type: 'text', text: `Project "${project}" not found. Available: ${available}. Use list_projects to discover, or run \`seo-intel setup\` to add a new project.` }],
        isError: true,
      };
    }

    // Conflict guard: only one seo-intel job at a time.
    const progress = readProgress();
    if (progress?.status === 'running') {
      return {
        content: [{ type: 'text', text: `A seo-intel job is already running (command="${progress.command}", project="${progress.project}", pid=${progress.pid}). Call get_crawl_status to monitor, or wait for it to finish before starting another.` }],
        isError: true,
      };
    }

    const args = ['cli.js', 'crawl', project];
    if (stealth) args.push('--stealth');
    if (max_pages) args.push('--max-pages', String(max_pages));

    const child = spawn(process.execPath, args, {
      cwd: ROOT,
      detached: true,
      stdio: 'ignore',
    });

    // A failed spawn emits 'error' asynchronously; with no listener that would
    // be an uncaught exception and take the whole MCP server down. Log to
    // stderr only, because stdout carries the MCP stdio transport.
    child.on('error', (err) => {
      console.error(`[seo-intel-mcp] run_crawl: could not spawn crawl for "${project}": ${err.message}`);
    });

    // When spawning fails the pid is undefined; do not claim the crawl started.
    if (child.pid === undefined) {
      return {
        content: [{ type: 'text', text: `Failed to start crawl for project "${project}": the subprocess could not be spawned. Check the MCP server logs for details.` }],
        isError: true,
      };
    }

    // Let this server exit without waiting for the detached crawl.
    child.unref();

    const result = {
      started: true,
      pid: child.pid,
      project,
      command: `node ${args.join(' ')}`,
      hint: 'Crawl is running detached. Call get_crawl_status to check progress (updates every few seconds), or call get_intel(project, for=raw) in a minute or two to see new data.',
    };
    return {
      content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
      structuredContent: result,
    };
  }
);
|
|
294
|
+
|
|
295
|
+
// ── Tool: get_crawl_status (free) ─────────────────────────────────────────
|
|
296
|
+
server.registerTool(
  'get_crawl_status',
  {
    description: 'Read the current state of the most recent seo-intel job (crawl/extract/analyze/etc). Returns status: running | completed | crashed | stopped | idle, plus project/command/pid/timestamps when available. Use this after run_crawl to monitor progress. Free tier.',
  },
  // Reports the shared progress record, or a synthetic "idle" record when
  // readProgress() yields nothing usable.
  async () => {
    let state = readProgress();
    if (!state) {
      state = {
        status: 'idle',
        note: 'No seo-intel job has been recorded since startup. Use run_crawl to start one.',
      };
    }

    const text = JSON.stringify(state, null, 2);
    return {
      content: [{ type: 'text', text }],
      structuredContent: state,
    };
  }
);
|
|
309
|
+
|
|
236
310
|
async function main() {
|
|
237
311
|
const transport = new StdioServerTransport();
|
|
238
312
|
await server.connect(transport);
|
|
239
313
|
// stderr is fine; the host typically surfaces this in its MCP logs panel.
|
|
240
|
-
console.error(`[seo-intel-mcp] v${VERSION} ready on stdio. Tools: list_projects, get_intel, get_pages, list_keywords, get_headings.`);
|
|
314
|
+
console.error(`[seo-intel-mcp] v${VERSION} ready on stdio. Tools: list_projects, get_intel, get_pages, list_keywords, get_headings, run_crawl, get_crawl_status.`);
|
|
241
315
|
}
|
|
242
316
|
|
|
243
317
|
main().catch(err => {
|
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -4,10 +4,10 @@ import { spawn } from 'child_process';
|
|
|
4
4
|
import { dirname, join, extname } from 'path';
|
|
5
5
|
import { fileURLToPath } from 'url';
|
|
6
6
|
import { checkForUpdates, getUpdateInfo } from './lib/updater.js';
|
|
7
|
+
import { readProgress, PROGRESS_FILE } from './lib/progress.js';
|
|
7
8
|
|
|
8
9
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
9
10
|
const PORT = parseInt(process.env.PORT || '3000', 10);
|
|
10
|
-
const PROGRESS_FILE = join(__dirname, '.extraction-progress.json');
|
|
11
11
|
const REPORTS_DIR = join(__dirname, 'reports');
|
|
12
12
|
|
|
13
13
|
|
|
@@ -100,23 +100,6 @@ const MIME = {
|
|
|
100
100
|
'.zip': 'application/zip',
|
|
101
101
|
};
|
|
102
102
|
|
|
103
|
-
// ── Read progress with PID liveness check (mirrors cli.js) ──
|
|
104
|
-
function readProgress() {
|
|
105
|
-
try {
|
|
106
|
-
if (!existsSync(PROGRESS_FILE)) return null;
|
|
107
|
-
const data = JSON.parse(readFileSync(PROGRESS_FILE, 'utf8'));
|
|
108
|
-
if (data.status === 'running' && data.pid) {
|
|
109
|
-
try { process.kill(data.pid, 0); } catch (e) {
|
|
110
|
-
if (e.code === 'ESRCH') {
|
|
111
|
-
data.status = 'crashed';
|
|
112
|
-
data.crashed_at = data.updated_at;
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
return data;
|
|
117
|
-
} catch { return null; }
|
|
118
|
-
}
|
|
119
|
-
|
|
120
103
|
// ── Parse JSON body from request ──
|
|
121
104
|
function readBody(req) {
|
|
122
105
|
return new Promise((resolve, reject) => {
|