scorchcrawl-mcp 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +117 -0
- package/package.json +37 -0
package/dist/cli.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* scorchcrawl-mcp CLI
|
|
4
|
+
*
|
|
5
|
+
* A thin wrapper that connects to a ScorchCrawl server and exposes its MCP
|
|
6
|
+
* tools over stdio transport. This lets MCP clients (VS Code, Copilot CLI,
|
|
7
|
+
* etc.) use a remote ScorchCrawl server as if it were a local MCP server.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* SCORCHCRAWL_URL=http://localhost:24787 scorchcrawl-mcp
|
|
11
|
+
*
|
|
12
|
+
* Or with a remote server + API key:
|
|
13
|
+
* SCORCHCRAWL_URL=https://your-server.com/mcp-api/scorchcrawl/YOUR_KEY scorchcrawl-mcp
|
|
14
|
+
*
|
|
15
|
+
* Environment variables:
|
|
16
|
+
* SCORCHCRAWL_URL - Base URL of the ScorchCrawl MCP server (optional, defaults to http://localhost:24787)
|
|
17
|
+
* GITHUB_TOKEN - GitHub PAT for Copilot SDK agent (optional, sent as x-copilot-token)
|
|
18
|
+
* SCORCHCRAWL_LOCAL_PROXY - Set to "true" to route scraping through your local IP
|
|
19
|
+
*/
|
|
20
|
+
import { config } from 'dotenv';
|
|
21
|
+
config({ quiet: true });
|
|
22
|
+
// Runtime configuration, read once from the environment at module load.
const {
    SCORCHCRAWL_URL: urlFromEnv,
    GITHUB_TOKEN,
    SCORCHCRAWL_LOCAL_PROXY: localProxyFlag,
} = process.env;
// An unset or empty URL falls back to the local default server address.
const SCORCHCRAWL_URL = urlFromEnv || 'http://localhost:24787';
// Only the exact string "true" enables local proxy mode.
const LOCAL_PROXY = localProxyFlag === 'true';
|
|
25
|
+
/**
 * Session id issued by the server in the `Mcp-Session-Id` response header.
 * The MCP Streamable HTTP transport requires the client to echo it on every
 * later request; it stays undefined until a server hands one out.
 */
let mcpSessionId;

/**
 * Pick the JSON-RPC message that answers `requestId` out of an SSE body.
 *
 * A stream may carry other messages (e.g. notifications) ahead of the actual
 * reply, so every `data:` payload is parsed and the one whose `id` matches is
 * preferred. When nothing matches, the first parsed message is returned.
 *
 * @param {string} body - Raw text of the event stream.
 * @param {*} requestId - The `id` of the request that was sent.
 * @returns {*} The selected message, or undefined when no event held JSON data.
 */
function pickSseMessage(body, requestId) {
    const messages = [];
    // SSE permits CRLF, CR or LF line endings; normalise so events split on blank lines.
    const events = body.replace(/\r\n?/g, '\n').split(/\n{2,}/);
    for (const event of events) {
        const data = event
            .split('\n')
            .filter((line) => line.startsWith('data:'))
            .map((line) => line.slice(5).trim())
            .join('\n');
        if (!data) {
            continue;
        }
        try {
            messages.push(JSON.parse(data));
        }
        catch {
            // Deliberately skipped: an event whose data is not JSON cannot be
            // the reply; the caller reports an error if nothing usable is left.
        }
    }
    const isReply = (message) => message !== null
        && typeof message === 'object'
        && message.id === requestId
        && ('result' in message || 'error' in message);
    return messages.find(isReply) ?? messages[0];
}

/**
 * Forward a JSON-RPC request to the remote ScorchCrawl server.
 *
 * The request is POSTed to `<SCORCHCRAWL_URL>/mcp`. `GITHUB_TOKEN`, when set,
 * is sent as `x-copilot-token`, and a session id previously issued by the
 * server is sent as `Mcp-Session-Id`. The reply may be plain JSON or an SSE
 * stream.
 *
 * This function does not reject for network or response-format problems:
 * those are returned as a JSON-RPC error object with code -32603.
 *
 * @param {object} request - Parsed JSON-RPC message to forward.
 * @returns {Promise<object>} The server's JSON-RPC reply, or an error object.
 */
async function forwardToServer(request) {
    const url = `${SCORCHCRAWL_URL.replace(/\/$/, '')}/mcp`;
    const headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json, text/event-stream',
    };
    if (GITHUB_TOKEN) {
        headers['x-copilot-token'] = GITHUB_TOKEN;
    }
    // A fresh initialize starts a new session, so a stale id must not be sent.
    if (request?.method === 'initialize') {
        mcpSessionId = undefined;
    }
    if (mcpSessionId) {
        headers['Mcp-Session-Id'] = mcpSessionId;
    }
    try {
        const response = await fetch(url, {
            method: 'POST',
            headers,
            body: JSON.stringify(request),
        });
        const issuedSessionId = response.headers.get('mcp-session-id');
        if (issuedSessionId) {
            mcpSessionId = issuedSessionId;
        }
        const text = await response.text();
        // Trust the declared content type first; the prefix checks keep the
        // original heuristic for servers that omit or mislabel it.
        const contentType = response.headers.get('content-type') ?? '';
        const isEventStream = contentType.includes('text/event-stream')
            || text.startsWith('event:')
            || text.startsWith('data:');
        if (isEventStream) {
            const message = pickSseMessage(text, request?.id);
            if (message !== undefined) {
                return message;
            }
        }
        // Handle direct JSON responses
        if (text.trim().startsWith('{')) {
            return JSON.parse(text);
        }
        return {
            jsonrpc: '2.0',
            error: { code: -32603, message: `Unexpected response: ${text.substring(0, 200)}` },
            id: request?.id,
        };
    }
    catch (err) {
        return {
            jsonrpc: '2.0',
            error: { code: -32603, message: `Connection failed: ${err.message}` },
            id: request?.id,
        };
    }
}
|
|
69
|
+
/**
 * Read newline-delimited JSON-RPC messages from stdin, forward each one to the
 * server, and write the replies to stdout as one JSON document per line.
 *
 * Status messages go to stderr so stdout carries only protocol traffic.
 * Lines that are not valid JSON are answered with a -32700 parse error whose
 * `id` is null, as JSON-RPC 2.0 requires. Notifications (objects without an
 * `id` member) are forwarded but never answered. When stdin ends, any
 * unterminated final line is processed and in-flight requests are allowed to
 * finish before the process exits with status 0.
 *
 * @returns {Promise<void>} Resolves once the stdin listeners are registered.
 */
async function main() {
    const serverUrl = SCORCHCRAWL_URL;
    process.stderr.write(`[scorchcrawl-mcp] Connecting to ${serverUrl}\n`);
    if (!serverUrl || serverUrl === 'http://localhost:24787') {
        process.stderr.write(`[scorchcrawl-mcp] Set SCORCHCRAWL_URL to change the server address\n`);
    }
    if (LOCAL_PROXY) {
        process.stderr.write('[scorchcrawl-mcp] Local proxy mode: ON (scraping through your IP)\n');
    }

    const writeMessage = (message) => {
        process.stdout.write(`${JSON.stringify(message)}\n`);
    };

    // Parse, forward and answer a single line of input.
    const handleLine = async (line) => {
        const trimmed = line.trim();
        if (!trimmed) {
            return;
        }
        let request;
        try {
            request = JSON.parse(trimmed);
        }
        catch (err) {
            writeMessage({
                jsonrpc: '2.0',
                error: { code: -32700, message: `Parse error: ${err.message}` },
                id: null,
            });
            return;
        }
        let response;
        try {
            response = await forwardToServer(request);
        }
        catch (err) {
            // Kept separate from the parse step so a forwarding failure is not
            // mislabelled as a parse error.
            response = {
                jsonrpc: '2.0',
                error: { code: -32603, message: `Internal error: ${err.message}` },
                id: request?.id ?? null,
            };
        }
        // JSON-RPC 2.0: a notification (no "id" member) must not be replied to.
        const isNotification = request !== null
            && typeof request === 'object'
            && !Array.isArray(request)
            && !('id' in request);
        if (isNotification) {
            return;
        }
        writeMessage(response);
    };

    // Work still in flight; drained before exiting when stdin closes.
    const pending = new Set();
    const dispatch = (lines) => {
        const task = (async () => {
            for (const line of lines) {
                await handleLine(line);
            }
        })()
            .catch((err) => {
                process.stderr.write(`[scorchcrawl-mcp] Failed to handle message: ${err.message}\n`);
            })
            .finally(() => {
                pending.delete(task);
            });
        pending.add(task);
    };

    // Read from stdin line by line
    let buffer = '';
    process.stdin.setEncoding('utf8');
    process.stdin.on('data', (chunk) => {
        buffer += chunk;
        // Process complete lines; the trailing partial line stays buffered.
        const lines = buffer.split('\n');
        buffer = lines.pop() || '';
        dispatch(lines);
    });
    process.stdin.on('end', async () => {
        // A final message without a trailing newline is still in the buffer.
        if (buffer.trim()) {
            dispatch([buffer]);
            buffer = '';
        }
        await Promise.allSettled([...pending]);
        process.exit(0);
    });
}
|
|
114
|
+
// Entry point: a rejection from main() is reported on stderr and the process
// exits with status 1.
const reportFatalError = (error) => {
    const { message } = error;
    process.stderr.write(`[scorchcrawl-mcp] Fatal error: ${message}\n`);
    process.exit(1);
};
main().catch(reportFatalError);
|
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "scorchcrawl-mcp",
|
|
3
|
+
"version": "1.0.1",
|
|
4
|
+
"description": "MCP client for ScorchCrawl — connect Copilot to a self-hosted stealth web scraping server",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"scorchcrawl-mcp": "dist/cli.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"dist"
|
|
11
|
+
],
|
|
12
|
+
"publishConfig": {
|
|
13
|
+
"access": "public"
|
|
14
|
+
},
|
|
15
|
+
"scripts": {
|
|
16
|
+
"build": "tsc",
|
|
17
|
+
"start": "node dist/cli.js",
|
|
18
|
+
"prepare": "npm run build"
|
|
19
|
+
},
|
|
20
|
+
"license": "AGPL-3.0",
|
|
21
|
+
"dependencies": {
|
|
22
|
+
"dotenv": "^17.2.2"
|
|
23
|
+
},
|
|
24
|
+
"engines": {
|
|
25
|
+
"node": ">=18.0.0"
|
|
26
|
+
},
|
|
27
|
+
"keywords": [
|
|
28
|
+
"mcp",
|
|
29
|
+
"scorchcrawl",
|
|
30
|
+
"copilot",
|
|
31
|
+
"web-scraping",
|
|
32
|
+
"stealth"
|
|
33
|
+
],
|
|
34
|
+
"devDependencies": {
|
|
35
|
+
"@types/node": "^25.3.5"
|
|
36
|
+
}
|
|
37
|
+
}
|