@deventerprisesoftware/scrapi-mcp 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -4
- package/dist/index.js +155 -60
- package/package.json +46 -44
package/README.md
CHANGED
|
@@ -1,7 +1,13 @@
|
|
|
1
|
+

|
|
1
2
|
|
|
2
3
|
# ScrAPI MCP Server
|
|
3
4
|
|
|
4
|
-
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[npm package](https://www.npmjs.com/package/@deventerprisesoftware/scrapi-mcp)
|
|
7
|
+
[Docker image](https://hub.docker.com/r/deventerprisesoftware/scrapi-mcp)
|
|
8
|
+
[Smithery](https://smithery.ai/server/@DevEnterpriseSoftware/scrapi-mcp)
|
|
9
|
+
|
|
10
|
+
MCP server for using [ScrAPI](https://scrapi.tech) to scrape web pages.
|
|
5
11
|
|
|
6
12
|
ScrAPI is your ultimate web scraping solution, offering powerful, reliable, and easy-to-use features to extract data from any website effortlessly.
|
|
7
13
|
|
|
@@ -29,6 +35,14 @@ Optionally get an API key from the [ScrAPI website](https://scrapi.tech).
|
|
|
29
35
|
|
|
30
36
|
Without an API key you will be limited to one concurrent call and twenty free calls per day with minimal queuing capabilities.
|
|
31
37
|
|
|
38
|
+
### Cloud Server
|
|
39
|
+
|
|
40
|
+
The ScrAPI MCP Server is also available in the cloud over SSE at https://api.scrapi.tech/mcp/sse and over streamable HTTP at https://api.scrapi.tech/mcp.
|
|
41
|
+
|
|
42
|
+
Cloud MCP servers are not widely supported yet, but you can access this one directly from your own custom clients or use [MCP Inspector](https://github.com/modelcontextprotocol/inspector) to test it. There is currently no facility to pass through your API key when connecting to the cloud MCP server.
|
|
43
|
+
|
|
44
|
+

|
|
45
|
+
|
|
32
46
|
### Usage with Claude Desktop
|
|
33
47
|
|
|
34
48
|
Add the following to your `claude_desktop_config.json`:
|
|
@@ -38,7 +52,7 @@ Add the following to your `claude_desktop_config.json`:
|
|
|
38
52
|
```json
|
|
39
53
|
{
|
|
40
54
|
"mcpServers": {
|
|
41
|
-
"
|
|
55
|
+
"ScrAPI": {
|
|
42
56
|
"command": "docker",
|
|
43
57
|
"args": [
|
|
44
58
|
"run",
|
|
@@ -56,12 +70,12 @@ Add the following to your `claude_desktop_config.json`:
|
|
|
56
70
|
}
|
|
57
71
|
```
|
|
58
72
|
|
|
59
|
-
|
|
73
|
+
#### NPX
|
|
60
74
|
|
|
61
75
|
```json
|
|
62
76
|
{
|
|
63
77
|
"mcpServers": {
|
|
64
|
-
"
|
|
78
|
+
"ScrAPI": {
|
|
65
79
|
"command": "npx",
|
|
66
80
|
"args": [
|
|
67
81
|
"-y",
|
|
@@ -75,6 +89,8 @@ Add the following to your `claude_desktop_config.json`:
|
|
|
75
89
|
}
|
|
76
90
|
```
|
|
77
91
|
|
|
92
|
+

|
|
93
|
+
|
|
78
94
|
## Build
|
|
79
95
|
|
|
80
96
|
Docker build:
|
package/dist/index.js
CHANGED
|
@@ -1,30 +1,108 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import express from "express";
|
|
3
|
+
import cors from "cors";
|
|
2
4
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
5
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
6
|
+
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
4
7
|
import { z } from "zod";
|
|
8
|
+
// Port for the HTTP transport; overridable through the PORT environment variable.
const PORT = process.env.PORT || 5000;
// Default API key sent to ScrAPI when no per-session key is supplied.
// NOTE(review): the all-zero UUID presumably maps to the limited keyless tier — confirm against the ScrAPI API.
const SCRAPI_API_KEY = process.env.SCRAPI_API_KEY || "00000000-0000-0000-0000-000000000000";
const SCRAPI_SERVER_NAME = "ScrAPI MCP Server";
const SCRAPI_SERVER_VERSION = "0.1.0";
const app = express();
app.use(cors({
    origin: "*",
    exposedHeaders: ["Mcp-Session-Id", "mcp-protocol-version"],
    // Cross-origin (browser) clients may only send the request headers listed here.
    // MCP clients send the protocol-version header on requests, so it must be
    // allowed as well as exposed; otherwise the CORS preflight rejects the call.
    allowedHeaders: ["Content-Type", "mcp-session-id", "mcp-protocol-version"],
}));
app.use(express.json());
// Session configuration schema: an optional ScrAPI API key supplied per connection.
export const configSchema = z.object({
    scrapiApiKey: z.string().optional().describe("ScrAPI API key for scraping. Leave empty for default limited usage."),
});
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
23
|
+
/**
 * Decode the optional session configuration carried in the `config` query parameter.
 *
 * The parameter is expected to be a base64-encoded JSON object. The value comes
 * straight from the request URL, so it is treated as untrusted input.
 *
 * @param {{ query?: Record<string, unknown> }} req - Request-like object; only `req.query.config` is read.
 * @returns {object} The decoded configuration object, or `{}` when no `config` parameter was sent.
 * @throws {Error} When `config` is present but is not a string, does not decode to valid JSON,
 *   or decodes to something other than a plain JSON object.
 */
function parseConfig(req) {
    const configParam = req.query?.config;
    if (!configParam) {
        return {};
    }
    // A repeated query key arrives as an array; Buffer.from would then read it as raw bytes.
    if (typeof configParam !== "string") {
        throw new Error("Invalid config parameter: expected a single base64 string");
    }
    let decoded;
    try {
        // Buffer.from never throws on bad base64 (it skips invalid characters), so JSON.parse is the real gate.
        decoded = JSON.parse(Buffer.from(configParam, "base64").toString("utf8"));
    }
    catch (cause) {
        throw new Error("Invalid config parameter: not base64-encoded JSON", { cause });
    }
    // JSON.parse also accepts null, arrays and primitives, none of which is a usable config.
    if (decoded === null || typeof decoded !== "object" || Array.isArray(decoded)) {
        throw new Error("Invalid config parameter: base64 payload must be a JSON object");
    }
    return decoded;
}
|
|
31
|
+
// Create MCP server with your tools
|
|
32
|
+
export default function createServer({ config, }) {
|
|
33
|
+
const server = new McpServer({
|
|
34
|
+
name: SCRAPI_SERVER_NAME,
|
|
35
|
+
version: SCRAPI_SERVER_VERSION,
|
|
36
|
+
});
|
|
37
|
+
server.registerTool("scrape_url_html", {
|
|
38
|
+
title: "Scrape URL and respond with HTML",
|
|
39
|
+
description: "Use a URL to scrape a website using the ScrAPI service and retrieve the result as HTML. " +
|
|
40
|
+
"Use this for scraping website content that is difficult to access because of bot detection, captchas or even geolocation restrictions. " +
|
|
41
|
+
"The result will be in HTML which is preferable if advanced parsing is required.",
|
|
42
|
+
inputSchema: {
|
|
43
|
+
url: z
|
|
44
|
+
.string()
|
|
45
|
+
.url({ message: "Invalid URL" })
|
|
46
|
+
.describe("The URL to scrape"),
|
|
47
|
+
},
|
|
48
|
+
}, async ({ url }) => await scrapeUrl(url, "HTML"));
|
|
49
|
+
server.registerTool("scrape_url_markdown", {
|
|
50
|
+
title: "Scrape URL and respond with Markdown",
|
|
51
|
+
description: "Use a URL to scrape a website using the ScrAPI service and retrieve the result as Markdown. " +
|
|
52
|
+
"Use this for scraping website content that is difficult to access because of bot detection, captchas or even geolocation restrictions. " +
|
|
53
|
+
"The result will be in Markdown which is preferable if the text content of the webpage is important and not the structural information of the page.",
|
|
54
|
+
inputSchema: {
|
|
55
|
+
url: z
|
|
56
|
+
.string()
|
|
57
|
+
.url({ message: "Invalid URL" })
|
|
58
|
+
.describe("The URL to scrape"),
|
|
59
|
+
},
|
|
60
|
+
}, async ({ url }) => await scrapeUrl(url, "Markdown"));
|
|
61
|
+
async function scrapeUrl(url, format) {
|
|
62
|
+
var body = {
|
|
63
|
+
url: url,
|
|
64
|
+
useBrowser: true,
|
|
65
|
+
solveCaptchas: true,
|
|
66
|
+
acceptDialogs: true,
|
|
67
|
+
proxyType: "Residential",
|
|
68
|
+
responseFormat: format,
|
|
69
|
+
};
|
|
70
|
+
try {
|
|
71
|
+
const response = await fetch("https://api.scrapi.tech/v1/scrape", {
|
|
72
|
+
method: "POST",
|
|
73
|
+
headers: {
|
|
74
|
+
"User-Agent": `${SCRAPI_SERVER_NAME} - ${SCRAPI_SERVER_VERSION}`,
|
|
75
|
+
"Content-Type": "application/json",
|
|
76
|
+
"X-API-KEY": config.scrapiApiKey || SCRAPI_API_KEY,
|
|
77
|
+
},
|
|
78
|
+
body: JSON.stringify(body),
|
|
79
|
+
signal: AbortSignal.timeout(30000),
|
|
80
|
+
});
|
|
81
|
+
const data = await response.text();
|
|
82
|
+
if (response.ok) {
|
|
83
|
+
return {
|
|
84
|
+
content: [
|
|
85
|
+
{
|
|
86
|
+
type: "text",
|
|
87
|
+
mimeType: `text/${format.toLowerCase()}`,
|
|
88
|
+
text: data,
|
|
89
|
+
},
|
|
90
|
+
],
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
return {
|
|
94
|
+
content: [
|
|
95
|
+
{
|
|
96
|
+
type: "text",
|
|
97
|
+
text: data,
|
|
98
|
+
},
|
|
99
|
+
],
|
|
100
|
+
isError: true,
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
catch (error) {
|
|
104
|
+
console.error("Error calling API:", error);
|
|
105
|
+
}
|
|
28
106
|
const response = await fetch("https://api.scrapi.tech/v1/scrape", {
|
|
29
107
|
method: "POST",
|
|
30
108
|
headers: {
|
|
@@ -36,57 +114,74 @@ async function scrapeUrl(url, format) {
|
|
|
36
114
|
signal: AbortSignal.timeout(30000),
|
|
37
115
|
});
|
|
38
116
|
const data = await response.text();
|
|
39
|
-
if (response.ok) {
|
|
40
|
-
return {
|
|
41
|
-
content: [
|
|
42
|
-
{
|
|
43
|
-
type: "text",
|
|
44
|
-
mimeType: `text/${format.toLowerCase()}`,
|
|
45
|
-
text: data,
|
|
46
|
-
},
|
|
47
|
-
],
|
|
48
|
-
};
|
|
49
|
-
}
|
|
50
117
|
return {
|
|
51
118
|
content: [
|
|
52
119
|
{
|
|
53
120
|
type: "text",
|
|
121
|
+
mimeType: `text/${format.toLowerCase()}`,
|
|
54
122
|
text: data,
|
|
55
123
|
},
|
|
56
124
|
],
|
|
57
|
-
isError: true,
|
|
58
125
|
};
|
|
59
126
|
}
|
|
127
|
+
return server.server;
|
|
128
|
+
}
|
|
129
|
+
/**
 * MCP endpoint for the HTTP transport.
 *
 * Each request gets its own MCP server and StreamableHTTPServerTransport, built
 * from the configuration decoded from the request. Both are closed when the
 * response closes. Any failure before headers are sent is answered with a
 * JSON-RPC "Internal server error" (HTTP 500).
 */
app.all("/mcp", async (req, res) => {
    try {
        // Decode the optional per-request configuration from the query string.
        const rawConfig = parseConfig(req);
        // Validate it, falling back to the process-wide API key when none was supplied.
        const config = configSchema.parse({ scrapiApiKey: rawConfig.scrapiApiKey || SCRAPI_API_KEY });
        const server = createServer({ config });
        const transport = new StreamableHTTPServerTransport({
            // NOTE(review): per the MCP SDK, an undefined generator selects stateless mode (no session ids) — confirm.
            sessionIdGenerator: undefined,
        });
        // Clean up on request close. Both close() calls return promises; observe them
        // so a failed close is logged instead of becoming an unhandled rejection.
        res.on("close", () => {
            Promise.allSettled([transport.close(), server.close()]).then((outcomes) => {
                for (const outcome of outcomes) {
                    if (outcome.status === "rejected") {
                        console.error("Error closing MCP connection:", outcome.reason);
                    }
                }
            });
        });
        await server.connect(transport);
        await transport.handleRequest(req, res, req.body);
    }
    catch (error) {
        console.error("Error handling MCP request:", error);
        // Only write an error body if the transport has not already started the response.
        if (!res.headersSent) {
            res.status(500).json({
                jsonrpc: "2.0",
                error: { code: -32603, message: "Internal server error" },
                id: null,
            });
        }
    }
});
|
|
160
|
+
/**
 * Start the server in the mode selected by the TRANSPORT environment variable.
 *
 * - "http": listen on PORT with the Express app.
 * - anything else (default "stdio"): serve a single MCP server over stdin/stdout
 *   using the process-wide API key.
 *
 * @returns {Promise<void>} Resolves once the chosen transport has been started.
 */
async function main() {
    const transport = process.env.TRANSPORT || "stdio";
    if (transport === "http") {
        // Run in HTTP mode. Express 5 reports listen failures (e.g. EADDRINUSE) by
        // calling this callback with the error, so it must be checked before
        // announcing that the server is up.
        app.listen(PORT, (error) => {
            if (error) {
                console.error(`Failed to start MCP HTTP Server on port ${PORT}:`, error);
                process.exit(1);
            }
            console.log(`MCP HTTP Server listening on port ${PORT}`);
        });
    }
    else {
        const scrapiApiKey = SCRAPI_API_KEY;
        // Create server with configuration
        const server = createServer({
            config: {
                scrapiApiKey,
            },
        });
        // Start receiving messages on stdin and sending messages on stdout
        const stdioTransport = new StdioServerTransport();
        await server.connect(stdioTransport);
        // Logged on stderr because stdout carries the MCP protocol stream in this mode.
        console.error("MCP Server running in stdio mode");
    }
}
|
|
89
|
-
|
|
90
|
-
|
|
183
|
+
// Entry point: launch the server; any startup failure is logged and ends the process with exit code 1.
const reportFatalAndExit = (failure) => {
    console.error("Server error:", failure);
    process.exit(1);
};
main().catch(reportFatalAndExit);
|
package/package.json
CHANGED
|
@@ -1,44 +1,46 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@deventerprisesoftware/scrapi-mcp",
|
|
3
|
-
"version": "0.0
|
|
4
|
-
"description": "MCP server for using ScrAPI to scrape web pages.",
|
|
5
|
-
"keywords": [
|
|
6
|
-
"mcp",
|
|
7
|
-
"web scraper",
|
|
8
|
-
"web scraping",
|
|
9
|
-
"web data extractor",
|
|
10
|
-
"claude",
|
|
11
|
-
"ai"
|
|
12
|
-
],
|
|
13
|
-
"homepage": "https://scrapi.tech",
|
|
14
|
-
"bugs": {
|
|
15
|
-
"url": "https://github.com/DevEnterpriseSoftware/scrapi-mcp/issues"
|
|
16
|
-
},
|
|
17
|
-
"repository": {
|
|
18
|
-
"type": "git",
|
|
19
|
-
"url": "git+https://github.com/DevEnterpriseSoftware/scrapi-mcp.git"
|
|
20
|
-
},
|
|
21
|
-
"license": "MIT",
|
|
22
|
-
"author": "DevEnterprise Software (https://deventerprise.com)",
|
|
23
|
-
"type": "module",
|
|
24
|
-
"bin":
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
"
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
"
|
|
37
|
-
"zod": "^3.
|
|
38
|
-
},
|
|
39
|
-
"devDependencies": {
|
|
40
|
-
"@types/
|
|
41
|
-
"
|
|
42
|
-
"
|
|
43
|
-
|
|
44
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "@deventerprisesoftware/scrapi-mcp",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "MCP server for using ScrAPI to scrape web pages.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"mcp",
|
|
7
|
+
"web scraper",
|
|
8
|
+
"web scraping",
|
|
9
|
+
"web data extractor",
|
|
10
|
+
"claude",
|
|
11
|
+
"ai"
|
|
12
|
+
],
|
|
13
|
+
"homepage": "https://scrapi.tech",
|
|
14
|
+
"bugs": {
|
|
15
|
+
"url": "https://github.com/DevEnterpriseSoftware/scrapi-mcp/issues"
|
|
16
|
+
},
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "git+https://github.com/DevEnterpriseSoftware/scrapi-mcp.git"
|
|
20
|
+
},
|
|
21
|
+
"license": "MIT",
|
|
22
|
+
"author": "DevEnterprise Software (https://deventerprise.com)",
|
|
23
|
+
"type": "module",
|
|
24
|
+
"bin": "dist/index.js",
|
|
25
|
+
"files": [
|
|
26
|
+
"dist"
|
|
27
|
+
],
|
|
28
|
+
"scripts": {
|
|
29
|
+
"build": "tsc && shx chmod +x dist/*.js",
|
|
30
|
+
"prepare": "npm run build",
|
|
31
|
+
"watch": "tsc --watch"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"@modelcontextprotocol/sdk": "^1.17.4",
|
|
35
|
+
"cors": "^2.8.5",
|
|
36
|
+
"express": "^5.1.0",
|
|
37
|
+
"zod": "^3.25.76"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@types/cors": "^2.8.19",
|
|
41
|
+
"@types/express": "^5.0.3",
|
|
42
|
+
"@types/node": "^24.3.0",
|
|
43
|
+
"shx": "^0.4.0",
|
|
44
|
+
"typescript": "^5.9.2"
|
|
45
|
+
}
|
|
46
|
+
}
|