@deventerprisesoftware/scrapi-mcp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -0
- package/dist/index.js +92 -0
- package/package.json +43 -0
package/README.md
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
|
|
2
|
+
# ScrAPI MCP Server
|
|
3
|
+
|
|
4
|
+
MCP server for using ScrAPI to scrape web pages.
|
|
5
|
+
|
|
6
|
+
ScrAPI is your ultimate web scraping solution, offering powerful, reliable, and easy-to-use features to extract data from any website effortlessly.
|
|
7
|
+
|
|
8
|
+
## Tools
|
|
9
|
+
|
|
10
|
+
1. `scrape_url_html`
|
|
11
|
+
- Use a URL to scrape a website using the ScrAPI service and retrieve the result as HTML.
|
|
12
|
+
Use this for scraping website content that is difficult to access because of bot detection, captchas or even geolocation restrictions.
|
|
13
|
+
The result will be in HTML which is preferable if advanced parsing is required.
|
|
14
|
+
- Input: `url` (string)
|
|
15
|
+
- Returns: HTML content of the URL
|
|
16
|
+
|
|
17
|
+
2. `scrape_url_markdown`
|
|
18
|
+
- Use a URL to scrape a website using the ScrAPI service and retrieve the result as Markdown.
|
|
19
|
+
Use this for scraping website content that is difficult to access because of bot detection, captchas or even geolocation restrictions.
|
|
20
|
+
The result will be in Markdown which is preferable if the text content of the webpage is important and not the structural information of the page.
|
|
21
|
+
- Input: `url` (string)
|
|
22
|
+
- Returns: Markdown content of the URL
|
|
23
|
+
|
|
24
|
+
## Setup
|
|
25
|
+
|
|
26
|
+
### API Key (optional)
|
|
27
|
+
|
|
28
|
+
Optionally get an API key from the [ScrAPI website](https://scrapi.tech).
|
|
29
|
+
|
|
30
|
+
Without an API key you will be limited to one concurrent call and twenty free calls per day with minimal queuing capabilities.
|
|
31
|
+
|
|
32
|
+
### Usage with Claude Desktop
|
|
33
|
+
|
|
34
|
+
Add the following to your `claude_desktop_config.json`:
|
|
35
|
+
|
|
36
|
+
#### Docker
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
{
|
|
40
|
+
"mcpServers": {
|
|
41
|
+
"scrapi": {
|
|
42
|
+
"command": "docker",
|
|
43
|
+
"args": [
|
|
44
|
+
"run",
|
|
45
|
+
"-i",
|
|
46
|
+
"--rm",
|
|
47
|
+
"-e",
|
|
48
|
+
"SCRAPI_API_KEY",
|
|
49
|
+
"deventerprisesoftware/scrapi-mcp"
|
|
50
|
+
],
|
|
51
|
+
"env": {
|
|
52
|
+
"SCRAPI_API_KEY": "<YOUR_API_KEY>"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
#### NPX
|
|
60
|
+
|
|
61
|
+
```json
|
|
62
|
+
{
|
|
63
|
+
"mcpServers": {
|
|
64
|
+
"scrapi": {
|
|
65
|
+
"command": "npx",
|
|
66
|
+
"args": [
|
|
67
|
+
"-y",
|
|
68
|
+
"@deventerprisesoftware/scrapi-mcp"
|
|
69
|
+
],
|
|
70
|
+
"env": {
|
|
71
|
+
"SCRAPI_API_KEY": "<YOUR_API_KEY>"
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Build
|
|
79
|
+
|
|
80
|
+
Docker build:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
docker build -t deventerprisesoftware/scrapi-mcp -f Dockerfile .
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## License
|
|
87
|
+
|
|
88
|
+
This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
|
+
import { z } from "zod";
|
|
5
|
+
// Identity reported by the MCP server and reused in the outgoing User-Agent header.
const SCRAPI_SERVER_NAME = "ScrAPI MCP Server";
const SCRAPI_SERVER_VERSION = "0.0.1";

// API key sent with every scrape request. `||` (not `??`) is deliberate: an
// unset OR empty SCRAPI_API_KEY both fall back to the all-zero placeholder key.
const SCRAPI_API_KEY = process.env.SCRAPI_API_KEY || "00000000-0000-0000-0000-000000000000";

// The single MCP server instance that the tools below are registered on.
const server = new McpServer({ name: SCRAPI_SERVER_NAME, version: SCRAPI_SERVER_VERSION });
|
|
12
|
+
// Sentence shared verbatim by both tool descriptions.
const SCRAPE_WHEN_TO_USE =
  "Use this for scraping website content that is difficult to access because of bot detection, captchas or even geolocation restrictions.";

// Builds the input shape for a scrape tool: one required, URL-validated string.
const scrapeInputShape = () => ({ url: z.string().url({ message: "Invalid URL" }) });

// Tool: scrape a page and return it as HTML.
server.tool(
  "scrape_url_html",
  [
    "Use a URL to scrape a website using the ScrAPI service and retrieve the result as HTML.",
    SCRAPE_WHEN_TO_USE,
    "The result will be in HTML which is preferable if advanced parsing is required.",
  ].join(" "),
  scrapeInputShape(),
  ({ url }) => scrapeUrl(url, "HTML"),
);

// Tool: scrape a page and return it as Markdown.
server.tool(
  "scrape_url_markdown",
  [
    "Use a URL to scrape a website using the ScrAPI service and retrieve the result as Markdown.",
    SCRAPE_WHEN_TO_USE,
    "The result will be in Markdown which is preferable if the text content of the webpage is important and not the structural information of the page.",
  ].join(" "),
  scrapeInputShape(),
  ({ url }) => scrapeUrl(url, "Markdown"),
);
|
|
18
|
+
/**
 * Scrape a URL through the ScrAPI HTTP API and wrap the outcome as an MCP tool result.
 *
 * The request is attempted once; if it throws (network failure, timeout, ...)
 * the error is logged to stderr and the request is attempted one more time.
 * A non-OK HTTP status is never retried and is always reported with
 * `isError: true`, on the first attempt as well as on the retry. If the retry
 * also throws, an `isError` result describing the failure is returned instead
 * of letting the rejection escape.
 *
 * @param {string} url - Address of the page to scrape.
 * @param {string} format - Value sent as `responseFormat`; its lower-cased form
 *   is used as the `text/...` mime type of a successful result.
 * @returns {Promise<{content: Array<object>, isError?: boolean}>} Tool result
 *   holding the response body as a single text item.
 */
async function scrapeUrl(url, format) {
  const payload = JSON.stringify({
    url,
    useBrowser: true,
    solveCaptchas: true,
    acceptDialogs: true,
    proxyType: "Residential",
    responseFormat: format,
  });

  // Performs one request and maps the HTTP response to a tool result.
  // Throws when the request itself fails (network error or timeout).
  const requestOnce = async () => {
    const response = await fetch("https://api.scrapi.tech/v1/scrape", {
      method: "POST",
      headers: {
        "User-Agent": `${SCRAPI_SERVER_NAME} - ${SCRAPI_SERVER_VERSION}`,
        "Content-Type": "application/json",
        "X-API-KEY": SCRAPI_API_KEY,
      },
      body: payload,
      // Created per attempt: a signal that already timed out would abort the retry immediately.
      signal: AbortSignal.timeout(30000),
    });
    const text = await response.text();

    if (!response.ok) {
      // The API answered with an error status; surface its body as a tool error.
      return { content: [{ type: "text", text }], isError: true };
    }

    return {
      content: [{ type: "text", mimeType: `text/${format.toLowerCase()}`, text }],
    };
  };

  try {
    return await requestOnce();
  } catch (error) {
    // First failure: log and fall through to a single retry.
    console.error("Error calling API:", error);
  }

  try {
    return await requestOnce();
  } catch (error) {
    console.error("Error calling API:", error);
    const reason = error instanceof Error ? error.message : String(error);
    return {
      content: [{ type: "text", text: `Error calling API: ${reason}` }],
      isError: true,
    };
  }
}
|
|
84
|
+
// Entry point: attach the server to a stdio transport. The status line goes to
// stderr via console.error rather than stdout. Any failure while constructing
// the transport or connecting is logged and ends the process with exit code 1.
try {
  await server.connect(new StdioServerTransport());
  console.error(`${SCRAPI_SERVER_NAME} running on stdio.`);
} catch (startupError) {
  console.error("Fatal error running server:", startupError);
  process.exit(1);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@deventerprisesoftware/scrapi-mcp",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "MCP server for using ScrAPI to scrape web pages.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"mcp",
|
|
7
|
+
"scraper",
|
|
8
|
+
"scraping",
|
|
9
|
+
"extractor",
|
|
10
|
+
"crawler"
|
|
11
|
+
],
|
|
12
|
+
"homepage": "https://scrapi.tech",
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/DevEnterpriseSoftware/scrapi-mcp/issues"
|
|
15
|
+
},
|
|
16
|
+
"repository": {
|
|
17
|
+
"type": "git",
|
|
18
|
+
"url": "git+https://github.com/DevEnterpriseSoftware/scrapi-mcp.git"
|
|
19
|
+
},
|
|
20
|
+
"license": "MIT",
|
|
21
|
+
"author": "DevEnterprise Software (https://deventerprise.com)",
|
|
22
|
+
"type": "module",
|
|
23
|
+
"bin": {
|
|
24
|
+
"mcp-server-scrapi": "dist/index.js"
|
|
25
|
+
},
|
|
26
|
+
"files": [
|
|
27
|
+
"dist"
|
|
28
|
+
],
|
|
29
|
+
"scripts": {
|
|
30
|
+
"build": "tsc && shx chmod +x dist/*.js",
|
|
31
|
+
"prepare": "npm run build",
|
|
32
|
+
"watch": "tsc --watch"
|
|
33
|
+
},
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"@modelcontextprotocol/sdk": "^1.9.0",
|
|
36
|
+
"zod": "^3.24.2"
|
|
37
|
+
},
|
|
38
|
+
"devDependencies": {
|
|
39
|
+
"@types/node": "^22.14.0",
|
|
40
|
+
"shx": "^0.4.0",
|
|
41
|
+
"typescript": "^5.8.3"
|
|
42
|
+
}
|
|
43
|
+
}
|