scrape-do-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +100 -0
- package/package.json +38 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#!/usr/bin/env node
"use strict";
// CommonJS interop helper emitted by the TypeScript compiler: wraps a
// non-ES-module export so `.default` access works uniformly.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const mcp_js_1 = require("@modelcontextprotocol/sdk/server/mcp.js");
const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
const zod_1 = require("zod");
const axios_1 = __importDefault(require("axios"));
// API token is read once at startup; each tool handler checks it and
// returns a friendly setup message when it is unset.
const SCRAPE_DO_TOKEN = process.env.SCRAPE_DO_TOKEN || "";
// Base endpoint for plain scraping; SERP requests append /plugin/google/search.
const SCRAPE_API_BASE = "https://api.scrape.do";
// MCP server instance; the tools below are registered on it, and main()
// connects it to a stdio transport.
const server = new mcp_js_1.McpServer({
    name: "scrape-do-mcp",
    version: "0.1.0",
});
// ─── Tool 1: scrape_url ──────────────────────────────────────────────────────
// Fetches a URL through the Scrape.do proxy API and returns the page body as
// MCP text content. Errors are reported via { isError: true } rather than thrown.
server.tool("scrape_url", "Scrape any webpage and return its content as Markdown. Automatically bypasses Cloudflare, WAFs, CAPTCHAs, and anti-bot protection. Supports JavaScript-rendered pages.", {
    url: zod_1.z.string().url().describe("The target URL to scrape"),
    render_js: zod_1.z.boolean().optional().default(false).describe("Render JavaScript (use for React/Vue/SPA pages)"),
    super_proxy: zod_1.z.boolean().optional().default(false).describe("Use residential/mobile proxies for harder-to-detect requests (costs 10 credits instead of 1)"),
    output: zod_1.z.enum(["markdown", "raw"]).optional().default("markdown").describe("Output format: markdown (default) or raw HTML"),
}, async ({ url, render_js, super_proxy, output }) => {
    // Fail fast with setup guidance when the API token is missing.
    if (!SCRAPE_DO_TOKEN) {
        return {
            content: [{ type: "text", text: "Error: SCRAPE_DO_TOKEN is not set. Get your free token at https://app.scrape.do" }],
            isError: true,
        };
    }
    try {
        const response = await axios_1.default.get(SCRAPE_API_BASE, {
            params: {
                token: SCRAPE_DO_TOKEN,
                url,
                render: render_js,
                super: super_proxy,
                output,
            },
            timeout: 60000, // JS-rendered pages can take a long time upstream
        });
        // axios auto-parses JSON responses, so response.data may be an object.
        // MCP text content must be a string — serialize non-string payloads
        // instead of letting them stringify as "[object Object]".
        const body = typeof response.data === "string"
            ? response.data
            : JSON.stringify(response.data, null, 2);
        return {
            content: [{ type: "text", text: body }],
        };
    }
    catch (error) {
        // error.response.data is often a parsed object for HTTP errors;
        // stringify it so the client sees the real API error payload.
        const payload = error.response?.data || error.message;
        const msg = typeof payload === "string" ? payload : JSON.stringify(payload);
        return {
            content: [{ type: "text", text: `Error: ${msg}` }],
            isError: true,
        };
    }
});
// ─── Tool 2: google_search ───────────────────────────────────────────────────
// Queries the Scrape.do Google SERP plugin and returns the structured result
// set as pretty-printed JSON text. Errors are reported via { isError: true }.
server.tool("google_search", "Search Google and return structured SERP results as JSON. Returns organic results, knowledge graph, local businesses, news stories, related questions (People Also Ask), video results, and more.", {
    query: zod_1.z.string().describe("Search query, e.g. 'best python frameworks 2026'"),
    country: zod_1.z.string().optional().default("us").describe("Country code for results, e.g. 'us', 'cn', 'gb', 'jp'"),
    language: zod_1.z.string().optional().default("en").describe("Interface language, e.g. 'en', 'zh', 'ja', 'de'"),
    page: zod_1.z.number().optional().default(1).describe("Page number (1 = first page, 2 = second page)"),
    time_period: zod_1.z.enum(["", "last_hour", "last_day", "last_week", "last_month", "last_year"]).optional().default("").describe("Filter results by time period"),
    device: zod_1.z.enum(["desktop", "mobile"]).optional().default("desktop").describe("Device type affecting SERP layout"),
}, async ({ query, country, language, page, time_period, device }) => {
    // Fail fast with setup guidance when the API token is missing.
    if (!SCRAPE_DO_TOKEN) {
        return {
            content: [{ type: "text", text: "Error: SCRAPE_DO_TOKEN is not set. Get your free token at https://app.scrape.do" }],
            isError: true,
        };
    }
    try {
        const params = {
            token: SCRAPE_DO_TOKEN,
            q: query,
            gl: country,
            hl: language,
            start: (page - 1) * 10, // Google-style offset: 10 results per page
            device,
        };
        // Only send the filter when one was actually requested; "" is the
        // schema default meaning "no time filter".
        if (time_period) {
            params.time_period = time_period;
        }
        const response = await axios_1.default.get(`${SCRAPE_API_BASE}/plugin/google/search`, {
            params,
            timeout: 60000,
        });
        return {
            content: [{ type: "text", text: JSON.stringify(response.data, null, 2) }],
        };
    }
    catch (error) {
        // error.response.data is often a parsed object for HTTP errors;
        // stringify it so the client sees the real API error payload
        // instead of "[object Object]". Mirrors scrape_url's handling.
        const payload = error.response?.data || error.message;
        const msg = typeof payload === "string" ? payload : JSON.stringify(payload);
        return {
            content: [{ type: "text", text: `Error: ${msg}` }],
            isError: true,
        };
    }
});
// ─── Start Server ────────────────────────────────────────────────────────────
/**
 * Connect the MCP server to a stdio transport and begin serving requests.
 * Resolves once the transport is connected; the process then stays alive
 * servicing stdio traffic.
 */
async function main() {
    const transport = new stdio_js_1.StdioServerTransport();
    await server.connect(transport);
}
// Exit nonzero on startup failure so MCP clients and supervisors can detect
// it — `.catch(console.error)` alone would leave the exit code at 0.
main().catch((err) => {
    console.error(err);
    process.exit(1);
});
package/package.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "scrape-do-mcp",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "MCP Server for Scrape.do - Web Scraping & Google Search with anti-bot bypass",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"scrape-do-mcp": "dist/index.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"build": "tsc",
|
|
11
|
+
"start": "node dist/index.js",
|
|
12
|
+
"dev": "ts-node src/index.ts"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"mcp",
|
|
16
|
+
"scraping",
|
|
17
|
+
"web-scraper",
|
|
18
|
+
"google-search",
|
|
19
|
+
"firecrawl-alternative"
|
|
20
|
+
],
|
|
21
|
+
"license": "MIT",
|
|
22
|
+
"files": [
|
|
23
|
+
"dist"
|
|
24
|
+
],
|
|
25
|
+
"engines": {
|
|
26
|
+
"node": ">=18.0.0"
|
|
27
|
+
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
30
|
+
"axios": "^1.13.6",
|
|
31
|
+
"zod": "^4.3.6"
|
|
32
|
+
},
|
|
33
|
+
"devDependencies": {
|
|
34
|
+
"@types/node": "^25.5.0",
|
|
35
|
+
"ts-node": "^10.9.2",
|
|
36
|
+
"typescript": "^5.9.3"
|
|
37
|
+
}
|
|
38
|
+
}
|