scrape-do-mcp 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +99 -0
  2. package/dist/index.js +46 -3
  3. package/package.json +7 -2
package/README.md ADDED
@@ -0,0 +1,99 @@
1
+ # scrape-do-mcp
2
+
3
+ MCP Server for Scrape.do - Web Scraping & Google Search with anti-bot bypass
4
+
5
+ ## Features
6
+
7
+ - **scrape_url**: Scrape any webpage and return content as Markdown. Automatically bypasses Cloudflare, WAFs, CAPTCHAs, and anti-bot protection. Supports JavaScript-rendered pages.
8
+ - **google_search**: Search Google and return structured SERP results as JSON. Returns organic results, knowledge graph, local businesses, news stories, and more.
9
+
10
+ ## Installation
11
+
12
+ ### Claude Code / Claude Desktop
13
+
14
+ Add to your `~/.claude.json`:
15
+
16
+ ```json
17
+ {
18
+ "mcpServers": {
19
+ "scrape-do": {
20
+ "command": "npx",
21
+ "args": ["-y", "scrape-do-mcp"],
22
+ "env": {
23
+ "SCRAPE_DO_TOKEN": "your_token_here"
24
+ }
25
+ }
26
+ }
27
+ }
28
+ ```
29
+
30
+ Get your free API token at: https://app.scrape.do
31
+
32
+ ### Smithery.ai
33
+
34
+ Published on [Smithery.ai](https://smithery.ai) - Search for "scrape-do" to install.
35
+
36
+ ### HTTP Server Mode
37
+
38
+ The server supports both STDIO and HTTP modes:
39
+
40
+ - **STDIO mode** (default): For local Claude Code / Claude Desktop usage
41
+ - **HTTP mode**: For Smithery hosting or custom HTTP deployment
42
+
43
+ ```bash
44
+ # HTTP mode
45
+ TRANSPORT=http PORT=3000 SCRAPE_DO_TOKEN=your_token npm start
46
+
47
+ # Health check
48
+ curl http://localhost:3000/health
49
+ ```
50
+
51
+ ## Usage
52
+
53
+ ### scrape_url
54
+
55
+ ```typescript
56
+ // Parameters
57
+ {
58
+ url: string, // Target URL to scrape
59
+ render_js?: boolean, // Render JavaScript (default: false)
60
+ super_proxy?: boolean, // Use residential proxies (costs 10 credits, default: false)
61
+ output?: "markdown" | "raw" // Output format (default: markdown)
62
+ }
63
+ ```
64
+
65
+ ### google_search
66
+
67
+ ```typescript
68
+ // Parameters
69
+ {
70
+ query: string, // Search query
71
+ country?: string, // Country code (default: "us")
72
+ language?: string, // Interface language (default: "en")
73
+ page?: number, // Page number (default: 1)
74
+ time_period?: "" | "last_hour" | "last_day" | "last_week" | "last_month" | "last_year",
75
+ device?: "desktop" | "mobile" // Device type (default: desktop)
76
+ }
77
+ ```
78
+
79
+ ## Credit Usage
80
+
81
+ | Tool | Credit Cost |
82
+ |------|-------------|
83
+ | scrape_url (regular) | 1 credit/request |
84
+ | scrape_url (super_proxy) | 10 credits/request |
85
+ | google_search | 1 credit/request |
86
+
87
+ Free registration includes **1,000 credits**: https://app.scrape.do
88
+
89
+ ## Development
90
+
91
+ ```bash
92
+ npm install
93
+ npm run build
94
+ npm run dev # Run in development mode
95
+ ```
96
+
97
+ ## License
98
+
99
+ MIT
package/dist/index.js CHANGED
@@ -6,13 +6,16 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
6
6
  Object.defineProperty(exports, "__esModule", { value: true });
7
7
  const mcp_js_1 = require("@modelcontextprotocol/sdk/server/mcp.js");
8
8
  const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
9
+ const streamableHttp_js_1 = require("@modelcontextprotocol/sdk/server/streamableHttp.js");
9
10
  const zod_1 = require("zod");
10
11
  const axios_1 = __importDefault(require("axios"));
12
+ const http_1 = __importDefault(require("http"));
11
13
  const SCRAPE_DO_TOKEN = process.env.SCRAPE_DO_TOKEN || "";
12
14
  const SCRAPE_API_BASE = "https://api.scrape.do";
15
+ const HTTP_PORT = process.env.PORT || process.env.HTTP_PORT || 3000;
13
16
  const server = new mcp_js_1.McpServer({
14
17
  name: "scrape-do-mcp",
15
- version: "0.1.0",
18
+ version: "0.1.1",
16
19
  });
17
20
  // ─── Tool 1: scrape_url ──────────────────────────────────────────────────────
18
21
  server.tool("scrape_url", "Scrape any webpage and return its content as Markdown. Automatically bypasses Cloudflare, WAFs, CAPTCHAs, and anti-bot protection. Supports JavaScript-rendered pages.", {
@@ -94,7 +97,47 @@ server.tool("google_search", "Search Google and return structured SERP results a
94
97
  });
95
98
  // ─── Start Server ────────────────────────────────────────────────────────────
96
99
  async function main() {
97
- const transport = new stdio_js_1.StdioServerTransport();
98
- await server.connect(transport);
100
+ const transportMode = process.env.TRANSPORT || "stdio";
101
+ if (transportMode === "http" || transportMode === "streamable-http") {
102
+ console.error(`Starting Streamable HTTP server on port ${HTTP_PORT}...`);
103
+ const transport = new streamableHttp_js_1.StreamableHTTPServerTransport({
104
+ sessionIdGenerator: () => Math.random().toString(36).substring(2, 15),
105
+ });
106
+ await server.connect(transport);
107
+ const serverInstance = http_1.default.createServer();
108
+ serverInstance.on("request", async (req, res) => {
109
+ // Handle CORS
110
+ res.setHeader("Access-Control-Allow-Origin", "*");
111
+ res.setHeader("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
112
+ res.setHeader("Access-Control-Allow-Headers", "Content-Type");
113
+ if (req.method === "OPTIONS") {
114
+ res.writeHead(204);
115
+ res.end();
116
+ return;
117
+ }
118
+ // Health check
119
+ if (req.url === "/health") {
120
+ res.writeHead(200, { "Content-Type": "application/json" });
121
+ res.end(JSON.stringify({ status: "ok", name: "scrape-do-mcp", version: "0.1.1" }));
122
+ return;
123
+ }
124
+ // MCP endpoint
125
+ if (req.url === "/" || req.url?.startsWith("/mcp")) {
126
+ await transport.handleRequest(req, res);
127
+ return;
128
+ }
129
+ res.writeHead(404);
130
+ res.end("Not found");
131
+ });
132
+ serverInstance.listen(parseInt(String(HTTP_PORT), 10), () => {
133
+ console.error(`MCP server running on http://localhost:${HTTP_PORT}`);
134
+ });
135
+ }
136
+ else {
137
+ // Default to stdio mode
138
+ console.error("Starting STDIO server...");
139
+ const transport = new stdio_js_1.StdioServerTransport();
140
+ await server.connect(transport);
141
+ }
99
142
  }
100
143
  main().catch(console.error);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scrape-do-mcp",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "MCP Server for Scrape.do - Web Scraping & Google Search with anti-bot bypass",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -19,8 +19,13 @@
19
19
  "firecrawl-alternative"
20
20
  ],
21
21
  "license": "MIT",
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "https://github.com/dztabel-happy/scrape-do-mcp.git"
25
+ },
22
26
  "files": [
23
- "dist"
27
+ "dist",
28
+ "README.md"
24
29
  ],
25
30
  "engines": {
26
31
  "node": ">=18.0.0"