scrape-do-mcp 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README-ZH.md CHANGED
@@ -6,8 +6,15 @@ Scrape.do 网页抓取和 Google 搜索 MCP 服务器 - 支持反机器人保护
6
6
 
7
7
  ## 功能特点
8
8
 
9
- - **scrape_url**: 抓取任意网页并返回 Markdown 格式内容。自动绕过 Cloudflare、WAF、CAPTCHA 和反爬虫保护。支持 JavaScript 渲染页面。
10
- - **google_search**: 搜索 Google 并返回结构化的 SERP 结果 JSON。包含自然搜索结果、知识图谱、本地商家、新闻、相关问题(People Also Ask)等。
9
+ - **scrape_url**: 抓取任意网页并返回 Markdown 格式内容。自动绕过 Cloudflare、WAF、CAPTCHA 和反爬虫保护。支持 JavaScript 渲染、截图、地理定位(150+ 国家)、设备模拟、会话保持、自定义请求头/Cookie、超时控制等。
10
+ - **google_search**: 搜索 Google 并返回结构化的 SERP 结果 JSON。包含自然搜索结果、知识图谱、本地商家、新闻、相关问题等。支持地理定位和设备筛选。
11
+
12
+ ## 可用工具
13
+
14
+ | 工具 | 描述 |
15
+ |------|------|
16
+ | `scrape_url` | 全功能网页抓取,反机器人绕过。支持:JavaScript 渲染、截图(PNG)、地理定位(150+ 国家)、设备模拟(桌面/手机/平板)、会话保持、自定义请求头/Cookie、超时控制等。 |
17
+ | `google_search` | Google SERP 结构化抓取,返回 JSON。支持:自然搜索结果、知识图谱、本地商家、新闻、People Also Ask、视频结果等,支持地理定位、设备筛选、时间筛选。 |
11
18
 
12
19
  ## 安装
13
20
 
@@ -55,12 +62,52 @@ claude mcp add-json scrape-do --scope user '{
55
62
  抓取任意网页并获取 Markdown 内容。
56
63
 
57
64
  ```typescript
58
- // 参数
65
+ // 完整参数
59
66
  {
60
- url: string, // 要抓取的网址
61
- render_js?: boolean, // 渲染 JavaScript(默认 false)
62
- super_proxy?: boolean, // 使用住宅代理(消耗 10 积分,默认 false)
63
- output?: "markdown" | "raw" // 输出格式(默认 markdown)
67
+ // 必需
68
+ url: string, // 要抓取的网址
69
+
70
+ // 代理和渲染
71
+ render_js?: boolean, // 渲染 JavaScript(默认 false)
72
+ super_proxy?: boolean, // 使用住宅/移动代理(消耗 10 积分)
73
+ geoCode?: string, // 国家代码(如 'us', 'cn', 'gb')
74
+ regionalGeoCode?: string, // 区域(如 'asia', 'europe')
75
+ device?: "desktop" | "mobile" | "tablet", // 设备类型
76
+ sessionId?: number, // 保持相同 IP 的会话
77
+
78
+ // 超时和重试
79
+ timeout?: number, // 最大超时时间(毫秒,默认 60000,上限 120000)
80
+ retryTimeout?: number, // 重试超时(毫秒)
81
+ disableRetry?: boolean, // 禁用自动重试
82
+
83
+ // 输出格式
84
+ output?: "markdown" | "raw", // 输出格式(默认 markdown)
85
+ returnJSON?: boolean, // 以 JSON 形式返回网络请求
86
+ transparentResponse?: boolean, // 返回未经 Scrape.do 处理的原始响应
87
+
88
+ // 截图
89
+ screenshot?: boolean, // 截图(PNG)
90
+ fullScreenShot?: boolean, // 全页截图
91
+ particularScreenShot?: string, // 元素截图(CSS 选择器)
92
+
93
+ // 浏览器控制
94
+ waitSelector?: string, // 等待元素(CSS 选择器)
95
+ customWait?: number, // 加载后等待时间(毫秒)
96
+ waitUntil?: "domcontentloaded" | "load" | "networkidle" | "networkidle0" | "networkidle2",
97
+ width?: number, // 视口宽度(默认 1920)
98
+ height?: number, // 视口高度(默认 1080)
99
+ blockResources?: boolean, // 阻止 CSS/图片/字体(默认 true)
100
+
101
+ // 请求头和 Cookie
102
+ customHeaders?: boolean, // 处理发往目标网页的所有请求头
103
+ extraHeaders?: boolean, // 添加额外请求头
104
+ forwardHeaders?: boolean, // 转发你的请求头
105
+ setCookies?: string, // 设置 Cookie(格式:'name=value; name2=value2')
106
+ pureCookies?: boolean, // 返回原始 Cookie
107
+
108
+ // 其他
109
+ disableRedirection?: boolean, // 禁用重定向
110
+ callback?: string // Webhook URL 异步接收结果
64
111
  }
65
112
  ```
66
113
 
@@ -69,14 +116,22 @@ claude mcp add-json scrape-do --scope user '{
69
116
  搜索 Google 并获取结构化结果。
70
117
 
71
118
  ```typescript
72
- // 参数
119
+ // 完整参数
73
120
  {
74
- query: string, // 搜索关键词
75
- country?: string, // 国家代码(默认 "us")
76
- language?: string, // 界面语言(默认 "en")
77
- page?: number, // 页码(默认 1)
121
+ // 必需
122
+ query: string, // 搜索关键词
123
+
124
+ // 搜索选项
125
+ country?: string, // 国家代码(默认 'us')
126
+ language?: string, // 界面语言(默认 'en')
127
+ domain?: string, // Google 域名(如 'com', 'co.uk')
128
+ page?: number, // 页码(默认 1)
129
+ num?: number, // 每页结果数(默认 10)
78
130
  time_period?: "" | "last_hour" | "last_day" | "last_week" | "last_month" | "last_year",
79
- device?: "desktop" | "mobile" // 设备类型(默认 desktop)
131
+ device?: "desktop" | "mobile", // 设备类型(默认 desktop)
132
+
133
+ // 高级
134
+ includeHtml?: boolean // 在响应中包含原始 HTML
80
135
  }
81
136
  ```
82
137
 
@@ -108,6 +163,31 @@ claude mcp add-json scrape-do --scope user '{
108
163
  抓取 https://example.com 并返回原始 HTML 而不是 markdown。
109
164
  ```
110
165
 
166
+ ### 地理定位抓取
167
+ ```
168
+ 用日本(geoCode: jp)的 IP 抓取 https://www.amazon.com/product/12345
169
+ ```
170
+
171
+ ### 移动设备模拟
172
+ ```
173
+ 用移动设备抓取 https://example.com 来查看移动版页面。
174
+ ```
175
+
176
+ ### 截图
177
+ ```
178
+ 截取 https://example.com 的屏幕截图并返回图片。
179
+ ```
180
+
181
+ ### 等待元素加载
182
+ ```
183
+ 抓取 https://example.com 但先等待 id 为 "content" 的元素加载完成。
184
+ ```
185
+
186
+ ### 会话保持
187
+ ```
188
+ 使用会话 ID 12345 抓取 https://example.com 的多个页面,以保持相同的 IP。
189
+ ```
190
+
111
191
  ## 与其他工具对比
112
192
 
113
193
  | 功能 | scrape-do-mcp | Firecrawl | Browserbase |
@@ -133,7 +213,7 @@ claude mcp add-json scrape-do --scope user '{
133
213
  | scrape_url(super_proxy) | 10 积分/次 |
134
214
  | google_search | 1 积分/次 |
135
215
 
136
- 注册即送 **1,000 积分**:https://app.scrape.do
216
+ **免费:每月 1,000 积分** - 无需信用卡:https://app.scrape.do
137
217
 
138
218
  ## 开发
139
219
 
package/README.md CHANGED
@@ -6,8 +6,15 @@ MCP Server for Scrape.do - Web Scraping & Google Search with anti-bot bypass
6
6
 
7
7
  ## Features
8
8
 
9
- - **scrape_url**: Scrape any webpage and return content as Markdown. Automatically bypasses Cloudflare, WAFs, CAPTCHAs, and anti-bot protection. Supports JavaScript-rendered pages.
10
- - **google_search**: Search Google and return structured SERP results as JSON. Returns organic results, knowledge graph, local businesses, news stories, and more.
9
+ - **scrape_url**: Scrape any webpage and return content as Markdown. Automatically bypasses Cloudflare, WAFs, CAPTCHAs, and anti-bot protection. Supports JavaScript rendering, screenshots, geo-targeting (150+ countries), device emulation, session persistence, and more.
10
+ - **google_search**: Search Google and return structured SERP results as JSON. Returns organic results, knowledge graph, local businesses, news stories, and more. Supports geo-targeting and device filtering.
11
+
12
+ ## Available Tools
13
+
14
+ | Tool | Description |
15
+ |------|-------------|
16
+ | `scrape_url` | Full-featured web scraping with anti-bot bypass. Supports: JavaScript rendering, screenshots (PNG), geo-targeting (150+ countries), device emulation (desktop/mobile/tablet), session persistence, custom headers/cookies, timeout control, and more. |
17
+ | `google_search` | Google SERP scraping returning structured JSON. Supports: organic results, knowledge graph, local businesses, news, People Also Ask, video results, geo-targeting, device filtering, and time-based filtering. |
11
18
 
12
19
  ## Installation
13
20
 
@@ -55,12 +62,52 @@ Get your free API token at: https://app.scrape.do
55
62
  Scrape any webpage and get content as Markdown.
56
63
 
57
64
  ```typescript
58
- // Parameters
65
+ // All Parameters
59
66
  {
60
- url: string, // Target URL to scrape
61
- render_js?: boolean, // Render JavaScript (default: false)
62
- super_proxy?: boolean, // Use residential proxies (costs 10 credits, default: false)
63
- output?: "markdown" | "raw" // Output format (default: markdown)
67
+ // Required
68
+ url: string, // Target URL to scrape
69
+
70
+ // Proxy & Rendering
71
+ render_js?: boolean, // Render JavaScript (default: false)
72
+ super_proxy?: boolean, // Use residential/mobile proxies (costs 10 credits)
73
+ geoCode?: string, // Country code (e.g., 'us', 'cn', 'gb')
74
+ regionalGeoCode?: string, // Region (e.g., 'asia', 'europe')
75
+ device?: "desktop" | "mobile" | "tablet", // Device type
76
+ sessionId?: number, // Keep same IP for session
77
+
78
+ // Timeout & Retry
79
+ timeout?: number, // Max timeout in ms (default: 60000, max: 120000)
80
+ retryTimeout?: number, // Retry timeout in ms
81
+ disableRetry?: boolean, // Disable auto retry
82
+
83
+ // Output Format
84
+ output?: "markdown" | "raw", // Output format (default: markdown)
85
+ returnJSON?: boolean, // Return network requests as JSON
86
+ transparentResponse?: boolean, // Return the target's response without Scrape.do processing
87
+
88
+ // Screenshot
89
+ screenshot?: boolean, // Take screenshot (PNG)
90
+ fullScreenShot?: boolean, // Full page screenshot
91
+ particularScreenShot?: string, // Screenshot of element (CSS selector)
92
+
93
+ // Browser Control
94
+ waitSelector?: string, // Wait for element (CSS selector)
95
+ customWait?: number, // Wait time after load (ms)
96
+ waitUntil?: "domcontentloaded" | "load" | "networkidle" | "networkidle0" | "networkidle2",
97
+ width?: number, // Viewport width (default: 1920)
98
+ height?: number, // Viewport height (default: 1080)
99
+ blockResources?: boolean, // Block CSS/images/fonts (default: true)
100
+
101
+ // Headers & Cookies
102
+ customHeaders?: boolean, // Handle all request headers for the target webpage
103
+ extraHeaders?: boolean, // Add extra headers
104
+ forwardHeaders?: boolean, // Forward your headers
105
+ setCookies?: string, // Set cookies ('name=value; name2=value2')
106
+ pureCookies?: boolean, // Return original cookies
107
+
108
+ // Other
109
+ disableRedirection?: boolean, // Disable redirect
110
+ callback?: string // Webhook URL for async results
64
111
  }
65
112
  ```
66
113
 
@@ -69,14 +116,22 @@ Scrape any webpage and get content as Markdown.
69
116
  Search Google and get structured results.
70
117
 
71
118
  ```typescript
72
- // Parameters
119
+ // All Parameters
73
120
  {
74
- query: string, // Search query
75
- country?: string, // Country code (default: "us")
76
- language?: string, // Interface language (default: "en")
77
- page?: number, // Page number (default: 1)
121
+ // Required
122
+ query: string, // Search query
123
+
124
+ // Search Options
125
+ country?: string, // Country code (default: 'us')
126
+ language?: string, // Interface language (default: 'en')
127
+ domain?: string, // Google domain (e.g., 'com', 'co.uk')
128
+ page?: number, // Page number (default: 1)
129
+ num?: number, // Results per page (default: 10)
78
130
  time_period?: "" | "last_hour" | "last_day" | "last_week" | "last_month" | "last_year",
79
- device?: "desktop" | "mobile" // Device type (default: desktop)
131
+ device?: "desktop" | "mobile", // Device type (default: desktop)
132
+
133
+ // Advanced
134
+ includeHtml?: boolean // Include raw HTML in response
80
135
  }
81
136
  ```
82
137
 
@@ -110,6 +165,31 @@ Use render_js=true to get the fully rendered content.
110
165
  Scrape https://example.com and return raw HTML instead of markdown.
111
166
  ```
112
167
 
168
+ ### Geo-targeting
169
+ ```
170
+ Scrape https://www.amazon.com/product/12345 as if I'm in Japan (geoCode: jp)
171
+ ```
172
+
173
+ ### Mobile Device
174
+ ```
175
+ Scrape https://example.com using a mobile device to see the mobile version.
176
+ ```
177
+
178
+ ### Take Screenshot
179
+ ```
180
+ Take a screenshot of https://example.com and return the image.
181
+ ```
182
+
183
+ ### Wait for Element
184
+ ```
185
+ Scrape https://example.com but wait for the element with id "content" to load first.
186
+ ```
187
+
188
+ ### Session Persistence
189
+ ```
190
+ Scrape multiple pages of https://example.com using sessionId 12345 to maintain the same IP.
191
+ ```
192
+
113
193
  ## Comparison with Alternatives
114
194
 
115
195
  | Feature | scrape-do-mcp | Firecrawl | Browserbase |
@@ -135,7 +215,7 @@ Scrape https://example.com and return raw HTML instead of markdown.
135
215
  | scrape_url (super_proxy) | 10 credits/request |
136
216
  | google_search | 1 credit/request |
137
217
 
138
- Free registration includes **1,000 credits**: https://app.scrape.do
218
+ **Free: 1,000 credits/month** - No credit card required: https://app.scrape.do
139
219
 
140
220
  ## Development
141
221
 
package/dist/index.js CHANGED
@@ -12,32 +12,126 @@ const SCRAPE_DO_TOKEN = process.env.SCRAPE_DO_TOKEN || "";
12
12
  const SCRAPE_API_BASE = "https://api.scrape.do";
13
13
  const server = new mcp_js_1.McpServer({
14
14
  name: "scrape-do-mcp",
15
- version: "0.1.3",
15
+ version: "0.2.0",
16
16
  });
17
17
  // ─── Tool 1: scrape_url ──────────────────────────────────────────────────────
18
- server.tool("scrape_url", "Scrape any webpage and return its content as Markdown. Automatically bypasses Cloudflare, WAFs, CAPTCHAs, and anti-bot protection. Supports JavaScript-rendered pages.", {
18
+ server.tool("scrape_url", "Scrape any webpage and return its content as Markdown. Automatically bypasses Cloudflare, WAFs, CAPTCHAs, and anti-bot protection. Supports JavaScript-rendered pages, screenshots, geo-targeting, and more.", {
19
+ // Required
19
20
  url: zod_1.z.string().url().describe("The target URL to scrape"),
21
+ // Proxy & Rendering
20
22
  render_js: zod_1.z.boolean().optional().default(false).describe("Render JavaScript (use for React/Vue/SPA pages)"),
21
- super_proxy: zod_1.z.boolean().optional().default(false).describe("Use residential/mobile proxies for harder-to-detect requests (costs 10 credits instead of 1)"),
23
+ super_proxy: zod_1.z.boolean().optional().default(false).describe("Use residential & mobile proxy networks (costs 10 credits instead of 1)"),
24
+ geoCode: zod_1.z.string().optional().describe("Country code for geo-targeting (e.g., 'us', 'cn', 'gb', 'jp'). See full list at https://scrape.do/features/geo-targeting/"),
25
+ regionalGeoCode: zod_1.z.string().optional().describe("Regional geo targeting (e.g., 'asia', 'europe', 'africa')"),
26
+ device: zod_1.z.enum(["desktop", "mobile", "tablet"]).optional().default("desktop").describe("Device type to emulate"),
27
+ sessionId: zod_1.z.number().optional().describe("Use the same IP address continuously with a session (0-999999999)"),
28
+ // Timeout & Retry
29
+ timeout: zod_1.z.number().optional().default(60000).describe("Maximum timeout for request in milliseconds (max 120000)"),
30
+ retryTimeout: zod_1.z.number().optional().describe("Maximum timeout for retry mechanism in milliseconds"),
31
+ disableRetry: zod_1.z.boolean().optional().default(false).describe("Disable automatic retry on failure"),
32
+ // Output Format
22
33
  output: zod_1.z.enum(["markdown", "raw"]).optional().default("markdown").describe("Output format: markdown (default) or raw HTML"),
23
- }, async ({ url, render_js, super_proxy, output }) => {
34
+ returnJSON: zod_1.z.boolean().optional().default(false).describe("Returns network requests with content as JSON"),
35
+ transparentResponse: zod_1.z.boolean().optional().default(false).describe("Return pure response without Scrape.do processing"),
36
+ // Screenshot
37
+ screenshot: zod_1.z.boolean().optional().default(false).describe("Return a screenshot from the webpage (PNG)"),
38
+ fullScreenShot: zod_1.z.boolean().optional().default(false).describe("Return a full page screenshot"),
39
+ particularScreenShot: zod_1.z.string().optional().describe("Return screenshot of a specific area (CSS selector)"),
40
+ // Browser Control
41
+ waitSelector: zod_1.z.string().optional().describe("CSS selector to wait for before returning"),
42
+ customWait: zod_1.z.number().optional().describe("Wait time in milliseconds after content loaded"),
43
+ waitUntil: zod_1.z.enum(["domcontentloaded", "load", "networkidle", "networkidle0", "networkidle2"]).optional().default("domcontentloaded").describe("When to consider page loaded"),
44
+ width: zod_1.z.number().optional().default(1920).describe("Browser viewport width in pixels"),
45
+ height: zod_1.z.number().optional().default(1080).describe("Browser viewport height in pixels"),
46
+ blockResources: zod_1.z.boolean().optional().default(true).describe("Block CSS, images, and fonts to speed up loading"),
47
+ // Headers & Cookies
48
+ customHeaders: zod_1.z.boolean().optional().default(false).describe("Handle all request headers for the target webpage"),
49
+ extraHeaders: zod_1.z.boolean().optional().default(false).describe("Add extra headers or change header values"),
50
+ forwardHeaders: zod_1.z.boolean().optional().default(false).describe("Forward your own headers to the target website"),
51
+ setCookies: zod_1.z.string().optional().describe("Set cookies for the target webpage (format: 'name=value; name2=value2')"),
52
+ pureCookies: zod_1.z.boolean().optional().default(false).describe("Return original Set-Cookie headers from target website"),
53
+ // Other
54
+ disableRedirection: zod_1.z.boolean().optional().default(false).describe("Disable request redirection"),
55
+ callback: zod_1.z.string().optional().describe("Get results via webhook URL without waiting"),
56
+ }, async (params) => {
24
57
  if (!SCRAPE_DO_TOKEN) {
25
58
  return {
26
59
  content: [{ type: "text", text: "Error: SCRAPE_DO_TOKEN is not set. Get your free token at https://app.scrape.do" }],
27
60
  isError: true,
28
61
  };
29
62
  }
63
+ const { url, render_js, super_proxy, geoCode, regionalGeoCode, device, sessionId, timeout, retryTimeout, disableRetry, output, returnJSON, transparentResponse, screenshot, fullScreenShot, particularScreenShot, waitSelector, customWait, waitUntil, width, height, blockResources, customHeaders, extraHeaders, forwardHeaders, setCookies, pureCookies, disableRedirection, callback, } = params;
30
64
  try {
65
+ const requestParams = {
66
+ token: SCRAPE_DO_TOKEN,
67
+ url,
68
+ render: render_js,
69
+ super: super_proxy,
70
+ output,
71
+ };
72
+ // Add optional parameters if provided
73
+ if (geoCode)
74
+ requestParams.geoCode = geoCode;
75
+ if (regionalGeoCode)
76
+ requestParams.regionalGeoCode = regionalGeoCode;
77
+ if (device && device !== "desktop")
78
+ requestParams.device = device;
79
+ if (sessionId)
80
+ requestParams.sessionId = sessionId;
81
+ if (timeout && timeout !== 60000)
82
+ requestParams.timeout = timeout;
83
+ if (retryTimeout)
84
+ requestParams.retryTimeout = retryTimeout;
85
+ if (disableRetry)
86
+ requestParams.disableRetry = disableRetry;
87
+ if (returnJSON)
88
+ requestParams.returnJSON = returnJSON;
89
+ if (transparentResponse)
90
+ requestParams.transparentResponse = transparentResponse;
91
+ if (screenshot)
92
+ requestParams.screenShot = screenshot;
93
+ if (fullScreenShot)
94
+ requestParams.fullScreenShot = fullScreenShot;
95
+ if (particularScreenShot)
96
+ requestParams.particularScreenShot = particularScreenShot;
97
+ if (waitSelector)
98
+ requestParams.waitSelector = waitSelector;
99
+ if (customWait)
100
+ requestParams.customWait = customWait;
101
+ if (waitUntil && waitUntil !== "domcontentloaded")
102
+ requestParams.waitUntil = waitUntil;
103
+ if (width && width !== 1920)
104
+ requestParams.width = width;
105
+ if (height && height !== 1080)
106
+ requestParams.height = height;
107
+ if (blockResources === false)
108
+ requestParams.blockResources = false;
109
+ if (customHeaders)
110
+ requestParams.customHeaders = customHeaders;
111
+ if (extraHeaders)
112
+ requestParams.extraHeaders = extraHeaders;
113
+ if (forwardHeaders)
114
+ requestParams.forwardHeaders = forwardHeaders;
115
+ if (setCookies)
116
+ requestParams.setCookies = setCookies;
117
+ if (pureCookies)
118
+ requestParams.pureCookies = pureCookies;
119
+ if (disableRedirection)
120
+ requestParams.disableRedirection = disableRedirection;
121
+ if (callback)
122
+ requestParams.callback = callback;
31
123
  const response = await axios_1.default.get(SCRAPE_API_BASE, {
32
- params: {
33
- token: SCRAPE_DO_TOKEN,
34
- url,
35
- render: render_js,
36
- super: super_proxy,
37
- output,
38
- },
39
- timeout: 60000,
124
+ params: requestParams,
125
+ timeout: Math.min(timeout || 60000, 120000),
126
+ responseType: screenshot || fullScreenShot || particularScreenShot ? 'arraybuffer' : 'text',
40
127
  });
128
+ // If screenshot, return as base64
129
+ if (screenshot || fullScreenShot || particularScreenShot) {
130
+ const base64 = Buffer.from(response.data, 'binary').toString('base64');
131
+ return {
132
+ content: [{ type: "text", text: `Screenshot (base64): ${base64}` }],
133
+ };
134
+ }
41
135
  return {
42
136
  content: [{ type: "text", text: response.data }],
43
137
  };
@@ -52,13 +146,19 @@ server.tool("scrape_url", "Scrape any webpage and return its content as Markdown
52
146
  });
53
147
  // ─── Tool 2: google_search ───────────────────────────────────────────────────
54
148
  server.tool("google_search", "Search Google and return structured SERP results as JSON. Returns organic results, knowledge graph, local businesses, news stories, related questions (People Also Ask), video results, and more.", {
149
+ // Required
55
150
  query: zod_1.z.string().describe("Search query, e.g. 'best python frameworks 2026'"),
56
- country: zod_1.z.string().optional().default("us").describe("Country code for results, e.g. 'us', 'cn', 'gb', 'jp'"),
57
- language: zod_1.z.string().optional().default("en").describe("Interface language, e.g. 'en', 'zh', 'ja', 'de'"),
151
+ // Search Options
152
+ country: zod_1.z.string().optional().default("us").describe("Country code for results (e.g., 'us', 'cn', 'gb', 'jp'). See: https://scrape.do/features/geo-targeting/"),
153
+ language: zod_1.z.string().optional().default("en").describe("Interface language (e.g., 'en', 'zh', 'ja', 'de')"),
154
+ domain: zod_1.z.string().optional().describe("Google domain (e.g., 'com', 'co.uk', 'de', 'fr')"),
58
155
  page: zod_1.z.number().optional().default(1).describe("Page number (1 = first page, 2 = second page)"),
59
156
  time_period: zod_1.z.enum(["", "last_hour", "last_day", "last_week", "last_month", "last_year"]).optional().default("").describe("Filter results by time period"),
60
157
  device: zod_1.z.enum(["desktop", "mobile"]).optional().default("desktop").describe("Device type affecting SERP layout"),
61
- }, async ({ query, country, language, page, time_period, device }) => {
158
+ // Advanced
159
+ num: zod_1.z.number().optional().describe("Number of results per page (default: 10)"),
160
+ includeHtml: zod_1.z.boolean().optional().default(false).describe("Include raw HTML alongside parsed JSON"),
161
+ }, async ({ query, country, language, domain, page, time_period, device, num, includeHtml }) => {
62
162
  if (!SCRAPE_DO_TOKEN) {
63
163
  return {
64
164
  content: [{ type: "text", text: "Error: SCRAPE_DO_TOKEN is not set. Get your free token at https://app.scrape.do" }],
@@ -71,9 +171,15 @@ server.tool("google_search", "Search Google and return structured SERP results a
71
171
  q: query,
72
172
  gl: country,
73
173
  hl: language,
74
- start: (page - 1) * 10,
174
+ start: (page - 1) * (num || 10),
75
175
  device,
76
176
  };
177
+ if (domain)
178
+ params.domain = domain;
179
+ if (num)
180
+ params.num = num;
181
+ if (includeHtml)
182
+ params.include_html = includeHtml;
77
183
  if (time_period)
78
184
  params.time_period = time_period;
79
185
  const response = await axios_1.default.get(`${SCRAPE_API_BASE}/plugin/google/search`, {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scrape-do-mcp",
3
- "version": "0.1.5",
3
+ "version": "0.2.0",
4
4
  "description": "MCP Server for Scrape.do - Web Scraping & Google Search with anti-bot bypass",
5
5
  "main": "dist/index.js",
6
6
  "bin": {