scrape-do-mcp 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README-ZH.md CHANGED
@@ -14,8 +14,8 @@
14
14
  - `amazon_offer_listing`:Amazon 卖家报价接口。
15
15
  - `amazon_search`:Amazon 搜索 / 类目结果接口。
16
16
  - `amazon_raw_html`:Amazon 原始 HTML 接口。
17
- - `async_create_job`、`async_get_job`、`async_get_task`、`async_list_jobs`、`async_cancel_job`、`async_get_account`:Async API
18
- - `proxy_mode_config`:生成 Proxy Mode 的连接信息和参数字符串,不会在工具输出里泄露你的 token。
17
+ - `async_create_job`、`async_get_job`、`async_get_task`、`async_list_jobs`、`async_cancel_job`、`async_get_account`:Async API,并同时兼容 MCP 风格字段和官方字段名。
18
+ - `proxy_mode_config`:生成更贴近官方文档的 Proxy Mode 连接信息、默认参数串和证书信息。
19
19
 
20
20
  ## 兼容性说明
21
21
 
@@ -29,9 +29,13 @@
29
29
  - `language` 或 `hl`
30
30
  - `domain` 或 `google_domain`
31
31
  - `includeHtml` 或 `include_html`
32
+ - `async_create_job` 同时接受 `targets`、`render`、`webhookUrl` 这类别名,以及官方字段 `Targets`、`Render`、`WebhookURL`。
33
+ - `async_get_job`、`async_get_task`、`async_cancel_job` 同时接受 `jobId` / `taskId` 和官方 `jobID` / `taskID`。
34
+ - `async_list_jobs` 同时支持 `pageSize` 和官方 `page_size`。
32
35
  - `scrape_url` 里的 Header 转发请使用 `headers` + `header_mode`(`custom` / `extra` / `forward`)。
33
- - 截图结果会以 MCP 图片内容返回,而不是单纯的 base64 文本。
34
- - `scrape_url` 在未启用 ReturnJSON 时默认使用 `output="markdown"`,更适合 LLM 读取;如果你想更贴近原始 HTTP API 的行为,请手动设置 `output="raw"`。
36
+ - 截图结果会保留官方 JSON 响应,同时附加 MCP 图片内容,尽量兼顾官方格式和 MCP 可视化体验。
37
+ - `scrape_url` 现在默认使用 `output="raw"`,更贴近官方 API。
38
+ - `scrape_url` 会在 `structuredContent` 里附带响应元数据,便于在 MCP 中查看 `pureCookies`、`transparentResponse` 和二进制响应信息。
35
39
 
36
40
  ## 安装
37
41
 
package/README.md CHANGED
@@ -14,8 +14,8 @@ Official docs: https://scrape.do/documentation/
14
14
  - `amazon_offer_listing`: Amazon offer listing endpoint.
15
15
  - `amazon_search`: Amazon search/category endpoint.
16
16
  - `amazon_raw_html`: Raw HTML Amazon endpoint with geo-targeting.
17
- - `async_create_job`, `async_get_job`, `async_get_task`, `async_list_jobs`, `async_cancel_job`, `async_get_account`: Async API coverage.
18
- - `proxy_mode_config`: Builds Proxy Mode connection details and parameter strings without exposing your token in tool output.
17
+ - `async_create_job`, `async_get_job`, `async_get_task`, `async_list_jobs`, `async_cancel_job`, `async_get_account`: Async API coverage with both MCP-friendly aliases and official field names.
18
+ - `proxy_mode_config`: Builds official Proxy Mode connection details, default parameter strings, and CA certificate references.
19
19
 
20
20
  ## Compatibility Notes
21
21
 
@@ -29,9 +29,13 @@ Official docs: https://scrape.do/documentation/
29
29
  - `language` or `hl`
30
30
  - `domain` or `google_domain`
31
31
  - `includeHtml` or `include_html`
32
+ - `async_create_job` accepts both alias fields like `targets`, `render`, `webhookUrl` and official Async API fields like `Targets`, `Render`, `WebhookURL`.
33
+ - `async_get_job`, `async_get_task`, and `async_cancel_job` accept both `jobId`/`taskId` and official `jobID`/`taskID`.
34
+ - `async_list_jobs` accepts both `pageSize` and official `page_size`.
32
35
  - For header forwarding in `scrape_url`, pass `headers` plus `header_mode` (`custom`, `extra`, or `forward`).
33
- - Screenshot responses are returned as MCP image content instead of plain base64 text.
34
- - `scrape_url` defaults to `output="markdown"` when ReturnJSON is not used so the tool stays LLM-friendly. Set `output="raw"` if you want the raw API-style output.
36
+ - Screenshot responses preserve the official Scrape.do JSON body and also attach MCP image content when screenshots are present.
37
+ - `scrape_url` now defaults to `output="raw"` to match the official API more closely.
38
+ - `scrape_url` includes response metadata in `structuredContent`, which helps surface `pureCookies`, `transparentResponse`, and binary responses inside MCP.
35
39
 
36
40
  ## Installation
37
41
 
package/dist/index.js CHANGED
@@ -8,7 +8,7 @@ const axios_1 = __importDefault(require("axios"));
8
8
  const mcp_js_1 = require("@modelcontextprotocol/sdk/server/mcp.js");
9
9
  const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
10
10
  const zod_1 = require("zod");
11
- const SERVER_VERSION = "0.3.0";
11
+ const SERVER_VERSION = "0.4.0";
12
12
  const SCRAPE_DO_TOKEN = process.env.SCRAPE_DO_TOKEN || "";
13
13
  const SCRAPE_API_BASE = "https://api.scrape.do";
14
14
  const ASYNC_API_BASE = "https://q.scrape.do";
@@ -70,18 +70,29 @@ function createTextResult(text, structuredContent) {
70
70
  ...(structuredContent ? { structuredContent } : {}),
71
71
  };
72
72
  }
73
- function createJsonResult(value) {
73
+ function createJsonResult(value, options) {
74
+ const rawText = options?.rawText ?? JSON.stringify(value, null, 2);
75
+ const responseMetadata = options?.response ? createResponseMetadata(options.response) : undefined;
74
76
  if (isRecord(value)) {
75
77
  return {
76
- content: [{ type: "text", text: JSON.stringify(value, null, 2) }],
77
- structuredContent: value,
78
+ content: [{ type: "text", text: rawText }],
79
+ structuredContent: responseMetadata ? { ...value, ...responseMetadata } : value,
78
80
  };
79
81
  }
80
- return createTextResult(JSON.stringify(value, null, 2));
82
+ if (Array.isArray(value) && responseMetadata) {
83
+ return createTextResult(rawText, {
84
+ ...responseMetadata,
85
+ _responseBody: value,
86
+ });
87
+ }
88
+ return createTextResult(rawText);
81
89
  }
82
- function createImageResult(images, note) {
90
+ function createImageResult(images, note, structuredContent, rawText) {
83
91
  const content = [];
84
- if (note) {
92
+ if (rawText) {
93
+ content.push({ type: "text", text: rawText });
94
+ }
95
+ else if (note) {
85
96
  content.push({ type: "text", text: note });
86
97
  }
87
98
  for (const image of images) {
@@ -91,7 +102,10 @@ function createImageResult(images, note) {
91
102
  mimeType: image.mimeType,
92
103
  });
93
104
  }
94
- return { content };
105
+ return {
106
+ content,
107
+ ...(structuredContent ? { structuredContent } : {}),
108
+ };
95
109
  }
96
110
  function getErrorMessage(error) {
97
111
  if (axios_1.default.isAxiosError(error)) {
@@ -106,15 +120,67 @@ function getErrorMessage(error) {
106
120
  }
107
121
  return String(error);
108
122
  }
109
- async function requestText(config) {
123
+ function getMimeType(contentType) {
124
+ return contentType?.split(";")[0]?.trim().toLowerCase();
125
+ }
126
+ function isTextLikeMimeType(mimeType) {
127
+ if (!mimeType) {
128
+ return true;
129
+ }
130
+ return mimeType.startsWith("text/") || mimeType.includes("json") || mimeType.includes("xml") || mimeType === "application/javascript" || mimeType === "application/x-javascript" || mimeType === "application/xhtml+xml";
131
+ }
132
+ function normalizeResponseHeaders(headers) {
133
+ const entries = Object.entries(headers).filter(([, value]) => value !== undefined);
134
+ if (entries.length === 0) {
135
+ return undefined;
136
+ }
137
+ return Object.fromEntries(entries.map(([key, value]) => [key, Array.isArray(value) && value.length === 1 ? value[0] : value]));
138
+ }
139
+ function createResponseMetadata(response) {
140
+ return compactObject({
141
+ _contentType: response.contentType,
142
+ _responseHeaders: normalizeResponseHeaders(response.headers),
143
+ _statusCode: response.statusCode,
144
+ });
145
+ }
146
+ function createBinaryResult(response) {
147
+ const mimeType = getMimeType(response.contentType) ?? "application/octet-stream";
148
+ if (mimeType.startsWith("image/")) {
149
+ return {
150
+ content: [
151
+ { type: "image", data: response.data.toString("base64"), mimeType },
152
+ ],
153
+ structuredContent: createResponseMetadata(response),
154
+ };
155
+ }
156
+ return createTextResult(`Binary response returned with content-type ${mimeType}. See structuredContent._bodyBase64 for the raw bytes.`, {
157
+ ...createResponseMetadata(response),
158
+ _bodyBase64: response.data.toString("base64"),
159
+ });
160
+ }
161
+ function createTextBodyResult(text, response) {
162
+ if (!response) {
163
+ return createTextResult(text);
164
+ }
165
+ return createTextResult(text, createResponseMetadata(response));
166
+ }
167
+ async function requestResponse(config, options) {
110
168
  const response = await axios_1.default.request({
111
169
  ...config,
112
- responseType: "text",
170
+ responseType: "arraybuffer",
113
171
  transformResponse: [(value) => value],
172
+ validateStatus: options?.acceptAnyStatus ? () => true : undefined,
114
173
  });
174
+ const data = Buffer.isBuffer(response.data) ? response.data : Buffer.from(response.data);
175
+ const headers = response.headers;
176
+ const contentTypeHeader = headers["content-type"];
177
+ const contentType = Array.isArray(contentTypeHeader) ? contentTypeHeader[0] : contentTypeHeader;
115
178
  return {
116
- text: stringifyUnknown(response.data),
117
- headers: response.headers,
179
+ contentType,
180
+ data,
181
+ headers,
182
+ statusCode: response.status,
183
+ text: data.toString("utf8"),
118
184
  };
119
185
  }
120
186
  function normalizeHeaderRecord(value) {
@@ -214,7 +280,7 @@ function collectImageMatches(value, results = [], seen = new Set()) {
214
280
  if (!isRecord(value)) {
215
281
  return results;
216
282
  }
217
- const prioritizedKeys = ["screenShot", "screenshot", "fullScreenShot", "particularScreenShot", "image", "images"];
283
+ const prioritizedKeys = ["screenShot", "screenShots", "screenshot", "fullScreenShot", "particularScreenShot", "image", "images"];
218
284
  for (const key of prioritizedKeys) {
219
285
  if (key in value) {
220
286
  collectImageMatches(value[key], results, seen);
@@ -237,6 +303,48 @@ function buildProxyParameterString(params) {
237
303
  }
238
304
  return searchParams.toString();
239
305
  }
306
+ const asyncRenderSchema = zod_1.z.object({
307
+ blockResources: zod_1.z.boolean().optional(),
308
+ BlockResources: zod_1.z.boolean().optional(),
309
+ waitUntil: asyncWaitUntilSchema.optional(),
310
+ WaitUntil: asyncWaitUntilSchema.optional(),
311
+ customWait: zod_1.z.number().int().min(0).max(35000).optional(),
312
+ CustomWait: zod_1.z.number().int().min(0).max(35000).optional(),
313
+ waitSelector: zod_1.z.string().optional(),
314
+ WaitSelector: zod_1.z.string().optional(),
315
+ playWithBrowser: zod_1.z.array(browserActionSchema).optional(),
316
+ PlayWithBrowser: zod_1.z.array(browserActionSchema).optional(),
317
+ returnJSON: zod_1.z.boolean().optional(),
318
+ ReturnJSON: zod_1.z.boolean().optional(),
319
+ showWebsocketRequests: zod_1.z.boolean().optional(),
320
+ ShowWebsocketRequests: zod_1.z.boolean().optional(),
321
+ showFrames: zod_1.z.boolean().optional(),
322
+ ShowFrames: zod_1.z.boolean().optional(),
323
+ screenshot: zod_1.z.boolean().optional(),
324
+ Screenshot: zod_1.z.boolean().optional(),
325
+ fullScreenshot: zod_1.z.boolean().optional(),
326
+ FullScreenshot: zod_1.z.boolean().optional(),
327
+ particularScreenshot: zod_1.z.string().optional(),
328
+ ParticularScreenshot: zod_1.z.string().optional(),
329
+ });
330
+ function normalizeAsyncRenderInput(input) {
331
+ if (!input) {
332
+ return undefined;
333
+ }
334
+ return compactObject({
335
+ BlockResources: input.BlockResources ?? input.blockResources,
336
+ WaitUntil: input.WaitUntil ?? input.waitUntil,
337
+ CustomWait: input.CustomWait ?? input.customWait,
338
+ WaitSelector: input.WaitSelector ?? input.waitSelector,
339
+ PlayWithBrowser: input.PlayWithBrowser ?? input.playWithBrowser,
340
+ ReturnJSON: input.ReturnJSON ?? input.returnJSON,
341
+ ShowWebsocketRequests: input.ShowWebsocketRequests ?? input.showWebsocketRequests,
342
+ ShowFrames: input.ShowFrames ?? input.showFrames,
343
+ Screenshot: input.Screenshot ?? input.screenshot,
344
+ FullScreenshot: input.FullScreenshot ?? input.fullScreenshot,
345
+ ParticularScreenshot: input.ParticularScreenshot ?? input.particularScreenshot,
346
+ });
347
+ }
240
348
  function ensureToken() {
241
349
  if (!SCRAPE_DO_TOKEN) {
242
350
  throw new Error("SCRAPE_DO_TOKEN is not set. Get your token at https://app.scrape.do");
@@ -255,7 +363,7 @@ server.tool("scrape_url", "Scrape a webpage with the official Scrape.do API. Sup
255
363
  timeout: zod_1.z.number().int().positive().optional().default(60000).describe("Maximum timeout in milliseconds."),
256
364
  retryTimeout: zod_1.z.number().int().positive().optional().describe("Retry timeout in milliseconds."),
257
365
  disableRetry: zod_1.z.boolean().optional().default(false).describe("Disable automatic retries."),
258
- output: zod_1.z.enum(["markdown", "raw"]).optional().describe("Output format. MCP defaults to markdown unless ReturnJSON is used."),
366
+ output: zod_1.z.enum(["markdown", "raw"]).optional().describe("Output format. Matches Scrape.do's official raw/markdown output."),
259
367
  returnJSON: zod_1.z.boolean().optional().default(false).describe("Return JSON with network requests/content."),
260
368
  transparentResponse: zod_1.z.boolean().optional().default(false).describe("Return the target response without Scrape.do post-processing."),
261
369
  screenshot: zod_1.z.boolean().optional().describe("Alias for screenShot. Capture a viewport screenshot."),
@@ -297,7 +405,7 @@ server.tool("scrape_url", "Scrape a webpage with the official Scrape.do API. Sup
297
405
  const effectiveRender = (params.render_js ?? params.render ?? false) || params.returnJSON || params.showFrames || params.showWebsocketRequests || screenshotRequested || interactionRequested;
298
406
  const effectiveReturnJSON = params.returnJSON || params.showFrames || params.showWebsocketRequests || screenshotRequested || interactionRequested;
299
407
  const effectiveBlockResources = screenshotRequested || interactionRequested ? false : params.blockResources;
300
- const effectiveOutput = effectiveReturnJSON ? params.output : params.output ?? "markdown";
408
+ const effectiveOutput = effectiveReturnJSON ? params.output : params.output ?? "raw";
301
409
  const requestParams = compactObject({
302
410
  token: SCRAPE_DO_TOKEN,
303
411
  url: params.url,
@@ -334,23 +442,32 @@ server.tool("scrape_url", "Scrape a webpage with the official Scrape.do API. Sup
334
442
  callback: params.callback,
335
443
  });
336
444
  const headers = buildForwardedHeaders(params.headers, headerMode);
337
- const { text } = await requestText({
445
+ const response = await requestResponse({
338
446
  method: "GET",
339
447
  url: SCRAPE_API_BASE,
340
448
  params: requestParams,
341
449
  headers,
342
450
  timeout: Math.min(params.timeout ?? 60000, 120000),
343
- });
344
- const parsed = tryParseJson(text);
345
- const images = screenshotRequested || interactionRequested ? collectImageMatches(parsed ?? text) : [];
451
+ }, { acceptAnyStatus: true });
452
+ if (response.statusCode >= 400 && !params.transparentResponse) {
453
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
454
+ }
455
+ const responseMimeType = getMimeType(response.contentType);
456
+ const parsed = isTextLikeMimeType(responseMimeType) ? tryParseJson(response.text) : undefined;
457
+ const images = screenshotRequested || interactionRequested ? collectImageMatches(parsed ?? response.text) : [];
346
458
  if (images.length > 0) {
347
- const note = images.length === 1 ? "Captured screenshot from Scrape.do." : `Captured ${images.length} screenshots from Scrape.do.`;
348
- return createImageResult(images, note);
459
+ const structuredContent = parsed && isRecord(parsed)
460
+ ? { ...parsed, ...createResponseMetadata(response) }
461
+ : createResponseMetadata(response);
462
+ return createImageResult(images, undefined, structuredContent, parsed ? response.text : undefined);
349
463
  }
350
464
  if (parsed !== undefined) {
351
- return createJsonResult(parsed);
465
+ return createJsonResult(parsed, { rawText: response.text, response });
352
466
  }
353
- return createTextResult(text);
467
+ if (isTextLikeMimeType(responseMimeType)) {
468
+ return createTextBodyResult(response.text, response);
469
+ }
470
+ return createBinaryResult(response);
354
471
  }
355
472
  catch (error) {
356
473
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
@@ -406,17 +523,20 @@ server.tool("google_search", "Search Google with Scrape.do's structured SERP API
406
523
  nfpr: params.nfpr,
407
524
  filter: params.filter,
408
525
  });
409
- const { text } = await requestText({
526
+ const response = await requestResponse({
410
527
  method: "GET",
411
528
  url: `${SCRAPE_API_BASE}/plugin/google/search`,
412
529
  params: requestParams,
413
530
  timeout: 60000,
414
- });
415
- const parsed = tryParseJson(text);
531
+ }, { acceptAnyStatus: true });
532
+ if (response.statusCode >= 400) {
533
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
534
+ }
535
+ const parsed = tryParseJson(response.text);
416
536
  if (parsed !== undefined) {
417
- return createJsonResult(parsed);
537
+ return createJsonResult(parsed, { rawText: response.text, response });
418
538
  }
419
- return createTextResult(text);
539
+ return createTextBodyResult(response.text, response);
420
540
  }
421
541
  catch (error) {
422
542
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
@@ -443,17 +563,20 @@ server.tool("amazon_product", "Get structured Amazon product detail data with th
443
563
  language: params.language,
444
564
  include_html: params.include_html ?? params.includeHtml ? true : undefined,
445
565
  });
446
- const { text } = await requestText({
566
+ const response = await requestResponse({
447
567
  method: "GET",
448
568
  url: `${SCRAPE_API_BASE}/plugin/amazon/pdp`,
449
569
  params: requestParams,
450
570
  timeout: 60000,
451
- });
452
- const parsed = tryParseJson(text);
571
+ }, { acceptAnyStatus: true });
572
+ if (response.statusCode >= 400) {
573
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
574
+ }
575
+ const parsed = tryParseJson(response.text);
453
576
  if (parsed !== undefined) {
454
- return createJsonResult(parsed);
577
+ return createJsonResult(parsed, { rawText: response.text, response });
455
578
  }
456
- return createTextResult(text);
579
+ return createTextBodyResult(response.text, response);
457
580
  }
458
581
  catch (error) {
459
582
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
@@ -478,17 +601,20 @@ server.tool("amazon_offer_listing", "Get all seller offers for an Amazon product
478
601
  super: params.super_proxy ?? params.super,
479
602
  include_html: params.include_html ?? params.includeHtml ? true : undefined,
480
603
  });
481
- const { text } = await requestText({
604
+ const response = await requestResponse({
482
605
  method: "GET",
483
606
  url: `${SCRAPE_API_BASE}/plugin/amazon/offer-listing`,
484
607
  params: requestParams,
485
608
  timeout: 60000,
486
- });
487
- const parsed = tryParseJson(text);
609
+ }, { acceptAnyStatus: true });
610
+ if (response.statusCode >= 400) {
611
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
612
+ }
613
+ const parsed = tryParseJson(response.text);
488
614
  if (parsed !== undefined) {
489
- return createJsonResult(parsed);
615
+ return createJsonResult(parsed, { rawText: response.text, response });
490
616
  }
491
- return createTextResult(text);
617
+ return createTextBodyResult(response.text, response);
492
618
  }
493
619
  catch (error) {
494
620
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
@@ -517,17 +643,20 @@ server.tool("amazon_search", "Search Amazon or scrape Amazon category-style resu
517
643
  language: params.language,
518
644
  include_html: params.include_html ?? params.includeHtml ? true : undefined,
519
645
  });
520
- const { text } = await requestText({
646
+ const response = await requestResponse({
521
647
  method: "GET",
522
648
  url: `${SCRAPE_API_BASE}/plugin/amazon/search`,
523
649
  params: requestParams,
524
650
  timeout: 60000,
525
- });
526
- const parsed = tryParseJson(text);
651
+ }, { acceptAnyStatus: true });
652
+ if (response.statusCode >= 400) {
653
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
654
+ }
655
+ const parsed = tryParseJson(response.text);
527
656
  if (parsed !== undefined) {
528
- return createJsonResult(parsed);
657
+ return createJsonResult(parsed, { rawText: response.text, response });
529
658
  }
530
- return createTextResult(text);
659
+ return createTextBodyResult(response.text, response);
531
660
  }
532
661
  catch (error) {
533
662
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
@@ -554,95 +683,94 @@ server.tool("amazon_raw_html", "Get raw HTML from any Amazon URL with ZIP-code g
554
683
  language: params.language,
555
684
  timeout: params.timeout,
556
685
  });
557
- const { text } = await requestText({
686
+ const response = await requestResponse({
558
687
  method: "GET",
559
688
  url: `${SCRAPE_API_BASE}/plugin/amazon/`,
560
689
  params: requestParams,
561
690
  timeout: params.timeout ?? 60000,
562
- });
563
- return createTextResult(text);
691
+ }, { acceptAnyStatus: true });
692
+ if (response.statusCode >= 400) {
693
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
694
+ }
695
+ return createTextBodyResult(response.text, response);
564
696
  }
565
697
  catch (error) {
566
698
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
567
699
  }
568
700
  });
569
701
  server.tool("async_create_job", "Create a Scrape.do Async API job for batch/background scraping.", {
570
- targets: zod_1.z.array(zod_1.z.string().url()).min(1).describe("URLs to scrape."),
571
- method: asyncMethodSchema.optional().default("GET").describe("HTTP method for the job."),
572
- body: zod_1.z.string().optional().describe("Request body for POST/PUT/PATCH jobs."),
573
- geoCode: zod_1.z.string().optional().describe("Country code."),
574
- regionalGeoCode: zod_1.z.string().optional().describe("Regional code."),
575
- super_proxy: zod_1.z.boolean().optional().describe("Use residential/mobile proxies."),
576
- headers: headerRecordSchema.optional().describe("Headers to send with the upstream request."),
577
- forwardHeaders: zod_1.z.boolean().optional().describe("Use only provided headers instead of merging with Scrape.do headers."),
578
- sessionId: zod_1.z.union([zod_1.z.number().int(), zod_1.z.string()]).optional().describe("Sticky session ID."),
579
- device: zod_1.z.enum(["desktop", "mobile", "tablet"]).optional().describe("Device type."),
580
- setCookies: zod_1.z.string().optional().describe("Cookies to include."),
581
- timeout: zod_1.z.number().int().positive().optional().describe("Request timeout in milliseconds."),
582
- retryTimeout: zod_1.z.number().int().positive().optional().describe("Retry timeout in milliseconds."),
583
- disableRetry: zod_1.z.boolean().optional().describe("Disable automatic retries."),
584
- transparentResponse: zod_1.z.boolean().optional().describe("Return raw target response."),
585
- disableRedirection: zod_1.z.boolean().optional().describe("Disable redirects."),
586
- output: zod_1.z.enum(["raw", "markdown"]).optional().describe("Output format."),
587
- render: zod_1.z
588
- .object({
589
- blockResources: zod_1.z.boolean().optional(),
590
- waitUntil: asyncWaitUntilSchema.optional(),
591
- customWait: zod_1.z.number().int().min(0).max(35000).optional(),
592
- waitSelector: zod_1.z.string().optional(),
593
- playWithBrowser: zod_1.z.array(browserActionSchema).optional(),
594
- returnJSON: zod_1.z.boolean().optional(),
595
- showWebsocketRequests: zod_1.z.boolean().optional(),
596
- showFrames: zod_1.z.boolean().optional(),
597
- screenshot: zod_1.z.boolean().optional(),
598
- fullScreenshot: zod_1.z.boolean().optional(),
599
- particularScreenshot: zod_1.z.string().optional(),
600
- })
601
- .optional()
602
- .describe("Headless browser configuration."),
603
- webhookUrl: zod_1.z.string().url().optional().describe("Webhook URL to receive results."),
604
- webhookHeaders: headerRecordSchema.optional().describe("Extra headers for the webhook request."),
702
+ targets: zod_1.z.array(zod_1.z.string().url()).optional().describe("Alias for Targets."),
703
+ Targets: zod_1.z.array(zod_1.z.string().url()).optional().describe("Official Async API Targets field."),
704
+ method: asyncMethodSchema.optional().describe("Alias for Method."),
705
+ Method: asyncMethodSchema.optional().describe("Official Async API Method field."),
706
+ body: zod_1.z.string().optional().describe("Alias for Body."),
707
+ Body: zod_1.z.string().optional().describe("Official Async API Body field."),
708
+ geoCode: zod_1.z.string().optional().describe("Alias for GeoCode."),
709
+ GeoCode: zod_1.z.string().optional().describe("Official Async API GeoCode field."),
710
+ regionalGeoCode: zod_1.z.string().optional().describe("Alias for RegionalGeoCode."),
711
+ RegionalGeoCode: zod_1.z.string().optional().describe("Official Async API RegionalGeoCode field."),
712
+ super_proxy: zod_1.z.boolean().optional().describe("Alias for Super."),
713
+ super: zod_1.z.boolean().optional().describe("Alias for Super."),
714
+ Super: zod_1.z.boolean().optional().describe("Official Async API Super field."),
715
+ headers: headerRecordSchema.optional().describe("Alias for Headers."),
716
+ Headers: headerRecordSchema.optional().describe("Official Async API Headers field."),
717
+ forwardHeaders: zod_1.z.boolean().optional().describe("Alias for ForwardHeaders."),
718
+ ForwardHeaders: zod_1.z.boolean().optional().describe("Official Async API ForwardHeaders field."),
719
+ sessionId: zod_1.z.union([zod_1.z.number().int(), zod_1.z.string()]).optional().describe("Alias for SessionID."),
720
+ SessionID: zod_1.z.union([zod_1.z.number().int(), zod_1.z.string()]).optional().describe("Official Async API SessionID field."),
721
+ device: zod_1.z.enum(["desktop", "mobile", "tablet"]).optional().describe("Alias for Device."),
722
+ Device: zod_1.z.enum(["desktop", "mobile", "tablet"]).optional().describe("Official Async API Device field."),
723
+ setCookies: zod_1.z.string().optional().describe("Alias for SetCookies."),
724
+ SetCookies: zod_1.z.string().optional().describe("Official Async API SetCookies field."),
725
+ timeout: zod_1.z.number().int().positive().optional().describe("Alias for Timeout."),
726
+ Timeout: zod_1.z.number().int().positive().optional().describe("Official Async API Timeout field."),
727
+ retryTimeout: zod_1.z.number().int().positive().optional().describe("Alias for RetryTimeout."),
728
+ RetryTimeout: zod_1.z.number().int().positive().optional().describe("Official Async API RetryTimeout field."),
729
+ disableRetry: zod_1.z.boolean().optional().describe("Alias for DisableRetry."),
730
+ DisableRetry: zod_1.z.boolean().optional().describe("Official Async API DisableRetry field."),
731
+ transparentResponse: zod_1.z.boolean().optional().describe("Alias for TransparentResponse."),
732
+ TransparentResponse: zod_1.z.boolean().optional().describe("Official Async API TransparentResponse field."),
733
+ disableRedirection: zod_1.z.boolean().optional().describe("Alias for DisableRedirection."),
734
+ DisableRedirection: zod_1.z.boolean().optional().describe("Official Async API DisableRedirection field."),
735
+ output: zod_1.z.enum(["raw", "markdown"]).optional().describe("Alias for Output."),
736
+ Output: zod_1.z.enum(["raw", "markdown"]).optional().describe("Official Async API Output field."),
737
+ render: asyncRenderSchema.optional().describe("Alias for Render."),
738
+ Render: asyncRenderSchema.optional().describe("Official Async API Render field."),
739
+ webhookUrl: zod_1.z.string().url().optional().describe("Alias for WebhookURL."),
740
+ WebhookURL: zod_1.z.string().url().optional().describe("Official Async API WebhookURL field."),
741
+ webhookHeaders: headerRecordSchema.optional().describe("Alias for WebhookHeaders."),
742
+ WebhookHeaders: headerRecordSchema.optional().describe("Official Async API WebhookHeaders field."),
605
743
  }, async (params) => {
606
744
  try {
607
745
  ensureToken();
608
- const render = params.render
609
- ? compactObject({
610
- BlockResources: params.render.blockResources,
611
- WaitUntil: params.render.waitUntil,
612
- CustomWait: params.render.customWait,
613
- WaitSelector: params.render.waitSelector,
614
- PlayWithBrowser: params.render.playWithBrowser,
615
- ReturnJSON: params.render.returnJSON,
616
- ShowWebsocketRequests: params.render.showWebsocketRequests,
617
- ShowFrames: params.render.showFrames,
618
- Screenshot: params.render.screenshot,
619
- FullScreenshot: params.render.fullScreenshot,
620
- ParticularScreenshot: params.render.particularScreenshot,
621
- })
622
- : undefined;
746
+ const targets = params.Targets ?? params.targets;
747
+ if (!targets?.length) {
748
+ return createErrorResult("Error: targets or Targets is required.");
749
+ }
750
+ const render = normalizeAsyncRenderInput(params.Render ?? params.render);
623
751
  const body = compactObject({
624
- Targets: params.targets,
625
- Method: params.method,
626
- Body: params.body,
627
- GeoCode: params.geoCode,
628
- RegionalGeoCode: params.regionalGeoCode,
629
- Super: params.super_proxy,
630
- Headers: normalizeHeaderRecord(params.headers),
631
- ForwardHeaders: params.forwardHeaders,
632
- SessionID: params.sessionId !== undefined ? String(params.sessionId) : undefined,
633
- Device: params.device,
634
- SetCookies: params.setCookies,
635
- Timeout: params.timeout,
636
- RetryTimeout: params.retryTimeout,
637
- DisableRetry: params.disableRetry,
638
- TransparentResponse: params.transparentResponse,
639
- DisableRedirection: params.disableRedirection,
640
- Output: params.output,
752
+ Targets: targets,
753
+ Method: params.Method ?? params.method ?? "GET",
754
+ Body: params.Body ?? params.body,
755
+ GeoCode: params.GeoCode ?? params.geoCode,
756
+ RegionalGeoCode: params.RegionalGeoCode ?? params.regionalGeoCode,
757
+ Super: params.Super ?? params.super ?? params.super_proxy,
758
+ Headers: normalizeHeaderRecord(params.Headers ?? params.headers),
759
+ ForwardHeaders: params.ForwardHeaders ?? params.forwardHeaders,
760
+ SessionID: params.SessionID !== undefined ? String(params.SessionID) : params.sessionId !== undefined ? String(params.sessionId) : undefined,
761
+ Device: params.Device ?? params.device,
762
+ SetCookies: params.SetCookies ?? params.setCookies,
763
+ Timeout: params.Timeout ?? params.timeout,
764
+ RetryTimeout: params.RetryTimeout ?? params.retryTimeout,
765
+ DisableRetry: params.DisableRetry ?? params.disableRetry,
766
+ TransparentResponse: params.TransparentResponse ?? params.transparentResponse,
767
+ DisableRedirection: params.DisableRedirection ?? params.disableRedirection,
768
+ Output: params.Output ?? params.output,
641
769
  Render: render && Object.keys(render).length > 0 ? render : undefined,
642
- WebhookURL: params.webhookUrl,
643
- WebhookHeaders: normalizeHeaderRecord(params.webhookHeaders),
770
+ WebhookURL: params.WebhookURL ?? params.webhookUrl,
771
+ WebhookHeaders: normalizeHeaderRecord(params.WebhookHeaders ?? params.webhookHeaders),
644
772
  });
645
- const { text } = await requestText({
773
+ const response = await requestResponse({
646
774
  method: "POST",
647
775
  url: `${ASYNC_API_BASE}/api/v1/jobs`,
648
776
  headers: {
@@ -651,59 +779,80 @@ server.tool("async_create_job", "Create a Scrape.do Async API job for batch/back
651
779
  },
652
780
  data: body,
653
781
  timeout: 60000,
654
- });
655
- const parsed = tryParseJson(text);
782
+ }, { acceptAnyStatus: true });
783
+ if (response.statusCode >= 400) {
784
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
785
+ }
786
+ const parsed = tryParseJson(response.text);
656
787
  if (parsed !== undefined) {
657
- return createJsonResult(parsed);
788
+ return createJsonResult(parsed, { rawText: response.text, response });
658
789
  }
659
- return createTextResult(text);
790
+ return createTextBodyResult(response.text, response);
660
791
  }
661
792
  catch (error) {
662
793
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
663
794
  }
664
795
  });
665
796
  server.tool("async_get_job", "Get Scrape.do Async API job details by job ID.", {
666
- jobId: zod_1.z.string().min(1).describe("Job ID returned by async_create_job."),
667
- }, async ({ jobId }) => {
797
+ jobId: zod_1.z.string().min(1).optional().describe("Alias for jobID."),
798
+ jobID: zod_1.z.string().min(1).optional().describe("Official Async API jobID path parameter."),
799
+ }, async ({ jobId, jobID }) => {
668
800
  try {
669
801
  ensureToken();
670
- const { text } = await requestText({
802
+ const resolvedJobId = jobID ?? jobId;
803
+ if (!resolvedJobId) {
804
+ return createErrorResult("Error: jobId or jobID is required.");
805
+ }
806
+ const response = await requestResponse({
671
807
  method: "GET",
672
- url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(jobId)}`,
808
+ url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(resolvedJobId)}`,
673
809
  headers: {
674
810
  "X-Token": SCRAPE_DO_TOKEN,
675
811
  },
676
812
  timeout: 60000,
677
- });
678
- const parsed = tryParseJson(text);
813
+ }, { acceptAnyStatus: true });
814
+ if (response.statusCode >= 400) {
815
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
816
+ }
817
+ const parsed = tryParseJson(response.text);
679
818
  if (parsed !== undefined) {
680
- return createJsonResult(parsed);
819
+ return createJsonResult(parsed, { rawText: response.text, response });
681
820
  }
682
- return createTextResult(text);
821
+ return createTextBodyResult(response.text, response);
683
822
  }
684
823
  catch (error) {
685
824
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
686
825
  }
687
826
  });
688
827
  server.tool("async_get_task", "Get Scrape.do Async API task details by job ID and task ID.", {
689
- jobId: zod_1.z.string().min(1).describe("Job ID."),
690
- taskId: zod_1.z.string().min(1).describe("Task ID."),
691
- }, async ({ jobId, taskId }) => {
828
+ jobId: zod_1.z.string().min(1).optional().describe("Alias for jobID."),
829
+ jobID: zod_1.z.string().min(1).optional().describe("Official Async API jobID path parameter."),
830
+ taskId: zod_1.z.string().min(1).optional().describe("Alias for taskID."),
831
+ taskID: zod_1.z.string().min(1).optional().describe("Official Async API taskID path parameter."),
832
+ }, async ({ jobId, jobID, taskId, taskID }) => {
692
833
  try {
693
834
  ensureToken();
694
- const { text } = await requestText({
835
+ const resolvedJobId = jobID ?? jobId;
836
+ const resolvedTaskId = taskID ?? taskId;
837
+ if (!resolvedJobId || !resolvedTaskId) {
838
+ return createErrorResult("Error: jobId/jobID and taskId/taskID are required.");
839
+ }
840
+ const response = await requestResponse({
695
841
  method: "GET",
696
- url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(jobId)}/${encodeURIComponent(taskId)}`,
842
+ url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(resolvedJobId)}/${encodeURIComponent(resolvedTaskId)}`,
697
843
  headers: {
698
844
  "X-Token": SCRAPE_DO_TOKEN,
699
845
  },
700
846
  timeout: 60000,
701
- });
702
- const parsed = tryParseJson(text);
847
+ }, { acceptAnyStatus: true });
848
+ if (response.statusCode >= 400) {
849
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
850
+ }
851
+ const parsed = tryParseJson(response.text);
703
852
  if (parsed !== undefined) {
704
- return createJsonResult(parsed);
853
+ return createJsonResult(parsed, { rawText: response.text, response });
705
854
  }
706
- return createTextResult(text);
855
+ return createTextBodyResult(response.text, response);
707
856
  }
708
857
  catch (error) {
709
858
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
@@ -712,49 +861,61 @@ server.tool("async_get_task", "Get Scrape.do Async API task details by job ID an
712
861
  server.tool("async_list_jobs", "List Scrape.do Async API jobs with pagination.", {
713
862
  page: zod_1.z.number().int().positive().optional().default(1).describe("Page number."),
714
863
  pageSize: zod_1.z.number().int().positive().max(100).optional().default(10).describe("Items per page."),
715
- }, async ({ page, pageSize }) => {
864
+ page_size: zod_1.z.number().int().positive().max(100).optional().describe("Official Async API page_size query parameter."),
865
+ }, async ({ page, pageSize, page_size }) => {
716
866
  try {
717
867
  ensureToken();
718
- const { text } = await requestText({
868
+ const response = await requestResponse({
719
869
  method: "GET",
720
870
  url: `${ASYNC_API_BASE}/api/v1/jobs`,
721
871
  params: {
722
872
  page,
723
- page_size: pageSize,
873
+ page_size: page_size ?? pageSize,
724
874
  },
725
875
  headers: {
726
876
  "X-Token": SCRAPE_DO_TOKEN,
727
877
  },
728
878
  timeout: 60000,
729
- });
730
- const parsed = tryParseJson(text);
879
+ }, { acceptAnyStatus: true });
880
+ if (response.statusCode >= 400) {
881
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
882
+ }
883
+ const parsed = tryParseJson(response.text);
731
884
  if (parsed !== undefined) {
732
- return createJsonResult(parsed);
885
+ return createJsonResult(parsed, { rawText: response.text, response });
733
886
  }
734
- return createTextResult(text);
887
+ return createTextBodyResult(response.text, response);
735
888
  }
736
889
  catch (error) {
737
890
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
738
891
  }
739
892
  });
740
893
  server.tool("async_cancel_job", "Cancel a Scrape.do Async API job.", {
741
- jobId: zod_1.z.string().min(1).describe("Job ID to cancel."),
742
- }, async ({ jobId }) => {
894
+ jobId: zod_1.z.string().min(1).optional().describe("Alias for jobID."),
895
+ jobID: zod_1.z.string().min(1).optional().describe("Official Async API jobID path parameter."),
896
+ }, async ({ jobId, jobID }) => {
743
897
  try {
744
898
  ensureToken();
745
- const { text } = await requestText({
899
+ const resolvedJobId = jobID ?? jobId;
900
+ if (!resolvedJobId) {
901
+ return createErrorResult("Error: jobId or jobID is required.");
902
+ }
903
+ const response = await requestResponse({
746
904
  method: "DELETE",
747
- url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(jobId)}`,
905
+ url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(resolvedJobId)}`,
748
906
  headers: {
749
907
  "X-Token": SCRAPE_DO_TOKEN,
750
908
  },
751
909
  timeout: 60000,
752
- });
753
- const parsed = tryParseJson(text);
910
+ }, { acceptAnyStatus: true });
911
+ if (response.statusCode >= 400) {
912
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
913
+ }
914
+ const parsed = tryParseJson(response.text);
754
915
  if (parsed !== undefined) {
755
- return createJsonResult(parsed);
916
+ return createJsonResult(parsed, { rawText: response.text, response });
756
917
  }
757
- return createTextResult(text);
918
+ return createTextBodyResult(response.text, response);
758
919
  }
759
920
  catch (error) {
760
921
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
@@ -763,19 +924,22 @@ server.tool("async_cancel_job", "Cancel a Scrape.do Async API job.", {
763
924
  server.tool("async_get_account", "Get Scrape.do Async API account/concurrency information.", {}, async () => {
764
925
  try {
765
926
  ensureToken();
766
- const { text } = await requestText({
927
+ const response = await requestResponse({
767
928
  method: "GET",
768
929
  url: `${ASYNC_API_BASE}/api/v1/me`,
769
930
  headers: {
770
931
  "X-Token": SCRAPE_DO_TOKEN,
771
932
  },
772
933
  timeout: 60000,
773
- });
774
- const parsed = tryParseJson(text);
934
+ }, { acceptAnyStatus: true });
935
+ if (response.statusCode >= 400) {
936
+ return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
937
+ }
938
+ const parsed = tryParseJson(response.text);
775
939
  if (parsed !== undefined) {
776
- return createJsonResult(parsed);
940
+ return createJsonResult(parsed, { rawText: response.text, response });
777
941
  }
778
- return createTextResult(text);
942
+ return createTextBodyResult(response.text, response);
779
943
  }
780
944
  catch (error) {
781
945
  return createErrorResult(`Error: ${getErrorMessage(error)}`);
@@ -785,16 +949,17 @@ server.tool("proxy_mode_config", "Generate Scrape.do Proxy Mode configuration an
785
949
  params: headerRecordSchema.optional().describe("Proxy mode query parameters to place into the password segment."),
786
950
  }, async ({ params }) => {
787
951
  try {
788
- ensureToken();
789
952
  const parameterString = buildProxyParameterString(params);
790
953
  return createJsonResult({
791
954
  protocol: "http or https",
792
955
  host: "proxy.scrape.do",
793
956
  port: 8080,
794
- username: "<SCRAPE_DO_TOKEN>",
957
+ username: "<YOUR_SCRAPE_DO_TOKEN>",
795
958
  password: parameterString,
796
- proxy_url_template: `http://<SCRAPE_DO_TOKEN>:${parameterString}@proxy.scrape.do:8080`,
959
+ proxy_url_template: `http://<YOUR_SCRAPE_DO_TOKEN>:${parameterString}@proxy.scrape.do:8080`,
797
960
  ca_certificate_url: "https://scrape.do/scrapedo_ca.crt",
961
+ default_customHeaders: true,
962
+ disable_customHeaders_hint: "Append customHeaders=false to the password parameters if you need to disable the Proxy Mode default.",
798
963
  });
799
964
  }
800
965
  catch (error) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scrape-do-mcp",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "MCP Server for Scrape.do - scraping, Google Search, Amazon, Async API, and Proxy Mode helpers",
5
5
  "main": "dist/index.js",
6
6
  "bin": {