scrape-do-mcp 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README-ZH.md +8 -4
- package/README.md +8 -4
- package/dist/index.js +325 -160
- package/package.json +1 -1
package/README-ZH.md
CHANGED
|
@@ -14,8 +14,8 @@
|
|
|
14
14
|
- `amazon_offer_listing`:Amazon 卖家报价接口。
|
|
15
15
|
- `amazon_search`:Amazon 搜索 / 类目结果接口。
|
|
16
16
|
- `amazon_raw_html`:Amazon 原始 HTML 接口。
|
|
17
|
-
- `async_create_job`、`async_get_job`、`async_get_task`、`async_list_jobs`、`async_cancel_job`、`async_get_account`:Async API
|
|
18
|
-
- `proxy_mode_config
|
|
17
|
+
- `async_create_job`、`async_get_job`、`async_get_task`、`async_list_jobs`、`async_cancel_job`、`async_get_account`:Async API,并同时兼容 MCP 风格字段和官方字段名。
|
|
18
|
+
- `proxy_mode_config`:生成更贴近官方文档的 Proxy Mode 连接信息、默认参数串和证书信息。
|
|
19
19
|
|
|
20
20
|
## 兼容性说明
|
|
21
21
|
|
|
@@ -29,9 +29,13 @@
|
|
|
29
29
|
- `language` 或 `hl`
|
|
30
30
|
- `domain` 或 `google_domain`
|
|
31
31
|
- `includeHtml` 或 `include_html`
|
|
32
|
+
- `async_create_job` 同时接受 `targets`、`render`、`webhookUrl` 这类别名,以及官方字段 `Targets`、`Render`、`WebhookURL`。
|
|
33
|
+
- `async_get_job`、`async_get_task`、`async_cancel_job` 同时接受 `jobId` / `taskId` 和官方 `jobID` / `taskID`。
|
|
34
|
+
- `async_list_jobs` 同时支持 `pageSize` 和官方 `page_size`。
|
|
32
35
|
- `scrape_url` 里的 Header 转发请使用 `headers` + `header_mode`(`custom` / `extra` / `forward`)。
|
|
33
|
-
-
|
|
34
|
-
- `scrape_url`
|
|
36
|
+
- 截图结果会保留官方 JSON 响应,同时附加 MCP 图片内容,尽量兼顾官方格式和 MCP 可视化体验。
|
|
37
|
+
- `scrape_url` 现在默认使用 `output="raw"`,更贴近官方 API。
|
|
38
|
+
- `scrape_url` 会在 `structuredContent` 里附带响应元数据,便于在 MCP 中查看 `pureCookies`、`transparentResponse` 和二进制响应信息。
|
|
35
39
|
|
|
36
40
|
## 安装
|
|
37
41
|
|
package/README.md
CHANGED
|
@@ -14,8 +14,8 @@ Official docs: https://scrape.do/documentation/
|
|
|
14
14
|
- `amazon_offer_listing`: Amazon offer listing endpoint.
|
|
15
15
|
- `amazon_search`: Amazon search/category endpoint.
|
|
16
16
|
- `amazon_raw_html`: Raw HTML Amazon endpoint with geo-targeting.
|
|
17
|
-
- `async_create_job`, `async_get_job`, `async_get_task`, `async_list_jobs`, `async_cancel_job`, `async_get_account`: Async API coverage.
|
|
18
|
-
- `proxy_mode_config`: Builds Proxy Mode connection details
|
|
17
|
+
- `async_create_job`, `async_get_job`, `async_get_task`, `async_list_jobs`, `async_cancel_job`, `async_get_account`: Async API coverage with both MCP-friendly aliases and official field names.
|
|
18
|
+
- `proxy_mode_config`: Builds official Proxy Mode connection details, default parameter strings, and CA certificate references.
|
|
19
19
|
|
|
20
20
|
## Compatibility Notes
|
|
21
21
|
|
|
@@ -29,9 +29,13 @@ Official docs: https://scrape.do/documentation/
|
|
|
29
29
|
- `language` or `hl`
|
|
30
30
|
- `domain` or `google_domain`
|
|
31
31
|
- `includeHtml` or `include_html`
|
|
32
|
+
- `async_create_job` accepts both alias fields like `targets`, `render`, `webhookUrl` and official Async API fields like `Targets`, `Render`, `WebhookURL`.
|
|
33
|
+
- `async_get_job`, `async_get_task`, and `async_cancel_job` accept both `jobId`/`taskId` and official `jobID`/`taskID`.
|
|
34
|
+
- `async_list_jobs` accepts both `pageSize` and official `page_size`.
|
|
32
35
|
- For header forwarding in `scrape_url`, pass `headers` plus `header_mode` (`custom`, `extra`, or `forward`).
|
|
33
|
-
- Screenshot responses
|
|
34
|
-
- `scrape_url` defaults to `output="
|
|
36
|
+
- Screenshot responses preserve the official Scrape.do JSON body and also attach MCP image content when screenshots are present.
|
|
37
|
+
- `scrape_url` now defaults to `output="raw"` to match the official API more closely.
|
|
38
|
+
- `scrape_url` includes response metadata in `structuredContent`, which helps surface `pureCookies`, `transparentResponse`, and binary responses inside MCP.
|
|
35
39
|
|
|
36
40
|
## Installation
|
|
37
41
|
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ const axios_1 = __importDefault(require("axios"));
|
|
|
8
8
|
const mcp_js_1 = require("@modelcontextprotocol/sdk/server/mcp.js");
|
|
9
9
|
const stdio_js_1 = require("@modelcontextprotocol/sdk/server/stdio.js");
|
|
10
10
|
const zod_1 = require("zod");
|
|
11
|
-
const SERVER_VERSION = "0.3.0";
|
|
11
|
+
const SERVER_VERSION = "0.4.0";
|
|
12
12
|
const SCRAPE_DO_TOKEN = process.env.SCRAPE_DO_TOKEN || "";
|
|
13
13
|
const SCRAPE_API_BASE = "https://api.scrape.do";
|
|
14
14
|
const ASYNC_API_BASE = "https://q.scrape.do";
|
|
@@ -70,18 +70,29 @@ function createTextResult(text, structuredContent) {
|
|
|
70
70
|
...(structuredContent ? { structuredContent } : {}),
|
|
71
71
|
};
|
|
72
72
|
}
|
|
73
|
-
function createJsonResult(value) {
|
|
73
|
+
function createJsonResult(value, options) {
|
|
74
|
+
const rawText = options?.rawText ?? JSON.stringify(value, null, 2);
|
|
75
|
+
const responseMetadata = options?.response ? createResponseMetadata(options.response) : undefined;
|
|
74
76
|
if (isRecord(value)) {
|
|
75
77
|
return {
|
|
76
|
-
content: [{ type: "text", text: JSON.stringify(value, null, 2) }],
|
|
77
|
-
structuredContent: value,
|
|
78
|
+
content: [{ type: "text", text: rawText }],
|
|
79
|
+
structuredContent: responseMetadata ? { ...value, ...responseMetadata } : value,
|
|
78
80
|
};
|
|
79
81
|
}
|
|
80
|
-
|
|
82
|
+
if (Array.isArray(value) && responseMetadata) {
|
|
83
|
+
return createTextResult(rawText, {
|
|
84
|
+
...responseMetadata,
|
|
85
|
+
_responseBody: value,
|
|
86
|
+
});
|
|
87
|
+
}
|
|
88
|
+
return createTextResult(rawText);
|
|
81
89
|
}
|
|
82
|
-
function createImageResult(images, note) {
|
|
90
|
+
function createImageResult(images, note, structuredContent, rawText) {
|
|
83
91
|
const content = [];
|
|
84
|
-
if (note) {
|
|
92
|
+
if (rawText) {
|
|
93
|
+
content.push({ type: "text", text: rawText });
|
|
94
|
+
}
|
|
95
|
+
else if (note) {
|
|
85
96
|
content.push({ type: "text", text: note });
|
|
86
97
|
}
|
|
87
98
|
for (const image of images) {
|
|
@@ -91,7 +102,10 @@ function createImageResult(images, note) {
|
|
|
91
102
|
mimeType: image.mimeType,
|
|
92
103
|
});
|
|
93
104
|
}
|
|
94
|
-
return { content };
|
|
105
|
+
return {
|
|
106
|
+
content,
|
|
107
|
+
...(structuredContent ? { structuredContent } : {}),
|
|
108
|
+
};
|
|
95
109
|
}
|
|
96
110
|
function getErrorMessage(error) {
|
|
97
111
|
if (axios_1.default.isAxiosError(error)) {
|
|
@@ -106,15 +120,67 @@ function getErrorMessage(error) {
|
|
|
106
120
|
}
|
|
107
121
|
return String(error);
|
|
108
122
|
}
|
|
109
|
-
|
|
123
|
+
function getMimeType(contentType) {
|
|
124
|
+
return contentType?.split(";")[0]?.trim().toLowerCase();
|
|
125
|
+
}
|
|
126
|
+
function isTextLikeMimeType(mimeType) {
|
|
127
|
+
if (!mimeType) {
|
|
128
|
+
return true;
|
|
129
|
+
}
|
|
130
|
+
return mimeType.startsWith("text/") || mimeType.includes("json") || mimeType.includes("xml") || mimeType === "application/javascript" || mimeType === "application/x-javascript" || mimeType === "application/xhtml+xml";
|
|
131
|
+
}
|
|
132
|
+
function normalizeResponseHeaders(headers) {
|
|
133
|
+
const entries = Object.entries(headers).filter(([, value]) => value !== undefined);
|
|
134
|
+
if (entries.length === 0) {
|
|
135
|
+
return undefined;
|
|
136
|
+
}
|
|
137
|
+
return Object.fromEntries(entries.map(([key, value]) => [key, Array.isArray(value) && value.length === 1 ? value[0] : value]));
|
|
138
|
+
}
|
|
139
|
+
function createResponseMetadata(response) {
|
|
140
|
+
return compactObject({
|
|
141
|
+
_contentType: response.contentType,
|
|
142
|
+
_responseHeaders: normalizeResponseHeaders(response.headers),
|
|
143
|
+
_statusCode: response.statusCode,
|
|
144
|
+
});
|
|
145
|
+
}
|
|
146
|
+
function createBinaryResult(response) {
|
|
147
|
+
const mimeType = getMimeType(response.contentType) ?? "application/octet-stream";
|
|
148
|
+
if (mimeType.startsWith("image/")) {
|
|
149
|
+
return {
|
|
150
|
+
content: [
|
|
151
|
+
{ type: "image", data: response.data.toString("base64"), mimeType },
|
|
152
|
+
],
|
|
153
|
+
structuredContent: createResponseMetadata(response),
|
|
154
|
+
};
|
|
155
|
+
}
|
|
156
|
+
return createTextResult(`Binary response returned with content-type ${mimeType}. See structuredContent._bodyBase64 for the raw bytes.`, {
|
|
157
|
+
...createResponseMetadata(response),
|
|
158
|
+
_bodyBase64: response.data.toString("base64"),
|
|
159
|
+
});
|
|
160
|
+
}
|
|
161
|
+
function createTextBodyResult(text, response) {
|
|
162
|
+
if (!response) {
|
|
163
|
+
return createTextResult(text);
|
|
164
|
+
}
|
|
165
|
+
return createTextResult(text, createResponseMetadata(response));
|
|
166
|
+
}
|
|
167
|
+
async function requestResponse(config, options) {
|
|
110
168
|
const response = await axios_1.default.request({
|
|
111
169
|
...config,
|
|
112
|
-
responseType: "
|
|
170
|
+
responseType: "arraybuffer",
|
|
113
171
|
transformResponse: [(value) => value],
|
|
172
|
+
validateStatus: options?.acceptAnyStatus ? () => true : undefined,
|
|
114
173
|
});
|
|
174
|
+
const data = Buffer.isBuffer(response.data) ? response.data : Buffer.from(response.data);
|
|
175
|
+
const headers = response.headers;
|
|
176
|
+
const contentTypeHeader = headers["content-type"];
|
|
177
|
+
const contentType = Array.isArray(contentTypeHeader) ? contentTypeHeader[0] : contentTypeHeader;
|
|
115
178
|
return {
|
|
116
|
-
|
|
117
|
-
|
|
179
|
+
contentType,
|
|
180
|
+
data,
|
|
181
|
+
headers,
|
|
182
|
+
statusCode: response.status,
|
|
183
|
+
text: data.toString("utf8"),
|
|
118
184
|
};
|
|
119
185
|
}
|
|
120
186
|
function normalizeHeaderRecord(value) {
|
|
@@ -214,7 +280,7 @@ function collectImageMatches(value, results = [], seen = new Set()) {
|
|
|
214
280
|
if (!isRecord(value)) {
|
|
215
281
|
return results;
|
|
216
282
|
}
|
|
217
|
-
const prioritizedKeys = ["screenShot", "screenshot", "fullScreenShot", "particularScreenShot", "image", "images"];
|
|
283
|
+
const prioritizedKeys = ["screenShot", "screenShots", "screenshot", "fullScreenShot", "particularScreenShot", "image", "images"];
|
|
218
284
|
for (const key of prioritizedKeys) {
|
|
219
285
|
if (key in value) {
|
|
220
286
|
collectImageMatches(value[key], results, seen);
|
|
@@ -237,6 +303,48 @@ function buildProxyParameterString(params) {
|
|
|
237
303
|
}
|
|
238
304
|
return searchParams.toString();
|
|
239
305
|
}
|
|
306
|
+
const asyncRenderSchema = zod_1.z.object({
|
|
307
|
+
blockResources: zod_1.z.boolean().optional(),
|
|
308
|
+
BlockResources: zod_1.z.boolean().optional(),
|
|
309
|
+
waitUntil: asyncWaitUntilSchema.optional(),
|
|
310
|
+
WaitUntil: asyncWaitUntilSchema.optional(),
|
|
311
|
+
customWait: zod_1.z.number().int().min(0).max(35000).optional(),
|
|
312
|
+
CustomWait: zod_1.z.number().int().min(0).max(35000).optional(),
|
|
313
|
+
waitSelector: zod_1.z.string().optional(),
|
|
314
|
+
WaitSelector: zod_1.z.string().optional(),
|
|
315
|
+
playWithBrowser: zod_1.z.array(browserActionSchema).optional(),
|
|
316
|
+
PlayWithBrowser: zod_1.z.array(browserActionSchema).optional(),
|
|
317
|
+
returnJSON: zod_1.z.boolean().optional(),
|
|
318
|
+
ReturnJSON: zod_1.z.boolean().optional(),
|
|
319
|
+
showWebsocketRequests: zod_1.z.boolean().optional(),
|
|
320
|
+
ShowWebsocketRequests: zod_1.z.boolean().optional(),
|
|
321
|
+
showFrames: zod_1.z.boolean().optional(),
|
|
322
|
+
ShowFrames: zod_1.z.boolean().optional(),
|
|
323
|
+
screenshot: zod_1.z.boolean().optional(),
|
|
324
|
+
Screenshot: zod_1.z.boolean().optional(),
|
|
325
|
+
fullScreenshot: zod_1.z.boolean().optional(),
|
|
326
|
+
FullScreenshot: zod_1.z.boolean().optional(),
|
|
327
|
+
particularScreenshot: zod_1.z.string().optional(),
|
|
328
|
+
ParticularScreenshot: zod_1.z.string().optional(),
|
|
329
|
+
});
|
|
330
|
+
function normalizeAsyncRenderInput(input) {
|
|
331
|
+
if (!input) {
|
|
332
|
+
return undefined;
|
|
333
|
+
}
|
|
334
|
+
return compactObject({
|
|
335
|
+
BlockResources: input.BlockResources ?? input.blockResources,
|
|
336
|
+
WaitUntil: input.WaitUntil ?? input.waitUntil,
|
|
337
|
+
CustomWait: input.CustomWait ?? input.customWait,
|
|
338
|
+
WaitSelector: input.WaitSelector ?? input.waitSelector,
|
|
339
|
+
PlayWithBrowser: input.PlayWithBrowser ?? input.playWithBrowser,
|
|
340
|
+
ReturnJSON: input.ReturnJSON ?? input.returnJSON,
|
|
341
|
+
ShowWebsocketRequests: input.ShowWebsocketRequests ?? input.showWebsocketRequests,
|
|
342
|
+
ShowFrames: input.ShowFrames ?? input.showFrames,
|
|
343
|
+
Screenshot: input.Screenshot ?? input.screenshot,
|
|
344
|
+
FullScreenshot: input.FullScreenshot ?? input.fullScreenshot,
|
|
345
|
+
ParticularScreenshot: input.ParticularScreenshot ?? input.particularScreenshot,
|
|
346
|
+
});
|
|
347
|
+
}
|
|
240
348
|
function ensureToken() {
|
|
241
349
|
if (!SCRAPE_DO_TOKEN) {
|
|
242
350
|
throw new Error("SCRAPE_DO_TOKEN is not set. Get your token at https://app.scrape.do");
|
|
@@ -255,7 +363,7 @@ server.tool("scrape_url", "Scrape a webpage with the official Scrape.do API. Sup
|
|
|
255
363
|
timeout: zod_1.z.number().int().positive().optional().default(60000).describe("Maximum timeout in milliseconds."),
|
|
256
364
|
retryTimeout: zod_1.z.number().int().positive().optional().describe("Retry timeout in milliseconds."),
|
|
257
365
|
disableRetry: zod_1.z.boolean().optional().default(false).describe("Disable automatic retries."),
|
|
258
|
-
output: zod_1.z.enum(["markdown", "raw"]).optional().describe("Output format.
|
|
366
|
+
output: zod_1.z.enum(["markdown", "raw"]).optional().describe("Output format. Matches Scrape.do's official raw/markdown output."),
|
|
259
367
|
returnJSON: zod_1.z.boolean().optional().default(false).describe("Return JSON with network requests/content."),
|
|
260
368
|
transparentResponse: zod_1.z.boolean().optional().default(false).describe("Return the target response without Scrape.do post-processing."),
|
|
261
369
|
screenshot: zod_1.z.boolean().optional().describe("Alias for screenShot. Capture a viewport screenshot."),
|
|
@@ -297,7 +405,7 @@ server.tool("scrape_url", "Scrape a webpage with the official Scrape.do API. Sup
|
|
|
297
405
|
const effectiveRender = (params.render_js ?? params.render ?? false) || params.returnJSON || params.showFrames || params.showWebsocketRequests || screenshotRequested || interactionRequested;
|
|
298
406
|
const effectiveReturnJSON = params.returnJSON || params.showFrames || params.showWebsocketRequests || screenshotRequested || interactionRequested;
|
|
299
407
|
const effectiveBlockResources = screenshotRequested || interactionRequested ? false : params.blockResources;
|
|
300
|
-
const effectiveOutput = effectiveReturnJSON ? params.output : params.output ?? "markdown";
|
|
408
|
+
const effectiveOutput = effectiveReturnJSON ? params.output : params.output ?? "raw";
|
|
301
409
|
const requestParams = compactObject({
|
|
302
410
|
token: SCRAPE_DO_TOKEN,
|
|
303
411
|
url: params.url,
|
|
@@ -334,23 +442,32 @@ server.tool("scrape_url", "Scrape a webpage with the official Scrape.do API. Sup
|
|
|
334
442
|
callback: params.callback,
|
|
335
443
|
});
|
|
336
444
|
const headers = buildForwardedHeaders(params.headers, headerMode);
|
|
337
|
-
const
|
|
445
|
+
const response = await requestResponse({
|
|
338
446
|
method: "GET",
|
|
339
447
|
url: SCRAPE_API_BASE,
|
|
340
448
|
params: requestParams,
|
|
341
449
|
headers,
|
|
342
450
|
timeout: Math.min(params.timeout ?? 60000, 120000),
|
|
343
|
-
});
|
|
344
|
-
|
|
345
|
-
|
|
451
|
+
}, { acceptAnyStatus: true });
|
|
452
|
+
if (response.statusCode >= 400 && !params.transparentResponse) {
|
|
453
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
454
|
+
}
|
|
455
|
+
const responseMimeType = getMimeType(response.contentType);
|
|
456
|
+
const parsed = isTextLikeMimeType(responseMimeType) ? tryParseJson(response.text) : undefined;
|
|
457
|
+
const images = screenshotRequested || interactionRequested ? collectImageMatches(parsed ?? response.text) : [];
|
|
346
458
|
if (images.length > 0) {
|
|
347
|
-
const
|
|
348
|
-
|
|
459
|
+
const structuredContent = parsed && isRecord(parsed)
|
|
460
|
+
? { ...parsed, ...createResponseMetadata(response) }
|
|
461
|
+
: createResponseMetadata(response);
|
|
462
|
+
return createImageResult(images, undefined, structuredContent, parsed ? response.text : undefined);
|
|
349
463
|
}
|
|
350
464
|
if (parsed !== undefined) {
|
|
351
|
-
return createJsonResult(parsed);
|
|
465
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
352
466
|
}
|
|
353
|
-
|
|
467
|
+
if (isTextLikeMimeType(responseMimeType)) {
|
|
468
|
+
return createTextBodyResult(response.text, response);
|
|
469
|
+
}
|
|
470
|
+
return createBinaryResult(response);
|
|
354
471
|
}
|
|
355
472
|
catch (error) {
|
|
356
473
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
@@ -406,17 +523,20 @@ server.tool("google_search", "Search Google with Scrape.do's structured SERP API
|
|
|
406
523
|
nfpr: params.nfpr,
|
|
407
524
|
filter: params.filter,
|
|
408
525
|
});
|
|
409
|
-
const
|
|
526
|
+
const response = await requestResponse({
|
|
410
527
|
method: "GET",
|
|
411
528
|
url: `${SCRAPE_API_BASE}/plugin/google/search`,
|
|
412
529
|
params: requestParams,
|
|
413
530
|
timeout: 60000,
|
|
414
|
-
});
|
|
415
|
-
|
|
531
|
+
}, { acceptAnyStatus: true });
|
|
532
|
+
if (response.statusCode >= 400) {
|
|
533
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
534
|
+
}
|
|
535
|
+
const parsed = tryParseJson(response.text);
|
|
416
536
|
if (parsed !== undefined) {
|
|
417
|
-
return createJsonResult(parsed);
|
|
537
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
418
538
|
}
|
|
419
|
-
return
|
|
539
|
+
return createTextBodyResult(response.text, response);
|
|
420
540
|
}
|
|
421
541
|
catch (error) {
|
|
422
542
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
@@ -443,17 +563,20 @@ server.tool("amazon_product", "Get structured Amazon product detail data with th
|
|
|
443
563
|
language: params.language,
|
|
444
564
|
include_html: params.include_html ?? params.includeHtml ? true : undefined,
|
|
445
565
|
});
|
|
446
|
-
const
|
|
566
|
+
const response = await requestResponse({
|
|
447
567
|
method: "GET",
|
|
448
568
|
url: `${SCRAPE_API_BASE}/plugin/amazon/pdp`,
|
|
449
569
|
params: requestParams,
|
|
450
570
|
timeout: 60000,
|
|
451
|
-
});
|
|
452
|
-
|
|
571
|
+
}, { acceptAnyStatus: true });
|
|
572
|
+
if (response.statusCode >= 400) {
|
|
573
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
574
|
+
}
|
|
575
|
+
const parsed = tryParseJson(response.text);
|
|
453
576
|
if (parsed !== undefined) {
|
|
454
|
-
return createJsonResult(parsed);
|
|
577
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
455
578
|
}
|
|
456
|
-
return
|
|
579
|
+
return createTextBodyResult(response.text, response);
|
|
457
580
|
}
|
|
458
581
|
catch (error) {
|
|
459
582
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
@@ -478,17 +601,20 @@ server.tool("amazon_offer_listing", "Get all seller offers for an Amazon product
|
|
|
478
601
|
super: params.super_proxy ?? params.super,
|
|
479
602
|
include_html: params.include_html ?? params.includeHtml ? true : undefined,
|
|
480
603
|
});
|
|
481
|
-
const
|
|
604
|
+
const response = await requestResponse({
|
|
482
605
|
method: "GET",
|
|
483
606
|
url: `${SCRAPE_API_BASE}/plugin/amazon/offer-listing`,
|
|
484
607
|
params: requestParams,
|
|
485
608
|
timeout: 60000,
|
|
486
|
-
});
|
|
487
|
-
|
|
609
|
+
}, { acceptAnyStatus: true });
|
|
610
|
+
if (response.statusCode >= 400) {
|
|
611
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
612
|
+
}
|
|
613
|
+
const parsed = tryParseJson(response.text);
|
|
488
614
|
if (parsed !== undefined) {
|
|
489
|
-
return createJsonResult(parsed);
|
|
615
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
490
616
|
}
|
|
491
|
-
return
|
|
617
|
+
return createTextBodyResult(response.text, response);
|
|
492
618
|
}
|
|
493
619
|
catch (error) {
|
|
494
620
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
@@ -517,17 +643,20 @@ server.tool("amazon_search", "Search Amazon or scrape Amazon category-style resu
|
|
|
517
643
|
language: params.language,
|
|
518
644
|
include_html: params.include_html ?? params.includeHtml ? true : undefined,
|
|
519
645
|
});
|
|
520
|
-
const
|
|
646
|
+
const response = await requestResponse({
|
|
521
647
|
method: "GET",
|
|
522
648
|
url: `${SCRAPE_API_BASE}/plugin/amazon/search`,
|
|
523
649
|
params: requestParams,
|
|
524
650
|
timeout: 60000,
|
|
525
|
-
});
|
|
526
|
-
|
|
651
|
+
}, { acceptAnyStatus: true });
|
|
652
|
+
if (response.statusCode >= 400) {
|
|
653
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
654
|
+
}
|
|
655
|
+
const parsed = tryParseJson(response.text);
|
|
527
656
|
if (parsed !== undefined) {
|
|
528
|
-
return createJsonResult(parsed);
|
|
657
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
529
658
|
}
|
|
530
|
-
return
|
|
659
|
+
return createTextBodyResult(response.text, response);
|
|
531
660
|
}
|
|
532
661
|
catch (error) {
|
|
533
662
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
@@ -554,95 +683,94 @@ server.tool("amazon_raw_html", "Get raw HTML from any Amazon URL with ZIP-code g
|
|
|
554
683
|
language: params.language,
|
|
555
684
|
timeout: params.timeout,
|
|
556
685
|
});
|
|
557
|
-
const
|
|
686
|
+
const response = await requestResponse({
|
|
558
687
|
method: "GET",
|
|
559
688
|
url: `${SCRAPE_API_BASE}/plugin/amazon/`,
|
|
560
689
|
params: requestParams,
|
|
561
690
|
timeout: params.timeout ?? 60000,
|
|
562
|
-
});
|
|
563
|
-
|
|
691
|
+
}, { acceptAnyStatus: true });
|
|
692
|
+
if (response.statusCode >= 400) {
|
|
693
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
694
|
+
}
|
|
695
|
+
return createTextBodyResult(response.text, response);
|
|
564
696
|
}
|
|
565
697
|
catch (error) {
|
|
566
698
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
567
699
|
}
|
|
568
700
|
});
|
|
569
701
|
server.tool("async_create_job", "Create a Scrape.do Async API job for batch/background scraping.", {
|
|
570
|
-
targets: zod_1.z.array(zod_1.z.string().url()).
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
702
|
+
targets: zod_1.z.array(zod_1.z.string().url()).optional().describe("Alias for Targets."),
|
|
703
|
+
Targets: zod_1.z.array(zod_1.z.string().url()).optional().describe("Official Async API Targets field."),
|
|
704
|
+
method: asyncMethodSchema.optional().describe("Alias for Method."),
|
|
705
|
+
Method: asyncMethodSchema.optional().describe("Official Async API Method field."),
|
|
706
|
+
body: zod_1.z.string().optional().describe("Alias for Body."),
|
|
707
|
+
Body: zod_1.z.string().optional().describe("Official Async API Body field."),
|
|
708
|
+
geoCode: zod_1.z.string().optional().describe("Alias for GeoCode."),
|
|
709
|
+
GeoCode: zod_1.z.string().optional().describe("Official Async API GeoCode field."),
|
|
710
|
+
regionalGeoCode: zod_1.z.string().optional().describe("Alias for RegionalGeoCode."),
|
|
711
|
+
RegionalGeoCode: zod_1.z.string().optional().describe("Official Async API RegionalGeoCode field."),
|
|
712
|
+
super_proxy: zod_1.z.boolean().optional().describe("Alias for Super."),
|
|
713
|
+
super: zod_1.z.boolean().optional().describe("Alias for Super."),
|
|
714
|
+
Super: zod_1.z.boolean().optional().describe("Official Async API Super field."),
|
|
715
|
+
headers: headerRecordSchema.optional().describe("Alias for Headers."),
|
|
716
|
+
Headers: headerRecordSchema.optional().describe("Official Async API Headers field."),
|
|
717
|
+
forwardHeaders: zod_1.z.boolean().optional().describe("Alias for ForwardHeaders."),
|
|
718
|
+
ForwardHeaders: zod_1.z.boolean().optional().describe("Official Async API ForwardHeaders field."),
|
|
719
|
+
sessionId: zod_1.z.union([zod_1.z.number().int(), zod_1.z.string()]).optional().describe("Alias for SessionID."),
|
|
720
|
+
SessionID: zod_1.z.union([zod_1.z.number().int(), zod_1.z.string()]).optional().describe("Official Async API SessionID field."),
|
|
721
|
+
device: zod_1.z.enum(["desktop", "mobile", "tablet"]).optional().describe("Alias for Device."),
|
|
722
|
+
Device: zod_1.z.enum(["desktop", "mobile", "tablet"]).optional().describe("Official Async API Device field."),
|
|
723
|
+
setCookies: zod_1.z.string().optional().describe("Alias for SetCookies."),
|
|
724
|
+
SetCookies: zod_1.z.string().optional().describe("Official Async API SetCookies field."),
|
|
725
|
+
timeout: zod_1.z.number().int().positive().optional().describe("Alias for Timeout."),
|
|
726
|
+
Timeout: zod_1.z.number().int().positive().optional().describe("Official Async API Timeout field."),
|
|
727
|
+
retryTimeout: zod_1.z.number().int().positive().optional().describe("Alias for RetryTimeout."),
|
|
728
|
+
RetryTimeout: zod_1.z.number().int().positive().optional().describe("Official Async API RetryTimeout field."),
|
|
729
|
+
disableRetry: zod_1.z.boolean().optional().describe("Alias for DisableRetry."),
|
|
730
|
+
DisableRetry: zod_1.z.boolean().optional().describe("Official Async API DisableRetry field."),
|
|
731
|
+
transparentResponse: zod_1.z.boolean().optional().describe("Alias for TransparentResponse."),
|
|
732
|
+
TransparentResponse: zod_1.z.boolean().optional().describe("Official Async API TransparentResponse field."),
|
|
733
|
+
disableRedirection: zod_1.z.boolean().optional().describe("Alias for DisableRedirection."),
|
|
734
|
+
DisableRedirection: zod_1.z.boolean().optional().describe("Official Async API DisableRedirection field."),
|
|
735
|
+
output: zod_1.z.enum(["raw", "markdown"]).optional().describe("Alias for Output."),
|
|
736
|
+
Output: zod_1.z.enum(["raw", "markdown"]).optional().describe("Official Async API Output field."),
|
|
737
|
+
render: asyncRenderSchema.optional().describe("Alias for Render."),
|
|
738
|
+
Render: asyncRenderSchema.optional().describe("Official Async API Render field."),
|
|
739
|
+
webhookUrl: zod_1.z.string().url().optional().describe("Alias for WebhookURL."),
|
|
740
|
+
WebhookURL: zod_1.z.string().url().optional().describe("Official Async API WebhookURL field."),
|
|
741
|
+
webhookHeaders: headerRecordSchema.optional().describe("Alias for WebhookHeaders."),
|
|
742
|
+
WebhookHeaders: headerRecordSchema.optional().describe("Official Async API WebhookHeaders field."),
|
|
605
743
|
}, async (params) => {
|
|
606
744
|
try {
|
|
607
745
|
ensureToken();
|
|
608
|
-
const
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
WaitSelector: params.render.waitSelector,
|
|
614
|
-
PlayWithBrowser: params.render.playWithBrowser,
|
|
615
|
-
ReturnJSON: params.render.returnJSON,
|
|
616
|
-
ShowWebsocketRequests: params.render.showWebsocketRequests,
|
|
617
|
-
ShowFrames: params.render.showFrames,
|
|
618
|
-
Screenshot: params.render.screenshot,
|
|
619
|
-
FullScreenshot: params.render.fullScreenshot,
|
|
620
|
-
ParticularScreenshot: params.render.particularScreenshot,
|
|
621
|
-
})
|
|
622
|
-
: undefined;
|
|
746
|
+
const targets = params.Targets ?? params.targets;
|
|
747
|
+
if (!targets?.length) {
|
|
748
|
+
return createErrorResult("Error: targets or Targets is required.");
|
|
749
|
+
}
|
|
750
|
+
const render = normalizeAsyncRenderInput(params.Render ?? params.render);
|
|
623
751
|
const body = compactObject({
|
|
624
|
-
Targets:
|
|
625
|
-
Method: params.method,
|
|
626
|
-
Body: params.body,
|
|
627
|
-
GeoCode: params.geoCode,
|
|
628
|
-
RegionalGeoCode: params.regionalGeoCode,
|
|
629
|
-
Super: params.super_proxy,
|
|
630
|
-
Headers: normalizeHeaderRecord(params.headers),
|
|
631
|
-
ForwardHeaders: params.forwardHeaders,
|
|
632
|
-
SessionID: params.sessionId !== undefined ? String(params.sessionId) : undefined,
|
|
633
|
-
Device: params.device,
|
|
634
|
-
SetCookies: params.setCookies,
|
|
635
|
-
Timeout: params.timeout,
|
|
636
|
-
RetryTimeout: params.retryTimeout,
|
|
637
|
-
DisableRetry: params.disableRetry,
|
|
638
|
-
TransparentResponse: params.transparentResponse,
|
|
639
|
-
DisableRedirection: params.disableRedirection,
|
|
640
|
-
Output: params.output,
|
|
752
|
+
Targets: targets,
|
|
753
|
+
Method: params.Method ?? params.method ?? "GET",
|
|
754
|
+
Body: params.Body ?? params.body,
|
|
755
|
+
GeoCode: params.GeoCode ?? params.geoCode,
|
|
756
|
+
RegionalGeoCode: params.RegionalGeoCode ?? params.regionalGeoCode,
|
|
757
|
+
Super: params.Super ?? params.super ?? params.super_proxy,
|
|
758
|
+
Headers: normalizeHeaderRecord(params.Headers ?? params.headers),
|
|
759
|
+
ForwardHeaders: params.ForwardHeaders ?? params.forwardHeaders,
|
|
760
|
+
SessionID: params.SessionID !== undefined ? String(params.SessionID) : params.sessionId !== undefined ? String(params.sessionId) : undefined,
|
|
761
|
+
Device: params.Device ?? params.device,
|
|
762
|
+
SetCookies: params.SetCookies ?? params.setCookies,
|
|
763
|
+
Timeout: params.Timeout ?? params.timeout,
|
|
764
|
+
RetryTimeout: params.RetryTimeout ?? params.retryTimeout,
|
|
765
|
+
DisableRetry: params.DisableRetry ?? params.disableRetry,
|
|
766
|
+
TransparentResponse: params.TransparentResponse ?? params.transparentResponse,
|
|
767
|
+
DisableRedirection: params.DisableRedirection ?? params.disableRedirection,
|
|
768
|
+
Output: params.Output ?? params.output,
|
|
641
769
|
Render: render && Object.keys(render).length > 0 ? render : undefined,
|
|
642
|
-
WebhookURL: params.webhookUrl,
|
|
643
|
-
WebhookHeaders: normalizeHeaderRecord(params.webhookHeaders),
|
|
770
|
+
WebhookURL: params.WebhookURL ?? params.webhookUrl,
|
|
771
|
+
WebhookHeaders: normalizeHeaderRecord(params.WebhookHeaders ?? params.webhookHeaders),
|
|
644
772
|
});
|
|
645
|
-
const
|
|
773
|
+
const response = await requestResponse({
|
|
646
774
|
method: "POST",
|
|
647
775
|
url: `${ASYNC_API_BASE}/api/v1/jobs`,
|
|
648
776
|
headers: {
|
|
@@ -651,59 +779,80 @@ server.tool("async_create_job", "Create a Scrape.do Async API job for batch/back
|
|
|
651
779
|
},
|
|
652
780
|
data: body,
|
|
653
781
|
timeout: 60000,
|
|
654
|
-
});
|
|
655
|
-
|
|
782
|
+
}, { acceptAnyStatus: true });
|
|
783
|
+
if (response.statusCode >= 400) {
|
|
784
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
785
|
+
}
|
|
786
|
+
const parsed = tryParseJson(response.text);
|
|
656
787
|
if (parsed !== undefined) {
|
|
657
|
-
return createJsonResult(parsed);
|
|
788
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
658
789
|
}
|
|
659
|
-
return
|
|
790
|
+
return createTextBodyResult(response.text, response);
|
|
660
791
|
}
|
|
661
792
|
catch (error) {
|
|
662
793
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
663
794
|
}
|
|
664
795
|
});
|
|
665
796
|
server.tool("async_get_job", "Get Scrape.do Async API job details by job ID.", {
|
|
666
|
-
jobId: zod_1.z.string().min(1).describe("
|
|
667
|
-
|
|
797
|
+
jobId: zod_1.z.string().min(1).optional().describe("Alias for jobID."),
|
|
798
|
+
jobID: zod_1.z.string().min(1).optional().describe("Official Async API jobID path parameter."),
|
|
799
|
+
}, async ({ jobId, jobID }) => {
|
|
668
800
|
try {
|
|
669
801
|
ensureToken();
|
|
670
|
-
const
|
|
802
|
+
const resolvedJobId = jobID ?? jobId;
|
|
803
|
+
if (!resolvedJobId) {
|
|
804
|
+
return createErrorResult("Error: jobId or jobID is required.");
|
|
805
|
+
}
|
|
806
|
+
const response = await requestResponse({
|
|
671
807
|
method: "GET",
|
|
672
|
-
url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(
|
|
808
|
+
url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(resolvedJobId)}`,
|
|
673
809
|
headers: {
|
|
674
810
|
"X-Token": SCRAPE_DO_TOKEN,
|
|
675
811
|
},
|
|
676
812
|
timeout: 60000,
|
|
677
|
-
});
|
|
678
|
-
|
|
813
|
+
}, { acceptAnyStatus: true });
|
|
814
|
+
if (response.statusCode >= 400) {
|
|
815
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
816
|
+
}
|
|
817
|
+
const parsed = tryParseJson(response.text);
|
|
679
818
|
if (parsed !== undefined) {
|
|
680
|
-
return createJsonResult(parsed);
|
|
819
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
681
820
|
}
|
|
682
|
-
return
|
|
821
|
+
return createTextBodyResult(response.text, response);
|
|
683
822
|
}
|
|
684
823
|
catch (error) {
|
|
685
824
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
686
825
|
}
|
|
687
826
|
});
|
|
688
827
|
server.tool("async_get_task", "Get Scrape.do Async API task details by job ID and task ID.", {
|
|
689
|
-
jobId: zod_1.z.string().min(1).describe("
|
|
690
|
-
|
|
691
|
-
|
|
828
|
+
jobId: zod_1.z.string().min(1).optional().describe("Alias for jobID."),
|
|
829
|
+
jobID: zod_1.z.string().min(1).optional().describe("Official Async API jobID path parameter."),
|
|
830
|
+
taskId: zod_1.z.string().min(1).optional().describe("Alias for taskID."),
|
|
831
|
+
taskID: zod_1.z.string().min(1).optional().describe("Official Async API taskID path parameter."),
|
|
832
|
+
}, async ({ jobId, jobID, taskId, taskID }) => {
|
|
692
833
|
try {
|
|
693
834
|
ensureToken();
|
|
694
|
-
const
|
|
835
|
+
const resolvedJobId = jobID ?? jobId;
|
|
836
|
+
const resolvedTaskId = taskID ?? taskId;
|
|
837
|
+
if (!resolvedJobId || !resolvedTaskId) {
|
|
838
|
+
return createErrorResult("Error: jobId/jobID and taskId/taskID are required.");
|
|
839
|
+
}
|
|
840
|
+
const response = await requestResponse({
|
|
695
841
|
method: "GET",
|
|
696
|
-
url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(
|
|
842
|
+
url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(resolvedJobId)}/${encodeURIComponent(resolvedTaskId)}`,
|
|
697
843
|
headers: {
|
|
698
844
|
"X-Token": SCRAPE_DO_TOKEN,
|
|
699
845
|
},
|
|
700
846
|
timeout: 60000,
|
|
701
|
-
});
|
|
702
|
-
|
|
847
|
+
}, { acceptAnyStatus: true });
|
|
848
|
+
if (response.statusCode >= 400) {
|
|
849
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
850
|
+
}
|
|
851
|
+
const parsed = tryParseJson(response.text);
|
|
703
852
|
if (parsed !== undefined) {
|
|
704
|
-
return createJsonResult(parsed);
|
|
853
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
705
854
|
}
|
|
706
|
-
return
|
|
855
|
+
return createTextBodyResult(response.text, response);
|
|
707
856
|
}
|
|
708
857
|
catch (error) {
|
|
709
858
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
@@ -712,49 +861,61 @@ server.tool("async_get_task", "Get Scrape.do Async API task details by job ID an
|
|
|
712
861
|
server.tool("async_list_jobs", "List Scrape.do Async API jobs with pagination.", {
|
|
713
862
|
page: zod_1.z.number().int().positive().optional().default(1).describe("Page number."),
|
|
714
863
|
pageSize: zod_1.z.number().int().positive().max(100).optional().default(10).describe("Items per page."),
|
|
715
|
-
|
|
864
|
+
page_size: zod_1.z.number().int().positive().max(100).optional().describe("Official Async API page_size query parameter."),
|
|
865
|
+
}, async ({ page, pageSize, page_size }) => {
|
|
716
866
|
try {
|
|
717
867
|
ensureToken();
|
|
718
|
-
const
|
|
868
|
+
const response = await requestResponse({
|
|
719
869
|
method: "GET",
|
|
720
870
|
url: `${ASYNC_API_BASE}/api/v1/jobs`,
|
|
721
871
|
params: {
|
|
722
872
|
page,
|
|
723
|
-
page_size: pageSize,
|
|
873
|
+
page_size: page_size ?? pageSize,
|
|
724
874
|
},
|
|
725
875
|
headers: {
|
|
726
876
|
"X-Token": SCRAPE_DO_TOKEN,
|
|
727
877
|
},
|
|
728
878
|
timeout: 60000,
|
|
729
|
-
});
|
|
730
|
-
|
|
879
|
+
}, { acceptAnyStatus: true });
|
|
880
|
+
if (response.statusCode >= 400) {
|
|
881
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
882
|
+
}
|
|
883
|
+
const parsed = tryParseJson(response.text);
|
|
731
884
|
if (parsed !== undefined) {
|
|
732
|
-
return createJsonResult(parsed);
|
|
885
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
733
886
|
}
|
|
734
|
-
return
|
|
887
|
+
return createTextBodyResult(response.text, response);
|
|
735
888
|
}
|
|
736
889
|
catch (error) {
|
|
737
890
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
738
891
|
}
|
|
739
892
|
});
|
|
740
893
|
server.tool("async_cancel_job", "Cancel a Scrape.do Async API job.", {
|
|
741
|
-
jobId: zod_1.z.string().min(1).describe("
|
|
742
|
-
|
|
894
|
+
jobId: zod_1.z.string().min(1).optional().describe("Alias for jobID."),
|
|
895
|
+
jobID: zod_1.z.string().min(1).optional().describe("Official Async API jobID path parameter."),
|
|
896
|
+
}, async ({ jobId, jobID }) => {
|
|
743
897
|
try {
|
|
744
898
|
ensureToken();
|
|
745
|
-
const
|
|
899
|
+
const resolvedJobId = jobID ?? jobId;
|
|
900
|
+
if (!resolvedJobId) {
|
|
901
|
+
return createErrorResult("Error: jobId or jobID is required.");
|
|
902
|
+
}
|
|
903
|
+
const response = await requestResponse({
|
|
746
904
|
method: "DELETE",
|
|
747
|
-
url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(
|
|
905
|
+
url: `${ASYNC_API_BASE}/api/v1/jobs/${encodeURIComponent(resolvedJobId)}`,
|
|
748
906
|
headers: {
|
|
749
907
|
"X-Token": SCRAPE_DO_TOKEN,
|
|
750
908
|
},
|
|
751
909
|
timeout: 60000,
|
|
752
|
-
});
|
|
753
|
-
|
|
910
|
+
}, { acceptAnyStatus: true });
|
|
911
|
+
if (response.statusCode >= 400) {
|
|
912
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
913
|
+
}
|
|
914
|
+
const parsed = tryParseJson(response.text);
|
|
754
915
|
if (parsed !== undefined) {
|
|
755
|
-
return createJsonResult(parsed);
|
|
916
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
756
917
|
}
|
|
757
|
-
return
|
|
918
|
+
return createTextBodyResult(response.text, response);
|
|
758
919
|
}
|
|
759
920
|
catch (error) {
|
|
760
921
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
@@ -763,19 +924,22 @@ server.tool("async_cancel_job", "Cancel a Scrape.do Async API job.", {
|
|
|
763
924
|
server.tool("async_get_account", "Get Scrape.do Async API account/concurrency information.", {}, async () => {
|
|
764
925
|
try {
|
|
765
926
|
ensureToken();
|
|
766
|
-
const
|
|
927
|
+
const response = await requestResponse({
|
|
767
928
|
method: "GET",
|
|
768
929
|
url: `${ASYNC_API_BASE}/api/v1/me`,
|
|
769
930
|
headers: {
|
|
770
931
|
"X-Token": SCRAPE_DO_TOKEN,
|
|
771
932
|
},
|
|
772
933
|
timeout: 60000,
|
|
773
|
-
});
|
|
774
|
-
|
|
934
|
+
}, { acceptAnyStatus: true });
|
|
935
|
+
if (response.statusCode >= 400) {
|
|
936
|
+
return createErrorResult(`Error (${response.statusCode}): ${response.text}`);
|
|
937
|
+
}
|
|
938
|
+
const parsed = tryParseJson(response.text);
|
|
775
939
|
if (parsed !== undefined) {
|
|
776
|
-
return createJsonResult(parsed);
|
|
940
|
+
return createJsonResult(parsed, { rawText: response.text, response });
|
|
777
941
|
}
|
|
778
|
-
return
|
|
942
|
+
return createTextBodyResult(response.text, response);
|
|
779
943
|
}
|
|
780
944
|
catch (error) {
|
|
781
945
|
return createErrorResult(`Error: ${getErrorMessage(error)}`);
|
|
@@ -785,16 +949,17 @@ server.tool("proxy_mode_config", "Generate Scrape.do Proxy Mode configuration an
|
|
|
785
949
|
params: headerRecordSchema.optional().describe("Proxy mode query parameters to place into the password segment."),
|
|
786
950
|
}, async ({ params }) => {
|
|
787
951
|
try {
|
|
788
|
-
ensureToken();
|
|
789
952
|
const parameterString = buildProxyParameterString(params);
|
|
790
953
|
return createJsonResult({
|
|
791
954
|
protocol: "http or https",
|
|
792
955
|
host: "proxy.scrape.do",
|
|
793
956
|
port: 8080,
|
|
794
|
-
username: "<
|
|
957
|
+
username: "<YOUR_SCRAPE_DO_TOKEN>",
|
|
795
958
|
password: parameterString,
|
|
796
|
-
proxy_url_template: `http://<
|
|
959
|
+
proxy_url_template: `http://<YOUR_SCRAPE_DO_TOKEN>:${parameterString}@proxy.scrape.do:8080`,
|
|
797
960
|
ca_certificate_url: "https://scrape.do/scrapedo_ca.crt",
|
|
961
|
+
default_customHeaders: true,
|
|
962
|
+
disable_customHeaders_hint: "Append customHeaders=false to the password parameters if you need to disable the Proxy Mode default.",
|
|
798
963
|
});
|
|
799
964
|
}
|
|
800
965
|
catch (error) {
|