xcrawl-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.editorconfig +12 -0
- package/.env.example +3 -0
- package/.prettierrc +6 -0
- package/README.md +244 -0
- package/claude.md +295 -0
- package/dist/core/crawl.d.ts +246 -0
- package/dist/core/crawl.d.ts.map +1 -0
- package/dist/core/crawl.js +141 -0
- package/dist/core/crawl.js.map +1 -0
- package/dist/core/map.d.ts +34 -0
- package/dist/core/map.d.ts.map +1 -0
- package/dist/core/map.js +50 -0
- package/dist/core/map.js.map +1 -0
- package/dist/core/scrape.d.ts +201 -0
- package/dist/core/scrape.d.ts.map +1 -0
- package/dist/core/scrape.js +148 -0
- package/dist/core/scrape.js.map +1 -0
- package/dist/core/search.d.ts +144 -0
- package/dist/core/search.d.ts.map +1 -0
- package/dist/core/search.js +75 -0
- package/dist/core/search.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +516 -0
- package/dist/index.js.map +1 -0
- package/dist/stdio.d.ts +3 -0
- package/dist/stdio.d.ts.map +1 -0
- package/dist/stdio.js +551 -0
- package/dist/stdio.js.map +1 -0
- package/dist/tools.d.ts +540 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +528 -0
- package/dist/tools.js.map +1 -0
- package/dist/types.d.ts +214 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/package.json +33 -0
- package/src/core/crawl.ts +149 -0
- package/src/core/map.ts +56 -0
- package/src/core/scrape.ts +156 -0
- package/src/core/search.ts +81 -0
- package/src/index.ts +565 -0
- package/src/stdio.ts +584 -0
- package/src/tools.ts +539 -0
- package/src/types.ts +221 -0
- package/tsconfig.build.json +14 -0
- package/tsconfig.json +45 -0
- package/vitest.config.mts +11 -0
- package/worker-configuration.d.ts +10848 -0
- package/wrangler.jsonc +26 -0
package/dist/stdio.js
ADDED
|
@@ -0,0 +1,551 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
3
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
|
+
import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
|
|
5
|
+
import { callXCrawlAPI, checkScrapeStatus, formatScrapeResponse, scrapeToolSchema } from "./core/scrape.js";
|
|
6
|
+
import { callXCrawlSearchAPI, formatSearchResponse, searchToolSchema } from "./core/search.js";
|
|
7
|
+
import { callXCrawlMapAPI, formatMapResponse, mapToolSchema } from "./core/map.js";
|
|
8
|
+
import { callXCrawlCrawlAPI, checkCrawlStatus, formatCrawlResponse, crawlToolSchema } from "./core/crawl.js";
|
|
9
|
+
import { XCRAWL_SCRAPE_TOOL, XCRAWL_CHECK_STATUS_TOOL, XCRAWL_SEARCH_TOOL, XCRAWL_MAP_TOOL, XCRAWL_CRAWL_TOOL, XCRAWL_CHECK_CRAWL_STATUS_TOOL, } from "./tools.js";
|
|
10
|
+
// The xCrawl API key is supplied through the XCRAWL_API_KEY environment
// variable. Without it no API call can be made, so print usage help on stderr
// and exit with a non-zero status.
const API_KEY = process.env.XCRAWL_API_KEY;
if (!API_KEY) {
    const missingKeyHelp = [
        "Error: XCRAWL_API_KEY environment variable is required",
        "Usage: XCRAWL_API_KEY=your-api-key xcrawl-mcp",
    ];
    for (const helpLine of missingKeyHelp) {
        console.error(helpLine);
    }
    process.exit(1);
}
|
|
17
|
+
// Create the MCP server instance.
// The first argument identifies this server (name and version); the second
// declares its capabilities, here only "tools" with no extra options.
const serverIdentity = {
    name: "xCrawl MCP Server",
    version: "1.0.0",
};
const serverOptions = {
    capabilities: {
        tools: {},
    },
};
const server = new Server(serverIdentity, serverOptions);
|
|
26
|
+
// Register the tools/list handler: it answers with the tool definitions
// imported from ./tools.js, in a fixed order.
server.setRequestHandler(ListToolsRequestSchema, async () => {
    const tools = [
        XCRAWL_SCRAPE_TOOL,
        XCRAWL_CHECK_STATUS_TOOL,
        XCRAWL_SEARCH_TOOL,
        XCRAWL_MAP_TOOL,
        XCRAWL_CRAWL_TOOL,
        XCRAWL_CHECK_CRAWL_STATUS_TOOL,
    ];
    return { tools };
});
|
|
37
|
+
// Keep old code for reference, but use shared tools above
|
|
38
|
+
/*
|
|
39
|
+
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
40
|
+
tools: [
|
|
41
|
+
{
|
|
42
|
+
name: "xcrawl_scrape",
|
|
43
|
+
description: `Scrape web pages using xCrawl API. Supports multiple output formats including markdown, HTML, screenshots, and AI-powered structured data extraction.
|
|
44
|
+
|
|
45
|
+
**CRITICAL - When presenting results to user:**
|
|
46
|
+
🔴 If response.data.screenshot exists: You MUST display the full screenshot URL to the user. Never skip this.
|
|
47
|
+
🔴 If response.data.json exists: Display the COMPLETE RAW JSON data to the user. Do NOT extract, summarize, or reformat it unless the user specifically asks to "extract information" or "summarize". Just show the JSON as-is.
|
|
48
|
+
🔴 If response.data.links exists: Show the complete links list to the user.
|
|
49
|
+
🔴 If response.data.html exists: Provide a brief summary only, never paste the full HTML.
|
|
50
|
+
|
|
51
|
+
**Best for:**
|
|
52
|
+
- Extracting content from web pages in various formats
|
|
53
|
+
- Converting web pages to markdown
|
|
54
|
+
- Taking screenshots of web pages
|
|
55
|
+
- Extracting structured data using AI with JSON schema
|
|
56
|
+
- Bypassing anti-bot measures with residential proxies
|
|
57
|
+
|
|
58
|
+
**Features:**
|
|
59
|
+
- Sync/async modes (use async for complex pages)
|
|
60
|
+
- Proxy support with location selection (US, SG, JP, GB, etc.)
|
|
61
|
+
- JavaScript rendering with configurable wait conditions
|
|
62
|
+
- Multiple output formats: html, raw_html, markdown, links, summary, screenshot, json
|
|
63
|
+
- AI-powered structured data extraction
|
|
64
|
+
|
|
65
|
+
**Usage Examples:**
|
|
66
|
+
|
|
67
|
+
1. Basic markdown extraction:
|
|
68
|
+
\`\`\`json
|
|
69
|
+
{ "url": "https://example.com", "output": { "formats": ["markdown"] } }
|
|
70
|
+
\`\`\`
|
|
71
|
+
|
|
72
|
+
2. Extract structured JSON data (DEFAULT - use prompt only):
|
|
73
|
+
\`\`\`json
|
|
74
|
+
{
|
|
75
|
+
"url": "https://github.com/trending",
|
|
76
|
+
"mode": "async",
|
|
77
|
+
"output": {
|
|
78
|
+
"formats": ["json"],
|
|
79
|
+
"json": {
|
|
80
|
+
"prompt": "Extract repository names, owners, languages, and star counts"
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
\`\`\`
|
|
85
|
+
CRITICAL STRUCTURE NOTES:
|
|
86
|
+
✅ CORRECT structure:
|
|
87
|
+
{
|
|
88
|
+
"url": "...",
|
|
89
|
+
"output": {
|
|
90
|
+
"formats": ["json"],
|
|
91
|
+
"json": { "prompt": "..." }
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
❌ WRONG - "json" at top level:
|
|
96
|
+
{
|
|
97
|
+
"url": "...",
|
|
98
|
+
"output": { "formats": ["json"] },
|
|
99
|
+
"json": { "prompt": "..." }
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
❌ WRONG - "output" as array:
|
|
103
|
+
{
|
|
104
|
+
"url": "...",
|
|
105
|
+
"output": [
|
|
106
|
+
{ "formats": ["json"] },
|
|
107
|
+
{ "json": { "prompt": "..." } }
|
|
108
|
+
]
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
**Important:** Always use ONLY the "prompt" field for JSON extraction. The AI-powered extraction engine automatically infers the structure from your prompt - no need for json_schema in most cases.
|
|
112
|
+
|
|
113
|
+
**Async Mode Workflow:**
|
|
114
|
+
When mode is "async", follow this workflow:
|
|
115
|
+
1. Call xcrawl_scrape with mode: "async"
|
|
116
|
+
2. Response will have status: "pending" and a scrape_id field
|
|
117
|
+
3. Extract the scrape_id from response.scrape_id (NOT response.data.scrape_id)
|
|
118
|
+
4. Wait 10-15 seconds for complex pages
|
|
119
|
+
5. Call xcrawl_check_status with the scrape_id you extracted
|
|
120
|
+
6. If status is "completed", data is ready in response.data
|
|
121
|
+
7. If status is "pending" or "crawling", wait and check again
|
|
122
|
+
|
|
123
|
+
Example async workflow:
|
|
124
|
+
Step 1: xcrawl_scrape returns { "scrape_id": "job_abc123", "status": "pending" }
|
|
125
|
+
Step 2: Extract "job_abc123" from response.scrape_id
|
|
126
|
+
Step 3: Call xcrawl_check_status({ "scrape_id": "job_abc123" })
|
|
127
|
+
|
|
128
|
+
**Response Format:**
|
|
129
|
+
The API returns a JSON object with this structure:
|
|
130
|
+
\`\`\`json
|
|
131
|
+
{
|
|
132
|
+
"scrape_id": "e9b3b340-8edf-4655-a99d-15edda9883a7",
|
|
133
|
+
"endpoint": "scrape",
|
|
134
|
+
"version": "ff3b3840061e8dc6",
|
|
135
|
+
"status": "completed",
|
|
136
|
+
"url": "https://example.com",
|
|
137
|
+
"data": {
|
|
138
|
+
"html": "cleaned HTML without scripts...",
|
|
139
|
+
"raw_html": "<!DOCTYPE html>... original HTML ...",
|
|
140
|
+
"markdown": "# Markdown content...",
|
|
141
|
+
"links": ["https://url1.com", "https://url2.com"],
|
|
142
|
+
"summary": "AI-generated summary...",
|
|
143
|
+
"screenshot": "https://web-scraper-prod.cos.ap-hongkong.myqcloud.com/.../screenshot.png",
|
|
144
|
+
"metadata": { "title": "Page Title", "status_code": 200, "content_type": "text/html" },
|
|
145
|
+
"json": { "extracted": "data" }
|
|
146
|
+
},
|
|
147
|
+
"started_at": "2025-12-24T02:40:54Z",
|
|
148
|
+
"ended_at": "2025-12-24T02:41:20Z"
|
|
149
|
+
}
|
|
150
|
+
\`\`\`
|
|
151
|
+
|
|
152
|
+
**Accessing response data:**
|
|
153
|
+
- Status: \`response.status\` (string: "completed" once finished)
|
|
154
|
+
- Markdown: \`response.data.markdown\` (string)
|
|
155
|
+
- HTML cleaned: \`response.data.html\` (string, without scripts)
|
|
156
|
+
- HTML original: \`response.data.raw_html\` (string, with all scripts and styles)
|
|
157
|
+
- Links: \`response.data.links\` (array of URL strings)
|
|
158
|
+
- Summary: \`response.data.summary\` (string, AI-generated)
|
|
159
|
+
- Screenshot URL: \`response.data.screenshot\` (string, direct download link)
|
|
160
|
+
- Metadata: \`response.data.metadata\` (object with title, status_code, etc.)
|
|
161
|
+
- JSON data: \`response.data.json\` (object or array, structured extraction result)
|
|
162
|
+
|
|
163
|
+
**IMPORTANT - How to present results to user:**
|
|
164
|
+
- If response contains \`screenshot\`: ALWAYS show the screenshot URL to the user. Do NOT just summarize - provide the actual URL link.
|
|
165
|
+
- If response contains \`json\`: Display the COMPLETE RAW JSON data. Do NOT extract info or summarize unless user explicitly asks for it.
|
|
166
|
+
- If response contains \`links\`: Show the complete list of links to the user.
|
|
167
|
+
- If response contains \`html\` or \`raw_html\`: Summarize the content, do NOT dump the entire HTML.
|
|
168
|
+
- If response contains \`markdown\`: Show key sections or summarize, unless user asks for full content.`,
|
|
169
|
+
inputSchema: {
|
|
170
|
+
type: "object",
|
|
171
|
+
properties: {
|
|
172
|
+
url: {
|
|
173
|
+
type: "string",
|
|
174
|
+
format: "uri",
|
|
175
|
+
description: "The URL to scrape",
|
|
176
|
+
},
|
|
177
|
+
mode: {
|
|
178
|
+
type: "string",
|
|
179
|
+
enum: ["sync", "async"],
|
|
180
|
+
default: "sync",
|
|
181
|
+
description: "Sync returns results immediately, async returns a scrape_id",
|
|
182
|
+
},
|
|
183
|
+
proxy: {
|
|
184
|
+
type: "object",
|
|
185
|
+
properties: {
|
|
186
|
+
location: {
|
|
187
|
+
type: "string",
|
|
188
|
+
description: "ISO-3166-1 alpha-2 country code (US, JP, SG, etc.)",
|
|
189
|
+
},
|
|
190
|
+
sticky_session: {
|
|
191
|
+
type: "string",
|
|
192
|
+
description: "Session ID to reuse the same proxy exit",
|
|
193
|
+
},
|
|
194
|
+
},
|
|
195
|
+
description: "Proxy configuration",
|
|
196
|
+
},
|
|
197
|
+
request: {
|
|
198
|
+
type: "object",
|
|
199
|
+
properties: {
|
|
200
|
+
locale: {
|
|
201
|
+
type: "string",
|
|
202
|
+
description: "Accept-Language header value",
|
|
203
|
+
},
|
|
204
|
+
device: {
|
|
205
|
+
type: "string",
|
|
206
|
+
enum: ["desktop", "mobile"],
|
|
207
|
+
description: "Device type for user agent and viewport",
|
|
208
|
+
},
|
|
209
|
+
cookies: {
|
|
210
|
+
type: "object",
|
|
211
|
+
additionalProperties: { type: "string" },
|
|
212
|
+
description: "Cookies to send with the request",
|
|
213
|
+
},
|
|
214
|
+
headers: {
|
|
215
|
+
type: "object",
|
|
216
|
+
additionalProperties: { type: "string" },
|
|
217
|
+
description: "Custom HTTP headers",
|
|
218
|
+
},
|
|
219
|
+
},
|
|
220
|
+
description: "Request configuration",
|
|
221
|
+
},
|
|
222
|
+
js_render: {
|
|
223
|
+
type: "object",
|
|
224
|
+
properties: {
|
|
225
|
+
enabled: {
|
|
226
|
+
type: "boolean",
|
|
227
|
+
default: true,
|
|
228
|
+
description: "Enable JavaScript rendering",
|
|
229
|
+
},
|
|
230
|
+
wait_until: {
|
|
231
|
+
type: "string",
|
|
232
|
+
enum: ["load", "domcontentloaded", "networkidle"],
|
|
233
|
+
description: "Wait condition for page load",
|
|
234
|
+
},
|
|
235
|
+
viewport: {
|
|
236
|
+
type: "object",
|
|
237
|
+
properties: {
|
|
238
|
+
width: { type: "number", description: "Viewport width" },
|
|
239
|
+
height: { type: "number", description: "Viewport height" },
|
|
240
|
+
},
|
|
241
|
+
description: "Viewport dimensions",
|
|
242
|
+
},
|
|
243
|
+
},
|
|
244
|
+
description: "JavaScript rendering configuration",
|
|
245
|
+
},
|
|
246
|
+
output: {
|
|
247
|
+
type: "object",
|
|
248
|
+
description: "Output configuration (IMPORTANT: this is an OBJECT, not an array)",
|
|
249
|
+
properties: {
|
|
250
|
+
formats: {
|
|
251
|
+
type: "array",
|
|
252
|
+
items: {
|
|
253
|
+
type: "string",
|
|
254
|
+
enum: ["html", "raw_html", "markdown", "links", "summary", "screenshot", "json"],
|
|
255
|
+
},
|
|
256
|
+
default: ["markdown"],
|
|
257
|
+
description: "Output formats: 'html' (cleaned HTML without scripts), 'raw_html' (original HTML with all scripts and styles), 'markdown' (Markdown format), 'links' (all page links), 'summary' (AI-generated summary), 'screenshot' (page screenshot), 'json' (structured data extraction)",
|
|
258
|
+
},
|
|
259
|
+
screenshot: {
|
|
260
|
+
type: "string",
|
|
261
|
+
enum: ["full_page", "viewport"],
|
|
262
|
+
description: "Screenshot type (only when 'screenshot' in formats)",
|
|
263
|
+
},
|
|
264
|
+
json: {
|
|
265
|
+
type: "object",
|
|
266
|
+
properties: {
|
|
267
|
+
prompt: {
|
|
268
|
+
type: "string",
|
|
269
|
+
description: "Natural language description of what data to extract. Example: 'Extract product names, prices, and ratings'. The AI engine automatically structures the output.",
|
|
270
|
+
},
|
|
271
|
+
json_schema: {
|
|
272
|
+
type: "object",
|
|
273
|
+
description: "Optional JSON Schema for strict output validation. Rarely needed - the prompt field is usually sufficient.",
|
|
274
|
+
additionalProperties: true,
|
|
275
|
+
},
|
|
276
|
+
},
|
|
277
|
+
description: "JSON extraction config. Use 'prompt' field to describe what to extract - json_schema is optional and rarely necessary.",
|
|
278
|
+
},
|
|
279
|
+
},
|
|
280
|
+
},
|
|
281
|
+
},
|
|
282
|
+
required: ["url"],
|
|
283
|
+
},
|
|
284
|
+
},
|
|
285
|
+
{
|
|
286
|
+
name: "xcrawl_check_status",
|
|
287
|
+
description: `Check the status and results of an async scrape task.
|
|
288
|
+
|
|
289
|
+
**CRITICAL - When presenting results to user:**
|
|
290
|
+
🔴 If response.data.screenshot exists: You MUST show the full URL to the user.
|
|
291
|
+
🔴 If response.data.json exists: Display the COMPLETE RAW JSON data to the user. Do NOT extract or summarize unless explicitly asked.
|
|
292
|
+
🔴 If response.data.links exists: Show the complete links list to the user.
|
|
293
|
+
|
|
294
|
+
**CRITICAL - How to get scrape_id:**
|
|
295
|
+
When you call xcrawl_scrape with mode: "async", the response contains:
|
|
296
|
+
{ "scrape_id": "job_abc123", "status": "pending", ... }
|
|
297
|
+
|
|
298
|
+
Extract the scrape_id from the TOP LEVEL of the response object (response.scrape_id).
|
|
299
|
+
DO NOT look for it in response.data.scrape_id - it's at response.scrape_id.
|
|
300
|
+
|
|
301
|
+
Then pass this scrape_id to xcrawl_check_status:
|
|
302
|
+
{ "scrape_id": "job_abc123" }
|
|
303
|
+
|
|
304
|
+
Use this tool when:
|
|
305
|
+
- You have a scrape_id from a previous async scrape request
|
|
306
|
+
- You want to check if the scraping task is completed
|
|
307
|
+
- You want to retrieve the results of a completed scrape
|
|
308
|
+
|
|
309
|
+
Parameters:
|
|
310
|
+
- scrape_id: The ID from response.scrape_id of the async xcrawl_scrape call
|
|
311
|
+
|
|
312
|
+
**Response Format:**
|
|
313
|
+
Returns the same structure as xcrawl_scrape:
|
|
314
|
+
\`\`\`json
|
|
315
|
+
{
|
|
316
|
+
"scrape_id": "c2a8adb8-b279-4cc5-adce-00fce9e48487",
|
|
317
|
+
"endpoint": "scrape",
|
|
318
|
+
"version": "ff3b3840061e8dc6",
|
|
319
|
+
"status": "completed",
|
|
320
|
+
"url": "https://example.com",
|
|
321
|
+
"data": {
|
|
322
|
+
"markdown": "content...",
|
|
323
|
+
"html": "cleaned HTML...",
|
|
324
|
+
"screenshot": "https://web-scraper-prod.cos.ap-hongkong.myqcloud.com/.../screenshot.png",
|
|
325
|
+
"metadata": { "title": "...", "status_code": 200 },
|
|
326
|
+
"json": { "extracted": "data" }
|
|
327
|
+
},
|
|
328
|
+
"started_at": "2025-12-24T02:40:54Z",
|
|
329
|
+
"ended_at": "2025-12-24T02:41:20Z"
|
|
330
|
+
}
|
|
331
|
+
\`\`\`
|
|
332
|
+
|
|
333
|
+
**Status values:**
|
|
334
|
+
- "pending": Task is still being processed
|
|
335
|
+
- "crawling": Task started and is still running
|
|
336
|
+
- "completed": Task finished successfully, data is available
|
|
337
|
+
- "failed": Task failed, check error message
|
|
338
|
+
|
|
339
|
+
**Accessing data:**
|
|
340
|
+
- Status: \`response.status\` ("pending", "crawling", "completed", or "failed")
|
|
341
|
+
- Screenshot URL: \`response.data.screenshot\` (direct download link)
|
|
342
|
+
- JSON data: \`response.data.json\` (extracted structured data)
|
|
343
|
+
- Metadata: \`response.data.metadata\` (page title, status code, etc.)
|
|
344
|
+
- Other formats: \`response.data.markdown\`, \`response.data.html\`, \`response.data.links\`, etc.
|
|
345
|
+
|
|
346
|
+
**IMPORTANT - How to present results:**
|
|
347
|
+
- ALWAYS show the screenshot URL if present in response.data.screenshot
|
|
348
|
+
- Display COMPLETE RAW JSON if present in response.data.json (do NOT extract or summarize)
|
|
349
|
+
- Show complete links list if present in response.data.links
|
|
350
|
+
- Summarize HTML/markdown content, don't dump raw text`,
|
|
351
|
+
inputSchema: {
|
|
352
|
+
type: "object",
|
|
353
|
+
properties: {
|
|
354
|
+
scrape_id: {
|
|
355
|
+
type: "string",
|
|
356
|
+
description: "The scrape ID returned from an async scrape request",
|
|
357
|
+
},
|
|
358
|
+
},
|
|
359
|
+
required: ["scrape_id"],
|
|
360
|
+
},
|
|
361
|
+
},
|
|
362
|
+
],
|
|
363
|
+
}));
|
|
364
|
+
*/
|
|
365
|
+
// Register tools/call handler

/**
 * Build a successful tool result carrying a single text item.
 *
 * @param {string} text - Already-formatted response text.
 * @returns {{content: Array<{type: string, text: string}>}} Tool result object.
 */
function toolTextResult(text) {
    return {
        content: [
            {
                type: "text",
                text,
            },
        ],
    };
}

/**
 * Build a failed tool result (isError: true) from anything that was thrown.
 *
 * @param {unknown} error - The caught value; non-Error values are stringified.
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}} Tool result object.
 */
function toolErrorResult(error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
        content: [
            {
                type: "text",
                text: `Error: ${errorMessage}`,
            },
        ],
        isError: true,
    };
}

/**
 * Read a required, non-empty string argument from a tool call.
 *
 * The arguments object itself may be absent from the request, so it is checked
 * before the property is read. This way a missing arguments object produces
 * the same "required" message as a missing property, instead of a TypeError.
 *
 * @param {object | undefined | null} args - The raw tool-call arguments.
 * @param {string} key - Name of the required argument.
 * @returns {string} The argument value.
 * @throws {Error} If the argument is missing, empty, or not a string.
 */
function requireStringArg(args, key) {
    const value = args === undefined || args === null ? undefined : args[key];
    if (!value || typeof value !== "string") {
        throw new Error(`${key} is required and must be a string`);
    }
    return value;
}

// Maps each tool name to an async function that takes the raw arguments and
// resolves to the formatted response text. A Map is used so that only the
// names registered here can match (no inherited object properties).
const TOOL_HANDLERS = new Map([
    ["xcrawl_scrape", async (args) => {
        // Validate arguments using Zod schema, then call xCrawl API
        const validatedArgs = scrapeToolSchema.parse(args);
        return formatScrapeResponse(await callXCrawlAPI(API_KEY, validatedArgs));
    }],
    ["xcrawl_check_status", async (args) => {
        // Check scrape status
        const scrapeId = requireStringArg(args, "scrape_id");
        return formatScrapeResponse(await checkScrapeStatus(API_KEY, scrapeId));
    }],
    ["xcrawl_search", async (args) => {
        // Validate arguments using Zod schema, then call xCrawl Search API
        const validatedArgs = searchToolSchema.parse(args);
        return formatSearchResponse(await callXCrawlSearchAPI(API_KEY, validatedArgs));
    }],
    ["xcrawl_map", async (args) => {
        // Validate arguments using Zod schema, then call xCrawl Map API
        const validatedArgs = mapToolSchema.parse(args);
        return formatMapResponse(await callXCrawlMapAPI(API_KEY, validatedArgs));
    }],
    ["xcrawl_crawl", async (args) => {
        // Validate arguments using Zod schema, then call xCrawl Crawl API
        const validatedArgs = crawlToolSchema.parse(args);
        return formatCrawlResponse(await callXCrawlCrawlAPI(API_KEY, validatedArgs));
    }],
    ["xcrawl_check_crawl_status", async (args) => {
        // Check crawl status
        const crawlId = requireStringArg(args, "crawl_id");
        return formatCrawlResponse(await checkCrawlStatus(API_KEY, crawlId));
    }],
]);

// Dispatch a tools/call request to the matching handler.
// - Unknown tool names throw, so the request itself fails.
// - Any failure inside a known tool (validation, API call, formatting) is
//   returned as a tool result with isError: true rather than thrown.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const toolName = request.params.name;
    const runTool = TOOL_HANDLERS.get(toolName);
    if (runTool === undefined) {
        throw new Error(`Unknown tool: ${toolName}`);
    }
    try {
        return toolTextResult(await runTool(request.params.arguments));
    }
    catch (error) {
        return toolErrorResult(error);
    }
});
|
|
541
|
+
// Start the server on a stdio transport.

/**
 * Connect the MCP server to a new stdio transport and log a startup notice.
 * The notice is written with console.error, i.e. to stderr.
 *
 * @returns {Promise<void>} Resolves after the server has connected.
 */
async function main() {
    await server.connect(new StdioServerTransport());
    console.error("xCrawl MCP Server running on stdio");
}

// Any failure while starting up is fatal: report it and exit non-zero.
main().catch((fatalError) => {
    console.error("Fatal error:", fatalError);
    process.exit(1);
});
|
|
551
|
+
//# sourceMappingURL=stdio.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stdio.js","sourceRoot":"","sources":["../src/stdio.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AACnG,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAC5G,OAAO,EAAE,mBAAmB,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAC/F,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AACnF,OAAO,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAG7G,OAAO,EACN,kBAAkB,EAClB,wBAAwB,EACxB,kBAAkB,EAClB,eAAe,EACf,iBAAiB,EACjB,8BAA8B,GAC9B,MAAM,YAAY,CAAC;AAEpB,wCAAwC;AACxC,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;AAC3C,IAAI,CAAC,OAAO,EAAE,CAAC;IACd,OAAO,CAAC,KAAK,CAAC,wDAAwD,CAAC,CAAC;IACxE,OAAO,CAAC,KAAK,CAAC,+CAA+C,CAAC,CAAC;IAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,CAAC;AAED,oBAAoB;AACpB,MAAM,MAAM,GAAG,IAAI,MAAM,CACxB;IACC,IAAI,EAAE,mBAAmB;IACzB,OAAO,EAAE,OAAO;CAChB,EACD;IACC,YAAY,EAAE;QACb,KAAK,EAAE,EAAE;KACT;CACD,CACD,CAAC;AAEF,8BAA8B;AAC9B,MAAM,CAAC,iBAAiB,CAAC,sBAAsB,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC;IAC7D,KAAK,EAAE;QACN,kBAAkB;QAClB,wBAAwB;QACxB,kBAAkB;QAClB,eAAe;QACf,iBAAiB;QACjB,8BAA8B;KAC9B;CACD,CAAC,CAAC,CAAC;AAEJ,0DAA0D;AAC1D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAsUE;AAEF,8BAA8B;AAC9B,MAAM,CAAC,iBAAiB,CAAC,qBAAqB,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;IACjE,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QAC7C,IAAI,CAAC;YACJ,sCAAsC;YACtC,MAAM,aAAa,GAAG,gBAAgB,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEvE,kBAAkB;YAClB,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,aAAoC,CAAC,CAAC;YAEpF,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,oBAAoB,CAAC,QAAQ,CAAC;qBACpC;iBACD;aACD,
CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,UAAU,YAAY,EAAE;qBAC9B;iBACD;gBACD,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;IACF,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,qBAAqB,EAAE,CAAC;QACnD,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,SAAkC,CAAC;YAE/D,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,OAAO,IAAI,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;gBAC3D,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;YAC/D,CAAC;YAED,sBAAsB;YACtB,MAAM,QAAQ,GAAG,MAAM,iBAAiB,CAAC,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;YAElE,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,oBAAoB,CAAC,QAAQ,CAAC;qBACpC;iBACD;aACD,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,UAAU,YAAY,EAAE;qBAC9B;iBACD;gBACD,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;IACF,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QAC7C,IAAI,CAAC;YACJ,sCAAsC;YACtC,MAAM,aAAa,GAAG,gBAAgB,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEvE,yBAAyB;YACzB,MAAM,QAAQ,GAAG,MAAM,mBAAmB,CAAC,OAAO,EAAE,aAAoC,CAAC,CAAC;YAE1F,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,oBAAoB,CAAC,QAAQ,CAAC;qBACpC;iBACD;aACD,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,UAAU,YAAY,EAAE;qBAC9B;iBACD;gBACD,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;IACF,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;QAC1C,IAAI,CAAC;YACJ,sCAAsC;YACtC,MAAM,aAAa,GAAG,aAAa,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEpE,sBAAsB;YACtB,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,OAAO,EAAE,aAAiC,CAAC,CAAC;YAEpF,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,iBAAi
B,CAAC,QAAQ,CAAC;qBACjC;iBACD;aACD,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,UAAU,YAAY,EAAE;qBAC9B;iBACD;gBACD,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;IACF,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;QAC5C,IAAI,CAAC;YACJ,sCAAsC;YACtC,MAAM,aAAa,GAAG,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEtE,wBAAwB;YACxB,MAAM,QAAQ,GAAG,MAAM,kBAAkB,CAAC,OAAO,EAAE,aAAmC,CAAC,CAAC;YAExF,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,mBAAmB,CAAC,QAAQ,CAAC;qBACnC;iBACD;aACD,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,UAAU,YAAY,EAAE;qBAC9B;iBACD;gBACD,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;IACF,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,KAAK,2BAA2B,EAAE,CAAC;QACzD,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,SAAiC,CAAC;YAE9D,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBACzD,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;YAC9D,CAAC;YAED,qBAAqB;YACrB,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;YAEhE,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,mBAAmB,CAAC,QAAQ,CAAC;qBACnC;iBACD;aACD,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC5E,OAAO;gBACN,OAAO,EAAE;oBACR;wBACC,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,UAAU,YAAY,EAAE;qBAC9B;iBACD;gBACD,OAAO,EAAE,IAAI;aACb,CAAC;QACH,CAAC;IACF,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,iBAAiB,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;AACzD,CAAC,CAAC,CAAC;AAEH,oCAAoC;AACpC,KAAK,UAAU,IAAI;IAClB,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAC;AACrD,CAAC;AAED,IAAI,
EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACtB,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;IACrC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC,CAAC"}
|