xcrawl-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.editorconfig +12 -0
  2. package/.env.example +3 -0
  3. package/.prettierrc +6 -0
  4. package/README.md +244 -0
  5. package/claude.md +295 -0
  6. package/dist/core/crawl.d.ts +246 -0
  7. package/dist/core/crawl.d.ts.map +1 -0
  8. package/dist/core/crawl.js +141 -0
  9. package/dist/core/crawl.js.map +1 -0
  10. package/dist/core/map.d.ts +34 -0
  11. package/dist/core/map.d.ts.map +1 -0
  12. package/dist/core/map.js +50 -0
  13. package/dist/core/map.js.map +1 -0
  14. package/dist/core/scrape.d.ts +201 -0
  15. package/dist/core/scrape.d.ts.map +1 -0
  16. package/dist/core/scrape.js +148 -0
  17. package/dist/core/scrape.js.map +1 -0
  18. package/dist/core/search.d.ts +144 -0
  19. package/dist/core/search.d.ts.map +1 -0
  20. package/dist/core/search.js +75 -0
  21. package/dist/core/search.js.map +1 -0
  22. package/dist/index.d.ts +8 -0
  23. package/dist/index.d.ts.map +1 -0
  24. package/dist/index.js +516 -0
  25. package/dist/index.js.map +1 -0
  26. package/dist/stdio.d.ts +3 -0
  27. package/dist/stdio.d.ts.map +1 -0
  28. package/dist/stdio.js +551 -0
  29. package/dist/stdio.js.map +1 -0
  30. package/dist/tools.d.ts +540 -0
  31. package/dist/tools.d.ts.map +1 -0
  32. package/dist/tools.js +528 -0
  33. package/dist/tools.js.map +1 -0
  34. package/dist/types.d.ts +214 -0
  35. package/dist/types.d.ts.map +1 -0
  36. package/dist/types.js +5 -0
  37. package/dist/types.js.map +1 -0
  38. package/package.json +33 -0
  39. package/src/core/crawl.ts +149 -0
  40. package/src/core/map.ts +56 -0
  41. package/src/core/scrape.ts +156 -0
  42. package/src/core/search.ts +81 -0
  43. package/src/index.ts +565 -0
  44. package/src/stdio.ts +584 -0
  45. package/src/tools.ts +539 -0
  46. package/src/types.ts +221 -0
  47. package/tsconfig.build.json +14 -0
  48. package/tsconfig.json +45 -0
  49. package/vitest.config.mts +11 -0
  50. package/worker-configuration.d.ts +10848 -0
  51. package/wrangler.jsonc +26 -0
@@ -0,0 +1,81 @@
1
+ import { z } from "zod";
2
+ import type { XCrawlSearchRequest, XCrawlSearchResponse } from "../types.js";
3
+
4
// Factories for the optional primitive fields used by the SERP options below.
// Each call builds a fresh schema instance so no schema object is shared.
const optionalText = () => z.string().optional();
const optionalInt = () => z.number().int().optional();
const optionalFlag = () => z.boolean().optional();

/**
 * Shape of the `serp_options` object: every key is optional and is validated
 * only for its primitive type (string, integer, or boolean).
 */
const serpOptionsSchema = z.object({
  q: optionalText(),
  location: optionalText(),
  uule: optionalText(),
  google_domain: optionalText(),
  gl: optionalText(),
  hl: optionalText(),
  cr: optionalText(),
  lr: optionalText(),
  safe: optionalInt(),
  nfpr: optionalFlag(),
  filter: optionalFlag(),
  tbs: optionalText(),
  start: optionalInt(),
  num: optionalInt(),
  ludocid: optionalText(),
  lsig: optionalText(),
  kgmid: optionalText(),
  si: optionalText(),
  ibp: optionalText(),
  uds: optionalText(),
  no_cache: optionalFlag(),
});

/**
 * Zod schema for xcrawl_search tool parameters.
 *
 * Only `query` is required (non-empty string). `limit`, when given, must be an
 * integer between 1 and 100 inclusive.
 */
export const searchToolSchema = z.object({
  query: z.string().min(1).describe("Search keywords"),
  location: z.string().optional().describe("Search location (country/city name or ISO code)"),
  language: z.string().optional().describe("Search language (ISO 639-1 code)"),
  limit: z.number().int().min(1).max(100).optional().describe("Number of results to return (1-100)"),
  serp_options: serpOptionsSchema.optional().describe("Advanced Google SERP parameters"),
});

/** Parameter object type inferred from {@link searchToolSchema}. */
export type SearchToolParams = z.infer<typeof searchToolSchema>;
41
+
42
/**
 * Call xCrawl Search API to get Google SERP results.
 *
 * Sends `params` as a JSON POST body to `https://run.xcrawl.com/v1/search`
 * with the API key as a Bearer token, and returns the parsed JSON response.
 *
 * @param apiKey - xCrawl API key, sent in the `Authorization` header.
 * @param params - Search request payload, serialized with `JSON.stringify`.
 * @param timeoutMs - Milliseconds before the request (including reading the
 *   response body) is aborted. Defaults to 300000 (300 seconds).
 * @returns The response body parsed as JSON.
 * @throws Error with the HTTP status, status text and response body when the
 *   API answers with a non-2xx status.
 * @throws Error `Request timeout after N seconds` when the timeout elapses.
 */
export async function callXCrawlSearchAPI(
  apiKey: string,
  params: XCrawlSearchRequest,
  timeoutMs: number = 300000
): Promise<XCrawlSearchResponse> {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const response = await fetch("https://run.xcrawl.com/v1/search", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(params),
      signal: controller.signal,
    });

    if (!response.ok) {
      // A failure while reading the error body must not hide the HTTP status.
      const errorText = await response.text().catch(() => "");
      throw new Error(`xCrawl Search API error: ${response.status} ${response.statusText} - ${errorText}`);
    }

    return (await response.json()) as XCrawlSearchResponse;
  } catch (error) {
    // Only the timer above ever aborts this controller, so an aborted signal
    // means the timeout fired. Checking the signal (not just the error's
    // class/name) also covers abort rejections that are not Error instances.
    const timedOut = controller.signal.aborted || (error instanceof Error && error.name === "AbortError");
    if (timedOut) {
      throw new Error(`Request timeout after ${timeoutMs / 1000} seconds`);
    }
    throw error;
  } finally {
    // Always release the timer, on success and on failure.
    clearTimeout(timeoutId);
  }
}
75
+
76
/**
 * Format search response for MCP tool output.
 *
 * @param response - Search API response to serialize.
 * @returns The response as pretty-printed JSON using two-space indentation.
 */
export function formatSearchResponse(response: XCrawlSearchResponse): string {
  const indentWidth = 2;
  const serialized = JSON.stringify(response, null, indentWidth);
  return serialized;
}
package/src/index.ts ADDED
@@ -0,0 +1,565 @@
1
+ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
2
+ import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
3
+ import { callXCrawlAPI, checkScrapeStatus, formatScrapeResponse, scrapeToolSchema } from "./core/scrape.js";
4
+ import { callXCrawlSearchAPI, formatSearchResponse, searchToolSchema } from "./core/search.js";
5
+ import { callXCrawlMapAPI, formatMapResponse, mapToolSchema } from "./core/map.js";
6
+ import { callXCrawlCrawlAPI, checkCrawlStatus, formatCrawlResponse, crawlToolSchema } from "./core/crawl.js";
7
+ import type { XCrawlScrapeRequest, XCrawlSearchRequest, XCrawlMapRequest, XCrawlCrawlRequest } from "./types.js";
8
+ import {
9
+ XCRAWL_SCRAPE_TOOL,
10
+ XCRAWL_CHECK_STATUS_TOOL,
11
+ XCRAWL_SEARCH_TOOL,
12
+ XCRAWL_MAP_TOOL,
13
+ XCRAWL_CRAWL_TOOL,
14
+ XCRAWL_CHECK_CRAWL_STATUS_TOOL,
15
+ } from "./tools.js";
16
+
17
/**
 * Extract API key from request headers.
 *
 * Lookup order:
 *   1. `Authorization: Bearer <token>` (scheme matched case-insensitively,
 *      surrounding whitespace around the token ignored)
 *   2. `x-api-key`
 *   3. `x-xcrawl-api-key`
 *
 * An `Authorization` header that uses another scheme, or a Bearer header with
 * an empty token, does not stop the lookup; the remaining headers are tried.
 *
 * @param request - Incoming request whose headers are inspected.
 * @returns The first non-empty key found, or `undefined` when none is present.
 */
function extractApiKey(request: Request): string | undefined {
  const authHeader = request.headers.get("Authorization");
  if (authHeader) {
    // HTTP auth scheme names are case-insensitive, so accept "bearer" too.
    const bearerMatch = /^Bearer\s+(\S.*)$/i.exec(authHeader.trim());
    const bearerToken = bearerMatch?.[1]?.trim();
    if (bearerToken) {
      return bearerToken;
    }
  }
  return request.headers.get("x-api-key") || request.headers.get("x-xcrawl-api-key") || undefined;
}
27
+
28
/**
 * Create MCP server instance.
 *
 * Registers two request handlers on a new SDK `Server`:
 *   - tools/list: returns the six xCrawl tool descriptors.
 *   - tools/call: runs the named tool with the caller's arguments and returns
 *     its formatted output as a single text content item. A failure inside a
 *     tool (validation or API error) is returned as a result with
 *     `isError: true` and the text `Error: <message>`; an unknown tool name
 *     throws.
 *
 * @param apiKey - xCrawl API key forwarded to every API call made by a tool.
 * @returns The configured server.
 */
function createMCPServer(apiKey: string): Server {
  const server = new Server(
    {
      name: "xCrawl MCP Server",
      version: "1.0.0",
    },
    {
      capabilities: {
        tools: {},
      },
    }
  );

  // Register tools/list handler
  server.setRequestHandler(ListToolsRequestSchema, async () => ({
    tools: [
      XCRAWL_SCRAPE_TOOL,
      XCRAWL_CHECK_STATUS_TOOL,
      XCRAWL_SEARCH_TOOL,
      XCRAWL_MAP_TOOL,
      XCRAWL_CRAWL_TOOL,
      XCRAWL_CHECK_CRAWL_STATUS_TOOL,
    ],
  }));

  // Reads a required, non-empty string argument. Tolerates a missing
  // arguments object so the caller gets the "is required" message instead of
  // a property-access TypeError.
  const requireStringArg = (rawArgs: unknown, key: string): string => {
    const value = (rawArgs as Record<string, unknown> | null | undefined)?.[key];
    if (!value || typeof value !== "string") {
      throw new Error(`${key} is required and must be a string`);
    }
    return value;
  };

  // One entry per tool: validate the raw arguments, call the API, and return
  // the formatted text. Errors propagate to the shared handler below.
  const toolRunners = new Map<string, (rawArgs: unknown) => Promise<string>>([
    [
      "xcrawl_scrape",
      async (rawArgs) => {
        const validatedArgs = scrapeToolSchema.parse(rawArgs);
        return formatScrapeResponse(await callXCrawlAPI(apiKey, validatedArgs as XCrawlScrapeRequest));
      },
    ],
    [
      "xcrawl_check_status",
      async (rawArgs) => {
        const scrapeId = requireStringArg(rawArgs, "scrape_id");
        return formatScrapeResponse(await checkScrapeStatus(apiKey, scrapeId));
      },
    ],
    [
      "xcrawl_search",
      async (rawArgs) => {
        const validatedArgs = searchToolSchema.parse(rawArgs);
        return formatSearchResponse(await callXCrawlSearchAPI(apiKey, validatedArgs as XCrawlSearchRequest));
      },
    ],
    [
      "xcrawl_map",
      async (rawArgs) => {
        const validatedArgs = mapToolSchema.parse(rawArgs);
        return formatMapResponse(await callXCrawlMapAPI(apiKey, validatedArgs as XCrawlMapRequest));
      },
    ],
    [
      "xcrawl_crawl",
      async (rawArgs) => {
        const validatedArgs = crawlToolSchema.parse(rawArgs);
        return formatCrawlResponse(await callXCrawlCrawlAPI(apiKey, validatedArgs as XCrawlCrawlRequest));
      },
    ],
    [
      "xcrawl_check_crawl_status",
      async (rawArgs) => {
        const crawlId = requireStringArg(rawArgs, "crawl_id");
        return formatCrawlResponse(await checkCrawlStatus(apiKey, crawlId));
      },
    ],
  ]);

  // Register tools/call handler
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const runTool = toolRunners.get(request.params.name);
    if (!runTool) {
      throw new Error(`Unknown tool: ${request.params.name}`);
    }

    try {
      const text = await runTool(request.params.arguments);
      return {
        content: [
          {
            type: "text" as const,
            text,
          },
        ],
      };
    } catch (error) {
      // Tool failures are reported in-band so the caller can read the message.
      const errorMessage = error instanceof Error ? error.message : String(error);
      return {
        content: [
          {
            type: "text" as const,
            text: `Error: ${errorMessage}`,
          },
        ],
        isError: true,
      };
    }
  });

  return server;
}
234
+
235
/** Build a JSON `Response` with the given HTTP status. */
function jsonResponse(payload: unknown, status: number): Response {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { "Content-Type": "application/json" },
  });
}

/** Build a JSON-RPC 2.0 error response; `id` is echoed exactly as passed in. */
function jsonRpcErrorResponse(id: unknown, code: number, message: string, status: number): Response {
  return jsonResponse({ jsonrpc: "2.0", id, error: { code, message } }, status);
}

/** Read a required, non-empty string argument from a tool's arguments. */
function requireStringToolArg(toolArgs: unknown, key: string): string {
  const value = (toolArgs as Record<string, unknown>)[key];
  if (!value || typeof value !== "string") {
    throw new Error(`${key} is required and must be a string`);
  }
  return value;
}

/**
 * Map a tool name to the function that validates its arguments, calls the
 * xCrawl API and returns the formatted text; `undefined` for unknown names.
 */
function selectWorkerToolRunner(
  toolName: unknown
): ((apiKey: string, toolArgs: unknown) => Promise<string>) | undefined {
  switch (toolName) {
    case "xcrawl_scrape":
      return async (apiKey, toolArgs) => {
        const validatedArgs = scrapeToolSchema.parse(toolArgs);
        return formatScrapeResponse(await callXCrawlAPI(apiKey, validatedArgs as XCrawlScrapeRequest));
      };
    case "xcrawl_check_status":
      return async (apiKey, toolArgs) =>
        formatScrapeResponse(await checkScrapeStatus(apiKey, requireStringToolArg(toolArgs, "scrape_id")));
    case "xcrawl_search":
      return async (apiKey, toolArgs) => {
        const validatedArgs = searchToolSchema.parse(toolArgs);
        return formatSearchResponse(await callXCrawlSearchAPI(apiKey, validatedArgs as XCrawlSearchRequest));
      };
    case "xcrawl_map":
      return async (apiKey, toolArgs) => {
        const validatedArgs = mapToolSchema.parse(toolArgs);
        return formatMapResponse(await callXCrawlMapAPI(apiKey, validatedArgs as XCrawlMapRequest));
      };
    case "xcrawl_crawl":
      return async (apiKey, toolArgs) => {
        const validatedArgs = crawlToolSchema.parse(toolArgs);
        return formatCrawlResponse(await callXCrawlCrawlAPI(apiKey, validatedArgs as XCrawlCrawlRequest));
      };
    case "xcrawl_check_crawl_status":
      return async (apiKey, toolArgs) =>
        formatCrawlResponse(await checkCrawlStatus(apiKey, requireStringToolArg(toolArgs, "crawl_id")));
    default:
      return undefined;
  }
}

/**
 * Handle one JSON-RPC 2.0 message sent to the /mcp endpoint.
 *
 * Supported methods: `initialize`, `tools/list`, `tools/call`. Messages whose
 * method starts with `notifications/` are acknowledged with 202 and no body,
 * because JSON-RPC notifications must not receive a response.
 */
async function handleMcpRpc(request: Request, apiKey: string): Promise<Response> {
  // Parse JSON-RPC request; only a failure here is a genuine parse error.
  let body: unknown;
  try {
    body = await request.json();
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return jsonRpcErrorResponse(null, -32700, `Parse error: ${errorMessage}`, 400);
  }

  if (typeof body !== "object" || body === null) {
    return jsonRpcErrorResponse(null, -32600, "Invalid Request - body must be a JSON object", 400);
  }

  const { jsonrpc, id, method, params } = body as {
    jsonrpc?: unknown;
    id?: unknown;
    method?: unknown;
    params?: unknown;
  };

  // Validate JSON-RPC format. `??` (not `||`) so a valid id of 0 is echoed.
  if (jsonrpc !== "2.0") {
    return jsonRpcErrorResponse(id ?? null, -32600, "Invalid Request - jsonrpc must be '2.0'", 400);
  }

  // Notifications (e.g. notifications/initialized) get no JSON-RPC response.
  if (typeof method === "string" && method.startsWith("notifications/")) {
    return new Response(null, { status: 202 });
  }

  let result: unknown;

  if (method === "initialize") {
    result = {
      protocolVersion: "2024-11-05",
      capabilities: {
        tools: {},
      },
      serverInfo: {
        name: "xCrawl MCP Server",
        version: "1.0.0",
      },
    };
  } else if (method === "tools/list") {
    result = {
      tools: [
        XCRAWL_SCRAPE_TOOL,
        XCRAWL_CHECK_STATUS_TOOL,
        XCRAWL_SEARCH_TOOL,
        XCRAWL_MAP_TOOL,
        XCRAWL_CRAWL_TOOL,
        XCRAWL_CHECK_CRAWL_STATUS_TOOL,
      ],
    };
  } else if (method === "tools/call") {
    const callParams = params as { name?: unknown; arguments?: unknown } | null | undefined;
    const toolName = callParams?.name;
    // Missing (or falsy) arguments are treated as an empty argument object.
    const toolArgs = callParams?.arguments || {};

    const runTool = selectWorkerToolRunner(toolName);
    if (!runTool) {
      return jsonRpcErrorResponse(id, -32601, `Unknown tool: ${String(toolName)}`, 404);
    }

    try {
      result = {
        content: [
          {
            type: "text",
            text: await runTool(apiKey, toolArgs),
          },
        ],
      };
    } catch (error) {
      // Tool failures are reported in-band as an error result, not an RPC error.
      const errorMessage = error instanceof Error ? error.message : String(error);
      result = {
        content: [
          {
            type: "text",
            text: `Error: ${errorMessage}`,
          },
        ],
        isError: true,
      };
    }
  } else {
    return jsonRpcErrorResponse(id, -32601, `Method not found: ${String(method)}`, 404);
  }

  return jsonResponse({ jsonrpc: "2.0", id, result }, 200);
}

/**
 * Cloudflare Workers fetch handler.
 *
 * Routes:
 *   - `/health`: plain `OK`, no API key needed.
 *   - `/sse`, `/sse/...`: 501, not implemented in Workers mode.
 *   - `/mcp`: JSON-RPC endpoint (initialize, tools/list, tools/call).
 *   - anything else: 404 JSON.
 *
 * Every route except `/health` requires an API key taken from the request
 * headers; without one the handler answers 401.
 */
export default {
  async fetch(request: Request, env: Record<string, string>, ctx: any): Promise<Response> {
    const url = new URL(request.url);

    // Health check endpoint
    if (url.pathname === "/health") {
      return new Response("OK", { status: 200 });
    }

    // Extract API key from request headers only
    const apiKey = extractApiKey(request);
    if (!apiKey) {
      return jsonResponse(
        {
          error: "API key required",
          message:
            "Please provide xCrawl API key via one of these headers:\n- Authorization: Bearer <your-api-key>\n- x-api-key: <your-api-key>\n- x-xcrawl-api-key: <your-api-key>",
        },
        401
      );
    }

    // SSE endpoint
    if (url.pathname === "/sse" || url.pathname.startsWith("/sse/")) {
      // SSE transport requires streaming support which is complex in Workers
      // For now, return a message directing to use stdio mode
      return jsonResponse(
        {
          message: "SSE endpoint not yet implemented in Workers mode. Please use stdio mode for local development.",
        },
        501
      );
    }

    // MCP endpoint (streamable HTTP / JSON-RPC)
    if (url.pathname === "/mcp") {
      try {
        return await handleMcpRpc(request, apiKey);
      } catch (error) {
        // Safety net: anything unexpected is an internal error, not a parse error.
        const errorMessage = error instanceof Error ? error.message : String(error);
        return jsonRpcErrorResponse(null, -32603, `Internal error: ${errorMessage}`, 500);
      }
    }

    // Default 404 response
    return jsonResponse(
      {
        error: "Not found",
        message: "Available endpoints: /mcp, /sse, /health",
      },
      404
    );
  },
};