firecrawl-mcp 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -95,6 +95,16 @@ Add this to your `./codeium/windsurf/model_config.json`:
  }
  ```

+ ### Running with SSE Local Mode
+
+ To run the server using Server-Sent Events (SSE) locally instead of the default stdio transport:
+
+ ```bash
+ env SSE_LOCAL=true FIRECRAWL_API_KEY=fc-YOUR_API_KEY npx -y firecrawl-mcp
+ ```
+
+ Use the URL: http://localhost:3000/sse
+
  ### Installing via Smithery (Legacy)

  To install Firecrawl for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@mendableai/mcp-server-firecrawl):
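
For context, a minimal sketch of consuming the new SSE endpoint from a Node script, assuming the default port 3000 from the README change above and the `eventsource` npm package (recent Node versions also ship a global `EventSource`); this client is illustrative and not part of the package:

```js
// Minimal sketch: open the SSE stream the server exposes in SSE local mode.
// Assumes the server was started with SSE_LOCAL=true on the default port 3000.
import EventSource from 'eventsource'; // assumed dev dependency

const es = new EventSource('http://localhost:3000/sse');
es.onopen = () => console.log('Connected to the MCP SSE endpoint');
es.onmessage = (event) => console.log('Server event:', event.data);
es.onerror = (err) => console.error('SSE error:', err);
```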
package/dist/index.js CHANGED
@@ -1021,6 +1021,18 @@ async function runSSELocalServer() {
  transport.handlePostMessage(req, res);
  }
  });
+ const PORT = process.env.PORT || 3000;
+ console.log('Starting server on port', PORT);
+ try {
+ app.listen(PORT, () => {
+ console.log(`MCP SSE Server listening on http://localhost:${PORT}`);
+ console.log(`SSE endpoint: http://localhost:${PORT}/sse`);
+ console.log(`Message endpoint: http://localhost:${PORT}/messages`);
+ });
+ }
+ catch (error) {
+ console.error('Error starting server:', error);
+ }
  }
  async function runSSECloudServer() {
  const transports = {};
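
The hunk above wires the local SSE transport to two HTTP endpoints (note also that the port is overridable via the `PORT` environment variable). A hedged sketch of the implied message flow: the client keeps a GET stream open on `/sse` and POSTs JSON-RPC messages to `/messages`; the exact session handling is up to the SDK's SSE transport, and the `sessionId` query parameter below is an assumption:

```js
// Illustrative only: POST a JSON-RPC request to the message endpoint while an
// SSE stream is open on /sse. The real wire format is managed by the MCP SDK;
// the sessionId query parameter shown here is an assumption.
const res = await fetch('http://localhost:3000/messages?sessionId=SESSION_ID', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'tools/list' }),
});
console.log(res.status); // responses arrive on the SSE stream, not this POST
```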
@@ -0,0 +1,58 @@
+ import { jest } from '@jest/globals';
+ // Set test timeout
+ jest.setTimeout(30000);
+ // Create mock responses
+ const mockSearchResponse = {
+ success: true,
+ data: [
+ {
+ url: 'https://example.com',
+ title: 'Test Page',
+ description: 'Test Description',
+ markdown: '# Test Content',
+ actions: null,
+ },
+ ],
+ };
+ const mockBatchScrapeResponse = {
+ success: true,
+ id: 'test-batch-id',
+ };
+ const mockBatchStatusResponse = {
+ success: true,
+ status: 'completed',
+ completed: 1,
+ total: 1,
+ creditsUsed: 1,
+ expiresAt: new Date(),
+ data: [
+ {
+ url: 'https://example.com',
+ title: 'Test Page',
+ description: 'Test Description',
+ markdown: '# Test Content',
+ actions: null,
+ },
+ ],
+ };
+ // Create mock instance methods
+ const mockSearch = jest.fn().mockImplementation(async () => mockSearchResponse);
+ const mockAsyncBatchScrapeUrls = jest
+ .fn()
+ .mockImplementation(async () => mockBatchScrapeResponse);
+ const mockCheckBatchScrapeStatus = jest
+ .fn()
+ .mockImplementation(async () => mockBatchStatusResponse);
+ // Create mock instance
+ const mockInstance = {
+ apiKey: 'test-api-key',
+ apiUrl: 'test-api-url',
+ search: mockSearch,
+ asyncBatchScrapeUrls: mockAsyncBatchScrapeUrls,
+ checkBatchScrapeStatus: mockCheckBatchScrapeStatus,
+ };
+ // Mock the module
+ jest.mock('@mendable/firecrawl-js', () => ({
+ __esModule: true,
+ default: jest.fn().mockImplementation(() => mockInstance),
+ }));
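
The added file above reads as a Jest setup module for the test suite further down. A hedged sketch of how such a file is typically registered (the package's actual Jest config and the file's path are not shown in this diff; the path below is hypothetical):

```js
// Hypothetical jest.config.js — shows where a setup module like the one above
// would be wired in; the real config shipped with the package is not in this diff.
export default {
  testEnvironment: 'node',
  setupFilesAfterEnv: ['<rootDir>/dist/tests/setup.js'], // hypothetical path
};
```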
@@ -0,0 +1,1053 @@
+ #!/usr/bin/env node
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+ import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
+ import FirecrawlApp from '@mendable/firecrawl-js';
+ import PQueue from 'p-queue';
+ import dotenv from 'dotenv';
+ dotenv.config();
+ // Tool definitions
+ const SCRAPE_TOOL = {
+ name: 'firecrawl_scrape',
+ description: 'Scrape a single webpage with advanced options for content extraction. ' +
+ 'Supports various formats including markdown, HTML, and screenshots. ' +
+ 'Can execute custom actions like clicking or scrolling before scraping.',
+ inputSchema: {
+ type: 'object',
+ properties: {
+ url: {
+ type: 'string',
+ description: 'The URL to scrape',
+ },
+ formats: {
+ type: 'array',
+ items: {
+ type: 'string',
+ enum: [
+ 'markdown',
+ 'html',
+ 'rawHtml',
+ 'screenshot',
+ 'links',
+ 'screenshot@fullPage',
+ 'extract',
+ ],
+ },
+ description: "Content formats to extract (default: ['markdown'])",
+ },
+ onlyMainContent: {
+ type: 'boolean',
+ description: 'Extract only the main content, filtering out navigation, footers, etc.',
+ },
+ includeTags: {
+ type: 'array',
+ items: { type: 'string' },
+ description: 'HTML tags to specifically include in extraction',
+ },
+ excludeTags: {
+ type: 'array',
+ items: { type: 'string' },
+ description: 'HTML tags to exclude from extraction',
+ },
+ waitFor: {
+ type: 'number',
+ description: 'Time in milliseconds to wait for dynamic content to load',
+ },
+ timeout: {
+ type: 'number',
+ description: 'Maximum time in milliseconds to wait for the page to load',
+ },
+ actions: {
+ type: 'array',
+ items: {
+ type: 'object',
+ properties: {
+ type: {
+ type: 'string',
+ enum: [
+ 'wait',
+ 'click',
+ 'screenshot',
+ 'write',
+ 'press',
+ 'scroll',
+ 'scrape',
+ 'executeJavascript',
+ ],
+ description: 'Type of action to perform',
+ },
+ selector: {
+ type: 'string',
+ description: 'CSS selector for the target element',
+ },
+ milliseconds: {
+ type: 'number',
+ description: 'Time to wait in milliseconds (for wait action)',
+ },
+ text: {
+ type: 'string',
+ description: 'Text to write (for write action)',
+ },
+ key: {
+ type: 'string',
+ description: 'Key to press (for press action)',
+ },
+ direction: {
+ type: 'string',
+ enum: ['up', 'down'],
+ description: 'Scroll direction',
+ },
+ script: {
+ type: 'string',
+ description: 'JavaScript code to execute',
+ },
+ fullPage: {
+ type: 'boolean',
+ description: 'Take full page screenshot',
+ },
+ },
+ required: ['type'],
+ },
+ description: 'List of actions to perform before scraping',
+ },
+ extract: {
+ type: 'object',
+ properties: {
+ schema: {
+ type: 'object',
+ description: 'Schema for structured data extraction',
+ },
+ systemPrompt: {
+ type: 'string',
+ description: 'System prompt for LLM extraction',
+ },
+ prompt: {
+ type: 'string',
+ description: 'User prompt for LLM extraction',
+ },
+ },
+ description: 'Configuration for structured data extraction',
+ },
+ mobile: {
+ type: 'boolean',
+ description: 'Use mobile viewport',
+ },
+ skipTlsVerification: {
+ type: 'boolean',
+ description: 'Skip TLS certificate verification',
+ },
+ removeBase64Images: {
+ type: 'boolean',
+ description: 'Remove base64 encoded images from output',
+ },
+ location: {
+ type: 'object',
+ properties: {
+ country: {
+ type: 'string',
+ description: 'Country code for geolocation',
+ },
+ languages: {
+ type: 'array',
+ items: { type: 'string' },
+ description: 'Language codes for content',
+ },
+ },
+ description: 'Location settings for scraping',
+ },
+ },
+ required: ['url'],
+ },
+ };
+ const MAP_TOOL = {
+ name: 'firecrawl_map',
+ description: 'Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.',
+ inputSchema: {
+ type: 'object',
+ properties: {
+ url: {
+ type: 'string',
+ description: 'Starting URL for URL discovery',
+ },
+ search: {
+ type: 'string',
+ description: 'Optional search term to filter URLs',
+ },
+ ignoreSitemap: {
+ type: 'boolean',
+ description: 'Skip sitemap.xml discovery and only use HTML links',
+ },
+ sitemapOnly: {
+ type: 'boolean',
+ description: 'Only use sitemap.xml for discovery, ignore HTML links',
+ },
+ includeSubdomains: {
+ type: 'boolean',
+ description: 'Include URLs from subdomains in results',
+ },
+ limit: {
+ type: 'number',
+ description: 'Maximum number of URLs to return',
+ },
+ },
+ required: ['url'],
+ },
+ };
+ const CRAWL_TOOL = {
+ name: 'firecrawl_crawl',
+ description: 'Start an asynchronous crawl of multiple pages from a starting URL. ' +
+ 'Supports depth control, path filtering, and webhook notifications.',
+ inputSchema: {
+ type: 'object',
+ properties: {
+ url: {
+ type: 'string',
+ description: 'Starting URL for the crawl',
+ },
+ excludePaths: {
+ type: 'array',
+ items: { type: 'string' },
+ description: 'URL paths to exclude from crawling',
+ },
+ includePaths: {
+ type: 'array',
+ items: { type: 'string' },
+ description: 'Only crawl these URL paths',
+ },
+ maxDepth: {
+ type: 'number',
+ description: 'Maximum link depth to crawl',
+ },
+ ignoreSitemap: {
+ type: 'boolean',
+ description: 'Skip sitemap.xml discovery',
+ },
+ limit: {
+ type: 'number',
+ description: 'Maximum number of pages to crawl',
+ },
+ allowBackwardLinks: {
+ type: 'boolean',
+ description: 'Allow crawling links that point to parent directories',
+ },
+ allowExternalLinks: {
+ type: 'boolean',
+ description: 'Allow crawling links to external domains',
+ },
+ webhook: {
+ oneOf: [
+ {
+ type: 'string',
+ description: 'Webhook URL to notify when crawl is complete',
+ },
+ {
+ type: 'object',
+ properties: {
+ url: {
+ type: 'string',
+ description: 'Webhook URL',
+ },
+ headers: {
+ type: 'object',
+ description: 'Custom headers for webhook requests',
+ },
+ },
+ required: ['url'],
+ },
+ ],
+ },
+ deduplicateSimilarURLs: {
+ type: 'boolean',
+ description: 'Remove similar URLs during crawl',
+ },
+ ignoreQueryParameters: {
+ type: 'boolean',
+ description: 'Ignore query parameters when comparing URLs',
+ },
+ scrapeOptions: {
+ type: 'object',
+ properties: {
+ formats: {
+ type: 'array',
+ items: {
+ type: 'string',
+ enum: [
+ 'markdown',
+ 'html',
+ 'rawHtml',
+ 'screenshot',
+ 'links',
+ 'screenshot@fullPage',
+ 'extract',
+ ],
+ },
+ },
+ onlyMainContent: {
+ type: 'boolean',
+ },
+ includeTags: {
+ type: 'array',
+ items: { type: 'string' },
+ },
+ excludeTags: {
+ type: 'array',
+ items: { type: 'string' },
+ },
+ waitFor: {
+ type: 'number',
+ },
+ },
+ description: 'Options for scraping each page',
+ },
+ },
+ required: ['url'],
+ },
+ };
+ const BATCH_SCRAPE_TOOL = {
+ name: 'firecrawl_batch_scrape',
+ description: 'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.',
+ inputSchema: {
+ type: 'object',
+ properties: {
+ urls: {
+ type: 'array',
+ items: { type: 'string' },
+ description: 'List of URLs to scrape',
+ },
+ options: {
+ type: 'object',
+ properties: {
+ formats: {
+ type: 'array',
+ items: {
+ type: 'string',
+ enum: [
+ 'markdown',
+ 'html',
+ 'rawHtml',
+ 'screenshot',
+ 'links',
+ 'screenshot@fullPage',
+ 'extract',
+ ],
+ },
+ },
+ onlyMainContent: {
+ type: 'boolean',
+ },
+ includeTags: {
+ type: 'array',
+ items: { type: 'string' },
+ },
+ excludeTags: {
+ type: 'array',
+ items: { type: 'string' },
+ },
+ waitFor: {
+ type: 'number',
+ },
+ },
+ },
+ },
+ required: ['urls'],
+ },
+ };
+ const CHECK_BATCH_STATUS_TOOL = {
+ name: 'firecrawl_check_batch_status',
+ description: 'Check the status of a batch scraping job.',
+ inputSchema: {
+ type: 'object',
+ properties: {
+ id: {
+ type: 'string',
+ description: 'Batch job ID to check',
+ },
+ },
+ required: ['id'],
+ },
+ };
+ const CHECK_CRAWL_STATUS_TOOL = {
+ name: 'firecrawl_check_crawl_status',
+ description: 'Check the status of a crawl job.',
+ inputSchema: {
+ type: 'object',
+ properties: {
+ id: {
+ type: 'string',
+ description: 'Crawl job ID to check',
+ },
+ },
+ required: ['id'],
+ },
+ };
+ const SEARCH_TOOL = {
+ name: 'firecrawl_search',
+ description: 'Search and retrieve content from web pages with optional scraping. ' +
+ 'Returns SERP results by default (url, title, description) or full page content when scrapeOptions are provided.',
+ inputSchema: {
+ type: 'object',
+ properties: {
+ query: {
+ type: 'string',
+ description: 'Search query string',
+ },
+ limit: {
+ type: 'number',
+ description: 'Maximum number of results to return (default: 5)',
+ },
+ lang: {
+ type: 'string',
+ description: 'Language code for search results (default: en)',
+ },
+ country: {
+ type: 'string',
+ description: 'Country code for search results (default: us)',
+ },
+ tbs: {
+ type: 'string',
+ description: 'Time-based search filter',
+ },
+ filter: {
+ type: 'string',
+ description: 'Search filter',
+ },
+ location: {
+ type: 'object',
+ properties: {
+ country: {
+ type: 'string',
+ description: 'Country code for geolocation',
+ },
+ languages: {
+ type: 'array',
+ items: { type: 'string' },
+ description: 'Language codes for content',
+ },
+ },
+ description: 'Location settings for search',
+ },
+ scrapeOptions: {
+ type: 'object',
+ properties: {
+ formats: {
+ type: 'array',
+ items: {
+ type: 'string',
+ enum: ['markdown', 'html', 'rawHtml'],
+ },
+ description: 'Content formats to extract from search results',
+ },
+ onlyMainContent: {
+ type: 'boolean',
+ description: 'Extract only the main content from results',
+ },
+ waitFor: {
+ type: 'number',
+ description: 'Time in milliseconds to wait for dynamic content',
+ },
+ },
+ description: 'Options for scraping search results',
+ },
+ },
+ required: ['query'],
+ },
+ };
+ const EXTRACT_TOOL = {
+ name: 'firecrawl_extract',
+ description: 'Extract structured information from web pages using LLM. ' +
+ 'Supports both cloud AI and self-hosted LLM extraction.',
+ inputSchema: {
+ type: 'object',
+ properties: {
+ urls: {
+ type: 'array',
+ items: { type: 'string' },
+ description: 'List of URLs to extract information from',
+ },
+ prompt: {
+ type: 'string',
+ description: 'Prompt for the LLM extraction',
+ },
+ systemPrompt: {
+ type: 'string',
+ description: 'System prompt for LLM extraction',
+ },
+ schema: {
+ type: 'object',
+ description: 'JSON schema for structured data extraction',
+ },
+ allowExternalLinks: {
+ type: 'boolean',
+ description: 'Allow extraction from external links',
+ },
+ enableWebSearch: {
+ type: 'boolean',
+ description: 'Enable web search for additional context',
+ },
+ includeSubdomains: {
+ type: 'boolean',
+ description: 'Include subdomains in extraction',
+ },
+ },
+ required: ['urls'],
+ },
+ };
+ // Type guards
+ function isScrapeOptions(args) {
+ return (typeof args === 'object' &&
+ args !== null &&
+ 'url' in args &&
+ typeof args.url === 'string');
+ }
+ function isMapOptions(args) {
+ return (typeof args === 'object' &&
+ args !== null &&
+ 'url' in args &&
+ typeof args.url === 'string');
+ }
+ function isCrawlOptions(args) {
+ return (typeof args === 'object' &&
+ args !== null &&
+ 'url' in args &&
+ typeof args.url === 'string');
+ }
+ function isBatchScrapeOptions(args) {
+ return (typeof args === 'object' &&
+ args !== null &&
+ 'urls' in args &&
+ Array.isArray(args.urls) &&
+ args.urls.every((url) => typeof url === 'string'));
+ }
+ function isStatusCheckOptions(args) {
+ return (typeof args === 'object' &&
+ args !== null &&
+ 'id' in args &&
+ typeof args.id === 'string');
+ }
+ function isSearchOptions(args) {
+ return (typeof args === 'object' &&
+ args !== null &&
+ 'query' in args &&
+ typeof args.query === 'string');
+ }
+ function isExtractOptions(args) {
+ if (typeof args !== 'object' || args === null)
+ return false;
+ const { urls } = args;
+ return (Array.isArray(urls) &&
+ urls.every((url) => typeof url === 'string'));
+ }
+ // Server implementation
+ const server = new Server({
+ name: 'firecrawl-mcp',
+ version: '1.3.2',
+ }, {
+ capabilities: {
+ tools: {},
+ logging: {},
+ },
+ });
+ // Get optional API URL
+ const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
+ const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
+ // Check if API key is required (only for cloud service)
+ if (!FIRECRAWL_API_URL && !FIRECRAWL_API_KEY) {
+ console.error('Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service');
+ process.exit(1);
+ }
+ // Initialize FireCrawl client with optional API URL
+ const client = new FirecrawlApp({
+ apiKey: FIRECRAWL_API_KEY || '',
+ ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
+ });
+ // Configuration for retries and monitoring
+ const CONFIG = {
+ retry: {
+ maxAttempts: Number(process.env.FIRECRAWL_RETRY_MAX_ATTEMPTS) || 3,
+ initialDelay: Number(process.env.FIRECRAWL_RETRY_INITIAL_DELAY) || 1000,
+ maxDelay: Number(process.env.FIRECRAWL_RETRY_MAX_DELAY) || 10000,
+ backoffFactor: Number(process.env.FIRECRAWL_RETRY_BACKOFF_FACTOR) || 2,
+ },
+ credit: {
+ warningThreshold: Number(process.env.FIRECRAWL_CREDIT_WARNING_THRESHOLD) || 1000,
+ criticalThreshold: Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
+ },
+ };
+ const creditUsage = {
+ total: 0,
+ lastCheck: Date.now(),
+ };
+ // Add utility function for delay
+ function delay(ms) {
+ return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+ // Add retry logic with exponential backoff
+ async function withRetry(operation, context, attempt = 1) {
+ try {
+ return await operation();
+ }
+ catch (error) {
+ const isRateLimit = error instanceof Error &&
+ (error.message.includes('rate limit') || error.message.includes('429'));
+ if (isRateLimit && attempt < CONFIG.retry.maxAttempts) {
+ const delayMs = Math.min(CONFIG.retry.initialDelay *
+ Math.pow(CONFIG.retry.backoffFactor, attempt - 1), CONFIG.retry.maxDelay);
+ server.sendLoggingMessage({
+ level: 'warning',
+ data: `Rate limit hit for ${context}. Attempt ${attempt}/${CONFIG.retry.maxAttempts}. Retrying in ${delayMs}ms`,
+ });
+ await delay(delayMs);
+ return withRetry(operation, context, attempt + 1);
+ }
+ throw error;
+ }
+ }
+ // Add credit monitoring
+ async function updateCreditUsage(creditsUsed) {
+ creditUsage.total += creditsUsed;
+ // Log credit usage
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `Credit usage: ${creditUsage.total} credits used total`,
+ });
+ // Check thresholds
+ if (creditUsage.total >= CONFIG.credit.criticalThreshold) {
+ server.sendLoggingMessage({
+ level: 'error',
+ data: `CRITICAL: Credit usage has reached ${creditUsage.total}`,
+ });
+ }
+ else if (creditUsage.total >= CONFIG.credit.warningThreshold) {
+ server.sendLoggingMessage({
+ level: 'warning',
+ data: `WARNING: Credit usage has reached ${creditUsage.total}`,
+ });
+ }
+ }
+ // Initialize queue system
+ const batchQueue = new PQueue({ concurrency: 1 });
+ const batchOperations = new Map();
+ let operationCounter = 0;
+ async function processBatchOperation(operation) {
+ try {
+ operation.status = 'processing';
+ let totalCreditsUsed = 0;
+ // Use library's built-in batch processing
+ const response = await withRetry(async () => client.asyncBatchScrapeUrls(operation.urls, operation.options), `batch ${operation.id} processing`);
+ if (!response.success) {
+ throw new Error(response.error || 'Batch operation failed');
+ }
+ // Track credits if using cloud API
+ if (!FIRECRAWL_API_URL && hasCredits(response)) {
+ totalCreditsUsed += response.creditsUsed;
+ await updateCreditUsage(response.creditsUsed);
+ }
+ operation.status = 'completed';
+ operation.result = response;
+ // Log final credit usage for the batch
+ if (!FIRECRAWL_API_URL) {
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`,
+ });
+ }
+ }
+ catch (error) {
+ operation.status = 'failed';
+ operation.error = error instanceof Error ? error.message : String(error);
+ server.sendLoggingMessage({
+ level: 'error',
+ data: `Batch ${operation.id} failed: ${operation.error}`,
+ });
+ }
+ }
+ // Tool handlers
+ server.setRequestHandler(ListToolsRequestSchema, async () => ({
+ tools: [
+ SCRAPE_TOOL,
+ MAP_TOOL,
+ CRAWL_TOOL,
+ BATCH_SCRAPE_TOOL,
+ CHECK_BATCH_STATUS_TOOL,
+ CHECK_CRAWL_STATUS_TOOL,
+ SEARCH_TOOL,
+ EXTRACT_TOOL,
+ ],
+ }));
+ server.setRequestHandler(CallToolRequestSchema, async (request) => {
+ const startTime = Date.now();
+ try {
+ const { name, arguments: args } = request.params;
+ // Log incoming request with timestamp
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `[${new Date().toISOString()}] Received request for tool: ${name}`,
+ });
+ if (!args) {
+ throw new Error('No arguments provided');
+ }
+ switch (name) {
+ case 'firecrawl_scrape': {
+ if (!isScrapeOptions(args)) {
+ throw new Error('Invalid arguments for firecrawl_scrape');
+ }
+ const { url, ...options } = args;
+ try {
+ const scrapeStartTime = Date.now();
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`,
+ });
+ const response = await client.scrapeUrl(url, options);
+ // Log performance metrics
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `Scrape completed in ${Date.now() - scrapeStartTime}ms`,
+ });
+ if ('success' in response && !response.success) {
+ throw new Error(response.error || 'Scraping failed');
+ }
+ const content = 'markdown' in response
+ ? response.markdown || response.html || response.rawHtml
+ : null;
+ return {
+ content: [
+ { type: 'text', text: content || 'No content available' },
+ ],
+ isError: false,
+ };
+ }
+ catch (error) {
+ const errorMessage = error instanceof Error ? error.message : String(error);
+ return {
+ content: [{ type: 'text', text: errorMessage }],
+ isError: true,
+ };
+ }
+ }
+ case 'firecrawl_map': {
+ if (!isMapOptions(args)) {
+ throw new Error('Invalid arguments for firecrawl_map');
+ }
+ const { url, ...options } = args;
+ const response = await client.mapUrl(url, options);
+ if ('error' in response) {
+ throw new Error(response.error);
+ }
+ if (!response.links) {
+ throw new Error('No links received from FireCrawl API');
+ }
+ return {
+ content: [{ type: 'text', text: response.links.join('\n') }],
+ isError: false,
+ };
+ }
+ case 'firecrawl_batch_scrape': {
+ if (!isBatchScrapeOptions(args)) {
+ throw new Error('Invalid arguments for firecrawl_batch_scrape');
+ }
+ try {
+ const operationId = `batch_${++operationCounter}`;
+ const operation = {
+ id: operationId,
+ urls: args.urls,
+ options: args.options,
+ status: 'pending',
+ progress: {
+ completed: 0,
+ total: args.urls.length,
+ },
+ };
+ batchOperations.set(operationId, operation);
+ // Queue the operation
+ batchQueue.add(() => processBatchOperation(operation));
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `Queued batch operation ${operationId} with ${args.urls.length} URLs`,
+ });
+ return {
+ content: [
+ {
+ type: 'text',
+ text: `Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`,
+ },
+ ],
+ isError: false,
+ };
+ }
+ catch (error) {
+ const errorMessage = error instanceof Error
+ ? error.message
+ : `Batch operation failed: ${JSON.stringify(error)}`;
+ return {
+ content: [{ type: 'text', text: errorMessage }],
+ isError: true,
+ };
+ }
+ }
+ case 'firecrawl_check_batch_status': {
+ if (!isStatusCheckOptions(args)) {
+ throw new Error('Invalid arguments for firecrawl_check_batch_status');
+ }
+ const operation = batchOperations.get(args.id);
+ if (!operation) {
+ return {
+ content: [
+ {
+ type: 'text',
+ text: `No batch operation found with ID: ${args.id}`,
+ },
+ ],
+ isError: true,
+ };
+ }
+ const status = `Batch Status:
+ Status: ${operation.status}
+ Progress: ${operation.progress.completed}/${operation.progress.total}
+ ${operation.error ? `Error: ${operation.error}` : ''}
+ ${operation.result
+ ? `Results: ${JSON.stringify(operation.result, null, 2)}`
+ : ''}`;
+ return {
+ content: [{ type: 'text', text: status }],
+ isError: false,
+ };
+ }
+ case 'firecrawl_crawl': {
+ if (!isCrawlOptions(args)) {
+ throw new Error('Invalid arguments for firecrawl_crawl');
+ }
+ const { url, ...options } = args;
+ const response = await withRetry(async () => client.asyncCrawlUrl(url, options), 'crawl operation');
+ if (!response.success) {
+ throw new Error(response.error);
+ }
+ // Monitor credits for cloud API
+ if (!FIRECRAWL_API_URL && hasCredits(response)) {
+ await updateCreditUsage(response.creditsUsed);
+ }
+ return {
+ content: [
+ {
+ type: 'text',
+ text: `Started crawl for ${url} with job ID: ${response.id}`,
+ },
+ ],
+ isError: false,
+ };
+ }
+ case 'firecrawl_check_crawl_status': {
+ if (!isStatusCheckOptions(args)) {
+ throw new Error('Invalid arguments for firecrawl_check_crawl_status');
+ }
+ const response = await client.checkCrawlStatus(args.id);
+ if (!response.success) {
+ throw new Error(response.error);
+ }
+ const status = `Crawl Status:
+ Status: ${response.status}
+ Progress: ${response.completed}/${response.total}
+ Credits Used: ${response.creditsUsed}
+ Expires At: ${response.expiresAt}
+ ${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}`;
+ return {
+ content: [{ type: 'text', text: status }],
+ isError: false,
+ };
+ }
+ case 'firecrawl_search': {
+ if (!isSearchOptions(args)) {
+ throw new Error('Invalid arguments for firecrawl_search');
+ }
+ try {
+ const response = await withRetry(async () => client.search(args.query, args), 'search operation');
+ if (!response.success) {
+ throw new Error(`Search failed: ${response.error || 'Unknown error'}`);
+ }
+ // Monitor credits for cloud API
+ if (!FIRECRAWL_API_URL && hasCredits(response)) {
+ await updateCreditUsage(response.creditsUsed);
+ }
+ // Format the results
+ const results = response.data
+ .map((result) => `URL: ${result.url}
+ Title: ${result.title || 'No title'}
+ Description: ${result.description || 'No description'}
+ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
+ .join('\n\n');
+ return {
+ content: [{ type: 'text', text: results }],
+ isError: false,
+ };
+ }
+ catch (error) {
+ const errorMessage = error instanceof Error
+ ? error.message
+ : `Search failed: ${JSON.stringify(error)}`;
+ return {
+ content: [{ type: 'text', text: errorMessage }],
+ isError: true,
+ };
+ }
+ }
+ case 'firecrawl_extract': {
+ if (!isExtractOptions(args)) {
+ throw new Error('Invalid arguments for firecrawl_extract');
+ }
+ try {
+ const extractStartTime = Date.now();
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `Starting extraction for URLs: ${args.urls.join(', ')}`,
+ });
+ // Log if using self-hosted instance
+ if (FIRECRAWL_API_URL) {
+ server.sendLoggingMessage({
+ level: 'info',
+ data: 'Using self-hosted instance for extraction',
+ });
+ }
+ const extractResponse = await withRetry(async () => client.extract(args.urls, {
+ prompt: args.prompt,
+ systemPrompt: args.systemPrompt,
+ schema: args.schema,
+ allowExternalLinks: args.allowExternalLinks,
+ enableWebSearch: args.enableWebSearch,
+ includeSubdomains: args.includeSubdomains,
+ origin: 'mcp-server',
+ }), 'extract operation');
+ // Type guard for successful response
+ if (!('success' in extractResponse) || !extractResponse.success) {
+ throw new Error(extractResponse.error || 'Extraction failed');
+ }
+ const response = extractResponse;
+ // Monitor credits for cloud API
+ if (!FIRECRAWL_API_URL && hasCredits(response)) {
+ await updateCreditUsage(response.creditsUsed || 0);
+ }
+ // Log performance metrics
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `Extraction completed in ${Date.now() - extractStartTime}ms`,
+ });
+ // Add warning to response if present
+ const result = {
+ content: [
+ {
+ type: 'text',
+ text: JSON.stringify(response.data, null, 2),
+ },
+ ],
+ isError: false,
+ };
+ if (response.warning) {
+ server.sendLoggingMessage({
+ level: 'warning',
+ data: response.warning,
+ });
+ }
+ return result;
+ }
+ catch (error) {
+ const errorMessage = error instanceof Error ? error.message : String(error);
+ // Special handling for self-hosted instance errors
+ if (FIRECRAWL_API_URL &&
+ errorMessage.toLowerCase().includes('not supported')) {
+ server.sendLoggingMessage({
+ level: 'error',
+ data: 'Extraction is not supported by this self-hosted instance',
+ });
+ return {
+ content: [
+ {
+ type: 'text',
+ text: 'Extraction is not supported by this self-hosted instance. Please ensure LLM support is configured.',
+ },
+ ],
+ isError: true,
+ };
+ }
+ return {
+ content: [{ type: 'text', text: errorMessage }],
+ isError: true,
+ };
+ }
+ }
+ default:
+ return {
+ content: [{ type: 'text', text: `Unknown tool: ${name}` }],
+ isError: true,
+ };
+ }
+ }
+ catch (error) {
+ // Log detailed error information
+ server.sendLoggingMessage({
+ level: 'error',
+ data: {
+ message: `Request failed: ${error instanceof Error ? error.message : String(error)}`,
+ tool: request.params.name,
+ arguments: request.params.arguments,
+ timestamp: new Date().toISOString(),
+ duration: Date.now() - startTime,
+ },
+ });
+ return {
+ content: [
+ {
+ type: 'text',
+ text: `Error: ${error instanceof Error ? error.message : String(error)}`,
+ },
+ ],
+ isError: true,
+ };
+ }
+ finally {
+ // Log request completion with performance metrics
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `Request completed in ${Date.now() - startTime}ms`,
+ });
+ }
+ });
+ // Helper function to format results
+ function formatResults(data) {
+ return data
+ .map((doc) => {
+ const content = doc.markdown || doc.html || doc.rawHtml || 'No content';
+ return `URL: ${doc.url || 'Unknown URL'}
+ Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''}
+ ${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`;
+ })
+ .join('\n\n');
+ }
+ // Server startup
+ async function runServer() {
+ try {
+ console.error('Initializing FireCrawl MCP Server...');
+ const transport = new StdioServerTransport();
+ await server.connect(transport);
+ // Now that we're connected, we can send logging messages
+ server.sendLoggingMessage({
+ level: 'info',
+ data: 'FireCrawl MCP Server initialized successfully',
+ });
+ server.sendLoggingMessage({
+ level: 'info',
+ data: `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`,
+ });
+ console.error('FireCrawl MCP Server running on stdio');
+ }
+ catch (error) {
+ console.error('Fatal error running server:', error);
+ process.exit(1);
+ }
+ }
+ runServer().catch((error) => {
+ console.error('Fatal error running server:', error);
+ process.exit(1);
+ });
+ // Add type guard for credit usage
+ function hasCredits(response) {
+ return 'creditsUsed' in response && typeof response.creditsUsed === 'number';
+ }
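
The file above is the complete compiled server. For orientation, a minimal sketch of driving it end to end with the MCP SDK client over stdio — assuming the client-side API of `@modelcontextprotocol/sdk` (`Client`, `StdioClientTransport`, `callTool`) matches the SDK version in use:

```js
// Illustrative client: spawn the published server over stdio and call a tool.
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

const transport = new StdioClientTransport({
  command: 'npx',
  args: ['-y', 'firecrawl-mcp'],
  env: { FIRECRAWL_API_KEY: 'fc-YOUR_API_KEY' },
});
const client = new Client({ name: 'example-client', version: '0.0.1' }, { capabilities: {} });
await client.connect(transport);

// List the eight tools registered by the server, then scrape one page.
const { tools } = await client.listTools();
console.log(tools.map((t) => t.name));

const result = await client.callTool({
  name: 'firecrawl_scrape',
  arguments: { url: 'https://example.com', formats: ['markdown'] },
});
console.log(result.content?.[0]?.text);
```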
@@ -0,0 +1,225 @@
+ import FirecrawlApp from '@mendable/firecrawl-js';
+ import { describe, expect, jest, test, beforeEach, afterEach, } from '@jest/globals';
+ import { mock } from 'jest-mock-extended';
+ // Mock FirecrawlApp
+ jest.mock('@mendable/firecrawl-js');
+ describe('FireCrawl Tool Tests', () => {
+ let mockClient;
+ let requestHandler;
+ beforeEach(() => {
+ jest.clearAllMocks();
+ mockClient = mock();
+ // Set up mock implementations
+ const mockInstance = new FirecrawlApp({ apiKey: 'test' });
+ Object.assign(mockInstance, mockClient);
+ // Create request handler
+ requestHandler = async (request) => {
+ const { name, arguments: args } = request.params;
+ if (!args) {
+ throw new Error('No arguments provided');
+ }
+ return handleRequest(name, args, mockClient);
+ };
+ });
+ afterEach(() => {
+ jest.clearAllMocks();
+ });
+ // Test scrape functionality
+ test('should handle scrape request', async () => {
+ const url = 'https://example.com';
+ const options = { formats: ['markdown'] };
+ const mockResponse = {
+ success: true,
+ markdown: '# Test Content',
+ html: undefined,
+ rawHtml: undefined,
+ url: 'https://example.com',
+ actions: undefined,
+ };
+ mockClient.scrapeUrl.mockResolvedValueOnce(mockResponse);
+ const response = await requestHandler({
+ method: 'call_tool',
+ params: {
+ name: 'firecrawl_scrape',
+ arguments: { url, ...options },
+ },
+ });
+ expect(response).toEqual({
+ content: [{ type: 'text', text: '# Test Content' }],
+ isError: false,
+ });
+ expect(mockClient.scrapeUrl).toHaveBeenCalledWith(url, {
+ formats: ['markdown'],
+ url,
+ });
+ });
+ // Test batch scrape functionality
+ test('should handle batch scrape request', async () => {
+ const urls = ['https://example.com'];
+ const options = { formats: ['markdown'] };
+ mockClient.asyncBatchScrapeUrls.mockResolvedValueOnce({
+ success: true,
+ id: 'test-batch-id',
+ });
+ const response = await requestHandler({
+ method: 'call_tool',
+ params: {
+ name: 'firecrawl_batch_scrape',
+ arguments: { urls, options },
+ },
+ });
+ expect(response.content[0].text).toContain('Batch operation queued with ID: batch_');
+ expect(mockClient.asyncBatchScrapeUrls).toHaveBeenCalledWith(urls, options);
+ });
+ // Test search functionality
+ test('should handle search request', async () => {
+ const query = 'test query';
+ const scrapeOptions = { formats: ['markdown'] };
+ const mockSearchResponse = {
+ success: true,
+ data: [
+ {
+ url: 'https://example.com',
+ title: 'Test Page',
+ description: 'Test Description',
+ markdown: '# Test Content',
+ actions: undefined,
+ },
+ ],
+ };
+ mockClient.search.mockResolvedValueOnce(mockSearchResponse);
+ const response = await requestHandler({
+ method: 'call_tool',
+ params: {
+ name: 'firecrawl_search',
+ arguments: { query, scrapeOptions },
+ },
+ });
+ expect(response.isError).toBe(false);
+ expect(response.content[0].text).toContain('Test Page');
+ expect(mockClient.search).toHaveBeenCalledWith(query, scrapeOptions);
+ });
+ // Test crawl functionality
+ test('should handle crawl request', async () => {
+ const url = 'https://example.com';
+ const options = { maxDepth: 2 };
+ mockClient.asyncCrawlUrl.mockResolvedValueOnce({
+ success: true,
+ id: 'test-crawl-id',
+ });
+ const response = await requestHandler({
+ method: 'call_tool',
+ params: {
+ name: 'firecrawl_crawl',
+ arguments: { url, ...options },
+ },
+ });
+ expect(response.isError).toBe(false);
+ expect(response.content[0].text).toContain('test-crawl-id');
+ expect(mockClient.asyncCrawlUrl).toHaveBeenCalledWith(url, {
+ maxDepth: 2,
+ url,
+ });
+ });
+ // Test error handling
+ test('should handle API errors', async () => {
+ const url = 'https://example.com';
+ mockClient.scrapeUrl.mockRejectedValueOnce(new Error('API Error'));
+ const response = await requestHandler({
+ method: 'call_tool',
+ params: {
+ name: 'firecrawl_scrape',
+ arguments: { url },
+ },
+ });
+ expect(response.isError).toBe(true);
+ expect(response.content[0].text).toContain('API Error');
+ });
+ // Test rate limiting
+ test('should handle rate limits', async () => {
+ const url = 'https://example.com';
+ // Mock rate limit error
+ mockClient.scrapeUrl.mockRejectedValueOnce(new Error('rate limit exceeded'));
+ const response = await requestHandler({
+ method: 'call_tool',
+ params: {
+ name: 'firecrawl_scrape',
+ arguments: { url },
+ },
+ });
+ expect(response.isError).toBe(true);
+ expect(response.content[0].text).toContain('rate limit exceeded');
+ });
+ });
+ // Helper function to simulate request handling
+ async function handleRequest(name, args, client) {
+ try {
+ switch (name) {
+ case 'firecrawl_scrape': {
+ const response = await client.scrapeUrl(args.url, args);
+ if (!response.success) {
+ throw new Error(response.error || 'Scraping failed');
+ }
+ return {
+ content: [
+ { type: 'text', text: response.markdown || 'No content available' },
+ ],
+ isError: false,
+ };
+ }
+ case 'firecrawl_batch_scrape': {
+ const response = await client.asyncBatchScrapeUrls(args.urls, args.options);
+ return {
+ content: [
+ {
+ type: 'text',
+ text: `Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress.`,
+ },
+ ],
+ isError: false,
+ };
+ }
+ case 'firecrawl_search': {
+ const response = await client.search(args.query, args.scrapeOptions);
+ if (!response.success) {
+ throw new Error(response.error || 'Search failed');
+ }
+ const results = response.data
+ .map((result) => `URL: ${result.url}\nTitle: ${result.title || 'No title'}\nDescription: ${result.description || 'No description'}\n${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
+ .join('\n\n');
+ return {
+ content: [{ type: 'text', text: results }],
+ isError: false,
+ };
+ }
+ case 'firecrawl_crawl': {
+ const response = await client.asyncCrawlUrl(args.url, args);
+ if (!response.success) {
+ throw new Error(response.error);
+ }
+ return {
+ content: [
+ {
+ type: 'text',
+ text: `Started crawl for ${args.url} with job ID: ${response.id}`,
+ },
+ ],
+ isError: false,
+ };
+ }
+ default:
+ throw new Error(`Unknown tool: ${name}`);
+ }
+ }
+ catch (error) {
+ return {
+ content: [
+ {
+ type: 'text',
+ text: error instanceof Error ? error.message : String(error),
+ },
+ ],
+ isError: true,
+ };
+ }
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "firecrawl-mcp",
- "version": "1.9.0",
+ "version": "1.10.0",
  "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
  "type": "module",
  "bin": {