firecrawl-mcp 3.6.2 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/index.js DELETED
@@ -1,1053 +0,0 @@
1
- #!/usr/bin/env node
2
- import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3
- import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
- import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
5
- import FirecrawlApp from '@mendable/firecrawl-js';
6
- import PQueue from 'p-queue';
7
- import dotenv from 'dotenv';
8
- dotenv.config();
// Tool definitions
// JSON-Schema description of the single-page scrape tool, advertised via ListTools.
const SCRAPE_TOOL = {
    name: 'firecrawl_scrape',
    description: 'Scrape a single webpage with advanced options for content extraction. ' +
        'Supports various formats including markdown, HTML, and screenshots. ' +
        'Can execute custom actions like clicking or scrolling before scraping.',
    inputSchema: {
        type: 'object',
        properties: {
            url: {
                type: 'string',
                description: 'The URL to scrape',
            },
            formats: {
                type: 'array',
                items: {
                    type: 'string',
                    enum: [
                        'markdown',
                        'html',
                        'rawHtml',
                        'screenshot',
                        'links',
                        'screenshot@fullPage',
                        'extract',
                    ],
                },
                description: "Content formats to extract (default: ['markdown'])",
            },
            onlyMainContent: {
                type: 'boolean',
                description: 'Extract only the main content, filtering out navigation, footers, etc.',
            },
            includeTags: {
                type: 'array',
                items: { type: 'string' },
                description: 'HTML tags to specifically include in extraction',
            },
            excludeTags: {
                type: 'array',
                items: { type: 'string' },
                description: 'HTML tags to exclude from extraction',
            },
            waitFor: {
                type: 'number',
                description: 'Time in milliseconds to wait for dynamic content to load',
            },
            timeout: {
                type: 'number',
                description: 'Maximum time in milliseconds to wait for the page to load',
            },
            // Browser-automation steps executed before the page is captured.
            actions: {
                type: 'array',
                items: {
                    type: 'object',
                    properties: {
                        type: {
                            type: 'string',
                            enum: [
                                'wait',
                                'click',
                                'screenshot',
                                'write',
                                'press',
                                'scroll',
                                'scrape',
                                'executeJavascript',
                            ],
                            description: 'Type of action to perform',
                        },
                        selector: {
                            type: 'string',
                            description: 'CSS selector for the target element',
                        },
                        milliseconds: {
                            type: 'number',
                            description: 'Time to wait in milliseconds (for wait action)',
                        },
                        text: {
                            type: 'string',
                            description: 'Text to write (for write action)',
                        },
                        key: {
                            type: 'string',
                            description: 'Key to press (for press action)',
                        },
                        direction: {
                            type: 'string',
                            enum: ['up', 'down'],
                            description: 'Scroll direction',
                        },
                        script: {
                            type: 'string',
                            description: 'JavaScript code to execute',
                        },
                        fullPage: {
                            type: 'boolean',
                            description: 'Take full page screenshot',
                        },
                    },
                    required: ['type'],
                },
                description: 'List of actions to perform before scraping',
            },
            // LLM-based structured-extraction settings (pairs with the 'extract' format).
            extract: {
                type: 'object',
                properties: {
                    schema: {
                        type: 'object',
                        description: 'Schema for structured data extraction',
                    },
                    systemPrompt: {
                        type: 'string',
                        description: 'System prompt for LLM extraction',
                    },
                    prompt: {
                        type: 'string',
                        description: 'User prompt for LLM extraction',
                    },
                },
                description: 'Configuration for structured data extraction',
            },
            mobile: {
                type: 'boolean',
                description: 'Use mobile viewport',
            },
            skipTlsVerification: {
                type: 'boolean',
                description: 'Skip TLS certificate verification',
            },
            removeBase64Images: {
                type: 'boolean',
                description: 'Remove base64 encoded images from output',
            },
            location: {
                type: 'object',
                properties: {
                    country: {
                        type: 'string',
                        description: 'Country code for geolocation',
                    },
                    languages: {
                        type: 'array',
                        items: { type: 'string' },
                        description: 'Language codes for content',
                    },
                },
                description: 'Location settings for scraping',
            },
        },
        required: ['url'],
    },
};
// URL-discovery tool: enumerates URLs reachable from a starting point via
// sitemap.xml and/or HTML links.
const MAP_TOOL = {
    name: 'firecrawl_map',
    description: 'Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.',
    inputSchema: {
        type: 'object',
        properties: {
            url: {
                type: 'string',
                description: 'Starting URL for URL discovery',
            },
            search: {
                type: 'string',
                description: 'Optional search term to filter URLs',
            },
            ignoreSitemap: {
                type: 'boolean',
                description: 'Skip sitemap.xml discovery and only use HTML links',
            },
            sitemapOnly: {
                type: 'boolean',
                description: 'Only use sitemap.xml for discovery, ignore HTML links',
            },
            includeSubdomains: {
                type: 'boolean',
                description: 'Include URLs from subdomains in results',
            },
            limit: {
                type: 'number',
                description: 'Maximum number of URLs to return',
            },
        },
        required: ['url'],
    },
};
// Asynchronous multi-page crawl tool; returns a job ID rather than content.
const CRAWL_TOOL = {
    name: 'firecrawl_crawl',
    description: 'Start an asynchronous crawl of multiple pages from a starting URL. ' +
        'Supports depth control, path filtering, and webhook notifications.',
    inputSchema: {
        type: 'object',
        properties: {
            url: {
                type: 'string',
                description: 'Starting URL for the crawl',
            },
            excludePaths: {
                type: 'array',
                items: { type: 'string' },
                description: 'URL paths to exclude from crawling',
            },
            includePaths: {
                type: 'array',
                items: { type: 'string' },
                description: 'Only crawl these URL paths',
            },
            maxDepth: {
                type: 'number',
                description: 'Maximum link depth to crawl',
            },
            ignoreSitemap: {
                type: 'boolean',
                description: 'Skip sitemap.xml discovery',
            },
            limit: {
                type: 'number',
                description: 'Maximum number of pages to crawl',
            },
            allowBackwardLinks: {
                type: 'boolean',
                description: 'Allow crawling links that point to parent directories',
            },
            allowExternalLinks: {
                type: 'boolean',
                description: 'Allow crawling links to external domains',
            },
            // Accepts either a plain URL string or an object with custom headers.
            webhook: {
                oneOf: [
                    {
                        type: 'string',
                        description: 'Webhook URL to notify when crawl is complete',
                    },
                    {
                        type: 'object',
                        properties: {
                            url: {
                                type: 'string',
                                description: 'Webhook URL',
                            },
                            headers: {
                                type: 'object',
                                description: 'Custom headers for webhook requests',
                            },
                        },
                        required: ['url'],
                    },
                ],
            },
            deduplicateSimilarURLs: {
                type: 'boolean',
                description: 'Remove similar URLs during crawl',
            },
            ignoreQueryParameters: {
                type: 'boolean',
                description: 'Ignore query parameters when comparing URLs',
            },
            // Per-page scrape settings applied to every crawled page.
            scrapeOptions: {
                type: 'object',
                properties: {
                    formats: {
                        type: 'array',
                        items: {
                            type: 'string',
                            enum: [
                                'markdown',
                                'html',
                                'rawHtml',
                                'screenshot',
                                'links',
                                'screenshot@fullPage',
                                'extract',
                            ],
                        },
                    },
                    onlyMainContent: {
                        type: 'boolean',
                    },
                    includeTags: {
                        type: 'array',
                        items: { type: 'string' },
                    },
                    excludeTags: {
                        type: 'array',
                        items: { type: 'string' },
                    },
                    waitFor: {
                        type: 'number',
                    },
                },
                description: 'Options for scraping each page',
            },
        },
        required: ['url'],
    },
};
// Batch scrape tool: queues many URLs at once and hands back a job ID that is
// polled with firecrawl_check_batch_status.
const BATCH_SCRAPE_TOOL = {
    name: 'firecrawl_batch_scrape',
    description: 'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.',
    inputSchema: {
        type: 'object',
        properties: {
            urls: {
                type: 'array',
                items: { type: 'string' },
                description: 'List of URLs to scrape',
            },
            // Shared scrape options applied to every URL in the batch.
            options: {
                type: 'object',
                properties: {
                    formats: {
                        type: 'array',
                        items: {
                            type: 'string',
                            enum: [
                                'markdown',
                                'html',
                                'rawHtml',
                                'screenshot',
                                'links',
                                'screenshot@fullPage',
                                'extract',
                            ],
                        },
                    },
                    onlyMainContent: {
                        type: 'boolean',
                    },
                    includeTags: {
                        type: 'array',
                        items: { type: 'string' },
                    },
                    excludeTags: {
                        type: 'array',
                        items: { type: 'string' },
                    },
                    waitFor: {
                        type: 'number',
                    },
                },
            },
        },
        required: ['urls'],
    },
};
// Status poll for a previously queued batch scrape (ID is process-local).
const CHECK_BATCH_STATUS_TOOL = {
    name: 'firecrawl_check_batch_status',
    description: 'Check the status of a batch scraping job.',
    inputSchema: {
        type: 'object',
        properties: {
            id: {
                type: 'string',
                description: 'Batch job ID to check',
            },
        },
        required: ['id'],
    },
};
// Status poll for an asynchronous crawl job started by firecrawl_crawl.
const CHECK_CRAWL_STATUS_TOOL = {
    name: 'firecrawl_check_crawl_status',
    description: 'Check the status of a crawl job.',
    inputSchema: {
        type: 'object',
        properties: {
            id: {
                type: 'string',
                description: 'Crawl job ID to check',
            },
        },
        required: ['id'],
    },
};
// Web-search tool; optionally scrapes the result pages when scrapeOptions is set.
const SEARCH_TOOL = {
    name: 'firecrawl_search',
    description: 'Search and retrieve content from web pages with optional scraping. ' +
        'Returns SERP results by default (url, title, description) or full page content when scrapeOptions are provided.',
    inputSchema: {
        type: 'object',
        properties: {
            query: {
                type: 'string',
                description: 'Search query string',
            },
            limit: {
                type: 'number',
                description: 'Maximum number of results to return (default: 5)',
            },
            lang: {
                type: 'string',
                description: 'Language code for search results (default: en)',
            },
            country: {
                type: 'string',
                description: 'Country code for search results (default: us)',
            },
            tbs: {
                type: 'string',
                description: 'Time-based search filter',
            },
            filter: {
                type: 'string',
                description: 'Search filter',
            },
            location: {
                type: 'object',
                properties: {
                    country: {
                        type: 'string',
                        description: 'Country code for geolocation',
                    },
                    languages: {
                        type: 'array',
                        items: { type: 'string' },
                        description: 'Language codes for content',
                    },
                },
                description: 'Location settings for search',
            },
            // When present, each search hit is scraped for full content.
            scrapeOptions: {
                type: 'object',
                properties: {
                    formats: {
                        type: 'array',
                        items: {
                            type: 'string',
                            enum: ['markdown', 'html', 'rawHtml'],
                        },
                        description: 'Content formats to extract from search results',
                    },
                    onlyMainContent: {
                        type: 'boolean',
                        description: 'Extract only the main content from results',
                    },
                    waitFor: {
                        type: 'number',
                        description: 'Time in milliseconds to wait for dynamic content',
                    },
                },
                description: 'Options for scraping search results',
            },
        },
        required: ['query'],
    },
};
// LLM-backed structured-extraction tool; works against cloud or self-hosted
// instances (the handler degrades gracefully when extraction is unsupported).
const EXTRACT_TOOL = {
    name: 'firecrawl_extract',
    description: 'Extract structured information from web pages using LLM. ' +
        'Supports both cloud AI and self-hosted LLM extraction.',
    inputSchema: {
        type: 'object',
        properties: {
            urls: {
                type: 'array',
                items: { type: 'string' },
                description: 'List of URLs to extract information from',
            },
            prompt: {
                type: 'string',
                description: 'Prompt for the LLM extraction',
            },
            systemPrompt: {
                type: 'string',
                description: 'System prompt for LLM extraction',
            },
            schema: {
                type: 'object',
                description: 'JSON schema for structured data extraction',
            },
            allowExternalLinks: {
                type: 'boolean',
                description: 'Allow extraction from external links',
            },
            enableWebSearch: {
                type: 'boolean',
                description: 'Enable web search for additional context',
            },
            includeSubdomains: {
                type: 'boolean',
                description: 'Include subdomains in extraction',
            },
        },
        required: ['urls'],
    },
};
// Type guards
// Runtime argument validators for incoming tool calls. The original code
// repeated the same "object with a string `url`" check three times and the
// "array of strings" check twice; the shared predicates below deduplicate
// that logic without changing any guard's behavior.
// Shared predicate: a non-null object carrying a string `url` property.
function hasStringUrl(args) {
    return (typeof args === 'object' &&
        args !== null &&
        'url' in args &&
        typeof args.url === 'string');
}
// Shared predicate: value is an array whose elements are all strings.
function isStringArray(value) {
    return Array.isArray(value) && value.every((item) => typeof item === 'string');
}
// Guard for firecrawl_scrape arguments (only `url` is validated here).
function isScrapeOptions(args) {
    return hasStringUrl(args);
}
// Guard for firecrawl_map arguments.
function isMapOptions(args) {
    return hasStringUrl(args);
}
// Guard for firecrawl_crawl arguments.
function isCrawlOptions(args) {
    return hasStringUrl(args);
}
// Guard for firecrawl_batch_scrape arguments: requires `urls` as string[].
function isBatchScrapeOptions(args) {
    return (typeof args === 'object' &&
        args !== null &&
        'urls' in args &&
        isStringArray(args.urls));
}
// Guard for the two status-check tools: requires a string `id`.
function isStatusCheckOptions(args) {
    return (typeof args === 'object' &&
        args !== null &&
        'id' in args &&
        typeof args.id === 'string');
}
// Guard for firecrawl_search arguments: requires a string `query`.
function isSearchOptions(args) {
    return (typeof args === 'object' &&
        args !== null &&
        'query' in args &&
        typeof args.query === 'string');
}
// Guard for firecrawl_extract arguments: requires `urls` as string[].
function isExtractOptions(args) {
    if (typeof args !== 'object' || args === null)
        return false;
    return isStringArray(args.urls);
}
// Server implementation
const server = new Server({
    name: 'firecrawl-mcp',
    // NOTE(review): this version string lags the published package version
    // (diff header says 3.x) — confirm whether it should be kept in sync.
    version: '1.3.2',
}, {
    capabilities: {
        tools: {},
        logging: {},
    },
});
// Get optional API URL
// FIRECRAWL_API_URL switches the client to a self-hosted instance; the API key
// is only mandatory for the hosted cloud service.
const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
// Check if API key is required (only for cloud service)
if (!FIRECRAWL_API_URL && !FIRECRAWL_API_KEY) {
    console.error('Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service');
    process.exit(1);
}
// Initialize FireCrawl client with optional API URL
const client = new FirecrawlApp({
    apiKey: FIRECRAWL_API_KEY || '',
    ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
});
// Configuration for retries and monitoring
// Every knob can be overridden via environment variables; Number(undefined)
// is NaN, so unset variables fall through to the defaults via ||.
const CONFIG = {
    retry: {
        maxAttempts: Number(process.env.FIRECRAWL_RETRY_MAX_ATTEMPTS) || 3,
        initialDelay: Number(process.env.FIRECRAWL_RETRY_INITIAL_DELAY) || 1000,
        maxDelay: Number(process.env.FIRECRAWL_RETRY_MAX_DELAY) || 10000,
        backoffFactor: Number(process.env.FIRECRAWL_RETRY_BACKOFF_FACTOR) || 2,
    },
    credit: {
        warningThreshold: Number(process.env.FIRECRAWL_CREDIT_WARNING_THRESHOLD) || 1000,
        criticalThreshold: Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
    },
};
// In-process running total of credits consumed (cloud API only).
const creditUsage = {
    total: 0,
    lastCheck: Date.now(),
};
// Add utility function for delay
// Resolve after `ms` milliseconds; used to space out retry attempts.
function delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}
// Add retry logic with exponential backoff
// Run `operation`; when it fails with something that looks like a rate-limit
// error ("rate limit" or "429" in the message), log a warning and retry with
// exponential backoff capped at CONFIG.retry.maxDelay. Any other error, or
// exhaustion of CONFIG.retry.maxAttempts, propagates to the caller.
// `attempt` is the 1-based attempt number (kept for interface compatibility).
async function withRetry(operation, context, attempt = 1) {
    for (let current = attempt; ; current += 1) {
        try {
            return await operation();
        }
        catch (error) {
            const rateLimited = error instanceof Error &&
                (error.message.includes('rate limit') || error.message.includes('429'));
            // Give up on non-rate-limit failures or once attempts run out.
            if (!rateLimited || current >= CONFIG.retry.maxAttempts) {
                throw error;
            }
            // Exponential backoff: initialDelay * factor^(attempt-1), capped.
            const delayMs = Math.min(CONFIG.retry.initialDelay *
                Math.pow(CONFIG.retry.backoffFactor, current - 1), CONFIG.retry.maxDelay);
            server.sendLoggingMessage({
                level: 'warning',
                data: `Rate limit hit for ${context}. Attempt ${current}/${CONFIG.retry.maxAttempts}. Retrying in ${delayMs}ms`,
            });
            await delay(delayMs);
        }
    }
}
// Add credit monitoring
// Accumulate credits consumed across requests and emit log messages when the
// configured thresholds are crossed. Only called for the cloud API.
async function updateCreditUsage(creditsUsed) {
    creditUsage.total += creditsUsed;
    // Log credit usage
    server.sendLoggingMessage({
        level: 'info',
        data: `Credit usage: ${creditUsage.total} credits used total`,
    });
    // Check thresholds
    // NOTE(review): with the defaults (criticalThreshold=100 < warningThreshold=1000)
    // the critical branch matches first once total >= 100, so the warning branch
    // below is unreachable. Confirm whether these thresholds were meant to be
    // compared against remaining credits rather than credits used.
    if (creditUsage.total >= CONFIG.credit.criticalThreshold) {
        server.sendLoggingMessage({
            level: 'error',
            data: `CRITICAL: Credit usage has reached ${creditUsage.total}`,
        });
    }
    else if (creditUsage.total >= CONFIG.credit.warningThreshold) {
        server.sendLoggingMessage({
            level: 'warning',
            data: `WARNING: Credit usage has reached ${creditUsage.total}`,
        });
    }
}
// Initialize queue system
// Single-concurrency queue serializes batch submissions. Operations live only
// in this Map, so batch job state is lost when the process restarts.
const batchQueue = new PQueue({ concurrency: 1 });
const batchOperations = new Map();
let operationCounter = 0;
// Drain one queued batch operation: submit its URLs to FireCrawl's async batch
// endpoint (with rate-limit retry) and record the outcome on the shared
// `operation` object that firecrawl_check_batch_status reads.
async function processBatchOperation(operation) {
    try {
        operation.status = 'processing';
        let totalCreditsUsed = 0;
        // Use library's built-in batch processing
        const response = await withRetry(async () => client.asyncBatchScrapeUrls(operation.urls, operation.options), `batch ${operation.id} processing`);
        if (!response.success) {
            throw new Error(response.error || 'Batch operation failed');
        }
        // Track credits if using cloud API
        if (!FIRECRAWL_API_URL && hasCredits(response)) {
            totalCreditsUsed += response.creditsUsed;
            await updateCreditUsage(response.creditsUsed);
        }
        // NOTE(review): operation.progress.completed is never advanced, so status
        // checks always report 0/N even after completion — confirm whether
        // per-URL progress reporting was intended here.
        operation.status = 'completed';
        operation.result = response;
        // Log final credit usage for the batch
        if (!FIRECRAWL_API_URL) {
            server.sendLoggingMessage({
                level: 'info',
                data: `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`,
            });
        }
    }
    catch (error) {
        // Record the failure on the operation so status checks surface it.
        operation.status = 'failed';
        operation.error = error instanceof Error ? error.message : String(error);
        server.sendLoggingMessage({
            level: 'error',
            data: `Batch ${operation.id} failed: ${operation.error}`,
        });
    }
}
// Tool handlers
// Advertise every available tool to the MCP client.
server.setRequestHandler(ListToolsRequestSchema, async () => ({
    tools: [
        SCRAPE_TOOL,
        MAP_TOOL,
        CRAWL_TOOL,
        BATCH_SCRAPE_TOOL,
        CHECK_BATCH_STATUS_TOOL,
        CHECK_CRAWL_STATUS_TOOL,
        SEARCH_TOOL,
        EXTRACT_TOOL,
    ],
}));
// Central tool dispatcher: validates arguments with the type guards above,
// invokes the FireCrawl client, and wraps results or errors in MCP text
// content parts. Tool-level failures return { isError: true } rather than
// throwing, so the client always receives a well-formed response.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const startTime = Date.now();
    try {
        const { name, arguments: args } = request.params;
        // Log incoming request with timestamp
        server.sendLoggingMessage({
            level: 'info',
            data: `[${new Date().toISOString()}] Received request for tool: ${name}`,
        });
        if (!args) {
            throw new Error('No arguments provided');
        }
        switch (name) {
            case 'firecrawl_scrape': {
                if (!isScrapeOptions(args)) {
                    throw new Error('Invalid arguments for firecrawl_scrape');
                }
                // Everything except `url` is forwarded as scrape options.
                const { url, ...options } = args;
                try {
                    const scrapeStartTime = Date.now();
                    server.sendLoggingMessage({
                        level: 'info',
                        data: `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`,
                    });
                    // NOTE(review): unlike crawl/search/extract, this call is not
                    // wrapped in withRetry — confirm whether that is intentional.
                    const response = await client.scrapeUrl(url, options);
                    // Log performance metrics
                    server.sendLoggingMessage({
                        level: 'info',
                        data: `Scrape completed in ${Date.now() - scrapeStartTime}ms`,
                    });
                    if ('success' in response && !response.success) {
                        throw new Error(response.error || 'Scraping failed');
                    }
                    // Prefer markdown, then html, then rawHtml.
                    const content = 'markdown' in response
                        ? response.markdown || response.html || response.rawHtml
                        : null;
                    return {
                        content: [
                            { type: 'text', text: content || 'No content available' },
                        ],
                        isError: false,
                    };
                }
                catch (error) {
                    const errorMessage = error instanceof Error ? error.message : String(error);
                    return {
                        content: [{ type: 'text', text: errorMessage }],
                        isError: true,
                    };
                }
            }
            case 'firecrawl_map': {
                if (!isMapOptions(args)) {
                    throw new Error('Invalid arguments for firecrawl_map');
                }
                const { url, ...options } = args;
                const response = await client.mapUrl(url, options);
                if ('error' in response) {
                    throw new Error(response.error);
                }
                if (!response.links) {
                    throw new Error('No links received from FireCrawl API');
                }
                // One discovered URL per line.
                return {
                    content: [{ type: 'text', text: response.links.join('\n') }],
                    isError: false,
                };
            }
            case 'firecrawl_batch_scrape': {
                if (!isBatchScrapeOptions(args)) {
                    throw new Error('Invalid arguments for firecrawl_batch_scrape');
                }
                try {
                    // In-memory job record; IDs are process-local counters.
                    const operationId = `batch_${++operationCounter}`;
                    const operation = {
                        id: operationId,
                        urls: args.urls,
                        options: args.options,
                        status: 'pending',
                        progress: {
                            completed: 0,
                            total: args.urls.length,
                        },
                    };
                    batchOperations.set(operationId, operation);
                    // Queue the operation
                    batchQueue.add(() => processBatchOperation(operation));
                    server.sendLoggingMessage({
                        level: 'info',
                        data: `Queued batch operation ${operationId} with ${args.urls.length} URLs`,
                    });
                    return {
                        content: [
                            {
                                type: 'text',
                                text: `Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`,
                            },
                        ],
                        isError: false,
                    };
                }
                catch (error) {
                    const errorMessage = error instanceof Error
                        ? error.message
                        : `Batch operation failed: ${JSON.stringify(error)}`;
                    return {
                        content: [{ type: 'text', text: errorMessage }],
                        isError: true,
                    };
                }
            }
            case 'firecrawl_check_batch_status': {
                if (!isStatusCheckOptions(args)) {
                    throw new Error('Invalid arguments for firecrawl_check_batch_status');
                }
                const operation = batchOperations.get(args.id);
                if (!operation) {
                    return {
                        content: [
                            {
                                type: 'text',
                                text: `No batch operation found with ID: ${args.id}`,
                            },
                        ],
                        isError: true,
                    };
                }
                // Template continuation lines are column-0 on purpose: they are
                // part of the emitted message.
                const status = `Batch Status:
Status: ${operation.status}
Progress: ${operation.progress.completed}/${operation.progress.total}
${operation.error ? `Error: ${operation.error}` : ''}
${operation.result
                    ? `Results: ${JSON.stringify(operation.result, null, 2)}`
                    : ''}`;
                return {
                    content: [{ type: 'text', text: status }],
                    isError: false,
                };
            }
            case 'firecrawl_crawl': {
                if (!isCrawlOptions(args)) {
                    throw new Error('Invalid arguments for firecrawl_crawl');
                }
                const { url, ...options } = args;
                // Fire-and-forget: returns a job ID; results come via status checks.
                const response = await withRetry(async () => client.asyncCrawlUrl(url, options), 'crawl operation');
                if (!response.success) {
                    throw new Error(response.error);
                }
                // Monitor credits for cloud API
                if (!FIRECRAWL_API_URL && hasCredits(response)) {
                    await updateCreditUsage(response.creditsUsed);
                }
                return {
                    content: [
                        {
                            type: 'text',
                            text: `Started crawl for ${url} with job ID: ${response.id}`,
                        },
                    ],
                    isError: false,
                };
            }
            case 'firecrawl_check_crawl_status': {
                if (!isStatusCheckOptions(args)) {
                    throw new Error('Invalid arguments for firecrawl_check_crawl_status');
                }
                const response = await client.checkCrawlStatus(args.id);
                if (!response.success) {
                    throw new Error(response.error);
                }
                const status = `Crawl Status:
Status: ${response.status}
Progress: ${response.completed}/${response.total}
Credits Used: ${response.creditsUsed}
Expires At: ${response.expiresAt}
${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}`;
                return {
                    content: [{ type: 'text', text: status }],
                    isError: false,
                };
            }
            case 'firecrawl_search': {
                if (!isSearchOptions(args)) {
                    throw new Error('Invalid arguments for firecrawl_search');
                }
                try {
                    const response = await withRetry(async () => client.search(args.query, args), 'search operation');
                    if (!response.success) {
                        throw new Error(`Search failed: ${response.error || 'Unknown error'}`);
                    }
                    // Monitor credits for cloud API
                    if (!FIRECRAWL_API_URL && hasCredits(response)) {
                        await updateCreditUsage(response.creditsUsed);
                    }
                    // Format the results
                    const results = response.data
                        .map((result) => `URL: ${result.url}
Title: ${result.title || 'No title'}
Description: ${result.description || 'No description'}
${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
                        .join('\n\n');
                    return {
                        content: [{ type: 'text', text: results }],
                        isError: false,
                    };
                }
                catch (error) {
                    const errorMessage = error instanceof Error
                        ? error.message
                        : `Search failed: ${JSON.stringify(error)}`;
                    return {
                        content: [{ type: 'text', text: errorMessage }],
                        isError: true,
                    };
                }
            }
            case 'firecrawl_extract': {
                if (!isExtractOptions(args)) {
                    throw new Error('Invalid arguments for firecrawl_extract');
                }
                try {
                    const extractStartTime = Date.now();
                    server.sendLoggingMessage({
                        level: 'info',
                        data: `Starting extraction for URLs: ${args.urls.join(', ')}`,
                    });
                    // Log if using self-hosted instance
                    if (FIRECRAWL_API_URL) {
                        server.sendLoggingMessage({
                            level: 'info',
                            data: 'Using self-hosted instance for extraction',
                        });
                    }
                    const extractResponse = await withRetry(async () => client.extract(args.urls, {
                        prompt: args.prompt,
                        systemPrompt: args.systemPrompt,
                        schema: args.schema,
                        allowExternalLinks: args.allowExternalLinks,
                        enableWebSearch: args.enableWebSearch,
                        includeSubdomains: args.includeSubdomains,
                        origin: 'mcp-server',
                    }), 'extract operation');
                    // Type guard for successful response
                    if (!('success' in extractResponse) || !extractResponse.success) {
                        throw new Error(extractResponse.error || 'Extraction failed');
                    }
                    const response = extractResponse;
                    // Monitor credits for cloud API
                    if (!FIRECRAWL_API_URL && hasCredits(response)) {
                        await updateCreditUsage(response.creditsUsed || 0);
                    }
                    // Log performance metrics
                    server.sendLoggingMessage({
                        level: 'info',
                        data: `Extraction completed in ${Date.now() - extractStartTime}ms`,
                    });
                    // Add warning to response if present
                    const result = {
                        content: [
                            {
                                type: 'text',
                                text: JSON.stringify(response.data, null, 2),
                            },
                        ],
                        isError: false,
                    };
                    if (response.warning) {
                        server.sendLoggingMessage({
                            level: 'warning',
                            data: response.warning,
                        });
                    }
                    return result;
                }
                catch (error) {
                    const errorMessage = error instanceof Error ? error.message : String(error);
                    // Special handling for self-hosted instance errors
                    if (FIRECRAWL_API_URL &&
                        errorMessage.toLowerCase().includes('not supported')) {
                        server.sendLoggingMessage({
                            level: 'error',
                            data: 'Extraction is not supported by this self-hosted instance',
                        });
                        return {
                            content: [
                                {
                                    type: 'text',
                                    text: 'Extraction is not supported by this self-hosted instance. Please ensure LLM support is configured.',
                                },
                            ],
                            isError: true,
                        };
                    }
                    return {
                        content: [{ type: 'text', text: errorMessage }],
                        isError: true,
                    };
                }
            }
            default:
                return {
                    content: [{ type: 'text', text: `Unknown tool: ${name}` }],
                    isError: true,
                };
        }
    }
    catch (error) {
        // Log detailed error information
        server.sendLoggingMessage({
            level: 'error',
            data: {
                message: `Request failed: ${error instanceof Error ? error.message : String(error)}`,
                tool: request.params.name,
                arguments: request.params.arguments,
                timestamp: new Date().toISOString(),
                duration: Date.now() - startTime,
            },
        });
        return {
            content: [
                {
                    type: 'text',
                    text: `Error: ${error instanceof Error ? error.message : String(error)}`,
                },
            ],
            isError: true,
        };
    }
    finally {
        // Log request completion with performance metrics
        server.sendLoggingMessage({
            level: 'info',
            data: `Request completed in ${Date.now() - startTime}ms`,
        });
    }
});
// Helper function to format results
// Render crawl result documents as a human-readable summary. Each document
// contributes a URL line, a content preview truncated to 100 characters, and
// (when available) a title line; documents are separated by blank lines.
function formatResults(data) {
    const summaries = data.map((doc) => {
        const body = doc.markdown || doc.html || doc.rawHtml || 'No content';
        const preview = body.substring(0, 100) + (body.length > 100 ? '...' : '');
        const titleLine = doc.metadata?.title ? `Title: ${doc.metadata.title}` : '';
        // Continuation lines of the template are column-0: they are output text.
        return `URL: ${doc.url || 'Unknown URL'}
Content: ${preview}
${titleLine}`;
    });
    return summaries.join('\n\n');
}
// Server startup
// Connect the MCP server over stdio. Startup banners go to console.error
// because stdout carries the MCP protocol stream.
async function runServer() {
    try {
        console.error('Initializing FireCrawl MCP Server...');
        const transport = new StdioServerTransport();
        await server.connect(transport);
        // Now that we're connected, we can send logging messages
        server.sendLoggingMessage({
            level: 'info',
            data: 'FireCrawl MCP Server initialized successfully',
        });
        server.sendLoggingMessage({
            level: 'info',
            data: `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`,
        });
        console.error('FireCrawl MCP Server running on stdio');
    }
    catch (error) {
        console.error('Fatal error running server:', error);
        process.exit(1);
    }
}
// Top-level entry point; any unhandled rejection is fatal.
runServer().catch((error) => {
    console.error('Fatal error running server:', error);
    process.exit(1);
});
// Add type guard for credit usage
// True when an API response object reports a numeric `creditsUsed` field.
function hasCredits(response) {
    if (!('creditsUsed' in response)) {
        return false;
    }
    return typeof response.creditsUsed === 'number';
}