firecrawl-mcp 1.7.1 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +90 -14
- package/dist/index.js +125 -225
- package/dist/index.test.js +1 -1
- package/package.json +7 -5
- package/dist/jest.setup.js +0 -58
- package/dist/src/index.js +0 -1053
- package/dist/src/index.test.js +0 -225
package/LICENSE
CHANGED
package/README.md
CHANGED
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/mendableai/firecrawl) for web scraping capabilities.
|
|
4
4
|
|
|
5
|
-
Big thanks to [@vrknetha](https://github.com/vrknetha), [@cawstudios](https://caw.tech) for the initial implementation!
|
|
5
|
+
> Big thanks to [@vrknetha](https://github.com/vrknetha), [@cawstudios](https://caw.tech) for the initial implementation!
|
|
6
|
+
>
|
|
7
|
+
> You can also play around with [our MCP Server on MCP.so's playground](https://mcp.so/playground?server=firecrawl-mcp-server) or on [Klavis AI](https://www.klavis.ai/mcp-servers). Thanks to MCP.so and Klavis AI for hosting and [@gstarwd](https://github.com/gstarwd) and [@xiangkaiz](https://github.com/xiangkaiz) for integrating our server.
|
|
6
8
|
|
|
7
9
|
## Features
|
|
8
10
|
|
|
@@ -11,10 +13,10 @@ Big thanks to [@vrknetha](https://github.com/vrknetha), [@cawstudios](https://ca
|
|
|
11
13
|
- URL discovery and crawling
|
|
12
14
|
- Web search with content extraction
|
|
13
15
|
- Automatic retries with exponential backoff
|
|
14
|
-
-
|
|
16
|
+
- Efficient batch processing with built-in rate limiting
|
|
15
17
|
- Credit usage monitoring for cloud API
|
|
16
18
|
- Comprehensive logging system
|
|
17
|
-
- Support for cloud and self-hosted
|
|
19
|
+
- Support for cloud and self-hosted Firecrawl instances
|
|
18
20
|
- Mobile/Desktop viewport support
|
|
19
21
|
- Smart content filtering with tag inclusion/exclusion
|
|
20
22
|
|
|
@@ -36,22 +38,44 @@ npm install -g firecrawl-mcp
|
|
|
36
38
|
|
|
37
39
|
Configuring Cursor 🖥️
|
|
38
40
|
Note: Requires Cursor version 0.45.6+
|
|
41
|
+
For the most up-to-date configuration instructions, please refer to the official Cursor documentation on configuring MCP servers:
|
|
42
|
+
[Cursor MCP Server Configuration Guide](https://docs.cursor.com/context/model-context-protocol#configuring-mcp-servers)
|
|
39
43
|
|
|
40
|
-
To configure
|
|
44
|
+
To configure Firecrawl MCP in Cursor **v0.45.6**
|
|
41
45
|
|
|
42
46
|
1. Open Cursor Settings
|
|
43
|
-
2. Go to Features > MCP Servers
|
|
47
|
+
2. Go to Features > MCP Servers
|
|
44
48
|
3. Click "+ Add New MCP Server"
|
|
45
49
|
4. Enter the following:
|
|
46
50
|
- Name: "firecrawl-mcp" (or your preferred name)
|
|
47
51
|
- Type: "command"
|
|
48
52
|
- Command: `env FIRECRAWL_API_KEY=your-api-key npx -y firecrawl-mcp`
|
|
49
53
|
|
|
54
|
+
To configure Firecrawl MCP in Cursor **v0.48.6**
|
|
55
|
+
|
|
56
|
+
1. Open Cursor Settings
|
|
57
|
+
2. Go to Features > MCP Servers
|
|
58
|
+
3. Click "+ Add new global MCP server"
|
|
59
|
+
4. Enter the following code:
|
|
60
|
+
```json
|
|
61
|
+
{
|
|
62
|
+
"mcpServers": {
|
|
63
|
+
"firecrawl-mcp": {
|
|
64
|
+
"command": "npx",
|
|
65
|
+
"args": ["-y", "firecrawl-mcp"],
|
|
66
|
+
"env": {
|
|
67
|
+
"FIRECRAWL_API_KEY": "YOUR-API-KEY"
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
50
74
|
> If you are using Windows and are running into issues, try `cmd /c "set FIRECRAWL_API_KEY=your-api-key && npx -y firecrawl-mcp"`
|
|
51
75
|
|
|
52
|
-
Replace `your-api-key` with your
|
|
76
|
+
Replace `your-api-key` with your Firecrawl API key. If you don't have one yet, you can create an account and get it from https://www.firecrawl.dev/app/api-keys
|
|
53
77
|
|
|
54
|
-
After adding, refresh the MCP server list to see the new tools. The Composer Agent will automatically use
|
|
78
|
+
After adding, refresh the MCP server list to see the new tools. The Composer Agent will automatically use Firecrawl MCP when appropriate, but you can explicitly request it by describing your web scraping needs. Access the Composer via Command+L (Mac), select "Agent" next to the submit button, and enter your query.
|
|
55
79
|
|
|
56
80
|
### Running on Windsurf
|
|
57
81
|
|
|
@@ -64,17 +88,16 @@ Add this to your `./codeium/windsurf/model_config.json`:
|
|
|
64
88
|
"command": "npx",
|
|
65
89
|
"args": ["-y", "firecrawl-mcp"],
|
|
66
90
|
"env": {
|
|
67
|
-
"FIRECRAWL_API_KEY": "
|
|
91
|
+
"FIRECRAWL_API_KEY": "YOUR_API_KEY"
|
|
68
92
|
}
|
|
69
93
|
}
|
|
70
94
|
}
|
|
71
95
|
}
|
|
72
96
|
```
|
|
73
97
|
|
|
74
|
-
|
|
75
98
|
### Installing via Smithery (Legacy)
|
|
76
99
|
|
|
77
|
-
To install
|
|
100
|
+
To install Firecrawl for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@mendableai/mcp-server-firecrawl):
|
|
78
101
|
|
|
79
102
|
```bash
|
|
80
103
|
npx -y @smithery/cli install @mendableai/mcp-server-firecrawl --client claude
|
|
@@ -86,7 +109,7 @@ npx -y @smithery/cli install @mendableai/mcp-server-firecrawl --client claude
|
|
|
86
109
|
|
|
87
110
|
#### Required for Cloud API
|
|
88
111
|
|
|
89
|
-
- `FIRECRAWL_API_KEY`: Your
|
|
112
|
+
- `FIRECRAWL_API_KEY`: Your Firecrawl API key
|
|
90
113
|
- Required when using cloud API (default)
|
|
91
114
|
- Optional when using self-hosted instance with `FIRECRAWL_API_URL`
|
|
92
115
|
- `FIRECRAWL_API_URL` (Optional): Custom API endpoint for self-hosted instances
|
|
@@ -206,7 +229,7 @@ These configurations control:
|
|
|
206
229
|
|
|
207
230
|
### Rate Limiting and Batch Processing
|
|
208
231
|
|
|
209
|
-
The server utilizes
|
|
232
|
+
The server utilizes Firecrawl's built-in rate limiting and batch processing capabilities:
|
|
210
233
|
|
|
211
234
|
- Automatic rate limit handling with exponential backoff
|
|
212
235
|
- Efficient parallel processing for batch operations
|
|
@@ -372,7 +395,60 @@ Example response:
|
|
|
372
395
|
- `enableWebSearch`: Enable web search for additional context
|
|
373
396
|
- `includeSubdomains`: Include subdomains in extraction
|
|
374
397
|
|
|
375
|
-
When using a self-hosted instance, the extraction will use your configured LLM. For cloud API, it uses
|
|
398
|
+
When using a self-hosted instance, the extraction will use your configured LLM. For cloud API, it uses Firecrawl's managed LLM service.
|
|
399
|
+
|
|
400
|
+
### 7. Deep Research Tool (firecrawl_deep_research)
|
|
401
|
+
|
|
402
|
+
Conduct deep web research on a query using intelligent crawling, search, and LLM analysis.
|
|
403
|
+
|
|
404
|
+
```json
|
|
405
|
+
{
|
|
406
|
+
"name": "firecrawl_deep_research",
|
|
407
|
+
"arguments": {
|
|
408
|
+
"query": "how does carbon capture technology work?",
|
|
409
|
+
"maxDepth": 3,
|
|
410
|
+
"timeLimit": 120,
|
|
411
|
+
"maxUrls": 50
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
Arguments:
|
|
417
|
+
|
|
418
|
+
- query (string, required): The research question or topic to explore.
|
|
419
|
+
- maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).
|
|
420
|
+
- timeLimit (number, optional): Time limit in seconds for the research session (default: 120).
|
|
421
|
+
- maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).
|
|
422
|
+
|
|
423
|
+
Returns:
|
|
424
|
+
|
|
425
|
+
- Final analysis generated by an LLM based on the research (`data.finalAnalysis`).
|
|
426
|
+
- May also include structured activities and sources used in the research process.
|
|
427
|
+
|
|
428
|
+
### 8. Generate LLMs.txt Tool (firecrawl_generate_llmstxt)
|
|
429
|
+
|
|
430
|
+
Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.
|
|
431
|
+
|
|
432
|
+
```json
|
|
433
|
+
{
|
|
434
|
+
"name": "firecrawl_generate_llmstxt",
|
|
435
|
+
"arguments": {
|
|
436
|
+
"url": "https://example.com",
|
|
437
|
+
"maxUrls": 20,
|
|
438
|
+
"showFullText": true
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
Arguments:
|
|
444
|
+
|
|
445
|
+
- url (string, required): The base URL of the website to analyze.
|
|
446
|
+
- maxUrls (number, optional): Max number of URLs to include (default: 10).
|
|
447
|
+
- showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.
|
|
448
|
+
|
|
449
|
+
Returns:
|
|
450
|
+
|
|
451
|
+
- Generated llms.txt file contents and, optionally, the llms-full.txt contents (`data.llmstxt` and/or `data.llmsfulltxt`).
|
|
376
452
|
|
|
377
453
|
## Logging System
|
|
378
454
|
|
|
@@ -387,7 +463,7 @@ The server includes comprehensive logging:
|
|
|
387
463
|
Example log messages:
|
|
388
464
|
|
|
389
465
|
```
|
|
390
|
-
[INFO]
|
|
466
|
+
[INFO] Firecrawl MCP Server initialized successfully
|
|
391
467
|
[INFO] Starting scrape for URL: https://example.com
|
|
392
468
|
[INFO] Batch operation queued with ID: batch_1
|
|
393
469
|
[WARNING] Credit usage has reached warning threshold
|
package/dist/index.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
3
3
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
|
+
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
|
|
4
5
|
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
5
6
|
import FirecrawlApp from '@mendable/firecrawl-js';
|
|
6
|
-
import
|
|
7
|
+
import express from 'express';
|
|
7
8
|
import dotenv from 'dotenv';
|
|
8
9
|
dotenv.config();
|
|
9
10
|
// Tool definitions
|
|
@@ -33,6 +34,7 @@ const SCRAPE_TOOL = {
|
|
|
33
34
|
'extract',
|
|
34
35
|
],
|
|
35
36
|
},
|
|
37
|
+
default: ['markdown'],
|
|
36
38
|
description: "Content formats to extract (default: ['markdown'])",
|
|
37
39
|
},
|
|
38
40
|
onlyMainContent: {
|
|
@@ -303,69 +305,6 @@ const CRAWL_TOOL = {
|
|
|
303
305
|
required: ['url'],
|
|
304
306
|
},
|
|
305
307
|
};
|
|
306
|
-
const BATCH_SCRAPE_TOOL = {
|
|
307
|
-
name: 'firecrawl_batch_scrape',
|
|
308
|
-
description: 'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.',
|
|
309
|
-
inputSchema: {
|
|
310
|
-
type: 'object',
|
|
311
|
-
properties: {
|
|
312
|
-
urls: {
|
|
313
|
-
type: 'array',
|
|
314
|
-
items: { type: 'string' },
|
|
315
|
-
description: 'List of URLs to scrape',
|
|
316
|
-
},
|
|
317
|
-
options: {
|
|
318
|
-
type: 'object',
|
|
319
|
-
properties: {
|
|
320
|
-
formats: {
|
|
321
|
-
type: 'array',
|
|
322
|
-
items: {
|
|
323
|
-
type: 'string',
|
|
324
|
-
enum: [
|
|
325
|
-
'markdown',
|
|
326
|
-
'html',
|
|
327
|
-
'rawHtml',
|
|
328
|
-
'screenshot',
|
|
329
|
-
'links',
|
|
330
|
-
'screenshot@fullPage',
|
|
331
|
-
'extract',
|
|
332
|
-
],
|
|
333
|
-
},
|
|
334
|
-
},
|
|
335
|
-
onlyMainContent: {
|
|
336
|
-
type: 'boolean',
|
|
337
|
-
},
|
|
338
|
-
includeTags: {
|
|
339
|
-
type: 'array',
|
|
340
|
-
items: { type: 'string' },
|
|
341
|
-
},
|
|
342
|
-
excludeTags: {
|
|
343
|
-
type: 'array',
|
|
344
|
-
items: { type: 'string' },
|
|
345
|
-
},
|
|
346
|
-
waitFor: {
|
|
347
|
-
type: 'number',
|
|
348
|
-
},
|
|
349
|
-
},
|
|
350
|
-
},
|
|
351
|
-
},
|
|
352
|
-
required: ['urls'],
|
|
353
|
-
},
|
|
354
|
-
};
|
|
355
|
-
const CHECK_BATCH_STATUS_TOOL = {
|
|
356
|
-
name: 'firecrawl_check_batch_status',
|
|
357
|
-
description: 'Check the status of a batch scraping job.',
|
|
358
|
-
inputSchema: {
|
|
359
|
-
type: 'object',
|
|
360
|
-
properties: {
|
|
361
|
-
id: {
|
|
362
|
-
type: 'string',
|
|
363
|
-
description: 'Batch job ID to check',
|
|
364
|
-
},
|
|
365
|
-
},
|
|
366
|
-
required: ['id'],
|
|
367
|
-
},
|
|
368
|
-
};
|
|
369
308
|
const CHECK_CRAWL_STATUS_TOOL = {
|
|
370
309
|
name: 'firecrawl_check_crawl_status',
|
|
371
310
|
description: 'Check the status of a crawl job.',
|
|
@@ -559,13 +498,6 @@ function isCrawlOptions(args) {
|
|
|
559
498
|
'url' in args &&
|
|
560
499
|
typeof args.url === 'string');
|
|
561
500
|
}
|
|
562
|
-
function isBatchScrapeOptions(args) {
|
|
563
|
-
return (typeof args === 'object' &&
|
|
564
|
-
args !== null &&
|
|
565
|
-
'urls' in args &&
|
|
566
|
-
Array.isArray(args.urls) &&
|
|
567
|
-
args.urls.every((url) => typeof url === 'string'));
|
|
568
|
-
}
|
|
569
501
|
function isStatusCheckOptions(args) {
|
|
570
502
|
return (typeof args === 'object' &&
|
|
571
503
|
args !== null &&
|
|
@@ -605,15 +537,13 @@ const server = new Server({
|
|
|
605
537
|
const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
|
|
606
538
|
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
|
|
607
539
|
// Check if API key is required (only for cloud service)
|
|
608
|
-
if (
|
|
540
|
+
if (process.env.CLOUD_SERVICE !== 'true' &&
|
|
541
|
+
!FIRECRAWL_API_URL &&
|
|
542
|
+
!FIRECRAWL_API_KEY) {
|
|
609
543
|
console.error('Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service');
|
|
610
544
|
process.exit(1);
|
|
611
545
|
}
|
|
612
|
-
// Initialize
|
|
613
|
-
const client = new FirecrawlApp({
|
|
614
|
-
apiKey: FIRECRAWL_API_KEY || '',
|
|
615
|
-
...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
|
|
616
|
-
});
|
|
546
|
+
// Note: the Firecrawl client (with optional API URL) is created per request inside the CallTool handler
|
|
617
547
|
// Configuration for retries and monitoring
|
|
618
548
|
const CONFIG = {
|
|
619
549
|
retry: {
|
|
@@ -627,10 +557,6 @@ const CONFIG = {
|
|
|
627
557
|
criticalThreshold: Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
|
|
628
558
|
},
|
|
629
559
|
};
|
|
630
|
-
const creditUsage = {
|
|
631
|
-
total: 0,
|
|
632
|
-
lastCheck: Date.now(),
|
|
633
|
-
};
|
|
634
560
|
// Add utility function for delay
|
|
635
561
|
function delay(ms) {
|
|
636
562
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
@@ -664,58 +590,12 @@ async function withRetry(operation, context, attempt = 1) {
|
|
|
664
590
|
throw error;
|
|
665
591
|
}
|
|
666
592
|
}
|
|
667
|
-
// Add credit monitoring
|
|
668
|
-
async function updateCreditUsage(creditsUsed) {
|
|
669
|
-
creditUsage.total += creditsUsed;
|
|
670
|
-
// Log credit usage
|
|
671
|
-
safeLog('info', `Credit usage: ${creditUsage.total} credits used total`);
|
|
672
|
-
// Check thresholds
|
|
673
|
-
if (creditUsage.total >= CONFIG.credit.criticalThreshold) {
|
|
674
|
-
safeLog('error', `CRITICAL: Credit usage has reached ${creditUsage.total}`);
|
|
675
|
-
}
|
|
676
|
-
else if (creditUsage.total >= CONFIG.credit.warningThreshold) {
|
|
677
|
-
safeLog('warning', `WARNING: Credit usage has reached ${creditUsage.total}`);
|
|
678
|
-
}
|
|
679
|
-
}
|
|
680
|
-
// Initialize queue system
|
|
681
|
-
const batchQueue = new PQueue({ concurrency: 1 });
|
|
682
|
-
const batchOperations = new Map();
|
|
683
|
-
let operationCounter = 0;
|
|
684
|
-
async function processBatchOperation(operation) {
|
|
685
|
-
try {
|
|
686
|
-
operation.status = 'processing';
|
|
687
|
-
let totalCreditsUsed = 0;
|
|
688
|
-
// Use library's built-in batch processing
|
|
689
|
-
const response = await withRetry(async () => client.asyncBatchScrapeUrls(operation.urls, operation.options), `batch ${operation.id} processing`);
|
|
690
|
-
if (!response.success) {
|
|
691
|
-
throw new Error(response.error || 'Batch operation failed');
|
|
692
|
-
}
|
|
693
|
-
// Track credits if using cloud API
|
|
694
|
-
if (!FIRECRAWL_API_URL && hasCredits(response)) {
|
|
695
|
-
totalCreditsUsed += response.creditsUsed;
|
|
696
|
-
await updateCreditUsage(response.creditsUsed);
|
|
697
|
-
}
|
|
698
|
-
operation.status = 'completed';
|
|
699
|
-
operation.result = response;
|
|
700
|
-
// Log final credit usage for the batch
|
|
701
|
-
if (!FIRECRAWL_API_URL) {
|
|
702
|
-
safeLog('info', `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`);
|
|
703
|
-
}
|
|
704
|
-
}
|
|
705
|
-
catch (error) {
|
|
706
|
-
operation.status = 'failed';
|
|
707
|
-
operation.error = error instanceof Error ? error.message : String(error);
|
|
708
|
-
safeLog('error', `Batch ${operation.id} failed: ${operation.error}`);
|
|
709
|
-
}
|
|
710
|
-
}
|
|
711
593
|
// Tool handlers
|
|
712
594
|
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
713
595
|
tools: [
|
|
714
596
|
SCRAPE_TOOL,
|
|
715
597
|
MAP_TOOL,
|
|
716
598
|
CRAWL_TOOL,
|
|
717
|
-
BATCH_SCRAPE_TOOL,
|
|
718
|
-
CHECK_BATCH_STATUS_TOOL,
|
|
719
599
|
CHECK_CRAWL_STATUS_TOOL,
|
|
720
600
|
SEARCH_TOOL,
|
|
721
601
|
EXTRACT_TOOL,
|
|
@@ -727,6 +607,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
727
607
|
const startTime = Date.now();
|
|
728
608
|
try {
|
|
729
609
|
const { name, arguments: args } = request.params;
|
|
610
|
+
const apiKey = process.env.CLOUD_SERVICE
|
|
611
|
+
? request.params._meta?.apiKey
|
|
612
|
+
: FIRECRAWL_API_KEY;
|
|
613
|
+
if (process.env.CLOUD_SERVICE && !apiKey) {
|
|
614
|
+
throw new Error('No API key provided');
|
|
615
|
+
}
|
|
616
|
+
const client = new FirecrawlApp({
|
|
617
|
+
apiKey,
|
|
618
|
+
...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
|
|
619
|
+
});
|
|
730
620
|
// Log incoming request with timestamp
|
|
731
621
|
safeLog('info', `[${new Date().toISOString()}] Received request for tool: ${name}`);
|
|
732
622
|
if (!args) {
|
|
@@ -741,7 +631,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
741
631
|
try {
|
|
742
632
|
const scrapeStartTime = Date.now();
|
|
743
633
|
safeLog('info', `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`);
|
|
744
|
-
const response = await client.scrapeUrl(url,
|
|
634
|
+
const response = await client.scrapeUrl(url, {
|
|
635
|
+
...options,
|
|
636
|
+
// @ts-expect-error Extended API options including origin
|
|
637
|
+
origin: 'mcp-server',
|
|
638
|
+
});
|
|
745
639
|
// Log performance metrics
|
|
746
640
|
safeLog('info', `Scrape completed in ${Date.now() - scrapeStartTime}ms`);
|
|
747
641
|
if ('success' in response && !response.success) {
|
|
@@ -798,12 +692,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
798
692
|
throw new Error('Invalid arguments for firecrawl_map');
|
|
799
693
|
}
|
|
800
694
|
const { url, ...options } = args;
|
|
801
|
-
const response = await client.mapUrl(url,
|
|
695
|
+
const response = await client.mapUrl(url, {
|
|
696
|
+
...options,
|
|
697
|
+
// @ts-expect-error Extended API options including origin
|
|
698
|
+
origin: 'mcp-server',
|
|
699
|
+
});
|
|
802
700
|
if ('error' in response) {
|
|
803
701
|
throw new Error(response.error);
|
|
804
702
|
}
|
|
805
703
|
if (!response.links) {
|
|
806
|
-
throw new Error('No links received from
|
|
704
|
+
throw new Error('No links received from Firecrawl API');
|
|
807
705
|
}
|
|
808
706
|
return {
|
|
809
707
|
content: [
|
|
@@ -812,87 +710,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
812
710
|
isError: false,
|
|
813
711
|
};
|
|
814
712
|
}
|
|
815
|
-
case 'firecrawl_batch_scrape': {
|
|
816
|
-
if (!isBatchScrapeOptions(args)) {
|
|
817
|
-
throw new Error('Invalid arguments for firecrawl_batch_scrape');
|
|
818
|
-
}
|
|
819
|
-
try {
|
|
820
|
-
const operationId = `batch_${++operationCounter}`;
|
|
821
|
-
const operation = {
|
|
822
|
-
id: operationId,
|
|
823
|
-
urls: args.urls,
|
|
824
|
-
options: args.options,
|
|
825
|
-
status: 'pending',
|
|
826
|
-
progress: {
|
|
827
|
-
completed: 0,
|
|
828
|
-
total: args.urls.length,
|
|
829
|
-
},
|
|
830
|
-
};
|
|
831
|
-
batchOperations.set(operationId, operation);
|
|
832
|
-
// Queue the operation
|
|
833
|
-
batchQueue.add(() => processBatchOperation(operation));
|
|
834
|
-
safeLog('info', `Queued batch operation ${operationId} with ${args.urls.length} URLs`);
|
|
835
|
-
return {
|
|
836
|
-
content: [
|
|
837
|
-
{
|
|
838
|
-
type: 'text',
|
|
839
|
-
text: trimResponseText(`Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`),
|
|
840
|
-
},
|
|
841
|
-
],
|
|
842
|
-
isError: false,
|
|
843
|
-
};
|
|
844
|
-
}
|
|
845
|
-
catch (error) {
|
|
846
|
-
const errorMessage = error instanceof Error
|
|
847
|
-
? error.message
|
|
848
|
-
: `Batch operation failed: ${JSON.stringify(error)}`;
|
|
849
|
-
return {
|
|
850
|
-
content: [{ type: 'text', text: trimResponseText(errorMessage) }],
|
|
851
|
-
isError: true,
|
|
852
|
-
};
|
|
853
|
-
}
|
|
854
|
-
}
|
|
855
|
-
case 'firecrawl_check_batch_status': {
|
|
856
|
-
if (!isStatusCheckOptions(args)) {
|
|
857
|
-
throw new Error('Invalid arguments for firecrawl_check_batch_status');
|
|
858
|
-
}
|
|
859
|
-
const operation = batchOperations.get(args.id);
|
|
860
|
-
if (!operation) {
|
|
861
|
-
return {
|
|
862
|
-
content: [
|
|
863
|
-
{
|
|
864
|
-
type: 'text',
|
|
865
|
-
text: trimResponseText(`No batch operation found with ID: ${args.id}`),
|
|
866
|
-
},
|
|
867
|
-
],
|
|
868
|
-
isError: true,
|
|
869
|
-
};
|
|
870
|
-
}
|
|
871
|
-
const status = `Batch Status:
|
|
872
|
-
Status: ${operation.status}
|
|
873
|
-
Progress: ${operation.progress.completed}/${operation.progress.total}
|
|
874
|
-
${operation.error ? `Error: ${operation.error}` : ''}
|
|
875
|
-
${operation.result
|
|
876
|
-
? `Results: ${JSON.stringify(operation.result, null, 2)}`
|
|
877
|
-
: ''}`;
|
|
878
|
-
return {
|
|
879
|
-
content: [{ type: 'text', text: trimResponseText(status) }],
|
|
880
|
-
isError: false,
|
|
881
|
-
};
|
|
882
|
-
}
|
|
883
713
|
case 'firecrawl_crawl': {
|
|
884
714
|
if (!isCrawlOptions(args)) {
|
|
885
715
|
throw new Error('Invalid arguments for firecrawl_crawl');
|
|
886
716
|
}
|
|
887
717
|
const { url, ...options } = args;
|
|
888
|
-
const response = await withRetry(async () =>
|
|
718
|
+
const response = await withRetry(async () =>
|
|
719
|
+
// @ts-expect-error Extended API options including origin
|
|
720
|
+
client.asyncCrawlUrl(url, { ...options, origin: 'mcp-server' }), 'crawl operation');
|
|
889
721
|
if (!response.success) {
|
|
890
722
|
throw new Error(response.error);
|
|
891
723
|
}
|
|
892
|
-
// Monitor credits for cloud API
|
|
893
|
-
if (!FIRECRAWL_API_URL && hasCredits(response)) {
|
|
894
|
-
await updateCreditUsage(response.creditsUsed);
|
|
895
|
-
}
|
|
896
724
|
return {
|
|
897
725
|
content: [
|
|
898
726
|
{
|
|
@@ -927,14 +755,10 @@ ${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}
|
|
|
927
755
|
throw new Error('Invalid arguments for firecrawl_search');
|
|
928
756
|
}
|
|
929
757
|
try {
|
|
930
|
-
const response = await withRetry(async () => client.search(args.query, args), 'search operation');
|
|
758
|
+
const response = await withRetry(async () => client.search(args.query, { ...args, origin: 'mcp-server' }), 'search operation');
|
|
931
759
|
if (!response.success) {
|
|
932
760
|
throw new Error(`Search failed: ${response.error || 'Unknown error'}`);
|
|
933
761
|
}
|
|
934
|
-
// Monitor credits for cloud API
|
|
935
|
-
if (!FIRECRAWL_API_URL && hasCredits(response)) {
|
|
936
|
-
await updateCreditUsage(response.creditsUsed);
|
|
937
|
-
}
|
|
938
762
|
// Format the results
|
|
939
763
|
const results = response.data
|
|
940
764
|
.map((result) => `URL: ${result.url}
|
|
@@ -982,10 +806,6 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
|
|
|
982
806
|
throw new Error(extractResponse.error || 'Extraction failed');
|
|
983
807
|
}
|
|
984
808
|
const response = extractResponse;
|
|
985
|
-
// Monitor credits for cloud API
|
|
986
|
-
if (!FIRECRAWL_API_URL && hasCredits(response)) {
|
|
987
|
-
await updateCreditUsage(response.creditsUsed || 0);
|
|
988
|
-
}
|
|
989
809
|
// Log performance metrics
|
|
990
810
|
safeLog('info', `Extraction completed in ${Date.now() - extractStartTime}ms`);
|
|
991
811
|
// Add warning to response if present
|
|
@@ -1036,6 +856,8 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
|
|
|
1036
856
|
maxDepth: args.maxDepth,
|
|
1037
857
|
timeLimit: args.timeLimit,
|
|
1038
858
|
maxUrls: args.maxUrls,
|
|
859
|
+
// @ts-expect-error Extended API options including origin
|
|
860
|
+
origin: 'mcp-server',
|
|
1039
861
|
},
|
|
1040
862
|
// Activity callback
|
|
1041
863
|
(activity) => {
|
|
@@ -1083,7 +905,9 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
|
|
|
1083
905
|
const generateStartTime = Date.now();
|
|
1084
906
|
safeLog('info', `Starting LLMs.txt generation for URL: ${url}`);
|
|
1085
907
|
// Start the generation process
|
|
1086
|
-
const response = await withRetry(async () =>
|
|
908
|
+
const response = await withRetry(async () =>
|
|
909
|
+
// @ts-expect-error Extended API options including origin
|
|
910
|
+
client.generateLLMsText(url, { ...params, origin: 'mcp-server' }), 'LLMs.txt generation');
|
|
1087
911
|
if (!response.success) {
|
|
1088
912
|
throw new Error(response.error || 'LLMs.txt generation failed');
|
|
1089
913
|
}
|
|
@@ -1154,19 +978,15 @@ ${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`;
|
|
|
1154
978
|
})
|
|
1155
979
|
.join('\n\n');
|
|
1156
980
|
}
|
|
1157
|
-
// Add type guard for credit usage
|
|
1158
|
-
function hasCredits(response) {
|
|
1159
|
-
return 'creditsUsed' in response && typeof response.creditsUsed === 'number';
|
|
1160
|
-
}
|
|
1161
981
|
// Utility function to trim trailing whitespace from text responses
|
|
1162
982
|
// This prevents Claude API errors with "final assistant content cannot end with trailing whitespace"
|
|
1163
983
|
function trimResponseText(text) {
|
|
1164
984
|
return text.trim();
|
|
1165
985
|
}
|
|
1166
986
|
// Server startup
|
|
1167
|
-
async function
|
|
987
|
+
async function runLocalServer() {
|
|
1168
988
|
try {
|
|
1169
|
-
console.error('Initializing
|
|
989
|
+
console.error('Initializing Firecrawl MCP Server...');
|
|
1170
990
|
const transport = new StdioServerTransport();
|
|
1171
991
|
// Detect if we're using stdio transport
|
|
1172
992
|
isStdioTransport = transport instanceof StdioServerTransport;
|
|
@@ -1175,16 +995,96 @@ async function runServer() {
|
|
|
1175
995
|
}
|
|
1176
996
|
await server.connect(transport);
|
|
1177
997
|
// Now that we're connected, we can send logging messages
|
|
1178
|
-
safeLog('info', '
|
|
998
|
+
safeLog('info', 'Firecrawl MCP Server initialized successfully');
|
|
1179
999
|
safeLog('info', `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`);
|
|
1180
|
-
console.error('
|
|
1000
|
+
console.error('Firecrawl MCP Server running on stdio');
|
|
1181
1001
|
}
|
|
1182
1002
|
catch (error) {
|
|
1183
1003
|
console.error('Fatal error running server:', error);
|
|
1184
1004
|
process.exit(1);
|
|
1185
1005
|
}
|
|
1186
1006
|
}
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1007
|
+
async function runSSELocalServer() {
|
|
1008
|
+
let transport = null;
|
|
1009
|
+
const app = express();
|
|
1010
|
+
app.get('/sse', async (req, res) => {
|
|
1011
|
+
transport = new SSEServerTransport(`/messages`, res);
|
|
1012
|
+
res.on('close', () => {
|
|
1013
|
+
transport = null;
|
|
1014
|
+
});
|
|
1015
|
+
await server.connect(transport);
|
|
1016
|
+
});
|
|
1017
|
+
// Endpoint for the client to POST messages
|
|
1018
|
+
// No express.json() middleware on this route - the request is handed to the transport unparsed
|
|
1019
|
+
app.post('/messages', (req, res) => {
|
|
1020
|
+
if (transport) {
|
|
1021
|
+
transport.handlePostMessage(req, res);
|
|
1022
|
+
}
|
|
1023
|
+
});
|
|
1024
|
+
}
|
|
1025
|
+
async function runSSECloudServer() {
|
|
1026
|
+
const transports = {};
|
|
1027
|
+
const app = express();
|
|
1028
|
+
app.get('/:apiKey/sse', async (req, res) => {
|
|
1029
|
+
const apiKey = req.params.apiKey;
|
|
1030
|
+
const transport = new SSEServerTransport(`/${apiKey}/messages`, res);
|
|
1031
|
+
// TODO: validate the API key and close the connection if it is invalid
|
|
1032
|
+
const compositeKey = `${apiKey}-${transport.sessionId}`;
|
|
1033
|
+
transports[compositeKey] = transport;
|
|
1034
|
+
res.on('close', () => {
|
|
1035
|
+
delete transports[compositeKey];
|
|
1036
|
+
});
|
|
1037
|
+
await server.connect(transport);
|
|
1038
|
+
});
|
|
1039
|
+
// Endpoint for the client to POST messages
|
|
1040
|
+
// express.json() parses the body here so the API key from the URL can be injected into params._meta; the enriched body is then passed to the transport
|
|
1041
|
+
app.post('/:apiKey/messages', express.json(), async (req, res) => {
|
|
1042
|
+
const apiKey = req.params.apiKey;
|
|
1043
|
+
const body = req.body;
|
|
1044
|
+
const enrichedBody = {
|
|
1045
|
+
...body,
|
|
1046
|
+
};
|
|
1047
|
+
if (enrichedBody && enrichedBody.params && !enrichedBody.params._meta) {
|
|
1048
|
+
enrichedBody.params._meta = { apiKey };
|
|
1049
|
+
}
|
|
1050
|
+
else if (enrichedBody &&
|
|
1051
|
+
enrichedBody.params &&
|
|
1052
|
+
enrichedBody.params._meta) {
|
|
1053
|
+
enrichedBody.params._meta.apiKey = apiKey;
|
|
1054
|
+
}
|
|
1055
|
+
console.log('enrichedBody', enrichedBody);
|
|
1056
|
+
const sessionId = req.query.sessionId;
|
|
1057
|
+
const compositeKey = `${apiKey}-${sessionId}`;
|
|
1058
|
+
const transport = transports[compositeKey];
|
|
1059
|
+
if (transport) {
|
|
1060
|
+
await transport.handlePostMessage(req, res, enrichedBody);
|
|
1061
|
+
}
|
|
1062
|
+
else {
|
|
1063
|
+
res.status(400).send('No transport found for sessionId');
|
|
1064
|
+
}
|
|
1065
|
+
});
|
|
1066
|
+
const PORT = 3000;
|
|
1067
|
+
app.listen(PORT, () => {
|
|
1068
|
+
console.log(`MCP SSE Server listening on http://localhost:${PORT}`);
|
|
1069
|
+
console.log(`SSE endpoint: http://localhost:${PORT}/sse`);
|
|
1070
|
+
console.log(`Message endpoint: http://localhost:${PORT}/messages`);
|
|
1071
|
+
});
|
|
1072
|
+
}
|
|
1073
|
+
if (process.env.CLOUD_SERVICE === 'true') {
|
|
1074
|
+
runSSECloudServer().catch((error) => {
|
|
1075
|
+
console.error('Fatal error running server:', error);
|
|
1076
|
+
process.exit(1);
|
|
1077
|
+
});
|
|
1078
|
+
}
|
|
1079
|
+
else if (process.env.SSE_LOCAL === 'true') {
|
|
1080
|
+
runSSELocalServer().catch((error) => {
|
|
1081
|
+
console.error('Fatal error running server:', error);
|
|
1082
|
+
process.exit(1);
|
|
1083
|
+
});
|
|
1084
|
+
}
|
|
1085
|
+
else {
|
|
1086
|
+
runLocalServer().catch((error) => {
|
|
1087
|
+
console.error('Fatal error running server:', error);
|
|
1088
|
+
process.exit(1);
|
|
1089
|
+
});
|
|
1090
|
+
}
|