firecrawl-mcp 1.9.0 → 1.10.0
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as they appear in their public registries.
- package/README.md +10 -0
- package/dist/index.js +12 -0
- package/dist/jest.setup.js +58 -0
- package/dist/src/index.js +1053 -0
- package/dist/src/index.test.js +225 -0
- package/package.json +1 -1
package/README.md CHANGED

````diff
@@ -95,6 +95,16 @@ Add this to your `./codeium/windsurf/model_config.json`:
 }
 ```
 
+### Running with SSE Local Mode
+
+To run the server using Server-Sent Events (SSE) locally instead of the default stdio transport:
+
+```bash
+env SSE_LOCAL=true FIRECRAWL_API_KEY=fc-YOUR_API_KEY npx -y firecrawl-mcp
+```
+
+Use the url: http://localhost:3000/sse
+
 ### Installing via Smithery (Legacy)
 
 To install Firecrawl for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@mendableai/mcp-server-firecrawl):
````
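
The SSE endpoint documented above is served by the startup code added to `dist/index.js` in the next hunk. As a quick sanity check once the server is running, the stream can be opened with curl (a sketch, assuming the default port 3000 used by that code):

```bash
# -N disables buffering so SSE events print as they arrive
curl -N http://localhost:3000/sse
```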
package/dist/index.js CHANGED

````diff
@@ -1021,6 +1021,18 @@ async function runSSELocalServer() {
             transport.handlePostMessage(req, res);
         }
     });
+    const PORT = process.env.PORT || 3000;
+    console.log('Starting server on port', PORT);
+    try {
+        app.listen(PORT, () => {
+            console.log(`MCP SSE Server listening on http://localhost:${PORT}`);
+            console.log(`SSE endpoint: http://localhost:${PORT}/sse`);
+            console.log(`Message endpoint: http://localhost:${PORT}/messages`);
+        });
+    }
+    catch (error) {
+        console.error('Error starting server:', error);
+    }
 }
 async function runSSECloudServer() {
     const transports = {};
````
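
Because the new block reads `process.env.PORT` and falls back to 3000, the listen port can be overridden at launch without code changes; for example (port value illustrative):

```bash
env SSE_LOCAL=true PORT=8080 FIRECRAWL_API_KEY=fc-YOUR_API_KEY npx -y firecrawl-mcp
```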
package/dist/jest.setup.js ADDED

````diff
@@ -0,0 +1,58 @@
+import { jest } from '@jest/globals';
+// Set test timeout
+jest.setTimeout(30000);
+// Create mock responses
+const mockSearchResponse = {
+    success: true,
+    data: [
+        {
+            url: 'https://example.com',
+            title: 'Test Page',
+            description: 'Test Description',
+            markdown: '# Test Content',
+            actions: null,
+        },
+    ],
+};
+const mockBatchScrapeResponse = {
+    success: true,
+    id: 'test-batch-id',
+};
+const mockBatchStatusResponse = {
+    success: true,
+    status: 'completed',
+    completed: 1,
+    total: 1,
+    creditsUsed: 1,
+    expiresAt: new Date(),
+    data: [
+        {
+            url: 'https://example.com',
+            title: 'Test Page',
+            description: 'Test Description',
+            markdown: '# Test Content',
+            actions: null,
+        },
+    ],
+};
+// Create mock instance methods
+const mockSearch = jest.fn().mockImplementation(async () => mockSearchResponse);
+const mockAsyncBatchScrapeUrls = jest
+    .fn()
+    .mockImplementation(async () => mockBatchScrapeResponse);
+const mockCheckBatchScrapeStatus = jest
+    .fn()
+    .mockImplementation(async () => mockBatchStatusResponse);
+// Create mock instance
+const mockInstance = {
+    apiKey: 'test-api-key',
+    apiUrl: 'test-api-url',
+    search: mockSearch,
+    asyncBatchScrapeUrls: mockAsyncBatchScrapeUrls,
+    checkBatchScrapeStatus: mockCheckBatchScrapeStatus,
+};
+// Mock the module
+jest.mock('@mendable/firecrawl-js', () => ({
+    __esModule: true,
+    default: jest.fn().mockImplementation(() => mockInstance),
+}));
````
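
This setup file mocks `@mendable/firecrawl-js` before the suites run. A file like this would typically be registered via Jest's `setupFilesAfterEnv` option; a minimal sketch, assuming the file ships at the path shown in this diff:

```js
// jest.config.js (illustrative wiring, not part of this package diff)
export default {
    setupFilesAfterEnv: ['<rootDir>/dist/jest.setup.js'],
};
```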
package/dist/src/index.js ADDED

````diff
@@ -0,0 +1,1053 @@
+#!/usr/bin/env node
+import { Server } from '@modelcontextprotocol/sdk/server/index.js';
+import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
+import FirecrawlApp from '@mendable/firecrawl-js';
+import PQueue from 'p-queue';
+import dotenv from 'dotenv';
+dotenv.config();
+// Tool definitions
+const SCRAPE_TOOL = {
+    name: 'firecrawl_scrape',
+    description: 'Scrape a single webpage with advanced options for content extraction. ' +
+        'Supports various formats including markdown, HTML, and screenshots. ' +
+        'Can execute custom actions like clicking or scrolling before scraping.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            url: {
+                type: 'string',
+                description: 'The URL to scrape',
+            },
+            formats: {
+                type: 'array',
+                items: {
+                    type: 'string',
+                    enum: [
+                        'markdown',
+                        'html',
+                        'rawHtml',
+                        'screenshot',
+                        'links',
+                        'screenshot@fullPage',
+                        'extract',
+                    ],
+                },
+                description: "Content formats to extract (default: ['markdown'])",
+            },
+            onlyMainContent: {
+                type: 'boolean',
+                description: 'Extract only the main content, filtering out navigation, footers, etc.',
+            },
+            includeTags: {
+                type: 'array',
+                items: { type: 'string' },
+                description: 'HTML tags to specifically include in extraction',
+            },
+            excludeTags: {
+                type: 'array',
+                items: { type: 'string' },
+                description: 'HTML tags to exclude from extraction',
+            },
+            waitFor: {
+                type: 'number',
+                description: 'Time in milliseconds to wait for dynamic content to load',
+            },
+            timeout: {
+                type: 'number',
+                description: 'Maximum time in milliseconds to wait for the page to load',
+            },
+            actions: {
+                type: 'array',
+                items: {
+                    type: 'object',
+                    properties: {
+                        type: {
+                            type: 'string',
+                            enum: [
+                                'wait',
+                                'click',
+                                'screenshot',
+                                'write',
+                                'press',
+                                'scroll',
+                                'scrape',
+                                'executeJavascript',
+                            ],
+                            description: 'Type of action to perform',
+                        },
+                        selector: {
+                            type: 'string',
+                            description: 'CSS selector for the target element',
+                        },
+                        milliseconds: {
+                            type: 'number',
+                            description: 'Time to wait in milliseconds (for wait action)',
+                        },
+                        text: {
+                            type: 'string',
+                            description: 'Text to write (for write action)',
+                        },
+                        key: {
+                            type: 'string',
+                            description: 'Key to press (for press action)',
+                        },
+                        direction: {
+                            type: 'string',
+                            enum: ['up', 'down'],
+                            description: 'Scroll direction',
+                        },
+                        script: {
+                            type: 'string',
+                            description: 'JavaScript code to execute',
+                        },
+                        fullPage: {
+                            type: 'boolean',
+                            description: 'Take full page screenshot',
+                        },
+                    },
+                    required: ['type'],
+                },
+                description: 'List of actions to perform before scraping',
+            },
+            extract: {
+                type: 'object',
+                properties: {
+                    schema: {
+                        type: 'object',
+                        description: 'Schema for structured data extraction',
+                    },
+                    systemPrompt: {
+                        type: 'string',
+                        description: 'System prompt for LLM extraction',
+                    },
+                    prompt: {
+                        type: 'string',
+                        description: 'User prompt for LLM extraction',
+                    },
+                },
+                description: 'Configuration for structured data extraction',
+            },
+            mobile: {
+                type: 'boolean',
+                description: 'Use mobile viewport',
+            },
+            skipTlsVerification: {
+                type: 'boolean',
+                description: 'Skip TLS certificate verification',
+            },
+            removeBase64Images: {
+                type: 'boolean',
+                description: 'Remove base64 encoded images from output',
+            },
+            location: {
+                type: 'object',
+                properties: {
+                    country: {
+                        type: 'string',
+                        description: 'Country code for geolocation',
+                    },
+                    languages: {
+                        type: 'array',
+                        items: { type: 'string' },
+                        description: 'Language codes for content',
+                    },
+                },
+                description: 'Location settings for scraping',
+            },
+        },
+        required: ['url'],
+    },
+};
+const MAP_TOOL = {
+    name: 'firecrawl_map',
+    description: 'Discover URLs from a starting point. Can use both sitemap.xml and HTML link discovery.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            url: {
+                type: 'string',
+                description: 'Starting URL for URL discovery',
+            },
+            search: {
+                type: 'string',
+                description: 'Optional search term to filter URLs',
+            },
+            ignoreSitemap: {
+                type: 'boolean',
+                description: 'Skip sitemap.xml discovery and only use HTML links',
+            },
+            sitemapOnly: {
+                type: 'boolean',
+                description: 'Only use sitemap.xml for discovery, ignore HTML links',
+            },
+            includeSubdomains: {
+                type: 'boolean',
+                description: 'Include URLs from subdomains in results',
+            },
+            limit: {
+                type: 'number',
+                description: 'Maximum number of URLs to return',
+            },
+        },
+        required: ['url'],
+    },
+};
+const CRAWL_TOOL = {
+    name: 'firecrawl_crawl',
+    description: 'Start an asynchronous crawl of multiple pages from a starting URL. ' +
+        'Supports depth control, path filtering, and webhook notifications.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            url: {
+                type: 'string',
+                description: 'Starting URL for the crawl',
+            },
+            excludePaths: {
+                type: 'array',
+                items: { type: 'string' },
+                description: 'URL paths to exclude from crawling',
+            },
+            includePaths: {
+                type: 'array',
+                items: { type: 'string' },
+                description: 'Only crawl these URL paths',
+            },
+            maxDepth: {
+                type: 'number',
+                description: 'Maximum link depth to crawl',
+            },
+            ignoreSitemap: {
+                type: 'boolean',
+                description: 'Skip sitemap.xml discovery',
+            },
+            limit: {
+                type: 'number',
+                description: 'Maximum number of pages to crawl',
+            },
+            allowBackwardLinks: {
+                type: 'boolean',
+                description: 'Allow crawling links that point to parent directories',
+            },
+            allowExternalLinks: {
+                type: 'boolean',
+                description: 'Allow crawling links to external domains',
+            },
+            webhook: {
+                oneOf: [
+                    {
+                        type: 'string',
+                        description: 'Webhook URL to notify when crawl is complete',
+                    },
+                    {
+                        type: 'object',
+                        properties: {
+                            url: {
+                                type: 'string',
+                                description: 'Webhook URL',
+                            },
+                            headers: {
+                                type: 'object',
+                                description: 'Custom headers for webhook requests',
+                            },
+                        },
+                        required: ['url'],
+                    },
+                ],
+            },
+            deduplicateSimilarURLs: {
+                type: 'boolean',
+                description: 'Remove similar URLs during crawl',
+            },
+            ignoreQueryParameters: {
+                type: 'boolean',
+                description: 'Ignore query parameters when comparing URLs',
+            },
+            scrapeOptions: {
+                type: 'object',
+                properties: {
+                    formats: {
+                        type: 'array',
+                        items: {
+                            type: 'string',
+                            enum: [
+                                'markdown',
+                                'html',
+                                'rawHtml',
+                                'screenshot',
+                                'links',
+                                'screenshot@fullPage',
+                                'extract',
+                            ],
+                        },
+                    },
+                    onlyMainContent: {
+                        type: 'boolean',
+                    },
+                    includeTags: {
+                        type: 'array',
+                        items: { type: 'string' },
+                    },
+                    excludeTags: {
+                        type: 'array',
+                        items: { type: 'string' },
+                    },
+                    waitFor: {
+                        type: 'number',
+                    },
+                },
+                description: 'Options for scraping each page',
+            },
+        },
+        required: ['url'],
+    },
+};
+const BATCH_SCRAPE_TOOL = {
+    name: 'firecrawl_batch_scrape',
+    description: 'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            urls: {
+                type: 'array',
+                items: { type: 'string' },
+                description: 'List of URLs to scrape',
+            },
+            options: {
+                type: 'object',
+                properties: {
+                    formats: {
+                        type: 'array',
+                        items: {
+                            type: 'string',
+                            enum: [
+                                'markdown',
+                                'html',
+                                'rawHtml',
+                                'screenshot',
+                                'links',
+                                'screenshot@fullPage',
+                                'extract',
+                            ],
+                        },
+                    },
+                    onlyMainContent: {
+                        type: 'boolean',
+                    },
+                    includeTags: {
+                        type: 'array',
+                        items: { type: 'string' },
+                    },
+                    excludeTags: {
+                        type: 'array',
+                        items: { type: 'string' },
+                    },
+                    waitFor: {
+                        type: 'number',
+                    },
+                },
+            },
+        },
+        required: ['urls'],
+    },
+};
+const CHECK_BATCH_STATUS_TOOL = {
+    name: 'firecrawl_check_batch_status',
+    description: 'Check the status of a batch scraping job.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            id: {
+                type: 'string',
+                description: 'Batch job ID to check',
+            },
+        },
+        required: ['id'],
+    },
+};
+const CHECK_CRAWL_STATUS_TOOL = {
+    name: 'firecrawl_check_crawl_status',
+    description: 'Check the status of a crawl job.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            id: {
+                type: 'string',
+                description: 'Crawl job ID to check',
+            },
+        },
+        required: ['id'],
+    },
+};
+const SEARCH_TOOL = {
+    name: 'firecrawl_search',
+    description: 'Search and retrieve content from web pages with optional scraping. ' +
+        'Returns SERP results by default (url, title, description) or full page content when scrapeOptions are provided.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            query: {
+                type: 'string',
+                description: 'Search query string',
+            },
+            limit: {
+                type: 'number',
+                description: 'Maximum number of results to return (default: 5)',
+            },
+            lang: {
+                type: 'string',
+                description: 'Language code for search results (default: en)',
+            },
+            country: {
+                type: 'string',
+                description: 'Country code for search results (default: us)',
+            },
+            tbs: {
+                type: 'string',
+                description: 'Time-based search filter',
+            },
+            filter: {
+                type: 'string',
+                description: 'Search filter',
+            },
+            location: {
+                type: 'object',
+                properties: {
+                    country: {
+                        type: 'string',
+                        description: 'Country code for geolocation',
+                    },
+                    languages: {
+                        type: 'array',
+                        items: { type: 'string' },
+                        description: 'Language codes for content',
+                    },
+                },
+                description: 'Location settings for search',
+            },
+            scrapeOptions: {
+                type: 'object',
+                properties: {
+                    formats: {
+                        type: 'array',
+                        items: {
+                            type: 'string',
+                            enum: ['markdown', 'html', 'rawHtml'],
+                        },
+                        description: 'Content formats to extract from search results',
+                    },
+                    onlyMainContent: {
+                        type: 'boolean',
+                        description: 'Extract only the main content from results',
+                    },
+                    waitFor: {
+                        type: 'number',
+                        description: 'Time in milliseconds to wait for dynamic content',
+                    },
+                },
+                description: 'Options for scraping search results',
+            },
+        },
+        required: ['query'],
+    },
+};
+const EXTRACT_TOOL = {
+    name: 'firecrawl_extract',
+    description: 'Extract structured information from web pages using LLM. ' +
+        'Supports both cloud AI and self-hosted LLM extraction.',
+    inputSchema: {
+        type: 'object',
+        properties: {
+            urls: {
+                type: 'array',
+                items: { type: 'string' },
+                description: 'List of URLs to extract information from',
+            },
+            prompt: {
+                type: 'string',
+                description: 'Prompt for the LLM extraction',
+            },
+            systemPrompt: {
+                type: 'string',
+                description: 'System prompt for LLM extraction',
+            },
+            schema: {
+                type: 'object',
+                description: 'JSON schema for structured data extraction',
+            },
+            allowExternalLinks: {
+                type: 'boolean',
+                description: 'Allow extraction from external links',
+            },
+            enableWebSearch: {
+                type: 'boolean',
+                description: 'Enable web search for additional context',
+            },
+            includeSubdomains: {
+                type: 'boolean',
+                description: 'Include subdomains in extraction',
+            },
+        },
+        required: ['urls'],
+    },
+};
+// Type guards
+function isScrapeOptions(args) {
+    return (typeof args === 'object' &&
+        args !== null &&
+        'url' in args &&
+        typeof args.url === 'string');
+}
+function isMapOptions(args) {
+    return (typeof args === 'object' &&
+        args !== null &&
+        'url' in args &&
+        typeof args.url === 'string');
+}
+function isCrawlOptions(args) {
+    return (typeof args === 'object' &&
+        args !== null &&
+        'url' in args &&
+        typeof args.url === 'string');
+}
+function isBatchScrapeOptions(args) {
+    return (typeof args === 'object' &&
+        args !== null &&
+        'urls' in args &&
+        Array.isArray(args.urls) &&
+        args.urls.every((url) => typeof url === 'string'));
+}
+function isStatusCheckOptions(args) {
+    return (typeof args === 'object' &&
+        args !== null &&
+        'id' in args &&
+        typeof args.id === 'string');
+}
+function isSearchOptions(args) {
+    return (typeof args === 'object' &&
+        args !== null &&
+        'query' in args &&
+        typeof args.query === 'string');
+}
+function isExtractOptions(args) {
+    if (typeof args !== 'object' || args === null)
+        return false;
+    const { urls } = args;
+    return (Array.isArray(urls) &&
+        urls.every((url) => typeof url === 'string'));
+}
+// Server implementation
+const server = new Server({
+    name: 'firecrawl-mcp',
+    version: '1.3.2',
+}, {
+    capabilities: {
+        tools: {},
+        logging: {},
+    },
+});
+// Get optional API URL
+const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
+const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
+// Check if API key is required (only for cloud service)
+if (!FIRECRAWL_API_URL && !FIRECRAWL_API_KEY) {
+    console.error('Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service');
+    process.exit(1);
+}
+// Initialize FireCrawl client with optional API URL
+const client = new FirecrawlApp({
+    apiKey: FIRECRAWL_API_KEY || '',
+    ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
+});
+// Configuration for retries and monitoring
+const CONFIG = {
+    retry: {
+        maxAttempts: Number(process.env.FIRECRAWL_RETRY_MAX_ATTEMPTS) || 3,
+        initialDelay: Number(process.env.FIRECRAWL_RETRY_INITIAL_DELAY) || 1000,
+        maxDelay: Number(process.env.FIRECRAWL_RETRY_MAX_DELAY) || 10000,
+        backoffFactor: Number(process.env.FIRECRAWL_RETRY_BACKOFF_FACTOR) || 2,
+    },
+    credit: {
+        warningThreshold: Number(process.env.FIRECRAWL_CREDIT_WARNING_THRESHOLD) || 1000,
+        criticalThreshold: Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
+    },
+};
+const creditUsage = {
+    total: 0,
+    lastCheck: Date.now(),
+};
+// Add utility function for delay
+function delay(ms) {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+}
+// Add retry logic with exponential backoff
+async function withRetry(operation, context, attempt = 1) {
+    try {
+        return await operation();
+    }
+    catch (error) {
+        const isRateLimit = error instanceof Error &&
+            (error.message.includes('rate limit') || error.message.includes('429'));
+        if (isRateLimit && attempt < CONFIG.retry.maxAttempts) {
+            const delayMs = Math.min(CONFIG.retry.initialDelay *
+                Math.pow(CONFIG.retry.backoffFactor, attempt - 1), CONFIG.retry.maxDelay);
+            server.sendLoggingMessage({
+                level: 'warning',
+                data: `Rate limit hit for ${context}. Attempt ${attempt}/${CONFIG.retry.maxAttempts}. Retrying in ${delayMs}ms`,
+            });
+            await delay(delayMs);
+            return withRetry(operation, context, attempt + 1);
+        }
+        throw error;
+    }
+}
+// Add credit monitoring
+async function updateCreditUsage(creditsUsed) {
+    creditUsage.total += creditsUsed;
+    // Log credit usage
+    server.sendLoggingMessage({
+        level: 'info',
+        data: `Credit usage: ${creditUsage.total} credits used total`,
+    });
+    // Check thresholds
+    if (creditUsage.total >= CONFIG.credit.criticalThreshold) {
+        server.sendLoggingMessage({
+            level: 'error',
+            data: `CRITICAL: Credit usage has reached ${creditUsage.total}`,
+        });
+    }
+    else if (creditUsage.total >= CONFIG.credit.warningThreshold) {
+        server.sendLoggingMessage({
+            level: 'warning',
+            data: `WARNING: Credit usage has reached ${creditUsage.total}`,
+        });
+    }
+}
+// Initialize queue system
+const batchQueue = new PQueue({ concurrency: 1 });
+const batchOperations = new Map();
+let operationCounter = 0;
+async function processBatchOperation(operation) {
+    try {
+        operation.status = 'processing';
+        let totalCreditsUsed = 0;
+        // Use library's built-in batch processing
+        const response = await withRetry(async () => client.asyncBatchScrapeUrls(operation.urls, operation.options), `batch ${operation.id} processing`);
+        if (!response.success) {
+            throw new Error(response.error || 'Batch operation failed');
+        }
+        // Track credits if using cloud API
+        if (!FIRECRAWL_API_URL && hasCredits(response)) {
+            totalCreditsUsed += response.creditsUsed;
+            await updateCreditUsage(response.creditsUsed);
+        }
+        operation.status = 'completed';
+        operation.result = response;
+        // Log final credit usage for the batch
+        if (!FIRECRAWL_API_URL) {
+            server.sendLoggingMessage({
+                level: 'info',
+                data: `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`,
+            });
+        }
+    }
+    catch (error) {
+        operation.status = 'failed';
+        operation.error = error instanceof Error ? error.message : String(error);
+        server.sendLoggingMessage({
+            level: 'error',
+            data: `Batch ${operation.id} failed: ${operation.error}`,
+        });
+    }
+}
+// Tool handlers
+server.setRequestHandler(ListToolsRequestSchema, async () => ({
+    tools: [
+        SCRAPE_TOOL,
+        MAP_TOOL,
+        CRAWL_TOOL,
+        BATCH_SCRAPE_TOOL,
+        CHECK_BATCH_STATUS_TOOL,
+        CHECK_CRAWL_STATUS_TOOL,
+        SEARCH_TOOL,
+        EXTRACT_TOOL,
+    ],
+}));
+server.setRequestHandler(CallToolRequestSchema, async (request) => {
+    const startTime = Date.now();
+    try {
+        const { name, arguments: args } = request.params;
+        // Log incoming request with timestamp
+        server.sendLoggingMessage({
+            level: 'info',
+            data: `[${new Date().toISOString()}] Received request for tool: ${name}`,
+        });
+        if (!args) {
+            throw new Error('No arguments provided');
+        }
+        switch (name) {
+            case 'firecrawl_scrape': {
+                if (!isScrapeOptions(args)) {
+                    throw new Error('Invalid arguments for firecrawl_scrape');
+                }
+                const { url, ...options } = args;
+                try {
+                    const scrapeStartTime = Date.now();
+                    server.sendLoggingMessage({
+                        level: 'info',
+                        data: `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`,
+                    });
+                    const response = await client.scrapeUrl(url, options);
+                    // Log performance metrics
+                    server.sendLoggingMessage({
+                        level: 'info',
+                        data: `Scrape completed in ${Date.now() - scrapeStartTime}ms`,
+                    });
+                    if ('success' in response && !response.success) {
+                        throw new Error(response.error || 'Scraping failed');
+                    }
+                    const content = 'markdown' in response
+                        ? response.markdown || response.html || response.rawHtml
+                        : null;
+                    return {
+                        content: [
+                            { type: 'text', text: content || 'No content available' },
+                        ],
+                        isError: false,
+                    };
+                }
+                catch (error) {
+                    const errorMessage = error instanceof Error ? error.message : String(error);
+                    return {
+                        content: [{ type: 'text', text: errorMessage }],
+                        isError: true,
+                    };
+                }
+            }
+            case 'firecrawl_map': {
+                if (!isMapOptions(args)) {
+                    throw new Error('Invalid arguments for firecrawl_map');
+                }
+                const { url, ...options } = args;
+                const response = await client.mapUrl(url, options);
+                if ('error' in response) {
+                    throw new Error(response.error);
+                }
+                if (!response.links) {
+                    throw new Error('No links received from FireCrawl API');
+                }
+                return {
+                    content: [{ type: 'text', text: response.links.join('\n') }],
+                    isError: false,
+                };
+            }
+            case 'firecrawl_batch_scrape': {
+                if (!isBatchScrapeOptions(args)) {
+                    throw new Error('Invalid arguments for firecrawl_batch_scrape');
+                }
+                try {
+                    const operationId = `batch_${++operationCounter}`;
+                    const operation = {
+                        id: operationId,
+                        urls: args.urls,
+                        options: args.options,
+                        status: 'pending',
+                        progress: {
+                            completed: 0,
+                            total: args.urls.length,
+                        },
+                    };
+                    batchOperations.set(operationId, operation);
+                    // Queue the operation
+                    batchQueue.add(() => processBatchOperation(operation));
+                    server.sendLoggingMessage({
+                        level: 'info',
+                        data: `Queued batch operation ${operationId} with ${args.urls.length} URLs`,
+                    });
+                    return {
+                        content: [
+                            {
+                                type: 'text',
+                                text: `Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`,
+                            },
+                        ],
+                        isError: false,
+                    };
+                }
+                catch (error) {
+                    const errorMessage = error instanceof Error
+                        ? error.message
+                        : `Batch operation failed: ${JSON.stringify(error)}`;
+                    return {
+                        content: [{ type: 'text', text: errorMessage }],
+                        isError: true,
+                    };
+                }
+            }
+            case 'firecrawl_check_batch_status': {
+                if (!isStatusCheckOptions(args)) {
+                    throw new Error('Invalid arguments for firecrawl_check_batch_status');
+                }
+                const operation = batchOperations.get(args.id);
+                if (!operation) {
+                    return {
+                        content: [
+                            {
+                                type: 'text',
+                                text: `No batch operation found with ID: ${args.id}`,
+                            },
+                        ],
+                        isError: true,
+                    };
+                }
+                const status = `Batch Status:
+Status: ${operation.status}
+Progress: ${operation.progress.completed}/${operation.progress.total}
+${operation.error ? `Error: ${operation.error}` : ''}
+${operation.result
+                    ? `Results: ${JSON.stringify(operation.result, null, 2)}`
+                    : ''}`;
+                return {
+                    content: [{ type: 'text', text: status }],
+                    isError: false,
+                };
+            }
+            case 'firecrawl_crawl': {
+                if (!isCrawlOptions(args)) {
+                    throw new Error('Invalid arguments for firecrawl_crawl');
+                }
+                const { url, ...options } = args;
+                const response = await withRetry(async () => client.asyncCrawlUrl(url, options), 'crawl operation');
+                if (!response.success) {
+                    throw new Error(response.error);
+                }
+                // Monitor credits for cloud API
+                if (!FIRECRAWL_API_URL && hasCredits(response)) {
+                    await updateCreditUsage(response.creditsUsed);
+                }
+                return {
+                    content: [
+                        {
+                            type: 'text',
+                            text: `Started crawl for ${url} with job ID: ${response.id}`,
+                        },
+                    ],
+                    isError: false,
+                };
+            }
+            case 'firecrawl_check_crawl_status': {
+                if (!isStatusCheckOptions(args)) {
+                    throw new Error('Invalid arguments for firecrawl_check_crawl_status');
+                }
+                const response = await client.checkCrawlStatus(args.id);
+                if (!response.success) {
+                    throw new Error(response.error);
+                }
+                const status = `Crawl Status:
+Status: ${response.status}
+Progress: ${response.completed}/${response.total}
+Credits Used: ${response.creditsUsed}
+Expires At: ${response.expiresAt}
+${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}`;
+                return {
+                    content: [{ type: 'text', text: status }],
+                    isError: false,
+                };
+            }
+            case 'firecrawl_search': {
+                if (!isSearchOptions(args)) {
+                    throw new Error('Invalid arguments for firecrawl_search');
+                }
+                try {
+                    const response = await withRetry(async () => client.search(args.query, args), 'search operation');
+                    if (!response.success) {
+                        throw new Error(`Search failed: ${response.error || 'Unknown error'}`);
+                    }
+                    // Monitor credits for cloud API
+                    if (!FIRECRAWL_API_URL && hasCredits(response)) {
+                        await updateCreditUsage(response.creditsUsed);
+                    }
+                    // Format the results
+                    const results = response.data
+                        .map((result) => `URL: ${result.url}
+Title: ${result.title || 'No title'}
+Description: ${result.description || 'No description'}
+${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
+                        .join('\n\n');
+                    return {
+                        content: [{ type: 'text', text: results }],
+                        isError: false,
+                    };
+                }
+                catch (error) {
+                    const errorMessage = error instanceof Error
+                        ? error.message
+                        : `Search failed: ${JSON.stringify(error)}`;
+                    return {
+                        content: [{ type: 'text', text: errorMessage }],
+                        isError: true,
+                    };
+                }
+            }
+            case 'firecrawl_extract': {
+                if (!isExtractOptions(args)) {
+                    throw new Error('Invalid arguments for firecrawl_extract');
+                }
+                try {
+                    const extractStartTime = Date.now();
+                    server.sendLoggingMessage({
+                        level: 'info',
+                        data: `Starting extraction for URLs: ${args.urls.join(', ')}`,
+                    });
+                    // Log if using self-hosted instance
+                    if (FIRECRAWL_API_URL) {
+                        server.sendLoggingMessage({
+                            level: 'info',
+                            data: 'Using self-hosted instance for extraction',
+                        });
+                    }
+                    const extractResponse = await withRetry(async () => client.extract(args.urls, {
+                        prompt: args.prompt,
+                        systemPrompt: args.systemPrompt,
+                        schema: args.schema,
+                        allowExternalLinks: args.allowExternalLinks,
+                        enableWebSearch: args.enableWebSearch,
+                        includeSubdomains: args.includeSubdomains,
+                        origin: 'mcp-server',
+                    }), 'extract operation');
+                    // Type guard for successful response
+                    if (!('success' in extractResponse) || !extractResponse.success) {
+                        throw new Error(extractResponse.error || 'Extraction failed');
+                    }
+                    const response = extractResponse;
+                    // Monitor credits for cloud API
+                    if (!FIRECRAWL_API_URL && hasCredits(response)) {
+                        await updateCreditUsage(response.creditsUsed || 0);
+                    }
+                    // Log performance metrics
+                    server.sendLoggingMessage({
+                        level: 'info',
+                        data: `Extraction completed in ${Date.now() - extractStartTime}ms`,
+                    });
+                    // Add warning to response if present
+                    const result = {
+                        content: [
+                            {
+                                type: 'text',
+                                text: JSON.stringify(response.data, null, 2),
+                            },
+                        ],
+                        isError: false,
+                    };
+                    if (response.warning) {
+                        server.sendLoggingMessage({
+                            level: 'warning',
+                            data: response.warning,
+                        });
+                    }
+                    return result;
+                }
+                catch (error) {
+                    const errorMessage = error instanceof Error ? error.message : String(error);
+                    // Special handling for self-hosted instance errors
+                    if (FIRECRAWL_API_URL &&
+                        errorMessage.toLowerCase().includes('not supported')) {
+                        server.sendLoggingMessage({
+                            level: 'error',
+                            data: 'Extraction is not supported by this self-hosted instance',
+                        });
+                        return {
+                            content: [
+                                {
+                                    type: 'text',
+                                    text: 'Extraction is not supported by this self-hosted instance. Please ensure LLM support is configured.',
+                                },
+                            ],
+                            isError: true,
+                        };
+                    }
+                    return {
+                        content: [{ type: 'text', text: errorMessage }],
+                        isError: true,
+                    };
+                }
+            }
+            default:
+                return {
+                    content: [{ type: 'text', text: `Unknown tool: ${name}` }],
+                    isError: true,
+                };
+        }
+    }
+    catch (error) {
+        // Log detailed error information
+        server.sendLoggingMessage({
+            level: 'error',
+            data: {
+                message: `Request failed: ${error instanceof Error ? error.message : String(error)}`,
+                tool: request.params.name,
+                arguments: request.params.arguments,
+                timestamp: new Date().toISOString(),
+                duration: Date.now() - startTime,
+            },
+        });
+        return {
+            content: [
+                {
+                    type: 'text',
+                    text: `Error: ${error instanceof Error ? error.message : String(error)}`,
+                },
+            ],
+            isError: true,
+        };
+    }
+    finally {
+        // Log request completion with performance metrics
+        server.sendLoggingMessage({
+            level: 'info',
+            data: `Request completed in ${Date.now() - startTime}ms`,
+        });
+    }
+});
+// Helper function to format results
+function formatResults(data) {
+    return data
+        .map((doc) => {
+        const content = doc.markdown || doc.html || doc.rawHtml || 'No content';
+        return `URL: ${doc.url || 'Unknown URL'}
+Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''}
+${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`;
+    })
+        .join('\n\n');
+}
+// Server startup
+async function runServer() {
+    try {
+        console.error('Initializing FireCrawl MCP Server...');
+        const transport = new StdioServerTransport();
+        await server.connect(transport);
+        // Now that we're connected, we can send logging messages
+        server.sendLoggingMessage({
+            level: 'info',
+            data: 'FireCrawl MCP Server initialized successfully',
+        });
+        server.sendLoggingMessage({
+            level: 'info',
+            data: `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`,
+        });
+        console.error('FireCrawl MCP Server running on stdio');
+    }
+    catch (error) {
+        console.error('Fatal error running server:', error);
+        process.exit(1);
+    }
+}
+runServer().catch((error) => {
+    console.error('Fatal error running server:', error);
+    process.exit(1);
+});
+// Add type guard for credit usage
+function hasCredits(response) {
+    return 'creditsUsed' in response && typeof response.creditsUsed === 'number';
+}
````
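
The `CONFIG` object above pulls all retry and credit-monitoring thresholds from environment variables, so backoff behavior can be tuned at launch without code changes; for example (values illustrative):

```bash
env FIRECRAWL_API_KEY=fc-YOUR_API_KEY \
    FIRECRAWL_RETRY_MAX_ATTEMPTS=5 \
    FIRECRAWL_RETRY_INITIAL_DELAY=2000 \
    FIRECRAWL_RETRY_MAX_DELAY=30000 \
    FIRECRAWL_RETRY_BACKOFF_FACTOR=3 \
    FIRECRAWL_CREDIT_WARNING_THRESHOLD=2000 \
    FIRECRAWL_CREDIT_CRITICAL_THRESHOLD=500 \
    npx -y firecrawl-mcp
```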
package/dist/src/index.test.js ADDED

````diff
@@ -0,0 +1,225 @@
+import FirecrawlApp from '@mendable/firecrawl-js';
+import { describe, expect, jest, test, beforeEach, afterEach, } from '@jest/globals';
+import { mock } from 'jest-mock-extended';
+// Mock FirecrawlApp
+jest.mock('@mendable/firecrawl-js');
+describe('FireCrawl Tool Tests', () => {
+    let mockClient;
+    let requestHandler;
+    beforeEach(() => {
+        jest.clearAllMocks();
+        mockClient = mock();
+        // Set up mock implementations
+        const mockInstance = new FirecrawlApp({ apiKey: 'test' });
+        Object.assign(mockInstance, mockClient);
+        // Create request handler
+        requestHandler = async (request) => {
+            const { name, arguments: args } = request.params;
+            if (!args) {
+                throw new Error('No arguments provided');
+            }
+            return handleRequest(name, args, mockClient);
+        };
+    });
+    afterEach(() => {
+        jest.clearAllMocks();
+    });
+    // Test scrape functionality
+    test('should handle scrape request', async () => {
+        const url = 'https://example.com';
+        const options = { formats: ['markdown'] };
+        const mockResponse = {
+            success: true,
+            markdown: '# Test Content',
+            html: undefined,
+            rawHtml: undefined,
+            url: 'https://example.com',
+            actions: undefined,
+        };
+        mockClient.scrapeUrl.mockResolvedValueOnce(mockResponse);
+        const response = await requestHandler({
+            method: 'call_tool',
+            params: {
+                name: 'firecrawl_scrape',
+                arguments: { url, ...options },
+            },
+        });
+        expect(response).toEqual({
+            content: [{ type: 'text', text: '# Test Content' }],
+            isError: false,
+        });
+        expect(mockClient.scrapeUrl).toHaveBeenCalledWith(url, {
+            formats: ['markdown'],
+            url,
+        });
+    });
+    // Test batch scrape functionality
+    test('should handle batch scrape request', async () => {
+        const urls = ['https://example.com'];
+        const options = { formats: ['markdown'] };
+        mockClient.asyncBatchScrapeUrls.mockResolvedValueOnce({
+            success: true,
+            id: 'test-batch-id',
+        });
+        const response = await requestHandler({
+            method: 'call_tool',
+            params: {
+                name: 'firecrawl_batch_scrape',
+                arguments: { urls, options },
+            },
+        });
+        expect(response.content[0].text).toContain('Batch operation queued with ID: batch_');
+        expect(mockClient.asyncBatchScrapeUrls).toHaveBeenCalledWith(urls, options);
+    });
+    // Test search functionality
+    test('should handle search request', async () => {
+        const query = 'test query';
+        const scrapeOptions = { formats: ['markdown'] };
+        const mockSearchResponse = {
+            success: true,
+            data: [
+                {
+                    url: 'https://example.com',
+                    title: 'Test Page',
+                    description: 'Test Description',
+                    markdown: '# Test Content',
+                    actions: undefined,
+                },
+            ],
+        };
+        mockClient.search.mockResolvedValueOnce(mockSearchResponse);
+        const response = await requestHandler({
+            method: 'call_tool',
+            params: {
+                name: 'firecrawl_search',
+                arguments: { query, scrapeOptions },
+            },
+        });
+        expect(response.isError).toBe(false);
+        expect(response.content[0].text).toContain('Test Page');
+        expect(mockClient.search).toHaveBeenCalledWith(query, scrapeOptions);
+    });
+    // Test crawl functionality
+    test('should handle crawl request', async () => {
+        const url = 'https://example.com';
+        const options = { maxDepth: 2 };
+        mockClient.asyncCrawlUrl.mockResolvedValueOnce({
+            success: true,
+            id: 'test-crawl-id',
+        });
+        const response = await requestHandler({
+            method: 'call_tool',
+            params: {
+                name: 'firecrawl_crawl',
+                arguments: { url, ...options },
+            },
+        });
+        expect(response.isError).toBe(false);
+        expect(response.content[0].text).toContain('test-crawl-id');
+        expect(mockClient.asyncCrawlUrl).toHaveBeenCalledWith(url, {
+            maxDepth: 2,
+            url,
+        });
+    });
+    // Test error handling
+    test('should handle API errors', async () => {
+        const url = 'https://example.com';
+        mockClient.scrapeUrl.mockRejectedValueOnce(new Error('API Error'));
+        const response = await requestHandler({
+            method: 'call_tool',
+            params: {
+                name: 'firecrawl_scrape',
+                arguments: { url },
+            },
+        });
+        expect(response.isError).toBe(true);
+        expect(response.content[0].text).toContain('API Error');
+    });
+    // Test rate limiting
+    test('should handle rate limits', async () => {
+        const url = 'https://example.com';
+        // Mock rate limit error
+        mockClient.scrapeUrl.mockRejectedValueOnce(new Error('rate limit exceeded'));
+        const response = await requestHandler({
+            method: 'call_tool',
+            params: {
+                name: 'firecrawl_scrape',
+                arguments: { url },
+            },
+        });
+        expect(response.isError).toBe(true);
+        expect(response.content[0].text).toContain('rate limit exceeded');
+    });
+});
+// Helper function to simulate request handling
+async function handleRequest(name, args, client) {
+    try {
+        switch (name) {
+            case 'firecrawl_scrape': {
+                const response = await client.scrapeUrl(args.url, args);
+                if (!response.success) {
+                    throw new Error(response.error || 'Scraping failed');
+                }
+                return {
+                    content: [
+                        { type: 'text', text: response.markdown || 'No content available' },
+                    ],
+                    isError: false,
+                };
+            }
+            case 'firecrawl_batch_scrape': {
+                const response = await client.asyncBatchScrapeUrls(args.urls, args.options);
+                return {
+                    content: [
+                        {
+                            type: 'text',
+                            text: `Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress.`,
+                        },
+                    ],
+                    isError: false,
+                };
+            }
+            case 'firecrawl_search': {
+                const response = await client.search(args.query, args.scrapeOptions);
+                if (!response.success) {
+                    throw new Error(response.error || 'Search failed');
+                }
+                const results = response.data
+                    .map((result) => `URL: ${result.url}\nTitle: ${result.title || 'No title'}\nDescription: ${result.description || 'No description'}\n${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
+                    .join('\n\n');
+                return {
+                    content: [{ type: 'text', text: results }],
+                    isError: false,
+                };
+            }
+            case 'firecrawl_crawl': {
+                const response = await client.asyncCrawlUrl(args.url, args);
+                if (!response.success) {
+                    throw new Error(response.error);
+                }
+                return {
+                    content: [
+                        {
+                            type: 'text',
+                            text: `Started crawl for ${args.url} with job ID: ${response.id}`,
+                        },
+                    ],
+                    isError: false,
+                };
+            }
+            default:
+                throw new Error(`Unknown tool: ${name}`);
+        }
+    }
+    catch (error) {
+        return {
+            content: [
+                {
+                    type: 'text',
+                    text: error instanceof Error ? error.message : String(error),
+                },
+            ],
+            isError: true,
+        };
+    }
+}
````
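
These compiled tests are plain ESM, so running them directly with Jest requires Node's VM-modules flag (a sketch, assuming `jest` and `jest-mock-extended` are installed):

```bash
NODE_OPTIONS=--experimental-vm-modules npx jest dist/src/index.test.js
```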
package/package.json CHANGED

````diff
@@ -1,6 +1,6 @@
 {
   "name": "firecrawl-mcp",
-  "version": "1.9.0",
+  "version": "1.10.0",
   "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
   "type": "module",
   "bin": {
````