firecrawl-mcp 3.3.1 → 3.3.3
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- package/README.md +0 -19
- package/dist/index.js +21 -47
- package/package.json +1 -2
package/README.md
CHANGED
@@ -21,25 +21,6 @@ A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl
 - Automatic retries and rate limiting
 - Cloud and self-hosted support
 - SSE support
-- **Context limit support for MCP compatibility**
-
-## Context Limiting for MCP
-
-All tools now support the `maxResponseSize` parameter to limit response size for better MCP compatibility. This is especially useful for large responses that may exceed MCP context limits.
-
-**Example Usage:**
-```json
-{
-  "name": "firecrawl_scrape",
-  "arguments": {
-    "url": "https://example.com",
-    "formats": ["markdown"],
-    "maxResponseSize": 50000
-  }
-}
-```
-
-When the response exceeds the specified limit, content will be truncated with a clear message indicating truncation occurred. This parameter is optional and preserves full backward compatibility.
 
 > Play around with [our MCP Server on MCP.so's playground](https://mcp.so/playground?server=firecrawl-mcp-server) or on [Klavis AI](https://www.klavis.ai/mcp-servers).
 
package/dist/index.js
CHANGED
@@ -124,13 +124,8 @@ function getClient(session) {
   }
   return createClient(session?.firecrawlApiKey);
 }
-function asText(data, maxResponseSize) {
-  const text = JSON.stringify(data, null, 2);
-  if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
-    const truncatedText = text.substring(0, maxResponseSize - 100); // Reserve space for truncation message
-    return truncatedText + '\n\n[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]';
-  }
-  return text;
+function asText(data) {
+  return JSON.stringify(data, null, 2);
 }
 // scrape tool (v2 semantics, minimal args)
 // Centralized scrape params (used by scrape, and referenced in search/crawl scrapeOptions)
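The deleted branch above was the entire server-side implementation of `maxResponseSize`: `asText` now always returns the full pretty-printed JSON. For a client that relied on the old truncation, a minimal stand-in (a hypothetical helper, not part of this package) could apply the same rule on the receiving side:

```js
// Hypothetical client-side replacement for the removed truncation logic.
// Mirrors the deleted asText() branch: cut at the limit, reserving ~100
// characters for a notice, and append a truncation message.
function truncateToolText(text, maxResponseSize) {
  if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
    const truncated = text.substring(0, maxResponseSize - 100);
    return truncated + '\n\n[Content truncated due to size limit.]';
  }
  return text;
}
```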
@@ -197,12 +192,11 @@ const scrapeParamsSchema = z.object({
     .optional(),
   storeInCache: z.boolean().optional(),
   maxAge: z.number().optional(),
-  maxResponseSize: z.number().optional(),
 });
 server.addTool({
   name: 'firecrawl_scrape',
   description: `
-Scrape content from a single URL with advanced options.
+Scrape content from a single URL with advanced options.
 This is the most powerful, fastest and most reliable scraper tool, if available you should always default to using this tool for any web scraping needs.
 
 **Best for:** Single page content extraction, when you know exactly which page contains the information.
@@ -216,24 +210,22 @@ This is the most powerful, fastest and most reliable scraper tool, if available
   "arguments": {
     "url": "https://example.com",
     "formats": ["markdown"],
-    "maxAge": 172800000
-    "maxResponseSize": 50000
+    "maxAge": 172800000
   }
 }
 \`\`\`
 **Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility (e.g., 50000 characters).
 **Returns:** Markdown, HTML, or other formats as specified.
 ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
 `,
   parameters: scrapeParamsSchema,
   execute: async (args, { session, log }) => {
-    const { url, maxResponseSize, ...options } = args;
+    const { url, ...options } = args;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options);
     log.info('Scraping URL', { url: String(url) });
     const res = await client.scrape(String(url), { ...cleaned, origin: ORIGIN });
-    return asText(res, maxResponseSize);
+    return asText(res);
   },
 });
 server.addTool({
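With `maxResponseSize` gone from `scrapeParamsSchema`, callers pass only the remaining documented arguments. A sketch of invoking the 3.3.3 tool from an MCP client, assuming the `@modelcontextprotocol/sdk` client API and a stdio launch of `firecrawl-mcp` (the API key value is a placeholder):

```js
// Sketch: calling the updated firecrawl_scrape tool from an MCP client.
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

const transport = new StdioClientTransport({
  command: 'npx',
  args: ['-y', 'firecrawl-mcp'],
  env: { FIRECRAWL_API_KEY: 'fc-YOUR_API_KEY' }, // placeholder key
});
const client = new Client({ name: 'example-client', version: '1.0.0' });
await client.connect(transport);

// maxResponseSize is no longer accepted in 3.3.3; only the documented
// scrape arguments remain.
const result = await client.callTool({
  name: 'firecrawl_scrape',
  arguments: {
    url: 'https://example.com',
    formats: ['markdown'],
    maxAge: 172800000, // serves cached data when fresh enough
  },
});
console.log(result.content);
```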
@@ -244,15 +236,13 @@ Map a website to discover all indexed URLs on the site.
 **Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.
 **Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).
 **Common mistakes:** Using crawl to discover URLs instead of map.
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Prompt Example:** "List all URLs on example.com."
 **Usage Example:**
 \`\`\`json
 {
   "name": "firecrawl_map",
   "arguments": {
-    "url": "https://example.com"
-    "maxResponseSize": 50000
+    "url": "https://example.com"
   }
 }
 \`\`\`
@@ -265,15 +255,14 @@ Map a website to discover all indexed URLs on the site.
     includeSubdomains: z.boolean().optional(),
     limit: z.number().optional(),
     ignoreQueryParameters: z.boolean().optional(),
-    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
-    const { url, maxResponseSize, ...options } = args;
+    const { url, ...options } = args;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options);
     log.info('Mapping URL', { url: String(url) });
     const res = await client.map(String(url), { ...cleaned, origin: ORIGIN });
-    return asText(res, maxResponseSize);
+    return asText(res);
   },
 });
 server.addTool({
@@ -331,12 +320,10 @@ The query also supports search operators, that you can use if needed to refine the
     "scrapeOptions": {
       "formats": ["markdown"],
       "onlyMainContent": true
-    }
-    "maxResponseSize": 50000
+    }
   }
 }
 \`\`\`
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Array of search results (with optional scraped content).
 `,
   parameters: z.object({
@@ -349,18 +336,17 @@ The query also supports search operators, that you can use if needed to refine the
       .array(z.object({ type: z.enum(['web', 'images', 'news']) }))
       .optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
-    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
     const client = getClient(session);
-    const { query, maxResponseSize, ...opts } = args;
+    const { query, ...opts } = args;
     const cleaned = removeEmptyTopLevel(opts);
     log.info('Searching', { query: String(query) });
     const res = await client.search(query, {
       ...cleaned,
       origin: ORIGIN,
     });
-    return asText(res, maxResponseSize);
+    return asText(res);
   },
 });
 server.addTool({
@@ -383,12 +369,10 @@ server.addTool({
     "limit": 20,
     "allowExternalLinks": false,
     "deduplicateSimilarURLs": true,
-    "sitemap": "include"
-    "maxResponseSize": 50000
+    "sitemap": "include"
   }
 }
 \`\`\`
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
 ${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
 `,
@@ -419,10 +403,9 @@ server.addTool({
     deduplicateSimilarURLs: z.boolean().optional(),
     ignoreQueryParameters: z.boolean().optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
-    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
-    const { url, maxResponseSize, ...options } = args;
+    const { url, ...options } = args;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options);
     log.info('Starting crawl', { url: String(url) });
@@ -430,7 +413,7 @@ server.addTool({
       ...cleaned,
       origin: ORIGIN,
     });
-    return asText(res, maxResponseSize);
+    return asText(res);
   },
 });
 server.addTool({
@@ -443,23 +426,17 @@ Check the status of a crawl job.
 {
   "name": "firecrawl_check_crawl_status",
   "arguments": {
-    "id": "550e8400-e29b-41d4-a716-446655440000"
-    "maxResponseSize": 50000
+    "id": "550e8400-e29b-41d4-a716-446655440000"
   }
 }
 \`\`\`
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Status and progress of the crawl job, including results if available.
 `,
-  parameters: z.object({
-    id: z.string(),
-    maxResponseSize: z.number().optional(),
-  }),
+  parameters: z.object({ id: z.string() }),
   execute: async (args, { session }) => {
-    const { id, maxResponseSize } = args;
     const client = getClient(session);
-    const res = await client.getCrawlStatus(id);
-    return asText(res, maxResponseSize);
+    const res = await client.getCrawlStatus(args.id);
+    return asText(res);
   },
 });
 server.addTool({
@@ -495,12 +472,10 @@ Extract structured information from web pages using LLM capabilities. Supports b
     },
     "allowExternalLinks": false,
     "enableWebSearch": false,
-    "includeSubdomains": false
-    "maxResponseSize": 50000
+    "includeSubdomains": false
   }
 }
 \`\`\`
-**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Extracted structured data as defined by your schema.
 `,
   parameters: z.object({
@@ -510,7 +485,6 @@ Extract structured information from web pages using LLM capabilities. Supports b
     allowExternalLinks: z.boolean().optional(),
     enableWebSearch: z.boolean().optional(),
     includeSubdomains: z.boolean().optional(),
-    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
     const client = getClient(session);
@@ -528,7 +502,7 @@ Extract structured information from web pages using LLM capabilities. Supports b
       origin: ORIGIN,
     });
     const res = await client.extract(extractBody);
-    return asText(res, args.maxResponseSize);
+    return asText(res);
   },
 });
 const PORT = Number(process.env.PORT || 3000);
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "firecrawl-mcp",
-  "version": "3.3.1",
+  "version": "3.3.3",
   "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
   "type": "module",
   "bin": {
@@ -30,7 +30,6 @@
     "@mendable/firecrawl-js": "^4.3.6",
     "dotenv": "^17.2.2",
     "firecrawl-fastmcp": "^1.0.2",
-    "node-fetch": "^2.7.0",
     "typescript": "^5.9.2",
     "zod": "^4.1.5"
   },
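Dropping `node-fetch` suggests the server now relies on the `fetch` global available in Node 18+ (or on the HTTP client bundled with `@mendable/firecrawl-js`); that is an inference from the dependency list, not something this diff states. A defensive check for consumers on older runtimes might look like:

```js
// Sketch: fail fast on runtimes without a global fetch (Node < 18),
// since node-fetch is no longer installed as a dependency.
if (typeof globalThis.fetch !== 'function') {
  throw new Error('firecrawl-mcp expects a runtime with global fetch (Node 18+).');
}
```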