firecrawl-mcp 3.3.4 → 3.3.5

Files changed (3)
  1. package/README.md +19 -0
  2. package/dist/index.js +47 -21
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -21,6 +21,25 @@ A Model Context Protocol (MCP) server implementation that integrates with [Firec
 - Automatic retries and rate limiting
 - Cloud and self-hosted support
 - SSE support
+- **Context limit support for MCP compatibility**
+
+## Context Limiting for MCP
+
+All tools now support the `maxResponseSize` parameter to limit response size for better MCP compatibility. This is especially useful for large responses that may exceed MCP context limits.
+
+**Example Usage:**
+```json
+{
+  "name": "firecrawl_scrape",
+  "arguments": {
+    "url": "https://example.com",
+    "formats": ["markdown"],
+    "maxResponseSize": 50000
+  }
+}
+```
+
+When the response exceeds the specified limit, content will be truncated with a clear message indicating truncation occurred. This parameter is optional and preserves full backward compatibility.
 
 > Play around with [our MCP Server on MCP.so's playground](https://mcp.so/playground?server=firecrawl-mcp-server) or on [Klavis AI](https://www.klavis.ai/mcp-servers).
 
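For reference, the `arguments` object in the README example above is what an MCP client sends inside a standard `tools/call` request. A minimal sketch of the full JSON-RPC envelope (the envelope shape follows the MCP specification; the `id` value is arbitrary and not part of this diff):

```json
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "tools/call",
  "params": {
    "name": "firecrawl_scrape",
    "arguments": {
      "url": "https://example.com",
      "formats": ["markdown"],
      "maxResponseSize": 50000
    }
  }
}
```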
package/dist/index.js CHANGED
@@ -124,8 +124,13 @@ function getClient(session) {
   }
   return createClient(session?.firecrawlApiKey);
 }
-function asText(data) {
-  return JSON.stringify(data, null, 2);
+function asText(data, maxResponseSize) {
+  const text = JSON.stringify(data, null, 2);
+  if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
+    const truncatedText = text.substring(0, maxResponseSize - 100); // Reserve space for truncation message
+    return truncatedText + '\n\n[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]';
+  }
+  return text;
 }
 // scrape tool (v2 semantics, minimal args)
 // Centralized scrape params (used by scrape, and referenced in search/crawl scrapeOptions)
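The new `asText` is self-contained, so the truncation behavior described in the README can be checked in isolation. A minimal sketch, copying the function verbatim from the hunk above (the sample payload is hypothetical):

```js
// asText, copied verbatim from the diff above so it runs standalone.
function asText(data, maxResponseSize) {
  const text = JSON.stringify(data, null, 2);
  if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
    const truncatedText = text.substring(0, maxResponseSize - 100); // Reserve space for truncation message
    return truncatedText + '\n\n[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]';
  }
  return text;
}

const big = { markdown: 'x'.repeat(10000) }; // hypothetical oversized scrape result
console.log(asText(big).length);      // full JSON; no limit when the parameter is omitted
console.log(asText(big, 500).length); // ~496: 400 kept characters + the ~96-character notice
console.log(asText(big, 50));         // limits under 100 make substring's end index negative,
                                      // which clamps to 0, so only the notice is returned
```

Note the last case: for limits below 100, the output is the truncation notice alone, which is longer than the requested limit, so roughly 100 characters is a practical floor for `maxResponseSize`.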
@@ -192,11 +197,12 @@ const scrapeParamsSchema = z.object({
     .optional(),
   storeInCache: z.boolean().optional(),
   maxAge: z.number().optional(),
+  maxResponseSize: z.number().optional(),
 });
 server.addTool({
   name: 'firecrawl_scrape',
   description: `
-Scrape content from a single URL with advanced options.
+Scrape content from a single URL with advanced options.
 This is the most powerful, fastest and most reliable scraper tool, if available you should always default to using this tool for any web scraping needs.
 
 **Best for:** Single page content extraction, when you know exactly which page contains the information.
@@ -210,22 +216,24 @@ This is the most powerful, fastest and most reliable scraper tool, if available
   "arguments": {
     "url": "https://example.com",
     "formats": ["markdown"],
-    "maxAge": 172800000
+    "maxAge": 172800000,
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
 **Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility (e.g., 50000 characters).
 **Returns:** Markdown, HTML, or other formats as specified.
 ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
 `,
   parameters: scrapeParamsSchema,
   execute: async (args, { session, log }) => {
-    const { url, ...options } = args;
+    const { url, maxResponseSize, ...options } = args;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options);
     log.info('Scraping URL', { url: String(url) });
     const res = await client.scrape(String(url), { ...cleaned, origin: ORIGIN });
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 server.addTool({
@@ -236,13 +244,15 @@ Map a website to discover all indexed URLs on the site.
 **Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.
 **Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).
 **Common mistakes:** Using crawl to discover URLs instead of map.
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Prompt Example:** "List all URLs on example.com."
 **Usage Example:**
 \`\`\`json
 {
   "name": "firecrawl_map",
   "arguments": {
-    "url": "https://example.com"
+    "url": "https://example.com",
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
@@ -255,14 +265,15 @@ Map a website to discover all indexed URLs on the site.
     includeSubdomains: z.boolean().optional(),
     limit: z.number().optional(),
     ignoreQueryParameters: z.boolean().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
-    const { url, ...options } = args;
+    const { url, maxResponseSize, ...options } = args;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options);
     log.info('Mapping URL', { url: String(url) });
     const res = await client.map(String(url), { ...cleaned, origin: ORIGIN });
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 server.addTool({
@@ -320,10 +331,12 @@ The query also supports search operators, that you can use if needed to refine t
     "scrapeOptions": {
       "formats": ["markdown"],
       "onlyMainContent": true
-    }
+    },
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Array of search results (with optional scraped content).
 `,
   parameters: z.object({
@@ -336,17 +349,18 @@ The query also supports search operators, that you can use if needed to refine t
       .array(z.object({ type: z.enum(['web', 'images', 'news']) }))
       .optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
     const client = getClient(session);
-    const { query, ...opts } = args;
+    const { query, maxResponseSize, ...opts } = args;
     const cleaned = removeEmptyTopLevel(opts);
     log.info('Searching', { query: String(query) });
     const res = await client.search(query, {
       ...cleaned,
       origin: ORIGIN,
     });
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 server.addTool({
@@ -369,10 +383,12 @@ server.addTool({
     "limit": 20,
     "allowExternalLinks": false,
     "deduplicateSimilarURLs": true,
-    "sitemap": "include"
+    "sitemap": "include",
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
 ${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
 `,
@@ -403,9 +419,10 @@ server.addTool({
     deduplicateSimilarURLs: z.boolean().optional(),
     ignoreQueryParameters: z.boolean().optional(),
     scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
-    const { url, ...options } = args;
+    const { url, maxResponseSize, ...options } = args;
     const client = getClient(session);
     const cleaned = removeEmptyTopLevel(options);
     log.info('Starting crawl', { url: String(url) });
@@ -413,7 +430,7 @@ server.addTool({
       ...cleaned,
       origin: ORIGIN,
     });
-    return asText(res);
+    return asText(res, maxResponseSize);
   },
 });
 server.addTool({
@@ -426,17 +443,23 @@ Check the status of a crawl job.
 {
   "name": "firecrawl_check_crawl_status",
   "arguments": {
-    "id": "550e8400-e29b-41d4-a716-446655440000"
+    "id": "550e8400-e29b-41d4-a716-446655440000",
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Status and progress of the crawl job, including results if available.
 `,
-  parameters: z.object({ id: z.string() }),
+  parameters: z.object({
+    id: z.string(),
+    maxResponseSize: z.number().optional(),
+  }),
   execute: async (args, { session }) => {
+    const { id, maxResponseSize } = args;
     const client = getClient(session);
-    const res = await client.getCrawlStatus(args.id);
-    return asText(res);
+    const res = await client.getCrawlStatus(id);
+    return asText(res, maxResponseSize);
   },
 });
 server.addTool({
@@ -472,10 +495,12 @@ Extract structured information from web pages using LLM capabilities. Supports b
     },
     "allowExternalLinks": false,
     "enableWebSearch": false,
-    "includeSubdomains": false
+    "includeSubdomains": false,
+    "maxResponseSize": 50000
   }
 }
 \`\`\`
+**Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
 **Returns:** Extracted structured data as defined by your schema.
 `,
   parameters: z.object({
@@ -485,6 +510,7 @@ Extract structured information from web pages using LLM capabilities. Supports b
     allowExternalLinks: z.boolean().optional(),
     enableWebSearch: z.boolean().optional(),
     includeSubdomains: z.boolean().optional(),
+    maxResponseSize: z.number().optional(),
   }),
   execute: async (args, { session, log }) => {
     const client = getClient(session);
@@ -502,7 +528,7 @@ Extract structured information from web pages using LLM capabilities. Supports b
       origin: ORIGIN,
     });
     const res = await client.extract(extractBody);
-    return asText(res);
+    return asText(res, a.maxResponseSize);
   },
 });
 const PORT = Number(process.env.PORT || 3000);
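The same pattern repeats for every tool in this file: `maxResponseSize` is declared as an optional number in the Zod schema, pulled out of the arguments so it is not forwarded to the Firecrawl API, and handed to `asText` when serializing the response (the extract tool reads it from its parsed arguments object `a`, which is defined outside the hunks shown here). For an end-to-end view, here is a hedged sketch of calling the updated tool from the official MCP TypeScript SDK; the client name, stdio launch command, and env plumbing are assumptions for illustration, not part of this diff:

```js
// Launch firecrawl-mcp over stdio and call firecrawl_scrape with a size cap.
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';

const transport = new StdioClientTransport({
  command: 'npx',
  args: ['-y', 'firecrawl-mcp'],
  env: { FIRECRAWL_API_KEY: process.env.FIRECRAWL_API_KEY ?? '' },
});
const client = new Client({ name: 'example-client', version: '1.0.0' });
await client.connect(transport);

const result = await client.callTool({
  name: 'firecrawl_scrape',
  arguments: {
    url: 'https://example.com',
    formats: ['markdown'],
    maxResponseSize: 50000, // server truncates the serialized response past this length
  },
});
// When the cap is hit, the returned text ends with the
// '[Content truncated due to size limit. ...]' notice.
console.log(result.content);
await client.close();
```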
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "firecrawl-mcp",
-  "version": "3.3.4",
+  "version": "3.3.5",
   "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
   "type": "module",
   "bin": {