firecrawl-mcp 1.12.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +1 -1
  2. package/dist/index.js +371 -316
  3. package/package.json +10 -9
package/README.md CHANGED
@@ -1,6 +1,6 @@
  # Firecrawl MCP Server

- A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/mendableai/firecrawl) for web scraping capabilities.
+ A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/firecrawl/firecrawl) for web scraping capabilities.

  > Big thanks to [@vrknetha](https://github.com/vrknetha), [@knacklabs](https://www.knacklabs.ai) for the initial implementation!

package/dist/index.js CHANGED
@@ -4,8 +4,10 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
  import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
  import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
  import FirecrawlApp from '@mendable/firecrawl-js';
+ import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
  import express from 'express';
  import dotenv from 'dotenv';
+ import { randomUUID } from 'node:crypto';
  dotenv.config();
  // Tool definitions
  const SCRAPE_TOOL = {
@@ -25,7 +27,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
  "arguments": {
  "url": "https://example.com",
  "formats": ["markdown"],
- "maxAge": 3600000
+ "maxAge": 172800000
  }
  }
  \`\`\`
@@ -42,15 +44,39 @@ This is the most powerful, fastest and most reliable scraper tool, if available
  formats: {
  type: 'array',
  items: {
- type: 'string',
- enum: [
- 'markdown',
- 'html',
- 'rawHtml',
- 'screenshot',
- 'links',
- 'screenshot@fullPage',
- 'extract',
+ oneOf: [
+ {
+ type: 'string',
+ enum: [
+ 'markdown',
+ 'html',
+ 'rawHtml',
+ 'screenshot',
+ 'links',
+ 'extract',
+ 'summary',
+ ],
+ },
+ {
+ type: 'object',
+ properties: {
+ type: {
+ type: 'string',
+ enum: ['json'],
+ },
+ prompt: {
+ type: 'string',
+ description: 'Prompt to guide JSON extraction',
+ },
+ schema: {
+ type: 'object',
+ description: 'JSON schema for structured extraction',
+ },
+ },
+ required: ['type'],
+ additionalProperties: true,
+ description: 'Advanced format option. Use { type: "json", prompt, schema } to request structured JSON extraction.',
+ },
  ],
  },
  default: ['markdown'],
@@ -58,6 +84,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
  },
  onlyMainContent: {
  type: 'boolean',
+ default: true,
  description: 'Extract only the main content, filtering out navigation, footers, etc.',
  },
  includeTags: {
@@ -74,10 +101,6 @@ This is the most powerful, fastest and most reliable scraper tool, if available
  type: 'number',
  description: 'Time in milliseconds to wait for dynamic content to load',
  },
- timeout: {
- type: 'number',
- description: 'Maximum time in milliseconds to wait for the page to load',
- },
  actions: {
  type: 'array',
  items: {
@@ -131,24 +154,6 @@ This is the most powerful, fastest and most reliable scraper tool, if available
  },
  description: 'List of actions to perform before scraping',
  },
- extract: {
- type: 'object',
- properties: {
- schema: {
- type: 'object',
- description: 'Schema for structured data extraction',
- },
- systemPrompt: {
- type: 'string',
- description: 'System prompt for LLM extraction',
- },
- prompt: {
- type: 'string',
- description: 'User prompt for LLM extraction',
- },
- },
- description: 'Configuration for structured data extraction',
- },
  mobile: {
  type: 'boolean',
  description: 'Use mobile viewport',
@@ -176,9 +181,15 @@ This is the most powerful, fastest and most reliable scraper tool, if available
  },
  description: 'Location settings for scraping',
  },
+ storeInCache: {
+ type: 'boolean',
+ default: true,
+ description: 'If true, the page will be stored in the Firecrawl index and cache. Setting this to false is useful if your scraping activity may have data protection concerns.',
+ },
  maxAge: {
  type: 'number',
- description: 'Maximum age in milliseconds for cached content. Use cached data if available and younger than maxAge, otherwise scrape fresh. Enables 500% faster scrapes for recently cached pages. Default: 0 (always scrape fresh)',
+ default: 172800000,
+ description: 'Maximum age in milliseconds for cached content. Use cached data if available and younger than maxAge, otherwise scrape fresh. Enables 500% faster scrapes for recently cached pages. Default: 172800000',
  },
  },
  required: ['url'],
@@ -215,13 +226,10 @@ Map a website to discover all indexed URLs on the site.
  type: 'string',
  description: 'Optional search term to filter URLs',
  },
- ignoreSitemap: {
- type: 'boolean',
- description: 'Skip sitemap.xml discovery and only use HTML links',
- },
- sitemapOnly: {
- type: 'boolean',
- description: 'Only use sitemap.xml for discovery, ignore HTML links',
+ sitemap: {
+ type: 'string',
+ enum: ['include', 'skip', 'only'],
+ description: 'Sitemap handling: "include" - use sitemap + find other pages (default), "skip" - ignore sitemap completely, "only" - only return sitemap URLs',
  },
  includeSubdomains: {
  type: 'boolean',
@@ -231,6 +239,11 @@ Map a website to discover all indexed URLs on the site.
  type: 'number',
  description: 'Maximum number of URLs to return',
  },
+ ignoreQueryParameters: {
+ type: 'boolean',
+ default: true,
+ description: 'Do not return URLs with query parameters',
+ },
  },
  required: ['url'],
  },
@@ -238,28 +251,29 @@ Map a website to discover all indexed URLs on the site.
  const CRAWL_TOOL = {
  name: 'firecrawl_crawl',
  description: `
- Starts an asynchronous crawl job on a website and extracts content from all pages.
-
- **Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.
- **Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).
- **Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.
- **Common mistakes:** Setting limit or maxDepth too high (causes token overflow); using crawl for a single page (use scrape instead).
- **Prompt Example:** "Get all blog posts from the first two levels of example.com/blog."
- **Usage Example:**
- \`\`\`json
- {
- "name": "firecrawl_crawl",
- "arguments": {
- "url": "https://example.com/blog/*",
- "maxDepth": 2,
- "limit": 100,
- "allowExternalLinks": false,
- "deduplicateSimilarURLs": true
- }
- }
- \`\`\`
- **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
- `,
+ Starts a crawl job on a website and extracts content from all pages.
+
+ **Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.
+ **Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).
+ **Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.
+ **Common mistakes:** Setting limit or maxDiscoveryDepth too high (causes token overflow); using crawl for a single page (use scrape instead).
+ **Prompt Example:** "Get all blog posts from the first two levels of example.com/blog."
+ **Usage Example:**
+ \`\`\`json
+ {
+ "name": "firecrawl_crawl",
+ "arguments": {
+ "url": "https://example.com/blog/*",
+ "maxDiscoveryDepth": 2,
+ "limit": 100,
+ "allowExternalLinks": false,
+ "deduplicateSimilarURLs": true,
+ "sitemap": "include"
+ }
+ }
+ \`\`\`
+ **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
+ `,
  inputSchema: {
  type: 'object',
  properties: {
@@ -267,6 +281,10 @@ Starts an asynchronous crawl job on a website and extracts content from all page
  type: 'string',
  description: 'Starting URL for the crawl',
  },
+ prompt: {
+ type: 'string',
+ description: 'Natural language prompt to generate crawler options. Explicitly set parameters will override generated ones.',
+ },
  excludePaths: {
  type: 'array',
  items: { type: 'string' },
@@ -277,26 +295,43 @@ Starts an asynchronous crawl job on a website and extracts content from all page
  items: { type: 'string' },
  description: 'Only crawl these URL paths',
  },
- maxDepth: {
+ maxDiscoveryDepth: {
  type: 'number',
- description: 'Maximum link depth to crawl',
+ description: 'Maximum discovery depth to crawl. The root site and sitemapped pages have depth 0.',
  },
- ignoreSitemap: {
- type: 'boolean',
- description: 'Skip sitemap.xml discovery',
+ sitemap: {
+ type: 'string',
+ enum: ['skip', 'include', 'only'],
+ default: 'include',
+ description: "Sitemap mode when crawling. 'skip' ignores the sitemap entirely, 'include' uses sitemap plus other discovery methods (default), 'only' restricts crawling to sitemap URLs.",
  },
  limit: {
  type: 'number',
- description: 'Maximum number of pages to crawl',
- },
- allowBackwardLinks: {
- type: 'boolean',
- description: 'Allow crawling links that point to parent directories',
+ default: 10000,
+ description: 'Maximum number of pages to crawl (default: 10000)',
  },
  allowExternalLinks: {
  type: 'boolean',
  description: 'Allow crawling links to external domains',
  },
+ allowSubdomains: {
+ type: 'boolean',
+ default: false,
+ description: 'Allow crawling links to subdomains of the main domain',
+ },
+ crawlEntireDomain: {
+ type: 'boolean',
+ default: false,
+ description: 'When true, follow internal links to sibling or parent URLs, not just child paths',
+ },
+ delay: {
+ type: 'number',
+ description: 'Delay in seconds between scrapes to respect site rate limits',
+ },
+ maxConcurrency: {
+ type: 'number',
+ description: 'Maximum number of concurrent scrapes; if unset, team limit is used',
+ },
  webhook: {
  oneOf: [
  {
@@ -325,7 +360,8 @@ Starts an asynchronous crawl job on a website and extracts content from all page
  },
  ignoreQueryParameters: {
  type: 'boolean',
- description: 'Ignore query parameters when comparing URLs',
+ default: false,
+ description: 'Do not re-scrape the same path with different (or none) query parameters',
  },
  scrapeOptions: {
  type: 'object',
@@ -333,17 +369,43 @@ Starts an asynchronous crawl job on a website and extracts content from all page
  formats: {
  type: 'array',
  items: {
- type: 'string',
- enum: [
- 'markdown',
- 'html',
- 'rawHtml',
- 'screenshot',
- 'links',
- 'screenshot@fullPage',
- 'extract',
+ oneOf: [
+ {
+ type: 'string',
+ enum: [
+ 'markdown',
+ 'html',
+ 'rawHtml',
+ 'screenshot',
+ 'links',
+ 'extract',
+ 'summary',
+ ],
+ },
+ {
+ type: 'object',
+ properties: {
+ type: {
+ type: 'string',
+ enum: ['json'],
+ },
+ prompt: {
+ type: 'string',
+ description: 'Prompt to guide JSON extraction',
+ },
+ schema: {
+ type: 'object',
+ description: 'JSON schema for structured extraction',
+ },
+ },
+ required: ['type'],
+ additionalProperties: true,
+ description: 'Advanced format option. Use { type: "json", prompt, schema } to request structured JSON extraction.',
+ },
  ],
  },
+ default: ['markdown'],
+ description: "Content formats to extract (default: ['markdown'])",
  },
  onlyMainContent: {
  type: 'boolean',
@@ -396,12 +458,13 @@ Check the status of a crawl job.
  const SEARCH_TOOL = {
  name: 'firecrawl_search',
  description: `
- Search the web and optionally extract content from search results. This is the most powerful search tool available, and if available you should always default to using this tool for any web search needs.
+ Search the web and optionally extract content from search results. This is the most powerful web search tool available, and if available you should always default to using this tool for any web search needs.

  **Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.
- **Not recommended for:** When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl).
+ **Not recommended for:** When you need to search the filesystem. When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl.
  **Common mistakes:** Using crawl or map for open-ended questions (use search instead).
  **Prompt Example:** "Find the latest research papers on AI published in 2023."
+ **Sources:** web, images, news, default to web unless needed images or news.
  **Usage Example:**
  \`\`\`json
  {
@@ -411,6 +474,11 @@ Search the web and optionally extract content from search results. This is the m
  "limit": 5,
  "lang": "en",
  "country": "us",
+ "sources": [
+ "web",
+ "images",
+ "news"
+ ],
  "scrapeOptions": {
  "formats": ["markdown"],
  "onlyMainContent": true
@@ -431,14 +499,6 @@ Search the web and optionally extract content from search results. This is the m
  type: 'number',
  description: 'Maximum number of results to return (default: 5)',
  },
- lang: {
- type: 'string',
- description: 'Language code for search results (default: en)',
- },
- country: {
- type: 'string',
- description: 'Country code for search results (default: us)',
- },
  tbs: {
  type: 'string',
  description: 'Time-based search filter',
@@ -448,19 +508,48 @@ Search the web and optionally extract content from search results. This is the m
  description: 'Search filter',
  },
  location: {
- type: 'object',
- properties: {
- country: {
- type: 'string',
- description: 'Country code for geolocation',
- },
- languages: {
- type: 'array',
- items: { type: 'string' },
- description: 'Language codes for content',
- },
+ type: 'string',
+ description: 'Location parameter for search results',
+ },
+ sources: {
+ type: 'array',
+ description: 'Sources to search. Determines which result arrays are included in the response.',
+ items: {
+ oneOf: [
+ {
+ type: 'object',
+ properties: {
+ type: { type: 'string', enum: ['web'] },
+ tbs: {
+ type: 'string',
+ description: 'Time-based search parameter (e.g., qdr:h, qdr:d, qdr:w, qdr:m, qdr:y or custom cdr with cd_min/cd_max)',
+ },
+ location: {
+ type: 'string',
+ description: 'Location parameter for search results',
+ },
+ },
+ required: ['type'],
+ additionalProperties: false,
+ },
+ {
+ type: 'object',
+ properties: {
+ type: { type: 'string', enum: ['images'] },
+ },
+ required: ['type'],
+ additionalProperties: false,
+ },
+ {
+ type: 'object',
+ properties: {
+ type: { type: 'string', enum: ['news'] },
+ },
+ required: ['type'],
+ additionalProperties: false,
+ },
+ ],
  },
- description: 'Location settings for search',
  },
  scrapeOptions: {
  type: 'object',
@@ -468,8 +557,22 @@ Search the web and optionally extract content from search results. This is the m
  formats: {
  type: 'array',
  items: {
- type: 'string',
- enum: ['markdown', 'html', 'rawHtml'],
+ oneOf: [
+ {
+ type: 'string',
+ enum: ['markdown', 'html', 'rawHtml'],
+ },
+ {
+ type: 'object',
+ properties: {
+ type: { type: 'string', enum: ['json'] },
+ prompt: { type: 'string' },
+ schema: { type: 'object' },
+ },
+ required: ['type'],
+ additionalProperties: true,
+ },
+ ],
  },
  description: 'Content formats to extract from search results',
  },
@@ -493,12 +596,11 @@ const EXTRACT_TOOL = {
  description: `
  Extract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.

- **Best for:** Extracting specific structured data like prices, names, details.
+ **Best for:** Extracting specific structured data like prices, names, details from web pages.
  **Not recommended for:** When you need the full content of a page (use scrape); when you're not looking for specific structured data.
  **Arguments:**
  - urls: Array of URLs to extract information from
  - prompt: Custom prompt for the LLM extraction
- - systemPrompt: System prompt to guide the LLM
  - schema: JSON schema for structured data extraction
  - allowExternalLinks: Allow extraction from external links
  - enableWebSearch: Enable web search for additional context
@@ -511,7 +613,6 @@ Extract structured information from web pages using LLM capabilities. Supports b
  "arguments": {
  "urls": ["https://example.com/page1", "https://example.com/page2"],
  "prompt": "Extract product information including name, price, and description",
- "systemPrompt": "You are a helpful assistant that extracts product information",
  "schema": {
  "type": "object",
  "properties": {
@@ -541,10 +642,6 @@ Extract structured information from web pages using LLM capabilities. Supports b
  type: 'string',
  description: 'Prompt for the LLM extraction',
  },
- systemPrompt: {
- type: 'string',
- description: 'System prompt for LLM extraction',
- },
  schema: {
  type: 'object',
  description: 'JSON schema for structured data extraction',
@@ -565,100 +662,6 @@ Extract structured information from web pages using LLM capabilities. Supports b
  required: ['urls'],
  },
  };
- const DEEP_RESEARCH_TOOL = {
- name: 'firecrawl_deep_research',
- description: `
- Conduct deep web research on a query using intelligent crawling, search, and LLM analysis.
-
- **Best for:** Complex research questions requiring multiple sources, in-depth analysis.
- **Not recommended for:** Simple questions that can be answered with a single search; when you need very specific information from a known page (use scrape); when you need results quickly (deep research can take time).
- **Arguments:**
- - query (string, required): The research question or topic to explore.
- - maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).
- - timeLimit (number, optional): Time limit in seconds for the research session (default: 120).
- - maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).
- **Prompt Example:** "Research the environmental impact of electric vehicles versus gasoline vehicles."
- **Usage Example:**
- \`\`\`json
- {
- "name": "firecrawl_deep_research",
- "arguments": {
- "query": "What are the environmental impacts of electric vehicles compared to gasoline vehicles?",
- "maxDepth": 3,
- "timeLimit": 120,
- "maxUrls": 50
- }
- }
- \`\`\`
- **Returns:** Final analysis generated by an LLM based on research. (data.finalAnalysis); may also include structured activities and sources used in the research process.
- `,
- inputSchema: {
- type: 'object',
- properties: {
- query: {
- type: 'string',
- description: 'The query to research',
- },
- maxDepth: {
- type: 'number',
- description: 'Maximum depth of research iterations (1-10)',
- },
- timeLimit: {
- type: 'number',
- description: 'Time limit in seconds (30-300)',
- },
- maxUrls: {
- type: 'number',
- description: 'Maximum number of URLs to analyze (1-1000)',
- },
- },
- required: ['query'],
- },
- };
- const GENERATE_LLMSTXT_TOOL = {
- name: 'firecrawl_generate_llmstxt',
- description: `
- Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.
-
- **Best for:** Creating machine-readable permission guidelines for AI models.
- **Not recommended for:** General content extraction or research.
- **Arguments:**
- - url (string, required): The base URL of the website to analyze.
- - maxUrls (number, optional): Max number of URLs to include (default: 10).
- - showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.
- **Prompt Example:** "Generate an LLMs.txt file for example.com."
- **Usage Example:**
- \`\`\`json
- {
- "name": "firecrawl_generate_llmstxt",
- "arguments": {
- "url": "https://example.com",
- "maxUrls": 20,
- "showFullText": true
- }
- }
- \`\`\`
- **Returns:** LLMs.txt file contents (and optionally llms-full.txt).
- `,
- inputSchema: {
- type: 'object',
- properties: {
- url: {
- type: 'string',
- description: 'The URL to generate LLMs.txt from',
- },
- maxUrls: {
- type: 'number',
- description: 'Maximum number of URLs to process (1-100, default: 10)',
- },
- showFullText: {
- type: 'boolean',
- description: 'Whether to show the full LLMs-full.txt in the response',
- },
- },
- required: ['url'],
- },
- };
  // Type guards
  function isScrapeOptions(args) {
  return (typeof args === 'object' &&
@@ -672,6 +675,7 @@ function isMapOptions(args) {
  'url' in args &&
  typeof args.url === 'string');
  }
+ //@ts-expect-error todo: fix
  function isCrawlOptions(args) {
  return (typeof args === 'object' &&
  args !== null &&
@@ -703,6 +707,24 @@ function isGenerateLLMsTextOptions(args) {
  'url' in args &&
  typeof args.url === 'string');
  }
+ function removeEmptyTopLevel(obj) {
+ const out = {};
+ for (const [k, v] of Object.entries(obj)) {
+ if (v == null)
+ continue;
+ if (typeof v === 'string' && v.trim() === '')
+ continue;
+ if (Array.isArray(v) && v.length === 0)
+ continue;
+ if (typeof v === 'object' &&
+ !Array.isArray(v) &&
+ Object.keys(v).length === 0)
+ continue;
+ // @ts-expect-error dynamic assignment
+ out[k] = v;
+ }
+ return out;
+ }
  // Server implementation
  const server = new Server({
  name: 'firecrawl-mcp',
@@ -710,7 +732,6 @@ const server = new Server({
  }, {
  capabilities: {
  tools: {},
- logging: {},
  },
  });
  // Get optional API URL
@@ -743,14 +764,9 @@ function delay(ms) {
  }
  let isStdioTransport = false;
  function safeLog(level, data) {
- if (isStdioTransport) {
- // For stdio transport, log to stderr to avoid protocol interference
- console.error(`[${level}] ${typeof data === 'object' ? JSON.stringify(data) : data}`);
- }
- else {
- // For other transport types, use the normal logging mechanism
- server.sendLoggingMessage({ level, data });
- }
+ // Always log to stderr to avoid relying on MCP logging capability
+ const message = `[${level}] ${typeof data === 'object' ? JSON.stringify(data) : String(data)}`;
+ console.error(message);
  }
  // Add retry logic with exponential backoff
  async function withRetry(operation, context, attempt = 1) {
@@ -779,18 +795,16 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
  CHECK_CRAWL_STATUS_TOOL,
  SEARCH_TOOL,
  EXTRACT_TOOL,
- DEEP_RESEARCH_TOOL,
- GENERATE_LLMSTXT_TOOL,
  ],
  }));
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const startTime = Date.now();
  try {
  const { name, arguments: args } = request.params;
- const apiKey = process.env.CLOUD_SERVICE
+ const apiKey = process.env.CLOUD_SERVICE === 'true'
  ? request.params._meta?.apiKey
  : FIRECRAWL_API_KEY;
- if (process.env.CLOUD_SERVICE && !apiKey) {
+ if (process.env.CLOUD_SERVICE === 'true' && !apiKey) {
  throw new Error('No API key provided');
  }
  const client = new FirecrawlApp({
@@ -808,38 +822,46 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
  throw new Error('Invalid arguments for firecrawl_scrape');
  }
  const { url, ...options } = args;
+ const cleaned = removeEmptyTopLevel(options);
  try {
  const scrapeStartTime = Date.now();
  safeLog('info', `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`);
- const response = await client.scrapeUrl(url, {
- ...options,
- // @ts-expect-error Extended API options including origin
+ const response = await client.scrape(url, {
+ ...cleaned,
  origin: 'mcp-server',
  });
  // Log performance metrics
  safeLog('info', `Scrape completed in ${Date.now() - scrapeStartTime}ms`);
- if ('success' in response && !response.success) {
- throw new Error(response.error || 'Scraping failed');
- }
  // Format content based on requested formats
  const contentParts = [];
- if (options.formats?.includes('markdown') && response.markdown) {
+ const formats = (options?.formats ?? []);
+ const hasFormat = (name) => Array.isArray(formats) &&
+ formats.some((f) => typeof f === 'string'
+ ? f === name
+ : f && typeof f === 'object' && f.type === name);
+ if (hasFormat('markdown') && response.markdown) {
  contentParts.push(response.markdown);
  }
- if (options.formats?.includes('html') && response.html) {
+ if (hasFormat('html') && response.html) {
  contentParts.push(response.html);
  }
- if (options.formats?.includes('rawHtml') && response.rawHtml) {
+ if (hasFormat('rawHtml') && response.rawHtml) {
  contentParts.push(response.rawHtml);
  }
- if (options.formats?.includes('links') && response.links) {
+ if (hasFormat('links') && response.links) {
  contentParts.push(response.links.join('\n'));
  }
- if (options.formats?.includes('screenshot') && response.screenshot) {
+ if (hasFormat('screenshot') && response.screenshot) {
  contentParts.push(response.screenshot);
  }
- if (options.formats?.includes('extract') && response.extract) {
- contentParts.push(JSON.stringify(response.extract, null, 2));
+ if (hasFormat('json') && response.json) {
+ contentParts.push(JSON.stringify(response.json, null, 2));
+ }
+ if (hasFormat('changeTracking') && response.changeTracking) {
+ contentParts.push(JSON.stringify(response.changeTracking, null, 2));
+ }
+ if (hasFormat('summary') && response.summary) {
+ contentParts.push(JSON.stringify(response.summary, null, 2));
  }
  // If options.formats is empty, default to markdown
  if (!options.formats || options.formats.length === 0) {
@@ -872,20 +894,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
  throw new Error('Invalid arguments for firecrawl_map');
  }
  const { url, ...options } = args;
- const response = await client.mapUrl(url, {
+ const response = await client.map(url, {
  ...options,
  // @ts-expect-error Extended API options including origin
  origin: 'mcp-server',
  });
- if ('error' in response) {
- throw new Error(response.error);
- }
  if (!response.links) {
  throw new Error('No links received from Firecrawl API');
  }
  return {
  content: [
- { type: 'text', text: trimResponseText(response.links.join('\n')) },
+ { type: 'text', text: trimResponseText(JSON.stringify(response.links, null, 2)) },
  ],
  isError: false,
  };
@@ -895,17 +914,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
  throw new Error('Invalid arguments for firecrawl_crawl');
  }
  const { url, ...options } = args;
- const response = await withRetry(async () =>
- // @ts-expect-error Extended API options including origin
- client.asyncCrawlUrl(url, { ...options, origin: 'mcp-server' }), 'crawl operation');
- if (!response.success) {
- throw new Error(response.error);
- }
+ const response = await withRetry(async () => client.crawl(url, {
+ ...options,
+ // @ts-expect-error Extended API options including origin
+ origin: 'mcp-server',
+ }), 'crawl operation');
  return {
  content: [
  {
  type: 'text',
- text: trimResponseText(`Started crawl for ${url} with job ID: ${response.id}. Use firecrawl_check_crawl_status to check progress.`),
+ text: trimResponseText(JSON.stringify(response)),
  },
  ],
  isError: false,
@@ -915,10 +933,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
  if (!isStatusCheckOptions(args)) {
  throw new Error('Invalid arguments for firecrawl_check_crawl_status');
  }
- const response = await client.checkCrawlStatus(args.id);
- if (!response.success) {
- throw new Error(response.error);
- }
+ const response = await client.getCrawlStatus(args.id);
  const status = `Crawl Status:
  Status: ${response.status}
  Progress: ${response.completed}/${response.total}
@@ -935,19 +950,18 @@ ${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}
  throw new Error('Invalid arguments for firecrawl_search');
  }
  try {
- const response = await withRetry(async () => client.search(args.query, { ...args, origin: 'mcp-server' }), 'search operation');
- if (!response.success) {
- throw new Error(`Search failed: ${response.error || 'Unknown error'}`);
- }
- // Format the results
- const results = response.data
- .map((result) => `URL: ${result.url}
- Title: ${result.title || 'No title'}
- Description: ${result.description || 'No description'}
- ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
- .join('\n\n');
+ const response = await withRetry(async () => client.search(args.query, {
+ ...args,
+ // @ts-expect-error Extended API options including origin
+ origin: 'mcp-server',
+ }), 'search operation');
  return {
- content: [{ type: 'text', text: trimResponseText(results) }],
+ content: [
+ {
+ type: 'text',
+ text: trimResponseText(JSON.stringify(response, null, 2)),
+ },
+ ],
  isError: false,
  };
  }
@@ -972,9 +986,9 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
  if (FIRECRAWL_API_URL) {
  safeLog('info', 'Using self-hosted instance for extraction');
  }
- const extractResponse = await withRetry(async () => client.extract(args.urls, {
+ const extractResponse = await withRetry(async () => client.extract({
+ urls: args.urls,
  prompt: args.prompt,
- systemPrompt: args.systemPrompt,
  schema: args.schema,
  allowExternalLinks: args.allowExternalLinks,
  enableWebSearch: args.enableWebSearch,
@@ -1025,57 +1039,6 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
  };
  }
  }
- case 'firecrawl_deep_research': {
- if (!args || typeof args !== 'object' || !('query' in args)) {
- throw new Error('Invalid arguments for firecrawl_deep_research');
- }
- try {
- const researchStartTime = Date.now();
- safeLog('info', `Starting deep research for query: ${args.query}`);
- const response = await client.deepResearch(args.query, {
- maxDepth: args.maxDepth,
- timeLimit: args.timeLimit,
- maxUrls: args.maxUrls,
- // @ts-expect-error Extended API options including origin
- origin: 'mcp-server',
- },
- // Activity callback
- (activity) => {
- safeLog('info', `Research activity: ${activity.message} (Depth: ${activity.depth})`);
- },
- // Source callback
- (source) => {
- safeLog('info', `Research source found: ${source.url}${source.title ? ` - ${source.title}` : ''}`);
- });
- // Log performance metrics
- safeLog('info', `Deep research completed in ${Date.now() - researchStartTime}ms`);
- if (!response.success) {
- throw new Error(response.error || 'Deep research failed');
- }
- // Format the results
- const formattedResponse = {
- finalAnalysis: response.data.finalAnalysis,
- activities: response.data.activities,
- sources: response.data.sources,
- };
- return {
- content: [
- {
- type: 'text',
- text: trimResponseText(formattedResponse.finalAnalysis),
- },
- ],
- isError: false,
- };
- }
- catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error);
- return {
- content: [{ type: 'text', text: trimResponseText(errorMessage) }],
- isError: true,
- };
- }
- }
  case 'firecrawl_generate_llmstxt': {
  if (!isGenerateLLMsTextOptions(args)) {
  throw new Error('Invalid arguments for firecrawl_generate_llmstxt');
@@ -1152,8 +1115,7 @@ function formatResults(data) {
  return data
  .map((doc) => {
  const content = doc.markdown || doc.html || doc.rawHtml || 'No content';
- return `URL: ${doc.url || 'Unknown URL'}
- Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''}
+ return `Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''}
  ${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`;
  })
  .join('\n\n');
@@ -1214,6 +1176,92 @@ async function runSSELocalServer() {
  console.error('Error starting server:', error);
  }
  }
+ async function runHTTPStreamableServer() {
+ const app = express();
+ app.use(express.json());
+ const transports = {};
+ // A single endpoint handles all MCP requests.
+ app.all('/mcp', async (req, res) => {
+ try {
+ const sessionId = req.headers['mcp-session-id'];
+ let transport;
+ if (sessionId && transports[sessionId]) {
+ transport = transports[sessionId];
+ }
+ else if (!sessionId &&
+ req.method === 'POST' &&
+ req.body &&
+ typeof req.body === 'object' &&
+ req.body.method === 'initialize') {
+ transport = new StreamableHTTPServerTransport({
+ sessionIdGenerator: () => {
+ const id = randomUUID();
+ return id;
+ },
+ onsessioninitialized: (sid) => {
+ transports[sid] = transport;
+ },
+ });
+ transport.onclose = () => {
+ const sid = transport.sessionId;
+ if (sid && transports[sid]) {
+ delete transports[sid];
+ }
+ };
+ console.log('Creating server instance');
+ console.log('Connecting transport to server');
+ await server.connect(transport);
+ await transport.handleRequest(req, res, req.body);
+ return;
+ }
+ else {
+ res.status(400).json({
+ jsonrpc: '2.0',
+ error: {
+ code: -32000,
+ message: 'Invalid or missing session ID',
+ },
+ id: null,
+ });
+ return;
+ }
+ await transport.handleRequest(req, res, req.body);
+ }
+ catch (error) {
+ if (!res.headersSent) {
+ res.status(500).json({
+ jsonrpc: '2.0',
+ error: {
+ code: -32603,
+ message: 'Internal server error',
+ },
+ id: null,
+ });
+ }
+ }
+ });
+ const PORT = 3000;
+ const appServer = app.listen(PORT, () => {
+ console.log(`MCP Streamable HTTP Server listening on port ${PORT}`);
+ });
+ process.on('SIGINT', async () => {
+ console.log('Shutting down server...');
+ for (const sessionId in transports) {
+ try {
+ console.log(`Closing transport for session ${sessionId}`);
+ await transports[sessionId].close();
+ delete transports[sessionId];
+ }
+ catch (error) {
+ console.error(`Error closing transport for session ${sessionId}:`, error);
+ }
+ }
+ appServer.close(() => {
+ console.log('Server shutdown complete');
+ process.exit(0);
+ });
+ });
+ }
  async function runSSECloudServer() {
  const transports = {};
  const app = express();
@@ -1277,6 +1325,13 @@ else if (process.env.SSE_LOCAL === 'true') {
  process.exit(1);
  });
  }
+ else if (process.env.HTTP_STREAMABLE_SERVER === 'true') {
+ console.log('Running HTTP Streamable Server');
+ runHTTPStreamableServer().catch((error) => {
+ console.error('Fatal error running server:', error);
+ process.exit(1);
+ });
+ }
  else {
  runLocalServer().catch((error) => {
  console.error('Fatal error running server:', error);
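For orientation, here is a minimal sketch (not part of the package) of what arguments to the updated firecrawl_scrape tool look like under the 2.0.0 schema above; the field names and defaults are taken from the inputSchema in this diff, while the concrete values are illustrative only:

```js
// Hypothetical arguments for a firecrawl_scrape call against the v2 schema.
// Note the formats union (plain strings or a { type: 'json', ... } object),
// the new onlyMainContent/storeInCache defaults, and the 48-hour maxAge default.
const scrapeArgs = {
  url: 'https://example.com',
  formats: [
    'markdown',
    { type: 'json', prompt: 'Extract the page title and author', schema: { type: 'object' } },
  ],
  onlyMainContent: true, // default is now true
  storeInCache: true,    // new option; set false for data-protection-sensitive scrapes
  maxAge: 172800000,     // default changed from 0 to 172800000 (48 hours)
};
```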
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "firecrawl-mcp",
- "version": "1.12.0",
+ "version": "2.0.0",
  "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
  "type": "module",
  "bin": {
@@ -20,15 +20,17 @@
  "lint:fix": "eslint src/**/*.ts --fix",
  "format": "prettier --write .",
  "prepare": "npm run build",
- "publish": "npm run build && npm publish"
+ "publish": "npm run build && npm publish",
+ "publish-beta": "npm run build && npm publish --tag beta"
  },
  "license": "MIT",
  "dependencies": {
- "@mendable/firecrawl-js": "^1.19.0",
- "@modelcontextprotocol/sdk": "^1.4.1",
+ "@mendable/firecrawl-js": "^3.0.3",
+ "@modelcontextprotocol/sdk": "^1.17.3",
  "dotenv": "^16.4.7",
  "express": "^5.1.0",
  "shx": "^0.3.4",
+ "typescript": "^5.9.2",
  "ws": "^8.18.1"
  },
  "devDependencies": {
@@ -43,8 +45,7 @@
  "jest": "^29.7.0",
  "jest-mock-extended": "^4.0.0-beta1",
  "prettier": "^3.1.1",
- "ts-jest": "^29.1.1",
- "typescript": "^5.3.3"
+ "ts-jest": "^29.1.1"
  },
  "engines": {
  "node": ">=18.0.0"
@@ -58,11 +59,11 @@
  ],
  "repository": {
  "type": "git",
- "url": "git+https://github.com/mendableai/firecrawl-mcp-server.git"
+ "url": "git+https://github.com/firecrawl/firecrawl-mcp-server.git"
  },
  "author": "vrknetha",
  "bugs": {
- "url": "https://github.com/mendableai/firecrawl-mcp-server/issues"
+ "url": "https://github.com/firecrawl/firecrawl-mcp-server/issues"
  },
- "homepage": "https://github.com/mendableai/firecrawl-mcp-server#readme"
+ "homepage": "https://github.com/firecrawl/firecrawl-mcp-server#readme"
  }
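The dependency bump from @mendable/firecrawl-js ^1.19.0 to ^3.0.3 is what drives the method renames visible in dist/index.js above. A rough sketch of the call shapes as this server now uses them (assuming a FIRECRAWL_API_KEY environment variable; not an exhaustive migration guide):

```js
import FirecrawlApp from '@mendable/firecrawl-js';

// v1 (removed in this diff)            v3 (added in this diff)
// client.scrapeUrl(url, opts)      ->  client.scrape(url, opts)
// client.mapUrl(url, opts)         ->  client.map(url, opts)
// client.asyncCrawlUrl(url, opts)  ->  client.crawl(url, opts)
// client.checkCrawlStatus(id)      ->  client.getCrawlStatus(id)
// client.extract(urls, opts)       ->  client.extract({ urls, ...opts })

const client = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });
const doc = await client.scrape('https://example.com', {
  formats: ['markdown'],
  maxAge: 172800000, // reuse cached content up to 48 hours old
});
console.log(doc.markdown);
```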