firecrawl-mcp 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +0 -76
  2. package/dist/index.js +17 -56
  3. package/package.json +2 -2
package/README.md CHANGED
@@ -311,8 +311,6 @@ Use this guide to select the right tool for your task:
  - **If you want to search the web for info:** use **search**
  - **If you want to extract structured data:** use **extract**
  - **If you want to analyze a whole site or section:** use **crawl** (with limits!)
- - **If you want to do in-depth research:** use **deep_research**
- - **If you want to generate LLMs.txt:** use **generate_llmstxt**

  ### Quick Reference Table

@@ -324,8 +322,6 @@ Use this guide to select the right tool for your task:
  | crawl | Multi-page extraction (with limits) | markdown/html[] |
  | search | Web search for info | results[] |
  | extract | Structured data from pages | JSON |
- | deep_research | In-depth, multi-source research | summary, sources|
- | generate_llmstxt | LLMs.txt for a domain | text |

  ## Available Tools

@@ -629,78 +625,6 @@ When using a self-hosted instance, the extraction will use your configured LLM.
  }
  ```

- ### 9. Deep Research Tool (`firecrawl_deep_research`)
-
- Conduct deep web research on a query using intelligent crawling, search, and LLM analysis.
-
- **Best for:**
- - Complex research questions requiring multiple sources, in-depth analysis.
-
- **Not recommended for:**
- - Simple questions that can be answered with a single search
- - When you need very specific information from a known page (use scrape)
- - When you need results quickly (deep research can take time)
-
- **Arguments:**
- - query (string, required): The research question or topic to explore.
- - maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).
- - timeLimit (number, optional): Time limit in seconds for the research session (default: 120).
- - maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).
-
- **Prompt Example:**
- > "Research the environmental impact of electric vehicles versus gasoline vehicles."
-
- **Usage Example:**
- ```json
- {
- "name": "firecrawl_deep_research",
- "arguments": {
- "query": "What are the environmental impacts of electric vehicles compared to gasoline vehicles?",
- "maxDepth": 3,
- "timeLimit": 120,
- "maxUrls": 50
- }
- }
- ```
-
- **Returns:**
- - Final analysis generated by an LLM based on research. (data.finalAnalysis)
- - May also include structured activities and sources used in the research process.
-
- ### 10. Generate LLMs.txt Tool (`firecrawl_generate_llmstxt`)
-
- Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact
- with the site.
-
- **Best for:**
- - Creating machine-readable permission guidelines for AI models.
-
- **Not recommended for:**
- - General content extraction or research
-
- **Arguments:**
- - url (string, required): The base URL of the website to analyze.
- - maxUrls (number, optional): Max number of URLs to include (default: 10).
- - showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.
-
- **Prompt Example:**
- > "Generate an LLMs.txt file for example.com."
-
- **Usage Example:**
- ```json
- {
- "name": "firecrawl_generate_llmstxt",
- "arguments": {
- "url": "https://example.com",
- "maxUrls": 20,
- "showFullText": true
- }
- }
- ```
-
- **Returns:**
- - LLMs.txt file contents (and optionally llms-full.txt)
-
  ## Logging System

  The server includes comprehensive logging:
package/dist/index.js CHANGED
@@ -55,6 +55,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
  'links',
  'extract',
  'summary',
+ 'changeTracking',
  ],
  },
  {
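With `changeTracking` added to the scrape tool's `formats` enum, clients can now request change-tracking output from `firecrawl_scrape`. A minimal sketch of such a call; the URL is a placeholder, and pairing with `markdown` follows the upstream Firecrawl convention that change tracking accompanies a markdown scrape:

```json
{
  "name": "firecrawl_scrape",
  "arguments": {
    "url": "https://example.com",
    "formats": ["markdown", "changeTracking"]
  }
}
```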
@@ -256,7 +257,7 @@ const CRAWL_TOOL = {
  **Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.
  **Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).
  **Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.
- **Common mistakes:** Setting limit or maxDiscoveryDepth too high (causes token overflow); using crawl for a single page (use scrape instead).
+ **Common mistakes:** Setting limit or maxDiscoveryDepth too high (causes token overflow) or too low (causes missing pages); using crawl for a single page (use scrape instead). Using a /* wildcard is not recommended.
  **Prompt Example:** "Get all blog posts from the first two levels of example.com/blog."
  **Usage Example:**
  \`\`\`json
@@ -264,8 +265,8 @@ const CRAWL_TOOL = {
  "name": "firecrawl_crawl",
  "arguments": {
  "url": "https://example.com/blog/*",
- "maxDiscoveryDepth": 2,
- "limit": 100,
+ "maxDiscoveryDepth": 5,
+ "limit": 20,
  "allowExternalLinks": false,
  "deduplicateSimilarURLs": true,
  "sitemap": "include"
@@ -520,14 +521,15 @@ Search the web and optionally extract content from search results. This is the m
  type: 'object',
  properties: {
  type: { type: 'string', enum: ['web'] },
- tbs: {
- type: 'string',
- description: 'Time-based search parameter (e.g., qdr:h, qdr:d, qdr:w, qdr:m, qdr:y or custom cdr with cd_min/cd_max)',
- },
- location: {
- type: 'string',
- description: 'Location parameter for search results',
- },
+ // tbs: {
+ // type: 'string',
+ // description:
+ // 'Time-based search parameter (e.g., qdr:h, qdr:d, qdr:w, qdr:m, qdr:y or custom cdr with cd_min/cd_max)',
+ // },
+ // location: {
+ // type: 'string',
+ // description: 'Location parameter for search results',
+ // },
  },
  required: ['type'],
  additionalProperties: false,
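Because the source item schema keeps `required: ['type']` and `additionalProperties: false`, a source object that still includes `tbs` or `location` no longer conforms to the declared schema. A minimal sketch of a conforming `firecrawl_search` call; the `query` and `limit` values are illustrative, and the top-level argument names are assumed from the package README rather than shown in this diff:

```json
{
  "name": "firecrawl_search",
  "arguments": {
    "query": "firecrawl mcp server",
    "limit": 5,
    "sources": [{ "type": "web" }]
  }
}
```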
@@ -701,12 +703,6 @@ function isExtractOptions(args) {
  return (Array.isArray(urls) &&
  urls.every((url) => typeof url === 'string'));
  }
- function isGenerateLLMsTextOptions(args) {
- return (typeof args === 'object' &&
- args !== null &&
- 'url' in args &&
- typeof args.url === 'string');
- }
  function removeEmptyTopLevel(obj) {
  const out = {};
  for (const [k, v] of Object.entries(obj)) {
@@ -904,7 +900,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
  }
  return {
  content: [
- { type: 'text', text: trimResponseText(JSON.stringify(response.links, null, 2)) },
+ {
+ type: 'text',
+ text: trimResponseText(JSON.stringify(response.links, null, 2)),
+ },
  ],
  isError: false,
  };
@@ -1039,44 +1038,6 @@ ${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}
  };
  }
  }
- case 'firecrawl_generate_llmstxt': {
- if (!isGenerateLLMsTextOptions(args)) {
- throw new Error('Invalid arguments for firecrawl_generate_llmstxt');
- }
- try {
- const { url, ...params } = args;
- const generateStartTime = Date.now();
- safeLog('info', `Starting LLMs.txt generation for URL: ${url}`);
- // Start the generation process
- const response = await withRetry(async () =>
- // @ts-expect-error Extended API options including origin
- client.generateLLMsText(url, { ...params, origin: 'mcp-server' }), 'LLMs.txt generation');
- if (!response.success) {
- throw new Error(response.error || 'LLMs.txt generation failed');
- }
- // Log performance metrics
- safeLog('info', `LLMs.txt generation completed in ${Date.now() - generateStartTime}ms`);
- // Format the response
- let resultText = '';
- if ('data' in response) {
- resultText = `LLMs.txt content:\n\n${response.data.llmstxt}`;
- if (args.showFullText && response.data.llmsfulltxt) {
- resultText += `\n\nLLMs-full.txt content:\n\n${response.data.llmsfulltxt}`;
- }
- }
- return {
- content: [{ type: 'text', text: trimResponseText(resultText) }],
- isError: false,
- };
- }
- catch (error) {
- const errorMessage = error instanceof Error ? error.message : String(error);
- return {
- content: [{ type: 'text', text: trimResponseText(errorMessage) }],
- isError: true,
- };
- }
- }
  default:
  return {
  content: [
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "firecrawl-mcp",
- "version": "2.0.0",
- "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
+ "version": "2.0.2",
+ "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
  "type": "module",
  "bin": {
  "firecrawl-mcp": "dist/index.js"