firecrawl-mcp 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -76
- package/dist/index.js +8 -48
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -311,8 +311,6 @@ Use this guide to select the right tool for your task:
|
|
|
311
311
|
- **If you want to search the web for info:** use **search**
|
|
312
312
|
- **If you want to extract structured data:** use **extract**
|
|
313
313
|
- **If you want to analyze a whole site or section:** use **crawl** (with limits!)
|
|
314
|
-
- **If you want to do in-depth research:** use **deep_research**
|
|
315
|
-
- **If you want to generate LLMs.txt:** use **generate_llmstxt**
|
|
316
314
|
|
|
317
315
|
### Quick Reference Table
|
|
318
316
|
|
|
@@ -324,8 +322,6 @@ Use this guide to select the right tool for your task:
|
|
|
324
322
|
| crawl | Multi-page extraction (with limits) | markdown/html[] |
|
|
325
323
|
| search | Web search for info | results[] |
|
|
326
324
|
| extract | Structured data from pages | JSON |
|
|
327
|
-
| deep_research | In-depth, multi-source research | summary, sources|
|
|
328
|
-
| generate_llmstxt | LLMs.txt for a domain | text |
|
|
329
325
|
|
|
330
326
|
## Available Tools
|
|
331
327
|
|
|
@@ -629,78 +625,6 @@ When using a self-hosted instance, the extraction will use your configured LLM.
|
|
|
629
625
|
}
|
|
630
626
|
```
|
|
631
627
|
|
|
632
|
-
### 9. Deep Research Tool (`firecrawl_deep_research`)
|
|
633
|
-
|
|
634
|
-
Conduct deep web research on a query using intelligent crawling, search, and LLM analysis.
|
|
635
|
-
|
|
636
|
-
**Best for:**
|
|
637
|
-
- Complex research questions requiring multiple sources, in-depth analysis.
|
|
638
|
-
|
|
639
|
-
**Not recommended for:**
|
|
640
|
-
- Simple questions that can be answered with a single search
|
|
641
|
-
- When you need very specific information from a known page (use scrape)
|
|
642
|
-
- When you need results quickly (deep research can take time)
|
|
643
|
-
|
|
644
|
-
**Arguments:**
|
|
645
|
-
- query (string, required): The research question or topic to explore.
|
|
646
|
-
- maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).
|
|
647
|
-
- timeLimit (number, optional): Time limit in seconds for the research session (default: 120).
|
|
648
|
-
- maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).
|
|
649
|
-
|
|
650
|
-
**Prompt Example:**
|
|
651
|
-
> "Research the environmental impact of electric vehicles versus gasoline vehicles."
|
|
652
|
-
|
|
653
|
-
**Usage Example:**
|
|
654
|
-
```json
|
|
655
|
-
{
|
|
656
|
-
"name": "firecrawl_deep_research",
|
|
657
|
-
"arguments": {
|
|
658
|
-
"query": "What are the environmental impacts of electric vehicles compared to gasoline vehicles?",
|
|
659
|
-
"maxDepth": 3,
|
|
660
|
-
"timeLimit": 120,
|
|
661
|
-
"maxUrls": 50
|
|
662
|
-
}
|
|
663
|
-
}
|
|
664
|
-
```
|
|
665
|
-
|
|
666
|
-
**Returns:**
|
|
667
|
-
- Final analysis generated by an LLM based on research. (data.finalAnalysis)
|
|
668
|
-
- May also include structured activities and sources used in the research process.
|
|
669
|
-
|
|
670
|
-
### 10. Generate LLMs.txt Tool (`firecrawl_generate_llmstxt`)
|
|
671
|
-
|
|
672
|
-
Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact
|
|
673
|
-
with the site.
|
|
674
|
-
|
|
675
|
-
**Best for:**
|
|
676
|
-
- Creating machine-readable permission guidelines for AI models.
|
|
677
|
-
|
|
678
|
-
**Not recommended for:**
|
|
679
|
-
- General content extraction or research
|
|
680
|
-
|
|
681
|
-
**Arguments:**
|
|
682
|
-
- url (string, required): The base URL of the website to analyze.
|
|
683
|
-
- maxUrls (number, optional): Max number of URLs to include (default: 10).
|
|
684
|
-
- showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.
|
|
685
|
-
|
|
686
|
-
**Prompt Example:**
|
|
687
|
-
> "Generate an LLMs.txt file for example.com."
|
|
688
|
-
|
|
689
|
-
**Usage Example:**
|
|
690
|
-
```json
|
|
691
|
-
{
|
|
692
|
-
"name": "firecrawl_generate_llmstxt",
|
|
693
|
-
"arguments": {
|
|
694
|
-
"url": "https://example.com",
|
|
695
|
-
"maxUrls": 20,
|
|
696
|
-
"showFullText": true
|
|
697
|
-
}
|
|
698
|
-
}
|
|
699
|
-
```
|
|
700
|
-
|
|
701
|
-
**Returns:**
|
|
702
|
-
- LLMs.txt file contents (and optionally llms-full.txt)
|
|
703
|
-
|
|
704
628
|
## Logging System
|
|
705
629
|
|
|
706
630
|
The server includes comprehensive logging:
|
package/dist/index.js
CHANGED
|
@@ -55,6 +55,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
|
|
|
55
55
|
'links',
|
|
56
56
|
'extract',
|
|
57
57
|
'summary',
|
|
58
|
+
'changeTracking',
|
|
58
59
|
],
|
|
59
60
|
},
|
|
60
61
|
{
|
|
@@ -256,7 +257,7 @@ const CRAWL_TOOL = {
|
|
|
256
257
|
**Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.
|
|
257
258
|
**Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).
|
|
258
259
|
**Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.
|
|
259
|
-
**Common mistakes:** Setting limit or maxDiscoveryDepth too high (causes token overflow); using crawl for a single page (use scrape instead).
|
|
260
|
+
**Common mistakes:** Setting limit or maxDiscoveryDepth too high (causes token overflow) or too low (causes missing pages); using crawl for a single page (use scrape instead). Using a /* wildcard is not recommended.
|
|
260
261
|
**Prompt Example:** "Get all blog posts from the first two levels of example.com/blog."
|
|
261
262
|
**Usage Example:**
|
|
262
263
|
\`\`\`json
|
|
@@ -264,8 +265,8 @@ const CRAWL_TOOL = {
|
|
|
264
265
|
"name": "firecrawl_crawl",
|
|
265
266
|
"arguments": {
|
|
266
267
|
"url": "https://example.com/blog/*",
|
|
267
|
-
"maxDiscoveryDepth":
|
|
268
|
-
"limit":
|
|
268
|
+
"maxDiscoveryDepth": 5,
|
|
269
|
+
"limit": 20,
|
|
269
270
|
"allowExternalLinks": false,
|
|
270
271
|
"deduplicateSimilarURLs": true,
|
|
271
272
|
"sitemap": "include"
|
|
@@ -702,12 +703,6 @@ function isExtractOptions(args) {
|
|
|
702
703
|
return (Array.isArray(urls) &&
|
|
703
704
|
urls.every((url) => typeof url === 'string'));
|
|
704
705
|
}
|
|
705
|
-
function isGenerateLLMsTextOptions(args) {
|
|
706
|
-
return (typeof args === 'object' &&
|
|
707
|
-
args !== null &&
|
|
708
|
-
'url' in args &&
|
|
709
|
-
typeof args.url === 'string');
|
|
710
|
-
}
|
|
711
706
|
function removeEmptyTopLevel(obj) {
|
|
712
707
|
const out = {};
|
|
713
708
|
for (const [k, v] of Object.entries(obj)) {
|
|
@@ -905,7 +900,10 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
905
900
|
}
|
|
906
901
|
return {
|
|
907
902
|
content: [
|
|
908
|
-
{
|
|
903
|
+
{
|
|
904
|
+
type: 'text',
|
|
905
|
+
text: trimResponseText(JSON.stringify(response.links, null, 2)),
|
|
906
|
+
},
|
|
909
907
|
],
|
|
910
908
|
isError: false,
|
|
911
909
|
};
|
|
@@ -1040,44 +1038,6 @@ ${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}
|
|
|
1040
1038
|
};
|
|
1041
1039
|
}
|
|
1042
1040
|
}
|
|
1043
|
-
case 'firecrawl_generate_llmstxt': {
|
|
1044
|
-
if (!isGenerateLLMsTextOptions(args)) {
|
|
1045
|
-
throw new Error('Invalid arguments for firecrawl_generate_llmstxt');
|
|
1046
|
-
}
|
|
1047
|
-
try {
|
|
1048
|
-
const { url, ...params } = args;
|
|
1049
|
-
const generateStartTime = Date.now();
|
|
1050
|
-
safeLog('info', `Starting LLMs.txt generation for URL: ${url}`);
|
|
1051
|
-
// Start the generation process
|
|
1052
|
-
const response = await withRetry(async () =>
|
|
1053
|
-
// @ts-expect-error Extended API options including origin
|
|
1054
|
-
client.generateLLMsText(url, { ...params, origin: 'mcp-server' }), 'LLMs.txt generation');
|
|
1055
|
-
if (!response.success) {
|
|
1056
|
-
throw new Error(response.error || 'LLMs.txt generation failed');
|
|
1057
|
-
}
|
|
1058
|
-
// Log performance metrics
|
|
1059
|
-
safeLog('info', `LLMs.txt generation completed in ${Date.now() - generateStartTime}ms`);
|
|
1060
|
-
// Format the response
|
|
1061
|
-
let resultText = '';
|
|
1062
|
-
if ('data' in response) {
|
|
1063
|
-
resultText = `LLMs.txt content:\n\n${response.data.llmstxt}`;
|
|
1064
|
-
if (args.showFullText && response.data.llmsfulltxt) {
|
|
1065
|
-
resultText += `\n\nLLMs-full.txt content:\n\n${response.data.llmsfulltxt}`;
|
|
1066
|
-
}
|
|
1067
|
-
}
|
|
1068
|
-
return {
|
|
1069
|
-
content: [{ type: 'text', text: trimResponseText(resultText) }],
|
|
1070
|
-
isError: false,
|
|
1071
|
-
};
|
|
1072
|
-
}
|
|
1073
|
-
catch (error) {
|
|
1074
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1075
|
-
return {
|
|
1076
|
-
content: [{ type: 'text', text: trimResponseText(errorMessage) }],
|
|
1077
|
-
isError: true,
|
|
1078
|
-
};
|
|
1079
|
-
}
|
|
1080
|
-
}
|
|
1081
1041
|
default:
|
|
1082
1042
|
return {
|
|
1083
1043
|
content: [
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "2.0.1",
|
|
4
|
-
"description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
|
|
3
|
+
"version": "2.0.2",
|
|
4
|
+
"description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"firecrawl-mcp": "dist/index.js"
|