firecrawl-mcp 1.4.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.js +103 -8
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Big thanks to [@vrknetha](https://github.com/vrknetha), [@cawstudios](https://ca
|
|
|
6
6
|
|
|
7
7
|
## Features
|
|
8
8
|
|
|
9
|
-
- Scrape, crawl, search, extract and batch scrape support
|
|
9
|
+
- Scrape, crawl, search, extract, deep research and batch scrape support
|
|
10
10
|
- Web scraping with JS rendering
|
|
11
11
|
- URL discovery and crawling
|
|
12
12
|
- Web search with content extraction
|
package/dist/index.js
CHANGED
|
@@ -518,6 +518,28 @@ const DEEP_RESEARCH_TOOL = {
|
|
|
518
518
|
required: ['query'],
|
|
519
519
|
},
|
|
520
520
|
};
|
|
521
|
+
/**
 * Tool descriptor for `firecrawl_generate_llmstxt`.
 *
 * Input arguments:
 *   - url          (string, required)  URL to generate LLMs.txt from.
 *   - maxUrls      (number, optional)  How many URLs to process.
 *   - showFullText (boolean, optional) Whether LLMs-full.txt is included in the response.
 */
const GENERATE_LLMSTXT_TOOL = {
    name: 'firecrawl_generate_llmstxt',
    description: 'Generate standardized LLMs.txt file for a given URL, which provides context about how LLMs should interact with the website.',
    inputSchema: {
        type: 'object',
        properties: {
            url: {
                type: 'string',
                description: 'The URL to generate LLMs.txt from',
            },
            maxUrls: {
                type: 'number',
                description: 'Maximum number of URLs to process (1-100, default: 10)',
                // Mirror the range and default stated in the description so
                // schema-aware clients can see them without parsing prose.
                minimum: 1,
                maximum: 100,
                default: 10,
            },
            showFullText: {
                type: 'boolean',
                description: 'Whether to show the full LLMs-full.txt in the response',
            },
        },
        required: ['url'],
    },
};
|
|
521
543
|
// Type guards
|
|
522
544
|
function isScrapeOptions(args) {
|
|
523
545
|
return (typeof args === 'object' &&
|
|
@@ -563,6 +585,12 @@ function isExtractOptions(args) {
|
|
|
563
585
|
return (Array.isArray(urls) &&
|
|
564
586
|
urls.every((url) => typeof url === 'string'));
|
|
565
587
|
}
|
|
588
|
+
/**
 * Type guard for the arguments of the LLMs.txt generation tool.
 *
 * @param {unknown} args - Raw tool-call arguments.
 * @returns {boolean} `true` when `args` is a non-null object with a string
 *   `url` and, if supplied, a numeric `maxUrls` and a boolean `showFullText`.
 */
function isGenerateLLMsTextOptions(args) {
    if (typeof args !== 'object' || args === null) {
        return false;
    }
    if (!('url' in args) || typeof args.url !== 'string') {
        return false;
    }
    // Optional fields may be absent (undefined/null), but a supplied value
    // must have the expected primitive type before it is passed along.
    if (args.maxUrls != null && typeof args.maxUrls !== 'number') {
        return false;
    }
    if (args.showFullText != null && typeof args.showFullText !== 'boolean') {
        return false;
    }
    return true;
}
|
|
566
594
|
// Server implementation
|
|
567
595
|
const server = new Server({
|
|
568
596
|
name: 'firecrawl-mcp',
|
|
@@ -699,6 +727,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
|
699
727
|
SEARCH_TOOL,
|
|
700
728
|
EXTRACT_TOOL,
|
|
701
729
|
DEEP_RESEARCH_TOOL,
|
|
730
|
+
GENERATE_LLMSTXT_TOOL,
|
|
702
731
|
],
|
|
703
732
|
}));
|
|
704
733
|
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
@@ -734,12 +763,36 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
734
763
|
if ('success' in response && !response.success) {
|
|
735
764
|
throw new Error(response.error || 'Scraping failed');
|
|
736
765
|
}
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
766
|
+
// Format content based on requested formats
|
|
767
|
+
const contentParts = [];
|
|
768
|
+
if (options.formats?.includes('markdown') && response.markdown) {
|
|
769
|
+
contentParts.push(response.markdown);
|
|
770
|
+
}
|
|
771
|
+
if (options.formats?.includes('html') && response.html) {
|
|
772
|
+
contentParts.push(response.html);
|
|
773
|
+
}
|
|
774
|
+
if (options.formats?.includes('rawHtml') && response.rawHtml) {
|
|
775
|
+
contentParts.push(response.rawHtml);
|
|
776
|
+
}
|
|
777
|
+
if (options.formats?.includes('links') && response.links) {
|
|
778
|
+
contentParts.push(response.links.join('\n'));
|
|
779
|
+
}
|
|
780
|
+
if (options.formats?.includes('screenshot') && response.screenshot) {
|
|
781
|
+
contentParts.push(response.screenshot);
|
|
782
|
+
}
|
|
783
|
+
if (options.formats?.includes('extract') && response.extract) {
|
|
784
|
+
contentParts.push(JSON.stringify(response.extract, null, 2));
|
|
785
|
+
}
|
|
786
|
+
// Add warning to response if present
|
|
787
|
+
if (response.warning) {
|
|
788
|
+
server.sendLoggingMessage({
|
|
789
|
+
level: 'warning',
|
|
790
|
+
data: response.warning,
|
|
791
|
+
});
|
|
792
|
+
}
|
|
740
793
|
return {
|
|
741
794
|
content: [
|
|
742
|
-
{ type: 'text', text:
|
|
795
|
+
{ type: 'text', text: contentParts.join('\n\n') || 'No content available' },
|
|
743
796
|
],
|
|
744
797
|
isError: false,
|
|
745
798
|
};
|
|
@@ -1056,6 +1109,48 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
|
|
|
1056
1109
|
};
|
|
1057
1110
|
}
|
|
1058
1111
|
}
|
|
1112
|
+
case 'firecrawl_generate_llmstxt': {
|
|
1113
|
+
if (!isGenerateLLMsTextOptions(args)) {
|
|
1114
|
+
throw new Error('Invalid arguments for firecrawl_generate_llmstxt');
|
|
1115
|
+
}
|
|
1116
|
+
try {
|
|
1117
|
+
const { url, ...params } = args;
|
|
1118
|
+
const generateStartTime = Date.now();
|
|
1119
|
+
server.sendLoggingMessage({
|
|
1120
|
+
level: 'info',
|
|
1121
|
+
data: `Starting LLMs.txt generation for URL: ${url}`,
|
|
1122
|
+
});
|
|
1123
|
+
// Start the generation process
|
|
1124
|
+
const response = await withRetry(async () => client.generateLLMsText(url, params), 'LLMs.txt generation');
|
|
1125
|
+
if (!response.success) {
|
|
1126
|
+
throw new Error(response.error || 'LLMs.txt generation failed');
|
|
1127
|
+
}
|
|
1128
|
+
// Log performance metrics
|
|
1129
|
+
server.sendLoggingMessage({
|
|
1130
|
+
level: 'info',
|
|
1131
|
+
data: `LLMs.txt generation completed in ${Date.now() - generateStartTime}ms`,
|
|
1132
|
+
});
|
|
1133
|
+
// Format the response
|
|
1134
|
+
let resultText = '';
|
|
1135
|
+
if ('data' in response) {
|
|
1136
|
+
resultText = `LLMs.txt content:\n\n${response.data.llmstxt}`;
|
|
1137
|
+
if (args.showFullText && response.data.llmsfulltxt) {
|
|
1138
|
+
resultText += `\n\nLLMs-full.txt content:\n\n${response.data.llmsfulltxt}`;
|
|
1139
|
+
}
|
|
1140
|
+
}
|
|
1141
|
+
return {
|
|
1142
|
+
content: [{ type: 'text', text: resultText }],
|
|
1143
|
+
isError: false,
|
|
1144
|
+
};
|
|
1145
|
+
}
|
|
1146
|
+
catch (error) {
|
|
1147
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1148
|
+
return {
|
|
1149
|
+
content: [{ type: 'text', text: errorMessage }],
|
|
1150
|
+
isError: true,
|
|
1151
|
+
};
|
|
1152
|
+
}
|
|
1153
|
+
}
|
|
1059
1154
|
default:
|
|
1060
1155
|
return {
|
|
1061
1156
|
content: [{ type: 'text', text: `Unknown tool: ${name}` }],
|
|
@@ -1104,6 +1199,10 @@ ${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`;
|
|
|
1104
1199
|
})
|
|
1105
1200
|
.join('\n\n');
|
|
1106
1201
|
}
|
|
1202
|
+
// Add type guard for credit usage
|
|
1203
|
+
/**
 * Type guard reporting whether a response carries a numeric `creditsUsed` field.
 *
 * @param {unknown} response - Value to inspect; may be of any type.
 * @returns {boolean} `true` only when `response` is a non-null object (or
 *   function) that has a `creditsUsed` property whose value is a number.
 */
function hasCredits(response) {
    // The `in` operator throws a TypeError on null, undefined and primitives,
    // so rule those out first and answer `false` instead of crashing.
    if (response === null || (typeof response !== 'object' && typeof response !== 'function')) {
        return false;
    }
    return 'creditsUsed' in response && typeof response.creditsUsed === 'number';
}
|
|
1107
1206
|
// Server startup
|
|
1108
1207
|
async function runServer() {
|
|
1109
1208
|
try {
|
|
@@ -1130,7 +1229,3 @@ runServer().catch((error) => {
|
|
|
1130
1229
|
console.error('Fatal error running server:', error);
|
|
1131
1230
|
process.exit(1);
|
|
1132
1231
|
});
|
|
1133
|
-
// Add type guard for credit usage
|
|
1134
|
-
/**
 * Type guard reporting whether a response carries a numeric `creditsUsed` field.
 *
 * @param {unknown} response - Value to inspect; may be of any type.
 * @returns {boolean} `true` only when `response` is a non-null object (or
 *   function) that has a `creditsUsed` property whose value is a number.
 */
function hasCredits(response) {
    // The `in` operator throws a TypeError on null, undefined and primitives,
    // so rule those out first and answer `false` instead of crashing.
    if (response === null || (typeof response !== 'object' && typeof response !== 'function')) {
        return false;
    }
    return 'creditsUsed' in response && typeof response.creditsUsed === 'number';
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "1.4.2",
|
|
3
|
+
"version": "1.6.0",
|
|
4
4
|
"description": "MCP server for FireCrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|