firecrawl-mcp 1.7.2 → 1.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +90 -14
- package/dist/index.js +123 -231
- package/dist/index.test.js +1 -1
- package/package.json +7 -5
- package/dist/jest.setup.js +0 -58
- package/dist/src/index.js +0 -1053
- package/dist/src/index.test.js +0 -225
package/dist/index.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
3
3
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
|
+
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
|
|
4
5
|
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
5
6
|
import FirecrawlApp from '@mendable/firecrawl-js';
|
|
6
|
-
import
|
|
7
|
+
import express from 'express';
|
|
7
8
|
import dotenv from 'dotenv';
|
|
8
9
|
dotenv.config();
|
|
9
10
|
// Tool definitions
|
|
@@ -33,6 +34,7 @@ const SCRAPE_TOOL = {
|
|
|
33
34
|
'extract',
|
|
34
35
|
],
|
|
35
36
|
},
|
|
37
|
+
default: ['markdown'],
|
|
36
38
|
description: "Content formats to extract (default: ['markdown'])",
|
|
37
39
|
},
|
|
38
40
|
onlyMainContent: {
|
|
@@ -303,69 +305,6 @@ const CRAWL_TOOL = {
|
|
|
303
305
|
required: ['url'],
|
|
304
306
|
},
|
|
305
307
|
};
|
|
306
|
-
const BATCH_SCRAPE_TOOL = {
|
|
307
|
-
name: 'firecrawl_batch_scrape',
|
|
308
|
-
description: 'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.',
|
|
309
|
-
inputSchema: {
|
|
310
|
-
type: 'object',
|
|
311
|
-
properties: {
|
|
312
|
-
urls: {
|
|
313
|
-
type: 'array',
|
|
314
|
-
items: { type: 'string' },
|
|
315
|
-
description: 'List of URLs to scrape',
|
|
316
|
-
},
|
|
317
|
-
options: {
|
|
318
|
-
type: 'object',
|
|
319
|
-
properties: {
|
|
320
|
-
formats: {
|
|
321
|
-
type: 'array',
|
|
322
|
-
items: {
|
|
323
|
-
type: 'string',
|
|
324
|
-
enum: [
|
|
325
|
-
'markdown',
|
|
326
|
-
'html',
|
|
327
|
-
'rawHtml',
|
|
328
|
-
'screenshot',
|
|
329
|
-
'links',
|
|
330
|
-
'screenshot@fullPage',
|
|
331
|
-
'extract',
|
|
332
|
-
],
|
|
333
|
-
},
|
|
334
|
-
},
|
|
335
|
-
onlyMainContent: {
|
|
336
|
-
type: 'boolean',
|
|
337
|
-
},
|
|
338
|
-
includeTags: {
|
|
339
|
-
type: 'array',
|
|
340
|
-
items: { type: 'string' },
|
|
341
|
-
},
|
|
342
|
-
excludeTags: {
|
|
343
|
-
type: 'array',
|
|
344
|
-
items: { type: 'string' },
|
|
345
|
-
},
|
|
346
|
-
waitFor: {
|
|
347
|
-
type: 'number',
|
|
348
|
-
},
|
|
349
|
-
},
|
|
350
|
-
},
|
|
351
|
-
},
|
|
352
|
-
required: ['urls'],
|
|
353
|
-
},
|
|
354
|
-
};
|
|
355
|
-
const CHECK_BATCH_STATUS_TOOL = {
|
|
356
|
-
name: 'firecrawl_check_batch_status',
|
|
357
|
-
description: 'Check the status of a batch scraping job.',
|
|
358
|
-
inputSchema: {
|
|
359
|
-
type: 'object',
|
|
360
|
-
properties: {
|
|
361
|
-
id: {
|
|
362
|
-
type: 'string',
|
|
363
|
-
description: 'Batch job ID to check',
|
|
364
|
-
},
|
|
365
|
-
},
|
|
366
|
-
required: ['id'],
|
|
367
|
-
},
|
|
368
|
-
};
|
|
369
308
|
const CHECK_CRAWL_STATUS_TOOL = {
|
|
370
309
|
name: 'firecrawl_check_crawl_status',
|
|
371
310
|
description: 'Check the status of a crawl job.',
|
|
@@ -559,13 +498,6 @@ function isCrawlOptions(args) {
|
|
|
559
498
|
'url' in args &&
|
|
560
499
|
typeof args.url === 'string');
|
|
561
500
|
}
|
|
562
|
-
function isBatchScrapeOptions(args) {
|
|
563
|
-
return (typeof args === 'object' &&
|
|
564
|
-
args !== null &&
|
|
565
|
-
'urls' in args &&
|
|
566
|
-
Array.isArray(args.urls) &&
|
|
567
|
-
args.urls.every((url) => typeof url === 'string'));
|
|
568
|
-
}
|
|
569
501
|
function isStatusCheckOptions(args) {
|
|
570
502
|
return (typeof args === 'object' &&
|
|
571
503
|
args !== null &&
|
|
@@ -605,15 +537,13 @@ const server = new Server({
|
|
|
605
537
|
const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
|
|
606
538
|
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
|
|
607
539
|
// Check if API key is required (only for cloud service)
|
|
608
|
-
if (
|
|
540
|
+
if (process.env.CLOUD_SERVICE !== 'true' &&
|
|
541
|
+
!FIRECRAWL_API_URL &&
|
|
542
|
+
!FIRECRAWL_API_KEY) {
|
|
609
543
|
console.error('Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service');
|
|
610
544
|
process.exit(1);
|
|
611
545
|
}
|
|
612
|
-
// Initialize
|
|
613
|
-
const client = new FirecrawlApp({
|
|
614
|
-
apiKey: FIRECRAWL_API_KEY || '',
|
|
615
|
-
...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
|
|
616
|
-
});
|
|
546
|
+
// Initialize Firecrawl client with optional API URL
|
|
617
547
|
// Configuration for retries and monitoring
|
|
618
548
|
const CONFIG = {
|
|
619
549
|
retry: {
|
|
@@ -627,10 +557,6 @@ const CONFIG = {
|
|
|
627
557
|
criticalThreshold: Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
|
|
628
558
|
},
|
|
629
559
|
};
|
|
630
|
-
const creditUsage = {
|
|
631
|
-
total: 0,
|
|
632
|
-
lastCheck: Date.now(),
|
|
633
|
-
};
|
|
634
560
|
// Add utility function for delay
|
|
635
561
|
function delay(ms) {
|
|
636
562
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
@@ -664,58 +590,12 @@ async function withRetry(operation, context, attempt = 1) {
|
|
|
664
590
|
throw error;
|
|
665
591
|
}
|
|
666
592
|
}
|
|
667
|
-
// Add credit monitoring
|
|
668
|
-
async function updateCreditUsage(creditsUsed) {
|
|
669
|
-
creditUsage.total += creditsUsed;
|
|
670
|
-
// Log credit usage
|
|
671
|
-
safeLog('info', `Credit usage: ${creditUsage.total} credits used total`);
|
|
672
|
-
// Check thresholds
|
|
673
|
-
if (creditUsage.total >= CONFIG.credit.criticalThreshold) {
|
|
674
|
-
safeLog('error', `CRITICAL: Credit usage has reached ${creditUsage.total}`);
|
|
675
|
-
}
|
|
676
|
-
else if (creditUsage.total >= CONFIG.credit.warningThreshold) {
|
|
677
|
-
safeLog('warning', `WARNING: Credit usage has reached ${creditUsage.total}`);
|
|
678
|
-
}
|
|
679
|
-
}
|
|
680
|
-
// Initialize queue system
|
|
681
|
-
const batchQueue = new PQueue({ concurrency: 1 });
|
|
682
|
-
const batchOperations = new Map();
|
|
683
|
-
let operationCounter = 0;
|
|
684
|
-
async function processBatchOperation(operation) {
|
|
685
|
-
try {
|
|
686
|
-
operation.status = 'processing';
|
|
687
|
-
let totalCreditsUsed = 0;
|
|
688
|
-
// Use library's built-in batch processing
|
|
689
|
-
const response = await withRetry(async () => client.asyncBatchScrapeUrls(operation.urls, operation.options), `batch ${operation.id} processing`);
|
|
690
|
-
if (!response.success) {
|
|
691
|
-
throw new Error(response.error || 'Batch operation failed');
|
|
692
|
-
}
|
|
693
|
-
// Track credits if using cloud API
|
|
694
|
-
if (!FIRECRAWL_API_URL && hasCredits(response)) {
|
|
695
|
-
totalCreditsUsed += response.creditsUsed;
|
|
696
|
-
await updateCreditUsage(response.creditsUsed);
|
|
697
|
-
}
|
|
698
|
-
operation.status = 'completed';
|
|
699
|
-
operation.result = response;
|
|
700
|
-
// Log final credit usage for the batch
|
|
701
|
-
if (!FIRECRAWL_API_URL) {
|
|
702
|
-
safeLog('info', `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`);
|
|
703
|
-
}
|
|
704
|
-
}
|
|
705
|
-
catch (error) {
|
|
706
|
-
operation.status = 'failed';
|
|
707
|
-
operation.error = error instanceof Error ? error.message : String(error);
|
|
708
|
-
safeLog('error', `Batch ${operation.id} failed: ${operation.error}`);
|
|
709
|
-
}
|
|
710
|
-
}
|
|
711
593
|
// Tool handlers
|
|
712
594
|
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
713
595
|
tools: [
|
|
714
596
|
SCRAPE_TOOL,
|
|
715
597
|
MAP_TOOL,
|
|
716
598
|
CRAWL_TOOL,
|
|
717
|
-
BATCH_SCRAPE_TOOL,
|
|
718
|
-
CHECK_BATCH_STATUS_TOOL,
|
|
719
599
|
CHECK_CRAWL_STATUS_TOOL,
|
|
720
600
|
SEARCH_TOOL,
|
|
721
601
|
EXTRACT_TOOL,
|
|
@@ -727,6 +607,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
727
607
|
const startTime = Date.now();
|
|
728
608
|
try {
|
|
729
609
|
const { name, arguments: args } = request.params;
|
|
610
|
+
const apiKey = process.env.CLOUD_SERVICE
|
|
611
|
+
? request.params._meta?.apiKey
|
|
612
|
+
: FIRECRAWL_API_KEY;
|
|
613
|
+
if (process.env.CLOUD_SERVICE && !apiKey) {
|
|
614
|
+
throw new Error('No API key provided');
|
|
615
|
+
}
|
|
616
|
+
const client = new FirecrawlApp({
|
|
617
|
+
apiKey,
|
|
618
|
+
...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
|
|
619
|
+
});
|
|
730
620
|
// Log incoming request with timestamp
|
|
731
621
|
safeLog('info', `[${new Date().toISOString()}] Received request for tool: ${name}`);
|
|
732
622
|
if (!args) {
|
|
@@ -741,8 +631,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
741
631
|
try {
|
|
742
632
|
const scrapeStartTime = Date.now();
|
|
743
633
|
safeLog('info', `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`);
|
|
744
|
-
|
|
745
|
-
|
|
634
|
+
const response = await client.scrapeUrl(url, {
|
|
635
|
+
...options,
|
|
636
|
+
// @ts-expect-error Extended API options including origin
|
|
637
|
+
origin: 'mcp-server',
|
|
638
|
+
});
|
|
746
639
|
// Log performance metrics
|
|
747
640
|
safeLog('info', `Scrape completed in ${Date.now() - scrapeStartTime}ms`);
|
|
748
641
|
if ('success' in response && !response.success) {
|
|
@@ -799,13 +692,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
799
692
|
throw new Error('Invalid arguments for firecrawl_map');
|
|
800
693
|
}
|
|
801
694
|
const { url, ...options } = args;
|
|
802
|
-
|
|
803
|
-
|
|
695
|
+
const response = await client.mapUrl(url, {
|
|
696
|
+
...options,
|
|
697
|
+
// @ts-expect-error Extended API options including origin
|
|
698
|
+
origin: 'mcp-server',
|
|
699
|
+
});
|
|
804
700
|
if ('error' in response) {
|
|
805
701
|
throw new Error(response.error);
|
|
806
702
|
}
|
|
807
703
|
if (!response.links) {
|
|
808
|
-
throw new Error('No links received from
|
|
704
|
+
throw new Error('No links received from Firecrawl API');
|
|
809
705
|
}
|
|
810
706
|
return {
|
|
811
707
|
content: [
|
|
@@ -814,89 +710,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
814
710
|
isError: false,
|
|
815
711
|
};
|
|
816
712
|
}
|
|
817
|
-
case 'firecrawl_batch_scrape': {
|
|
818
|
-
if (!isBatchScrapeOptions(args)) {
|
|
819
|
-
throw new Error('Invalid arguments for firecrawl_batch_scrape');
|
|
820
|
-
}
|
|
821
|
-
try {
|
|
822
|
-
const operationId = `batch_${++operationCounter}`;
|
|
823
|
-
const operation = {
|
|
824
|
-
id: operationId,
|
|
825
|
-
urls: args.urls,
|
|
826
|
-
options: args.options,
|
|
827
|
-
status: 'pending',
|
|
828
|
-
progress: {
|
|
829
|
-
completed: 0,
|
|
830
|
-
total: args.urls.length,
|
|
831
|
-
},
|
|
832
|
-
};
|
|
833
|
-
batchOperations.set(operationId, operation);
|
|
834
|
-
// Queue the operation
|
|
835
|
-
batchQueue.add(() => processBatchOperation(operation));
|
|
836
|
-
safeLog('info', `Queued batch operation ${operationId} with ${args.urls.length} URLs`);
|
|
837
|
-
return {
|
|
838
|
-
content: [
|
|
839
|
-
{
|
|
840
|
-
type: 'text',
|
|
841
|
-
text: trimResponseText(`Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`),
|
|
842
|
-
},
|
|
843
|
-
],
|
|
844
|
-
isError: false,
|
|
845
|
-
};
|
|
846
|
-
}
|
|
847
|
-
catch (error) {
|
|
848
|
-
const errorMessage = error instanceof Error
|
|
849
|
-
? error.message
|
|
850
|
-
: `Batch operation failed: ${JSON.stringify(error)}`;
|
|
851
|
-
return {
|
|
852
|
-
content: [{ type: 'text', text: trimResponseText(errorMessage) }],
|
|
853
|
-
isError: true,
|
|
854
|
-
};
|
|
855
|
-
}
|
|
856
|
-
}
|
|
857
|
-
case 'firecrawl_check_batch_status': {
|
|
858
|
-
if (!isStatusCheckOptions(args)) {
|
|
859
|
-
throw new Error('Invalid arguments for firecrawl_check_batch_status');
|
|
860
|
-
}
|
|
861
|
-
const operation = batchOperations.get(args.id);
|
|
862
|
-
if (!operation) {
|
|
863
|
-
return {
|
|
864
|
-
content: [
|
|
865
|
-
{
|
|
866
|
-
type: 'text',
|
|
867
|
-
text: trimResponseText(`No batch operation found with ID: ${args.id}`),
|
|
868
|
-
},
|
|
869
|
-
],
|
|
870
|
-
isError: true,
|
|
871
|
-
};
|
|
872
|
-
}
|
|
873
|
-
const status = `Batch Status:
|
|
874
|
-
Status: ${operation.status}
|
|
875
|
-
Progress: ${operation.progress.completed}/${operation.progress.total}
|
|
876
|
-
${operation.error ? `Error: ${operation.error}` : ''}
|
|
877
|
-
${operation.result
|
|
878
|
-
? `Results: ${JSON.stringify(operation.result, null, 2)}`
|
|
879
|
-
: ''}`;
|
|
880
|
-
return {
|
|
881
|
-
content: [{ type: 'text', text: trimResponseText(status) }],
|
|
882
|
-
isError: false,
|
|
883
|
-
};
|
|
884
|
-
}
|
|
885
713
|
case 'firecrawl_crawl': {
|
|
886
714
|
if (!isCrawlOptions(args)) {
|
|
887
715
|
throw new Error('Invalid arguments for firecrawl_crawl');
|
|
888
716
|
}
|
|
889
717
|
const { url, ...options } = args;
|
|
890
|
-
const response = await withRetry(
|
|
891
|
-
|
|
892
|
-
|
|
718
|
+
const response = await withRetry(async () =>
|
|
719
|
+
// @ts-expect-error Extended API options including origin
|
|
720
|
+
client.asyncCrawlUrl(url, { ...options, origin: 'mcp-server' }), 'crawl operation');
|
|
893
721
|
if (!response.success) {
|
|
894
722
|
throw new Error(response.error);
|
|
895
723
|
}
|
|
896
|
-
// Monitor credits for cloud API
|
|
897
|
-
if (!FIRECRAWL_API_URL && hasCredits(response)) {
|
|
898
|
-
await updateCreditUsage(response.creditsUsed);
|
|
899
|
-
}
|
|
900
724
|
return {
|
|
901
725
|
content: [
|
|
902
726
|
{
|
|
@@ -935,10 +759,6 @@ ${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}
|
|
|
935
759
|
if (!response.success) {
|
|
936
760
|
throw new Error(`Search failed: ${response.error || 'Unknown error'}`);
|
|
937
761
|
}
|
|
938
|
-
// Monitor credits for cloud API
|
|
939
|
-
if (!FIRECRAWL_API_URL && hasCredits(response)) {
|
|
940
|
-
await updateCreditUsage(response.creditsUsed);
|
|
941
|
-
}
|
|
942
762
|
// Format the results
|
|
943
763
|
const results = response.data
|
|
944
764
|
.map((result) => `URL: ${result.url}
|
|
@@ -986,10 +806,6 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
|
|
|
986
806
|
throw new Error(extractResponse.error || 'Extraction failed');
|
|
987
807
|
}
|
|
988
808
|
const response = extractResponse;
|
|
989
|
-
// Monitor credits for cloud API
|
|
990
|
-
if (!FIRECRAWL_API_URL && hasCredits(response)) {
|
|
991
|
-
await updateCreditUsage(response.creditsUsed || 0);
|
|
992
|
-
}
|
|
993
809
|
// Log performance metrics
|
|
994
810
|
safeLog('info', `Extraction completed in ${Date.now() - extractStartTime}ms`);
|
|
995
811
|
// Add warning to response if present
|
|
@@ -1040,7 +856,7 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
|
|
|
1040
856
|
maxDepth: args.maxDepth,
|
|
1041
857
|
timeLimit: args.timeLimit,
|
|
1042
858
|
maxUrls: args.maxUrls,
|
|
1043
|
-
|
|
859
|
+
// @ts-expect-error Extended API options including origin
|
|
1044
860
|
origin: 'mcp-server',
|
|
1045
861
|
},
|
|
1046
862
|
// Activity callback
|
|
@@ -1089,9 +905,9 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
|
|
|
1089
905
|
const generateStartTime = Date.now();
|
|
1090
906
|
safeLog('info', `Starting LLMs.txt generation for URL: ${url}`);
|
|
1091
907
|
// Start the generation process
|
|
1092
|
-
const response = await withRetry(
|
|
1093
|
-
|
|
1094
|
-
|
|
908
|
+
const response = await withRetry(async () =>
|
|
909
|
+
// @ts-expect-error Extended API options including origin
|
|
910
|
+
client.generateLLMsText(url, { ...params, origin: 'mcp-server' }), 'LLMs.txt generation');
|
|
1095
911
|
if (!response.success) {
|
|
1096
912
|
throw new Error(response.error || 'LLMs.txt generation failed');
|
|
1097
913
|
}
|
|
@@ -1162,19 +978,15 @@ ${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`;
|
|
|
1162
978
|
})
|
|
1163
979
|
.join('\n\n');
|
|
1164
980
|
}
|
|
1165
|
-
// Add type guard for credit usage
|
|
1166
|
-
function hasCredits(response) {
|
|
1167
|
-
return 'creditsUsed' in response && typeof response.creditsUsed === 'number';
|
|
1168
|
-
}
|
|
1169
981
|
// Utility function to trim trailing whitespace from text responses
|
|
1170
982
|
// This prevents Claude API errors with "final assistant content cannot end with trailing whitespace"
|
|
1171
983
|
function trimResponseText(text) {
|
|
1172
984
|
return text.trim();
|
|
1173
985
|
}
|
|
1174
986
|
// Server startup
|
|
1175
|
-
async function runServer() {
|
|
987
|
+
async function runLocalServer() {
|
|
1176
988
|
try {
|
|
1177
|
-
console.error('Initializing
|
|
989
|
+
console.error('Initializing Firecrawl MCP Server...');
|
|
1178
990
|
const transport = new StdioServerTransport();
|
|
1179
991
|
// Detect if we're using stdio transport
|
|
1180
992
|
isStdioTransport = transport instanceof StdioServerTransport;
|
|
@@ -1183,16 +995,96 @@ async function runServer() {
|
|
|
1183
995
|
}
|
|
1184
996
|
await server.connect(transport);
|
|
1185
997
|
// Now that we're connected, we can send logging messages
|
|
1186
|
-
safeLog('info', '
|
|
998
|
+
safeLog('info', 'Firecrawl MCP Server initialized successfully');
|
|
1187
999
|
safeLog('info', `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`);
|
|
1188
|
-
console.error('
|
|
1000
|
+
console.error('Firecrawl MCP Server running on stdio');
|
|
1189
1001
|
}
|
|
1190
1002
|
catch (error) {
|
|
1191
1003
|
console.error('Fatal error running server:', error);
|
|
1192
1004
|
process.exit(1);
|
|
1193
1005
|
}
|
|
1194
1006
|
}
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1007
|
+
async function runSSELocalServer() {
|
|
1008
|
+
let transport = null;
|
|
1009
|
+
const app = express();
|
|
1010
|
+
app.get('/sse', async (req, res) => {
|
|
1011
|
+
transport = new SSEServerTransport(`/messages`, res);
|
|
1012
|
+
res.on('close', () => {
|
|
1013
|
+
transport = null;
|
|
1014
|
+
});
|
|
1015
|
+
await server.connect(transport);
|
|
1016
|
+
});
|
|
1017
|
+
// Endpoint for the client to POST messages
|
|
1018
|
+
// Remove express.json() middleware - let the transport handle the body
|
|
1019
|
+
app.post('/messages', (req, res) => {
|
|
1020
|
+
if (transport) {
|
|
1021
|
+
transport.handlePostMessage(req, res);
|
|
1022
|
+
}
|
|
1023
|
+
});
|
|
1024
|
+
}
|
|
1025
|
+
async function runSSECloudServer() {
|
|
1026
|
+
const transports = {};
|
|
1027
|
+
const app = express();
|
|
1028
|
+
app.get('/:apiKey/sse', async (req, res) => {
|
|
1029
|
+
const apiKey = req.params.apiKey;
|
|
1030
|
+
const transport = new SSEServerTransport(`/${apiKey}/messages`, res);
|
|
1031
|
+
//todo: validate api key, close if invalid
|
|
1032
|
+
const compositeKey = `${apiKey}-${transport.sessionId}`;
|
|
1033
|
+
transports[compositeKey] = transport;
|
|
1034
|
+
res.on('close', () => {
|
|
1035
|
+
delete transports[compositeKey];
|
|
1036
|
+
});
|
|
1037
|
+
await server.connect(transport);
|
|
1038
|
+
});
|
|
1039
|
+
// Endpoint for the client to POST messages
|
|
1040
|
+
// Remove express.json() middleware - let the transport handle the body
|
|
1041
|
+
app.post('/:apiKey/messages', express.json(), async (req, res) => {
|
|
1042
|
+
const apiKey = req.params.apiKey;
|
|
1043
|
+
const body = req.body;
|
|
1044
|
+
const enrichedBody = {
|
|
1045
|
+
...body,
|
|
1046
|
+
};
|
|
1047
|
+
if (enrichedBody && enrichedBody.params && !enrichedBody.params._meta) {
|
|
1048
|
+
enrichedBody.params._meta = { apiKey };
|
|
1049
|
+
}
|
|
1050
|
+
else if (enrichedBody &&
|
|
1051
|
+
enrichedBody.params &&
|
|
1052
|
+
enrichedBody.params._meta) {
|
|
1053
|
+
enrichedBody.params._meta.apiKey = apiKey;
|
|
1054
|
+
}
|
|
1055
|
+
console.log('enrichedBody', enrichedBody);
|
|
1056
|
+
const sessionId = req.query.sessionId;
|
|
1057
|
+
const compositeKey = `${apiKey}-${sessionId}`;
|
|
1058
|
+
const transport = transports[compositeKey];
|
|
1059
|
+
if (transport) {
|
|
1060
|
+
await transport.handlePostMessage(req, res, enrichedBody);
|
|
1061
|
+
}
|
|
1062
|
+
else {
|
|
1063
|
+
res.status(400).send('No transport found for sessionId');
|
|
1064
|
+
}
|
|
1065
|
+
});
|
|
1066
|
+
const PORT = 3000;
|
|
1067
|
+
app.listen(PORT, () => {
|
|
1068
|
+
console.log(`MCP SSE Server listening on http://localhost:${PORT}`);
|
|
1069
|
+
console.log(`SSE endpoint: http://localhost:${PORT}/sse`);
|
|
1070
|
+
console.log(`Message endpoint: http://localhost:${PORT}/messages`);
|
|
1071
|
+
});
|
|
1072
|
+
}
|
|
1073
|
+
if (process.env.CLOUD_SERVICE === 'true') {
|
|
1074
|
+
runSSECloudServer().catch((error) => {
|
|
1075
|
+
console.error('Fatal error running server:', error);
|
|
1076
|
+
process.exit(1);
|
|
1077
|
+
});
|
|
1078
|
+
}
|
|
1079
|
+
else if (process.env.SSE_LOCAL === 'true') {
|
|
1080
|
+
runSSELocalServer().catch((error) => {
|
|
1081
|
+
console.error('Fatal error running server:', error);
|
|
1082
|
+
process.exit(1);
|
|
1083
|
+
});
|
|
1084
|
+
}
|
|
1085
|
+
else {
|
|
1086
|
+
runLocalServer().catch((error) => {
|
|
1087
|
+
console.error('Fatal error running server:', error);
|
|
1088
|
+
process.exit(1);
|
|
1089
|
+
});
|
|
1090
|
+
}
|
package/dist/index.test.js
CHANGED
|
@@ -3,7 +3,7 @@ import { describe, expect, jest, test, beforeEach, afterEach, } from '@jest/glob
|
|
|
3
3
|
import { mock } from 'jest-mock-extended';
|
|
4
4
|
// Mock FirecrawlApp
|
|
5
5
|
jest.mock('@mendable/firecrawl-js');
|
|
6
|
-
describe('
|
|
6
|
+
describe('Firecrawl Tool Tests', () => {
|
|
7
7
|
let mockClient;
|
|
8
8
|
let requestHandler;
|
|
9
9
|
beforeEach(() => {
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "1.7.2",
|
|
4
|
-
"description": "MCP server for
|
|
3
|
+
"version": "1.7.3",
|
|
4
|
+
"description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"firecrawl-mcp": "dist/index.js"
|
|
@@ -22,16 +22,18 @@
|
|
|
22
22
|
"prepare": "npm run build",
|
|
23
23
|
"publish": "npm run build && npm publish"
|
|
24
24
|
},
|
|
25
|
-
"license": "
|
|
25
|
+
"license": "MIT",
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@mendable/firecrawl-js": "^1.19.0",
|
|
28
28
|
"@modelcontextprotocol/sdk": "^1.4.1",
|
|
29
29
|
"dotenv": "^16.4.7",
|
|
30
|
-
"
|
|
31
|
-
"shx": "^0.3.4"
|
|
30
|
+
"express": "^5.1.0",
|
|
31
|
+
"shx": "^0.3.4",
|
|
32
|
+
"ws": "^8.18.1"
|
|
32
33
|
},
|
|
33
34
|
"devDependencies": {
|
|
34
35
|
"@jest/globals": "^29.7.0",
|
|
36
|
+
"@types/express": "^5.0.1",
|
|
35
37
|
"@types/jest": "^29.5.14",
|
|
36
38
|
"@types/node": "^20.10.5",
|
|
37
39
|
"@typescript-eslint/eslint-plugin": "^7.0.0",
|
package/dist/jest.setup.js
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import { jest } from '@jest/globals';
|
|
2
|
-
// Set test timeout
|
|
3
|
-
jest.setTimeout(30000);
|
|
4
|
-
// Create mock responses
|
|
5
|
-
const mockSearchResponse = {
|
|
6
|
-
success: true,
|
|
7
|
-
data: [
|
|
8
|
-
{
|
|
9
|
-
url: 'https://example.com',
|
|
10
|
-
title: 'Test Page',
|
|
11
|
-
description: 'Test Description',
|
|
12
|
-
markdown: '# Test Content',
|
|
13
|
-
actions: null,
|
|
14
|
-
},
|
|
15
|
-
],
|
|
16
|
-
};
|
|
17
|
-
const mockBatchScrapeResponse = {
|
|
18
|
-
success: true,
|
|
19
|
-
id: 'test-batch-id',
|
|
20
|
-
};
|
|
21
|
-
const mockBatchStatusResponse = {
|
|
22
|
-
success: true,
|
|
23
|
-
status: 'completed',
|
|
24
|
-
completed: 1,
|
|
25
|
-
total: 1,
|
|
26
|
-
creditsUsed: 1,
|
|
27
|
-
expiresAt: new Date(),
|
|
28
|
-
data: [
|
|
29
|
-
{
|
|
30
|
-
url: 'https://example.com',
|
|
31
|
-
title: 'Test Page',
|
|
32
|
-
description: 'Test Description',
|
|
33
|
-
markdown: '# Test Content',
|
|
34
|
-
actions: null,
|
|
35
|
-
},
|
|
36
|
-
],
|
|
37
|
-
};
|
|
38
|
-
// Create mock instance methods
|
|
39
|
-
const mockSearch = jest.fn().mockImplementation(async () => mockSearchResponse);
|
|
40
|
-
const mockAsyncBatchScrapeUrls = jest
|
|
41
|
-
.fn()
|
|
42
|
-
.mockImplementation(async () => mockBatchScrapeResponse);
|
|
43
|
-
const mockCheckBatchScrapeStatus = jest
|
|
44
|
-
.fn()
|
|
45
|
-
.mockImplementation(async () => mockBatchStatusResponse);
|
|
46
|
-
// Create mock instance
|
|
47
|
-
const mockInstance = {
|
|
48
|
-
apiKey: 'test-api-key',
|
|
49
|
-
apiUrl: 'test-api-url',
|
|
50
|
-
search: mockSearch,
|
|
51
|
-
asyncBatchScrapeUrls: mockAsyncBatchScrapeUrls,
|
|
52
|
-
checkBatchScrapeStatus: mockCheckBatchScrapeStatus,
|
|
53
|
-
};
|
|
54
|
-
// Mock the module
|
|
55
|
-
jest.mock('@mendable/firecrawl-js', () => ({
|
|
56
|
-
__esModule: true,
|
|
57
|
-
default: jest.fn().mockImplementation(() => mockInstance),
|
|
58
|
-
}));
|