npm - firecrawl-mcp - Versions diffs - 1.7.2 → 1.7.3 - Mend

firecrawl-mcp 1.7.2 → 1.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/index.js CHANGED Viewed

@@ -1,9 +1,10 @@
 #!/usr/bin/env node
 import { Server } from '@modelcontextprotocol/sdk/server/index.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
 import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
 import FirecrawlApp from '@mendable/firecrawl-js';
-import PQueue from 'p-queue';
+import express from 'express';
 import dotenv from 'dotenv';
 dotenv.config();
 // Tool definitions
@@ -33,6 +34,7 @@ const SCRAPE_TOOL = {
                         'extract',
                     ],
                 },
+                default: ['markdown'],
                 description: "Content formats to extract (default: ['markdown'])",
             },
             onlyMainContent: {
@@ -303,69 +305,6 @@ const CRAWL_TOOL = {
         required: ['url'],
     },
 };
-const BATCH_SCRAPE_TOOL = {
-    name: 'firecrawl_batch_scrape',
-    description: 'Scrape multiple URLs in batch mode. Returns a job ID that can be used to check status.',
-    inputSchema: {
-        type: 'object',
-        properties: {
-            urls: {
-                type: 'array',
-                items: { type: 'string' },
-                description: 'List of URLs to scrape',
-            },
-            options: {
-                type: 'object',
-                properties: {
-                    formats: {
-                        type: 'array',
-                        items: {
-                            type: 'string',
-                            enum: [
-                                'markdown',
-                                'html',
-                                'rawHtml',
-                                'screenshot',
-                                'links',
-                                'screenshot@fullPage',
-                                'extract',
-                            ],
-                        },
-                    },
-                    onlyMainContent: {
-                        type: 'boolean',
-                    },
-                    includeTags: {
-                        type: 'array',
-                        items: { type: 'string' },
-                    },
-                    excludeTags: {
-                        type: 'array',
-                        items: { type: 'string' },
-                    },
-                    waitFor: {
-                        type: 'number',
-                    },
-                },
-            },
-        },
-        required: ['urls'],
-    },
-};
-const CHECK_BATCH_STATUS_TOOL = {
-    name: 'firecrawl_check_batch_status',
-    description: 'Check the status of a batch scraping job.',
-    inputSchema: {
-        type: 'object',
-        properties: {
-            id: {
-                type: 'string',
-                description: 'Batch job ID to check',
-            },
-        },
-        required: ['id'],
-    },
-};
 const CHECK_CRAWL_STATUS_TOOL = {
     name: 'firecrawl_check_crawl_status',
     description: 'Check the status of a crawl job.',
@@ -559,13 +498,6 @@ function isCrawlOptions(args) {
         'url' in args &&
         typeof args.url === 'string');
 }
-function isBatchScrapeOptions(args) {
-    return (typeof args === 'object' &&
-        args !== null &&
-        'urls' in args &&
-        Array.isArray(args.urls) &&
-        args.urls.every((url) => typeof url === 'string'));
-}
 function isStatusCheckOptions(args) {
     return (typeof args === 'object' &&
         args !== null &&
@@ -605,15 +537,13 @@ const server = new Server({
 const FIRECRAWL_API_URL = process.env.FIRECRAWL_API_URL;
 const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
 // Check if API key is required (only for cloud service)
-if (!FIRECRAWL_API_URL && !FIRECRAWL_API_KEY) {
+if (process.env.CLOUD_SERVICE !== 'true' &&
+    !FIRECRAWL_API_URL &&
+    !FIRECRAWL_API_KEY) {
     console.error('Error: FIRECRAWL_API_KEY environment variable is required when using the cloud service');
     process.exit(1);
 }
-// Initialize FireCrawl client with optional API URL
-const client = new FirecrawlApp({
-    apiKey: FIRECRAWL_API_KEY || '',
-    ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
-});
+// Firecrawl client is no longer created here; it is constructed per request inside the CallToolRequestSchema handler
 // Configuration for retries and monitoring
 const CONFIG = {
     retry: {
@@ -627,10 +557,6 @@ const CONFIG = {
         criticalThreshold: Number(process.env.FIRECRAWL_CREDIT_CRITICAL_THRESHOLD) || 100,
     },
 };
-const creditUsage = {
-    total: 0,
-    lastCheck: Date.now(),
-};
 // Add utility function for delay
 function delay(ms) {
     return new Promise((resolve) => setTimeout(resolve, ms));
@@ -664,58 +590,12 @@ async function withRetry(operation, context, attempt = 1) {
         throw error;
     }
 }
-// Add credit monitoring
-async function updateCreditUsage(creditsUsed) {
-    creditUsage.total += creditsUsed;
-    // Log credit usage
-    safeLog('info', `Credit usage: ${creditUsage.total} credits used total`);
-    // Check thresholds
-    if (creditUsage.total >= CONFIG.credit.criticalThreshold) {
-        safeLog('error', `CRITICAL: Credit usage has reached ${creditUsage.total}`);
-    }
-    else if (creditUsage.total >= CONFIG.credit.warningThreshold) {
-        safeLog('warning', `WARNING: Credit usage has reached ${creditUsage.total}`);
-    }
-}
-// Initialize queue system
-const batchQueue = new PQueue({ concurrency: 1 });
-const batchOperations = new Map();
-let operationCounter = 0;
-async function processBatchOperation(operation) {
-    try {
-        operation.status = 'processing';
-        let totalCreditsUsed = 0;
-        // Use library's built-in batch processing
-        const response = await withRetry(async () => client.asyncBatchScrapeUrls(operation.urls, operation.options), `batch ${operation.id} processing`);
-        if (!response.success) {
-            throw new Error(response.error || 'Batch operation failed');
-        }
-        // Track credits if using cloud API
-        if (!FIRECRAWL_API_URL && hasCredits(response)) {
-            totalCreditsUsed += response.creditsUsed;
-            await updateCreditUsage(response.creditsUsed);
-        }
-        operation.status = 'completed';
-        operation.result = response;
-        // Log final credit usage for the batch
-        if (!FIRECRAWL_API_URL) {
-            safeLog('info', `Batch ${operation.id} completed. Total credits used: ${totalCreditsUsed}`);
-        }
-    }
-    catch (error) {
-        operation.status = 'failed';
-        operation.error = error instanceof Error ? error.message : String(error);
-        safeLog('error', `Batch ${operation.id} failed: ${operation.error}`);
-    }
-}
 // Tool handlers
 server.setRequestHandler(ListToolsRequestSchema, async () => ({
     tools: [
         SCRAPE_TOOL,
         MAP_TOOL,
         CRAWL_TOOL,
-        BATCH_SCRAPE_TOOL,
-        CHECK_BATCH_STATUS_TOOL,
         CHECK_CRAWL_STATUS_TOOL,
         SEARCH_TOOL,
         EXTRACT_TOOL,
@@ -727,6 +607,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
     const startTime = Date.now();
     try {
         const { name, arguments: args } = request.params;
+        const apiKey = process.env.CLOUD_SERVICE
+            ? request.params._meta?.apiKey
+            : FIRECRAWL_API_KEY;
+        if (process.env.CLOUD_SERVICE && !apiKey) {
+            throw new Error('No API key provided');
+        }
+        const client = new FirecrawlApp({
+            apiKey,
+            ...(FIRECRAWL_API_URL ? { apiUrl: FIRECRAWL_API_URL } : {}),
+        });
         // Log incoming request with timestamp
         safeLog('info', `[${new Date().toISOString()}] Received request for tool: ${name}`);
         if (!args) {
@@ -741,8 +631,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                 try {
                     const scrapeStartTime = Date.now();
                     safeLog('info', `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`);
-                    //@ts-ignore
-                    const response = await client.scrapeUrl(url, { ...options, origin: 'mcp-server' });
+                    const response = await client.scrapeUrl(url, {
+                        ...options,
+                        // @ts-expect-error Extended API options including origin
+                        origin: 'mcp-server',
+                    });
                     // Log performance metrics
                     safeLog('info', `Scrape completed in ${Date.now() - scrapeStartTime}ms`);
                     if ('success' in response && !response.success) {
@@ -799,13 +692,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                     throw new Error('Invalid arguments for firecrawl_map');
                 }
                 const { url, ...options } = args;
-                //@ts-ignore
-                const response = await client.mapUrl(url, { ...options, origin: 'mcp-server' });
+                const response = await client.mapUrl(url, {
+                    ...options,
+                    // @ts-expect-error Extended API options including origin
+                    origin: 'mcp-server',
+                });
                 if ('error' in response) {
                     throw new Error(response.error);
                 }
                 if (!response.links) {
-                    throw new Error('No links received from FireCrawl API');
+                    throw new Error('No links received from Firecrawl API');
                 }
                 return {
                     content: [
@@ -814,89 +710,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
                     isError: false,
                 };
             }
-            case 'firecrawl_batch_scrape': {
-                if (!isBatchScrapeOptions(args)) {
-                    throw new Error('Invalid arguments for firecrawl_batch_scrape');
-                }
-                try {
-                    const operationId = `batch_${++operationCounter}`;
-                    const operation = {
-                        id: operationId,
-                        urls: args.urls,
-                        options: args.options,
-                        status: 'pending',
-                        progress: {
-                            completed: 0,
-                            total: args.urls.length,
-                        },
-                    };
-                    batchOperations.set(operationId, operation);
-                    // Queue the operation
-                    batchQueue.add(() => processBatchOperation(operation));
-                    safeLog('info', `Queued batch operation ${operationId} with ${args.urls.length} URLs`);
-                    return {
-                        content: [
-                            {
-                                type: 'text',
-                                text: trimResponseText(`Batch operation queued with ID: ${operationId}. Use firecrawl_check_batch_status to check progress.`),
-                            },
-                        ],
-                        isError: false,
-                    };
-                }
-                catch (error) {
-                    const errorMessage = error instanceof Error
-                        ? error.message
-                        : `Batch operation failed: ${JSON.stringify(error)}`;
-                    return {
-                        content: [{ type: 'text', text: trimResponseText(errorMessage) }],
-                        isError: true,
-                    };
-                }
-            }
-            case 'firecrawl_check_batch_status': {
-                if (!isStatusCheckOptions(args)) {
-                    throw new Error('Invalid arguments for firecrawl_check_batch_status');
-                }
-                const operation = batchOperations.get(args.id);
-                if (!operation) {
-                    return {
-                        content: [
-                            {
-                                type: 'text',
-                                text: trimResponseText(`No batch operation found with ID: ${args.id}`),
-                            },
-                        ],
-                        isError: true,
-                    };
-                }
-                const status = `Batch Status:
-Status: ${operation.status}
-Progress: ${operation.progress.completed}/${operation.progress.total}
-${operation.error ? `Error: ${operation.error}` : ''}
-${operation.result
-                    ? `Results: ${JSON.stringify(operation.result, null, 2)}`
-                    : ''}`;
-                return {
-                    content: [{ type: 'text', text: trimResponseText(status) }],
-                    isError: false,
-                };
-            }
             case 'firecrawl_crawl': {
                 if (!isCrawlOptions(args)) {
                     throw new Error('Invalid arguments for firecrawl_crawl');
                 }
                 const { url, ...options } = args;
-                const response = await withRetry(
-                //@ts-ignore
-                async () => client.asyncCrawlUrl(url, { ...options, origin: 'mcp-server' }), 'crawl operation');
+                const response = await withRetry(async () =>
+                // @ts-expect-error Extended API options including origin
+                client.asyncCrawlUrl(url, { ...options, origin: 'mcp-server' }), 'crawl operation');
                 if (!response.success) {
                     throw new Error(response.error);
                 }
-                // Monitor credits for cloud API
-                if (!FIRECRAWL_API_URL && hasCredits(response)) {
-                    await updateCreditUsage(response.creditsUsed);
-                }
                 return {
                     content: [
                         {
@@ -935,10 +759,6 @@ ${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}
                     if (!response.success) {
                         throw new Error(`Search failed: ${response.error || 'Unknown error'}`);
                     }
-                    // Monitor credits for cloud API
-                    if (!FIRECRAWL_API_URL && hasCredits(response)) {
-                        await updateCreditUsage(response.creditsUsed);
-                    }
                     // Format the results
                     const results = response.data
                         .map((result) => `URL: ${result.url}
@@ -986,10 +806,6 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
                         throw new Error(extractResponse.error || 'Extraction failed');
                     }
                     const response = extractResponse;
-                    // Monitor credits for cloud API
-                    if (!FIRECRAWL_API_URL && hasCredits(response)) {
-                        await updateCreditUsage(response.creditsUsed || 0);
-                    }
                     // Log performance metrics
                     safeLog('info', `Extraction completed in ${Date.now() - extractStartTime}ms`);
                     // Add warning to response if present
@@ -1040,7 +856,7 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
                         maxDepth: args.maxDepth,
                         timeLimit: args.timeLimit,
                         maxUrls: args.maxUrls,
-                        //@ts-ignore
+                        // @ts-expect-error Extended API options including origin
                         origin: 'mcp-server',
                     },
                     // Activity callback
@@ -1089,9 +905,9 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
                     const generateStartTime = Date.now();
                     safeLog('info', `Starting LLMs.txt generation for URL: ${url}`);
                     // Start the generation process
-                    const response = await withRetry(
-                    //@ts-ignore
-                    async () => client.generateLLMsText(url, { ...params, origin: 'mcp-server' }), 'LLMs.txt generation');
+                    const response = await withRetry(async () =>
+                    // @ts-expect-error Extended API options including origin
+                    client.generateLLMsText(url, { ...params, origin: 'mcp-server' }), 'LLMs.txt generation');
                     if (!response.success) {
                         throw new Error(response.error || 'LLMs.txt generation failed');
                     }
@@ -1162,19 +978,15 @@ ${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`;
     })
         .join('\n\n');
 }
-// Add type guard for credit usage
-function hasCredits(response) {
-    return 'creditsUsed' in response && typeof response.creditsUsed === 'number';
-}
 // Utility function to trim trailing whitespace from text responses
 // This prevents Claude API errors with "final assistant content cannot end with trailing whitespace"
 function trimResponseText(text) {
     return text.trim();
 }
 // Server startup
-async function runServer() {
+async function runLocalServer() {
     try {
-        console.error('Initializing FireCrawl MCP Server...');
+        console.error('Initializing Firecrawl MCP Server...');
         const transport = new StdioServerTransport();
         // Detect if we're using stdio transport
         isStdioTransport = transport instanceof StdioServerTransport;
@@ -1183,16 +995,96 @@ async function runServer() {
         }
         await server.connect(transport);
         // Now that we're connected, we can send logging messages
-        safeLog('info', 'FireCrawl MCP Server initialized successfully');
+        safeLog('info', 'Firecrawl MCP Server initialized successfully');
         safeLog('info', `Configuration: API URL: ${FIRECRAWL_API_URL || 'default'}`);
-        console.error('FireCrawl MCP Server running on stdio');
+        console.error('Firecrawl MCP Server running on stdio');
     }
     catch (error) {
         console.error('Fatal error running server:', error);
         process.exit(1);
     }
 }
-runServer().catch((error) => {
-    console.error('Fatal error running server:', error);
-    process.exit(1);
-});
+async function runSSELocalServer() {
+    let transport = null;
+    const app = express();
+    app.get('/sse', async (req, res) => {
+        transport = new SSEServerTransport(`/messages`, res);
+        res.on('close', () => {
+            transport = null;
+        });
+        await server.connect(transport);
+    });
+    // Endpoint for the client to POST messages
+    // Remove express.json() middleware - let the transport handle the body
+    app.post('/messages', (req, res) => {
+        if (transport) {
+            transport.handlePostMessage(req, res);
+        }
+    });
+}
+async function runSSECloudServer() {
+    const transports = {};
+    const app = express();
+    app.get('/:apiKey/sse', async (req, res) => {
+        const apiKey = req.params.apiKey;
+        const transport = new SSEServerTransport(`/${apiKey}/messages`, res);
+        //todo: validate api key, close if invalid
+        const compositeKey = `${apiKey}-${transport.sessionId}`;
+        transports[compositeKey] = transport;
+        res.on('close', () => {
+            delete transports[compositeKey];
+        });
+        await server.connect(transport);
+    });
+    // Endpoint for the client to POST messages
+    // Remove express.json() middleware - let the transport handle the body
+    app.post('/:apiKey/messages', express.json(), async (req, res) => {
+        const apiKey = req.params.apiKey;
+        const body = req.body;
+        const enrichedBody = {
+            ...body,
+        };
+        if (enrichedBody && enrichedBody.params && !enrichedBody.params._meta) {
+            enrichedBody.params._meta = { apiKey };
+        }
+        else if (enrichedBody &&
+            enrichedBody.params &&
+            enrichedBody.params._meta) {
+            enrichedBody.params._meta.apiKey = apiKey;
+        }
+        console.log('enrichedBody', enrichedBody);
+        const sessionId = req.query.sessionId;
+        const compositeKey = `${apiKey}-${sessionId}`;
+        const transport = transports[compositeKey];
+        if (transport) {
+            await transport.handlePostMessage(req, res, enrichedBody);
+        }
+        else {
+            res.status(400).send('No transport found for sessionId');
+        }
+    });
+    const PORT = 3000;
+    app.listen(PORT, () => {
+        console.log(`MCP SSE Server listening on http://localhost:${PORT}`);
+        console.log(`SSE endpoint: http://localhost:${PORT}/sse`);
+        console.log(`Message endpoint: http://localhost:${PORT}/messages`);
+    });
+}
+if (process.env.CLOUD_SERVICE === 'true') {
+    runSSECloudServer().catch((error) => {
+        console.error('Fatal error running server:', error);
+        process.exit(1);
+    });
+}
+else if (process.env.SSE_LOCAL === 'true') {
+    runSSELocalServer().catch((error) => {
+        console.error('Fatal error running server:', error);
+        process.exit(1);
+    });
+}
+else {
+    runLocalServer().catch((error) => {
+        console.error('Fatal error running server:', error);
+        process.exit(1);
+    });
+}

package/dist/index.test.js CHANGED Viewed

@@ -3,7 +3,7 @@ import { describe, expect, jest, test, beforeEach, afterEach, } from '@jest/glob
 import { mock } from 'jest-mock-extended';
 // Mock FirecrawlApp
 jest.mock('@mendable/firecrawl-js');
-describe('FireCrawl Tool Tests', () => {
+describe('Firecrawl Tool Tests', () => {
     let mockClient;
     let requestHandler;
     beforeEach(() => {

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "firecrawl-mcp",
-  "version": "1.7.2",
-  "description": "MCP server for FireCrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
+  "version": "1.7.3",
+  "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
   "type": "module",
   "bin": {
     "firecrawl-mcp": "dist/index.js"
@@ -22,16 +22,18 @@
     "prepare": "npm run build",
     "publish": "npm run build && npm publish"
   },
-  "license": "ISC",
+  "license": "MIT",
   "dependencies": {
     "@mendable/firecrawl-js": "^1.19.0",
     "@modelcontextprotocol/sdk": "^1.4.1",
     "dotenv": "^16.4.7",
-    "p-queue": "^8.0.1",
-    "shx": "^0.3.4"
+    "express": "^5.1.0",
+    "shx": "^0.3.4",
+    "ws": "^8.18.1"
   },
   "devDependencies": {
     "@jest/globals": "^29.7.0",
+    "@types/express": "^5.0.1",
     "@types/jest": "^29.5.14",
     "@types/node": "^20.10.5",
     "@typescript-eslint/eslint-plugin": "^7.0.0",

package/dist/jest.setup.js DELETED Viewed

@@ -1,58 +0,0 @@
-import { jest } from '@jest/globals';
-// Set test timeout
-jest.setTimeout(30000);
-// Create mock responses
-const mockSearchResponse = {
-    success: true,
-    data: [
-        {
-            url: 'https://example.com',
-            title: 'Test Page',
-            description: 'Test Description',
-            markdown: '# Test Content',
-            actions: null,
-        },
-    ],
-};
-const mockBatchScrapeResponse = {
-    success: true,
-    id: 'test-batch-id',
-};
-const mockBatchStatusResponse = {
-    success: true,
-    status: 'completed',
-    completed: 1,
-    total: 1,
-    creditsUsed: 1,
-    expiresAt: new Date(),
-    data: [
-        {
-            url: 'https://example.com',
-            title: 'Test Page',
-            description: 'Test Description',
-            markdown: '# Test Content',
-            actions: null,
-        },
-    ],
-};
-// Create mock instance methods
-const mockSearch = jest.fn().mockImplementation(async () => mockSearchResponse);
-const mockAsyncBatchScrapeUrls = jest
-    .fn()
-    .mockImplementation(async () => mockBatchScrapeResponse);
-const mockCheckBatchScrapeStatus = jest
-    .fn()
-    .mockImplementation(async () => mockBatchStatusResponse);
-// Create mock instance
-const mockInstance = {
-    apiKey: 'test-api-key',
-    apiUrl: 'test-api-url',
-    search: mockSearch,
-    asyncBatchScrapeUrls: mockAsyncBatchScrapeUrls,
-    checkBatchScrapeStatus: mockCheckBatchScrapeStatus,
-};
-// Mock the module
-jest.mock('@mendable/firecrawl-js', () => ({
-    __esModule: true,
-    default: jest.fn().mockImplementation(() => mockInstance),
-}));