firecrawl-mcp 3.2.0 → 3.3.1

This diff reflects the changes between publicly released versions of the package as they appear in their respective public registries and is provided for informational purposes only.
Files changed (3)
  1. package/README.md +19 -0
  2. package/dist/index.js +96 -52
  3. package/package.json +3 -2
package/README.md CHANGED
@@ -21,6 +21,25 @@ A Model Context Protocol (MCP) server implementation that integrates with [Firec
  - Automatic retries and rate limiting
  - Cloud and self-hosted support
  - SSE support
+ - **Context limit support for MCP compatibility**
+
+ ## Context Limiting for MCP
+
+ All tools now support the `maxResponseSize` parameter to limit response size for better MCP compatibility. This is especially useful for large responses that may exceed MCP context limits.
+
+ **Example Usage:**
+ ```json
+ {
+ "name": "firecrawl_scrape",
+ "arguments": {
+ "url": "https://example.com",
+ "formats": ["markdown"],
+ "maxResponseSize": 50000
+ }
+ }
+ ```
+
+ When the response exceeds the specified limit, content will be truncated with a clear message indicating truncation occurred. This parameter is optional and preserves full backward compatibility.

  > Play around with [our MCP Server on MCP.so's playground](https://mcp.so/playground?server=firecrawl-mcp-server) or on [Klavis AI](https://www.klavis.ai/mcp-servers).

package/dist/index.js CHANGED
@@ -108,6 +108,8 @@ function createClient(apiKey) {
  return new FirecrawlApp(config);
  }
  const ORIGIN = 'mcp-fastmcp';
+ // Safe mode is enabled by default for cloud service to comply with ChatGPT safety requirements
+ const SAFE_MODE = process.env.CLOUD_SERVICE === 'true';
  function getClient(session) {
  // For cloud service, API key is required
  if (process.env.CLOUD_SERVICE === 'true') {
@@ -122,11 +124,22 @@ function getClient(session) {
  }
  return createClient(session?.firecrawlApiKey);
  }
- function asText(data) {
- return JSON.stringify(data, null, 2);
+ function asText(data, maxResponseSize) {
+ const text = JSON.stringify(data, null, 2);
+ if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
+ const truncatedText = text.substring(0, maxResponseSize - 100); // Reserve space for truncation message
+ return truncatedText + '\n\n[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]';
+ }
+ return text;
  }
  // scrape tool (v2 semantics, minimal args)
  // Centralized scrape params (used by scrape, and referenced in search/crawl scrapeOptions)
+ // Define safe action types
+ const safeActionTypes = ['wait', 'screenshot', 'scroll', 'scrape'];
+ const otherActions = ['click', 'write', 'press', 'executeJavascript', 'generatePDF'];
+ const allActionTypes = [...safeActionTypes, ...otherActions];
+ // Use appropriate action types based on safe mode
+ const allowedActionTypes = SAFE_MODE ? safeActionTypes : allActionTypes;
  const scrapeParamsSchema = z.object({
  url: z.string().url(),
  formats: z
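For readers skimming the diff, here is a stand-alone sketch of the new truncation behaviour; the helper is copied from the hunk above, while the sample payload and logged sizes are illustrative only:

```js
// Copied from the diff: cap the serialized response at maxResponseSize characters.
function asText(data, maxResponseSize) {
  const text = JSON.stringify(data, null, 2);
  if (maxResponseSize && maxResponseSize > 0 && text.length > maxResponseSize) {
    const truncatedText = text.substring(0, maxResponseSize - 100); // Reserve space for truncation message
    return (
      truncatedText +
      '\n\n[Content truncated due to size limit. Increase maxResponseSize parameter to see full content.]'
    );
  }
  return text;
}

// Illustrative payload well over the limit.
const big = { markdown: 'x'.repeat(200000) };
console.log(asText(big, 50000).length); // ~50000: 49900 characters plus the truncation notice
console.log(asText(big).length);        // no limit passed: full length, same behaviour as 3.2.0
```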
@@ -159,28 +172,20 @@ const scrapeParamsSchema = z.object({
  includeTags: z.array(z.string()).optional(),
  excludeTags: z.array(z.string()).optional(),
  waitFor: z.number().optional(),
- actions: z
- .array(z.object({
- type: z.enum([
- 'wait',
- 'click',
- 'screenshot',
- 'write',
- 'press',
- 'scroll',
- 'scrape',
- 'executeJavascript',
- 'generatePDF',
- ]),
- selector: z.string().optional(),
- milliseconds: z.number().optional(),
- text: z.string().optional(),
- key: z.string().optional(),
- direction: z.enum(['up', 'down']).optional(),
- script: z.string().optional(),
- fullPage: z.boolean().optional(),
- }))
- .optional(),
+ ...(SAFE_MODE ? {} : {
+ actions: z
+ .array(z.object({
+ type: z.enum(allowedActionTypes),
+ selector: z.string().optional(),
+ milliseconds: z.number().optional(),
+ text: z.string().optional(),
+ key: z.string().optional(),
+ direction: z.enum(['up', 'down']).optional(),
+ script: z.string().optional(),
+ fullPage: z.boolean().optional(),
+ }))
+ .optional(),
+ }),
  mobile: z.boolean().optional(),
  skipTlsVerification: z.boolean().optional(),
  removeBase64Images: z.boolean().optional(),
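The `...(SAFE_MODE ? {} : { ... })` spread above is what removes the field entirely: in safe mode the `actions` key is never part of the schema. A minimal sketch of the pattern with simplified field names (not the full schema):

```js
import { z } from 'zod';

const SAFE_MODE = process.env.CLOUD_SERVICE === 'true';

// In safe mode the spread contributes nothing, so `actions` is not a known key;
// zod's default object behaviour then strips it from input rather than validating it.
const schema = z.object({
  url: z.string().url(),
  ...(SAFE_MODE
    ? {}
    : { actions: z.array(z.object({ type: z.enum(['wait', 'scroll', 'click']) })).optional() }),
});

console.log(Object.keys(schema.shape)); // ['url'] in safe mode, ['url', 'actions'] otherwise
```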
@@ -192,11 +197,12 @@ const scrapeParamsSchema = z.object({
  .optional(),
  storeInCache: z.boolean().optional(),
  maxAge: z.number().optional(),
+ maxResponseSize: z.number().optional(),
  });
  server.addTool({
  name: 'firecrawl_scrape',
  description: `
- Scrape content from a single URL with advanced options.
+ Scrape content from a single URL with advanced options.
  This is the most powerful, fastest and most reliable scraper tool, if available you should always default to using this tool for any web scraping needs.

  **Best for:** Single page content extraction, when you know exactly which page contains the information.
@@ -210,21 +216,24 @@ This is the most powerful, fastest and most reliable scraper tool, if available
  "arguments": {
  "url": "https://example.com",
  "formats": ["markdown"],
- "maxAge": 172800000
+ "maxAge": 172800000,
+ "maxResponseSize": 50000
  }
  }
  \`\`\`
  **Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
+ **Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility (e.g., 50000 characters).
  **Returns:** Markdown, HTML, or other formats as specified.
+ ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
  `,
  parameters: scrapeParamsSchema,
  execute: async (args, { session, log }) => {
- const { url, ...options } = args;
+ const { url, maxResponseSize, ...options } = args;
  const client = getClient(session);
  const cleaned = removeEmptyTopLevel(options);
  log.info('Scraping URL', { url: String(url) });
  const res = await client.scrape(String(url), { ...cleaned, origin: ORIGIN });
- return asText(res);
+ return asText(res, maxResponseSize);
  },
  });
  server.addTool({
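Note the destructuring change in `execute`: `maxResponseSize` is peeled off the tool arguments before they are forwarded, so it only caps the serialized MCP response and is never sent to the Firecrawl API. A small illustration with made-up values:

```js
const args = { url: 'https://example.com', formats: ['markdown'], maxResponseSize: 50000 };

const { url, maxResponseSize, ...options } = args;

console.log(options);         // { formats: ['markdown'] } -> forwarded to client.scrape(...)
console.log(maxResponseSize); // 50000 -> used only by asText(res, maxResponseSize)
```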
@@ -235,13 +244,15 @@ Map a website to discover all indexed URLs on the site.
  **Best for:** Discovering URLs on a website before deciding what to scrape; finding specific sections of a website.
  **Not recommended for:** When you already know which specific URL you need (use scrape or batch_scrape); when you need the content of the pages (use scrape after mapping).
  **Common mistakes:** Using crawl to discover URLs instead of map.
+ **Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
  **Prompt Example:** "List all URLs on example.com."
  **Usage Example:**
  \`\`\`json
  {
  "name": "firecrawl_map",
  "arguments": {
- "url": "https://example.com"
+ "url": "https://example.com",
+ "maxResponseSize": 50000
  }
  }
  \`\`\`
@@ -254,14 +265,15 @@ Map a website to discover all indexed URLs on the site.
  includeSubdomains: z.boolean().optional(),
  limit: z.number().optional(),
  ignoreQueryParameters: z.boolean().optional(),
+ maxResponseSize: z.number().optional(),
  }),
  execute: async (args, { session, log }) => {
- const { url, ...options } = args;
+ const { url, maxResponseSize, ...options } = args;
  const client = getClient(session);
  const cleaned = removeEmptyTopLevel(options);
  log.info('Mapping URL', { url: String(url) });
  const res = await client.map(String(url), { ...cleaned, origin: ORIGIN });
- return asText(res);
+ return asText(res, maxResponseSize);
  },
  });
  server.addTool({
@@ -269,6 +281,20 @@ server.addTool({
  description: `
  Search the web and optionally extract content from search results. This is the most powerful web search tool available, and if available you should always default to using this tool for any web search needs.

+ The query also supports search operators, that you can use if needed to refine the search:
+ | Operator | Functionality | Examples |
+ ---|-|-|
+ | \`"\"\` | Non-fuzzy matches a string of text | \`"Firecrawl"\`
+ | \`-\` | Excludes certain keywords or negates other operators | \`-bad\`, \`-site:firecrawl.dev\`
+ | \`site:\` | Only returns results from a specified website | \`site:firecrawl.dev\`
+ | \`inurl:\` | Only returns results that include a word in the URL | \`inurl:firecrawl\`
+ | \`allinurl:\` | Only returns results that include multiple words in the URL | \`allinurl:git firecrawl\`
+ | \`intitle:\` | Only returns results that include a word in the title of the page | \`intitle:Firecrawl\`
+ | \`allintitle:\` | Only returns results that include multiple words in the title of the page | \`allintitle:firecrawl playground\`
+ | \`related:\` | Only returns results that are related to a specific domain | \`related:firecrawl.dev\`
+ | \`imagesize:\` | Only returns images with exact dimensions | \`imagesize:1920x1080\`
+ | \`larger:\` | Only returns images larger than specified dimensions | \`larger:1920x1080\`
+
  **Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.
  **Not recommended for:** When you need to search the filesystem. When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl.
  **Common mistakes:** Using crawl or map for open-ended questions (use search instead).
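As a concrete illustration, the operators above can be combined in a single query string; the values in this hypothetical tool call are made up:

```js
// Hypothetical firecrawl_search call combining operators from the table above.
const searchCall = {
  name: 'firecrawl_search',
  arguments: {
    query: 'site:firecrawl.dev intitle:pricing -inurl:blog',
    maxResponseSize: 50000,
  },
};
```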
@@ -305,10 +331,12 @@ Search the web and optionally extract content from search results. This is the m
  "scrapeOptions": {
  "formats": ["markdown"],
  "onlyMainContent": true
- }
+ },
+ "maxResponseSize": 50000
  }
  }
  \`\`\`
+ **Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
  **Returns:** Array of search results (with optional scraped content).
  `,
  parameters: z.object({
@@ -321,17 +349,18 @@ Search the web and optionally extract content from search results. This is the m
  .array(z.object({ type: z.enum(['web', 'images', 'news']) }))
  .optional(),
  scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+ maxResponseSize: z.number().optional(),
  }),
  execute: async (args, { session, log }) => {
  const client = getClient(session);
- const { query, ...opts } = args;
+ const { query, maxResponseSize, ...opts } = args;
  const cleaned = removeEmptyTopLevel(opts);
  log.info('Searching', { query: String(query) });
  const res = await client.search(query, {
  ...cleaned,
  origin: ORIGIN,
  });
- return asText(res);
+ return asText(res, maxResponseSize);
  },
  });
  server.addTool({
@@ -354,11 +383,14 @@ server.addTool({
  "limit": 20,
  "allowExternalLinks": false,
  "deduplicateSimilarURLs": true,
- "sitemap": "include"
+ "sitemap": "include",
+ "maxResponseSize": 50000
  }
  }
  \`\`\`
+ **Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
  **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
+ ${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
  `,
  parameters: z.object({
  url: z.string(),
@@ -373,21 +405,24 @@ server.addTool({
  crawlEntireDomain: z.boolean().optional(),
  delay: z.number().optional(),
  maxConcurrency: z.number().optional(),
- webhook: z
- .union([
- z.string(),
- z.object({
- url: z.string(),
- headers: z.record(z.string(), z.string()).optional(),
- }),
- ])
- .optional(),
+ ...(SAFE_MODE ? {} : {
+ webhook: z
+ .union([
+ z.string(),
+ z.object({
+ url: z.string(),
+ headers: z.record(z.string(), z.string()).optional(),
+ }),
+ ])
+ .optional(),
+ }),
  deduplicateSimilarURLs: z.boolean().optional(),
  ignoreQueryParameters: z.boolean().optional(),
  scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+ maxResponseSize: z.number().optional(),
  }),
  execute: async (args, { session, log }) => {
- const { url, ...options } = args;
+ const { url, maxResponseSize, ...options } = args;
  const client = getClient(session);
  const cleaned = removeEmptyTopLevel(options);
  log.info('Starting crawl', { url: String(url) });
@@ -395,7 +430,7 @@ server.addTool({
  ...cleaned,
  origin: ORIGIN,
  });
- return asText(res);
+ return asText(res, maxResponseSize);
  },
  });
  server.addTool({
@@ -408,17 +443,23 @@ Check the status of a crawl job.
  {
  "name": "firecrawl_check_crawl_status",
  "arguments": {
- "id": "550e8400-e29b-41d4-a716-446655440000"
+ "id": "550e8400-e29b-41d4-a716-446655440000",
+ "maxResponseSize": 50000
  }
  }
  \`\`\`
+ **Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
  **Returns:** Status and progress of the crawl job, including results if available.
  `,
- parameters: z.object({ id: z.string() }),
+ parameters: z.object({
+ id: z.string(),
+ maxResponseSize: z.number().optional(),
+ }),
  execute: async (args, { session }) => {
+ const { id, maxResponseSize } = args;
  const client = getClient(session);
- const res = await client.getCrawlStatus(args.id);
- return asText(res);
+ const res = await client.getCrawlStatus(id);
+ return asText(res, maxResponseSize);
  },
  });
  server.addTool({
@@ -454,10 +495,12 @@ Extract structured information from web pages using LLM capabilities. Supports b
  },
  "allowExternalLinks": false,
  "enableWebSearch": false,
- "includeSubdomains": false
+ "includeSubdomains": false,
+ "maxResponseSize": 50000
  }
  }
  \`\`\`
+ **Context Limiting:** Use maxResponseSize parameter to limit response size for MCP compatibility.
  **Returns:** Extracted structured data as defined by your schema.
  `,
  parameters: z.object({
@@ -467,6 +510,7 @@ Extract structured information from web pages using LLM capabilities. Supports b
  allowExternalLinks: z.boolean().optional(),
  enableWebSearch: z.boolean().optional(),
  includeSubdomains: z.boolean().optional(),
+ maxResponseSize: z.number().optional(),
  }),
  execute: async (args, { session, log }) => {
  const client = getClient(session);
@@ -484,7 +528,7 @@ Extract structured information from web pages using LLM capabilities. Supports b
  origin: ORIGIN,
  });
  const res = await client.extract(extractBody);
- return asText(res);
+ return asText(res, a.maxResponseSize);
  },
  });
  const PORT = Number(process.env.PORT || 3000);
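Since safe mode is derived once from the environment (see the first index.js hunk), a quick way to check which mode a deployment will start in is to evaluate the same expression; the log text below is illustrative, not part of the package:

```js
// Same expression as in the diff: safe mode is tied to the CLOUD_SERVICE flag.
const SAFE_MODE = process.env.CLOUD_SERVICE === 'true';

console.log(
  SAFE_MODE
    ? 'Safe mode: actions and webhook are omitted from the schemas, descriptions note Safe Mode.'
    : 'Full mode: complete action list and webhook support, plus the new maxResponseSize fields.'
);
```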
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "firecrawl-mcp",
- "version": "3.2.0",
+ "version": "3.3.1",
  "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
  "type": "module",
  "bin": {
@@ -27,9 +27,10 @@
  },
  "license": "MIT",
  "dependencies": {
- "@mendable/firecrawl-js": "^4.3.4",
+ "@mendable/firecrawl-js": "^4.3.6",
  "dotenv": "^17.2.2",
  "firecrawl-fastmcp": "^1.0.2",
+ "node-fetch": "^2.7.0",
  "typescript": "^5.9.2",
  "zod": "^4.1.5"
  },