firecrawl-mcp 3.1.13 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +70 -39
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -79,8 +79,9 @@ const server = new FastMCP({
79
79
  return { firecrawlApiKey: apiKey };
80
80
  }
81
81
  else {
82
- if (!process.env.FIRECRAWL_API_KEY) {
83
- console.error('Firecrawl API key is required');
82
+ // For self-hosted instances, API key is optional if FIRECRAWL_API_URL is provided
83
+ if (!process.env.FIRECRAWL_API_KEY && !process.env.FIRECRAWL_API_URL) {
84
+ console.error('Either FIRECRAWL_API_KEY or FIRECRAWL_API_URL must be provided');
84
85
  process.exit(1);
85
86
  }
86
87
  return { firecrawlApiKey: process.env.FIRECRAWL_API_KEY };
@@ -95,25 +96,45 @@ const server = new FastMCP({
95
96
  },
96
97
  });
97
98
  function createClient(apiKey) {
98
- return new FirecrawlApp({
99
- apiKey,
99
+ const config = {
100
100
  ...(process.env.FIRECRAWL_API_URL && {
101
101
  apiUrl: process.env.FIRECRAWL_API_URL,
102
102
  }),
103
- });
103
+ };
104
+ // Only add apiKey if it's provided (required for cloud, optional for self-hosted)
105
+ if (apiKey) {
106
+ config.apiKey = apiKey;
107
+ }
108
+ return new FirecrawlApp(config);
104
109
  }
105
110
  const ORIGIN = 'mcp-fastmcp';
111
+ // Safe mode is enabled by default for cloud service to comply with ChatGPT safety requirements
112
+ const SAFE_MODE = process.env.CLOUD_SERVICE === 'true';
106
113
  function getClient(session) {
107
- if (!session || !session.firecrawlApiKey) {
108
- throw new Error('Unauthorized');
114
+ // For cloud service, API key is required
115
+ if (process.env.CLOUD_SERVICE === 'true') {
116
+ if (!session || !session.firecrawlApiKey) {
117
+ throw new Error('Unauthorized');
118
+ }
119
+ return createClient(session.firecrawlApiKey);
120
+ }
121
+ // For self-hosted instances, API key is optional if FIRECRAWL_API_URL is provided
122
+ if (!process.env.FIRECRAWL_API_URL && (!session || !session.firecrawlApiKey)) {
123
+ throw new Error('Unauthorized: API key is required when not using a self-hosted instance');
109
124
  }
110
- return createClient(session.firecrawlApiKey);
125
+ return createClient(session?.firecrawlApiKey);
111
126
  }
112
127
  function asText(data) {
113
128
  return JSON.stringify(data, null, 2);
114
129
  }
115
130
  // scrape tool (v2 semantics, minimal args)
116
131
  // Centralized scrape params (used by scrape, and referenced in search/crawl scrapeOptions)
132
+ // Define safe action types
133
+ const safeActionTypes = ['wait', 'screenshot', 'scroll', 'scrape'];
134
+ const otherActions = ['click', 'write', 'press', 'executeJavascript', 'generatePDF'];
135
+ const allActionTypes = [...safeActionTypes, ...otherActions];
136
+ // Use appropriate action types based on safe mode
137
+ const allowedActionTypes = SAFE_MODE ? safeActionTypes : allActionTypes;
117
138
  const scrapeParamsSchema = z.object({
118
139
  url: z.string().url(),
119
140
  formats: z
@@ -146,28 +167,20 @@ const scrapeParamsSchema = z.object({
146
167
  includeTags: z.array(z.string()).optional(),
147
168
  excludeTags: z.array(z.string()).optional(),
148
169
  waitFor: z.number().optional(),
149
- actions: z
150
- .array(z.object({
151
- type: z.enum([
152
- 'wait',
153
- 'click',
154
- 'screenshot',
155
- 'write',
156
- 'press',
157
- 'scroll',
158
- 'scrape',
159
- 'executeJavascript',
160
- 'generatePDF',
161
- ]),
162
- selector: z.string().optional(),
163
- milliseconds: z.number().optional(),
164
- text: z.string().optional(),
165
- key: z.string().optional(),
166
- direction: z.enum(['up', 'down']).optional(),
167
- script: z.string().optional(),
168
- fullPage: z.boolean().optional(),
169
- }))
170
- .optional(),
170
+ ...(SAFE_MODE ? {} : {
171
+ actions: z
172
+ .array(z.object({
173
+ type: z.enum(allowedActionTypes),
174
+ selector: z.string().optional(),
175
+ milliseconds: z.number().optional(),
176
+ text: z.string().optional(),
177
+ key: z.string().optional(),
178
+ direction: z.enum(['up', 'down']).optional(),
179
+ script: z.string().optional(),
180
+ fullPage: z.boolean().optional(),
181
+ }))
182
+ .optional(),
183
+ }),
171
184
  mobile: z.boolean().optional(),
172
185
  skipTlsVerification: z.boolean().optional(),
173
186
  removeBase64Images: z.boolean().optional(),
@@ -203,6 +216,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
203
216
  \`\`\`
204
217
  **Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
205
218
  **Returns:** Markdown, HTML, or other formats as specified.
219
+ ${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
206
220
  `,
207
221
  parameters: scrapeParamsSchema,
208
222
  execute: async (args, { session, log }) => {
@@ -256,6 +270,20 @@ server.addTool({
256
270
  description: `
257
271
  Search the web and optionally extract content from search results. This is the most powerful web search tool available, and if available you should always default to using this tool for any web search needs.
258
272
 
273
+ The query also supports search operators, that you can use if needed to refine the search:
274
+ | Operator | Functionality | Examples |
275
+ ---|-|-|
276
+ | \`"\"\` | Non-fuzzy matches a string of text | \`"Firecrawl"\`
277
+ | \`-\` | Excludes certain keywords or negates other operators | \`-bad\`, \`-site:firecrawl.dev\`
278
+ | \`site:\` | Only returns results from a specified website | \`site:firecrawl.dev\`
279
+ | \`inurl:\` | Only returns results that include a word in the URL | \`inurl:firecrawl\`
280
+ | \`allinurl:\` | Only returns results that include multiple words in the URL | \`allinurl:git firecrawl\`
281
+ | \`intitle:\` | Only returns results that include a word in the title of the page | \`intitle:Firecrawl\`
282
+ | \`allintitle:\` | Only returns results that include multiple words in the title of the page | \`allintitle:firecrawl playground\`
283
+ | \`related:\` | Only returns results that are related to a specific domain | \`related:firecrawl.dev\`
284
+ | \`imagesize:\` | Only returns images with exact dimensions | \`imagesize:1920x1080\`
285
+ | \`larger:\` | Only returns images larger than specified dimensions | \`larger:1920x1080\`
286
+
259
287
  **Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.
260
288
  **Not recommended for:** When you need to search the filesystem. When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl.
261
289
  **Common mistakes:** Using crawl or map for open-ended questions (use search instead).
@@ -346,6 +374,7 @@ server.addTool({
346
374
  }
347
375
  \`\`\`
348
376
  **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
377
+ ${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
349
378
  `,
350
379
  parameters: z.object({
351
380
  url: z.string(),
@@ -360,15 +389,17 @@ server.addTool({
360
389
  crawlEntireDomain: z.boolean().optional(),
361
390
  delay: z.number().optional(),
362
391
  maxConcurrency: z.number().optional(),
363
- webhook: z
364
- .union([
365
- z.string(),
366
- z.object({
367
- url: z.string(),
368
- headers: z.record(z.string(), z.string()).optional(),
369
- }),
370
- ])
371
- .optional(),
392
+ ...(SAFE_MODE ? {} : {
393
+ webhook: z
394
+ .union([
395
+ z.string(),
396
+ z.object({
397
+ url: z.string(),
398
+ headers: z.record(z.string(), z.string()).optional(),
399
+ }),
400
+ ])
401
+ .optional(),
402
+ }),
372
403
  deduplicateSimilarURLs: z.boolean().optional(),
373
404
  ignoreQueryParameters: z.boolean().optional(),
374
405
  scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl-mcp",
3
- "version": "3.1.13",
3
+ "version": "3.3.0",
4
4
  "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
5
5
  "type": "module",
6
6
  "bin": {