firecrawl-mcp 3.1.13 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +70 -39
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -79,8 +79,9 @@ const server = new FastMCP({
|
|
|
79
79
|
return { firecrawlApiKey: apiKey };
|
|
80
80
|
}
|
|
81
81
|
else {
|
|
82
|
-
if
|
|
83
|
-
|
|
82
|
+
// For self-hosted instances, API key is optional if FIRECRAWL_API_URL is provided
|
|
83
|
+
if (!process.env.FIRECRAWL_API_KEY && !process.env.FIRECRAWL_API_URL) {
|
|
84
|
+
console.error('Either FIRECRAWL_API_KEY or FIRECRAWL_API_URL must be provided');
|
|
84
85
|
process.exit(1);
|
|
85
86
|
}
|
|
86
87
|
return { firecrawlApiKey: process.env.FIRECRAWL_API_KEY };
|
|
@@ -95,25 +96,45 @@ const server = new FastMCP({
|
|
|
95
96
|
},
|
|
96
97
|
});
|
|
97
98
|
function createClient(apiKey) {
|
|
98
|
-
|
|
99
|
-
apiKey,
|
|
99
|
+
const config = {
|
|
100
100
|
...(process.env.FIRECRAWL_API_URL && {
|
|
101
101
|
apiUrl: process.env.FIRECRAWL_API_URL,
|
|
102
102
|
}),
|
|
103
|
-
});
|
|
103
|
+
};
|
|
104
|
+
// Only add apiKey if it's provided (required for cloud, optional for self-hosted)
|
|
105
|
+
if (apiKey) {
|
|
106
|
+
config.apiKey = apiKey;
|
|
107
|
+
}
|
|
108
|
+
return new FirecrawlApp(config);
|
|
104
109
|
}
|
|
105
110
|
const ORIGIN = 'mcp-fastmcp';
|
|
111
|
+
// Safe mode is enabled by default for cloud service to comply with ChatGPT safety requirements
|
|
112
|
+
const SAFE_MODE = process.env.CLOUD_SERVICE === 'true';
|
|
106
113
|
function getClient(session) {
|
|
107
|
-
|
|
108
|
-
|
|
114
|
+
// For cloud service, API key is required
|
|
115
|
+
if (process.env.CLOUD_SERVICE === 'true') {
|
|
116
|
+
if (!session || !session.firecrawlApiKey) {
|
|
117
|
+
throw new Error('Unauthorized');
|
|
118
|
+
}
|
|
119
|
+
return createClient(session.firecrawlApiKey);
|
|
120
|
+
}
|
|
121
|
+
// For self-hosted instances, API key is optional if FIRECRAWL_API_URL is provided
|
|
122
|
+
if (!process.env.FIRECRAWL_API_URL && (!session || !session.firecrawlApiKey)) {
|
|
123
|
+
throw new Error('Unauthorized: API key is required when not using a self-hosted instance');
|
|
109
124
|
}
|
|
110
|
-
return createClient(session.firecrawlApiKey);
|
|
125
|
+
return createClient(session?.firecrawlApiKey);
|
|
111
126
|
}
|
|
112
127
|
function asText(data) {
|
|
113
128
|
return JSON.stringify(data, null, 2);
|
|
114
129
|
}
|
|
115
130
|
// scrape tool (v2 semantics, minimal args)
|
|
116
131
|
// Centralized scrape params (used by scrape, and referenced in search/crawl scrapeOptions)
|
|
132
|
+
// Define safe action types
|
|
133
|
+
const safeActionTypes = ['wait', 'screenshot', 'scroll', 'scrape'];
|
|
134
|
+
const otherActions = ['click', 'write', 'press', 'executeJavascript', 'generatePDF'];
|
|
135
|
+
const allActionTypes = [...safeActionTypes, ...otherActions];
|
|
136
|
+
// Use appropriate action types based on safe mode
|
|
137
|
+
const allowedActionTypes = SAFE_MODE ? safeActionTypes : allActionTypes;
|
|
117
138
|
const scrapeParamsSchema = z.object({
|
|
118
139
|
url: z.string().url(),
|
|
119
140
|
formats: z
|
|
@@ -146,28 +167,20 @@ const scrapeParamsSchema = z.object({
|
|
|
146
167
|
includeTags: z.array(z.string()).optional(),
|
|
147
168
|
excludeTags: z.array(z.string()).optional(),
|
|
148
169
|
waitFor: z.number().optional(),
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
'
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
milliseconds: z.number().optional(),
|
|
164
|
-
text: z.string().optional(),
|
|
165
|
-
key: z.string().optional(),
|
|
166
|
-
direction: z.enum(['up', 'down']).optional(),
|
|
167
|
-
script: z.string().optional(),
|
|
168
|
-
fullPage: z.boolean().optional(),
|
|
169
|
-
}))
|
|
170
|
-
.optional(),
|
|
170
|
+
...(SAFE_MODE ? {} : {
|
|
171
|
+
actions: z
|
|
172
|
+
.array(z.object({
|
|
173
|
+
type: z.enum(allowedActionTypes),
|
|
174
|
+
selector: z.string().optional(),
|
|
175
|
+
milliseconds: z.number().optional(),
|
|
176
|
+
text: z.string().optional(),
|
|
177
|
+
key: z.string().optional(),
|
|
178
|
+
direction: z.enum(['up', 'down']).optional(),
|
|
179
|
+
script: z.string().optional(),
|
|
180
|
+
fullPage: z.boolean().optional(),
|
|
181
|
+
}))
|
|
182
|
+
.optional(),
|
|
183
|
+
}),
|
|
171
184
|
mobile: z.boolean().optional(),
|
|
172
185
|
skipTlsVerification: z.boolean().optional(),
|
|
173
186
|
removeBase64Images: z.boolean().optional(),
|
|
@@ -203,6 +216,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
|
|
|
203
216
|
\`\`\`
|
|
204
217
|
**Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
|
|
205
218
|
**Returns:** Markdown, HTML, or other formats as specified.
|
|
219
|
+
${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
|
|
206
220
|
`,
|
|
207
221
|
parameters: scrapeParamsSchema,
|
|
208
222
|
execute: async (args, { session, log }) => {
|
|
@@ -256,6 +270,20 @@ server.addTool({
|
|
|
256
270
|
description: `
|
|
257
271
|
Search the web and optionally extract content from search results. This is the most powerful web search tool available, and if available you should always default to using this tool for any web search needs.
|
|
258
272
|
|
|
273
|
+
The query also supports search operators, that you can use if needed to refine the search:
|
|
274
|
+
| Operator | Functionality | Examples |
|
|
275
|
+
---|-|-|
|
|
276
|
+
| \`"\"\` | Non-fuzzy matches a string of text | \`"Firecrawl"\`
|
|
277
|
+
| \`-\` | Excludes certain keywords or negates other operators | \`-bad\`, \`-site:firecrawl.dev\`
|
|
278
|
+
| \`site:\` | Only returns results from a specified website | \`site:firecrawl.dev\`
|
|
279
|
+
| \`inurl:\` | Only returns results that include a word in the URL | \`inurl:firecrawl\`
|
|
280
|
+
| \`allinurl:\` | Only returns results that include multiple words in the URL | \`allinurl:git firecrawl\`
|
|
281
|
+
| \`intitle:\` | Only returns results that include a word in the title of the page | \`intitle:Firecrawl\`
|
|
282
|
+
| \`allintitle:\` | Only returns results that include multiple words in the title of the page | \`allintitle:firecrawl playground\`
|
|
283
|
+
| \`related:\` | Only returns results that are related to a specific domain | \`related:firecrawl.dev\`
|
|
284
|
+
| \`imagesize:\` | Only returns images with exact dimensions | \`imagesize:1920x1080\`
|
|
285
|
+
| \`larger:\` | Only returns images larger than specified dimensions | \`larger:1920x1080\`
|
|
286
|
+
|
|
259
287
|
**Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.
|
|
260
288
|
**Not recommended for:** When you need to search the filesystem. When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl.
|
|
261
289
|
**Common mistakes:** Using crawl or map for open-ended questions (use search instead).
|
|
@@ -346,6 +374,7 @@ server.addTool({
|
|
|
346
374
|
}
|
|
347
375
|
\`\`\`
|
|
348
376
|
**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
|
|
377
|
+
${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
|
|
349
378
|
`,
|
|
350
379
|
parameters: z.object({
|
|
351
380
|
url: z.string(),
|
|
@@ -360,15 +389,17 @@ server.addTool({
|
|
|
360
389
|
crawlEntireDomain: z.boolean().optional(),
|
|
361
390
|
delay: z.number().optional(),
|
|
362
391
|
maxConcurrency: z.number().optional(),
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
392
|
+
...(SAFE_MODE ? {} : {
|
|
393
|
+
webhook: z
|
|
394
|
+
.union([
|
|
395
|
+
z.string(),
|
|
396
|
+
z.object({
|
|
397
|
+
url: z.string(),
|
|
398
|
+
headers: z.record(z.string(), z.string()).optional(),
|
|
399
|
+
}),
|
|
400
|
+
])
|
|
401
|
+
.optional(),
|
|
402
|
+
}),
|
|
372
403
|
deduplicateSimilarURLs: z.boolean().optional(),
|
|
373
404
|
ignoreQueryParameters: z.boolean().optional(),
|
|
374
405
|
scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "3.1.13",
|
|
3
|
+
"version": "3.3.0",
|
|
4
4
|
"description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|