firecrawl-mcp 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +49 -31
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -108,6 +108,8 @@ function createClient(apiKey) {
|
|
|
108
108
|
return new FirecrawlApp(config);
|
|
109
109
|
}
|
|
110
110
|
const ORIGIN = 'mcp-fastmcp';
|
|
111
|
+
// Safe mode is enabled by default for cloud service to comply with ChatGPT safety requirements
|
|
112
|
+
const SAFE_MODE = process.env.CLOUD_SERVICE === 'true';
|
|
111
113
|
function getClient(session) {
|
|
112
114
|
// For cloud service, API key is required
|
|
113
115
|
if (process.env.CLOUD_SERVICE === 'true') {
|
|
@@ -127,6 +129,12 @@ function asText(data) {
|
|
|
127
129
|
}
|
|
128
130
|
// scrape tool (v2 semantics, minimal args)
|
|
129
131
|
// Centralized scrape params (used by scrape, and referenced in search/crawl scrapeOptions)
|
|
132
|
+
// Define safe action types
|
|
133
|
+
const safeActionTypes = ['wait', 'screenshot', 'scroll', 'scrape'];
|
|
134
|
+
const otherActions = ['click', 'write', 'press', 'executeJavascript', 'generatePDF'];
|
|
135
|
+
const allActionTypes = [...safeActionTypes, ...otherActions];
|
|
136
|
+
// Use appropriate action types based on safe mode
|
|
137
|
+
const allowedActionTypes = SAFE_MODE ? safeActionTypes : allActionTypes;
|
|
130
138
|
const scrapeParamsSchema = z.object({
|
|
131
139
|
url: z.string().url(),
|
|
132
140
|
formats: z
|
|
@@ -159,28 +167,20 @@ const scrapeParamsSchema = z.object({
|
|
|
159
167
|
includeTags: z.array(z.string()).optional(),
|
|
160
168
|
excludeTags: z.array(z.string()).optional(),
|
|
161
169
|
waitFor: z.number().optional(),
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
'
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
milliseconds: z.number().optional(),
|
|
177
|
-
text: z.string().optional(),
|
|
178
|
-
key: z.string().optional(),
|
|
179
|
-
direction: z.enum(['up', 'down']).optional(),
|
|
180
|
-
script: z.string().optional(),
|
|
181
|
-
fullPage: z.boolean().optional(),
|
|
182
|
-
}))
|
|
183
|
-
.optional(),
|
|
170
|
+
...(SAFE_MODE ? {} : {
|
|
171
|
+
actions: z
|
|
172
|
+
.array(z.object({
|
|
173
|
+
type: z.enum(allowedActionTypes),
|
|
174
|
+
selector: z.string().optional(),
|
|
175
|
+
milliseconds: z.number().optional(),
|
|
176
|
+
text: z.string().optional(),
|
|
177
|
+
key: z.string().optional(),
|
|
178
|
+
direction: z.enum(['up', 'down']).optional(),
|
|
179
|
+
script: z.string().optional(),
|
|
180
|
+
fullPage: z.boolean().optional(),
|
|
181
|
+
}))
|
|
182
|
+
.optional(),
|
|
183
|
+
}),
|
|
184
184
|
mobile: z.boolean().optional(),
|
|
185
185
|
skipTlsVerification: z.boolean().optional(),
|
|
186
186
|
removeBase64Images: z.boolean().optional(),
|
|
@@ -216,6 +216,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
|
|
|
216
216
|
\`\`\`
|
|
217
217
|
**Performance:** Add maxAge parameter for 500% faster scrapes using cached data.
|
|
218
218
|
**Returns:** Markdown, HTML, or other formats as specified.
|
|
219
|
+
${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''}
|
|
219
220
|
`,
|
|
220
221
|
parameters: scrapeParamsSchema,
|
|
221
222
|
execute: async (args, { session, log }) => {
|
|
@@ -269,6 +270,20 @@ server.addTool({
|
|
|
269
270
|
description: `
|
|
270
271
|
Search the web and optionally extract content from search results. This is the most powerful web search tool available, and if available you should always default to using this tool for any web search needs.
|
|
271
272
|
|
|
273
|
+
The query also supports search operators, that you can use if needed to refine the search:
|
|
274
|
+
| Operator | Functionality | Examples |
|
|
275
|
+
---|-|-|
|
|
276
|
+
| \`"\"\` | Non-fuzzy matches a string of text | \`"Firecrawl"\`
|
|
277
|
+
| \`-\` | Excludes certain keywords or negates other operators | \`-bad\`, \`-site:firecrawl.dev\`
|
|
278
|
+
| \`site:\` | Only returns results from a specified website | \`site:firecrawl.dev\`
|
|
279
|
+
| \`inurl:\` | Only returns results that include a word in the URL | \`inurl:firecrawl\`
|
|
280
|
+
| \`allinurl:\` | Only returns results that include multiple words in the URL | \`allinurl:git firecrawl\`
|
|
281
|
+
| \`intitle:\` | Only returns results that include a word in the title of the page | \`intitle:Firecrawl\`
|
|
282
|
+
| \`allintitle:\` | Only returns results that include multiple words in the title of the page | \`allintitle:firecrawl playground\`
|
|
283
|
+
| \`related:\` | Only returns results that are related to a specific domain | \`related:firecrawl.dev\`
|
|
284
|
+
| \`imagesize:\` | Only returns images with exact dimensions | \`imagesize:1920x1080\`
|
|
285
|
+
| \`larger:\` | Only returns images larger than specified dimensions | \`larger:1920x1080\`
|
|
286
|
+
|
|
272
287
|
**Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.
|
|
273
288
|
**Not recommended for:** When you need to search the filesystem. When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl.
|
|
274
289
|
**Common mistakes:** Using crawl or map for open-ended questions (use search instead).
|
|
@@ -359,6 +374,7 @@ server.addTool({
|
|
|
359
374
|
}
|
|
360
375
|
\`\`\`
|
|
361
376
|
**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
|
|
377
|
+
${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''}
|
|
362
378
|
`,
|
|
363
379
|
parameters: z.object({
|
|
364
380
|
url: z.string(),
|
|
@@ -373,15 +389,17 @@ server.addTool({
|
|
|
373
389
|
crawlEntireDomain: z.boolean().optional(),
|
|
374
390
|
delay: z.number().optional(),
|
|
375
391
|
maxConcurrency: z.number().optional(),
|
|
376
|
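-
webhook: z
|
|
377
|
-
.union([
|
|
378
|
-
z.string(),
|
|
379
|
-
z.object({
|
|
380
|
-
url: z.string(),
|
|
381
|
-
headers: z.record(z.string(), z.string()).optional(),
|
|
382
|
-
}),
|
|
383
|
-
])
|
|
384
|
-
.optional(),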
|
|
392
|
+
...(SAFE_MODE ? {} : {
|
|
393
|
+
webhook: z
|
|
394
|
+
.union([
|
|
395
|
+
z.string(),
|
|
396
|
+
z.object({
|
|
397
|
+
url: z.string(),
|
|
398
|
+
headers: z.record(z.string(), z.string()).optional(),
|
|
399
|
+
}),
|
|
400
|
+
])
|
|
401
|
+
.optional(),
|
|
402
|
+
}),
|
|
385
403
|
deduplicateSimilarURLs: z.boolean().optional(),
|
|
386
404
|
ignoreQueryParameters: z.boolean().optional(),
|
|
387
405
|
scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "3.2.0",
|
|
3
|
+
"version": "3.3.0",
|
|
4
4
|
"description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|