firecrawl-mcp 3.6.1 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +0 -0
- package/dist/index.js +114 -0
- package/package.json +2 -2
- package/dist/index-v1.js +0 -1313
- package/dist/index.test.js +0 -255
- package/dist/jest.setup.js +0 -58
- package/dist/server-v1.js +0 -1154
- package/dist/server-v2.js +0 -1067
- package/dist/src/index.js +0 -1053
- package/dist/src/index.test.js +0 -225
- package/dist/versioned-server.js +0 -203
package/LICENSE
CHANGED
File without changes
package/dist/index.js
CHANGED

@@ -210,7 +210,9 @@ const scrapeParamsSchema = z.object({
     })
     .optional(),
   storeInCache: z.boolean().optional(),
+  zeroDataRetention: z.boolean().optional(),
   maxAge: z.number().optional(),
+  proxy: z.enum(['basic', 'stealth', 'auto']).optional(),
 });
 server.addTool({
   name: 'firecrawl_scrape',
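This hunk adds two optional fields to `scrapeParamsSchema`: `zeroDataRetention` and `proxy`. A minimal sketch of `firecrawl_scrape` arguments using them; the URL is a placeholder and the retention behaviour is inferred from the option name, only the field names and types come from the schema above.

```ts
// Illustrative firecrawl_scrape arguments using the options added in 3.7.0.
// Assumption: zeroDataRetention asks Firecrawl not to retain request data;
// the diff only shows that the flag exists, not its exact semantics.
const scrapeArgs = {
  url: "https://example.com",   // placeholder URL
  zeroDataRetention: true,      // new optional boolean
  proxy: "stealth" as const,    // new optional enum: 'basic' | 'stealth' | 'auto'
};
```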
@@ -221,6 +223,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
 **Best for:** Single page content extraction, when you know exactly which page contains the information.
 **Not recommended for:** Multiple pages (use batch_scrape), unknown page (use search), structured data (use extract).
 **Common mistakes:** Using scrape for a list of URLs (use batch_scrape instead). If batch scrape doesnt work, just use scrape and call it multiple times.
+**Other Features:** Use 'branding' format to extract brand identity (colors, fonts, typography, spacing, UI components) for design analysis or style replication.
 **Prompt Example:** "Get the content of the page at https://example.com."
 **Usage Example:**
 \`\`\`json
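The added **Other Features** line points at a 'branding' format for the scrape tool. A sketch of arguments that would request it; whether `formats` accepts a plain string entry for this format is an assumption about the schema, not something shown in this diff.

```ts
// Hypothetical firecrawl_scrape arguments requesting the 'branding' format
// mentioned in the description above. Assumes formats takes plain string
// entries here; the URL is a placeholder.
const brandingScrapeArgs = {
  url: "https://example.com",
  formats: ["markdown", "branding"],
};
```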
@@ -365,6 +368,7 @@ The query also supports search operators, that you can use if needed to refine t
     .array(z.object({ type: z.enum(['web', 'images', 'news']) }))
     .optional(),
   scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),
+  enterprise: z.array(z.enum(['default', 'anon', 'zdr'])).optional(),
 }),
 execute: async (args, { session, log }) => {
   const client = getClient(session);
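The search tool gains an optional `enterprise` array. A sketch of `firecrawl_search` arguments exercising it; the `query` field name and the meaning of the enum values are assumptions, only the `['default', 'anon', 'zdr']` values come from the schema above.

```ts
// Illustrative firecrawl_search arguments using the new enterprise option.
// Assumption: 'zdr' relates to zero-data-retention and 'anon' to anonymous
// access; the diff only defines the enum, not its behaviour.
const searchArgs = {
  query: "firecrawl mcp server",        // assumed field name for the search query
  sources: [{ type: "web" as const }],  // from the sources schema shown above
  enterprise: ["zdr" as const],         // new in 3.7.0: 'default' | 'anon' | 'zdr'
};
```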
@@ -538,6 +542,116 @@ Extract structured information from web pages using LLM capabilities. Supports b
     return asText(res);
   },
 });
+server.addTool({
+  name: 'firecrawl_agent',
+  description: `
+Autonomous web data gathering agent. Describe what data you want, and the agent searches, navigates, and extracts it from anywhere on the web.
+
+**Best for:** Complex data gathering tasks where you don't know the exact URLs; research tasks requiring multiple sources; finding data in hard-to-reach places.
+**Not recommended for:** Simple single-page scraping (use scrape); when you already know the exact URL (use scrape or extract).
+**Key advantages over extract:**
+- No URLs required - just describe what you need
+- Autonomously searches and navigates the web
+- Faster and more cost-effective for complex tasks
+- Higher reliability for varied queries
+
+**Arguments:**
+- prompt: Natural language description of the data you want (required, max 10,000 characters)
+- urls: Optional array of URLs to focus the agent on specific pages
+- schema: Optional JSON schema for structured output
+
+**Prompt Example:** "Find the founders of Firecrawl and their backgrounds"
+**Usage Example (no URLs):**
+\`\`\`json
+{
+  "name": "firecrawl_agent",
+  "arguments": {
+    "prompt": "Find the top 5 AI startups founded in 2024 and their funding amounts",
+    "schema": {
+      "type": "object",
+      "properties": {
+        "startups": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "name": { "type": "string" },
+              "funding": { "type": "string" },
+              "founded": { "type": "string" }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+\`\`\`
+**Usage Example (with URLs):**
+\`\`\`json
+{
+  "name": "firecrawl_agent",
+  "arguments": {
+    "urls": ["https://docs.firecrawl.dev", "https://firecrawl.dev/pricing"],
+    "prompt": "Compare the features and pricing information from these pages"
+  }
+}
+\`\`\`
+**Returns:** Extracted data matching your prompt/schema, plus credits used.
+`,
+  parameters: z.object({
+    prompt: z.string().min(1).max(10000),
+    urls: z.array(z.string().url()).optional(),
+    schema: z.record(z.string(), z.any()).optional(),
+  }),
+  execute: async (args, { session, log }) => {
+    const client = getClient(session);
+    const a = args;
+    log.info('Starting agent', {
+      prompt: a.prompt.substring(0, 100),
+      urlCount: Array.isArray(a.urls) ? a.urls.length : 0,
+    });
+    const agentBody = removeEmptyTopLevel({
+      prompt: a.prompt,
+      urls: a.urls,
+      schema: a.schema || undefined,
+    });
+    const res = await client.agent({
+      ...agentBody,
+      origin: ORIGIN,
+    });
+    return asText(res);
+  },
+});
+server.addTool({
+  name: 'firecrawl_agent_status',
+  description: `
+Check the status of an agent job.
+
+**Usage Example:**
+\`\`\`json
+{
+  "name": "firecrawl_agent_status",
+  "arguments": {
+    "id": "550e8400-e29b-41d4-a716-446655440000"
+  }
+}
+\`\`\`
+**Possible statuses:**
+- processing: Agent is still working
+- completed: Extraction finished successfully
+- failed: An error occurred
+
+**Returns:** Status, progress, and results (if completed) of the agent job.
+`,
+  parameters: z.object({ id: z.string() }),
+  execute: async (args, { session, log }) => {
+    const client = getClient(session);
+    const { id } = args;
+    log.info('Checking agent status', { id });
+    const res = await client.getAgentStatus(id);
+    return asText(res);
+  },
+});
 const PORT = Number(process.env.PORT || 3000);
 const HOST = process.env.CLOUD_SERVICE === 'true'
   ? '0.0.0.0'
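The new `firecrawl_agent` execute handler passes its arguments through a `removeEmptyTopLevel` helper that is defined elsewhere in the package and not shown in this diff. A minimal sketch of what such a helper presumably does, under the assumption that it simply drops unset top-level keys so optional fields like `urls` and `schema` are omitted from the request body.

```ts
// Sketch only: the real removeEmptyTopLevel is not part of this diff.
// Assumption: it strips top-level keys whose values are undefined/null
// (and empty arrays/objects) before the body is sent to client.agent().
function removeEmptyTopLevelSketch<T extends Record<string, unknown>>(obj: T): Partial<T> {
  const out: Partial<T> = {};
  for (const [key, value] of Object.entries(obj)) {
    if (value === undefined || value === null) continue;                 // drop unset keys
    if (Array.isArray(value) && value.length === 0) continue;            // drop empty arrays
    if (typeof value === "object" && !Array.isArray(value) &&
        Object.keys(value as object).length === 0) continue;             // drop empty objects
    (out as Record<string, unknown>)[key] = value;
  }
  return out;
}

// With that behaviour, an agent call without urls/schema would send only { prompt, origin }.
```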
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "firecrawl-mcp",
-  "version": "3.6.1",
+  "version": "3.7.0",
   "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, search, batch processing, structured data extraction, and LLM-powered content analysis.",
   "type": "module",
   "mcpName": "io.github.firecrawl/firecrawl-mcp-server",
@@ -28,7 +28,7 @@
   },
   "license": "MIT",
   "dependencies": {
-    "@mendable/firecrawl-js": "^4.3
+    "@mendable/firecrawl-js": "^4.9.3",
     "dotenv": "^17.2.2",
     "firecrawl-fastmcp": "^1.0.4",
     "typescript": "^5.9.2",