firecrawl-mcp 3.14.1 → 3.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -1
- package/dist/index.js +181 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -565,12 +565,51 @@ Search the web and optionally extract content from search results.
|
|
|
565
565
|
|
|
566
566
|
**Returns:**
|
|
567
567
|
|
|
568
|
-
- Array of search results (with optional scraped content)
|
|
568
|
+
- A JSON envelope of the form `{ success, data: { web?, images?, news? }, id, creditsUsed }`. Each result array contains the search results (with optional scraped content). Pass the top-level `id` to `firecrawl_search_feedback` after you've used the results to refund 1 credit (search costs 2) and improve search quality.
|
|
569
569
|
|
|
570
570
|
**Prompt Example:**
|
|
571
571
|
|
|
572
572
|
> "Find the latest research papers on AI published in 2023."
|
|
573
573
|
|
|
574
|
+
### 5b. Search Feedback Tool (`firecrawl_search_feedback`)
|
|
575
|
+
|
|
576
|
+
Sends structured feedback on a previous `firecrawl_search` result. The first feedback per search id refunds 1 credit and improves Firecrawl's search quality. Idempotent per search id.
|
|
577
|
+
|
|
578
|
+
**Call this after every search you actually use** (or that didn't help). Bad/partial feedback with `missingContent` is just as valuable as good feedback.
|
|
579
|
+
|
|
580
|
+
**Opt out:** set `FIRECRAWL_NO_SEARCH_FEEDBACK=1` (or `FIRECRAWL_DISABLE_SEARCH_FEEDBACK=1`) in the environment when starting the MCP server; the values `1`, `true`, `yes`, and `on` are accepted, case-insensitively. The `firecrawl_search_feedback` tool will not be registered, so agents can't call it. Team admins can also disable feedback server-side; in that case the tool is registered but always returns `feedbackErrorCode: "TEAM_OPTED_OUT"`.
|
|
581
|
+
|
|
582
|
+
**Most important field:** `missingContent`. It's an array of specific pieces of content the agent expected to find but did not. One entry per missing topic — these aggregate across teams and tell us what to index next.
|
|
583
|
+
|
|
584
|
+
**Daily refund cap (per team, per UTC day, default 100 credits).** Once a team's `creditsRefundedToday` reaches `dailyRefundCap`, further submissions still record feedback but no longer refund credits. The response sets `dailyCapReached: true`. Agents should stop calling this tool for the rest of the UTC day when they see that flag.
|
|
585
|
+
|
|
586
|
+
**Usage Example:**
|
|
587
|
+
|
|
588
|
+
```json
|
|
589
|
+
{
|
|
590
|
+
"name": "firecrawl_search_feedback",
|
|
591
|
+
"arguments": {
|
|
592
|
+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
593
|
+
"rating": "good",
|
|
594
|
+
"valuableSources": [
|
|
595
|
+
{
|
|
596
|
+
"url": "https://docs.firecrawl.dev/features/search",
|
|
597
|
+
"reason": "Most up-to-date description of /search."
|
|
598
|
+
}
|
|
599
|
+
],
|
|
600
|
+
"missingContent": [
|
|
601
|
+
{ "topic": "Pricing for the search endpoint", "description": "No pricing tier table for /search specifically." },
|
|
602
|
+
{ "topic": "Per-team rate limits" }
|
|
603
|
+
],
|
|
604
|
+
"querySuggestions": "Boost docs.firecrawl.dev for queries that mention 'firecrawl'"
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
```
|
|
608
|
+
|
|
609
|
+
**Returns:**
|
|
610
|
+
|
|
611
|
+
- `{ success, feedbackId, creditsRefunded, creditsRefundedToday, dailyRefundCap, dailyCapReached?, alreadySubmitted?, warning? }` JSON.
|
|
612
|
+
|
|
574
613
|
### 6. Crawl Tool (`firecrawl_crawl`)
|
|
575
614
|
|
|
576
615
|
Starts an asynchronous crawl job on a website and extracts content from all pages.
|
package/dist/index.js
CHANGED
|
@@ -252,6 +252,7 @@ const scrapeParamsSchema = z.object({
|
|
|
252
252
|
queryOptions: z
|
|
253
253
|
.object({
|
|
254
254
|
prompt: z.string().max(10000),
|
|
255
|
+
mode: z.enum(['directQuote', 'freeform']).default('freeform'),
|
|
255
256
|
})
|
|
256
257
|
.optional(),
|
|
257
258
|
screenshotOptions: z
|
|
@@ -389,6 +390,7 @@ If JSON extraction returns empty, minimal, or just navigation content, the page
|
|
|
389
390
|
**Use query format only when:**
|
|
390
391
|
- The page is extremely long and you need a single targeted answer without processing the full content
|
|
391
392
|
- You want a quick factual answer and don't need to retain the page content
|
|
393
|
+
- Set \`queryOptions.mode\` to \`"directQuote"\` when you need verbatim page text; otherwise it defaults to \`"freeform"\`
|
|
392
394
|
|
|
393
395
|
**Usage Example (markdown format - default for most tasks):**
|
|
394
396
|
\`\`\`json
|
|
@@ -528,6 +530,7 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
528
530
|
**Domain filters:** Use includeDomains to restrict results to specific domains, or excludeDomains to remove domains. Do not use both in the same request. Domains must be hostnames only, without protocol or path.
|
|
529
531
|
**Scrape Options:** Only use scrapeOptions when you think it is absolutely necessary. When you do so default to a lower limit to avoid timeouts, 5 or lower.
|
|
530
532
|
**Optimal Workflow:** Search first using firecrawl_search without formats, then after fetching the results, use the scrape tool to get the content of the relevant page(s) that you want to scrape.
|
|
533
|
+
**After the search:** Once you have processed the results (or decided they were not useful), call \`firecrawl_search_feedback\` with the \`id\` from this response. The first feedback per search refunds 1 credit and helps Firecrawl improve search quality.
|
|
531
534
|
|
|
532
535
|
**Usage Example without formats (Preferred):**
|
|
533
536
|
\`\`\`json
|
|
@@ -564,7 +567,7 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
564
567
|
}
|
|
565
568
|
}
|
|
566
569
|
\`\`\`
|
|
567
|
-
**Returns:**
|
|
570
|
+
**Returns:** A JSON envelope of the form \`{ success, data: { web?, images?, news? }, id, creditsUsed }\`. Each result array contains the search results (with optional scraped content). Pass the top-level \`id\` to \`firecrawl_search_feedback\` after you've used the results.
|
|
568
571
|
`,
|
|
569
572
|
parameters: z
|
|
570
573
|
.object({
|
|
@@ -599,13 +602,187 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
599
602
|
const cleaned = removeEmptyTopLevel(searchOpts);
|
|
600
603
|
const searchQuery = buildSearchQueryWithDomains(query, includeDomains, excludeDomains);
|
|
601
604
|
log.info('Searching', { query: searchQuery });
|
|
602
|
-
|
|
605
|
+
// Call /v2/search through the SDK's HTTP layer (auth + retries) instead
|
|
606
|
+
// of `client.search()` so we preserve the full response envelope. The
|
|
607
|
+
// high-level `search()` helper strips `id` and `creditsUsed`, which
|
|
608
|
+
// breaks the `firecrawl_search_feedback` workflow that this server
|
|
609
|
+
// explicitly tells the LLM to use after every search.
|
|
610
|
+
const httpRes = await client.http.post('/v2/search', {
|
|
611
|
+
query: searchQuery,
|
|
603
612
|
...cleaned,
|
|
604
613
|
origin: ORIGIN,
|
|
605
614
|
});
|
|
606
|
-
return asText(
|
|
615
|
+
return asText(httpRes?.data ?? {});
|
|
607
616
|
},
|
|
608
617
|
});
|
|
618
|
+
const DEFAULT_CLOUD_API_URL = 'https://api.firecrawl.dev';
|
|
619
|
+
function resolveApiBaseUrl() {
|
|
620
|
+
return (process.env.FIRECRAWL_API_URL || DEFAULT_CLOUD_API_URL).replace(/\/$/, '');
|
|
621
|
+
}
|
|
622
|
+
const SEARCH_FEEDBACK_DISABLED = ['1', 'true', 'yes', 'on'].includes((process.env.FIRECRAWL_NO_SEARCH_FEEDBACK ||
|
|
623
|
+
process.env.FIRECRAWL_DISABLE_SEARCH_FEEDBACK ||
|
|
624
|
+
'')
|
|
625
|
+
.trim()
|
|
626
|
+
.toLowerCase());
|
|
627
|
+
if (SEARCH_FEEDBACK_DISABLED) {
|
|
628
|
+
console.error('[firecrawl-mcp] Search feedback tool disabled by FIRECRAWL_NO_SEARCH_FEEDBACK; firecrawl_search_feedback will not be registered.');
|
|
629
|
+
}
|
|
630
|
+
if (!SEARCH_FEEDBACK_DISABLED) {
|
|
631
|
+
server.addTool({
|
|
632
|
+
name: 'firecrawl_search_feedback',
|
|
633
|
+
annotations: {
|
|
634
|
+
title: 'Send feedback on a search result',
|
|
635
|
+
readOnlyHint: false,
|
|
636
|
+
openWorldHint: true,
|
|
637
|
+
},
|
|
638
|
+
description: `
|
|
639
|
+
Send structured feedback on a previous \`firecrawl_search\` result. **Call this immediately after a search where you used the results** so we can improve search quality and refund 1 credit (search costs 2).
|
|
640
|
+
|
|
641
|
+
Pass the \`searchId\` returned by \`firecrawl_search\` (the \`id\` field on the response) and tell us:
|
|
642
|
+
|
|
643
|
+
- **rating** — overall result quality: \`good\`, \`partial\`, or \`bad\`.
|
|
644
|
+
- **valuableSources** — which result URLs were actually useful, and a short reason why.
|
|
645
|
+
- **missingContent** — **the most important field.** An ARRAY of specific pieces of content you expected to find but didn't. One entry per missing piece, each with a short \`topic\` and an optional longer \`description\`. Examples: \`{"topic":"enterprise pricing","description":"no pricing tier table for the Enterprise plan was returned"}\`, \`{"topic":"API rate limits"}\`, \`{"topic":"comparison vs competitors"}\`. **Be specific** — these aggregate across teams and tell us what to index next. Do not pack multiple topics into one entry.
|
|
646
|
+
- **querySuggestions** — how the query or response shape could be improved (e.g. "would have liked official docs first", "should boost github.com").
|
|
647
|
+
|
|
648
|
+
**Substantive-feedback requirement** (zero-effort feedback is rejected with HTTP 400):
|
|
649
|
+
- \`good\` — must include at least one \`valuableSources\` entry
|
|
650
|
+
- \`partial\` — must include \`valuableSources\` or at least one \`missingContent\` entry
|
|
651
|
+
- \`bad\` — must include at least one \`missingContent\` entry or \`querySuggestions\`
|
|
652
|
+
|
|
653
|
+
**Time window:** Feedback must be submitted within ~2 minutes of the search. Beyond that, the call returns HTTP 409 with \`feedbackErrorCode: "FEEDBACK_WINDOW_EXPIRED"\` — do not retry, just move on. Same goes for any 4xx response: do not retry-loop.
|
|
654
|
+
|
|
655
|
+
**Behaviors:**
|
|
656
|
+
- Idempotent per \`searchId\`. Re-submitting for the same id returns \`alreadySubmitted: true\` with \`creditsRefunded: 0\`.
|
|
657
|
+
- Refund only applies to billable searches; preview teams are blocked.
|
|
658
|
+
- Failed searches cannot receive feedback (the search itself already returned an error you can act on).
|
|
659
|
+
- **Daily refund cap (per team, per UTC day, default 100 credits).** Once a team's \`creditsRefundedToday\` reaches \`dailyRefundCap\`, the response returns \`dailyCapReached: true\` with \`creditsRefunded: 0\`. The feedback is still recorded for search-quality improvement — only the credit refund is gated. **Stop calling this tool for the rest of the UTC day** when you see \`dailyCapReached: true\`.
|
|
660
|
+
|
|
661
|
+
**When to call:** Right after processing a search result. If the result didn't help, send rating \`bad\` with a clear \`missingContent\` — that is just as valuable as a \`good\` rating.
|
|
662
|
+
|
|
663
|
+
**Usage Example (good rating with valuable sources + missing content):**
|
|
664
|
+
\`\`\`json
|
|
665
|
+
{
|
|
666
|
+
"name": "firecrawl_search_feedback",
|
|
667
|
+
"arguments": {
|
|
668
|
+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
669
|
+
"rating": "good",
|
|
670
|
+
"valuableSources": [
|
|
671
|
+
{ "url": "https://docs.firecrawl.dev/features/search", "reason": "Most up-to-date description of /search." }
|
|
672
|
+
],
|
|
673
|
+
"missingContent": [
|
|
674
|
+
{ "topic": "Pricing for the search endpoint", "description": "No pricing tier table for /search specifically." },
|
|
675
|
+
{ "topic": "Rate limits", "description": "Per-team RPS for /search not documented." }
|
|
676
|
+
],
|
|
677
|
+
"querySuggestions": "Boost docs.firecrawl.dev for queries that mention 'firecrawl'"
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
\`\`\`
|
|
681
|
+
|
|
682
|
+
**Usage Example (bad rating, what was missing):**
|
|
683
|
+
\`\`\`json
|
|
684
|
+
{
|
|
685
|
+
"name": "firecrawl_search_feedback",
|
|
686
|
+
"arguments": {
|
|
687
|
+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
688
|
+
"rating": "bad",
|
|
689
|
+
"missingContent": [
|
|
690
|
+
{ "topic": "Recent benchmarks", "description": "All results were >12 months old." },
|
|
691
|
+
{ "topic": "Comparison vs Algolia" }
|
|
692
|
+
]
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
\`\`\`
|
|
696
|
+
|
|
697
|
+
**Returns:** \`{ success, feedbackId, creditsRefunded, creditsRefundedToday, dailyRefundCap, dailyCapReached?, alreadySubmitted?, warning? }\` JSON.
|
|
698
|
+
`,
|
|
699
|
+
parameters: z.object({
|
|
700
|
+
searchId: z
|
|
701
|
+
.string()
|
|
702
|
+
.uuid('searchId must be the UUID returned by firecrawl_search'),
|
|
703
|
+
rating: z.enum(['good', 'bad', 'partial']),
|
|
704
|
+
valuableSources: z
|
|
705
|
+
.array(z.object({
|
|
706
|
+
url: z.string().url(),
|
|
707
|
+
reason: z.string().max(1000).optional(),
|
|
708
|
+
}))
|
|
709
|
+
.max(50)
|
|
710
|
+
.optional(),
|
|
711
|
+
missingContent: z
|
|
712
|
+
.array(z.object({
|
|
713
|
+
topic: z
|
|
714
|
+
.string()
|
|
715
|
+
.min(1, 'topic must not be empty')
|
|
716
|
+
.max(200, 'topic must be 200 characters or fewer'),
|
|
717
|
+
description: z.string().max(2000).optional(),
|
|
718
|
+
}))
|
|
719
|
+
.max(20)
|
|
720
|
+
.optional()
|
|
721
|
+
.describe('Array of specific pieces of content the agent expected to find but did not. ' +
|
|
722
|
+
'One entry per distinct topic. Each entry has a short `topic` and optional ' +
|
|
723
|
+
'longer `description`.'),
|
|
724
|
+
querySuggestions: z.string().max(2000).optional(),
|
|
725
|
+
}),
|
|
726
|
+
execute: async (args, { session, log }) => {
|
|
727
|
+
const { searchId, rating, valuableSources, missingContent, querySuggestions, } = args;
|
|
728
|
+
const apiBase = resolveApiBaseUrl();
|
|
729
|
+
const endpoint = `${apiBase}/v2/search/${encodeURIComponent(searchId)}/feedback`;
|
|
730
|
+
const body = {
|
|
731
|
+
rating,
|
|
732
|
+
origin: ORIGIN,
|
|
733
|
+
};
|
|
734
|
+
if (valuableSources && valuableSources.length > 0) {
|
|
735
|
+
body.valuableSources = valuableSources;
|
|
736
|
+
}
|
|
737
|
+
if (missingContent && missingContent.length > 0) {
|
|
738
|
+
body.missingContent = missingContent;
|
|
739
|
+
}
|
|
740
|
+
if (querySuggestions)
|
|
741
|
+
body.querySuggestions = querySuggestions;
|
|
742
|
+
const headers = {
|
|
743
|
+
'Content-Type': 'application/json',
|
|
744
|
+
};
|
|
745
|
+
const apiKey = session?.firecrawlApiKey;
|
|
746
|
+
if (apiKey) {
|
|
747
|
+
headers['Authorization'] = `Bearer ${apiKey}`;
|
|
748
|
+
}
|
|
749
|
+
else if (process.env.CLOUD_SERVICE === 'true') {
|
|
750
|
+
throw new Error('Unauthorized: missing API key for search feedback.');
|
|
751
|
+
}
|
|
752
|
+
log.info('Submitting search feedback', { searchId, rating });
|
|
753
|
+
const response = await fetch(endpoint, {
|
|
754
|
+
method: 'POST',
|
|
755
|
+
headers,
|
|
756
|
+
body: JSON.stringify(body),
|
|
757
|
+
});
|
|
758
|
+
const responseText = await response.text();
|
|
759
|
+
let parsed;
|
|
760
|
+
try {
|
|
761
|
+
parsed = JSON.parse(responseText);
|
|
762
|
+
}
|
|
763
|
+
catch {
|
|
764
|
+
parsed = { raw: responseText };
|
|
765
|
+
}
|
|
766
|
+
// Any non-2xx response: surface a structured payload (retryable only for 5xx)
|
|
767
|
+
// so agents do not retry-loop on substantive-feedback rejections,
|
|
768
|
+
// expired windows, etc.
|
|
769
|
+
if (!response.ok) {
|
|
770
|
+
log.warn('Search feedback rejected', {
|
|
771
|
+
status: response.status,
|
|
772
|
+
feedbackErrorCode: parsed?.feedbackErrorCode,
|
|
773
|
+
});
|
|
774
|
+
return asText({
|
|
775
|
+
success: false,
|
|
776
|
+
status: response.status,
|
|
777
|
+
feedbackErrorCode: parsed?.feedbackErrorCode,
|
|
778
|
+
error: parsed?.error ?? `HTTP ${response.status}`,
|
|
779
|
+
retryable: response.status >= 500,
|
|
780
|
+
});
|
|
781
|
+
}
|
|
782
|
+
return asText(parsed);
|
|
783
|
+
},
|
|
784
|
+
});
|
|
785
|
+
}
|
|
609
786
|
server.addTool({
|
|
610
787
|
name: 'firecrawl_crawl',
|
|
611
788
|
annotations: {
|
|
@@ -1259,6 +1436,7 @@ if (process.env.CLOUD_SERVICE !== 'true') {
|
|
|
1259
1436
|
queryOptions: z
|
|
1260
1437
|
.object({
|
|
1261
1438
|
prompt: z.string().max(10000),
|
|
1439
|
+
mode: z.enum(['directQuote', 'freeform']).default('freeform'),
|
|
1262
1440
|
})
|
|
1263
1441
|
.optional(),
|
|
1264
1442
|
parsers: z.array(z.enum(['pdf'])).optional(),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "3.14.1",
|
|
3
|
+
"version": "3.16.0",
|
|
4
4
|
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"mcpName": "io.github.firecrawl/firecrawl-mcp-server",
|