firecrawl-mcp 3.15.0 → 3.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -1
- package/dist/index.js +178 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -565,12 +565,51 @@ Search the web and optionally extract content from search results.
|
|
|
565
565
|
|
|
566
566
|
**Returns:**
|
|
567
567
|
|
|
568
|
-
- Array of search results (with optional scraped content)
|
|
568
|
+
- A JSON envelope of the form `{ success, data: { web?, images?, news? }, id, creditsUsed }`; each result array holds the search results (with optional scraped content). Pass the top-level `id` to `firecrawl_search_feedback` after you've used the results to refund 1 credit (search costs 2) and improve search quality.
|
|
569
569
|
|
|
570
570
|
**Prompt Example:**
|
|
571
571
|
|
|
572
572
|
> "Find the latest research papers on AI published in 2023."
|
|
573
573
|
|
|
574
|
+
### 5b. Search Feedback Tool (`firecrawl_search_feedback`)
|
|
575
|
+
|
|
576
|
+
Sends structured feedback on a previous `firecrawl_search` result. The first feedback per search id refunds 1 credit and improves Firecrawl's search quality. Idempotent per search id.
|
|
577
|
+
|
|
578
|
+
**Call this after every search you actually use** (or that didn't help). Bad/partial feedback with `missingContent` is just as valuable as good feedback.
|
|
579
|
+
|
|
580
|
+
**Opt out:** set `FIRECRAWL_NO_SEARCH_FEEDBACK=1` (or `FIRECRAWL_DISABLE_SEARCH_FEEDBACK=1`) in the environment when starting the MCP server; the values `true`, `yes`, and `on` are also accepted, case-insensitively. The `firecrawl_search_feedback` tool will then not be registered, so agents can't call it. Team admins can also disable feedback server-side; in that case the tool is registered but always returns `feedbackErrorCode: "TEAM_OPTED_OUT"`.
|
|
581
|
+
|
|
582
|
+
**Most important field:** `missingContent`. It's an array of specific pieces of content the agent expected to find but did not. One entry per missing topic — these aggregate across teams and tell us what to index next.
|
|
583
|
+
|
|
584
|
+
**Daily refund cap (per team, per UTC day, default 100 credits).** Once a team's `creditsRefundedToday` reaches `dailyRefundCap`, further submissions still record feedback but no longer refund credits. The response sets `dailyCapReached: true`. Agents should stop calling this tool for the rest of the UTC day when they see that flag.
|
|
585
|
+
|
|
586
|
+
**Usage Example:**
|
|
587
|
+
|
|
588
|
+
```json
|
|
589
|
+
{
|
|
590
|
+
"name": "firecrawl_search_feedback",
|
|
591
|
+
"arguments": {
|
|
592
|
+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
593
|
+
"rating": "good",
|
|
594
|
+
"valuableSources": [
|
|
595
|
+
{
|
|
596
|
+
"url": "https://docs.firecrawl.dev/features/search",
|
|
597
|
+
"reason": "Most up-to-date description of /search."
|
|
598
|
+
}
|
|
599
|
+
],
|
|
600
|
+
"missingContent": [
|
|
601
|
+
{ "topic": "Pricing for the search endpoint", "description": "No pricing tier table for /search specifically." },
|
|
602
|
+
{ "topic": "Per-team rate limits" }
|
|
603
|
+
],
|
|
604
|
+
"querySuggestions": "Boost docs.firecrawl.dev for queries that mention 'firecrawl'"
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
```
|
|
608
|
+
|
|
609
|
+
**Returns:**
|
|
610
|
+
|
|
611
|
+
- `{ success, feedbackId, creditsRefunded, creditsRefundedToday, dailyRefundCap, dailyCapReached?, alreadySubmitted?, warning? }` JSON.
|
|
612
|
+
|
|
574
613
|
### 6. Crawl Tool (`firecrawl_crawl`)
|
|
575
614
|
|
|
576
615
|
Starts an asynchronous crawl job on a website and extracts content from all pages.
|
package/dist/index.js
CHANGED
|
@@ -530,6 +530,7 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
530
530
|
**Domain filters:** Use includeDomains to restrict results to specific domains, or excludeDomains to remove domains. Do not use both in the same request. Domains must be hostnames only, without protocol or path.
|
|
531
531
|
**Scrape Options:** Only use scrapeOptions when you think it is absolutely necessary. When you do so default to a lower limit to avoid timeouts, 5 or lower.
|
|
532
532
|
**Optimal Workflow:** Search first using firecrawl_search without formats, then after fetching the results, use the scrape tool to get the content of the relevantpage(s) that you want to scrape
|
|
533
|
+
**After the search:** Once you have processed the results (or decided they were not useful), call \`firecrawl_search_feedback\` with the \`id\` from this response. The first feedback per search refunds 1 credit and helps Firecrawl improve search quality.
|
|
533
534
|
|
|
534
535
|
**Usage Example without formats (Preferred):**
|
|
535
536
|
\`\`\`json
|
|
@@ -566,7 +567,7 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
566
567
|
}
|
|
567
568
|
}
|
|
568
569
|
\`\`\`
|
|
569
|
-
**Returns:**
|
|
570
|
+
**Returns:** A JSON envelope of the form \`{ success, data: { web?, images?, news? }, id, creditsUsed }\`. Each result array contains the search results (with optional scraped content). Pass the top-level \`id\` to \`firecrawl_search_feedback\` after you've used the results.
|
|
570
571
|
`,
|
|
571
572
|
parameters: z
|
|
572
573
|
.object({
|
|
@@ -601,13 +602,187 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
601
602
|
const cleaned = removeEmptyTopLevel(searchOpts);
|
|
602
603
|
const searchQuery = buildSearchQueryWithDomains(query, includeDomains, excludeDomains);
|
|
603
604
|
log.info('Searching', { query: searchQuery });
|
|
604
|
-
|
|
605
|
+
// Call /v2/search through the SDK's HTTP layer (auth + retries) instead
|
|
606
|
+
// of `client.search()` so we preserve the full response envelope. The
|
|
607
|
+
// high-level `search()` helper strips `id` and `creditsUsed`, which
|
|
608
|
+
// breaks the `firecrawl_search_feedback` workflow that this server
|
|
609
|
+
// explicitly tells the LLM to use after every search.
|
|
610
|
+
const httpRes = await client.http.post('/v2/search', {
|
|
611
|
+
query: searchQuery,
|
|
605
612
|
...cleaned,
|
|
606
613
|
origin: ORIGIN,
|
|
607
614
|
});
|
|
608
|
-
return asText(
|
|
615
|
+
return asText(httpRes?.data ?? {});
|
|
609
616
|
},
|
|
610
617
|
});
|
|
618
|
+
const DEFAULT_CLOUD_API_URL = 'https://api.firecrawl.dev';
|
|
619
|
+
function resolveApiBaseUrl() {
|
|
620
|
+
return (process.env.FIRECRAWL_API_URL || DEFAULT_CLOUD_API_URL).replace(/\/$/, '');
|
|
621
|
+
}
|
|
622
|
+
const SEARCH_FEEDBACK_DISABLED = ['1', 'true', 'yes', 'on'].includes((process.env.FIRECRAWL_NO_SEARCH_FEEDBACK ||
|
|
623
|
+
process.env.FIRECRAWL_DISABLE_SEARCH_FEEDBACK ||
|
|
624
|
+
'')
|
|
625
|
+
.trim()
|
|
626
|
+
.toLowerCase());
|
|
627
|
+
if (SEARCH_FEEDBACK_DISABLED) {
|
|
628
|
+
console.error('[firecrawl-mcp] Search feedback tool disabled by FIRECRAWL_NO_SEARCH_FEEDBACK; firecrawl_search_feedback will not be registered.');
|
|
629
|
+
}
|
|
630
|
+
if (!SEARCH_FEEDBACK_DISABLED) {
|
|
631
|
+
server.addTool({
|
|
632
|
+
name: 'firecrawl_search_feedback',
|
|
633
|
+
annotations: {
|
|
634
|
+
title: 'Send feedback on a search result',
|
|
635
|
+
readOnlyHint: false,
|
|
636
|
+
openWorldHint: true,
|
|
637
|
+
},
|
|
638
|
+
description: `
|
|
639
|
+
Send structured feedback on a previous \`firecrawl_search\` result. **Call this immediately after a search where you used the results** so we can improve search quality and refund 1 credit (search costs 2).
|
|
640
|
+
|
|
641
|
+
Pass the \`searchId\` returned by \`firecrawl_search\` (the \`id\` field on the response) and tell us:
|
|
642
|
+
|
|
643
|
+
- **rating** — overall result quality: \`good\`, \`partial\`, or \`bad\`.
|
|
644
|
+
- **valuableSources** — which result URLs were actually useful, and a short reason why.
|
|
645
|
+
- **missingContent** — **the most important field.** An ARRAY of specific pieces of content you expected to find but didn't. One entry per missing piece, each with a short \`topic\` and an optional longer \`description\`. Examples: \`{"topic":"enterprise pricing","description":"no pricing tier table for the Enterprise plan was returned"}\`, \`{"topic":"API rate limits"}\`, \`{"topic":"comparison vs competitors"}\`. **Be specific** — these aggregate across teams and tell us what to index next. Do not pack multiple topics into one entry.
|
|
646
|
+
- **querySuggestions** — how the query or response shape could be improved (e.g. "would have liked official docs first", "should boost github.com").
|
|
647
|
+
|
|
648
|
+
**Substantive-feedback requirement** (zero-effort feedback is rejected with HTTP 400):
|
|
649
|
+
- \`good\` — must include at least one \`valuableSources\` entry
|
|
650
|
+
- \`partial\` — must include \`valuableSources\` or at least one \`missingContent\` entry
|
|
651
|
+
- \`bad\` — must include at least one \`missingContent\` entry or \`querySuggestions\`
|
|
652
|
+
|
|
653
|
+
**Time window:** Feedback must be submitted within ~2 minutes of the search. Beyond that, the call returns HTTP 409 with \`feedbackErrorCode: "FEEDBACK_WINDOW_EXPIRED"\` — do not retry, just move on. Same goes for any 4xx response: do not retry-loop.
|
|
654
|
+
|
|
655
|
+
**Behaviors:**
|
|
656
|
+
- Idempotent per \`searchId\`. Re-submitting for the same id returns \`alreadySubmitted: true\` with \`creditsRefunded: 0\`.
|
|
657
|
+
- Refund only applies to billable searches; preview teams are blocked.
|
|
658
|
+
- Failed searches cannot receive feedback (the search itself already returned an error you can act on).
|
|
659
|
+
- **Daily refund cap (per team, per UTC day, default 100 credits).** Once a team's \`creditsRefundedToday\` reaches \`dailyRefundCap\`, the response returns \`dailyCapReached: true\` with \`creditsRefunded: 0\`. The feedback is still recorded for search-quality improvement — only the credit refund is gated. **Stop calling this tool for the rest of the UTC day** when you see \`dailyCapReached: true\`.
|
|
660
|
+
|
|
661
|
+
**When to call:** Right after processing a search result. If the result didn't help, send rating \`bad\` with a clear \`missingContent\` — that is just as valuable as a \`good\` rating.
|
|
662
|
+
|
|
663
|
+
**Usage Example (good rating with valuable sources + missing content):**
|
|
664
|
+
\`\`\`json
|
|
665
|
+
{
|
|
666
|
+
"name": "firecrawl_search_feedback",
|
|
667
|
+
"arguments": {
|
|
668
|
+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
669
|
+
"rating": "good",
|
|
670
|
+
"valuableSources": [
|
|
671
|
+
{ "url": "https://docs.firecrawl.dev/features/search", "reason": "Most up-to-date description of /search." }
|
|
672
|
+
],
|
|
673
|
+
"missingContent": [
|
|
674
|
+
{ "topic": "Pricing for the search endpoint", "description": "No pricing tier table for /search specifically." },
|
|
675
|
+
{ "topic": "Rate limits", "description": "Per-team RPS for /search not documented." }
|
|
676
|
+
],
|
|
677
|
+
"querySuggestions": "Boost docs.firecrawl.dev for queries that mention 'firecrawl'"
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
\`\`\`
|
|
681
|
+
|
|
682
|
+
**Usage Example (bad rating, what was missing):**
|
|
683
|
+
\`\`\`json
|
|
684
|
+
{
|
|
685
|
+
"name": "firecrawl_search_feedback",
|
|
686
|
+
"arguments": {
|
|
687
|
+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
688
|
+
"rating": "bad",
|
|
689
|
+
"missingContent": [
|
|
690
|
+
{ "topic": "Recent benchmarks", "description": "All results were >12 months old." },
|
|
691
|
+
{ "topic": "Comparison vs Algolia" }
|
|
692
|
+
]
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
\`\`\`
|
|
696
|
+
|
|
697
|
+
**Returns:** \`{ success, feedbackId, creditsRefunded, creditsRefundedToday, dailyRefundCap, dailyCapReached?, alreadySubmitted?, warning? }\` JSON.
|
|
698
|
+
`,
|
|
699
|
+
parameters: z.object({
|
|
700
|
+
searchId: z
|
|
701
|
+
.string()
|
|
702
|
+
.uuid('searchId must be the UUID returned by firecrawl_search'),
|
|
703
|
+
rating: z.enum(['good', 'bad', 'partial']),
|
|
704
|
+
valuableSources: z
|
|
705
|
+
.array(z.object({
|
|
706
|
+
url: z.string().url(),
|
|
707
|
+
reason: z.string().max(1000).optional(),
|
|
708
|
+
}))
|
|
709
|
+
.max(50)
|
|
710
|
+
.optional(),
|
|
711
|
+
missingContent: z
|
|
712
|
+
.array(z.object({
|
|
713
|
+
topic: z
|
|
714
|
+
.string()
|
|
715
|
+
.min(1, 'topic must not be empty')
|
|
716
|
+
.max(200, 'topic must be 200 characters or fewer'),
|
|
717
|
+
description: z.string().max(2000).optional(),
|
|
718
|
+
}))
|
|
719
|
+
.max(20)
|
|
720
|
+
.optional()
|
|
721
|
+
.describe('Array of specific pieces of content the agent expected to find but did not. ' +
|
|
722
|
+
'One entry per distinct topic. Each entry has a short `topic` and optional ' +
|
|
723
|
+
'longer `description`.'),
|
|
724
|
+
querySuggestions: z.string().max(2000).optional(),
|
|
725
|
+
}),
|
|
726
|
+
execute: async (args, { session, log }) => {
|
|
727
|
+
const { searchId, rating, valuableSources, missingContent, querySuggestions, } = args;
|
|
728
|
+
const apiBase = resolveApiBaseUrl();
|
|
729
|
+
const endpoint = `${apiBase}/v2/search/${encodeURIComponent(searchId)}/feedback`;
|
|
730
|
+
const body = {
|
|
731
|
+
rating,
|
|
732
|
+
origin: ORIGIN,
|
|
733
|
+
};
|
|
734
|
+
if (valuableSources && valuableSources.length > 0) {
|
|
735
|
+
body.valuableSources = valuableSources;
|
|
736
|
+
}
|
|
737
|
+
if (missingContent && missingContent.length > 0) {
|
|
738
|
+
body.missingContent = missingContent;
|
|
739
|
+
}
|
|
740
|
+
if (querySuggestions)
|
|
741
|
+
body.querySuggestions = querySuggestions;
|
|
742
|
+
const headers = {
|
|
743
|
+
'Content-Type': 'application/json',
|
|
744
|
+
};
|
|
745
|
+
const apiKey = session?.firecrawlApiKey;
|
|
746
|
+
if (apiKey) {
|
|
747
|
+
headers['Authorization'] = `Bearer ${apiKey}`;
|
|
748
|
+
}
|
|
749
|
+
else if (process.env.CLOUD_SERVICE === 'true') {
|
|
750
|
+
throw new Error('Unauthorized: missing API key for search feedback.');
|
|
751
|
+
}
|
|
752
|
+
log.info('Submitting search feedback', { searchId, rating });
|
|
753
|
+
const response = await fetch(endpoint, {
|
|
754
|
+
method: 'POST',
|
|
755
|
+
headers,
|
|
756
|
+
body: JSON.stringify(body),
|
|
757
|
+
});
|
|
758
|
+
const responseText = await response.text();
|
|
759
|
+
let parsed;
|
|
760
|
+
try {
|
|
761
|
+
parsed = JSON.parse(responseText);
|
|
762
|
+
}
|
|
763
|
+
catch {
|
|
764
|
+
parsed = { raw: responseText };
|
|
765
|
+
}
|
|
766
|
+
// 4xx is terminal; surface a structured payload (with retryable=false)
|
|
767
|
+
// so agents do not retry-loop on substantive-feedback rejections,
|
|
768
|
+
// expired windows, etc.
|
|
769
|
+
if (!response.ok) {
|
|
770
|
+
log.warn('Search feedback rejected', {
|
|
771
|
+
status: response.status,
|
|
772
|
+
feedbackErrorCode: parsed?.feedbackErrorCode,
|
|
773
|
+
});
|
|
774
|
+
return asText({
|
|
775
|
+
success: false,
|
|
776
|
+
status: response.status,
|
|
777
|
+
feedbackErrorCode: parsed?.feedbackErrorCode,
|
|
778
|
+
error: parsed?.error ?? `HTTP ${response.status}`,
|
|
779
|
+
retryable: response.status >= 500,
|
|
780
|
+
});
|
|
781
|
+
}
|
|
782
|
+
return asText(parsed);
|
|
783
|
+
},
|
|
784
|
+
});
|
|
785
|
+
}
|
|
611
786
|
server.addTool({
|
|
612
787
|
name: 'firecrawl_crawl',
|
|
613
788
|
annotations: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "3.15.0",
|
|
3
|
+
"version": "3.16.0",
|
|
4
4
|
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"mcpName": "io.github.firecrawl/firecrawl-mcp-server",
|