firecrawl-mcp 3.20.4 → 3.20.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -0
- package/dist/index.js +230 -13
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -623,6 +623,43 @@ Sends structured feedback on a previous `firecrawl_search` result. The first fee
|
|
|
623
623
|
|
|
624
624
|
- `{ success, feedbackId, creditsRefunded, alreadySubmitted? }` JSON.
|
|
625
625
|
|
|
626
|
+
### 5c. Generic Feedback Tool (`firecrawl_feedback`)
|
|
627
|
+
|
|
628
|
+
Sends structured feedback for a completed v2 endpoint job through `/v2/feedback`.
|
|
629
|
+
Use this for endpoint-level feedback on `scrape`, `parse`, `map`, or `search`
|
|
630
|
+
jobs. For search-result quality specifically, prefer
|
|
631
|
+
`firecrawl_search_feedback` because it includes search-specific guidance.
|
|
632
|
+
|
|
633
|
+
Keep feedback concise: use issue codes, tags, short notes, URLs, page numbers,
|
|
634
|
+
and small metadata objects. Do not include raw scrape/parse outputs.
|
|
635
|
+
|
|
636
|
+
**Opt out:** set `FIRECRAWL_NO_ENDPOINT_FEEDBACK=1` (or `FIRECRAWL_DISABLE_ENDPOINT_FEEDBACK=1`) in the environment when starting the MCP server. The `firecrawl_feedback` tool will not be registered, so agents cannot call it.
|
|
637
|
+
|
|
638
|
+
**Usage Example:**
|
|
639
|
+
|
|
640
|
+
```json
|
|
641
|
+
{
|
|
642
|
+
"name": "firecrawl_feedback",
|
|
643
|
+
"arguments": {
|
|
644
|
+
"endpoint": "scrape",
|
|
645
|
+
"jobId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
646
|
+
"rating": "partial",
|
|
647
|
+
"issues": ["missing_markdown"],
|
|
648
|
+
"tags": ["docs"],
|
|
649
|
+
"note": "The pricing table was missing from the markdown output.",
|
|
650
|
+
"url": "https://example.com/pricing",
|
|
651
|
+
"pageNumbers": [1],
|
|
652
|
+
"metadata": {
|
|
653
|
+
"format": "markdown"
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
}
|
|
657
|
+
```
|
|
658
|
+
|
|
659
|
+
**Returns:**
|
|
660
|
+
|
|
661
|
+
- `{ success, feedbackId, creditsRefunded, creditsRefundedToday?, dailyRefundCap?, dailyCapReached?, alreadySubmitted?, warning? }` JSON.
|
|
662
|
+
|
|
626
663
|
### 6. Crawl Tool (`firecrawl_crawl`)
|
|
627
664
|
|
|
628
665
|
Starts an asynchronous crawl job on a website and extracts content from all pages.
|
package/dist/index.js
CHANGED
|
@@ -223,6 +223,19 @@ const server = new FastMCP({
|
|
|
223
223
|
const envCred = resolveCredentialFromEnv();
|
|
224
224
|
if (process.env.CLOUD_SERVICE === 'true') {
|
|
225
225
|
if (!headerCred) {
|
|
226
|
+
// Keyless free tier over the hosted MCP: serve it only when a forwarding
|
|
227
|
+
// secret is configured, we know the end-user's client IP (so the API can
|
|
228
|
+
// rate-limit per real IP, not the shared server IP), AND that IP still
|
|
229
|
+
// has free quota. If the IP is out of quota (or keyless is off), fall
|
|
230
|
+
// through to throw so FastMCP emits the OAuth 401 + WWW-Authenticate
|
|
231
|
+
// challenge — i.e. prompt the user to connect an account exactly when
|
|
232
|
+
// their free quota runs out.
|
|
233
|
+
const clientIp = extractClientIp(request);
|
|
234
|
+
if (process.env.KEYLESS_PROXY_SECRET &&
|
|
235
|
+
clientIp &&
|
|
236
|
+
(await keylessEligible(clientIp))) {
|
|
237
|
+
return { firecrawlApiKey: undefined, research, keylessClientIp: clientIp };
|
|
238
|
+
}
|
|
226
239
|
throw new Error('Firecrawl credentials required: OAuth access token (Authorization: Bearer fco_...) or API key (x-firecrawl-api-key)');
|
|
227
240
|
}
|
|
228
241
|
return { firecrawlApiKey: headerCred, research };
|
|
@@ -233,8 +246,12 @@ const server = new FastMCP({
|
|
|
233
246
|
if (!httpStreaming &&
|
|
234
247
|
!process.env.FIRECRAWL_API_KEY &&
|
|
235
248
|
!process.env.FIRECRAWL_API_URL) {
|
|
236
|
-
|
|
237
|
-
|
|
249
|
+
// No credential and no self-hosted URL: run in keyless mode. scrape and
|
|
250
|
+
// search work for free (rate-limited per IP) against the Firecrawl cloud;
|
|
251
|
+
// every other tool needs an API key and will return Unauthorized.
|
|
252
|
+
console.error('No FIRECRAWL_API_KEY or FIRECRAWL_API_URL set — running in keyless mode. ' +
|
|
253
|
+
'firecrawl_scrape and firecrawl_search are free (rate-limited per IP) against the Firecrawl cloud; ' +
|
|
254
|
+
'other tools require an API key (get one free at https://firecrawl.dev).');
|
|
238
255
|
}
|
|
239
256
|
if (httpStreaming && !credential && !process.env.FIRECRAWL_API_URL) {
|
|
240
257
|
console.error('HTTP MCP transport requires FIRECRAWL_API_URL and/or credentials (OAuth: Authorization Bearer fco_..., or FIRECRAWL_API_KEY / FIRECRAWL_OAUTH_TOKEN)');
|
|
@@ -559,7 +576,6 @@ ${SAFE_MODE
|
|
|
559
576
|
parameters: scrapeParamsSchema,
|
|
560
577
|
execute: async (args, { session, log }) => {
|
|
561
578
|
const { url, ...options } = args;
|
|
562
|
-
const client = getClient(session);
|
|
563
579
|
const transformed = transformScrapeParams(options);
|
|
564
580
|
const cleaned = removeEmptyTopLevel(transformed);
|
|
565
581
|
if (cleaned.lockdown) {
|
|
@@ -568,6 +584,15 @@ ${SAFE_MODE
|
|
|
568
584
|
else {
|
|
569
585
|
log.info('Scraping URL', { url: String(url) });
|
|
570
586
|
}
|
|
587
|
+
if (isKeylessMode(session)) {
|
|
588
|
+
const json = await keylessPost('/v2/scrape', {
|
|
589
|
+
url: String(url),
|
|
590
|
+
...cleaned,
|
|
591
|
+
origin: ORIGIN,
|
|
592
|
+
}, session);
|
|
593
|
+
return asText(json?.data ?? json);
|
|
594
|
+
}
|
|
595
|
+
const client = getClient(session);
|
|
571
596
|
const res = await client.scrape(String(url), {
|
|
572
597
|
...cleaned,
|
|
573
598
|
origin: ORIGIN,
|
|
@@ -724,7 +749,6 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
724
749
|
})
|
|
725
750
|
.refine((args) => !(args.includeDomains?.length && args.excludeDomains?.length), 'includeDomains and excludeDomains cannot both be specified'),
|
|
726
751
|
execute: async (args, { session, log }) => {
|
|
727
|
-
const client = getClient(session);
|
|
728
752
|
const { query, ...opts } = args;
|
|
729
753
|
const searchOpts = { ...opts };
|
|
730
754
|
const includeDomains = searchOpts.includeDomains;
|
|
@@ -737,16 +761,22 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
737
761
|
const cleaned = removeEmptyTopLevel(searchOpts);
|
|
738
762
|
const searchQuery = buildSearchQueryWithDomains(query, includeDomains, excludeDomains);
|
|
739
763
|
log.info('Searching', { query: searchQuery });
|
|
764
|
+
const searchBody = {
|
|
765
|
+
query: searchQuery,
|
|
766
|
+
...cleaned,
|
|
767
|
+
origin: ORIGIN,
|
|
768
|
+
};
|
|
769
|
+
if (isKeylessMode(session)) {
|
|
770
|
+
const json = await keylessPost('/v2/search', searchBody, session);
|
|
771
|
+
return asText(json ?? {});
|
|
772
|
+
}
|
|
740
773
|
// Call /v2/search through the SDK's HTTP layer (auth + retries) instead
|
|
741
774
|
// of `client.search()` so we preserve the full response envelope. The
|
|
742
775
|
// high-level `search()` helper strips `id` and `creditsUsed`, which
|
|
743
776
|
// breaks the `firecrawl_search_feedback` workflow that this server
|
|
744
777
|
// explicitly tells the LLM to use after every search.
|
|
745
|
-
const
|
|
746
|
-
|
|
747
|
-
...cleaned,
|
|
748
|
-
origin: ORIGIN,
|
|
749
|
-
});
|
|
778
|
+
const client = getClient(session);
|
|
779
|
+
const httpRes = await client.http.post('/v2/search', searchBody);
|
|
750
780
|
return asText(httpRes?.data ?? {});
|
|
751
781
|
},
|
|
752
782
|
});
|
|
@@ -754,11 +784,97 @@ const DEFAULT_CLOUD_API_URL = 'https://api.firecrawl.dev';
|
|
|
754
784
|
function resolveApiBaseUrl() {
|
|
755
785
|
return (process.env.FIRECRAWL_API_URL || DEFAULT_CLOUD_API_URL).replace(/\/$/, '');
|
|
756
786
|
}
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
787
|
+
// Keyless free tier: when no credential is configured and we're targeting the
|
|
788
|
+
// Firecrawl cloud (not self-hosted via FIRECRAWL_API_URL, not the multi-tenant
|
|
789
|
+
// CLOUD_SERVICE deployment), scrape and search are free, rate-limited per IP.
|
|
790
|
+
// The cloud only grants this when NO Authorization header is sent, so we bypass
|
|
791
|
+
// the SDK — which always attaches a Bearer header — and post directly.
|
|
792
|
+
/**
 * Best-effort lookup of the end-user client IP for an incoming MCP request.
 *
 * Reads the `x-forwarded-for` header (the first element when the header is an
 * array), keeps the text before the first comma, and trims it.
 *
 * NOTE(review): the leftmost `x-forwarded-for` entry is whatever the caller
 * sent unless an upstream proxy overwrites the header — confirm the deployment
 * guarantees that before treating this value as trustworthy.
 *
 * @param {{ headers?: Record<string, string | string[] | undefined> }} [request]
 *   Incoming request; may be missing or have no headers.
 * @returns {string | undefined} The trimmed first entry, or `undefined` when
 *   the header is absent, not a string, or its first entry is empty.
 */
function extractClientIp(request) {
    let headerValue = request?.headers?.['x-forwarded-for'];
    if (Array.isArray(headerValue)) {
        headerValue = headerValue[0];
    }
    if (typeof headerValue !== 'string') {
        return undefined;
    }
    const [leftmost] = headerValue.split(',');
    const candidate = leftmost.trim();
    return candidate === '' ? undefined : candidate;
}
|
|
799
|
+
// Upper bound on the eligibility probe. It runs while a connection is being
// authenticated, so a stalled API must not be able to hang the caller.
const KEYLESS_ELIGIBILITY_TIMEOUT_MS = 5000;
/**
 * Ask the API whether a client IP may still use the keyless free tier.
 *
 * Issues a GET to `/v2/keyless/eligibility` with the client IP and the
 * `KEYLESS_PROXY_SECRET` in request headers. Intended as a read-only check
 * (the endpoint is expected not to consume quota — confirm against the API).
 *
 * Fails closed: a missing secret, an empty IP, a non-2xx response, an
 * unparseable body, a timeout, a network error, or anything other than a
 * literal `eligible: true` all yield `false`, so the caller falls through to
 * the OAuth challenge rather than silently granting keyless access.
 *
 * @param {string} clientIp End-user IP to check.
 * @returns {Promise<boolean>} `true` only when the API answers `{ eligible: true }`.
 */
async function keylessEligible(clientIp) {
    const secret = process.env.KEYLESS_PROXY_SECRET;
    if (!secret || !clientIp) {
        return false;
    }
    try {
        const response = await fetch(`${resolveApiBaseUrl()}/v2/keyless/eligibility`, {
            headers: {
                'x-firecrawl-keyless-ip': clientIp,
                'x-firecrawl-keyless-secret': secret,
            },
            // Abort instead of waiting forever; the rejection is handled by the
            // catch below and therefore also fails closed.
            signal: AbortSignal.timeout(KEYLESS_ELIGIBILITY_TIMEOUT_MS),
        });
        if (!response.ok) {
            return false;
        }
        const json = await response.json().catch(() => ({}));
        return json?.eligible === true;
    }
    catch {
        // Deliberate best-effort: any failure means "not eligible".
        return false;
    }
}
|
|
825
|
+
/**
 * Decide whether a tool call should go through the keyless (no credential) path.
 *
 * - A session that carries `firecrawlApiKey` is never keyless.
 * - When `CLOUD_SERVICE` is exactly `'true'` (hosted deployment), keyless
 *   applies only to sessions that carry `keylessClientIp`.
 * - Otherwise keyless applies whenever `FIRECRAWL_API_URL` is unset or empty.
 *
 * @param {{ firecrawlApiKey?: string, keylessClientIp?: string }} [session]
 * @returns {boolean}
 */
function isKeylessMode(session) {
    const hasCredential = Boolean(session?.firecrawlApiKey);
    if (hasCredential) {
        return false;
    }
    const hosted = process.env.CLOUD_SERVICE === 'true';
    // Hosted: require the forwarded client IP on the session.
    // Local/stdio: keyless unless a custom API URL is configured.
    return hosted
        ? Boolean(session?.keylessClientIp)
        : !process.env.FIRECRAWL_API_URL;
}
|
|
836
|
+
/**
 * POST a JSON body to the API without an Authorization header.
 *
 * When the session carries `keylessClientIp` and `KEYLESS_PROXY_SECRET` is set,
 * the client IP and the secret are forwarded in `x-firecrawl-keyless-ip` /
 * `x-firecrawl-keyless-secret` headers.
 *
 * @param {string} path API path appended to the resolved base URL (e.g. `/v2/scrape`).
 * @param {object} body Request payload; serialized with `JSON.stringify`.
 * @param {{ keylessClientIp?: string }} [session] Current MCP session.
 * @returns {Promise<object>} Parsed JSON response (`{}` when the body is not valid JSON).
 * @throws {Error} On a non-2xx response. The message is the response's `error`
 *   string when present, a readable rendering of a structured `error`
 *   otherwise, or `Firecrawl request failed (HTTP <status>)` as a last resort.
 */
async function keylessPost(path, body, session) {
    const headers = { 'Content-Type': 'application/json' };
    const proxySecret = process.env.KEYLESS_PROXY_SECRET;
    // Forward the client IP only together with the secret.
    if (session?.keylessClientIp && proxySecret) {
        headers['x-firecrawl-keyless-ip'] = session.keylessClientIp;
        headers['x-firecrawl-keyless-secret'] = proxySecret;
    }
    const response = await fetch(`${resolveApiBaseUrl()}${path}`, {
        method: 'POST',
        headers,
        body: JSON.stringify(body),
    });
    // A non-JSON body is tolerated so the status-based fallback below still applies.
    const json = await response.json().catch(() => ({}));
    if (!response.ok) {
        throw new Error(describeKeylessError(json?.error, response.status));
    }
    return json;
}
/** Turn the `error` field of a failed response into a human-readable message. */
function describeKeylessError(detail, status) {
    if (typeof detail === 'string' && detail !== '') {
        return detail;
    }
    if (detail !== null && typeof detail === 'object') {
        // Passing an object straight to `new Error()` would yield "[object Object]".
        if (typeof detail.message === 'string' && detail.message !== '') {
            return detail.message;
        }
        return `Firecrawl request failed (HTTP ${status}): ${JSON.stringify(detail)}`;
    }
    return `Firecrawl request failed (HTTP ${status})`;
}
|
|
855
|
+
// Validator for a single feedback issue code (also reused for tags by the
// `firecrawl_feedback` tool): trimmed, 1–80 characters, starting with a
// lowercase letter or digit and continuing with lowercase letters, digits,
// underscores, or hyphens.
const feedbackIssueSchema = z
    .string()
    .trim()
    .min(1)
    .max(80)
    .regex(/^[a-z0-9][a-z0-9_-]*$/, 'Issue codes must use lowercase letters, numbers, underscores, or hyphens');
|
|
861
|
+
// One entry of a feedback `valuableSources` list: a valid URL plus an optional
// free-text reason of at most 1000 characters.
const valuableSourceSchema = z.object({
    url: z.string().url(),
    reason: z.string().max(1000).optional(),
});
// One entry of a feedback `missingContent` list: a non-empty topic of at most
// 200 characters plus an optional description of at most 2000 characters.
const missingContentSchema = z.object({
    topic: z
        .string()
        .min(1, 'topic must not be empty')
        .max(200, 'topic must be 200 characters or fewer'),
    description: z.string().max(2000).optional(),
});
|
|
872
|
+
// Environment-variable values (compared after trim + lowercase) that count as
// "this opt-out flag is switched on".
const FEEDBACK_DISABLED_VALUES = new Set(['1', 'true', 'yes', 'on']);
/**
 * Report whether any of the named environment variables is set to a
 * recognised "on" value.
 *
 * @param {...string} keys Environment variable names to inspect.
 * @returns {boolean} `true` as soon as one variable matches; `false` when none
 *   do (including when no names are given).
 */
function feedbackEnvEnabled(...keys) {
    for (const name of keys) {
        const normalized = (process.env[name] || '').trim().toLowerCase();
        if (FEEDBACK_DISABLED_VALUES.has(normalized)) {
            return true;
        }
    }
    return false;
}
// Opt-out switches, evaluated once at module load.
const SEARCH_FEEDBACK_DISABLED = feedbackEnvEnabled('FIRECRAWL_NO_SEARCH_FEEDBACK', 'FIRECRAWL_DISABLE_SEARCH_FEEDBACK');
const ENDPOINT_FEEDBACK_DISABLED = feedbackEnvEnabled('FIRECRAWL_NO_ENDPOINT_FEEDBACK', 'FIRECRAWL_DISABLE_ENDPOINT_FEEDBACK');
|
|
762
878
|
if (SEARCH_FEEDBACK_DISABLED) {
|
|
763
879
|
console.error('[firecrawl-mcp] Search feedback tool disabled by FIRECRAWL_NO_SEARCH_FEEDBACK; firecrawl_search_feedback will not be registered.');
|
|
764
880
|
}
|
|
@@ -918,6 +1034,107 @@ Pass the \`searchId\` returned by \`firecrawl_search\` (the \`id\` field on the
|
|
|
918
1034
|
},
|
|
919
1035
|
});
|
|
920
1036
|
}
|
|
1037
|
+
// Registers the generic `firecrawl_feedback` tool unless endpoint feedback has
// been opted out of. When opted out, a notice is written to stderr and the tool
// is not registered at all.
// NOTE(review): the notice names only FIRECRAWL_NO_ENDPOINT_FEEDBACK even when
// FIRECRAWL_DISABLE_ENDPOINT_FEEDBACK was the variable that triggered it.
if (ENDPOINT_FEEDBACK_DISABLED) {
    console.error('[firecrawl-mcp] Endpoint feedback tool disabled by FIRECRAWL_NO_ENDPOINT_FEEDBACK; firecrawl_feedback will not be registered.');
}
if (!ENDPOINT_FEEDBACK_DISABLED) {
    server.addTool({
        name: 'firecrawl_feedback',
        annotations: {
            title: 'Send feedback on a Firecrawl job',
            readOnlyHint: false,
            openWorldHint: true,
        },
        description: `
Send structured feedback for a completed Firecrawl v2 job. Use this for endpoint-level feedback on \`scrape\`, \`parse\`, \`map\`, or \`search\` jobs when the job result was useful, partially useful, or failed to meet expectations.

For search-result quality specifically, prefer \`firecrawl_search_feedback\` when available because it has search-focused guidance. This generic tool posts to \`/v2/feedback\` and accepts endpoint-wide signals:

- **endpoint** — one of \`search\`, \`scrape\`, \`parse\`, or \`map\`.
- **jobId** — the id returned by that endpoint.
- **rating** — overall result quality: \`good\`, \`partial\`, or \`bad\`.
- **issues** — stable lowercase issue codes such as \`missing_markdown\`, \`bad_pdf_parse\`, or \`wrong_links\`.
- **tags** — optional lowercase tags for grouping feedback.
- **note** — short human-readable context. Do not include huge page contents or raw scrape results.
- **url**, **pageNumbers**, and **metadata** — small contextual fields that identify what the feedback refers to.

Do not store multi-MB outputs in feedback. Use concise notes, issue codes, URLs, and page numbers.

**Returns:** \`{ success, feedbackId, creditsRefunded, creditsRefundedToday?, dailyRefundCap?, dailyCapReached?, alreadySubmitted?, warning? }\` JSON.
`,
        // Only endpoint, jobId, and rating are required; every other field is optional.
        parameters: z.object({
            endpoint: z.enum(['search', 'scrape', 'parse', 'map']),
            jobId: z.string().uuid('jobId must be the UUID returned by Firecrawl'),
            rating: z.enum(['good', 'bad', 'partial']),
            // Tags are validated with the same pattern as issue codes.
            issues: z.array(feedbackIssueSchema).max(20).optional(),
            tags: z.array(feedbackIssueSchema).max(20).optional(),
            note: z.string().max(4000).optional(),
            valuableSources: z.array(valuableSourceSchema).max(50).optional(),
            missingContent: z.array(missingContentSchema).max(50).optional(),
            querySuggestions: z.string().max(2000).optional(),
            url: z.string().url().optional(),
            pageNumbers: z.array(z.number().int().positive()).max(100).optional(),
            // No size limit is enforced on metadata by this schema.
            metadata: z.record(z.string(), z.unknown()).optional(),
        }),
        // Posts the feedback to `/v2/feedback` and returns the API's JSON reply as text.
        // A non-2xx reply is returned as a `{ success: false, ... }` payload rather
        // than thrown; the only error thrown by this handler itself is the
        // missing-key case in the hosted deployment.
        execute: async (args, { session, log }) => {
            const { endpoint, jobId, rating, issues, tags, note, valuableSources, missingContent, querySuggestions, url, pageNumbers, metadata, } = args;
            const apiBase = resolveApiBaseUrl();
            const headers = {
                'Content-Type': 'application/json',
            };
            const apiKey = session?.firecrawlApiKey;
            if (apiKey) {
                headers['Authorization'] = `Bearer ${apiKey}`;
            }
            else if (process.env.CLOUD_SERVICE === 'true') {
                // Hosted deployment: refuse to send feedback without a credential.
                // Outside the hosted deployment the request goes out without an
                // Authorization header.
                throw new Error('Unauthorized: missing API key for feedback.');
            }
            // removeEmptyTopLevel is defined elsewhere in this file; presumably it
            // drops unset optional fields from the payload — confirm.
            const body = removeEmptyTopLevel({
                endpoint,
                jobId,
                rating,
                issues,
                tags,
                note,
                valuableSources,
                missingContent,
                querySuggestions,
                url,
                pageNumbers,
                metadata,
                origin: ORIGIN,
            });
            log.info('Submitting endpoint feedback', { endpoint, jobId, rating });
            const response = await fetch(`${apiBase}/v2/feedback`, {
                method: 'POST',
                headers,
                body: JSON.stringify(body),
            });
            // Read as text first so a non-JSON reply can still be reported as `{ raw }`.
            const responseText = await response.text();
            let parsed;
            try {
                parsed = JSON.parse(responseText);
            }
            catch {
                parsed = { raw: responseText };
            }
            if (!response.ok) {
                log.warn('Endpoint feedback rejected', {
                    status: response.status,
                    feedbackErrorCode: parsed?.feedbackErrorCode,
                });
                // Only 5xx statuses are flagged as retryable.
                return asText({
                    success: false,
                    status: response.status,
                    feedbackErrorCode: parsed?.feedbackErrorCode,
                    error: parsed?.error ?? `HTTP ${response.status}`,
                    retryable: response.status >= 500,
                });
            }
            return asText(parsed);
        },
    });
}
|
|
921
1138
|
server.addTool({
|
|
922
1139
|
name: 'firecrawl_crawl',
|
|
923
1140
|
annotations: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "3.20.4",
|
|
3
|
+
"version": "3.20.6",
|
|
4
4
|
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"mcpName": "io.github.firecrawl/firecrawl-mcp-server",
|