firecrawl-mcp 3.15.0 → 3.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -2
- package/dist/index.js +180 -3
- package/dist/monitor.js +354 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
# Firecrawl MCP Server
|
|
10
10
|
|
|
11
|
-
A Model Context Protocol (MCP) server
|
|
11
|
+
A Model Context Protocol (MCP) server that brings [Firecrawl](https://github.com/firecrawl/firecrawl) to MCP-compatible AI agents — search, scrape, and interact with the live web for clean, agent-ready context.
|
|
12
12
|
|
|
13
13
|
> Big thanks to [@vrknetha](https://github.com/vrknetha), [@knacklabs](https://www.knacklabs.ai) for the initial implementation!
|
|
14
14
|
|
|
@@ -565,12 +565,51 @@ Search the web and optionally extract content from search results.
|
|
|
565
565
|
|
|
566
566
|
**Returns:**
|
|
567
567
|
|
|
568
|
-
- Array of search results (with optional scraped content)
|
|
568
|
+
- Array of search results (with optional scraped content), plus an `id` field. Pass that `id` to `firecrawl_search_feedback` after you've used the results to refund 1 credit (search costs 2) and improve search quality.
|
|
569
569
|
|
|
570
570
|
**Prompt Example:**
|
|
571
571
|
|
|
572
572
|
> "Find the latest research papers on AI published in 2023."
|
|
573
573
|
|
|
574
|
+
### 5b. Search Feedback Tool (`firecrawl_search_feedback`)
|
|
575
|
+
|
|
576
|
+
Sends structured feedback on a previous `firecrawl_search` result. The first feedback per search id refunds 1 credit and improves Firecrawl's search quality. Idempotent per search id.
|
|
577
|
+
|
|
578
|
+
**Call this after every search you actually use** (or that didn't help). Bad/partial feedback with `missingContent` is just as valuable as good feedback.
|
|
579
|
+
|
|
580
|
+
**Opt out:** set `FIRECRAWL_NO_SEARCH_FEEDBACK=1` (or `FIRECRAWL_DISABLE_SEARCH_FEEDBACK=1`) in the environment when starting the MCP server. The `firecrawl_search_feedback` tool will not be registered, so agents can't call it. Team admins can also disable feedback server-side; in that case the tool is registered but always returns `feedbackErrorCode: "TEAM_OPTED_OUT"`.
|
|
581
|
+
|
|
582
|
+
**Most important field:** `missingContent`. It's an array of specific pieces of content the agent expected to find but did not. One entry per missing topic — these aggregate across teams and tell us what to index next.
|
|
583
|
+
|
|
584
|
+
**Daily refund cap (per team, per UTC day, default 100 credits).** Once a team's `creditsRefundedToday` reaches `dailyRefundCap`, further submissions still record feedback but no longer refund credits. The response sets `dailyCapReached: true`. Agents should stop calling this tool for the rest of the UTC day when they see that flag.
|
|
585
|
+
|
|
586
|
+
**Usage Example:**
|
|
587
|
+
|
|
588
|
+
```json
|
|
589
|
+
{
|
|
590
|
+
"name": "firecrawl_search_feedback",
|
|
591
|
+
"arguments": {
|
|
592
|
+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
593
|
+
"rating": "good",
|
|
594
|
+
"valuableSources": [
|
|
595
|
+
{
|
|
596
|
+
"url": "https://docs.firecrawl.dev/features/search",
|
|
597
|
+
"reason": "Most up-to-date description of /search."
|
|
598
|
+
}
|
|
599
|
+
],
|
|
600
|
+
"missingContent": [
|
|
601
|
+
{ "topic": "Pricing for the search endpoint", "description": "No pricing tier table for /search specifically." },
|
|
602
|
+
{ "topic": "Per-team rate limits" }
|
|
603
|
+
],
|
|
604
|
+
"querySuggestions": "Boost docs.firecrawl.dev for queries that mention 'firecrawl'"
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
```
|
|
608
|
+
|
|
609
|
+
**Returns:**
|
|
610
|
+
|
|
611
|
+
- `{ success, feedbackId, creditsRefunded, alreadySubmitted? }` JSON.
|
|
612
|
+
|
|
574
613
|
### 6. Crawl Tool (`firecrawl_crawl`)
|
|
575
614
|
|
|
576
615
|
Starts an asynchronous crawl job on a website and extracts content from all pages.
|
package/dist/index.js
CHANGED
|
@@ -5,6 +5,7 @@ import { z } from 'zod';
|
|
|
5
5
|
import FirecrawlApp from '@mendable/firecrawl-js';
|
|
6
6
|
import { readFile } from 'node:fs/promises';
|
|
7
7
|
import path from 'node:path';
|
|
8
|
+
import { registerMonitorTools } from './monitor.js';
|
|
8
9
|
dotenv.config({ debug: false, quiet: true });
|
|
9
10
|
function extractApiKey(headers) {
|
|
10
11
|
const headerAuth = headers['authorization'];
|
|
@@ -530,6 +531,7 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
530
531
|
**Domain filters:** Use includeDomains to restrict results to specific domains, or excludeDomains to remove domains. Do not use both in the same request. Domains must be hostnames only, without protocol or path.
|
|
531
532
|
**Scrape Options:** Only use scrapeOptions when you think it is absolutely necessary. When you do so default to a lower limit to avoid timeouts, 5 or lower.
|
|
532
533
|
**Optimal Workflow:** Search first using firecrawl_search without formats, then after fetching the results, use the scrape tool to get the content of the relevantpage(s) that you want to scrape
|
|
534
|
+
**After the search:** Once you have processed the results (or decided they were not useful), call \`firecrawl_search_feedback\` with the \`id\` from this response. The first feedback per search refunds 1 credit and helps Firecrawl improve search quality.
|
|
533
535
|
|
|
534
536
|
**Usage Example without formats (Preferred):**
|
|
535
537
|
\`\`\`json
|
|
@@ -566,7 +568,7 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
566
568
|
}
|
|
567
569
|
}
|
|
568
570
|
\`\`\`
|
|
569
|
-
**Returns:**
|
|
571
|
+
**Returns:** A JSON envelope of the form \`{ success, data: { web?, images?, news? }, id, creditsUsed }\`. Each result array contains the search results (with optional scraped content). Pass the top-level \`id\` to \`firecrawl_search_feedback\` after you've used the results.
|
|
570
572
|
`,
|
|
571
573
|
parameters: z
|
|
572
574
|
.object({
|
|
@@ -601,13 +603,187 @@ The query also supports search operators, that you can use if needed to refine t
|
|
|
601
603
|
const cleaned = removeEmptyTopLevel(searchOpts);
|
|
602
604
|
const searchQuery = buildSearchQueryWithDomains(query, includeDomains, excludeDomains);
|
|
603
605
|
log.info('Searching', { query: searchQuery });
|
|
604
|
-
|
|
606
|
+
// Call /v2/search through the SDK's HTTP layer (auth + retries) instead
|
|
607
|
+
// of `client.search()` so we preserve the full response envelope. The
|
|
608
|
+
// high-level `search()` helper strips `id` and `creditsUsed`, which
|
|
609
|
+
// breaks the `firecrawl_search_feedback` workflow that this server
|
|
610
|
+
// explicitly tells the LLM to use after every search.
|
|
611
|
+
const httpRes = await client.http.post('/v2/search', {
|
|
612
|
+
query: searchQuery,
|
|
605
613
|
...cleaned,
|
|
606
614
|
origin: ORIGIN,
|
|
607
615
|
});
|
|
608
|
-
return asText(
|
|
616
|
+
return asText(httpRes?.data ?? {});
|
|
609
617
|
},
|
|
610
618
|
});
|
|
619
|
+
const DEFAULT_CLOUD_API_URL = 'https://api.firecrawl.dev';
|
|
620
|
+
function resolveApiBaseUrl() {
|
|
621
|
+
return (process.env.FIRECRAWL_API_URL || DEFAULT_CLOUD_API_URL).replace(/\/$/, '');
|
|
622
|
+
}
|
|
623
|
+
const SEARCH_FEEDBACK_DISABLED = ['1', 'true', 'yes', 'on'].includes((process.env.FIRECRAWL_NO_SEARCH_FEEDBACK ||
|
|
624
|
+
process.env.FIRECRAWL_DISABLE_SEARCH_FEEDBACK ||
|
|
625
|
+
'')
|
|
626
|
+
.trim()
|
|
627
|
+
.toLowerCase());
|
|
628
|
+
if (SEARCH_FEEDBACK_DISABLED) {
|
|
629
|
+
console.error('[firecrawl-mcp] Search feedback tool disabled by FIRECRAWL_NO_SEARCH_FEEDBACK; firecrawl_search_feedback will not be registered.');
|
|
630
|
+
}
|
|
631
|
+
if (!SEARCH_FEEDBACK_DISABLED) {
|
|
632
|
+
server.addTool({
|
|
633
|
+
name: 'firecrawl_search_feedback',
|
|
634
|
+
annotations: {
|
|
635
|
+
title: 'Send feedback on a search result',
|
|
636
|
+
readOnlyHint: false,
|
|
637
|
+
openWorldHint: true,
|
|
638
|
+
},
|
|
639
|
+
description: `
|
|
640
|
+
Send structured feedback on a previous \`firecrawl_search\` result. **Call this immediately after a search where you used the results** so we can improve search quality and refund 1 credit (search costs 2).
|
|
641
|
+
|
|
642
|
+
Pass the \`searchId\` returned by \`firecrawl_search\` (the \`id\` field on the response) and tell us:
|
|
643
|
+
|
|
644
|
+
- **rating** — overall result quality: \`good\`, \`partial\`, or \`bad\`.
|
|
645
|
+
- **valuableSources** — which result URLs were actually useful, and a short reason why.
|
|
646
|
+
- **missingContent** — **the most important field.** An ARRAY of specific pieces of content you expected to find but didn't. One entry per missing piece, each with a short \`topic\` and an optional longer \`description\`. Examples: \`{"topic":"enterprise pricing","description":"no pricing tier table for the Enterprise plan was returned"}\`, \`{"topic":"API rate limits"}\`, \`{"topic":"comparison vs competitors"}\`. **Be specific** — these aggregate across teams and tell us what to index next. Do not pack multiple topics into one entry.
|
|
647
|
+
- **querySuggestions** — how the query or response shape could be improved (e.g. "would have liked official docs first", "should boost github.com").
|
|
648
|
+
|
|
649
|
+
**Substantive-feedback requirement** (zero-effort feedback is rejected with HTTP 400):
|
|
650
|
+
- \`good\` — must include at least one \`valuableSources\` entry
|
|
651
|
+
- \`partial\` — must include \`valuableSources\` or at least one \`missingContent\` entry
|
|
652
|
+
- \`bad\` — must include at least one \`missingContent\` entry or \`querySuggestions\`
|
|
653
|
+
|
|
654
|
+
**Time window:** Feedback must be submitted within ~2 minutes of the search. Beyond that, the call returns HTTP 409 with \`feedbackErrorCode: "FEEDBACK_WINDOW_EXPIRED"\` — do not retry, just move on. Same goes for any 4xx response: do not retry-loop.
|
|
655
|
+
|
|
656
|
+
**Behaviors:**
|
|
657
|
+
- Idempotent per \`searchId\`. Re-submitting for the same id returns \`alreadySubmitted: true\` with \`creditsRefunded: 0\`.
|
|
658
|
+
- Refund only applies to billable searches; preview teams are blocked.
|
|
659
|
+
- Failed searches cannot receive feedback (the search itself already returned an error you can act on).
|
|
660
|
+
- **Daily refund cap (per team, per UTC day, default 100 credits).** Once a team's \`creditsRefundedToday\` reaches \`dailyRefundCap\`, the response returns \`dailyCapReached: true\` with \`creditsRefunded: 0\`. The feedback is still recorded for search-quality improvement — only the credit refund is gated. **Stop calling this tool for the rest of the UTC day** when you see \`dailyCapReached: true\`.
|
|
661
|
+
|
|
662
|
+
**When to call:** Right after processing a search result. If the result didn't help, send rating \`bad\` with a clear \`missingContent\` — that is just as valuable as a \`good\` rating.
|
|
663
|
+
|
|
664
|
+
**Usage Example (good rating with valuable sources + missing content):**
|
|
665
|
+
\`\`\`json
|
|
666
|
+
{
|
|
667
|
+
"name": "firecrawl_search_feedback",
|
|
668
|
+
"arguments": {
|
|
669
|
+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
670
|
+
"rating": "good",
|
|
671
|
+
"valuableSources": [
|
|
672
|
+
{ "url": "https://docs.firecrawl.dev/features/search", "reason": "Most up-to-date description of /search." }
|
|
673
|
+
],
|
|
674
|
+
"missingContent": [
|
|
675
|
+
{ "topic": "Pricing for the search endpoint", "description": "No pricing tier table for /search specifically." },
|
|
676
|
+
{ "topic": "Rate limits", "description": "Per-team RPS for /search not documented." }
|
|
677
|
+
],
|
|
678
|
+
"querySuggestions": "Boost docs.firecrawl.dev for queries that mention 'firecrawl'"
|
|
679
|
+
}
|
|
680
|
+
}
|
|
681
|
+
\`\`\`
|
|
682
|
+
|
|
683
|
+
**Usage Example (bad rating, what was missing):**
|
|
684
|
+
\`\`\`json
|
|
685
|
+
{
|
|
686
|
+
"name": "firecrawl_search_feedback",
|
|
687
|
+
"arguments": {
|
|
688
|
+
"searchId": "0193f6c5-1234-7890-abcd-1234567890ab",
|
|
689
|
+
"rating": "bad",
|
|
690
|
+
"missingContent": [
|
|
691
|
+
{ "topic": "Recent benchmarks", "description": "All results were >12 months old." },
|
|
692
|
+
{ "topic": "Comparison vs Algolia" }
|
|
693
|
+
]
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
\`\`\`
|
|
697
|
+
|
|
698
|
+
**Returns:** \`{ success, feedbackId, creditsRefunded, creditsRefundedToday, dailyRefundCap, dailyCapReached?, alreadySubmitted?, warning? }\` JSON.
|
|
699
|
+
`,
|
|
700
|
+
parameters: z.object({
|
|
701
|
+
searchId: z
|
|
702
|
+
.string()
|
|
703
|
+
.uuid('searchId must be the UUID returned by firecrawl_search'),
|
|
704
|
+
rating: z.enum(['good', 'bad', 'partial']),
|
|
705
|
+
valuableSources: z
|
|
706
|
+
.array(z.object({
|
|
707
|
+
url: z.string().url(),
|
|
708
|
+
reason: z.string().max(1000).optional(),
|
|
709
|
+
}))
|
|
710
|
+
.max(50)
|
|
711
|
+
.optional(),
|
|
712
|
+
missingContent: z
|
|
713
|
+
.array(z.object({
|
|
714
|
+
topic: z
|
|
715
|
+
.string()
|
|
716
|
+
.min(1, 'topic must not be empty')
|
|
717
|
+
.max(200, 'topic must be 200 characters or fewer'),
|
|
718
|
+
description: z.string().max(2000).optional(),
|
|
719
|
+
}))
|
|
720
|
+
.max(20)
|
|
721
|
+
.optional()
|
|
722
|
+
.describe('Array of specific pieces of content the agent expected to find but did not. ' +
|
|
723
|
+
'One entry per distinct topic. Each entry has a short `topic` and optional ' +
|
|
724
|
+
'longer `description`.'),
|
|
725
|
+
querySuggestions: z.string().max(2000).optional(),
|
|
726
|
+
}),
|
|
727
|
+
execute: async (args, { session, log }) => {
|
|
728
|
+
const { searchId, rating, valuableSources, missingContent, querySuggestions, } = args;
|
|
729
|
+
const apiBase = resolveApiBaseUrl();
|
|
730
|
+
const endpoint = `${apiBase}/v2/search/${encodeURIComponent(searchId)}/feedback`;
|
|
731
|
+
const body = {
|
|
732
|
+
rating,
|
|
733
|
+
origin: ORIGIN,
|
|
734
|
+
};
|
|
735
|
+
if (valuableSources && valuableSources.length > 0) {
|
|
736
|
+
body.valuableSources = valuableSources;
|
|
737
|
+
}
|
|
738
|
+
if (missingContent && missingContent.length > 0) {
|
|
739
|
+
body.missingContent = missingContent;
|
|
740
|
+
}
|
|
741
|
+
if (querySuggestions)
|
|
742
|
+
body.querySuggestions = querySuggestions;
|
|
743
|
+
const headers = {
|
|
744
|
+
'Content-Type': 'application/json',
|
|
745
|
+
};
|
|
746
|
+
const apiKey = session?.firecrawlApiKey;
|
|
747
|
+
if (apiKey) {
|
|
748
|
+
headers['Authorization'] = `Bearer ${apiKey}`;
|
|
749
|
+
}
|
|
750
|
+
else if (process.env.CLOUD_SERVICE === 'true') {
|
|
751
|
+
throw new Error('Unauthorized: missing API key for search feedback.');
|
|
752
|
+
}
|
|
753
|
+
log.info('Submitting search feedback', { searchId, rating });
|
|
754
|
+
const response = await fetch(endpoint, {
|
|
755
|
+
method: 'POST',
|
|
756
|
+
headers,
|
|
757
|
+
body: JSON.stringify(body),
|
|
758
|
+
});
|
|
759
|
+
const responseText = await response.text();
|
|
760
|
+
let parsed;
|
|
761
|
+
try {
|
|
762
|
+
parsed = JSON.parse(responseText);
|
|
763
|
+
}
|
|
764
|
+
catch {
|
|
765
|
+
parsed = { raw: responseText };
|
|
766
|
+
}
|
|
767
|
+
// 4xx is terminal; surface a structured payload (with retryable=false)
|
|
768
|
+
// so agents do not retry-loop on substantive-feedback rejections,
|
|
769
|
+
// expired windows, etc.
|
|
770
|
+
if (!response.ok) {
|
|
771
|
+
log.warn('Search feedback rejected', {
|
|
772
|
+
status: response.status,
|
|
773
|
+
feedbackErrorCode: parsed?.feedbackErrorCode,
|
|
774
|
+
});
|
|
775
|
+
return asText({
|
|
776
|
+
success: false,
|
|
777
|
+
status: response.status,
|
|
778
|
+
feedbackErrorCode: parsed?.feedbackErrorCode,
|
|
779
|
+
error: parsed?.error ?? `HTTP ${response.status}`,
|
|
780
|
+
retryable: response.status >= 500,
|
|
781
|
+
});
|
|
782
|
+
}
|
|
783
|
+
return asText(parsed);
|
|
784
|
+
},
|
|
785
|
+
});
|
|
786
|
+
}
|
|
611
787
|
server.addTool({
|
|
612
788
|
name: 'firecrawl_crawl',
|
|
613
789
|
annotations: {
|
|
@@ -1445,4 +1621,5 @@ else {
|
|
|
1445
1621
|
transportType: 'stdio',
|
|
1446
1622
|
};
|
|
1447
1623
|
}
|
|
1624
|
+
registerMonitorTools(server);
|
|
1448
1625
|
await server.start(args);
|
package/dist/monitor.js
ADDED
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Firecrawl Monitor tools.
|
|
3
|
+
*
|
|
4
|
+
* Monitors run recurring scrapes/crawls and diff each result against the last
|
|
5
|
+
* retained snapshot. The SDK exposes monitor methods, but its HttpClient
|
|
6
|
+
* injects a top-level `origin` field into every POST/PATCH body and
|
|
7
|
+
* /v2/monitor rejects that with "Unrecognized key in body". Until the SDK
|
|
8
|
+
* strips `origin` for monitor requests, we hit /v2/monitor directly via fetch
|
|
9
|
+
* — same pattern the CLI uses.
|
|
10
|
+
*/
|
|
11
|
+
import { z } from 'zod';
|
|
12
|
+
const DEFAULT_API_URL = 'https://api.firecrawl.dev';
|
|
13
|
+
function resolveAuth(session) {
|
|
14
|
+
const apiKey = session?.firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY;
|
|
15
|
+
const baseUrl = (process.env.FIRECRAWL_API_URL ?? DEFAULT_API_URL).replace(/\/$/, '');
|
|
16
|
+
return { apiKey, baseUrl };
|
|
17
|
+
}
|
|
18
|
+
async function monitorRequest(session, path, init = {}) {
|
|
19
|
+
const { apiKey, baseUrl } = resolveAuth(session);
|
|
20
|
+
if (!apiKey && !process.env.FIRECRAWL_API_URL) {
|
|
21
|
+
throw new Error('Unauthorized: API key is required for monitor requests');
|
|
22
|
+
}
|
|
23
|
+
let url = `${baseUrl}/v2${path}`;
|
|
24
|
+
if (init.query) {
|
|
25
|
+
const qs = new URLSearchParams();
|
|
26
|
+
for (const [k, v] of Object.entries(init.query)) {
|
|
27
|
+
if (v !== undefined && v !== null && v !== '')
|
|
28
|
+
qs.set(k, String(v));
|
|
29
|
+
}
|
|
30
|
+
const s = qs.toString();
|
|
31
|
+
if (s)
|
|
32
|
+
url += `?${s}`;
|
|
33
|
+
}
|
|
34
|
+
const headers = { 'X-Origin': 'mcp' };
|
|
35
|
+
if (apiKey)
|
|
36
|
+
headers.Authorization = `Bearer ${apiKey}`;
|
|
37
|
+
if (init.body !== undefined)
|
|
38
|
+
headers['Content-Type'] = 'application/json';
|
|
39
|
+
const response = await fetch(url, {
|
|
40
|
+
method: init.method ?? 'GET',
|
|
41
|
+
headers,
|
|
42
|
+
body: init.body !== undefined ? JSON.stringify(init.body) : undefined,
|
|
43
|
+
});
|
|
44
|
+
const payload = (await response.json().catch(() => ({})));
|
|
45
|
+
if (!response.ok || payload?.success === false) {
|
|
46
|
+
const message = payload?.error ||
|
|
47
|
+
`HTTP ${response.status}: ${response.statusText || 'Request failed'}`;
|
|
48
|
+
throw new Error(message);
|
|
49
|
+
}
|
|
50
|
+
return payload;
|
|
51
|
+
}
|
|
52
|
+
function asText(data) {
|
|
53
|
+
return JSON.stringify(data, null, 2);
|
|
54
|
+
}
|
|
55
|
+
const pageStatusSchema = z.enum(['same', 'new', 'changed', 'removed', 'error']);
|
|
56
|
+
export function registerMonitorTools(server) {
|
|
57
|
+
server.addTool({
|
|
58
|
+
name: 'firecrawl_monitor_create',
|
|
59
|
+
annotations: {
|
|
60
|
+
title: 'Create monitor',
|
|
61
|
+
readOnlyHint: false,
|
|
62
|
+
openWorldHint: true,
|
|
63
|
+
},
|
|
64
|
+
description: `
|
|
65
|
+
Create a Firecrawl monitor — a recurring scrape or crawl that diffs each result against the last retained snapshot.
|
|
66
|
+
|
|
67
|
+
Pass the full request body. Required fields: \`name\`, \`schedule\` (with \`cron\` or \`text\`), and \`targets\` (one or more \`{ type: 'scrape', urls: [...] }\` or \`{ type: 'crawl', url: '...' }\`). Optional: \`webhook\`, \`notification\`, \`retentionDays\`.
|
|
68
|
+
|
|
69
|
+
**Markdown-mode (default):** Each check produces a unified text diff of the page's markdown. No extra configuration needed.
|
|
70
|
+
|
|
71
|
+
\`\`\`json
|
|
72
|
+
{
|
|
73
|
+
"name": "firecrawl_monitor_create",
|
|
74
|
+
"arguments": {
|
|
75
|
+
"body": {
|
|
76
|
+
"name": "Blog watch",
|
|
77
|
+
"schedule": { "text": "every 30 minutes", "timezone": "UTC" },
|
|
78
|
+
"targets": [{ "type": "scrape", "urls": ["https://example.com/blog"] }],
|
|
79
|
+
"notification": { "email": { "enabled": true, "recipients": ["a@b.com"] } }
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
\`\`\`
|
|
84
|
+
|
|
85
|
+
**JSON-mode change tracking:** To detect changes in **specific structured fields** (price, headline, in-stock flag, list items) instead of the whole page, add a \`changeTracking\` format with \`modes: ["json"]\` and a JSON schema to the target's \`scrapeOptions.formats\`. The check response will then carry a per-field diff (keyed by JSON path, e.g. \`plans[0].price\`) and a \`snapshot.json\` with the full current extraction. See \`firecrawl_monitor_check\` for the response shape.
|
|
86
|
+
|
|
87
|
+
\`\`\`json
|
|
88
|
+
{
|
|
89
|
+
"name": "firecrawl_monitor_create",
|
|
90
|
+
"arguments": {
|
|
91
|
+
"body": {
|
|
92
|
+
"name": "Pricing watch",
|
|
93
|
+
"schedule": { "text": "hourly", "timezone": "UTC" },
|
|
94
|
+
"targets": [{
|
|
95
|
+
"type": "scrape",
|
|
96
|
+
"urls": ["https://example.com/pricing"],
|
|
97
|
+
"scrapeOptions": {
|
|
98
|
+
"formats": [{
|
|
99
|
+
"type": "changeTracking",
|
|
100
|
+
"modes": ["json"],
|
|
101
|
+
"prompt": "Extract pricing tiers and headline features for each plan.",
|
|
102
|
+
"schema": {
|
|
103
|
+
"type": "object",
|
|
104
|
+
"properties": {
|
|
105
|
+
"plans": {
|
|
106
|
+
"type": "array",
|
|
107
|
+
"items": {
|
|
108
|
+
"type": "object",
|
|
109
|
+
"properties": {
|
|
110
|
+
"name": { "type": "string" },
|
|
111
|
+
"price": { "type": "string" },
|
|
112
|
+
"features": { "type": "array", "items": { "type": "string" } }
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}]
|
|
119
|
+
}
|
|
120
|
+
}]
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
\`\`\`
|
|
125
|
+
|
|
126
|
+
**Mixed mode (JSON + git-diff):** Use \`modes: ["json", "git-diff"]\` to get both per-field diffs and a markdown sidecar. The page is marked \`changed\` whenever either surface changed.
|
|
127
|
+
`,
|
|
128
|
+
parameters: z.object({
|
|
129
|
+
body: z.record(z.string(), z.any()),
|
|
130
|
+
}),
|
|
131
|
+
execute: async (args, { session, log }) => {
|
|
132
|
+
const { body } = args;
|
|
133
|
+
log.info('Creating monitor', { name: body.name });
|
|
134
|
+
const res = await monitorRequest(session, '/monitor', {
|
|
135
|
+
method: 'POST',
|
|
136
|
+
body,
|
|
137
|
+
});
|
|
138
|
+
return asText(res);
|
|
139
|
+
},
|
|
140
|
+
});
|
|
141
|
+
server.addTool({
|
|
142
|
+
name: 'firecrawl_monitor_list',
|
|
143
|
+
annotations: {
|
|
144
|
+
title: 'List monitors',
|
|
145
|
+
readOnlyHint: true,
|
|
146
|
+
openWorldHint: false,
|
|
147
|
+
},
|
|
148
|
+
description: `
|
|
149
|
+
List all Firecrawl monitors for the authenticated account.
|
|
150
|
+
|
|
151
|
+
**Usage Example:**
|
|
152
|
+
\`\`\`json
|
|
153
|
+
{ "name": "firecrawl_monitor_list", "arguments": { "limit": 20 } }
|
|
154
|
+
\`\`\`
|
|
155
|
+
`,
|
|
156
|
+
parameters: z.object({
|
|
157
|
+
limit: z.number().int().positive().optional(),
|
|
158
|
+
offset: z.number().int().nonnegative().optional(),
|
|
159
|
+
}),
|
|
160
|
+
execute: async (args, { session }) => {
|
|
161
|
+
const { limit, offset } = args;
|
|
162
|
+
const res = await monitorRequest(session, '/monitor', {
|
|
163
|
+
query: { limit, offset },
|
|
164
|
+
});
|
|
165
|
+
return asText(res);
|
|
166
|
+
},
|
|
167
|
+
});
|
|
168
|
+
server.addTool({
|
|
169
|
+
name: 'firecrawl_monitor_get',
|
|
170
|
+
annotations: {
|
|
171
|
+
title: 'Get monitor',
|
|
172
|
+
readOnlyHint: true,
|
|
173
|
+
openWorldHint: false,
|
|
174
|
+
},
|
|
175
|
+
description: `
|
|
176
|
+
Get a single monitor by ID.
|
|
177
|
+
|
|
178
|
+
**Usage Example:**
|
|
179
|
+
\`\`\`json
|
|
180
|
+
{ "name": "firecrawl_monitor_get", "arguments": { "id": "mon_abc123" } }
|
|
181
|
+
\`\`\`
|
|
182
|
+
`,
|
|
183
|
+
parameters: z.object({ id: z.string() }),
|
|
184
|
+
execute: async (args, { session }) => {
|
|
185
|
+
const { id } = args;
|
|
186
|
+
const res = await monitorRequest(session, `/monitor/${encodeURIComponent(id)}`);
|
|
187
|
+
return asText(res);
|
|
188
|
+
},
|
|
189
|
+
});
|
|
190
|
+
server.addTool({
|
|
191
|
+
name: 'firecrawl_monitor_update',
|
|
192
|
+
annotations: {
|
|
193
|
+
title: 'Update monitor',
|
|
194
|
+
readOnlyHint: false,
|
|
195
|
+
openWorldHint: true,
|
|
196
|
+
},
|
|
197
|
+
description: `
|
|
198
|
+
Update a monitor. Pass any subset of fields to patch: \`name\`, \`status\` ("active" | "paused"), \`schedule\`, \`targets\`, \`webhook\`, \`notification\`, \`retentionDays\`.
|
|
199
|
+
|
|
200
|
+
**Usage Example:**
|
|
201
|
+
\`\`\`json
|
|
202
|
+
{
|
|
203
|
+
"name": "firecrawl_monitor_update",
|
|
204
|
+
"arguments": {
|
|
205
|
+
"id": "mon_abc123",
|
|
206
|
+
"body": { "status": "paused" }
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
\`\`\`
|
|
210
|
+
`,
|
|
211
|
+
parameters: z.object({
|
|
212
|
+
id: z.string(),
|
|
213
|
+
body: z.record(z.string(), z.any()),
|
|
214
|
+
}),
|
|
215
|
+
execute: async (args, { session }) => {
|
|
216
|
+
const { id, body } = args;
|
|
217
|
+
const res = await monitorRequest(session, `/monitor/${encodeURIComponent(id)}`, { method: 'PATCH', body });
|
|
218
|
+
return asText(res);
|
|
219
|
+
},
|
|
220
|
+
});
|
|
221
|
+
server.addTool({
|
|
222
|
+
name: 'firecrawl_monitor_delete',
|
|
223
|
+
annotations: {
|
|
224
|
+
title: 'Delete monitor',
|
|
225
|
+
readOnlyHint: false,
|
|
226
|
+
destructiveHint: true,
|
|
227
|
+
openWorldHint: true,
|
|
228
|
+
},
|
|
229
|
+
description: `
|
|
230
|
+
Permanently delete a monitor and stop its schedule. This cannot be undone.
|
|
231
|
+
|
|
232
|
+
**Usage Example:**
|
|
233
|
+
\`\`\`json
|
|
234
|
+
{ "name": "firecrawl_monitor_delete", "arguments": { "id": "mon_abc123" } }
|
|
235
|
+
\`\`\`
|
|
236
|
+
`,
|
|
237
|
+
parameters: z.object({ id: z.string() }),
|
|
238
|
+
execute: async (args, { session, log }) => {
|
|
239
|
+
const { id } = args;
|
|
240
|
+
log.info('Deleting monitor', { id });
|
|
241
|
+
const res = await monitorRequest(session, `/monitor/${encodeURIComponent(id)}`, { method: 'DELETE' });
|
|
242
|
+
return asText(res);
|
|
243
|
+
},
|
|
244
|
+
});
|
|
245
|
+
server.addTool({
|
|
246
|
+
name: 'firecrawl_monitor_run',
|
|
247
|
+
annotations: {
|
|
248
|
+
title: 'Run monitor now',
|
|
249
|
+
readOnlyHint: false,
|
|
250
|
+
openWorldHint: true,
|
|
251
|
+
},
|
|
252
|
+
description: `
|
|
253
|
+
Trigger a monitor check immediately, outside its normal schedule. Returns the queued check.
|
|
254
|
+
|
|
255
|
+
**Usage Example:**
|
|
256
|
+
\`\`\`json
|
|
257
|
+
{ "name": "firecrawl_monitor_run", "arguments": { "id": "mon_abc123" } }
|
|
258
|
+
\`\`\`
|
|
259
|
+
`,
|
|
260
|
+
parameters: z.object({ id: z.string() }),
|
|
261
|
+
execute: async (args, { session }) => {
|
|
262
|
+
const { id } = args;
|
|
263
|
+
const res = await monitorRequest(session, `/monitor/${encodeURIComponent(id)}/run`, { method: 'POST' });
|
|
264
|
+
return asText(res);
|
|
265
|
+
},
|
|
266
|
+
});
|
|
267
|
+
server.addTool({
|
|
268
|
+
name: 'firecrawl_monitor_checks',
|
|
269
|
+
annotations: {
|
|
270
|
+
title: 'List monitor checks',
|
|
271
|
+
readOnlyHint: true,
|
|
272
|
+
openWorldHint: false,
|
|
273
|
+
},
|
|
274
|
+
description: `
|
|
275
|
+
List historical checks for a monitor.
|
|
276
|
+
|
|
277
|
+
**Usage Example:**
|
|
278
|
+
\`\`\`json
|
|
279
|
+
{ "name": "firecrawl_monitor_checks", "arguments": { "id": "mon_abc123", "limit": 10 } }
|
|
280
|
+
\`\`\`
|
|
281
|
+
`,
|
|
282
|
+
parameters: z.object({
|
|
283
|
+
id: z.string(),
|
|
284
|
+
limit: z.number().int().positive().optional(),
|
|
285
|
+
offset: z.number().int().nonnegative().optional(),
|
|
286
|
+
}),
|
|
287
|
+
execute: async (args, { session }) => {
|
|
288
|
+
const { id, limit, offset } = args;
|
|
289
|
+
const res = await monitorRequest(session, `/monitor/${encodeURIComponent(id)}/checks`, { query: { limit, offset } });
|
|
290
|
+
return asText(res);
|
|
291
|
+
},
|
|
292
|
+
});
|
|
293
|
+
server.addTool({
|
|
294
|
+
name: 'firecrawl_monitor_check',
|
|
295
|
+
annotations: {
|
|
296
|
+
title: 'Get monitor check',
|
|
297
|
+
readOnlyHint: true,
|
|
298
|
+
openWorldHint: false,
|
|
299
|
+
},
|
|
300
|
+
description: `
|
|
301
|
+
Get a single check with page-level diff results. Filter \`pageStatus\` to surface only the pages that changed (or were new, removed, etc.).
|
|
302
|
+
|
|
303
|
+
Each entry in \`data.pages[]\` has \`url\`, \`status\` (\`same\` | \`new\` | \`changed\` | \`removed\` | \`error\`), and — when changed — a \`diff\` and possibly a \`snapshot\`. The shape of \`diff\` depends on the monitor's \`formats\` configuration:
|
|
304
|
+
|
|
305
|
+
- **Markdown mode (default).** \`diff.text\` is the unified markdown diff; \`diff.json\` is a parse-diff AST (\`{ files: [...] }\`). No \`snapshot\`.
|
|
306
|
+
- **JSON mode** (\`changeTracking\` with \`modes: ["json"]\`). \`diff.json\` is a per-field map keyed by JSON path into the extraction, e.g. \`plans[0].price\`, with each value being \`{ previous, current }\`. \`snapshot.json\` is the full current extraction. No \`diff.text\`.
|
|
307
|
+
- **Mixed mode** (\`modes: ["json", "git-diff"]\`). Both \`diff.text\` (markdown sidecar) AND \`diff.json\` (per-field map) are present, plus \`snapshot.json\`.
|
|
308
|
+
|
|
309
|
+
**Example JSON-mode response \`pages[]\` entry:**
|
|
310
|
+
|
|
311
|
+
\`\`\`json
|
|
312
|
+
{
|
|
313
|
+
"url": "https://example.com/pricing",
|
|
314
|
+
"status": "changed",
|
|
315
|
+
"diff": {
|
|
316
|
+
"json": {
|
|
317
|
+
"plans[0].price": { "previous": "$19/mo", "current": "$24/mo" },
|
|
318
|
+
"plans[1].features[2]": { "previous": "10 GB storage", "current": "25 GB storage" }
|
|
319
|
+
}
|
|
320
|
+
},
|
|
321
|
+
"snapshot": { "json": { "plans": [/* current full extraction matching the monitor's schema */] } }
|
|
322
|
+
}
|
|
323
|
+
\`\`\`
|
|
324
|
+
|
|
325
|
+
When summarizing a check for the user, prefer \`diff.json\` paths (e.g. "plans[0].price changed from $19/mo to $24/mo") over re-printing the markdown diff — it's more concise and grounded in the schema fields they asked for.
|
|
326
|
+
|
|
327
|
+
The endpoint paginates via a top-level \`next\` URL; this tool returns one page at a time. Increase \`limit\` (max 100) to fetch fewer pages.
|
|
328
|
+
|
|
329
|
+
**Usage Example:**
|
|
330
|
+
\`\`\`json
|
|
331
|
+
{
|
|
332
|
+
"name": "firecrawl_monitor_check",
|
|
333
|
+
"arguments": {
|
|
334
|
+
"id": "mon_abc123",
|
|
335
|
+
"checkId": "chk_xyz",
|
|
336
|
+
"pageStatus": "changed"
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
\`\`\`
|
|
340
|
+
`,
|
|
341
|
+
parameters: z.object({
|
|
342
|
+
id: z.string(),
|
|
343
|
+
checkId: z.string(),
|
|
344
|
+
limit: z.number().int().positive().optional(),
|
|
345
|
+
skip: z.number().int().nonnegative().optional(),
|
|
346
|
+
pageStatus: pageStatusSchema.optional(),
|
|
347
|
+
}),
|
|
348
|
+
execute: async (args, { session }) => {
|
|
349
|
+
const { id, checkId, limit, skip, pageStatus } = args;
|
|
350
|
+
const res = await monitorRequest(session, `/monitor/${encodeURIComponent(id)}/checks/${encodeURIComponent(checkId)}`, { query: { limit, skip, status: pageStatus } });
|
|
351
|
+
return asText(res);
|
|
352
|
+
},
|
|
353
|
+
});
|
|
354
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "3.15.0",
|
|
3
|
+
"version": "3.17.0",
|
|
4
4
|
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"mcpName": "io.github.firecrawl/firecrawl-mcp-server",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
},
|
|
16
16
|
"license": "MIT",
|
|
17
17
|
"dependencies": {
|
|
18
|
-
"@mendable/firecrawl-js": "4.
|
|
18
|
+
"@mendable/firecrawl-js": "4.24.0",
|
|
19
19
|
"dotenv": "^17.2.2",
|
|
20
20
|
"firecrawl-fastmcp": "^1.0.4",
|
|
21
21
|
"typescript": "^5.9.2",
|