firecrawl-mcp 3.18.0 → 3.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -4
- package/dist/monitor.js +132 -15
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -872,7 +872,75 @@ Check the status of an agent job and retrieve results when complete. Use this to
|
|
|
872
872
|
- `completed`: Research finished - response includes the extracted data
|
|
873
873
|
- `failed`: An error occurred
|
|
874
874
|
|
|
875
|
-
### 11. Browser Create (`firecrawl_browser_create`) — Deprecated
|
|
875
|
+
### 11. Monitor Tools (`firecrawl_monitor_*`)
|
|
876
|
+
|
|
877
|
+
Create and manage recurring page monitors. Monitors run scheduled scrapes or crawls, diff each result against the last retained snapshot, and can notify by webhook or email.
|
|
878
|
+
|
|
879
|
+
**Best for:**
|
|
880
|
+
|
|
881
|
+
- Watching one page or a few pages over time
|
|
882
|
+
- Alerting on meaningful changes using a plain-English goal
|
|
883
|
+
- Tracking check history and page-level diffs
|
|
884
|
+
|
|
885
|
+
**Recommended create pattern:**
|
|
886
|
+
|
|
887
|
+
Use `page` or `pages` plus `goal`. The MCP server builds the monitor request with a 30-minute schedule and the API enables meaningful-change judging automatically.
|
|
888
|
+
|
|
889
|
+
Meaningful-change judging runs automatically when `goal` is set. Page webhooks expose `isMeaningful` and `judgment` on `monitor.page` events.
|
|
890
|
+
|
|
891
|
+
Write goals as concise 2-3 sentence monitor instructions. Say what should trigger an alert, preserve any scope the user gave, and include intent-specific exclusions only when obvious from the request. Generic noise such as whitespace, formatting-only changes, request IDs, tracking params, generic metadata, and unrelated page chrome is already handled by the judge, so do not repeat it in every goal. If the user is vague, keep the goal broad; if they ask for broad monitoring or "any change", preserve that. If the user says they do not care about something, include that explicitly.
|
|
892
|
+
|
|
893
|
+
```json
|
|
894
|
+
{
|
|
895
|
+
"name": "firecrawl_monitor_create",
|
|
896
|
+
"arguments": {
|
|
897
|
+
"page": "https://example.com/pricing",
|
|
898
|
+
"goal": "Alert when pricing, packaging, or launch messaging changes."
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
```
|
|
902
|
+
|
|
903
|
+
**Multiple pages with webhooks:**
|
|
904
|
+
|
|
905
|
+
```json
|
|
906
|
+
{
|
|
907
|
+
"name": "firecrawl_monitor_create",
|
|
908
|
+
"arguments": {
|
|
909
|
+
"pages": ["https://example.com/pricing", "https://example.com/changelog"],
|
|
910
|
+
"goal": "Alert when pricing, packaging, or launch messaging changes.",
|
|
911
|
+
"webhookUrl": "https://example.com/webhooks/firecrawl"
|
|
912
|
+
}
|
|
913
|
+
}
|
|
914
|
+
```
|
|
915
|
+
|
|
916
|
+
**Advanced create requests:**
|
|
917
|
+
|
|
918
|
+
Pass `body` when you need crawl targets, JSON change tracking, custom retention, or explicit `judgeEnabled` control.
|
|
919
|
+
|
|
920
|
+
```json
|
|
921
|
+
{
|
|
922
|
+
"name": "firecrawl_monitor_create",
|
|
923
|
+
"arguments": {
|
|
924
|
+
"body": {
|
|
925
|
+
"name": "Docs monitor",
|
|
926
|
+
"schedule": { "text": "hourly", "timezone": "UTC" },
|
|
927
|
+
"goal": "Alert when docs pages add, remove, or materially change API behavior.",
|
|
928
|
+
"targets": [{ "type": "crawl", "url": "https://example.com/docs" }]
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
```
|
|
933
|
+
|
|
934
|
+
**Other monitor tools:**
|
|
935
|
+
|
|
936
|
+
- `firecrawl_monitor_list`: list monitors.
|
|
937
|
+
- `firecrawl_monitor_get`: get one monitor.
|
|
938
|
+
- `firecrawl_monitor_update`: update fields including `goal`, `judgeEnabled`, `webhook`, and `notification`.
|
|
939
|
+
- `firecrawl_monitor_run`: trigger a check now.
|
|
940
|
+
- `firecrawl_monitor_checks`: list checks, optionally filtered by status.
|
|
941
|
+
- `firecrawl_monitor_check`: get page-level results, including `diff`, `snapshot`, `judgment.meaningful`, and `judgment.meaningfulChanges`.
|
|
942
|
+
|
|
943
|
+
### 12. Browser Create (`firecrawl_browser_create`) — Deprecated
|
|
876
944
|
|
|
877
945
|
> **Deprecated:** Prefer `firecrawl_scrape` + `firecrawl_interact` instead. Interact lets you scrape a page and then click, fill forms, and navigate without managing sessions manually.
|
|
878
946
|
|
|
@@ -903,7 +971,7 @@ Create a cloud browser session for interactive automation.
|
|
|
903
971
|
|
|
904
972
|
- Session ID, CDP URL, and live view URL
|
|
905
973
|
|
|
906
|
-
### 12. Browser Execute (`firecrawl_browser_execute`) — Deprecated
|
|
974
|
+
### 13. Browser Execute (`firecrawl_browser_execute`) — Deprecated
|
|
907
975
|
|
|
908
976
|
> **Deprecated:** Prefer `firecrawl_scrape` + `firecrawl_interact` instead.
|
|
909
977
|
|
|
@@ -947,7 +1015,7 @@ Execute code in a browser session. Supports agent-browser commands (bash), Pytho
|
|
|
947
1015
|
}
|
|
948
1016
|
```
|
|
949
1017
|
|
|
950
|
-
### 13. Browser List (`firecrawl_browser_list`) — Deprecated
|
|
1018
|
+
### 14. Browser List (`firecrawl_browser_list`) — Deprecated
|
|
951
1019
|
|
|
952
1020
|
> **Deprecated:** Prefer `firecrawl_scrape` + `firecrawl_interact` instead.
|
|
953
1021
|
|
|
@@ -962,7 +1030,7 @@ List browser sessions, optionally filtered by status.
|
|
|
962
1030
|
}
|
|
963
1031
|
```
|
|
964
1032
|
|
|
965
|
-
### 14. Browser Delete (`firecrawl_browser_delete`) — Deprecated
|
|
1033
|
+
### 15. Browser Delete (`firecrawl_browser_delete`) — Deprecated
|
|
966
1034
|
|
|
967
1035
|
> **Deprecated:** Prefer `firecrawl_scrape` + `firecrawl_interact` instead.
|
|
968
1036
|
|
package/dist/monitor.js
CHANGED
|
@@ -53,6 +53,67 @@ function asText(data) {
|
|
|
53
53
|
return JSON.stringify(data, null, 2);
|
|
54
54
|
}
|
|
55
55
|
const pageStatusSchema = z.enum(['same', 'new', 'changed', 'removed', 'error']);
|
|
56
|
+
// Allowed values for the optional `status` filter accepted by `firecrawl_monitor_checks`.
const checkStatusSchema = z.enum(['queued', 'running', 'completed', 'failed', 'partial', 'skipped_overlap']);
|
|
64
|
+
/**
 * Normalize the shorthand `page` / `pages` arguments into one list of URLs.
 *
 * Non-string entries are dropped, every remaining entry is trimmed, blank
 * entries are discarded, and repeated URLs are collapsed so the same page is
 * not listed twice (for example when it is passed both as `page` and inside
 * `pages`). First-seen order is kept, with `page` ahead of `pages`.
 *
 * @param {unknown} page - Single page URL, if any.
 * @param {unknown} pages - List of page URLs, if any; non-array values are ignored.
 * @returns {string[]} Unique, trimmed, non-empty URLs.
 */
function splitPages(page, pages) {
    // Only spread real arrays: spreading a string would yield single characters.
    const candidates = [page, ...(Array.isArray(pages) ? pages : [])];
    const urls = candidates
        .filter((url) => typeof url === 'string')
        .map((url) => url.trim())
        .filter(Boolean);
    // A Set iterates in insertion order, so this dedupes without reordering.
    return [...new Set(urls)];
}
|
|
70
|
+
/**
 * Build the request body for `firecrawl_monitor_create`.
 *
 * When `args.body` is a non-array object it is returned unchanged and every
 * shorthand field is ignored. Otherwise a scrape monitor is assembled from the
 * shorthand fields (`page` / `pages`, `goal`, and the optional `name`,
 * `scheduleText`, `timezone`, `email`, `includeDiffs`, `webhookUrl`).
 *
 * @param {object} args - Tool arguments.
 * @returns {object} Either `args.body` or the assembled monitor request.
 * @throws {Error} If the shorthand path yields no page URL or has no non-blank `goal`.
 */
function buildMonitorCreateBody(args) {
    // Trimmed string value, or '' for anything that is not a string.
    const trimmed = (value) => (typeof value === 'string' ? value.trim() : '');

    const { body } = args;
    const hasObjectBody = Boolean(body) && typeof body === 'object' && !Array.isArray(body);
    if (hasObjectBody) {
        return body;
    }

    const urls = splitPages(args.page, args.pages);
    if (urls.length === 0) {
        throw new Error('firecrawl_monitor_create requires either `body`, `page`, or `pages`.');
    }

    const goal = trimmed(args.goal);
    if (goal === '') {
        throw new Error('firecrawl_monitor_create shorthand requires `goal`. Use `body` for advanced requests without a goal.');
    }

    // Blank or missing optional fields fall back to their defaults.
    const request = {
        name: trimmed(args.name) || `Monitor ${urls[0]}`,
        schedule: {
            text: trimmed(args.scheduleText) || 'every 30 minutes',
            timezone: trimmed(args.timezone) || 'UTC',
        },
        goal,
        targets: [{ type: 'scrape', urls }],
    };

    // Email notification is attached only when a non-blank recipient was given.
    const recipient = trimmed(args.email);
    if (recipient !== '') {
        request.notification = {
            email: {
                enabled: true,
                recipients: [recipient],
                includeDiffs: Boolean(args.includeDiffs),
            },
        };
    }

    // Webhook is attached only when a non-blank URL was given.
    const webhookUrl = trimmed(args.webhookUrl);
    if (webhookUrl !== '') {
        request.webhook = {
            url: webhookUrl,
            events: ['monitor.page', 'monitor.check.completed'],
        };
    }

    return request;
}
|
|
56
117
|
export function registerMonitorTools(server) {
|
|
57
118
|
server.addTool({
|
|
58
119
|
name: 'firecrawl_monitor_create',
|
|
@@ -64,7 +125,27 @@ export function registerMonitorTools(server) {
|
|
|
64
125
|
description: `
|
|
65
126
|
Create a Firecrawl monitor — a recurring scrape or crawl that diffs each result against the last retained snapshot.
|
|
66
127
|
|
|
67
|
-
|
|
128
|
+
Prefer the simple path: pass \`page\` or \`pages\` plus \`goal\`. The tool will create a scrape monitor with a 30-minute schedule and meaningful-change judging enabled by the API. Use \`body\` only for advanced requests such as crawl targets, JSON change tracking, custom retention, or manual \`judgeEnabled\` control.
|
|
129
|
+
|
|
130
|
+
Meaningful-change judge: set \`goal\` to a plain-language description of what the user actually cares about. \`judgeEnabled\` defaults to true when \`goal\` is set, so providing \`goal\` is enough. Page webhooks expose \`isMeaningful\` and \`judgment\` on \`monitor.page\` events.
|
|
131
|
+
|
|
132
|
+
Simple fields:
|
|
133
|
+
- \`page\`: one page URL to monitor.
|
|
134
|
+
- \`pages\`: multiple page URLs to monitor.
|
|
135
|
+
- \`goal\`: plain-English instruction for what changes matter. Required for the simple path.
|
|
136
|
+
- \`scheduleText\`: optional natural-language schedule, default \`every 30 minutes\`.
|
|
137
|
+
- \`email\`: optional email recipient for summaries.
|
|
138
|
+
- \`webhookUrl\`: optional webhook URL. Configures \`monitor.page\` and \`monitor.check.completed\`.
|
|
139
|
+
|
|
140
|
+
Goal guidance:
|
|
141
|
+
- Expand the user's one-line monitoring intent into a concise 2-3 sentence monitor goal.
|
|
142
|
+
- State what should trigger an alert, restate any scope the user gave, and include intent-specific exclusions only when obvious from the user's request.
|
|
143
|
+
- Generic noise such as whitespace, formatting-only changes, request IDs, tracking params, generic metadata, and unrelated page chrome is already handled by the judge; do not repeat it in every goal.
|
|
144
|
+
- If the user is vague, keep the goal broad rather than guessing exclusions. If the user asks for broad monitoring or "any change", preserve that and do not add exclusions that hide changes.
|
|
145
|
+
- If the user says they do not care about something, include that explicitly. It is okay to ask whether they want to ignore specific noise when it is likely to matter.
|
|
146
|
+
- Do not invent page-specific sections, thresholds, entities, or business rules unless the user mentioned them.
|
|
147
|
+
|
|
148
|
+
Full \`body\` requests require: \`name\`, \`schedule\` (with \`cron\` or \`text\`), and \`targets\` (one or more \`{ type: 'scrape', urls: [...] }\` or \`{ type: 'crawl', url: '...' }\`). Optional: \`goal\`, \`judgeEnabled\`, \`webhook\`, \`notification\`, \`retentionDays\`.
|
|
68
149
|
|
|
69
150
|
**Markdown-mode (default):** Each check produces a unified text diff of the page's markdown. No extra configuration needed.
|
|
70
151
|
|
|
@@ -72,12 +153,22 @@ Pass the full request body. Required fields: \`name\`, \`schedule\` (with \`cron
|
|
|
72
153
|
{
|
|
73
154
|
"name": "firecrawl_monitor_create",
|
|
74
155
|
"arguments": {
|
|
75
|
-
"
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
156
|
+
"page": "https://example.com/blog",
|
|
157
|
+
"goal": "Alert when a new blog post is published or an existing headline changes.",
|
|
158
|
+
"email": "alerts@example.com"
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
\`\`\`
|
|
162
|
+
|
|
163
|
+
**Multiple pages:**
|
|
164
|
+
|
|
165
|
+
\`\`\`json
|
|
166
|
+
{
|
|
167
|
+
"name": "firecrawl_monitor_create",
|
|
168
|
+
"arguments": {
|
|
169
|
+
"pages": ["https://example.com/pricing", "https://example.com/changelog"],
|
|
170
|
+
"goal": "Alert when pricing, packaging, or launch messaging changes.",
|
|
171
|
+
"webhookUrl": "https://example.com/webhooks/firecrawl"
|
|
81
172
|
}
|
|
82
173
|
}
|
|
83
174
|
\`\`\`
|
|
@@ -91,6 +182,7 @@ Pass the full request body. Required fields: \`name\`, \`schedule\` (with \`cron
|
|
|
91
182
|
"body": {
|
|
92
183
|
"name": "Pricing watch",
|
|
93
184
|
"schedule": { "text": "hourly", "timezone": "UTC" },
|
|
185
|
+
"goal": "Alert when a pricing tier, price, billing period, limit, or headline feature changes. Ignore unrelated marketing copy unless it changes the pricing offer.",
|
|
94
186
|
"targets": [{
|
|
95
187
|
"type": "scrape",
|
|
96
188
|
"urls": ["https://example.com/pricing"],
|
|
@@ -126,10 +218,19 @@ Pass the full request body. Required fields: \`name\`, \`schedule\` (with \`cron
|
|
|
126
218
|
**Mixed mode (JSON + git-diff):** Use \`modes: ["json", "git-diff"]\` to get both per-field diffs and a markdown sidecar. The page is marked \`changed\` whenever either surface changed.
|
|
127
219
|
`,
|
|
128
220
|
parameters: z.object({
|
|
129
|
-
body: z.record(z.string(), z.any()),
|
|
221
|
+
body: z.record(z.string(), z.any()).optional(),
|
|
222
|
+
page: z.string().optional(),
|
|
223
|
+
pages: z.array(z.string()).optional(),
|
|
224
|
+
goal: z.string().optional(),
|
|
225
|
+
name: z.string().optional(),
|
|
226
|
+
scheduleText: z.string().optional(),
|
|
227
|
+
timezone: z.string().optional(),
|
|
228
|
+
email: z.string().optional(),
|
|
229
|
+
includeDiffs: z.boolean().optional(),
|
|
230
|
+
webhookUrl: z.string().optional(),
|
|
130
231
|
}),
|
|
131
232
|
execute: async (args, { session, log }) => {
|
|
132
|
-
const
|
|
233
|
+
const body = buildMonitorCreateBody(args);
|
|
133
234
|
log.info('Creating monitor', { name: body.name });
|
|
134
235
|
const res = await monitorRequest(session, '/monitor', {
|
|
135
236
|
method: 'POST',
|
|
@@ -195,7 +296,7 @@ Get a single monitor by ID.
|
|
|
195
296
|
openWorldHint: true,
|
|
196
297
|
},
|
|
197
298
|
description: `
|
|
198
|
-
Update a monitor. Pass any subset of fields to patch: \`name\`, \`status\` ("active" | "paused"), \`schedule\`, \`targets\`, \`webhook\`, \`notification\`, \`retentionDays\`.
|
|
299
|
+
Update a monitor. Pass any subset of fields to patch: \`name\`, \`status\` ("active" | "paused"), \`schedule\`, \`targets\`, \`goal\`, \`judgeEnabled\`, \`webhook\`, \`notification\`, \`retentionDays\`.
|
|
199
300
|
|
|
200
301
|
**Usage Example:**
|
|
201
302
|
\`\`\`json
|
|
@@ -276,17 +377,18 @@ List historical checks for a monitor.
|
|
|
276
377
|
|
|
277
378
|
**Usage Example:**
|
|
278
379
|
\`\`\`json
|
|
279
|
-
{ "name": "firecrawl_monitor_checks", "arguments": { "id": "mon_abc123", "limit": 10 } }
|
|
380
|
+
{ "name": "firecrawl_monitor_checks", "arguments": { "id": "mon_abc123", "limit": 10, "status": "completed" } }
|
|
280
381
|
\`\`\`
|
|
281
382
|
`,
|
|
282
383
|
parameters: z.object({
|
|
283
384
|
id: z.string(),
|
|
284
385
|
limit: z.number().int().positive().optional(),
|
|
285
386
|
offset: z.number().int().nonnegative().optional(),
|
|
387
|
+
status: checkStatusSchema.optional(),
|
|
286
388
|
}),
|
|
287
389
|
execute: async (args, { session }) => {
|
|
288
|
-
const { id, limit, offset } = args;
|
|
289
|
-
const res = await monitorRequest(session, `/monitor/${encodeURIComponent(id)}/checks`, { query: { limit, offset } });
|
|
390
|
+
const { id, limit, offset, status } = args;
|
|
391
|
+
const res = await monitorRequest(session, `/monitor/${encodeURIComponent(id)}/checks`, { query: { limit, offset, status } });
|
|
290
392
|
return asText(res);
|
|
291
393
|
},
|
|
292
394
|
});
|
|
@@ -300,7 +402,7 @@ List historical checks for a monitor.
|
|
|
300
402
|
description: `
|
|
301
403
|
Get a single check with page-level diff results. Filter \`pageStatus\` to surface only the pages that changed (or were new, removed, etc.).
|
|
302
404
|
|
|
303
|
-
Each entry in \`data.pages[]\` has \`url\`, \`status\` (\`same\` | \`new\` | \`changed\` | \`removed\` | \`error\`), and — when changed — a \`diff\` and possibly a \`snapshot\`. The shape of \`diff\` depends on the monitor's \`formats\` configuration:
|
|
405
|
+
Each entry in \`data.pages[]\` has \`url\`, \`status\` (\`same\` | \`new\` | \`changed\` | \`removed\` | \`error\`), optional \`judgment\` when goal-based judging ran, and — when changed — a \`diff\` and possibly a \`snapshot\`. The shape of \`diff\` depends on the monitor's \`formats\` configuration:
|
|
304
406
|
|
|
305
407
|
- **Markdown mode (default).** \`diff.text\` is the unified markdown diff; \`diff.json\` is a parse-diff AST (\`{ files: [...] }\`). No \`snapshot\`.
|
|
306
408
|
- **JSON mode** (\`changeTracking\` with \`modes: ["json"]\`). \`diff.json\` is a per-field map keyed by JSON path into the extraction, e.g. \`plans[0].price\`, with each value being \`{ previous, current }\`. \`snapshot.json\` is the full current extraction. No \`diff.text\`.
|
|
@@ -318,12 +420,27 @@ Each entry in \`data.pages[]\` has \`url\`, \`status\` (\`same\` | \`new\` | \`c
|
|
|
318
420
|
"plans[1].features[2]": { "previous": "10 GB storage", "current": "25 GB storage" }
|
|
319
421
|
}
|
|
320
422
|
},
|
|
321
|
-
"snapshot": { "json": { "plans": [/* current full extraction matching the monitor's schema */] } }
|
|
423
|
+
"snapshot": { "json": { "plans": [/* current full extraction matching the monitor's schema */] } },
|
|
424
|
+
"judgment": {
|
|
425
|
+
"meaningful": true,
|
|
426
|
+
"confidence": "high",
|
|
427
|
+
"reason": "The pricing changed, which matches the monitor goal.",
|
|
428
|
+
"meaningfulChanges": [
|
|
429
|
+
{
|
|
430
|
+
"type": "changed",
|
|
431
|
+
"before": "$19/mo",
|
|
432
|
+
"after": "$24/mo",
|
|
433
|
+
"reason": "The tracked plan price changed."
|
|
434
|
+
}
|
|
435
|
+
]
|
|
436
|
+
}
|
|
322
437
|
}
|
|
323
438
|
\`\`\`
|
|
324
439
|
|
|
325
440
|
When summarizing a check for the user, prefer \`diff.json\` paths (e.g. "plans[0].price changed from $19/mo to $24/mo") over re-printing the markdown diff — it's more concise and grounded in the schema fields they asked for.
|
|
326
441
|
|
|
442
|
+
When \`judgment\` is present, use it to decide what to surface. \`judgment.meaningful: false\` means the change was classified as noise for the monitor's goal. When \`judgment.meaningfulChanges\` is present, prefer those goal-relevant changes over raw diff hunks; each item includes \`type\`, \`before\`, \`after\`, and \`reason\`.
|
|
443
|
+
|
|
327
444
|
The endpoint paginates via a top-level \`next\` URL; this tool returns one page at a time. Increase \`limit\` (max 100) to fetch fewer pages.
|
|
328
445
|
|
|
329
446
|
**Usage Example:**
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "firecrawl-mcp",
|
|
3
|
-
"version": "3.18.0",
|
|
3
|
+
"version": "3.19.1",
|
|
4
4
|
"description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"mcpName": "io.github.firecrawl/firecrawl-mcp-server",
|