firecrawl-mcp 3.16.0 → 3.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  # Firecrawl MCP Server
10
10
 
11
- A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/firecrawl/firecrawl) for searching, scraping, and interacting with the web.
11
+ A Model Context Protocol (MCP) server that brings [Firecrawl](https://github.com/firecrawl/firecrawl) to MCP-compatible AI agents — search, scrape, and interact with the live web for clean, agent-ready context.
12
12
 
13
13
  > Big thanks to [@vrknetha](https://github.com/vrknetha) and [@knacklabs](https://www.knacklabs.ai) for the initial implementation!
14
14
 
package/dist/index.js CHANGED
@@ -5,6 +5,7 @@ import { z } from 'zod';
5
5
  import FirecrawlApp from '@mendable/firecrawl-js';
6
6
  import { readFile } from 'node:fs/promises';
7
7
  import path from 'node:path';
8
+ import { registerMonitorTools } from './monitor.js';
8
9
  dotenv.config({ debug: false, quiet: true });
9
10
  function extractApiKey(headers) {
10
11
  const headerAuth = headers['authorization'];
@@ -1620,4 +1621,5 @@ else {
1620
1621
  transportType: 'stdio',
1621
1622
  };
1622
1623
  }
1624
+ registerMonitorTools(server);
1623
1625
  await server.start(args);
@@ -0,0 +1,354 @@
1
+ /**
2
+ * Firecrawl Monitor tools.
3
+ *
4
+ * Monitors run recurring scrapes/crawls and diff each result against the last
5
+ * retained snapshot. The SDK exposes monitor methods, but its HttpClient
6
+ * injects a top-level `origin` field into every POST/PATCH body and
7
+ * /v2/monitor rejects that with "Unrecognized key in body". Until the SDK
8
+ * strips `origin` for monitor requests, we hit /v2/monitor directly via fetch
9
+ * — same pattern the CLI uses.
10
+ */
11
+ import { z } from 'zod';
12
const DEFAULT_API_URL = 'https://api.firecrawl.dev';

/**
 * Resolve the API key and base URL used for monitor requests.
 *
 * The key is taken from `session.firecrawlApiKey`, falling back to the
 * `FIRECRAWL_API_KEY` environment variable. The base URL is taken from
 * `FIRECRAWL_API_URL`, falling back to `DEFAULT_API_URL`, with trailing
 * slashes removed so callers can append `/v2/...` directly.
 *
 * @param {{ firecrawlApiKey?: string } | null | undefined} session - Per-request session, if any.
 * @returns {{ apiKey: string | undefined, baseUrl: string }} Resolved credentials and origin.
 */
function resolveAuth(session) {
  const apiKey = session?.firecrawlApiKey ?? process.env.FIRECRAWL_API_KEY;
  // `||`, not `??`: an empty FIRECRAWL_API_URL (e.g. a bare `FIRECRAWL_API_URL=`
  // line in .env) must count as "not configured". With `??` it produced an
  // empty base URL and therefore a relative, unfetchable request URL.
  const configuredUrl = process.env.FIRECRAWL_API_URL || DEFAULT_API_URL;
  // Strip every trailing slash, not just one, so `http://host//` cannot yield `//v2/...`.
  const baseUrl = configuredUrl.replace(/\/+$/, '');
  return { apiKey, baseUrl };
}
18
/**
 * Build the error message for a failed monitor request.
 *
 * A string `error` is used verbatim. A non-string `error` is serialized
 * (previously it surfaced as "[object Object]"). When the payload carries a
 * `details` field it is appended so body-validation failures stay actionable.
 * NOTE(review): `details` is assumed to hold per-field validation issues —
 * confirm against the API's error schema.
 *
 * @param {{ status: number, statusText: string }} response - The fetch response.
 * @param {any} payload - Parsed response body (may be `{}` when not JSON).
 * @returns {string} Human-readable failure description.
 */
function describeMonitorFailure(response, payload) {
  const rawError = payload?.error;
  let message;
  if (!rawError) {
    message = `HTTP ${response.status}: ${response.statusText || 'Request failed'}`;
  } else if (typeof rawError === 'object') {
    message = JSON.stringify(rawError);
  } else {
    message = String(rawError);
  }
  const details = payload?.details;
  if (details !== undefined && details !== null) {
    message += ` (details: ${JSON.stringify(details)})`;
  }
  return message;
}

/**
 * Send a JSON request to the Firecrawl `/v2` API and return the parsed body.
 *
 * @param {object | null | undefined} session - Session passed to `resolveAuth`.
 * @param {string} path - Path below `/v2`, starting with `/` (e.g. `/monitor`).
 * @param {{ method?: string, query?: Record<string, unknown>, body?: unknown }} [init]
 *   Request options. `query` entries that are `undefined`, `null` or `''` are
 *   omitted; `body` is JSON-encoded when it is not `undefined`.
 * @returns {Promise<any>} The parsed JSON payload (`{}` if the body is not JSON).
 * @throws {Error} When no API key is available and `FIRECRAWL_API_URL` is not
 *   set, when the HTTP status is not OK, or when the payload has `success: false`.
 */
async function monitorRequest(session, path, init = {}) {
  const { apiKey, baseUrl } = resolveAuth(session);
  // A key is only optional when a custom API URL has been configured.
  if (!apiKey && !process.env.FIRECRAWL_API_URL) {
    throw new Error('Unauthorized: API key is required for monitor requests');
  }

  const search = new URLSearchParams();
  for (const [key, value] of Object.entries(init.query || {})) {
    if (value == null || value === '') {
      continue;
    }
    search.set(key, String(value));
  }
  const queryString = search.toString();
  const url = `${baseUrl}/v2${path}${queryString ? `?${queryString}` : ''}`;

  const hasBody = init.body !== undefined;
  const headers = { 'X-Origin': 'mcp' };
  if (apiKey) {
    headers.Authorization = `Bearer ${apiKey}`;
  }
  if (hasBody) {
    headers['Content-Type'] = 'application/json';
  }

  const response = await fetch(url, {
    method: init.method ?? 'GET',
    headers,
    body: hasBody ? JSON.stringify(init.body) : undefined,
  });

  // Deliberately best-effort: an empty or non-JSON body becomes `{}` so the
  // status line can still be reported below.
  const payload = await response.json().catch(() => ({}));
  if (!response.ok || payload?.success === false) {
    throw new Error(describeMonitorFailure(response, payload));
  }
  return payload;
}
52
/**
 * Render a value as pretty-printed JSON text for a tool result.
 *
 * @param {unknown} data - Any JSON-serializable value.
 * @returns {string} `data` serialized with two-space indentation.
 */
function asText(data) {
  const indentWidth = 2;
  const replacer = null;
  return JSON.stringify(data, replacer, indentWidth);
}
55
// Page-level statuses accepted by the `pageStatus` filter of `firecrawl_monitor_check`.
const MONITOR_PAGE_STATUSES = ['same', 'new', 'changed', 'removed', 'error'];
const pageStatusSchema = z.enum(MONITOR_PAGE_STATUSES);
56
/**
 * Register the Firecrawl monitor tools on an MCP server.
 *
 * Adds eight tools: create, list, get, update, delete, run, checks and check.
 * Every handler forwards its arguments to `monitorRequest` and returns the
 * response rendered by `asText`.
 *
 * @param {{ addTool: (tool: object) => void }} server - Server exposing `addTool`.
 * @returns {void}
 */
export function registerMonitorTools(server) {
  // IDs are interpolated into the request path. An empty ID would turn
  // `/monitor/{id}` into `/monitor/`, which no longer names a single monitor,
  // so reject it at validation time.
  const requiredId = () => z.string().min(1);
  // Single place that builds (and URL-encodes) a monitor-scoped path.
  const monitorPath = (id, suffix = '') => `/monitor/${encodeURIComponent(id)}${suffix}`;

  server.addTool({
    name: 'firecrawl_monitor_create',
    annotations: {
      title: 'Create monitor',
      readOnlyHint: false,
      openWorldHint: true,
    },
    description: `
Create a Firecrawl monitor — a recurring scrape or crawl that diffs each result against the last retained snapshot.

Pass the full request body. Required fields: \`name\`, \`schedule\` (with \`cron\` or \`text\`), and \`targets\` (one or more \`{ type: 'scrape', urls: [...] }\` or \`{ type: 'crawl', url: '...' }\`). Optional: \`webhook\`, \`notification\`, \`retentionDays\`.

**Markdown-mode (default):** Each check produces a unified text diff of the page's markdown. No extra configuration needed.

\`\`\`json
{
"name": "firecrawl_monitor_create",
"arguments": {
"body": {
"name": "Blog watch",
"schedule": { "text": "every 30 minutes", "timezone": "UTC" },
"targets": [{ "type": "scrape", "urls": ["https://example.com/blog"] }],
"notification": { "email": { "enabled": true, "recipients": ["a@b.com"] } }
}
}
}
\`\`\`

**JSON-mode change tracking:** To detect changes in **specific structured fields** (price, headline, in-stock flag, list items) instead of the whole page, add a \`changeTracking\` format with \`modes: ["json"]\` and a JSON schema to the target's \`scrapeOptions.formats\`. The check response will then carry a per-field diff (keyed by JSON path, e.g. \`plans[0].price\`) and a \`snapshot.json\` with the full current extraction. See \`firecrawl_monitor_check\` for the response shape.

\`\`\`json
{
"name": "firecrawl_monitor_create",
"arguments": {
"body": {
"name": "Pricing watch",
"schedule": { "text": "hourly", "timezone": "UTC" },
"targets": [{
"type": "scrape",
"urls": ["https://example.com/pricing"],
"scrapeOptions": {
"formats": [{
"type": "changeTracking",
"modes": ["json"],
"prompt": "Extract pricing tiers and headline features for each plan.",
"schema": {
"type": "object",
"properties": {
"plans": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": { "type": "string" },
"price": { "type": "string" },
"features": { "type": "array", "items": { "type": "string" } }
}
}
}
}
}
}]
}
}]
}
}
}
\`\`\`

**Mixed mode (JSON + git-diff):** Use \`modes: ["json", "git-diff"]\` to get both per-field diffs and a markdown sidecar. The page is marked \`changed\` whenever either surface changed.
`,
    parameters: z.object({
      body: z.record(z.string(), z.any()),
    }),
    execute: async (args, { session, log }) => {
      const { body } = args;
      log.info('Creating monitor', { name: body.name });
      const res = await monitorRequest(session, '/monitor', {
        method: 'POST',
        body,
      });
      return asText(res);
    },
  });

  server.addTool({
    name: 'firecrawl_monitor_list',
    annotations: {
      title: 'List monitors',
      readOnlyHint: true,
      openWorldHint: false,
    },
    description: `
List all Firecrawl monitors for the authenticated account.

**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_list", "arguments": { "limit": 20 } }
\`\`\`
`,
    parameters: z.object({
      limit: z.number().int().positive().optional(),
      offset: z.number().int().nonnegative().optional(),
    }),
    execute: async (args, { session }) => {
      const { limit, offset } = args;
      const res = await monitorRequest(session, '/monitor', {
        query: { limit, offset },
      });
      return asText(res);
    },
  });

  server.addTool({
    name: 'firecrawl_monitor_get',
    annotations: {
      title: 'Get monitor',
      readOnlyHint: true,
      openWorldHint: false,
    },
    description: `
Get a single monitor by ID.

**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_get", "arguments": { "id": "mon_abc123" } }
\`\`\`
`,
    parameters: z.object({ id: requiredId() }),
    execute: async (args, { session }) => {
      const { id } = args;
      const res = await monitorRequest(session, monitorPath(id));
      return asText(res);
    },
  });

  server.addTool({
    name: 'firecrawl_monitor_update',
    annotations: {
      title: 'Update monitor',
      readOnlyHint: false,
      openWorldHint: true,
    },
    description: `
Update a monitor. Pass any subset of fields to patch: \`name\`, \`status\` ("active" | "paused"), \`schedule\`, \`targets\`, \`webhook\`, \`notification\`, \`retentionDays\`.

**Usage Example:**
\`\`\`json
{
"name": "firecrawl_monitor_update",
"arguments": {
"id": "mon_abc123",
"body": { "status": "paused" }
}
}
\`\`\`
`,
    parameters: z.object({
      id: requiredId(),
      body: z.record(z.string(), z.any()),
    }),
    execute: async (args, { session, log }) => {
      const { id, body } = args;
      // Logged like create/delete so every mutating tool leaves a trace.
      log.info('Updating monitor', { id });
      const res = await monitorRequest(session, monitorPath(id), { method: 'PATCH', body });
      return asText(res);
    },
  });

  server.addTool({
    name: 'firecrawl_monitor_delete',
    annotations: {
      title: 'Delete monitor',
      readOnlyHint: false,
      destructiveHint: true,
      openWorldHint: true,
    },
    description: `
Permanently delete a monitor and stop its schedule. This cannot be undone.

**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_delete", "arguments": { "id": "mon_abc123" } }
\`\`\`
`,
    parameters: z.object({ id: requiredId() }),
    execute: async (args, { session, log }) => {
      const { id } = args;
      log.info('Deleting monitor', { id });
      const res = await monitorRequest(session, monitorPath(id), { method: 'DELETE' });
      return asText(res);
    },
  });

  server.addTool({
    name: 'firecrawl_monitor_run',
    annotations: {
      title: 'Run monitor now',
      readOnlyHint: false,
      openWorldHint: true,
    },
    description: `
Trigger a monitor check immediately, outside its normal schedule. Returns the queued check.

**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_run", "arguments": { "id": "mon_abc123" } }
\`\`\`
`,
    parameters: z.object({ id: requiredId() }),
    execute: async (args, { session, log }) => {
      const { id } = args;
      // Logged like create/delete so every mutating tool leaves a trace.
      log.info('Running monitor', { id });
      const res = await monitorRequest(session, monitorPath(id, '/run'), { method: 'POST' });
      return asText(res);
    },
  });

  server.addTool({
    name: 'firecrawl_monitor_checks',
    annotations: {
      title: 'List monitor checks',
      readOnlyHint: true,
      openWorldHint: false,
    },
    description: `
List historical checks for a monitor.

**Usage Example:**
\`\`\`json
{ "name": "firecrawl_monitor_checks", "arguments": { "id": "mon_abc123", "limit": 10 } }
\`\`\`
`,
    parameters: z.object({
      id: requiredId(),
      limit: z.number().int().positive().optional(),
      offset: z.number().int().nonnegative().optional(),
    }),
    execute: async (args, { session }) => {
      const { id, limit, offset } = args;
      const res = await monitorRequest(session, monitorPath(id, '/checks'), { query: { limit, offset } });
      return asText(res);
    },
  });

  server.addTool({
    name: 'firecrawl_monitor_check',
    annotations: {
      title: 'Get monitor check',
      readOnlyHint: true,
      openWorldHint: false,
    },
    description: `
Get a single check with page-level diff results. Filter \`pageStatus\` to surface only the pages that changed (or were new, removed, etc.).

Each entry in \`data.pages[]\` has \`url\`, \`status\` (\`same\` | \`new\` | \`changed\` | \`removed\` | \`error\`), and — when changed — a \`diff\` and possibly a \`snapshot\`. The shape of \`diff\` depends on the monitor's \`formats\` configuration:

- **Markdown mode (default).** \`diff.text\` is the unified markdown diff; \`diff.json\` is a parse-diff AST (\`{ files: [...] }\`). No \`snapshot\`.
- **JSON mode** (\`changeTracking\` with \`modes: ["json"]\`). \`diff.json\` is a per-field map keyed by JSON path into the extraction, e.g. \`plans[0].price\`, with each value being \`{ previous, current }\`. \`snapshot.json\` is the full current extraction. No \`diff.text\`.
- **Mixed mode** (\`modes: ["json", "git-diff"]\`). Both \`diff.text\` (markdown sidecar) AND \`diff.json\` (per-field map) are present, plus \`snapshot.json\`.

**Example JSON-mode response \`pages[]\` entry:**

\`\`\`json
{
"url": "https://example.com/pricing",
"status": "changed",
"diff": {
"json": {
"plans[0].price": { "previous": "$19/mo", "current": "$24/mo" },
"plans[1].features[2]": { "previous": "10 GB storage", "current": "25 GB storage" }
}
},
"snapshot": { "json": { "plans": [/* current full extraction matching the monitor's schema */] } }
}
\`\`\`

When summarizing a check for the user, prefer \`diff.json\` paths (e.g. "plans[0].price changed from $19/mo to $24/mo") over re-printing the markdown diff — it's more concise and grounded in the schema fields they asked for.

The endpoint paginates via a top-level \`next\` URL; this tool returns one page at a time. Increase \`limit\` (max 100) to fetch fewer pages.

**Usage Example:**
\`\`\`json
{
"name": "firecrawl_monitor_check",
"arguments": {
"id": "mon_abc123",
"checkId": "chk_xyz",
"pageStatus": "changed"
}
}
\`\`\`
`,
    parameters: z.object({
      id: requiredId(),
      checkId: requiredId(),
      limit: z.number().int().positive().optional(),
      skip: z.number().int().nonnegative().optional(),
      pageStatus: pageStatusSchema.optional(),
    }),
    execute: async (args, { session }) => {
      const { id, checkId, limit, skip, pageStatus } = args;
      // The tool argument is `pageStatus`; it is sent as the `status` query parameter.
      const res = await monitorRequest(
        session,
        `${monitorPath(id, '/checks')}/${encodeURIComponent(checkId)}`,
        { query: { limit, skip, status: pageStatus } },
      );
      return asText(res);
    },
  });
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl-mcp",
3
- "version": "3.16.0",
3
+ "version": "3.17.0",
4
4
  "description": "MCP server for Firecrawl — search, scrape, and interact with the web. Supports both cloud and self-hosted instances. Features include web search, scraping, page interaction, batch processing, and LLM-powered content analysis.",
5
5
  "type": "module",
6
6
  "mcpName": "io.github.firecrawl/firecrawl-mcp-server",
@@ -15,7 +15,7 @@
15
15
  },
16
16
  "license": "MIT",
17
17
  "dependencies": {
18
- "@mendable/firecrawl-js": "4.21.0",
18
+ "@mendable/firecrawl-js": "4.24.0",
19
19
  "dotenv": "^17.2.2",
20
20
  "firecrawl-fastmcp": "^1.0.4",
21
21
  "typescript": "^5.9.2",