firecrawl-mcp 1.12.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.js +372 -316
- package/package.json +10 -9
package/README.md
CHANGED
@@ -1,6 +1,6 @@
 # Firecrawl MCP Server

-A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/
+A Model Context Protocol (MCP) server implementation that integrates with [Firecrawl](https://github.com/firecrawl/firecrawl) for web scraping capabilities.

 > Big thanks to [@vrknetha](https://github.com/vrknetha), [@knacklabs](https://www.knacklabs.ai) for the initial implementation!

package/dist/index.js
CHANGED
@@ -4,8 +4,10 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
 import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
 import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
 import FirecrawlApp from '@mendable/firecrawl-js';
+import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
 import express from 'express';
 import dotenv from 'dotenv';
+import { randomUUID } from 'node:crypto';
 dotenv.config();
 // Tool definitions
 const SCRAPE_TOOL = {
@@ -25,7 +27,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
   "arguments": {
     "url": "https://example.com",
     "formats": ["markdown"],
-    "maxAge":
+    "maxAge": 172800000
   }
 }
 \`\`\`
@@ -42,15 +44,39 @@ This is the most powerful, fastest and most reliable scraper tool, if available
       formats: {
         type: 'array',
         items: {
-
-
-
-
-
-
-
-
-
+          oneOf: [
+            {
+              type: 'string',
+              enum: [
+                'markdown',
+                'html',
+                'rawHtml',
+                'screenshot',
+                'links',
+                'extract',
+                'summary',
+              ],
+            },
+            {
+              type: 'object',
+              properties: {
+                type: {
+                  type: 'string',
+                  enum: ['json'],
+                },
+                prompt: {
+                  type: 'string',
+                  description: 'Prompt to guide JSON extraction',
+                },
+                schema: {
+                  type: 'object',
+                  description: 'JSON schema for structured extraction',
+                },
+              },
+              required: ['type'],
+              additionalProperties: true,
+              description: 'Advanced format option. Use { type: "json", prompt, schema } to request structured JSON extraction.',
+            },
           ],
         },
         default: ['markdown'],
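The hunk above replaces the flat string list for `formats` with a union that also accepts a `{ type: 'json', prompt, schema }` object. A minimal sketch of tool arguments that exercise both branches; the URL, prompt, and field names are illustrative, not taken from the diff:

```javascript
// Hypothetical firecrawl_scrape arguments mixing both branches of the new
// `formats` union: a plain string format plus the advanced JSON object form.
const scrapeArgs = {
  url: 'https://example.com',
  formats: [
    'markdown',
    {
      type: 'json',
      prompt: 'Extract the page title and author',
      schema: {
        type: 'object',
        properties: {
          title: { type: 'string' },
          author: { type: 'string' },
        },
      },
    },
  ],
};
```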
@@ -58,6 +84,7 @@ This is the most powerful, fastest and most reliable scraper tool, if available
       },
       onlyMainContent: {
         type: 'boolean',
+        default: true,
         description: 'Extract only the main content, filtering out navigation, footers, etc.',
       },
       includeTags: {
@@ -74,10 +101,6 @@ This is the most powerful, fastest and most reliable scraper tool, if available
         type: 'number',
         description: 'Time in milliseconds to wait for dynamic content to load',
       },
-      timeout: {
-        type: 'number',
-        description: 'Maximum time in milliseconds to wait for the page to load',
-      },
       actions: {
         type: 'array',
         items: {
@@ -131,24 +154,6 @@ This is the most powerful, fastest and most reliable scraper tool, if available
         },
         description: 'List of actions to perform before scraping',
       },
-      extract: {
-        type: 'object',
-        properties: {
-          schema: {
-            type: 'object',
-            description: 'Schema for structured data extraction',
-          },
-          systemPrompt: {
-            type: 'string',
-            description: 'System prompt for LLM extraction',
-          },
-          prompt: {
-            type: 'string',
-            description: 'User prompt for LLM extraction',
-          },
-        },
-        description: 'Configuration for structured data extraction',
-      },
       mobile: {
         type: 'boolean',
         description: 'Use mobile viewport',
@@ -176,9 +181,15 @@ This is the most powerful, fastest and most reliable scraper tool, if available
         },
         description: 'Location settings for scraping',
       },
+      storeInCache: {
+        type: 'boolean',
+        default: true,
+        description: 'If true, the page will be stored in the Firecrawl index and cache. Setting this to false is useful if your scraping activity may have data protection concerns.',
+      },
       maxAge: {
         type: 'number',
-
+        default: 172800000,
+        description: 'Maximum age in milliseconds for cached content. Use cached data if available and younger than maxAge, otherwise scrape fresh. Enables 500% faster scrapes for recently cached pages. Default: 172800000',
       },
     },
     required: ['url'],
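The new `maxAge` default of 172800000 ms is simply 48 hours, and `storeInCache` controls whether the scraped page is written to the Firecrawl index at all. A small sketch, with illustrative values, of how the two options combine for a privacy-sensitive scrape:

```javascript
// 172800000 ms == 2 days; cached results younger than this may be reused.
const maxAgeDefault = 2 * 24 * 60 * 60 * 1000; // 172800000

// Hypothetical arguments: keep the page out of the shared cache and always fetch fresh.
const sensitiveScrapeArgs = {
  url: 'https://example.com/private-report',
  storeInCache: false, // do not store this page in the Firecrawl index/cache
  maxAge: 0,           // never serve a cached copy, always scrape fresh
};
```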
@@ -215,13 +226,10 @@ Map a website to discover all indexed URLs on the site.
         type: 'string',
         description: 'Optional search term to filter URLs',
       },
-
-        type: '
-
-
-      sitemapOnly: {
-        type: 'boolean',
-        description: 'Only use sitemap.xml for discovery, ignore HTML links',
+      sitemap: {
+        type: 'string',
+        enum: ['include', 'skip', 'only'],
+        description: 'Sitemap handling: "include" - use sitemap + find other pages (default), "skip" - ignore sitemap completely, "only" - only return sitemap URLs',
       },
       includeSubdomains: {
         type: 'boolean',
@@ -231,6 +239,11 @@ Map a website to discover all indexed URLs on the site.
         type: 'number',
         description: 'Maximum number of URLs to return',
       },
+      ignoreQueryParameters: {
+        type: 'boolean',
+        default: true,
+        description: 'Do not return URLs with query parameters',
+      },
     },
     required: ['url'],
   },
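For the map tool, the boolean `sitemapOnly` flag is replaced by a three-valued `sitemap` option, and `ignoreQueryParameters` is added. A sketch of arguments using the new shape; values are illustrative:

```javascript
// Hypothetical firecrawl_map arguments under the 2.x schema.
const mapArgs = {
  url: 'https://example.com',
  sitemap: 'only',             // 'include' (default) | 'skip' | 'only'; replaces sitemapOnly
  includeSubdomains: false,
  limit: 500,
  ignoreQueryParameters: true, // drop URLs that differ only by query string
};
```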
@@ -238,28 +251,29 @@ Map a website to discover all indexed URLs on the site.
 const CRAWL_TOOL = {
   name: 'firecrawl_crawl',
   description: `
-Starts
-
-**Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.
-**Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).
-**Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.
-**Common mistakes:** Setting limit or
-**Prompt Example:** "Get all blog posts from the first two levels of example.com/blog."
-**Usage Example:**
-\`\`\`json
-{
-
-
-
-
-
-
-
-
-}
-
-
-
+Starts a crawl job on a website and extracts content from all pages.
+
+**Best for:** Extracting content from multiple related pages, when you need comprehensive coverage.
+**Not recommended for:** Extracting content from a single page (use scrape); when token limits are a concern (use map + batch_scrape); when you need fast results (crawling can be slow).
+**Warning:** Crawl responses can be very large and may exceed token limits. Limit the crawl depth and number of pages, or use map + batch_scrape for better control.
+**Common mistakes:** Setting limit or maxDiscoveryDepth too high (causes token overflow); using crawl for a single page (use scrape instead).
+**Prompt Example:** "Get all blog posts from the first two levels of example.com/blog."
+**Usage Example:**
+\`\`\`json
+{
+  "name": "firecrawl_crawl",
+  "arguments": {
+    "url": "https://example.com/blog/*",
+    "maxDiscoveryDepth": 2,
+    "limit": 100,
+    "allowExternalLinks": false,
+    "deduplicateSimilarURLs": true,
+    "sitemap": "include"
+  }
+}
+\`\`\`
+**Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress.
+`,
   inputSchema: {
     type: 'object',
     properties: {
@@ -267,6 +281,10 @@ Starts an asynchronous crawl job on a website and extracts content from all page
         type: 'string',
         description: 'Starting URL for the crawl',
       },
+      prompt: {
+        type: 'string',
+        description: 'Natural language prompt to generate crawler options. Explicitly set parameters will override generated ones.',
+      },
       excludePaths: {
         type: 'array',
         items: { type: 'string' },
@@ -277,26 +295,43 @@ Starts an asynchronous crawl job on a website and extracts content from all page
         items: { type: 'string' },
         description: 'Only crawl these URL paths',
       },
-
+      maxDiscoveryDepth: {
         type: 'number',
-        description: 'Maximum
+        description: 'Maximum discovery depth to crawl. The root site and sitemapped pages have depth 0.',
       },
-
-        type: '
-
+      sitemap: {
+        type: 'string',
+        enum: ['skip', 'include', 'only'],
+        default: 'include',
+        description: "Sitemap mode when crawling. 'skip' ignores the sitemap entirely, 'include' uses sitemap plus other discovery methods (default), 'only' restricts crawling to sitemap URLs.",
       },
       limit: {
         type: 'number',
-
-
-      allowBackwardLinks: {
-        type: 'boolean',
-        description: 'Allow crawling links that point to parent directories',
+        default: 10000,
+        description: 'Maximum number of pages to crawl (default: 10000)',
       },
       allowExternalLinks: {
         type: 'boolean',
         description: 'Allow crawling links to external domains',
       },
+      allowSubdomains: {
+        type: 'boolean',
+        default: false,
+        description: 'Allow crawling links to subdomains of the main domain',
+      },
+      crawlEntireDomain: {
+        type: 'boolean',
+        default: false,
+        description: 'When true, follow internal links to sibling or parent URLs, not just child paths',
+      },
+      delay: {
+        type: 'number',
+        description: 'Delay in seconds between scrapes to respect site rate limits',
+      },
+      maxConcurrency: {
+        type: 'number',
+        description: 'Maximum number of concurrent scrapes; if unset, team limit is used',
+      },
       webhook: {
         oneOf: [
           {
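Several crawl options are new in this hunk (prompt, maxDiscoveryDepth, sitemap, allowSubdomains, crawlEntireDomain, delay, maxConcurrency) while `allowBackwardLinks` disappears. A sketch of arguments combining them, with illustrative values:

```javascript
// Hypothetical firecrawl_crawl arguments exercising the options added above.
const crawlArgs = {
  url: 'https://example.com/docs/*',
  prompt: 'Crawl only the documentation pages', // natural-language option generation
  maxDiscoveryDepth: 2,    // root and sitemapped pages count as depth 0
  sitemap: 'include',      // 'skip' | 'include' (default) | 'only'
  limit: 200,              // schema default is 10000
  crawlEntireDomain: true, // follow sibling/parent links, not just child paths
  allowSubdomains: false,
  delay: 1,                // seconds between scrapes, to respect rate limits
  maxConcurrency: 5,       // unset means the team limit is used
};
```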
@@ -325,7 +360,8 @@ Starts an asynchronous crawl job on a website and extracts content from all page
       },
       ignoreQueryParameters: {
         type: 'boolean',
-
+        default: false,
+        description: 'Do not re-scrape the same path with different (or none) query parameters',
       },
       scrapeOptions: {
         type: 'object',
@@ -333,17 +369,43 @@ Starts an asynchronous crawl job on a website and extracts content from all page
           formats: {
            type: 'array',
            items: {
-
-
-
-
-
-
-
-
-
+              oneOf: [
+                {
+                  type: 'string',
+                  enum: [
+                    'markdown',
+                    'html',
+                    'rawHtml',
+                    'screenshot',
+                    'links',
+                    'extract',
+                    'summary',
+                  ],
+                },
+                {
+                  type: 'object',
+                  properties: {
+                    type: {
+                      type: 'string',
+                      enum: ['json'],
+                    },
+                    prompt: {
+                      type: 'string',
+                      description: 'Prompt to guide JSON extraction',
+                    },
+                    schema: {
+                      type: 'object',
+                      description: 'JSON schema for structured extraction',
+                    },
+                  },
+                  required: ['type'],
+                  additionalProperties: true,
+                  description: 'Advanced format option. Use { type: "json", prompt, schema } to request structured JSON extraction.',
+                },
              ],
            },
+            default: ['markdown'],
+            description: "Content formats to extract (default: ['markdown'])",
           },
           onlyMainContent: {
            type: 'boolean',
@@ -396,12 +458,13 @@ Check the status of a crawl job.
 const SEARCH_TOOL = {
   name: 'firecrawl_search',
   description: `
-Search the web and optionally extract content from search results. This is the most powerful search tool available, and if available you should always default to using this tool for any web search needs.
+Search the web and optionally extract content from search results. This is the most powerful web search tool available, and if available you should always default to using this tool for any web search needs.

 **Best for:** Finding specific information across multiple websites, when you don't know which website has the information; when you need the most relevant content for a query.
-**Not recommended for:** When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl
+**Not recommended for:** When you need to search the filesystem. When you already know which website to scrape (use scrape); when you need comprehensive coverage of a single website (use map or crawl.
 **Common mistakes:** Using crawl or map for open-ended questions (use search instead).
 **Prompt Example:** "Find the latest research papers on AI published in 2023."
+**Sources:** web, images, news, default to web unless needed images or news.
 **Usage Example:**
 \`\`\`json
 {
@@ -411,6 +474,11 @@ Search the web and optionally extract content from search results. This is the m
     "limit": 5,
     "lang": "en",
     "country": "us",
+    "sources": [
+      "web",
+      "images",
+      "news"
+    ],
     "scrapeOptions": {
       "formats": ["markdown"],
       "onlyMainContent": true
@@ -431,14 +499,6 @@ Search the web and optionally extract content from search results. This is the m
         type: 'number',
         description: 'Maximum number of results to return (default: 5)',
       },
-      lang: {
-        type: 'string',
-        description: 'Language code for search results (default: en)',
-      },
-      country: {
-        type: 'string',
-        description: 'Country code for search results (default: us)',
-      },
       tbs: {
         type: 'string',
         description: 'Time-based search filter',
@@ -448,19 +508,49 @@ Search the web and optionally extract content from search results. This is the m
         description: 'Search filter',
       },
       location: {
-        type: '
-
-
-
-
-
-
-
-
-
-
+        type: 'string',
+        description: 'Location parameter for search results',
+      },
+      sources: {
+        type: 'array',
+        description: 'Sources to search. Determines which result arrays are included in the response.',
+        items: {
+          oneOf: [
+            {
+              type: 'object',
+              properties: {
+                type: { type: 'string', enum: ['web'] },
+                // tbs: {
+                //   type: 'string',
+                //   description:
+                //     'Time-based search parameter (e.g., qdr:h, qdr:d, qdr:w, qdr:m, qdr:y or custom cdr with cd_min/cd_max)',
+                // },
+                // location: {
+                //   type: 'string',
+                //   description: 'Location parameter for search results',
+                // },
+              },
+              required: ['type'],
+              additionalProperties: false,
+            },
+            {
+              type: 'object',
+              properties: {
+                type: { type: 'string', enum: ['images'] },
+              },
+              required: ['type'],
+              additionalProperties: false,
+            },
+            {
+              type: 'object',
+              properties: {
+                type: { type: 'string', enum: ['news'] },
+              },
+              required: ['type'],
+              additionalProperties: false,
+            },
+          ],
         },
-        description: 'Location settings for search',
       },
       scrapeOptions: {
         type: 'object',
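Note that the schema above declares each `sources` entry as an object with a `type` field, while the usage example earlier in the description passes plain strings; the object form is what the declared schema validates. A sketch of arguments in that object form, with an illustrative query:

```javascript
// Hypothetical firecrawl_search arguments using the object form of `sources`.
const searchArgs = {
  query: 'latest AI research papers 2023',
  limit: 5,
  sources: [
    { type: 'web' },
    { type: 'news' }, // add { type: 'images' } the same way when image results are wanted
  ],
  scrapeOptions: { formats: ['markdown'], onlyMainContent: true },
};
```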
@@ -468,8 +558,22 @@ Search the web and optionally extract content from search results. This is the m
           formats: {
             type: 'array',
             items: {
-
-
+              oneOf: [
+                {
+                  type: 'string',
+                  enum: ['markdown', 'html', 'rawHtml'],
+                },
+                {
+                  type: 'object',
+                  properties: {
+                    type: { type: 'string', enum: ['json'] },
+                    prompt: { type: 'string' },
+                    schema: { type: 'object' },
+                  },
+                  required: ['type'],
+                  additionalProperties: true,
+                },
+              ],
             },
             description: 'Content formats to extract from search results',
           },
@@ -493,12 +597,11 @@ const EXTRACT_TOOL = {
   description: `
 Extract structured information from web pages using LLM capabilities. Supports both cloud AI and self-hosted LLM extraction.

-**Best for:** Extracting specific structured data like prices, names, details.
+**Best for:** Extracting specific structured data like prices, names, details from web pages.
 **Not recommended for:** When you need the full content of a page (use scrape); when you're not looking for specific structured data.
 **Arguments:**
 - urls: Array of URLs to extract information from
 - prompt: Custom prompt for the LLM extraction
-- systemPrompt: System prompt to guide the LLM
 - schema: JSON schema for structured data extraction
 - allowExternalLinks: Allow extraction from external links
 - enableWebSearch: Enable web search for additional context
@@ -511,7 +614,6 @@ Extract structured information from web pages using LLM capabilities. Supports b
   "arguments": {
     "urls": ["https://example.com/page1", "https://example.com/page2"],
     "prompt": "Extract product information including name, price, and description",
-    "systemPrompt": "You are a helpful assistant that extracts product information",
     "schema": {
       "type": "object",
       "properties": {
@@ -541,10 +643,6 @@ Extract structured information from web pages using LLM capabilities. Supports b
         type: 'string',
         description: 'Prompt for the LLM extraction',
       },
-      systemPrompt: {
-        type: 'string',
-        description: 'System prompt for LLM extraction',
-      },
       schema: {
         type: 'object',
         description: 'JSON schema for structured data extraction',
@@ -565,100 +663,6 @@ Extract structured information from web pages using LLM capabilities. Supports b
     required: ['urls'],
   },
 };
-const DEEP_RESEARCH_TOOL = {
-  name: 'firecrawl_deep_research',
-  description: `
-Conduct deep web research on a query using intelligent crawling, search, and LLM analysis.
-
-**Best for:** Complex research questions requiring multiple sources, in-depth analysis.
-**Not recommended for:** Simple questions that can be answered with a single search; when you need very specific information from a known page (use scrape); when you need results quickly (deep research can take time).
-**Arguments:**
-- query (string, required): The research question or topic to explore.
-- maxDepth (number, optional): Maximum recursive depth for crawling/search (default: 3).
-- timeLimit (number, optional): Time limit in seconds for the research session (default: 120).
-- maxUrls (number, optional): Maximum number of URLs to analyze (default: 50).
-**Prompt Example:** "Research the environmental impact of electric vehicles versus gasoline vehicles."
-**Usage Example:**
-\`\`\`json
-{
-  "name": "firecrawl_deep_research",
-  "arguments": {
-    "query": "What are the environmental impacts of electric vehicles compared to gasoline vehicles?",
-    "maxDepth": 3,
-    "timeLimit": 120,
-    "maxUrls": 50
-  }
-}
-\`\`\`
-**Returns:** Final analysis generated by an LLM based on research. (data.finalAnalysis); may also include structured activities and sources used in the research process.
-`,
-  inputSchema: {
-    type: 'object',
-    properties: {
-      query: {
-        type: 'string',
-        description: 'The query to research',
-      },
-      maxDepth: {
-        type: 'number',
-        description: 'Maximum depth of research iterations (1-10)',
-      },
-      timeLimit: {
-        type: 'number',
-        description: 'Time limit in seconds (30-300)',
-      },
-      maxUrls: {
-        type: 'number',
-        description: 'Maximum number of URLs to analyze (1-1000)',
-      },
-    },
-    required: ['query'],
-  },
-};
-const GENERATE_LLMSTXT_TOOL = {
-  name: 'firecrawl_generate_llmstxt',
-  description: `
-Generate a standardized llms.txt (and optionally llms-full.txt) file for a given domain. This file defines how large language models should interact with the site.
-
-**Best for:** Creating machine-readable permission guidelines for AI models.
-**Not recommended for:** General content extraction or research.
-**Arguments:**
-- url (string, required): The base URL of the website to analyze.
-- maxUrls (number, optional): Max number of URLs to include (default: 10).
-- showFullText (boolean, optional): Whether to include llms-full.txt contents in the response.
-**Prompt Example:** "Generate an LLMs.txt file for example.com."
-**Usage Example:**
-\`\`\`json
-{
-  "name": "firecrawl_generate_llmstxt",
-  "arguments": {
-    "url": "https://example.com",
-    "maxUrls": 20,
-    "showFullText": true
-  }
-}
-\`\`\`
-**Returns:** LLMs.txt file contents (and optionally llms-full.txt).
-`,
-  inputSchema: {
-    type: 'object',
-    properties: {
-      url: {
-        type: 'string',
-        description: 'The URL to generate LLMs.txt from',
-      },
-      maxUrls: {
-        type: 'number',
-        description: 'Maximum number of URLs to process (1-100, default: 10)',
-      },
-      showFullText: {
-        type: 'boolean',
-        description: 'Whether to show the full LLMs-full.txt in the response',
-      },
-    },
-    required: ['url'],
-  },
-};
 // Type guards
 function isScrapeOptions(args) {
   return (typeof args === 'object' &&
@@ -672,6 +676,7 @@ function isMapOptions(args) {
     'url' in args &&
     typeof args.url === 'string');
 }
+//@ts-expect-error todo: fix
 function isCrawlOptions(args) {
   return (typeof args === 'object' &&
     args !== null &&
@@ -703,6 +708,24 @@ function isGenerateLLMsTextOptions(args) {
     'url' in args &&
     typeof args.url === 'string');
 }
+function removeEmptyTopLevel(obj) {
+  const out = {};
+  for (const [k, v] of Object.entries(obj)) {
+    if (v == null)
+      continue;
+    if (typeof v === 'string' && v.trim() === '')
+      continue;
+    if (Array.isArray(v) && v.length === 0)
+      continue;
+    if (typeof v === 'object' &&
+      !Array.isArray(v) &&
+      Object.keys(v).length === 0)
+      continue;
+    // @ts-expect-error dynamic assignment
+    out[k] = v;
+  }
+  return out;
+}
 // Server implementation
 const server = new Server({
   name: 'firecrawl-mcp',
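The new removeEmptyTopLevel helper drops top-level keys whose values are null/undefined, whitespace-only strings, empty arrays, or empty plain objects before options are forwarded to the Firecrawl client. A small illustration of its effect; the option names below are just examples:

```javascript
// Keys with "empty" values are dropped; meaningful falsy values like false survive.
const cleaned = removeEmptyTopLevel({
  formats: ['markdown'],  // kept
  includeTags: [],        // dropped: empty array
  headers: {},            // dropped: empty plain object
  mobile: false,          // kept: false is a real value
  waitFor: undefined,     // dropped: null/undefined
  excludeTags: '   ',     // dropped: whitespace-only string
});
// cleaned -> { formats: ['markdown'], mobile: false }
```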
@@ -710,7 +733,6 @@ const server = new Server({
 }, {
   capabilities: {
     tools: {},
-    logging: {},
   },
 });
 // Get optional API URL
@@ -743,14 +765,9 @@ function delay(ms) {
 }
 let isStdioTransport = false;
 function safeLog(level, data) {
-
-
-
-  }
-  else {
-    // For other transport types, use the normal logging mechanism
-    server.sendLoggingMessage({ level, data });
-  }
+  // Always log to stderr to avoid relying on MCP logging capability
+  const message = `[${level}] ${typeof data === 'object' ? JSON.stringify(data) : String(data)}`;
+  console.error(message);
 }
 // Add retry logic with exponential backoff
 async function withRetry(operation, context, attempt = 1) {
@@ -779,18 +796,16 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
     CHECK_CRAWL_STATUS_TOOL,
     SEARCH_TOOL,
     EXTRACT_TOOL,
-    DEEP_RESEARCH_TOOL,
-    GENERATE_LLMSTXT_TOOL,
   ],
 }));
 server.setRequestHandler(CallToolRequestSchema, async (request) => {
   const startTime = Date.now();
   try {
     const { name, arguments: args } = request.params;
-    const apiKey = process.env.CLOUD_SERVICE
+    const apiKey = process.env.CLOUD_SERVICE === 'true'
       ? request.params._meta?.apiKey
       : FIRECRAWL_API_KEY;
-    if (process.env.CLOUD_SERVICE && !apiKey) {
+    if (process.env.CLOUD_SERVICE === 'true' && !apiKey) {
       throw new Error('No API key provided');
     }
     const client = new FirecrawlApp({
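The key-resolution change above makes the cloud check strict: only the literal string 'true' in CLOUD_SERVICE switches the server to per-request keys from `_meta`. A standalone sketch of the resulting behaviour; the environment variable names match the diff, while the `request` object here is illustrative:

```javascript
// In cloud mode the API key must arrive with each tool call; otherwise the
// key configured in the environment (FIRECRAWL_API_KEY) is used.
const isCloud = process.env.CLOUD_SERVICE === 'true';
const apiKey = isCloud
  ? request.params._meta?.apiKey   // per-request key from the MCP client
  : process.env.FIRECRAWL_API_KEY; // self-hosted / local default
if (isCloud && !apiKey) {
  throw new Error('No API key provided');
}
```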
@@ -808,38 +823,46 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
           throw new Error('Invalid arguments for firecrawl_scrape');
         }
         const { url, ...options } = args;
+        const cleaned = removeEmptyTopLevel(options);
         try {
           const scrapeStartTime = Date.now();
           safeLog('info', `Starting scrape for URL: ${url} with options: ${JSON.stringify(options)}`);
-          const response = await client.
-            ...
-            // @ts-expect-error Extended API options including origin
+          const response = await client.scrape(url, {
+            ...cleaned,
             origin: 'mcp-server',
           });
           // Log performance metrics
           safeLog('info', `Scrape completed in ${Date.now() - scrapeStartTime}ms`);
-          if ('success' in response && !response.success) {
-            throw new Error(response.error || 'Scraping failed');
-          }
           // Format content based on requested formats
           const contentParts = [];
-
+          const formats = (options?.formats ?? []);
+          const hasFormat = (name) => Array.isArray(formats) &&
+            formats.some((f) => typeof f === 'string'
+              ? f === name
+              : f && typeof f === 'object' && f.type === name);
+          if (hasFormat('markdown') && response.markdown) {
            contentParts.push(response.markdown);
           }
-          if (
+          if (hasFormat('html') && response.html) {
            contentParts.push(response.html);
           }
-          if (
+          if (hasFormat('rawHtml') && response.rawHtml) {
            contentParts.push(response.rawHtml);
           }
-          if (
+          if (hasFormat('links') && response.links) {
            contentParts.push(response.links.join('\n'));
           }
-          if (
+          if (hasFormat('screenshot') && response.screenshot) {
            contentParts.push(response.screenshot);
           }
-          if (
-            contentParts.push(JSON.stringify(response.
+          if (hasFormat('json') && response.json) {
+            contentParts.push(JSON.stringify(response.json, null, 2));
+          }
+          if (hasFormat('changeTracking') && response.changeTracking) {
+            contentParts.push(JSON.stringify(response.changeTracking, null, 2));
+          }
+          if (hasFormat('summary') && response.summary) {
+            contentParts.push(JSON.stringify(response.summary, null, 2));
           }
           // If options.formats is empty, default to markdown
           if (!options.formats || options.formats.length === 0) {
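The `hasFormat` helper above is what lets the handler treat string formats and object formats (e.g. `{ type: 'json' }`) uniformly when deciding which response fields to emit. A standalone sketch of the same check with an illustrative formats array:

```javascript
// Mixed formats array as a client might send it.
const formats = ['markdown', { type: 'json', prompt: 'Extract prices' }];

const hasFormat = (name) => Array.isArray(formats) &&
  formats.some((f) => typeof f === 'string'
    ? f === name
    : f && typeof f === 'object' && f.type === name);

hasFormat('markdown'); // true  - matched as a plain string
hasFormat('json');     // true  - matched via the object's type field
hasFormat('html');     // false - not requested
```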
@@ -872,20 +895,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
           throw new Error('Invalid arguments for firecrawl_map');
         }
         const { url, ...options } = args;
-        const response = await client.
+        const response = await client.map(url, {
           ...options,
           // @ts-expect-error Extended API options including origin
           origin: 'mcp-server',
         });
-        if ('error' in response) {
-          throw new Error(response.error);
-        }
         if (!response.links) {
           throw new Error('No links received from Firecrawl API');
         }
         return {
           content: [
-            { type: 'text', text: trimResponseText(response.links
+            { type: 'text', text: trimResponseText(JSON.stringify(response.links, null, 2)) },
           ],
           isError: false,
         };
@@ -895,17 +915,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
           throw new Error('Invalid arguments for firecrawl_crawl');
         }
         const { url, ...options } = args;
-        const response = await withRetry(async () =>
-
-
-
-
-        }
+        const response = await withRetry(async () => client.crawl(url, {
+          ...options,
+          // @ts-expect-error Extended API options including origin
+          origin: 'mcp-server',
+        }), 'crawl operation');
         return {
           content: [
             {
               type: 'text',
-              text: trimResponseText(
+              text: trimResponseText(JSON.stringify(response)),
             },
           ],
           isError: false,
@@ -915,10 +934,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         if (!isStatusCheckOptions(args)) {
           throw new Error('Invalid arguments for firecrawl_check_crawl_status');
         }
-        const response = await client.
-        if (!response.success) {
-          throw new Error(response.error);
-        }
+        const response = await client.getCrawlStatus(args.id);
         const status = `Crawl Status:
 Status: ${response.status}
 Progress: ${response.completed}/${response.total}
@@ -935,19 +951,18 @@ ${response.data.length > 0 ? '\nResults:\n' + formatResults(response.data) : ''}
           throw new Error('Invalid arguments for firecrawl_search');
         }
         try {
-          const response = await withRetry(async () => client.search(args.query, {
-
-
-
-
-          const results = response.data
-            .map((result) => `URL: ${result.url}
-Title: ${result.title || 'No title'}
-Description: ${result.description || 'No description'}
-${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
-            .join('\n\n');
+          const response = await withRetry(async () => client.search(args.query, {
+            ...args,
+            // @ts-expect-error Extended API options including origin
+            origin: 'mcp-server',
+          }), 'search operation');
           return {
-            content: [
+            content: [
+              {
+                type: 'text',
+                text: trimResponseText(JSON.stringify(response, null, 2)),
+              },
+            ],
             isError: false,
           };
         }
@@ -972,9 +987,9 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
         if (FIRECRAWL_API_URL) {
           safeLog('info', 'Using self-hosted instance for extraction');
         }
-        const extractResponse = await withRetry(async () => client.extract(
+        const extractResponse = await withRetry(async () => client.extract({
+          urls: args.urls,
           prompt: args.prompt,
-          systemPrompt: args.systemPrompt,
           schema: args.schema,
           allowExternalLinks: args.allowExternalLinks,
           enableWebSearch: args.enableWebSearch,
@@ -1025,57 +1040,6 @@ ${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
           };
         }
       }
-      case 'firecrawl_deep_research': {
-        if (!args || typeof args !== 'object' || !('query' in args)) {
-          throw new Error('Invalid arguments for firecrawl_deep_research');
-        }
-        try {
-          const researchStartTime = Date.now();
-          safeLog('info', `Starting deep research for query: ${args.query}`);
-          const response = await client.deepResearch(args.query, {
-            maxDepth: args.maxDepth,
-            timeLimit: args.timeLimit,
-            maxUrls: args.maxUrls,
-            // @ts-expect-error Extended API options including origin
-            origin: 'mcp-server',
-          },
-          // Activity callback
-          (activity) => {
-            safeLog('info', `Research activity: ${activity.message} (Depth: ${activity.depth})`);
-          },
-          // Source callback
-          (source) => {
-            safeLog('info', `Research source found: ${source.url}${source.title ? ` - ${source.title}` : ''}`);
-          });
-          // Log performance metrics
-          safeLog('info', `Deep research completed in ${Date.now() - researchStartTime}ms`);
-          if (!response.success) {
-            throw new Error(response.error || 'Deep research failed');
-          }
-          // Format the results
-          const formattedResponse = {
-            finalAnalysis: response.data.finalAnalysis,
-            activities: response.data.activities,
-            sources: response.data.sources,
-          };
-          return {
-            content: [
-              {
-                type: 'text',
-                text: trimResponseText(formattedResponse.finalAnalysis),
-              },
-            ],
-            isError: false,
-          };
-        }
-        catch (error) {
-          const errorMessage = error instanceof Error ? error.message : String(error);
-          return {
-            content: [{ type: 'text', text: trimResponseText(errorMessage) }],
-            isError: true,
-          };
-        }
-      }
       case 'firecrawl_generate_llmstxt': {
         if (!isGenerateLLMsTextOptions(args)) {
           throw new Error('Invalid arguments for firecrawl_generate_llmstxt');
@@ -1152,8 +1116,7 @@ function formatResults(data) {
   return data
     .map((doc) => {
       const content = doc.markdown || doc.html || doc.rawHtml || 'No content';
-      return `
-Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''}
+      return `Content: ${content.substring(0, 100)}${content.length > 100 ? '...' : ''}
 ${doc.metadata?.title ? `Title: ${doc.metadata.title}` : ''}`;
     })
     .join('\n\n');
@@ -1214,6 +1177,92 @@ async function runSSELocalServer() {
     console.error('Error starting server:', error);
   }
 }
+async function runHTTPStreamableServer() {
+  const app = express();
+  app.use(express.json());
+  const transports = {};
+  // A single endpoint handles all MCP requests.
+  app.all('/mcp', async (req, res) => {
+    try {
+      const sessionId = req.headers['mcp-session-id'];
+      let transport;
+      if (sessionId && transports[sessionId]) {
+        transport = transports[sessionId];
+      }
+      else if (!sessionId &&
+        req.method === 'POST' &&
+        req.body &&
+        typeof req.body === 'object' &&
+        req.body.method === 'initialize') {
+        transport = new StreamableHTTPServerTransport({
+          sessionIdGenerator: () => {
+            const id = randomUUID();
+            return id;
+          },
+          onsessioninitialized: (sid) => {
+            transports[sid] = transport;
+          },
+        });
+        transport.onclose = () => {
+          const sid = transport.sessionId;
+          if (sid && transports[sid]) {
+            delete transports[sid];
+          }
+        };
+        console.log('Creating server instance');
+        console.log('Connecting transport to server');
+        await server.connect(transport);
+        await transport.handleRequest(req, res, req.body);
+        return;
+      }
+      else {
+        res.status(400).json({
+          jsonrpc: '2.0',
+          error: {
+            code: -32000,
+            message: 'Invalid or missing session ID',
+          },
+          id: null,
+        });
+        return;
+      }
+      await transport.handleRequest(req, res, req.body);
+    }
+    catch (error) {
+      if (!res.headersSent) {
+        res.status(500).json({
+          jsonrpc: '2.0',
+          error: {
+            code: -32603,
+            message: 'Internal server error',
+          },
+          id: null,
+        });
+      }
+    }
+  });
+  const PORT = 3000;
+  const appServer = app.listen(PORT, () => {
+    console.log(`MCP Streamable HTTP Server listening on port ${PORT}`);
+  });
+  process.on('SIGINT', async () => {
+    console.log('Shutting down server...');
+    for (const sessionId in transports) {
+      try {
+        console.log(`Closing transport for session ${sessionId}`);
+        await transports[sessionId].close();
+        delete transports[sessionId];
+      }
+      catch (error) {
+        console.error(`Error closing transport for session ${sessionId}:`, error);
+      }
+    }
+    appServer.close(() => {
+      console.log('Server shutdown complete');
+      process.exit(0);
+    });
+  });
+}
 async function runSSECloudServer() {
   const transports = {};
   const app = express();
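For completeness, a client-side sketch (not part of the package) of how the new /mcp endpoint is driven: the first POST must be an initialize request without a session header, after which the server's StreamableHTTPServerTransport issues an `mcp-session-id` header that later requests echo back. The protocol version string is an assumption, the port matches the hard-coded 3000 above, and the snippet is meant to run in an ES module or async function:

```javascript
// Hypothetical first request against the Streamable HTTP endpoint on port 3000.
const res = await fetch('http://localhost:3000/mcp', {
  method: 'POST',
  headers: {
    'content-type': 'application/json',
    accept: 'application/json, text/event-stream',
  },
  body: JSON.stringify({
    jsonrpc: '2.0',
    id: 1,
    method: 'initialize',
    params: {
      protocolVersion: '2025-03-26', // assumed protocol revision
      capabilities: {},
      clientInfo: { name: 'example-client', version: '0.0.1' },
    },
  }),
});
// The transport assigns a session id; reuse it on follow-up requests.
const sessionId = res.headers.get('mcp-session-id');
```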
@@ -1277,6 +1326,13 @@ else if (process.env.SSE_LOCAL === 'true') {
     process.exit(1);
   });
 }
+else if (process.env.HTTP_STREAMABLE_SERVER === 'true') {
+  console.log('Running HTTP Streamable Server');
+  runHTTPStreamableServer().catch((error) => {
+    console.error('Fatal error running server:', error);
+    process.exit(1);
+  });
+}
 else {
   runLocalServer().catch((error) => {
     console.error('Fatal error running server:', error);
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "firecrawl-mcp",
-  "version": "
+  "version": "2.0.1",
   "description": "MCP server for Firecrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
   "type": "module",
   "bin": {
@@ -20,15 +20,17 @@
     "lint:fix": "eslint src/**/*.ts --fix",
     "format": "prettier --write .",
     "prepare": "npm run build",
-    "publish": "npm run build && npm publish"
+    "publish": "npm run build && npm publish",
+    "publish-beta": "npm run build && npm publish --tag beta"
   },
   "license": "MIT",
   "dependencies": {
-    "@mendable/firecrawl-js": "^
-    "@modelcontextprotocol/sdk": "^1.
+    "@mendable/firecrawl-js": "^3.0.3",
+    "@modelcontextprotocol/sdk": "^1.17.3",
     "dotenv": "^16.4.7",
     "express": "^5.1.0",
     "shx": "^0.3.4",
+    "typescript": "^5.9.2",
     "ws": "^8.18.1"
   },
   "devDependencies": {
@@ -43,8 +45,7 @@
     "jest": "^29.7.0",
     "jest-mock-extended": "^4.0.0-beta1",
     "prettier": "^3.1.1",
-    "ts-jest": "^29.1.1"
-    "typescript": "^5.3.3"
+    "ts-jest": "^29.1.1"
   },
   "engines": {
     "node": ">=18.0.0"
@@ -58,11 +59,11 @@
   ],
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/
+    "url": "git+https://github.com/firecrawl/firecrawl-mcp-server.git"
   },
   "author": "vrknetha",
   "bugs": {
-    "url": "https://github.com/
+    "url": "https://github.com/firecrawl/firecrawl-mcp-server/issues"
   },
-  "homepage": "https://github.com/
+  "homepage": "https://github.com/firecrawl/firecrawl-mcp-server#readme"
 }
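The dependency bumps (firecrawl-js ^3.x, MCP SDK ^1.17) go hand in hand with the renamed client calls visible in the dist/index.js hunks (scrape, map, crawl, getCrawlStatus). A hypothetical programmatic launch of the published package, selecting the transport through the environment variables the entry point checks in the diff (SSE_LOCAL, HTTP_STREAMABLE_SERVER; stdio is the fallback); the API key value is a placeholder:

```javascript
import { spawn } from 'node:child_process';

// Illustrative only: spawn the published CLI and serve MCP over Streamable HTTP.
const child = spawn('npx', ['-y', 'firecrawl-mcp'], {
  env: {
    ...process.env,
    FIRECRAWL_API_KEY: 'fc-YOUR-KEY', // placeholder credential
    HTTP_STREAMABLE_SERVER: 'true',   // listen on /mcp at port 3000 instead of stdio
  },
  stdio: 'inherit',
});
child.on('exit', (code) => console.log(`firecrawl-mcp exited with code ${code}`));
```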