firecrawl-mcp 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +398 -0
- package/dist/jest.setup.js +58 -0
- package/dist/src/index.js +1053 -0
- package/dist/src/index.test.js +225 -0
- package/package.json +65 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import FirecrawlApp from '@mendable/firecrawl-js';
|
|
2
|
+
import { describe, expect, jest, test, beforeEach, afterEach, } from '@jest/globals';
|
|
3
|
+
import { mock } from 'jest-mock-extended';
|
|
4
|
+
// Mock FirecrawlApp
|
|
5
|
+
jest.mock('@mendable/firecrawl-js');
|
|
6
|
+
// Unit tests for the FireCrawl MCP tool dispatch. Each test stubs the mocked
// Firecrawl client, sends a call_tool-shaped request through a minimal
// handler, and verifies the resulting { content, isError } response.
describe('FireCrawl Tool Tests', () => {
  let mockClient;
  let requestHandler;

  beforeEach(() => {
    jest.clearAllMocks();
    // Fully-mocked Firecrawl client; each test stubs only the methods it uses.
    // (A previous version also built an unused `new FirecrawlApp(...)`
    // instance here and Object.assign-ed the mock onto it; that instance was
    // dead code — the handler below closes over `mockClient` directly.)
    mockClient = mock();
    // Simulates the MCP server's call_tool dispatch: unwrap the request
    // params and delegate to the shared handleRequest helper.
    requestHandler = async (request) => {
      const { name, arguments: args } = request.params;
      if (!args) {
        throw new Error('No arguments provided');
      }
      return handleRequest(name, args, mockClient);
    };
  });

  afterEach(() => {
    jest.clearAllMocks();
  });

  // Test scrape functionality
  test('should handle scrape request', async () => {
    const url = 'https://example.com';
    const options = { formats: ['markdown'] };
    const mockResponse = {
      success: true,
      markdown: '# Test Content',
      html: undefined,
      rawHtml: undefined,
      url: 'https://example.com',
      actions: undefined,
    };
    mockClient.scrapeUrl.mockResolvedValueOnce(mockResponse);
    const response = await requestHandler({
      method: 'call_tool',
      params: {
        name: 'firecrawl_scrape',
        arguments: { url, ...options },
      },
    });
    expect(response).toEqual({
      content: [{ type: 'text', text: '# Test Content' }],
      isError: false,
    });
    // The handler forwards the whole argument object as options, so `url`
    // appears both as the first argument and inside the options bag.
    expect(mockClient.scrapeUrl).toHaveBeenCalledWith(url, {
      formats: ['markdown'],
      url,
    });
  });

  // Test batch scrape functionality
  test('should handle batch scrape request', async () => {
    const urls = ['https://example.com'];
    const options = { formats: ['markdown'] };
    mockClient.asyncBatchScrapeUrls.mockResolvedValueOnce({
      success: true,
      id: 'test-batch-id',
    });
    const response = await requestHandler({
      method: 'call_tool',
      params: {
        name: 'firecrawl_batch_scrape',
        arguments: { urls, options },
      },
    });
    // The handler reports its own internal queue ID ("batch_..."), not the
    // remote job ID returned by the mock.
    expect(response.content[0].text).toContain('Batch operation queued with ID: batch_');
    expect(mockClient.asyncBatchScrapeUrls).toHaveBeenCalledWith(urls, options);
  });

  // Test search functionality
  test('should handle search request', async () => {
    const query = 'test query';
    const scrapeOptions = { formats: ['markdown'] };
    const mockSearchResponse = {
      success: true,
      data: [
        {
          url: 'https://example.com',
          title: 'Test Page',
          description: 'Test Description',
          markdown: '# Test Content',
          actions: undefined,
        },
      ],
    };
    mockClient.search.mockResolvedValueOnce(mockSearchResponse);
    const response = await requestHandler({
      method: 'call_tool',
      params: {
        name: 'firecrawl_search',
        arguments: { query, scrapeOptions },
      },
    });
    expect(response.isError).toBe(false);
    expect(response.content[0].text).toContain('Test Page');
    expect(mockClient.search).toHaveBeenCalledWith(query, scrapeOptions);
  });

  // Test crawl functionality
  test('should handle crawl request', async () => {
    const url = 'https://example.com';
    const options = { maxDepth: 2 };
    mockClient.asyncCrawlUrl.mockResolvedValueOnce({
      success: true,
      id: 'test-crawl-id',
    });
    const response = await requestHandler({
      method: 'call_tool',
      params: {
        name: 'firecrawl_crawl',
        arguments: { url, ...options },
      },
    });
    expect(response.isError).toBe(false);
    expect(response.content[0].text).toContain('test-crawl-id');
    // As with scrape, the full argument object is forwarded as options.
    expect(mockClient.asyncCrawlUrl).toHaveBeenCalledWith(url, {
      maxDepth: 2,
      url,
    });
  });

  // Test error handling
  test('should handle API errors', async () => {
    const url = 'https://example.com';
    mockClient.scrapeUrl.mockRejectedValueOnce(new Error('API Error'));
    const response = await requestHandler({
      method: 'call_tool',
      params: {
        name: 'firecrawl_scrape',
        arguments: { url },
      },
    });
    // Errors are converted into isError responses, never rethrown.
    expect(response.isError).toBe(true);
    expect(response.content[0].text).toContain('API Error');
  });

  // Test rate limiting
  test('should handle rate limits', async () => {
    const url = 'https://example.com';
    // Mock rate limit error
    mockClient.scrapeUrl.mockRejectedValueOnce(new Error('rate limit exceeded'));
    const response = await requestHandler({
      method: 'call_tool',
      params: {
        name: 'firecrawl_scrape',
        arguments: { url },
      },
    });
    expect(response.isError).toBe(true);
    expect(response.content[0].text).toContain('rate limit exceeded');
  });
});
|
|
154
|
+
// Helper function to simulate request handling
/**
 * Dispatches a tool invocation to a Firecrawl client and normalizes the
 * result into an MCP-style response.
 *
 * @param {string} name - Tool name, e.g. 'firecrawl_scrape'.
 * @param {Object} args - Tool arguments; the expected shape depends on the tool.
 * @param {Object} client - Firecrawl client (a mock in these tests).
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError: boolean}>}
 *   Success responses carry isError: false. Any thrown error — including an
 *   unknown tool name — is converted into an isError: true response whose
 *   text is the error message; this function never rejects.
 */
async function handleRequest(name, args, client) {
  try {
    switch (name) {
      case 'firecrawl_scrape': {
        // The full args object is forwarded as scrape options (so `url`
        // appears in both positions — the tests assert this).
        const response = await client.scrapeUrl(args.url, args);
        if (!response.success) {
          throw new Error(response.error || 'Scraping failed');
        }
        return {
          content: [
            { type: 'text', text: response.markdown || 'No content available' },
          ],
          isError: false,
        };
      }
      case 'firecrawl_batch_scrape': {
        const response = await client.asyncBatchScrapeUrls(args.urls, args.options);
        // Fix: previously a failed queue request was still reported as
        // queued; now it surfaces as an error like the other cases.
        if (!response.success) {
          throw new Error(response.error || 'Batch scraping failed');
        }
        return {
          content: [
            {
              type: 'text',
              text: `Batch operation queued with ID: batch_1. Use firecrawl_check_batch_status to check progress.`,
            },
          ],
          isError: false,
        };
      }
      case 'firecrawl_search': {
        const response = await client.search(args.query, args.scrapeOptions);
        if (!response.success) {
          throw new Error(response.error || 'Search failed');
        }
        // Render each hit as a URL/Title/Description block, appending the
        // scraped markdown only when present.
        const results = response.data
          .map((result) => `URL: ${result.url}\nTitle: ${result.title || 'No title'}\nDescription: ${result.description || 'No description'}\n${result.markdown ? `\nContent:\n${result.markdown}` : ''}`)
          .join('\n\n');
        return {
          content: [{ type: 'text', text: results }],
          isError: false,
        };
      }
      case 'firecrawl_crawl': {
        const response = await client.asyncCrawlUrl(args.url, args);
        if (!response.success) {
          // Fix: fall back to a descriptive message instead of
          // new Error(undefined), matching the other failure branches.
          throw new Error(response.error || 'Crawling failed');
        }
        return {
          content: [
            {
              type: 'text',
              text: `Started crawl for ${args.url} with job ID: ${response.id}`,
            },
          ],
          isError: false,
        };
      }
      default:
        throw new Error(`Unknown tool: ${name}`);
    }
  }
  catch (error) {
    // All failures funnel into a uniform error response.
    return {
      content: [
        {
          type: 'text',
          text: error instanceof Error ? error.message : String(error),
        },
      ],
      isError: true,
    };
  }
}
|
package/package.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "firecrawl-mcp",
|
|
3
|
+
"version": "1.3.0",
|
|
4
|
+
"description": "MCP server for FireCrawl web scraping integration. Supports both cloud and self-hosted instances. Features include web scraping, batch processing, structured data extraction, and LLM-powered content analysis.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": "dist/src/index.js",
|
|
7
|
+
"files": [
|
|
8
|
+
"dist",
|
|
9
|
+
"README.md",
|
|
10
|
+
"LICENSE"
|
|
11
|
+
],
|
|
12
|
+
"publishConfig": {
|
|
13
|
+
"access": "public"
|
|
14
|
+
},
|
|
15
|
+
"scripts": {
|
|
16
|
+
"build": "tsc",
|
|
17
|
+
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js",
|
|
18
|
+
"start": "node dist/src/index.js",
|
|
19
|
+
"lint": "eslint src/**/*.ts",
|
|
20
|
+
"lint:fix": "eslint src/**/*.ts --fix",
|
|
21
|
+
"format": "prettier --write .",
|
|
22
|
+
"prepare": "npm run build",
|
|
23
|
+
"publish": "npm run build && npm publish"
|
|
24
|
+
},
|
|
25
|
+
"license": "ISC",
|
|
26
|
+
"dependencies": {
|
|
27
|
+
"@mendable/firecrawl-js": "^1.16.0",
|
|
28
|
+
"@modelcontextprotocol/sdk": "^1.4.1",
|
|
29
|
+
"dotenv": "^16.4.7",
|
|
30
|
+
"p-queue": "^8.0.1"
|
|
31
|
+
},
|
|
32
|
+
"devDependencies": {
|
|
33
|
+
"@jest/globals": "^29.7.0",
|
|
34
|
+
"@types/jest": "^29.5.14",
|
|
35
|
+
"@types/node": "^20.10.5",
|
|
36
|
+
"@typescript-eslint/eslint-plugin": "^7.0.0",
|
|
37
|
+
"@typescript-eslint/parser": "^7.0.0",
|
|
38
|
+
"eslint": "^8.56.0",
|
|
39
|
+
"eslint-config-prettier": "^9.1.0",
|
|
40
|
+
"jest": "^29.7.0",
|
|
41
|
+
"jest-mock-extended": "^4.0.0-beta1",
|
|
42
|
+
"prettier": "^3.1.1",
|
|
43
|
+
"ts-jest": "^29.1.1",
|
|
44
|
+
"typescript": "^5.3.3"
|
|
45
|
+
},
|
|
46
|
+
"engines": {
|
|
47
|
+
"node": ">=18.0.0"
|
|
48
|
+
},
|
|
49
|
+
"keywords": [
|
|
50
|
+
"mcp",
|
|
51
|
+
"firecrawl",
|
|
52
|
+
"web-scraping",
|
|
53
|
+
"crawler",
|
|
54
|
+
"content-extraction"
|
|
55
|
+
],
|
|
56
|
+
"repository": {
|
|
57
|
+
"type": "git",
|
|
58
|
+
"url": "git+https://github.com/mendableai/firecrawl-mcp-server.git"
|
|
59
|
+
},
|
|
60
|
+
"author": "vrknetha",
|
|
61
|
+
"bugs": {
|
|
62
|
+
"url": "https://github.com/mendableai/firecrawl-mcp-server/issues"
|
|
63
|
+
},
|
|
64
|
+
"homepage": "https://github.com/mendableai/firecrawl-mcp-server#readme"
|
|
65
|
+
}
|