@mendable/firecrawl 1.2.2 → 1.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -0
- package/dist/index.cjs +778 -0
- package/dist/index.d.cts +452 -0
- package/dist/index.d.ts +452 -0
- package/dist/index.js +742 -0
- package/package.json +12 -14
- package/src/__tests__/index.test.ts +18 -9
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +226 -113
- package/src/index.ts +1100 -130
- package/tsconfig.json +19 -105
- package/tsup.config.ts +9 -0
- package/build/cjs/index.js +0 -354
- package/build/cjs/package.json +0 -1
- package/build/esm/index.js +0 -346
- package/build/esm/package.json +0 -1
- package/types/index.d.ts +0 -260
package/package.json
CHANGED
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl",
|
|
3
|
-
"version": "1.2.2",
|
|
3
|
+
"version": "1.18.1",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
|
-
"main": "
|
|
6
|
-
"types": "
|
|
7
|
-
"type": "module",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
8
7
|
"exports": {
|
|
9
|
-
"
|
|
10
|
-
|
|
11
|
-
"
|
|
12
|
-
|
|
13
|
-
"import": {
|
|
14
|
-
"types": "./types/index.d.ts",
|
|
15
|
-
"default": "./build/esm/index.js"
|
|
8
|
+
"./package.json": "./package.json",
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./dist/index.js",
|
|
11
|
+
"default": "./dist/index.cjs"
|
|
16
12
|
}
|
|
17
13
|
},
|
|
14
|
+
"type": "module",
|
|
18
15
|
"scripts": {
|
|
19
|
-
"build": "
|
|
16
|
+
"build": "tsup",
|
|
20
17
|
"build-and-publish": "npm run build && npm publish --access public",
|
|
21
18
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
22
19
|
"test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
@@ -29,10 +26,8 @@
|
|
|
29
26
|
"license": "MIT",
|
|
30
27
|
"dependencies": {
|
|
31
28
|
"axios": "^1.6.8",
|
|
32
|
-
"dotenv": "^16.4.5",
|
|
33
29
|
"isows": "^1.0.4",
|
|
34
30
|
"typescript-event-target": "^1.1.1",
|
|
35
|
-
"uuid": "^9.0.1",
|
|
36
31
|
"zod": "^3.23.8",
|
|
37
32
|
"zod-to-json-schema": "^3.23.0"
|
|
38
33
|
},
|
|
@@ -41,6 +36,8 @@
|
|
|
41
36
|
},
|
|
42
37
|
"homepage": "https://github.com/mendableai/firecrawl#readme",
|
|
43
38
|
"devDependencies": {
|
|
39
|
+
"uuid": "^9.0.1",
|
|
40
|
+
"dotenv": "^16.4.5",
|
|
44
41
|
"@jest/globals": "^29.7.0",
|
|
45
42
|
"@types/axios": "^0.14.0",
|
|
46
43
|
"@types/dotenv": "^8.2.0",
|
|
@@ -50,6 +47,7 @@
|
|
|
50
47
|
"@types/uuid": "^9.0.8",
|
|
51
48
|
"jest": "^29.7.0",
|
|
52
49
|
"ts-jest": "^29.2.2",
|
|
50
|
+
"tsup": "^8.2.4",
|
|
53
51
|
"typescript": "^5.4.5"
|
|
54
52
|
},
|
|
55
53
|
"keywords": [
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { describe,
|
|
2
|
-
import axios from 'axios';
|
|
3
|
-
import FirecrawlApp from '../index';
|
|
1
|
+
import { describe, expect, jest, test } from '@jest/globals';
|
|
4
2
|
|
|
5
|
-
import
|
|
3
|
+
import FirecrawlApp from '../index';
|
|
4
|
+
import axios from 'axios';
|
|
6
5
|
import { join } from 'path';
|
|
6
|
+
import { readFile } from 'fs/promises';
|
|
7
7
|
|
|
8
8
|
// Mock jest and set the type
|
|
9
9
|
jest.mock('axios');
|
|
@@ -14,13 +14,22 @@ async function loadFixture(name: string): Promise<string> {
|
|
|
14
14
|
return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
+
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
|
|
18
|
+
|
|
17
19
|
describe('the firecrawl JS SDK', () => {
|
|
18
20
|
|
|
19
|
-
test('Should require an API key
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
test('Should require an API key only for cloud service', async () => {
|
|
22
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
23
|
+
// Should throw for cloud service
|
|
24
|
+
expect(() => {
|
|
25
|
+
new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
|
|
26
|
+
}).toThrow('No API key provided');
|
|
27
|
+
} else {
|
|
28
|
+
// Should not throw for self-hosted
|
|
29
|
+
expect(() => {
|
|
30
|
+
new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
|
|
31
|
+
}).not.toThrow();
|
|
32
|
+
}
|
|
24
33
|
});
|
|
25
34
|
|
|
26
35
|
test('Should return scraped data from a /scrape API call', async () => {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import FirecrawlApp, { CrawlParams, CrawlResponse, CrawlStatusResponse, MapResponse,
|
|
1
|
+
import FirecrawlApp, { type CrawlParams, type CrawlResponse, type CrawlStatusResponse, type MapResponse, type ScrapeResponse } from '../../../index';
|
|
2
2
|
import { v4 as uuidv4 } from 'uuid';
|
|
3
3
|
import dotenv from 'dotenv';
|
|
4
4
|
import { describe, test, expect } from '@jest/globals';
|
|
@@ -6,18 +6,31 @@ import { describe, test, expect } from '@jest/globals';
|
|
|
6
6
|
dotenv.config();
|
|
7
7
|
|
|
8
8
|
const TEST_API_KEY = process.env.TEST_API_KEY;
|
|
9
|
-
const API_URL = "
|
|
9
|
+
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
|
|
10
10
|
|
|
11
11
|
describe('FirecrawlApp E2E Tests', () => {
|
|
12
|
-
test.concurrent('should throw error for no API key', async () => {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
test.concurrent('should throw error for no API key only for cloud service', async () => {
|
|
13
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
14
|
+
// Should throw for cloud service
|
|
15
|
+
expect(() => {
|
|
16
|
+
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
|
17
|
+
}).toThrow("No API key provided");
|
|
18
|
+
} else {
|
|
19
|
+
// Should not throw for self-hosted
|
|
20
|
+
expect(() => {
|
|
21
|
+
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
|
22
|
+
}).not.toThrow();
|
|
23
|
+
}
|
|
16
24
|
});
|
|
17
25
|
|
|
18
26
|
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
|
19
|
-
|
|
20
|
-
|
|
27
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
28
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
29
|
+
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404");
|
|
30
|
+
} else {
|
|
31
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
32
|
+
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
|
33
|
+
}
|
|
21
34
|
});
|
|
22
35
|
|
|
23
36
|
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
|
@@ -28,14 +41,22 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
28
41
|
|
|
29
42
|
test.concurrent('should return successful response with valid preview token', async () => {
|
|
30
43
|
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
|
31
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai')
|
|
44
|
+
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
45
|
+
if (!response.success) {
|
|
46
|
+
throw new Error(response.error);
|
|
47
|
+
}
|
|
48
|
+
|
|
32
49
|
expect(response).not.toBeNull();
|
|
33
50
|
expect(response?.markdown).toContain("_Roast_");
|
|
34
51
|
}, 30000); // 30 seconds timeout
|
|
35
52
|
|
|
36
53
|
test.concurrent('should return successful response for valid scrape', async () => {
|
|
37
54
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
38
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai')
|
|
55
|
+
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
56
|
+
if (!response.success) {
|
|
57
|
+
throw new Error(response.error);
|
|
58
|
+
}
|
|
59
|
+
|
|
39
60
|
expect(response).not.toBeNull();
|
|
40
61
|
expect(response).not.toHaveProperty('content'); // v0
|
|
41
62
|
expect(response).not.toHaveProperty('html');
|
|
@@ -58,7 +79,11 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
58
79
|
onlyMainContent: true,
|
|
59
80
|
timeout: 30000,
|
|
60
81
|
waitFor: 1000
|
|
61
|
-
})
|
|
82
|
+
});
|
|
83
|
+
if (!response.success) {
|
|
84
|
+
throw new Error(response.error);
|
|
85
|
+
}
|
|
86
|
+
|
|
62
87
|
expect(response).not.toBeNull();
|
|
63
88
|
expect(response).not.toHaveProperty('content'); // v0
|
|
64
89
|
expect(response.markdown).toContain("_Roast_");
|
|
@@ -71,6 +96,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
71
96
|
expect(response.links?.length).toBeGreaterThan(0);
|
|
72
97
|
expect(response.links?.[0]).toContain("https://");
|
|
73
98
|
expect(response.metadata).not.toBeNull();
|
|
99
|
+
expect(response.metadata).not.toBeUndefined();
|
|
74
100
|
expect(response.metadata).toHaveProperty("title");
|
|
75
101
|
expect(response.metadata).toHaveProperty("description");
|
|
76
102
|
expect(response.metadata).toHaveProperty("keywords");
|
|
@@ -85,49 +111,75 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
85
111
|
expect(response.metadata).not.toHaveProperty("pageStatusCode");
|
|
86
112
|
expect(response.metadata).toHaveProperty("statusCode");
|
|
87
113
|
expect(response.metadata).not.toHaveProperty("pageError");
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
114
|
+
|
|
115
|
+
if (response.metadata !== undefined) {
|
|
116
|
+
expect(response.metadata.error).toBeUndefined();
|
|
117
|
+
expect(response.metadata.title).toBe("Roast My Website");
|
|
118
|
+
expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
|
|
119
|
+
expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
|
|
120
|
+
expect(response.metadata.robots).toBe("follow, index");
|
|
121
|
+
expect(response.metadata.ogTitle).toBe("Roast My Website");
|
|
122
|
+
expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
|
|
123
|
+
expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
|
|
124
|
+
expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
|
|
125
|
+
expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
|
|
126
|
+
expect(response.metadata.ogSiteName).toBe("Roast My Website");
|
|
127
|
+
expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
|
|
128
|
+
expect(response.metadata.statusCode).toBe(200);
|
|
129
|
+
}
|
|
130
|
+
}, 30000); // 30 seconds timeout
|
|
131
|
+
|
|
132
|
+
test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => {
|
|
133
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
134
|
+
const response = await app.scrapeUrl(
|
|
135
|
+
'https://roastmywebsite.ai', {
|
|
136
|
+
formats: ['screenshot@fullPage'],
|
|
137
|
+
});
|
|
138
|
+
if (!response.success) {
|
|
139
|
+
throw new Error(response.error);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
expect(response).not.toBeNull();
|
|
143
|
+
expect(response.screenshot).not.toBeUndefined();
|
|
144
|
+
expect(response.screenshot).not.toBeNull();
|
|
145
|
+
expect(response.screenshot).toContain("https://");
|
|
101
146
|
}, 30000); // 30 seconds timeout
|
|
102
147
|
|
|
103
148
|
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
|
104
149
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
105
|
-
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf')
|
|
150
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
|
151
|
+
if (!response.success) {
|
|
152
|
+
throw new Error(response.error);
|
|
153
|
+
}
|
|
154
|
+
|
|
106
155
|
expect(response).not.toBeNull();
|
|
107
156
|
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
108
157
|
}, 30000); // 30 seconds timeout
|
|
109
158
|
|
|
110
159
|
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
|
111
160
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
112
|
-
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001')
|
|
161
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
|
162
|
+
if (!response.success) {
|
|
163
|
+
throw new Error(response.error);
|
|
164
|
+
}
|
|
165
|
+
|
|
113
166
|
expect(response).not.toBeNull();
|
|
114
167
|
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
115
168
|
}, 30000); // 30 seconds timeout
|
|
116
169
|
|
|
117
170
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
|
|
171
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
172
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
173
|
+
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
|
|
174
|
+
} else {
|
|
175
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
176
|
+
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
|
177
|
+
}
|
|
126
178
|
});
|
|
127
179
|
|
|
128
180
|
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
|
129
181
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
130
|
-
const response = await app.crawlUrl('https://roastmywebsite.ai', {},
|
|
182
|
+
const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse;
|
|
131
183
|
expect(response).not.toBeNull();
|
|
132
184
|
expect(response).toHaveProperty("total");
|
|
133
185
|
expect(response.total).toBeGreaterThan(0);
|
|
@@ -138,21 +190,25 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
138
190
|
expect(response).toHaveProperty("status");
|
|
139
191
|
expect(response.status).toBe("completed");
|
|
140
192
|
expect(response).not.toHaveProperty("next"); // wait until done
|
|
141
|
-
expect(response.data
|
|
142
|
-
expect(response.data
|
|
143
|
-
expect(response.data
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
193
|
+
expect(response.data.length).toBeGreaterThan(0);
|
|
194
|
+
expect(response.data[0]).not.toBeNull();
|
|
195
|
+
expect(response.data[0]).not.toBeUndefined();
|
|
196
|
+
if (response.data[0]) {
|
|
197
|
+
expect(response.data[0]).toHaveProperty("markdown");
|
|
198
|
+
expect(response.data[0].markdown).toContain("_Roast_");
|
|
199
|
+
expect(response.data[0]).not.toHaveProperty('content'); // v0
|
|
200
|
+
expect(response.data[0]).not.toHaveProperty("html");
|
|
201
|
+
expect(response.data[0]).not.toHaveProperty("rawHtml");
|
|
202
|
+
expect(response.data[0]).not.toHaveProperty("screenshot");
|
|
203
|
+
expect(response.data[0]).not.toHaveProperty("links");
|
|
204
|
+
expect(response.data[0]).toHaveProperty("metadata");
|
|
205
|
+
expect(response.data[0].metadata).toHaveProperty("title");
|
|
206
|
+
expect(response.data[0].metadata).toHaveProperty("description");
|
|
207
|
+
expect(response.data[0].metadata).toHaveProperty("language");
|
|
208
|
+
expect(response.data[0].metadata).toHaveProperty("sourceURL");
|
|
209
|
+
expect(response.data[0].metadata).toHaveProperty("statusCode");
|
|
210
|
+
expect(response.data[0].metadata).not.toHaveProperty("error");
|
|
211
|
+
}
|
|
156
212
|
}, 60000); // 60 seconds timeout
|
|
157
213
|
|
|
158
214
|
test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
|
|
@@ -173,7 +229,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
173
229
|
onlyMainContent: true,
|
|
174
230
|
waitFor: 1000
|
|
175
231
|
}
|
|
176
|
-
} as CrawlParams,
|
|
232
|
+
} as CrawlParams, 30) as CrawlStatusResponse;
|
|
177
233
|
expect(response).not.toBeNull();
|
|
178
234
|
expect(response).toHaveProperty("total");
|
|
179
235
|
expect(response.total).toBeGreaterThan(0);
|
|
@@ -184,41 +240,45 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
184
240
|
expect(response).toHaveProperty("status");
|
|
185
241
|
expect(response.status).toBe("completed");
|
|
186
242
|
expect(response).not.toHaveProperty("next");
|
|
187
|
-
expect(response.data
|
|
188
|
-
expect(response.data
|
|
189
|
-
expect(response.data
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
243
|
+
expect(response.data.length).toBeGreaterThan(0);
|
|
244
|
+
expect(response.data[0]).not.toBeNull();
|
|
245
|
+
expect(response.data[0]).not.toBeUndefined();
|
|
246
|
+
if (response.data[0]) {
|
|
247
|
+
expect(response.data[0]).toHaveProperty("markdown");
|
|
248
|
+
expect(response.data[0].markdown).toContain("_Roast_");
|
|
249
|
+
expect(response.data[0]).not.toHaveProperty('content'); // v0
|
|
250
|
+
expect(response.data[0]).toHaveProperty("html");
|
|
251
|
+
expect(response.data[0].html).toContain("<h1");
|
|
252
|
+
expect(response.data[0]).toHaveProperty("rawHtml");
|
|
253
|
+
expect(response.data[0].rawHtml).toContain("<h1");
|
|
254
|
+
expect(response.data[0]).toHaveProperty("screenshot");
|
|
255
|
+
expect(response.data[0].screenshot).toContain("https://");
|
|
256
|
+
expect(response.data[0]).toHaveProperty("links");
|
|
257
|
+
expect(response.data[0].links).not.toBeNull();
|
|
258
|
+
expect(response.data[0].links?.length).toBeGreaterThan(0);
|
|
259
|
+
expect(response.data[0]).toHaveProperty("metadata");
|
|
260
|
+
expect(response.data[0].metadata).toHaveProperty("title");
|
|
261
|
+
expect(response.data[0].metadata).toHaveProperty("description");
|
|
262
|
+
expect(response.data[0].metadata).toHaveProperty("language");
|
|
263
|
+
expect(response.data[0].metadata).toHaveProperty("sourceURL");
|
|
264
|
+
expect(response.data[0].metadata).toHaveProperty("statusCode");
|
|
265
|
+
expect(response.data[0].metadata).not.toHaveProperty("error");
|
|
266
|
+
}
|
|
207
267
|
}, 60000); // 60 seconds timeout
|
|
208
268
|
|
|
209
269
|
test.concurrent('should handle idempotency key for crawl', async () => {
|
|
210
270
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
211
271
|
const uniqueIdempotencyKey = uuidv4();
|
|
212
|
-
const response = await app.
|
|
272
|
+
const response = await app.asyncCrawlUrl('https://roastmywebsite.ai', {}, uniqueIdempotencyKey) as CrawlResponse;
|
|
213
273
|
expect(response).not.toBeNull();
|
|
214
274
|
expect(response.id).toBeDefined();
|
|
215
275
|
|
|
216
|
-
await expect(app.crawlUrl('https://roastmywebsite.ai', {},
|
|
276
|
+
await expect(app.crawlUrl('https://roastmywebsite.ai', {}, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
|
|
217
277
|
});
|
|
218
278
|
|
|
219
279
|
test.concurrent('should check crawl status', async () => {
|
|
220
280
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
221
|
-
const response = await app.
|
|
281
|
+
const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
|
|
222
282
|
expect(response).not.toBeNull();
|
|
223
283
|
expect(response.id).toBeDefined();
|
|
224
284
|
|
|
@@ -226,7 +286,8 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
226
286
|
const maxChecks = 15;
|
|
227
287
|
let checks = 0;
|
|
228
288
|
|
|
229
|
-
|
|
289
|
+
expect(statusResponse.success).toBe(true);
|
|
290
|
+
while ((statusResponse as any).status === 'scraping' && checks < maxChecks) {
|
|
230
291
|
await new Promise(resolve => setTimeout(resolve, 5000));
|
|
231
292
|
expect(statusResponse).not.toHaveProperty("partial_data"); // v0
|
|
232
293
|
expect(statusResponse).not.toHaveProperty("current"); // v0
|
|
@@ -236,49 +297,65 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
236
297
|
expect(statusResponse).toHaveProperty("expiresAt");
|
|
237
298
|
expect(statusResponse).toHaveProperty("status");
|
|
238
299
|
expect(statusResponse).toHaveProperty("next");
|
|
239
|
-
expect(statusResponse.
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
300
|
+
expect(statusResponse.success).toBe(true);
|
|
301
|
+
if (statusResponse.success === true) {
|
|
302
|
+
expect(statusResponse.total).toBeGreaterThan(0);
|
|
303
|
+
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
|
|
304
|
+
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
|
|
305
|
+
expect(statusResponse.status).toBe("scraping");
|
|
306
|
+
expect(statusResponse.next).toContain("/v1/crawl/");
|
|
307
|
+
}
|
|
244
308
|
statusResponse = await app.checkCrawlStatus(response.id) as CrawlStatusResponse;
|
|
309
|
+
expect(statusResponse.success).toBe(true);
|
|
245
310
|
checks++;
|
|
246
311
|
}
|
|
247
312
|
|
|
248
313
|
expect(statusResponse).not.toBeNull();
|
|
249
314
|
expect(statusResponse).toHaveProperty("total");
|
|
250
|
-
expect(statusResponse.
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
315
|
+
expect(statusResponse.success).toBe(true);
|
|
316
|
+
if (statusResponse.success === true) {
|
|
317
|
+
expect(statusResponse.total).toBeGreaterThan(0);
|
|
318
|
+
expect(statusResponse).toHaveProperty("creditsUsed");
|
|
319
|
+
expect(statusResponse.creditsUsed).toBeGreaterThan(0);
|
|
320
|
+
expect(statusResponse).toHaveProperty("expiresAt");
|
|
321
|
+
expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
|
|
322
|
+
expect(statusResponse).toHaveProperty("status");
|
|
323
|
+
expect(statusResponse.status).toBe("completed");
|
|
324
|
+
expect(statusResponse.data.length).toBeGreaterThan(0);
|
|
325
|
+
expect(statusResponse.data[0]).not.toBeNull();
|
|
326
|
+
expect(statusResponse.data[0]).not.toBeUndefined();
|
|
327
|
+
if (statusResponse.data[0]) {
|
|
328
|
+
expect(statusResponse.data[0]).toHaveProperty("markdown");
|
|
329
|
+
expect(statusResponse.data[0].markdown?.length).toBeGreaterThan(10);
|
|
330
|
+
expect(statusResponse.data[0]).not.toHaveProperty('content'); // v0
|
|
331
|
+
expect(statusResponse.data[0]).toHaveProperty("html");
|
|
332
|
+
expect(statusResponse.data[0].html).toContain("<div");
|
|
333
|
+
expect(statusResponse.data[0]).toHaveProperty("rawHtml");
|
|
334
|
+
expect(statusResponse.data[0].rawHtml).toContain("<div");
|
|
335
|
+
expect(statusResponse.data[0]).toHaveProperty("screenshot");
|
|
336
|
+
expect(statusResponse.data[0].screenshot).toContain("https://");
|
|
337
|
+
expect(statusResponse.data[0]).toHaveProperty("links");
|
|
338
|
+
expect(statusResponse.data[0].links).not.toBeNull();
|
|
339
|
+
expect(statusResponse.data[0].links?.length).toBeGreaterThan(0);
|
|
340
|
+
expect(statusResponse.data[0]).toHaveProperty("metadata");
|
|
341
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("title");
|
|
342
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("description");
|
|
343
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("language");
|
|
344
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("sourceURL");
|
|
345
|
+
expect(statusResponse.data[0].metadata).toHaveProperty("statusCode");
|
|
346
|
+
expect(statusResponse.data[0].metadata).not.toHaveProperty("error");
|
|
347
|
+
}
|
|
348
|
+
}
|
|
277
349
|
}, 60000); // 60 seconds timeout
|
|
278
350
|
|
|
279
351
|
test.concurrent('should throw error for invalid API key on map', async () => {
|
|
280
|
-
|
|
281
|
-
|
|
352
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
353
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
354
|
+
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
|
|
355
|
+
} else {
|
|
356
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
357
|
+
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
|
358
|
+
}
|
|
282
359
|
});
|
|
283
360
|
|
|
284
361
|
test.concurrent('should throw error for blocklisted URL on map', async () => {
|
|
@@ -295,8 +372,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
295
372
|
}, 30000); // 30 seconds timeout
|
|
296
373
|
|
|
297
374
|
test.concurrent('should return successful response for valid map', async () => {
|
|
298
|
-
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
299
|
-
const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
|
|
375
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
|
|
300
376
|
expect(response).not.toBeNull();
|
|
301
377
|
|
|
302
378
|
expect(response.links?.length).toBeGreaterThan(0);
|
|
@@ -305,8 +381,45 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
305
381
|
expect(filteredLinks?.length).toBeGreaterThan(0);
|
|
306
382
|
}, 30000); // 30 seconds timeout
|
|
307
383
|
|
|
308
|
-
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
test('should search with string query', async () => {
|
|
387
|
+
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
|
|
388
|
+
const response = await app.search("firecrawl");
|
|
389
|
+
expect(response.success).toBe(true);
|
|
390
|
+
console.log(response.data);
|
|
391
|
+
expect(response.data?.length).toBeGreaterThan(0);
|
|
392
|
+
expect(response.data?.[0]?.markdown).toBeDefined();
|
|
393
|
+
expect(response.data?.[0]?.metadata).toBeDefined();
|
|
394
|
+
expect(response.data?.[0]?.metadata?.title).toBeDefined();
|
|
395
|
+
expect(response.data?.[0]?.metadata?.description).toBeDefined();
|
|
396
|
+
});
|
|
397
|
+
|
|
398
|
+
test('should search with params object', async () => {
|
|
309
399
|
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
|
|
310
|
-
await
|
|
400
|
+
const response = await app.search("firecrawl", {
|
|
401
|
+
limit: 3,
|
|
402
|
+
lang: 'en',
|
|
403
|
+
country: 'us',
|
|
404
|
+
scrapeOptions: {
|
|
405
|
+
formats: ['markdown', 'html', 'links'],
|
|
406
|
+
onlyMainContent: true
|
|
407
|
+
}
|
|
408
|
+
});
|
|
409
|
+
expect(response.success).toBe(true);
|
|
410
|
+
expect(response.data.length).toBeLessThanOrEqual(3);
|
|
411
|
+
for (const doc of response.data) {
|
|
412
|
+
expect(doc.markdown).toBeDefined();
|
|
413
|
+
expect(doc.html).toBeDefined();
|
|
414
|
+
expect(doc.links).toBeDefined();
|
|
415
|
+
expect(doc.metadata).toBeDefined();
|
|
416
|
+
expect(doc.metadata?.title).toBeDefined();
|
|
417
|
+
expect(doc.metadata?.description).toBeDefined();
|
|
418
|
+
}
|
|
419
|
+
});
|
|
420
|
+
|
|
421
|
+
test('should handle invalid API key for search', async () => {
|
|
422
|
+
const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: "invalid_api_key" });
|
|
423
|
+
await expect(app.search("test query")).rejects.toThrow("Request failed with status code 404");
|
|
311
424
|
});
|
|
312
425
|
});
|