firecrawl 0.0.29 → 1.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,156 +1,330 @@
1
- import FirecrawlApp from '../../index';
2
- import { v4 as uuidv4 } from 'uuid';
3
- import dotenv from 'dotenv';
4
-
1
+ import FirecrawlApp, {
2
+ CrawlResponseV0,
3
+ CrawlStatusResponse,
4
+ CrawlStatusResponseV0,
5
+ FirecrawlDocumentV0,
6
+ ScrapeResponseV0,
7
+ SearchResponseV0,
8
+ } from "../../index";
9
+ import { v4 as uuidv4 } from "uuid";
10
+ import dotenv from "dotenv";
11
+ import { describe, test, expect } from "@jest/globals";
5
12
 
6
13
  dotenv.config();
7
14
 
8
15
  const TEST_API_KEY = process.env.TEST_API_KEY;
9
16
  const API_URL = "http://127.0.0.1:3002";
10
17
 
11
- describe('FirecrawlApp E2E Tests', () => {
12
- test.concurrent('should throw error for no API key', () => {
18
+ describe('FirecrawlApp<"v0"> E2E Tests', () => {
19
+ test.concurrent("should throw error for no API key", async () => {
13
20
  expect(() => {
14
- new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
21
+ new FirecrawlApp<"v0">({ apiKey: null, apiUrl: API_URL, version: "v0" });
15
22
  }).toThrow("No API key provided");
16
23
  });
17
24
 
18
- test.concurrent('should throw error for invalid API key on scrape', async () => {
19
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
20
- await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
21
- });
25
+ test.concurrent(
26
+ "should throw error for invalid API key on scrape",
27
+ async () => {
28
+ const invalidApp = new FirecrawlApp<"v0">({
29
+ apiKey: "invalid_api_key",
30
+ apiUrl: API_URL,
31
+ version: "v0",
32
+ });
33
+ await expect(
34
+ invalidApp.scrapeUrl("https://roastmywebsite.ai")
35
+ ).rejects.toThrow("Request failed with status code 401");
36
+ }
37
+ );
22
38
 
23
- test.concurrent('should throw error for blocklisted URL on scrape', async () => {
24
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
25
- const blocklistedUrl = "https://facebook.com/fake-test";
26
- await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
27
- });
39
+ test.concurrent(
40
+ "should throw error for blocklisted URL on scrape",
41
+ async () => {
42
+ const app = new FirecrawlApp<"v0">({
43
+ apiKey: TEST_API_KEY,
44
+ apiUrl: API_URL,
45
+ version: "v0",
46
+ });
47
+ const blocklistedUrl = "https://facebook.com/fake-test";
48
+ await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow(
49
+ "Request failed with status code 403"
50
+ );
51
+ }
52
+ );
28
53
 
29
- test.concurrent('should return successful response with valid preview token', async () => {
30
- const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
31
- const response = await app.scrapeUrl('https://roastmywebsite.ai');
32
- expect(response).not.toBeNull();
33
- expect(response.data?.content).toContain("_Roast_");
34
- }, 30000); // 30 seconds timeout
54
+ test.concurrent(
55
+ "should return successful response with valid preview token",
56
+ async () => {
57
+ const app = new FirecrawlApp<"v0">({
58
+ apiKey: "this_is_just_a_preview_token",
59
+ apiUrl: API_URL,
60
+ version: "v0",
61
+ });
62
+ const response = (await app.scrapeUrl(
63
+ "https://roastmywebsite.ai"
64
+ )) as ScrapeResponseV0;
65
+ expect(response).not.toBeNull();
66
+ expect(response.data?.content).toContain("_Roast_");
67
+ },
68
+ 30000
69
+ ); // 30 seconds timeout
35
70
 
36
- test.concurrent('should return successful response for valid scrape', async () => {
37
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
38
- const response = await app.scrapeUrl('https://roastmywebsite.ai');
39
- expect(response).not.toBeNull();
40
- expect(response.data?.content).toContain("_Roast_");
41
- expect(response.data).toHaveProperty('markdown');
42
- expect(response.data).toHaveProperty('metadata');
43
- expect(response.data).not.toHaveProperty('html');
44
- }, 30000); // 30 seconds timeout
45
-
46
- test.concurrent('should return successful response with valid API key and include HTML', async () => {
47
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
48
- const response = await app.scrapeUrl('https://roastmywebsite.ai', { pageOptions: { includeHtml: true } });
49
- expect(response).not.toBeNull();
50
- expect(response.data?.content).toContain("_Roast_");
51
- expect(response.data?.markdown).toContain("_Roast_");
52
- expect(response.data?.html).toContain("<h1");
53
- }, 30000); // 30 seconds timeout
54
-
55
- test.concurrent('should return successful response for valid scrape with PDF file', async () => {
56
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
57
- const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
58
- expect(response).not.toBeNull();
59
- expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
60
- }, 30000); // 30 seconds timeout
71
+ test.concurrent(
72
+ "should return successful response for valid scrape",
73
+ async () => {
74
+ const app = new FirecrawlApp<"v0">({
75
+ apiKey: TEST_API_KEY,
76
+ apiUrl: API_URL,
77
+ version: "v0",
78
+ });
79
+ const response = (await app.scrapeUrl(
80
+ "https://roastmywebsite.ai"
81
+ )) as ScrapeResponseV0;
82
+ expect(response).not.toBeNull();
83
+ expect(response.data?.content).toContain("_Roast_");
84
+ expect(response.data).toHaveProperty("markdown");
85
+ expect(response.data).toHaveProperty("metadata");
86
+ expect(response.data).not.toHaveProperty("html");
87
+ },
88
+ 30000
89
+ ); // 30 seconds timeout
61
90
 
62
- test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
63
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
64
- const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
65
- expect(response).not.toBeNull();
66
- expect(response.data?.content).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
67
- }, 30000); // 30 seconds timeout
91
+ test.concurrent(
92
+ "should return successful response with valid API key and include HTML",
93
+ async () => {
94
+ const app = new FirecrawlApp<"v0">({
95
+ apiKey: TEST_API_KEY,
96
+ apiUrl: API_URL,
97
+ version: "v0",
98
+ });
99
+ const response = (await app.scrapeUrl("https://roastmywebsite.ai", {
100
+ pageOptions: { includeHtml: true },
101
+ })) as ScrapeResponseV0;
102
+ expect(response).not.toBeNull();
103
+ expect(response.data?.content).toContain("_Roast_");
104
+ expect(response.data?.markdown).toContain("_Roast_");
105
+ expect(response.data?.html).toContain("<h1");
106
+ },
107
+ 30000
108
+ ); // 30 seconds timeout
68
109
 
69
- test.concurrent('should throw error for invalid API key on crawl', async () => {
70
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
71
- await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
72
- });
110
+ test.concurrent(
111
+ "should return successful response for valid scrape with PDF file",
112
+ async () => {
113
+ const app = new FirecrawlApp<"v0">({
114
+ apiKey: TEST_API_KEY,
115
+ apiUrl: API_URL,
116
+ version: "v0",
117
+ });
118
+ const response = (await app.scrapeUrl(
119
+ "https://arxiv.org/pdf/astro-ph/9301001.pdf"
120
+ )) as ScrapeResponseV0;
121
+ expect(response).not.toBeNull();
122
+ expect(response.data?.content).toContain(
123
+ "We present spectrophotometric observations of the Broad Line Radio Galaxy"
124
+ );
125
+ },
126
+ 30000
127
+ ); // 30 seconds timeout
73
128
 
74
- test.concurrent('should throw error for blocklisted URL on crawl', async () => {
75
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
76
- const blocklistedUrl = "https://twitter.com/fake-test";
77
- await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("Request failed with status code 403");
78
- });
129
+ test.concurrent(
130
+ "should return successful response for valid scrape with PDF file without explicit extension",
131
+ async () => {
132
+ const app = new FirecrawlApp<"v0">({
133
+ apiKey: TEST_API_KEY,
134
+ apiUrl: API_URL,
135
+ version: "v0",
136
+ });
137
+ const response = (await app.scrapeUrl(
138
+ "https://arxiv.org/pdf/astro-ph/9301001"
139
+ )) as ScrapeResponseV0;
140
+ expect(response).not.toBeNull();
141
+ expect(response.data?.content).toContain(
142
+ "We present spectrophotometric observations of the Broad Line Radio Galaxy"
143
+ );
144
+ },
145
+ 30000
146
+ ); // 30 seconds timeout
79
147
 
80
- test.concurrent('should return successful response for crawl and wait for completion', async () => {
81
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
82
- const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 30);
83
- expect(response).not.toBeNull();
84
- expect(response[0].content).toContain("_Roast_");
85
- }, 60000); // 60 seconds timeout
148
+ test.concurrent(
149
+ "should throw error for invalid API key on crawl",
150
+ async () => {
151
+ const invalidApp = new FirecrawlApp<"v0">({
152
+ apiKey: "invalid_api_key",
153
+ apiUrl: API_URL,
154
+ version: "v0",
155
+ });
156
+ await expect(
157
+ invalidApp.crawlUrl("https://roastmywebsite.ai")
158
+ ).rejects.toThrow("Request failed with status code 401");
159
+ }
160
+ );
86
161
 
87
- test.concurrent('should handle idempotency key for crawl', async () => {
88
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
162
+ test.concurrent(
163
+ "should throw error for blocklisted URL on crawl",
164
+ async () => {
165
+ const app = new FirecrawlApp<"v0">({
166
+ apiKey: TEST_API_KEY,
167
+ apiUrl: API_URL,
168
+ version: "v0",
169
+ });
170
+ const blocklistedUrl = "https://twitter.com/fake-test";
171
+ await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow(
172
+ "Request failed with status code 403"
173
+ );
174
+ }
175
+ );
176
+
177
+ test.concurrent(
178
+ "should return successful response for crawl and wait for completion",
179
+ async () => {
180
+ const app = new FirecrawlApp<"v0">({
181
+ apiKey: TEST_API_KEY,
182
+ apiUrl: API_URL,
183
+ version: "v0",
184
+ });
185
+ const response = (await app.crawlUrl(
186
+ "https://roastmywebsite.ai",
187
+ { crawlerOptions: { excludes: ["blog/*"] } },
188
+ true,
189
+ 10
190
+ )) as FirecrawlDocumentV0[];
191
+ expect(response).not.toBeNull();
192
+ expect(response[0].content).toContain("_Roast_");
193
+ },
194
+ 60000
195
+ ); // 60 seconds timeout
196
+
197
+ test.concurrent("should handle idempotency key for crawl", async () => {
198
+ const app = new FirecrawlApp<"v0">({
199
+ apiKey: TEST_API_KEY,
200
+ apiUrl: API_URL,
201
+ version: "v0",
202
+ });
89
203
  const uniqueIdempotencyKey = uuidv4();
90
- const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false, 2, uniqueIdempotencyKey);
204
+ const response = (await app.crawlUrl(
205
+ "https://roastmywebsite.ai",
206
+ { crawlerOptions: { excludes: ["blog/*"] } },
207
+ false,
208
+ 2,
209
+ uniqueIdempotencyKey
210
+ )) as CrawlResponseV0;
91
211
  expect(response).not.toBeNull();
92
212
  expect(response.jobId).toBeDefined();
93
213
 
94
- await expect(app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
214
+ await expect(
215
+ app.crawlUrl(
216
+ "https://roastmywebsite.ai",
217
+ { crawlerOptions: { excludes: ["blog/*"] } },
218
+ true,
219
+ 2,
220
+ uniqueIdempotencyKey
221
+ )
222
+ ).rejects.toThrow("Request failed with status code 409");
95
223
  });
96
224
 
97
- test.concurrent('should check crawl status', async () => {
98
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
99
- const response = await app.crawlUrl('https://roastmywebsite.ai', { crawlerOptions: { excludes: ['blog/*'] } }, false);
100
- expect(response).not.toBeNull();
101
- expect(response.jobId).toBeDefined();
225
+ test.concurrent(
226
+ "should check crawl status",
227
+ async () => {
228
+ const app = new FirecrawlApp<"v0">({
229
+ apiKey: TEST_API_KEY,
230
+ apiUrl: API_URL,
231
+ version: "v0",
232
+ });
233
+ const response: any = (await app.crawlUrl(
234
+ "https://roastmywebsite.ai",
235
+ { crawlerOptions: { excludes: ["blog/*"] } },
236
+ false
237
+ )) as CrawlResponseV0;
238
+ expect(response).not.toBeNull();
239
+ expect(response.jobId).toBeDefined();
102
240
 
103
- let statusResponse = await app.checkCrawlStatus(response.jobId);
104
- const maxChecks = 15;
105
- let checks = 0;
241
+ let statusResponse = await app.checkCrawlStatus(response.jobId);
242
+ const maxChecks = 15;
243
+ let checks = 0;
106
244
 
107
- while (statusResponse.status === 'active' && checks < maxChecks) {
108
- await new Promise(resolve => setTimeout(resolve, 1000));
109
- expect(statusResponse.partial_data).not.toBeNull();
110
- statusResponse = await app.checkCrawlStatus(response.jobId);
111
- checks++;
112
- }
245
+ while (statusResponse.status === "active" && checks < maxChecks) {
246
+ await new Promise((resolve) => setTimeout(resolve, 5000));
247
+ expect(statusResponse.partial_data).not.toBeNull();
248
+ // expect(statusResponse.current).toBeGreaterThanOrEqual(1);
249
+ statusResponse = (await app.checkCrawlStatus(
250
+ response.jobId
251
+ )) as CrawlStatusResponseV0;
252
+ checks++;
253
+ }
113
254
 
114
- expect(statusResponse).not.toBeNull();
115
- expect(statusResponse.status).toBe('completed');
116
- expect(statusResponse?.data?.length).toBeGreaterThan(0);
117
- }, 35000); // 35 seconds timeout
255
+ expect(statusResponse).not.toBeNull();
256
+ expect(statusResponse.success).toBe(true);
257
+ expect(statusResponse.status).toBe("completed");
258
+ expect(statusResponse.total).toEqual(statusResponse.current);
259
+ expect(statusResponse.current_step).not.toBeNull();
260
+ expect(statusResponse.current).toBeGreaterThanOrEqual(1);
118
261
 
119
- test.concurrent('should return successful response for search', async () => {
120
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
121
- const response = await app.search("test query");
122
- expect(response).not.toBeNull();
123
- expect(response?.data?.[0]?.content).toBeDefined();
124
- expect(response?.data?.length).toBeGreaterThan(2);
125
- }, 30000); // 30 seconds timeout
262
+ expect(statusResponse?.data?.length).toBeGreaterThan(0);
263
+ },
264
+ 35000
265
+ ); // 35 seconds timeout
126
266
 
127
- test.concurrent('should throw error for invalid API key on search', async () => {
128
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
129
- await expect(invalidApp.search("test query")).rejects.toThrow("Request failed with status code 401");
130
- });
267
+ test.concurrent(
268
+ "should return successful response for search",
269
+ async () => {
270
+ const app = new FirecrawlApp<"v0">({
271
+ apiKey: TEST_API_KEY,
272
+ apiUrl: API_URL,
273
+ version: "v0",
274
+ });
275
+ const response = (await app.search("test query")) as SearchResponseV0;
276
+ expect(response).not.toBeNull();
277
+ expect(response?.data?.[0]?.content).toBeDefined();
278
+ expect(response?.data?.length).toBeGreaterThan(2);
279
+ },
280
+ 30000
281
+ ); // 30 seconds timeout
282
+
283
+ test.concurrent(
284
+ "should throw error for invalid API key on search",
285
+ async () => {
286
+ const invalidApp = new FirecrawlApp<"v0">({
287
+ apiKey: "invalid_api_key",
288
+ apiUrl: API_URL,
289
+ version: "v0",
290
+ });
291
+ await expect(invalidApp.search("test query")).rejects.toThrow(
292
+ "Request failed with status code 401"
293
+ );
294
+ }
295
+ );
131
296
 
132
- test.concurrent('should perform LLM extraction', async () => {
133
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
134
- const response = await app.scrapeUrl("https://mendable.ai", {
135
- extractorOptions: {
136
- mode: 'llm-extraction',
137
- extractionPrompt: "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
138
- extractionSchema: {
139
- type: 'object',
140
- properties: {
141
- company_mission: { type: 'string' },
142
- supports_sso: { type: 'boolean' },
143
- is_open_source: { type: 'boolean' }
297
+ test.concurrent(
298
+ "should perform LLM extraction",
299
+ async () => {
300
+ const app = new FirecrawlApp<"v0">({
301
+ apiKey: TEST_API_KEY,
302
+ apiUrl: API_URL,
303
+ version: "v0",
304
+ });
305
+ const response = (await app.scrapeUrl("https://mendable.ai", {
306
+ extractorOptions: {
307
+ mode: "llm-extraction",
308
+ extractionPrompt:
309
+ "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
310
+ extractionSchema: {
311
+ type: "object",
312
+ properties: {
313
+ company_mission: { type: "string" },
314
+ supports_sso: { type: "boolean" },
315
+ is_open_source: { type: "boolean" },
316
+ },
317
+ required: ["company_mission", "supports_sso", "is_open_source"],
144
318
  },
145
- required: ['company_mission', 'supports_sso', 'is_open_source']
146
- }
147
- }
148
- });
149
- expect(response).not.toBeNull();
150
- expect(response.data?.llm_extraction).toBeDefined();
151
- const llmExtraction = response.data?.llm_extraction;
152
- expect(llmExtraction?.company_mission).toBeDefined();
153
- expect(typeof llmExtraction?.supports_sso).toBe('boolean');
154
- expect(typeof llmExtraction?.is_open_source).toBe('boolean');
155
- }, 30000); // 30 seconds timeout
319
+ },
320
+ })) as ScrapeResponseV0;
321
+ expect(response).not.toBeNull();
322
+ expect(response.data?.llm_extraction).toBeDefined();
323
+ const llmExtraction = response.data?.llm_extraction;
324
+ expect(llmExtraction?.company_mission).toBeDefined();
325
+ expect(typeof llmExtraction?.supports_sso).toBe("boolean");
326
+ expect(typeof llmExtraction?.is_open_source).toBe("boolean");
327
+ },
328
+ 30000
329
+ ); // 30 seconds timeout
156
330
  });
@@ -31,7 +31,7 @@ describe('the firecrawl JS SDK', () => {
31
31
  });
32
32
 
33
33
  const apiKey = 'YOUR_API_KEY'
34
- const app = new FirecrawlApp({ apiKey });
34
+ const app = new FirecrawlApp<"v0">({ apiKey });
35
35
  // Scrape a single URL
36
36
  const url = 'https://mendable.ai';
37
37
  const scrapedData = await app.scrapeUrl(url);