@mendable/firecrawl 1.2.2 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,22 +1,19 @@
1
1
  {
2
2
  "name": "@mendable/firecrawl",
3
- "version": "1.2.2",
3
+ "version": "1.18.0",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
- "main": "build/cjs/index.js",
6
- "types": "types/index.d.ts",
7
- "type": "module",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
8
7
  "exports": {
9
- "require": {
10
- "types": "./types/index.d.ts",
11
- "default": "./build/cjs/index.js"
12
- },
13
- "import": {
14
- "types": "./types/index.d.ts",
15
- "default": "./build/esm/index.js"
8
+ "./package.json": "./package.json",
9
+ ".": {
10
+ "import": "./dist/index.js",
11
+ "default": "./dist/index.cjs"
16
12
  }
17
13
  },
14
+ "type": "module",
18
15
  "scripts": {
19
- "build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json",
16
+ "build": "tsup",
20
17
  "build-and-publish": "npm run build && npm publish --access public",
21
18
  "publish-beta": "npm run build && npm publish --access public --tag beta",
22
19
  "test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
@@ -29,10 +26,8 @@
29
26
  "license": "MIT",
30
27
  "dependencies": {
31
28
  "axios": "^1.6.8",
32
- "dotenv": "^16.4.5",
33
29
  "isows": "^1.0.4",
34
30
  "typescript-event-target": "^1.1.1",
35
- "uuid": "^9.0.1",
36
31
  "zod": "^3.23.8",
37
32
  "zod-to-json-schema": "^3.23.0"
38
33
  },
@@ -41,6 +36,8 @@
41
36
  },
42
37
  "homepage": "https://github.com/mendableai/firecrawl#readme",
43
38
  "devDependencies": {
39
+ "uuid": "^9.0.1",
40
+ "dotenv": "^16.4.5",
44
41
  "@jest/globals": "^29.7.0",
45
42
  "@types/axios": "^0.14.0",
46
43
  "@types/dotenv": "^8.2.0",
@@ -50,6 +47,7 @@
50
47
  "@types/uuid": "^9.0.8",
51
48
  "jest": "^29.7.0",
52
49
  "ts-jest": "^29.2.2",
50
+ "tsup": "^8.2.4",
53
51
  "typescript": "^5.4.5"
54
52
  },
55
53
  "keywords": [
@@ -1,9 +1,9 @@
1
- import { describe, test, expect, jest } from '@jest/globals';
2
- import axios from 'axios';
3
- import FirecrawlApp from '../index';
1
+ import { describe, expect, jest, test } from '@jest/globals';
4
2
 
5
- import { readFile } from 'fs/promises';
3
+ import FirecrawlApp from '../index';
4
+ import axios from 'axios';
6
5
  import { join } from 'path';
6
+ import { readFile } from 'fs/promises';
7
7
 
8
8
  // Mock jest and set the type
9
9
  jest.mock('axios');
@@ -14,13 +14,22 @@ async function loadFixture(name: string): Promise<string> {
14
14
  return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
15
15
  }
16
16
 
17
+ const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
18
+
17
19
  describe('the firecrawl JS SDK', () => {
18
20
 
19
- test('Should require an API key to instantiate FirecrawlApp', async () => {
20
- const fn = () => {
21
- new FirecrawlApp({ apiKey: undefined });
22
- };
23
- expect(fn).toThrow('No API key provided');
21
+ test('Should require an API key only for cloud service', async () => {
22
+ if (API_URL.includes('api.firecrawl.dev')) {
23
+ // Should throw for cloud service
24
+ expect(() => {
25
+ new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
26
+ }).toThrow('No API key provided');
27
+ } else {
28
+ // Should not throw for self-hosted
29
+ expect(() => {
30
+ new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
31
+ }).not.toThrow();
32
+ }
24
33
  });
25
34
 
26
35
  test('Should return scraped data from a /scrape API call', async () => {
@@ -1,4 +1,4 @@
1
- import FirecrawlApp, { CrawlParams, CrawlResponse, CrawlStatusResponse, MapResponse, ScrapeParams, ScrapeResponse } from '../../../index';
1
+ import FirecrawlApp, { type CrawlParams, type CrawlResponse, type CrawlStatusResponse, type MapResponse, type ScrapeResponse } from '../../../index';
2
2
  import { v4 as uuidv4 } from 'uuid';
3
3
  import dotenv from 'dotenv';
4
4
  import { describe, test, expect } from '@jest/globals';
@@ -6,18 +6,31 @@ import { describe, test, expect } from '@jest/globals';
6
6
  dotenv.config();
7
7
 
8
8
  const TEST_API_KEY = process.env.TEST_API_KEY;
9
- const API_URL = "http://127.0.0.1:3002";
9
+ const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
10
10
 
11
11
  describe('FirecrawlApp E2E Tests', () => {
12
- test.concurrent('should throw error for no API key', async () => {
13
- expect(() => {
14
- new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
15
- }).toThrow("No API key provided");
12
+ test.concurrent('should throw error for no API key only for cloud service', async () => {
13
+ if (API_URL.includes('api.firecrawl.dev')) {
14
+ // Should throw for cloud service
15
+ expect(() => {
16
+ new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
17
+ }).toThrow("No API key provided");
18
+ } else {
19
+ // Should not throw for self-hosted
20
+ expect(() => {
21
+ new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
22
+ }).not.toThrow();
23
+ }
16
24
  });
17
25
 
18
26
  test.concurrent('should throw error for invalid API key on scrape', async () => {
19
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
20
- await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
27
+ if (API_URL.includes('api.firecrawl.dev')) {
28
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
29
+ await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404");
30
+ } else {
31
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
32
+ await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
33
+ }
21
34
  });
22
35
 
23
36
  test.concurrent('should throw error for blocklisted URL on scrape', async () => {
@@ -28,14 +41,22 @@ describe('FirecrawlApp E2E Tests', () => {
28
41
 
29
42
  test.concurrent('should return successful response with valid preview token', async () => {
30
43
  const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
31
- const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse;
44
+ const response = await app.scrapeUrl('https://roastmywebsite.ai');
45
+ if (!response.success) {
46
+ throw new Error(response.error);
47
+ }
48
+
32
49
  expect(response).not.toBeNull();
33
50
  expect(response?.markdown).toContain("_Roast_");
34
51
  }, 30000); // 30 seconds timeout
35
52
 
36
53
  test.concurrent('should return successful response for valid scrape', async () => {
37
54
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
38
- const response = await app.scrapeUrl('https://roastmywebsite.ai') as ScrapeResponse;
55
+ const response = await app.scrapeUrl('https://roastmywebsite.ai');
56
+ if (!response.success) {
57
+ throw new Error(response.error);
58
+ }
59
+
39
60
  expect(response).not.toBeNull();
40
61
  expect(response).not.toHaveProperty('content'); // v0
41
62
  expect(response).not.toHaveProperty('html');
@@ -58,7 +79,11 @@ describe('FirecrawlApp E2E Tests', () => {
58
79
  onlyMainContent: true,
59
80
  timeout: 30000,
60
81
  waitFor: 1000
61
- }) as ScrapeResponse;
82
+ });
83
+ if (!response.success) {
84
+ throw new Error(response.error);
85
+ }
86
+
62
87
  expect(response).not.toBeNull();
63
88
  expect(response).not.toHaveProperty('content'); // v0
64
89
  expect(response.markdown).toContain("_Roast_");
@@ -71,6 +96,7 @@ describe('FirecrawlApp E2E Tests', () => {
71
96
  expect(response.links?.length).toBeGreaterThan(0);
72
97
  expect(response.links?.[0]).toContain("https://");
73
98
  expect(response.metadata).not.toBeNull();
99
+ expect(response.metadata).not.toBeUndefined();
74
100
  expect(response.metadata).toHaveProperty("title");
75
101
  expect(response.metadata).toHaveProperty("description");
76
102
  expect(response.metadata).toHaveProperty("keywords");
@@ -85,49 +111,75 @@ describe('FirecrawlApp E2E Tests', () => {
85
111
  expect(response.metadata).not.toHaveProperty("pageStatusCode");
86
112
  expect(response.metadata).toHaveProperty("statusCode");
87
113
  expect(response.metadata).not.toHaveProperty("pageError");
88
- expect(response.metadata.error).toBeUndefined();
89
- expect(response.metadata.title).toBe("Roast My Website");
90
- expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
91
- expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
92
- expect(response.metadata.robots).toBe("follow, index");
93
- expect(response.metadata.ogTitle).toBe("Roast My Website");
94
- expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
95
- expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
96
- expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
97
- expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
98
- expect(response.metadata.ogSiteName).toBe("Roast My Website");
99
- expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
100
- expect(response.metadata.statusCode).toBe(200);
114
+
115
+ if (response.metadata !== undefined) {
116
+ expect(response.metadata.error).toBeUndefined();
117
+ expect(response.metadata.title).toBe("Roast My Website");
118
+ expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
119
+ expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
120
+ expect(response.metadata.robots).toBe("follow, index");
121
+ expect(response.metadata.ogTitle).toBe("Roast My Website");
122
+ expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
123
+ expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
124
+ expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
125
+ expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
126
+ expect(response.metadata.ogSiteName).toBe("Roast My Website");
127
+ expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
128
+ expect(response.metadata.statusCode).toBe(200);
129
+ }
130
+ }, 30000); // 30 seconds timeout
131
+
132
+ test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => {
133
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
134
+ const response = await app.scrapeUrl(
135
+ 'https://roastmywebsite.ai', {
136
+ formats: ['screenshot@fullPage'],
137
+ });
138
+ if (!response.success) {
139
+ throw new Error(response.error);
140
+ }
141
+
142
+ expect(response).not.toBeNull();
143
+ expect(response.screenshot).not.toBeUndefined();
144
+ expect(response.screenshot).not.toBeNull();
145
+ expect(response.screenshot).toContain("https://");
101
146
  }, 30000); // 30 seconds timeout
102
147
 
103
148
  test.concurrent('should return successful response for valid scrape with PDF file', async () => {
104
149
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
105
- const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf') as ScrapeResponse;
150
+ const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
151
+ if (!response.success) {
152
+ throw new Error(response.error);
153
+ }
154
+
106
155
  expect(response).not.toBeNull();
107
156
  expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
108
157
  }, 30000); // 30 seconds timeout
109
158
 
110
159
  test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
111
160
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
112
- const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001') as ScrapeResponse;
161
+ const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
162
+ if (!response.success) {
163
+ throw new Error(response.error);
164
+ }
165
+
113
166
  expect(response).not.toBeNull();
114
167
  expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
115
168
  }, 30000); // 30 seconds timeout
116
169
 
117
170
  test.concurrent('should throw error for invalid API key on crawl', async () => {
118
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
119
- await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
120
- });
121
-
122
- test.concurrent('should throw error for blocklisted URL on crawl', async () => {
123
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
124
- const blocklistedUrl = "https://twitter.com/fake-test";
125
- await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
171
+ if (API_URL.includes('api.firecrawl.dev')) {
172
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
173
+ await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
174
+ } else {
175
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
176
+ await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
177
+ }
126
178
  });
127
179
 
128
180
  test.concurrent('should return successful response for crawl and wait for completion', async () => {
129
181
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
130
- const response = await app.crawlUrl('https://roastmywebsite.ai', {}, true, 30) as CrawlStatusResponse;
182
+ const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse;
131
183
  expect(response).not.toBeNull();
132
184
  expect(response).toHaveProperty("total");
133
185
  expect(response.total).toBeGreaterThan(0);
@@ -138,21 +190,25 @@ describe('FirecrawlApp E2E Tests', () => {
138
190
  expect(response).toHaveProperty("status");
139
191
  expect(response.status).toBe("completed");
140
192
  expect(response).not.toHaveProperty("next"); // wait until done
141
- expect(response.data?.length).toBeGreaterThan(0);
142
- expect(response.data?.[0]).toHaveProperty("markdown");
143
- expect(response.data?.[0].markdown).toContain("_Roast_");
144
- expect(response.data?.[0]).not.toHaveProperty('content'); // v0
145
- expect(response.data?.[0]).not.toHaveProperty("html");
146
- expect(response.data?.[0]).not.toHaveProperty("rawHtml");
147
- expect(response.data?.[0]).not.toHaveProperty("screenshot");
148
- expect(response.data?.[0]).not.toHaveProperty("links");
149
- expect(response.data?.[0]).toHaveProperty("metadata");
150
- expect(response.data?.[0].metadata).toHaveProperty("title");
151
- expect(response.data?.[0].metadata).toHaveProperty("description");
152
- expect(response.data?.[0].metadata).toHaveProperty("language");
153
- expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
154
- expect(response.data?.[0].metadata).toHaveProperty("statusCode");
155
- expect(response.data?.[0].metadata).not.toHaveProperty("error");
193
+ expect(response.data.length).toBeGreaterThan(0);
194
+ expect(response.data[0]).not.toBeNull();
195
+ expect(response.data[0]).not.toBeUndefined();
196
+ if (response.data[0]) {
197
+ expect(response.data[0]).toHaveProperty("markdown");
198
+ expect(response.data[0].markdown).toContain("_Roast_");
199
+ expect(response.data[0]).not.toHaveProperty('content'); // v0
200
+ expect(response.data[0]).not.toHaveProperty("html");
201
+ expect(response.data[0]).not.toHaveProperty("rawHtml");
202
+ expect(response.data[0]).not.toHaveProperty("screenshot");
203
+ expect(response.data[0]).not.toHaveProperty("links");
204
+ expect(response.data[0]).toHaveProperty("metadata");
205
+ expect(response.data[0].metadata).toHaveProperty("title");
206
+ expect(response.data[0].metadata).toHaveProperty("description");
207
+ expect(response.data[0].metadata).toHaveProperty("language");
208
+ expect(response.data[0].metadata).toHaveProperty("sourceURL");
209
+ expect(response.data[0].metadata).toHaveProperty("statusCode");
210
+ expect(response.data[0].metadata).not.toHaveProperty("error");
211
+ }
156
212
  }, 60000); // 60 seconds timeout
157
213
 
158
214
  test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
@@ -173,7 +229,7 @@ describe('FirecrawlApp E2E Tests', () => {
173
229
  onlyMainContent: true,
174
230
  waitFor: 1000
175
231
  }
176
- } as CrawlParams, true, 30) as CrawlStatusResponse;
232
+ } as CrawlParams, 30) as CrawlStatusResponse;
177
233
  expect(response).not.toBeNull();
178
234
  expect(response).toHaveProperty("total");
179
235
  expect(response.total).toBeGreaterThan(0);
@@ -184,41 +240,45 @@ describe('FirecrawlApp E2E Tests', () => {
184
240
  expect(response).toHaveProperty("status");
185
241
  expect(response.status).toBe("completed");
186
242
  expect(response).not.toHaveProperty("next");
187
- expect(response.data?.length).toBeGreaterThan(0);
188
- expect(response.data?.[0]).toHaveProperty("markdown");
189
- expect(response.data?.[0].markdown).toContain("_Roast_");
190
- expect(response.data?.[0]).not.toHaveProperty('content'); // v0
191
- expect(response.data?.[0]).toHaveProperty("html");
192
- expect(response.data?.[0].html).toContain("<h1");
193
- expect(response.data?.[0]).toHaveProperty("rawHtml");
194
- expect(response.data?.[0].rawHtml).toContain("<h1");
195
- expect(response.data?.[0]).toHaveProperty("screenshot");
196
- expect(response.data?.[0].screenshot).toContain("https://");
197
- expect(response.data?.[0]).toHaveProperty("links");
198
- expect(response.data?.[0].links).not.toBeNull();
199
- expect(response.data?.[0].links?.length).toBeGreaterThan(0);
200
- expect(response.data?.[0]).toHaveProperty("metadata");
201
- expect(response.data?.[0].metadata).toHaveProperty("title");
202
- expect(response.data?.[0].metadata).toHaveProperty("description");
203
- expect(response.data?.[0].metadata).toHaveProperty("language");
204
- expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
205
- expect(response.data?.[0].metadata).toHaveProperty("statusCode");
206
- expect(response.data?.[0].metadata).not.toHaveProperty("error");
243
+ expect(response.data.length).toBeGreaterThan(0);
244
+ expect(response.data[0]).not.toBeNull();
245
+ expect(response.data[0]).not.toBeUndefined();
246
+ if (response.data[0]) {
247
+ expect(response.data[0]).toHaveProperty("markdown");
248
+ expect(response.data[0].markdown).toContain("_Roast_");
249
+ expect(response.data[0]).not.toHaveProperty('content'); // v0
250
+ expect(response.data[0]).toHaveProperty("html");
251
+ expect(response.data[0].html).toContain("<h1");
252
+ expect(response.data[0]).toHaveProperty("rawHtml");
253
+ expect(response.data[0].rawHtml).toContain("<h1");
254
+ expect(response.data[0]).toHaveProperty("screenshot");
255
+ expect(response.data[0].screenshot).toContain("https://");
256
+ expect(response.data[0]).toHaveProperty("links");
257
+ expect(response.data[0].links).not.toBeNull();
258
+ expect(response.data[0].links?.length).toBeGreaterThan(0);
259
+ expect(response.data[0]).toHaveProperty("metadata");
260
+ expect(response.data[0].metadata).toHaveProperty("title");
261
+ expect(response.data[0].metadata).toHaveProperty("description");
262
+ expect(response.data[0].metadata).toHaveProperty("language");
263
+ expect(response.data[0].metadata).toHaveProperty("sourceURL");
264
+ expect(response.data[0].metadata).toHaveProperty("statusCode");
265
+ expect(response.data[0].metadata).not.toHaveProperty("error");
266
+ }
207
267
  }, 60000); // 60 seconds timeout
208
268
 
209
269
  test.concurrent('should handle idempotency key for crawl', async () => {
210
270
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
211
271
  const uniqueIdempotencyKey = uuidv4();
212
- const response = await app.crawlUrl('https://roastmywebsite.ai', {}, false, 2, uniqueIdempotencyKey) as CrawlResponse;
272
+ const response = await app.asyncCrawlUrl('https://roastmywebsite.ai', {}, uniqueIdempotencyKey) as CrawlResponse;
213
273
  expect(response).not.toBeNull();
214
274
  expect(response.id).toBeDefined();
215
275
 
216
- await expect(app.crawlUrl('https://roastmywebsite.ai', {}, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
276
+ await expect(app.crawlUrl('https://roastmywebsite.ai', {}, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
217
277
  });
218
278
 
219
279
  test.concurrent('should check crawl status', async () => {
220
280
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
221
- const response = await app.crawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams, false) as CrawlResponse;
281
+ const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
222
282
  expect(response).not.toBeNull();
223
283
  expect(response.id).toBeDefined();
224
284
 
@@ -226,7 +286,8 @@ describe('FirecrawlApp E2E Tests', () => {
226
286
  const maxChecks = 15;
227
287
  let checks = 0;
228
288
 
229
- while (statusResponse.status === 'scraping' && checks < maxChecks) {
289
+ expect(statusResponse.success).toBe(true);
290
+ while ((statusResponse as any).status === 'scraping' && checks < maxChecks) {
230
291
  await new Promise(resolve => setTimeout(resolve, 5000));
231
292
  expect(statusResponse).not.toHaveProperty("partial_data"); // v0
232
293
  expect(statusResponse).not.toHaveProperty("current"); // v0
@@ -236,49 +297,65 @@ describe('FirecrawlApp E2E Tests', () => {
236
297
  expect(statusResponse).toHaveProperty("expiresAt");
237
298
  expect(statusResponse).toHaveProperty("status");
238
299
  expect(statusResponse).toHaveProperty("next");
239
- expect(statusResponse.total).toBeGreaterThan(0);
240
- expect(statusResponse.creditsUsed).toBeGreaterThan(0);
241
- expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
242
- expect(statusResponse.status).toBe("scraping");
243
- expect(statusResponse.next).toContain("/v1/crawl/");
300
+ expect(statusResponse.success).toBe(true);
301
+ if (statusResponse.success === true) {
302
+ expect(statusResponse.total).toBeGreaterThan(0);
303
+ expect(statusResponse.creditsUsed).toBeGreaterThan(0);
304
+ expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
305
+ expect(statusResponse.status).toBe("scraping");
306
+ expect(statusResponse.next).toContain("/v1/crawl/");
307
+ }
244
308
  statusResponse = await app.checkCrawlStatus(response.id) as CrawlStatusResponse;
309
+ expect(statusResponse.success).toBe(true);
245
310
  checks++;
246
311
  }
247
312
 
248
313
  expect(statusResponse).not.toBeNull();
249
314
  expect(statusResponse).toHaveProperty("total");
250
- expect(statusResponse.total).toBeGreaterThan(0);
251
- expect(statusResponse).toHaveProperty("creditsUsed");
252
- expect(statusResponse.creditsUsed).toBeGreaterThan(0);
253
- expect(statusResponse).toHaveProperty("expiresAt");
254
- expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
255
- expect(statusResponse).toHaveProperty("status");
256
- expect(statusResponse.status).toBe("completed");
257
- expect(statusResponse.data?.length).toBeGreaterThan(0);
258
- expect(statusResponse.data?.[0]).toHaveProperty("markdown");
259
- expect(statusResponse.data?.[0].markdown?.length).toBeGreaterThan(10);
260
- expect(statusResponse.data?.[0]).not.toHaveProperty('content'); // v0
261
- expect(statusResponse.data?.[0]).toHaveProperty("html");
262
- expect(statusResponse.data?.[0].html).toContain("<div");
263
- expect(statusResponse.data?.[0]).toHaveProperty("rawHtml");
264
- expect(statusResponse.data?.[0].rawHtml).toContain("<div");
265
- expect(statusResponse.data?.[0]).toHaveProperty("screenshot");
266
- expect(statusResponse.data?.[0].screenshot).toContain("https://");
267
- expect(statusResponse.data?.[0]).toHaveProperty("links");
268
- expect(statusResponse.data?.[0].links).not.toBeNull();
269
- expect(statusResponse.data?.[0].links?.length).toBeGreaterThan(0);
270
- expect(statusResponse.data?.[0]).toHaveProperty("metadata");
271
- expect(statusResponse.data?.[0].metadata).toHaveProperty("title");
272
- expect(statusResponse.data?.[0].metadata).toHaveProperty("description");
273
- expect(statusResponse.data?.[0].metadata).toHaveProperty("language");
274
- expect(statusResponse.data?.[0].metadata).toHaveProperty("sourceURL");
275
- expect(statusResponse.data?.[0].metadata).toHaveProperty("statusCode");
276
- expect(statusResponse.data?.[0].metadata).not.toHaveProperty("error");
315
+ expect(statusResponse.success).toBe(true);
316
+ if (statusResponse.success === true) {
317
+ expect(statusResponse.total).toBeGreaterThan(0);
318
+ expect(statusResponse).toHaveProperty("creditsUsed");
319
+ expect(statusResponse.creditsUsed).toBeGreaterThan(0);
320
+ expect(statusResponse).toHaveProperty("expiresAt");
321
+ expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
322
+ expect(statusResponse).toHaveProperty("status");
323
+ expect(statusResponse.status).toBe("completed");
324
+ expect(statusResponse.data.length).toBeGreaterThan(0);
325
+ expect(statusResponse.data[0]).not.toBeNull();
326
+ expect(statusResponse.data[0]).not.toBeUndefined();
327
+ if (statusResponse.data[0]) {
328
+ expect(statusResponse.data[0]).toHaveProperty("markdown");
329
+ expect(statusResponse.data[0].markdown?.length).toBeGreaterThan(10);
330
+ expect(statusResponse.data[0]).not.toHaveProperty('content'); // v0
331
+ expect(statusResponse.data[0]).toHaveProperty("html");
332
+ expect(statusResponse.data[0].html).toContain("<div");
333
+ expect(statusResponse.data[0]).toHaveProperty("rawHtml");
334
+ expect(statusResponse.data[0].rawHtml).toContain("<div");
335
+ expect(statusResponse.data[0]).toHaveProperty("screenshot");
336
+ expect(statusResponse.data[0].screenshot).toContain("https://");
337
+ expect(statusResponse.data[0]).toHaveProperty("links");
338
+ expect(statusResponse.data[0].links).not.toBeNull();
339
+ expect(statusResponse.data[0].links?.length).toBeGreaterThan(0);
340
+ expect(statusResponse.data[0]).toHaveProperty("metadata");
341
+ expect(statusResponse.data[0].metadata).toHaveProperty("title");
342
+ expect(statusResponse.data[0].metadata).toHaveProperty("description");
343
+ expect(statusResponse.data[0].metadata).toHaveProperty("language");
344
+ expect(statusResponse.data[0].metadata).toHaveProperty("sourceURL");
345
+ expect(statusResponse.data[0].metadata).toHaveProperty("statusCode");
346
+ expect(statusResponse.data[0].metadata).not.toHaveProperty("error");
347
+ }
348
+ }
277
349
  }, 60000); // 60 seconds timeout
278
350
 
279
351
  test.concurrent('should throw error for invalid API key on map', async () => {
280
- const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
281
- await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 401");
352
+ if (API_URL.includes('api.firecrawl.dev')) {
353
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
354
+ await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
355
+ } else {
356
+ const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
357
+ await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
358
+ }
282
359
  });
283
360
 
284
361
  test.concurrent('should throw error for blocklisted URL on map', async () => {
@@ -295,8 +372,7 @@ describe('FirecrawlApp E2E Tests', () => {
295
372
  }, 30000); // 30 seconds timeout
296
373
 
297
374
  test.concurrent('should return successful response for valid map', async () => {
298
- const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
299
- const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
375
+ const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
300
376
  expect(response).not.toBeNull();
301
377
 
302
378
  expect(response.links?.length).toBeGreaterThan(0);
@@ -305,8 +381,45 @@ describe('FirecrawlApp E2E Tests', () => {
305
381
  expect(filteredLinks?.length).toBeGreaterThan(0);
306
382
  }, 30000); // 30 seconds timeout
307
383
 
308
- test('should throw NotImplementedError for search on v1', async () => {
384
+
385
+
386
+ test('should search with string query', async () => {
387
+ const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
388
+ const response = await app.search("firecrawl");
389
+ expect(response.success).toBe(true);
390
+ console.log(response.data);
391
+ expect(response.data?.length).toBeGreaterThan(0);
392
+ expect(response.data?.[0]?.markdown).toBeDefined();
393
+ expect(response.data?.[0]?.metadata).toBeDefined();
394
+ expect(response.data?.[0]?.metadata?.title).toBeDefined();
395
+ expect(response.data?.[0]?.metadata?.description).toBeDefined();
396
+ });
397
+
398
+ test('should search with params object', async () => {
309
399
  const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY });
310
- await expect(app.search("test query")).rejects.toThrow("Search is not supported in v1");
400
+ const response = await app.search("firecrawl", {
401
+ limit: 3,
402
+ lang: 'en',
403
+ country: 'us',
404
+ scrapeOptions: {
405
+ formats: ['markdown', 'html', 'links'],
406
+ onlyMainContent: true
407
+ }
408
+ });
409
+ expect(response.success).toBe(true);
410
+ expect(response.data.length).toBeLessThanOrEqual(3);
411
+ for (const doc of response.data) {
412
+ expect(doc.markdown).toBeDefined();
413
+ expect(doc.html).toBeDefined();
414
+ expect(doc.links).toBeDefined();
415
+ expect(doc.metadata).toBeDefined();
416
+ expect(doc.metadata?.title).toBeDefined();
417
+ expect(doc.metadata?.description).toBeDefined();
418
+ }
419
+ });
420
+
421
+ test('should handle invalid API key for search', async () => {
422
+ const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: "invalid_api_key" });
423
+ await expect(app.search("test query")).rejects.toThrow("Request failed with status code 404");
311
424
  });
312
425
  });