firecrawl 1.9.7 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +19 -7
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +19 -7
- package/package.json +1 -1
- package/src/__tests__/index.test.ts +18 -9
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +34 -18
- package/src/index.ts +21 -8
package/dist/index.cjs
CHANGED
|
@@ -49,16 +49,20 @@ var FirecrawlError = class extends Error {
|
|
|
49
49
|
var FirecrawlApp = class {
|
|
50
50
|
apiKey;
|
|
51
51
|
apiUrl;
|
|
52
|
+
isCloudService(url) {
|
|
53
|
+
return url.includes("api.firecrawl.dev");
|
|
54
|
+
}
|
|
52
55
|
/**
|
|
53
56
|
* Initializes a new instance of the FirecrawlApp class.
|
|
54
57
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
55
58
|
*/
|
|
56
59
|
constructor({ apiKey = null, apiUrl = null }) {
|
|
57
|
-
|
|
60
|
+
const baseUrl = apiUrl || "https://api.firecrawl.dev";
|
|
61
|
+
if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
|
|
58
62
|
throw new FirecrawlError("No API key provided", 401);
|
|
59
63
|
}
|
|
60
|
-
this.apiKey = apiKey;
|
|
61
|
-
this.apiUrl =
|
|
64
|
+
this.apiKey = apiKey || "";
|
|
65
|
+
this.apiUrl = baseUrl;
|
|
62
66
|
}
|
|
63
67
|
/**
|
|
64
68
|
* Scrapes a URL using the Firecrawl API.
|
|
@@ -641,12 +645,20 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
|
|
|
641
645
|
this.ws.close();
|
|
642
646
|
return;
|
|
643
647
|
}
|
|
644
|
-
|
|
645
|
-
|
|
648
|
+
try {
|
|
649
|
+
const msg = JSON.parse(ev.data);
|
|
650
|
+
messageHandler(msg);
|
|
651
|
+
} catch (error) {
|
|
652
|
+
console.error("Error on message", error);
|
|
653
|
+
}
|
|
646
654
|
}).bind(this);
|
|
647
655
|
this.ws.onclose = ((ev) => {
|
|
648
|
-
|
|
649
|
-
|
|
656
|
+
try {
|
|
657
|
+
const msg = JSON.parse(ev.reason);
|
|
658
|
+
messageHandler(msg);
|
|
659
|
+
} catch (error) {
|
|
660
|
+
console.error("Error on close", error);
|
|
661
|
+
}
|
|
650
662
|
}).bind(this);
|
|
651
663
|
this.ws.onerror = ((_) => {
|
|
652
664
|
this.status = "failed";
|
package/dist/index.d.cts
CHANGED
|
@@ -265,6 +265,7 @@ declare class FirecrawlError extends Error {
|
|
|
265
265
|
declare class FirecrawlApp {
|
|
266
266
|
apiKey: string;
|
|
267
267
|
apiUrl: string;
|
|
268
|
+
private isCloudService;
|
|
268
269
|
/**
|
|
269
270
|
* Initializes a new instance of the FirecrawlApp class.
|
|
270
271
|
* @param config - Configuration options for the FirecrawlApp instance.
|
package/dist/index.d.ts
CHANGED
|
@@ -265,6 +265,7 @@ declare class FirecrawlError extends Error {
|
|
|
265
265
|
declare class FirecrawlApp {
|
|
266
266
|
apiKey: string;
|
|
267
267
|
apiUrl: string;
|
|
268
|
+
private isCloudService;
|
|
268
269
|
/**
|
|
269
270
|
* Initializes a new instance of the FirecrawlApp class.
|
|
270
271
|
* @param config - Configuration options for the FirecrawlApp instance.
|
package/dist/index.js
CHANGED
|
@@ -13,16 +13,20 @@ var FirecrawlError = class extends Error {
|
|
|
13
13
|
var FirecrawlApp = class {
|
|
14
14
|
apiKey;
|
|
15
15
|
apiUrl;
|
|
16
|
+
isCloudService(url) {
|
|
17
|
+
return url.includes("api.firecrawl.dev");
|
|
18
|
+
}
|
|
16
19
|
/**
|
|
17
20
|
* Initializes a new instance of the FirecrawlApp class.
|
|
18
21
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
19
22
|
*/
|
|
20
23
|
constructor({ apiKey = null, apiUrl = null }) {
|
|
21
|
-
|
|
24
|
+
const baseUrl = apiUrl || "https://api.firecrawl.dev";
|
|
25
|
+
if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
|
|
22
26
|
throw new FirecrawlError("No API key provided", 401);
|
|
23
27
|
}
|
|
24
|
-
this.apiKey = apiKey;
|
|
25
|
-
this.apiUrl =
|
|
28
|
+
this.apiKey = apiKey || "";
|
|
29
|
+
this.apiUrl = baseUrl;
|
|
26
30
|
}
|
|
27
31
|
/**
|
|
28
32
|
* Scrapes a URL using the Firecrawl API.
|
|
@@ -605,12 +609,20 @@ var CrawlWatcher = class extends TypedEventTarget {
|
|
|
605
609
|
this.ws.close();
|
|
606
610
|
return;
|
|
607
611
|
}
|
|
608
|
-
|
|
609
|
-
|
|
612
|
+
try {
|
|
613
|
+
const msg = JSON.parse(ev.data);
|
|
614
|
+
messageHandler(msg);
|
|
615
|
+
} catch (error) {
|
|
616
|
+
console.error("Error on message", error);
|
|
617
|
+
}
|
|
610
618
|
}).bind(this);
|
|
611
619
|
this.ws.onclose = ((ev) => {
|
|
612
|
-
|
|
613
|
-
|
|
620
|
+
try {
|
|
621
|
+
const msg = JSON.parse(ev.reason);
|
|
622
|
+
messageHandler(msg);
|
|
623
|
+
} catch (error) {
|
|
624
|
+
console.error("Error on close", error);
|
|
625
|
+
}
|
|
614
626
|
}).bind(this);
|
|
615
627
|
this.ws.onerror = ((_) => {
|
|
616
628
|
this.status = "failed";
|
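package/package.json
CHANGED
(diff body not captured in this extract; the file list above reports +1 -1)
package/src/__tests__/index.test.ts
CHANGED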
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { describe,
|
|
2
|
-
import axios from 'axios';
|
|
3
|
-
import FirecrawlApp from '../index';
|
|
1
|
+
import { describe, expect, jest, test } from '@jest/globals';
|
|
4
2
|
|
|
5
|
-
import
|
|
3
|
+
import FirecrawlApp from '../index';
|
|
4
|
+
import axios from 'axios';
|
|
6
5
|
import { join } from 'path';
|
|
6
|
+
import { readFile } from 'fs/promises';
|
|
7
7
|
|
|
8
8
|
// Mock jest and set the type
|
|
9
9
|
jest.mock('axios');
|
|
@@ -14,13 +14,22 @@ async function loadFixture(name: string): Promise<string> {
|
|
|
14
14
|
return await readFile(join(__dirname, 'fixtures', `${name}.json`), 'utf-8')
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
+
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
|
|
18
|
+
|
|
17
19
|
describe('the firecrawl JS SDK', () => {
|
|
18
20
|
|
|
19
|
-
test('Should require an API key
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
test('Should require an API key only for cloud service', async () => {
|
|
22
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
23
|
+
// Should throw for cloud service
|
|
24
|
+
expect(() => {
|
|
25
|
+
new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
|
|
26
|
+
}).toThrow('No API key provided');
|
|
27
|
+
} else {
|
|
28
|
+
// Should not throw for self-hosted
|
|
29
|
+
expect(() => {
|
|
30
|
+
new FirecrawlApp({ apiKey: undefined, apiUrl: API_URL });
|
|
31
|
+
}).not.toThrow();
|
|
32
|
+
}
|
|
24
33
|
});
|
|
25
34
|
|
|
26
35
|
test('Should return scraped data from a /scrape API call', async () => {
|
package/src/__tests__/v1/e2e_withAuth/index.test.ts
CHANGED
|
@@ -9,15 +9,28 @@ const TEST_API_KEY = process.env.TEST_API_KEY;
|
|
|
9
9
|
const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
|
|
10
10
|
|
|
11
11
|
describe('FirecrawlApp E2E Tests', () => {
|
|
12
|
-
test.concurrent('should throw error for no API key', async () => {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
test.concurrent('should throw error for no API key only for cloud service', async () => {
|
|
13
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
14
|
+
// Should throw for cloud service
|
|
15
|
+
expect(() => {
|
|
16
|
+
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
|
17
|
+
}).toThrow("No API key provided");
|
|
18
|
+
} else {
|
|
19
|
+
// Should not throw for self-hosted
|
|
20
|
+
expect(() => {
|
|
21
|
+
new FirecrawlApp({ apiKey: null, apiUrl: API_URL });
|
|
22
|
+
}).not.toThrow();
|
|
23
|
+
}
|
|
16
24
|
});
|
|
17
25
|
|
|
18
26
|
test.concurrent('should throw error for invalid API key on scrape', async () => {
|
|
19
|
-
|
|
20
|
-
|
|
27
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
28
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
29
|
+
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).rejects.toThrow("Unexpected error occurred while trying to scrape URL. Status code: 404");
|
|
30
|
+
} else {
|
|
31
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
32
|
+
await expect(invalidApp.scrapeUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
|
33
|
+
}
|
|
21
34
|
});
|
|
22
35
|
|
|
23
36
|
test.concurrent('should throw error for blocklisted URL on scrape', async () => {
|
|
@@ -155,14 +168,13 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
155
168
|
}, 30000); // 30 seconds timeout
|
|
156
169
|
|
|
157
170
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
await expect(app.crawlUrl(blocklistedUrl)).rejects.toThrow("URL is blocked. Firecrawl currently does not support social media scraping due to policy restrictions.");
|
|
171
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
172
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
173
|
+
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
|
|
174
|
+
} else {
|
|
175
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
176
|
+
await expect(invalidApp.crawlUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
|
177
|
+
}
|
|
166
178
|
});
|
|
167
179
|
|
|
168
180
|
test.concurrent('should return successful response for crawl and wait for completion', async () => {
|
|
@@ -337,8 +349,13 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
337
349
|
}, 60000); // 60 seconds timeout
|
|
338
350
|
|
|
339
351
|
test.concurrent('should throw error for invalid API key on map', async () => {
|
|
340
|
-
|
|
341
|
-
|
|
352
|
+
if (API_URL.includes('api.firecrawl.dev')) {
|
|
353
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
354
|
+
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).rejects.toThrow("Request failed with status code 404");
|
|
355
|
+
} else {
|
|
356
|
+
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
|
357
|
+
await expect(invalidApp.mapUrl('https://roastmywebsite.ai')).resolves.not.toThrow();
|
|
358
|
+
}
|
|
342
359
|
});
|
|
343
360
|
|
|
344
361
|
test.concurrent('should throw error for blocklisted URL on map', async () => {
|
|
@@ -355,8 +372,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
355
372
|
}, 30000); // 30 seconds timeout
|
|
356
373
|
|
|
357
374
|
test.concurrent('should return successful response for valid map', async () => {
|
|
358
|
-
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
359
|
-
const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
|
|
375
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse;
|
|
360
376
|
expect(response).not.toBeNull();
|
|
361
377
|
|
|
362
378
|
expect(response.links?.length).toBeGreaterThan(0);
|
package/src/index.ts
CHANGED
|
@@ -290,17 +290,23 @@ export default class FirecrawlApp {
|
|
|
290
290
|
public apiKey: string;
|
|
291
291
|
public apiUrl: string;
|
|
292
292
|
|
|
293
|
+
private isCloudService(url: string): boolean {
|
|
294
|
+
return url.includes('api.firecrawl.dev');
|
|
295
|
+
}
|
|
296
|
+
|
|
293
297
|
/**
|
|
294
298
|
* Initializes a new instance of the FirecrawlApp class.
|
|
295
299
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
296
300
|
*/
|
|
297
301
|
constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
|
|
298
|
-
|
|
302
|
+
const baseUrl = apiUrl || "https://api.firecrawl.dev";
|
|
303
|
+
|
|
304
|
+
if (this.isCloudService(baseUrl) && typeof apiKey !== "string") {
|
|
299
305
|
throw new FirecrawlError("No API key provided", 401);
|
|
300
306
|
}
|
|
301
307
|
|
|
302
|
-
this.apiKey = apiKey;
|
|
303
|
-
this.apiUrl =
|
|
308
|
+
this.apiKey = apiKey || '';
|
|
309
|
+
this.apiUrl = baseUrl;
|
|
304
310
|
}
|
|
305
311
|
|
|
306
312
|
/**
|
|
@@ -1009,14 +1015,21 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
|
|
|
1009
1015
|
this.ws.close();
|
|
1010
1016
|
return;
|
|
1011
1017
|
}
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1018
|
+
try {
|
|
1019
|
+
const msg = JSON.parse(ev.data) as Message;
|
|
1020
|
+
messageHandler(msg);
|
|
1021
|
+
} catch (error) {
|
|
1022
|
+
console.error("Error on message", error);
|
|
1023
|
+
}
|
|
1015
1024
|
}).bind(this);
|
|
1016
1025
|
|
|
1017
1026
|
this.ws.onclose = ((ev: CloseEvent) => {
|
|
1018
|
-
|
|
1019
|
-
|
|
1027
|
+
try {
|
|
1028
|
+
const msg = JSON.parse(ev.reason) as Message;
|
|
1029
|
+
messageHandler(msg);
|
|
1030
|
+
} catch (error) {
|
|
1031
|
+
console.error("Error on close", error);
|
|
1032
|
+
}
|
|
1020
1033
|
}).bind(this);
|
|
1021
1034
|
|
|
1022
1035
|
this.ws.onerror = ((_: Event) => {
|