firecrawl 1.4.2 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10 -2
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +11 -3
- package/package.json +1 -1
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +42 -5
- package/src/index.ts +12 -4
package/dist/index.cjs
CHANGED
|
@@ -271,8 +271,16 @@ var FirecrawlApp = class {
|
|
|
271
271
|
* @param headers - The headers for the request.
|
|
272
272
|
* @returns The response from the GET request.
|
|
273
273
|
*/
|
|
274
|
-
getRequest(url, headers) {
|
|
275
|
-
|
|
274
|
+
async getRequest(url, headers) {
|
|
275
|
+
try {
|
|
276
|
+
return await import_axios.default.get(url, { headers });
|
|
277
|
+
} catch (error) {
|
|
278
|
+
if (error instanceof import_axios.AxiosError && error.response) {
|
|
279
|
+
return error.response;
|
|
280
|
+
} else {
|
|
281
|
+
throw error;
|
|
282
|
+
}
|
|
283
|
+
}
|
|
276
284
|
}
|
|
277
285
|
/**
|
|
278
286
|
* Monitors the status of a crawl job until completion or failure.
|
package/dist/index.d.cts
CHANGED
|
@@ -69,7 +69,7 @@ interface FirecrawlDocument<T> {
|
|
|
69
69
|
* Defines the options and configurations available for scraping web content.
|
|
70
70
|
*/
|
|
71
71
|
interface CrawlScrapeOptions {
|
|
72
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "
|
|
72
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
73
73
|
headers?: Record<string, string>;
|
|
74
74
|
includeTags?: string[];
|
|
75
75
|
excludeTags?: string[];
|
package/dist/index.d.ts
CHANGED
|
@@ -69,7 +69,7 @@ interface FirecrawlDocument<T> {
|
|
|
69
69
|
* Defines the options and configurations available for scraping web content.
|
|
70
70
|
*/
|
|
71
71
|
interface CrawlScrapeOptions {
|
|
72
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "
|
|
72
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
73
73
|
headers?: Record<string, string>;
|
|
74
74
|
includeTags?: string[];
|
|
75
75
|
excludeTags?: string[];
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
|
-
import axios from "axios";
|
|
2
|
+
import axios, { AxiosError } from "axios";
|
|
3
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
4
|
import { WebSocket } from "isows";
|
|
5
5
|
import { TypedEventTarget } from "typescript-event-target";
|
|
@@ -236,8 +236,16 @@ var FirecrawlApp = class {
|
|
|
236
236
|
* @param headers - The headers for the request.
|
|
237
237
|
* @returns The response from the GET request.
|
|
238
238
|
*/
|
|
239
|
-
getRequest(url, headers) {
|
|
240
|
-
|
|
239
|
+
async getRequest(url, headers) {
|
|
240
|
+
try {
|
|
241
|
+
return await axios.get(url, { headers });
|
|
242
|
+
} catch (error) {
|
|
243
|
+
if (error instanceof AxiosError && error.response) {
|
|
244
|
+
return error.response;
|
|
245
|
+
} else {
|
|
246
|
+
throw error;
|
|
247
|
+
}
|
|
248
|
+
}
|
|
241
249
|
}
|
|
242
250
|
/**
|
|
243
251
|
* Monitors the status of a crawl job until completion or failure.
|
package/src/__tests__/v1/e2e_withAuth/index.test.ts
CHANGED
|
@@ -28,14 +28,22 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
28
28
|
|
|
29
29
|
test.concurrent('should return successful response with valid preview token', async () => {
|
|
30
30
|
const app = new FirecrawlApp({ apiKey: "this_is_just_a_preview_token", apiUrl: API_URL });
|
|
31
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai')
|
|
31
|
+
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
32
|
+
if (!response.success) {
|
|
33
|
+
throw new Error(response.error);
|
|
34
|
+
}
|
|
35
|
+
|
|
32
36
|
expect(response).not.toBeNull();
|
|
33
37
|
expect(response?.markdown).toContain("_Roast_");
|
|
34
38
|
}, 30000); // 30 seconds timeout
|
|
35
39
|
|
|
36
40
|
test.concurrent('should return successful response for valid scrape', async () => {
|
|
37
41
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
38
|
-
const response = await app.scrapeUrl('https://roastmywebsite.ai')
|
|
42
|
+
const response = await app.scrapeUrl('https://roastmywebsite.ai');
|
|
43
|
+
if (!response.success) {
|
|
44
|
+
throw new Error(response.error);
|
|
45
|
+
}
|
|
46
|
+
|
|
39
47
|
expect(response).not.toBeNull();
|
|
40
48
|
expect(response).not.toHaveProperty('content'); // v0
|
|
41
49
|
expect(response).not.toHaveProperty('html');
|
|
@@ -58,7 +66,11 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
58
66
|
onlyMainContent: true,
|
|
59
67
|
timeout: 30000,
|
|
60
68
|
waitFor: 1000
|
|
61
|
-
})
|
|
69
|
+
});
|
|
70
|
+
if (!response.success) {
|
|
71
|
+
throw new Error(response.error);
|
|
72
|
+
}
|
|
73
|
+
|
|
62
74
|
expect(response).not.toBeNull();
|
|
63
75
|
expect(response).not.toHaveProperty('content'); // v0
|
|
64
76
|
expect(response.markdown).toContain("_Roast_");
|
|
@@ -86,6 +98,7 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
86
98
|
expect(response.metadata).not.toHaveProperty("pageStatusCode");
|
|
87
99
|
expect(response.metadata).toHaveProperty("statusCode");
|
|
88
100
|
expect(response.metadata).not.toHaveProperty("pageError");
|
|
101
|
+
|
|
89
102
|
if (response.metadata !== undefined) {
|
|
90
103
|
expect(response.metadata.error).toBeUndefined();
|
|
91
104
|
expect(response.metadata.title).toBe("Roast My Website");
|
|
@@ -103,16 +116,40 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
103
116
|
}
|
|
104
117
|
}, 30000); // 30 seconds timeout
|
|
105
118
|
|
|
119
|
+
test.concurrent('should return successful response with valid API key and screenshot fullPage', async () => {
|
|
120
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
121
|
+
const response = await app.scrapeUrl(
|
|
122
|
+
'https://roastmywebsite.ai', {
|
|
123
|
+
formats: ['screenshot@fullPage'],
|
|
124
|
+
});
|
|
125
|
+
if (!response.success) {
|
|
126
|
+
throw new Error(response.error);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
expect(response).not.toBeNull();
|
|
130
|
+
expect(response.screenshot).not.toBeUndefined();
|
|
131
|
+
expect(response.screenshot).not.toBeNull();
|
|
132
|
+
expect(response.screenshot).toContain("https://");
|
|
133
|
+
}, 30000); // 30 seconds timeout
|
|
134
|
+
|
|
106
135
|
test.concurrent('should return successful response for valid scrape with PDF file', async () => {
|
|
107
136
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
108
|
-
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf')
|
|
137
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf');
|
|
138
|
+
if (!response.success) {
|
|
139
|
+
throw new Error(response.error);
|
|
140
|
+
}
|
|
141
|
+
|
|
109
142
|
expect(response).not.toBeNull();
|
|
110
143
|
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
111
144
|
}, 30000); // 30 seconds timeout
|
|
112
145
|
|
|
113
146
|
test.concurrent('should return successful response for valid scrape with PDF file without explicit extension', async () => {
|
|
114
147
|
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
115
|
-
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001')
|
|
148
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001');
|
|
149
|
+
if (!response.success) {
|
|
150
|
+
throw new Error(response.error);
|
|
151
|
+
}
|
|
152
|
+
|
|
116
153
|
expect(response).not.toBeNull();
|
|
117
154
|
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
118
155
|
}, 30000); // 30 seconds timeout
|
package/src/index.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
|
|
1
|
+
import axios, { type AxiosResponse, type AxiosRequestHeaders, AxiosError } from "axios";
|
|
2
2
|
import type * as zt from "zod";
|
|
3
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
4
|
import { WebSocket } from "isows";
|
|
@@ -74,7 +74,7 @@ export interface FirecrawlDocument<T> {
|
|
|
74
74
|
* Defines the options and configurations available for scraping web content.
|
|
75
75
|
*/
|
|
76
76
|
export interface CrawlScrapeOptions {
|
|
77
|
-
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "
|
|
77
|
+
formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
|
|
78
78
|
headers?: Record<string, string>;
|
|
79
79
|
includeTags?: string[];
|
|
80
80
|
excludeTags?: string[];
|
|
@@ -452,11 +452,19 @@ export default class FirecrawlApp {
|
|
|
452
452
|
* @param headers - The headers for the request.
|
|
453
453
|
* @returns The response from the GET request.
|
|
454
454
|
*/
|
|
455
|
-
getRequest(
|
|
455
|
+
async getRequest(
|
|
456
456
|
url: string,
|
|
457
457
|
headers: AxiosRequestHeaders
|
|
458
458
|
): Promise<AxiosResponse> {
|
|
459
|
-
|
|
459
|
+
try {
|
|
460
|
+
return await axios.get(url, { headers });
|
|
461
|
+
} catch (error) {
|
|
462
|
+
if (error instanceof AxiosError && error.response) {
|
|
463
|
+
return error.response as AxiosResponse;
|
|
464
|
+
} else {
|
|
465
|
+
throw error;
|
|
466
|
+
}
|
|
467
|
+
}
|
|
460
468
|
}
|
|
461
469
|
|
|
462
470
|
/**
|