firecrawl 1.27.0 → 1.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs
CHANGED
|
@@ -35,7 +35,7 @@ var require_package = __commonJS({
|
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
37
|
name: "@mendable/firecrawl-js",
|
|
38
|
-
version: "1.27.0",
|
|
38
|
+
version: "1.29.0",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
package/dist/index.d.cts
CHANGED
|
@@ -120,6 +120,7 @@ interface CrawlScrapeOptions {
|
|
|
120
120
|
proxy?: "basic" | "stealth" | "auto";
|
|
121
121
|
storeInCache?: boolean;
|
|
122
122
|
maxAge?: number;
|
|
123
|
+
parsePDF?: boolean;
|
|
123
124
|
}
|
|
124
125
|
type Action = {
|
|
125
126
|
type: "wait";
|
|
@@ -132,6 +133,7 @@ type Action = {
|
|
|
132
133
|
} | {
|
|
133
134
|
type: "screenshot";
|
|
134
135
|
fullPage?: boolean;
|
|
136
|
+
quality?: number;
|
|
135
137
|
} | {
|
|
136
138
|
type: "write";
|
|
137
139
|
text: string;
|
|
@@ -217,6 +219,7 @@ interface CrawlParams {
|
|
|
217
219
|
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
218
220
|
*/
|
|
219
221
|
delay?: number;
|
|
222
|
+
allowSubdomains?: boolean;
|
|
220
223
|
maxConcurrency?: number;
|
|
221
224
|
}
|
|
222
225
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -120,6 +120,7 @@ interface CrawlScrapeOptions {
|
|
|
120
120
|
proxy?: "basic" | "stealth" | "auto";
|
|
121
121
|
storeInCache?: boolean;
|
|
122
122
|
maxAge?: number;
|
|
123
|
+
parsePDF?: boolean;
|
|
123
124
|
}
|
|
124
125
|
type Action = {
|
|
125
126
|
type: "wait";
|
|
@@ -132,6 +133,7 @@ type Action = {
|
|
|
132
133
|
} | {
|
|
133
134
|
type: "screenshot";
|
|
134
135
|
fullPage?: boolean;
|
|
136
|
+
quality?: number;
|
|
135
137
|
} | {
|
|
136
138
|
type: "write";
|
|
137
139
|
text: string;
|
|
@@ -217,6 +219,7 @@ interface CrawlParams {
|
|
|
217
219
|
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
218
220
|
*/
|
|
219
221
|
delay?: number;
|
|
222
|
+
allowSubdomains?: boolean;
|
|
220
223
|
maxConcurrency?: number;
|
|
221
224
|
}
|
|
222
225
|
/**
|
package/dist/index.js
CHANGED
|
@@ -29,7 +29,7 @@ var FirecrawlApp = class {
|
|
|
29
29
|
}
|
|
30
30
|
async getVersion() {
|
|
31
31
|
try {
|
|
32
|
-
const packageJson = await import("./package-
|
|
32
|
+
const packageJson = await import("./package-SROKDQ7E.js");
|
|
33
33
|
return packageJson.default.version;
|
|
34
34
|
} catch (error) {
|
|
35
35
|
console.error("Error getting version:", error);
|
package/package.json
CHANGED
|
@@ -103,6 +103,32 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
103
103
|
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
104
104
|
}, 30000); // 30 seconds timeout
|
|
105
105
|
|
|
106
|
+
test.concurrent('should return successful response for valid scrape with PDF file and parsePDF true', async () => {
|
|
107
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
108
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
|
|
109
|
+
parsePDF: true
|
|
110
|
+
});
|
|
111
|
+
if (!response.success) {
|
|
112
|
+
throw new Error(response.error);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
expect(response).not.toBeNull();
|
|
116
|
+
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
117
|
+
}, 30000); // 30 seconds timeout
|
|
118
|
+
|
|
119
|
+
test.concurrent('should return successful response for valid scrape with PDF file and parsePDF false', async () => {
|
|
120
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
121
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
|
|
122
|
+
parsePDF: false
|
|
123
|
+
});
|
|
124
|
+
if (!response.success) {
|
|
125
|
+
throw new Error(response.error);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
expect(response).not.toBeNull();
|
|
129
|
+
expect(response?.markdown).toMatch(/^[A-Za-z0-9+/]+=*$/);
|
|
130
|
+
}, 30000); // 30 seconds timeout
|
|
131
|
+
|
|
106
132
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
107
133
|
if (API_URL.includes('api.firecrawl.dev')) {
|
|
108
134
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
package/src/index.ts
CHANGED
|
@@ -125,6 +125,7 @@ export interface CrawlScrapeOptions {
|
|
|
125
125
|
proxy?: "basic" | "stealth" | "auto";
|
|
126
126
|
storeInCache?: boolean;
|
|
127
127
|
maxAge?: number;
|
|
128
|
+
parsePDF?: boolean;
|
|
128
129
|
}
|
|
129
130
|
|
|
130
131
|
export type Action = {
|
|
@@ -138,6 +139,7 @@ export type Action = {
|
|
|
138
139
|
} | {
|
|
139
140
|
type: "screenshot",
|
|
140
141
|
fullPage?: boolean,
|
|
142
|
+
quality?: number,
|
|
141
143
|
} | {
|
|
142
144
|
type: "write",
|
|
143
145
|
text: string,
|
|
@@ -227,6 +229,7 @@ export interface CrawlParams {
|
|
|
227
229
|
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
228
230
|
*/
|
|
229
231
|
delay?: number;
|
|
232
|
+
allowSubdomains?: boolean;
|
|
230
233
|
maxConcurrency?: number;
|
|
231
234
|
}
|
|
232
235
|
|