@mendable/firecrawl 1.27.0 → 1.29.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs
CHANGED
@@ -35,7 +35,7 @@ var require_package = __commonJS({
     "package.json"(exports2, module2) {
       module2.exports = {
         name: "@mendable/firecrawl-js",
-        version: "1.27.0",
+        version: "1.29.1",
         description: "JavaScript SDK for Firecrawl API",
         main: "dist/index.js",
         types: "dist/index.d.ts",
package/dist/index.d.cts
CHANGED
@@ -120,6 +120,7 @@ interface CrawlScrapeOptions {
     proxy?: "basic" | "stealth" | "auto";
     storeInCache?: boolean;
     maxAge?: number;
+    parsePDF?: boolean;
 }
 type Action = {
     type: "wait";
@@ -132,6 +133,7 @@ type Action = {
 } | {
     type: "screenshot";
     fullPage?: boolean;
+    quality?: number;
 } | {
     type: "write";
     text: string;
@@ -167,6 +169,7 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
     };
     actions?: ActionsSchema;
     agent?: AgentOptions;
+    zeroDataRetention?: boolean;
 }
 interface ActionsResult {
     screenshots: string[];
@@ -217,7 +220,9 @@ interface CrawlParams {
      * If not provided, the crawler may use the robots.txt crawl delay if available.
      */
     delay?: number;
+    allowSubdomains?: boolean;
     maxConcurrency?: number;
+    zeroDataRetention?: boolean;
 }
 /**
  * Response interface for crawling operations.
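Taken together, these hunks add four new surface-level options: parsePDF on CrawlScrapeOptions, quality on the screenshot action, and zeroDataRetention on both ScrapeParams and CrawlParams, plus allowSubdomains on CrawlParams. Below is a minimal TypeScript sketch of how they might be passed to the existing scrapeUrl method; the apiKey is a placeholder, and reading quality as a 0-100 image-quality setting is an inference from the name, not something the diff states.

// Sketch only: new options from this release, passed to the existing scrapeUrl API.
import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" }); // placeholder key

// parsePDF (new): ask the API to parse the PDF into markdown text.
// zeroDataRetention (new): per-request flag, presumably opting out of server-side retention.
const pdf = await app.scrapeUrl("https://arxiv.org/pdf/astro-ph/9301001.pdf", {
  parsePDF: true,
  zeroDataRetention: true,
});
if (pdf.success) console.log(pdf.markdown);

// quality (new): optional number on the screenshot action; assumed to be an
// image-quality setting (e.g. 0-100), since the diff does not document it.
const page = await app.scrapeUrl("https://example.com", {
  actions: [{ type: "screenshot", fullPage: true, quality: 80 }],
});
if (page.success) console.log(page.actions?.screenshots);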
package/dist/index.d.ts
CHANGED
@@ -120,6 +120,7 @@ interface CrawlScrapeOptions {
     proxy?: "basic" | "stealth" | "auto";
     storeInCache?: boolean;
     maxAge?: number;
+    parsePDF?: boolean;
 }
 type Action = {
     type: "wait";
@@ -132,6 +133,7 @@ type Action = {
 } | {
     type: "screenshot";
     fullPage?: boolean;
+    quality?: number;
 } | {
     type: "write";
     text: string;
@@ -167,6 +169,7 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
     };
     actions?: ActionsSchema;
     agent?: AgentOptions;
+    zeroDataRetention?: boolean;
 }
 interface ActionsResult {
     screenshots: string[];
@@ -217,7 +220,9 @@ interface CrawlParams {
      * If not provided, the crawler may use the robots.txt crawl delay if available.
      */
     delay?: number;
+    allowSubdomains?: boolean;
     maxConcurrency?: number;
+    zeroDataRetention?: boolean;
 }
 /**
  * Response interface for crawling operations.
package/dist/index.js
CHANGED
@@ -29,7 +29,7 @@ var FirecrawlApp = class {
   }
   async getVersion() {
     try {
-      const packageJson = await import("./package-
+      const packageJson = await import("./package-TKKTR5R7.js");
       return packageJson.default.version;
     } catch (error) {
       console.error("Error getting version:", error);
package/package.json
CHANGED
@@ -103,6 +103,32 @@ describe('FirecrawlApp E2E Tests', () => {
     expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
   }, 30000); // 30 seconds timeout

+  test.concurrent('should return successful response for valid scrape with PDF file and parsePDF true', async () => {
+    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+    const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
+      parsePDF: true
+    });
+    if (!response.success) {
+      throw new Error(response.error);
+    }
+
+    expect(response).not.toBeNull();
+    expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
+  }, 30000); // 30 seconds timeout
+
+  test.concurrent('should return successful response for valid scrape with PDF file and parsePDF false', async () => {
+    const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
+    const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
+      parsePDF: false
+    });
+    if (!response.success) {
+      throw new Error(response.error);
+    }
+
+    expect(response).not.toBeNull();
+    expect(response?.markdown).toMatch(/^[A-Za-z0-9+/]+=*$/);
+  }, 30000); // 30 seconds timeout
+
   test.concurrent('should throw error for invalid API key on crawl', async () => {
     if (API_URL.includes('api.firecrawl.dev')) {
       const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
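The second new test asserts that with parsePDF: false the markdown field matches a bare base64 pattern rather than extracted text. Assuming that payload is the base64-encoded bytes of the original PDF (the test only checks the character set, so this is an inference), a caller could persist it as follows; the output file name is arbitrary.

import { writeFile } from "node:fs/promises";
import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" }); // placeholder key

const response = await app.scrapeUrl("https://arxiv.org/pdf/astro-ph/9301001.pdf", {
  parsePDF: false, // new: skip parsing; markdown holds a base64 payload per the test above
});

if (response.success && response.markdown) {
  // Decode the base64 payload; writing it as a .pdf assumes it is the raw file.
  await writeFile("astro-ph-9301001.pdf", Buffer.from(response.markdown, "base64"));
}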
package/src/index.ts
CHANGED
@@ -125,6 +125,7 @@ export interface CrawlScrapeOptions {
   proxy?: "basic" | "stealth" | "auto";
   storeInCache?: boolean;
   maxAge?: number;
+  parsePDF?: boolean;
 }

 export type Action = {
@@ -138,6 +139,7 @@ export type Action = {
 } | {
   type: "screenshot",
   fullPage?: boolean,
+  quality?: number,
 } | {
   type: "write",
   text: string,
@@ -174,6 +176,7 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
   }
   actions?: ActionsSchema;
   agent?: AgentOptions;
+  zeroDataRetention?: boolean;
 }

 export interface ActionsResult {
@@ -227,7 +230,9 @@ export interface CrawlParams {
    * If not provided, the crawler may use the robots.txt crawl delay if available.
    */
   delay?: number;
+  allowSubdomains?: boolean;
   maxConcurrency?: number;
+  zeroDataRetention?: boolean;
 }

 /**
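The CrawlParams additions here mirror the declaration files above. A short sketch of a crawl using them, again assuming the existing crawlUrl method and a placeholder API key; the comments describe the behavior each field name suggests, which the diff itself does not spell out.

import FirecrawlApp from "@mendable/firecrawl-js";

const app = new FirecrawlApp({ apiKey: "fc-YOUR-API-KEY" }); // placeholder key

const crawl = await app.crawlUrl("https://example.com", {
  limit: 50,
  maxConcurrency: 2,       // already present in 1.27.0
  allowSubdomains: true,   // new: presumably lets the crawler follow links onto subdomains
  zeroDataRetention: true, // new: per-crawl counterpart of the ScrapeParams flag
});

if (crawl.success) {
  console.log(`Crawled ${crawl.data.length} pages`);
}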