@mendable/firecrawl 1.25.6 → 1.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs
CHANGED
|
@@ -35,7 +35,7 @@ var require_package = __commonJS({
|
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
37
|
name: "@mendable/firecrawl-js",
|
|
38
|
-
version: "1.25.6",
|
|
38
|
+
version: "1.29.0",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
|
@@ -109,7 +109,7 @@ __export(src_exports, {
|
|
|
109
109
|
});
|
|
110
110
|
module.exports = __toCommonJS(src_exports);
|
|
111
111
|
var import_axios = __toESM(require("axios"), 1);
|
|
112
|
-
var zt =
|
|
112
|
+
var zt = require("zod");
|
|
113
113
|
var import_zod_to_json_schema = require("zod-to-json-schema");
|
|
114
114
|
|
|
115
115
|
// node_modules/typescript-event-target/dist/index.mjs
|
|
@@ -713,10 +713,12 @@ var FirecrawlApp = class {
|
|
|
713
713
|
try {
|
|
714
714
|
if (!params?.schema) {
|
|
715
715
|
jsonSchema = void 0;
|
|
716
|
-
} else if (typeof params.schema === "object" && params.schema !== null && Object.getPrototypeOf(params.schema)?.constructor?.name?.startsWith("Zod")) {
|
|
717
|
-
jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
|
|
718
716
|
} else {
|
|
719
|
-
|
|
717
|
+
try {
|
|
718
|
+
jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
|
|
719
|
+
} catch (_) {
|
|
720
|
+
jsonSchema = params.schema;
|
|
721
|
+
}
|
|
720
722
|
}
|
|
721
723
|
} catch (error) {
|
|
722
724
|
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
|
@@ -773,10 +775,14 @@ var FirecrawlApp = class {
|
|
|
773
775
|
let jsonData = { urls, ...params };
|
|
774
776
|
let jsonSchema;
|
|
775
777
|
try {
|
|
776
|
-
if (params?.schema
|
|
777
|
-
jsonSchema =
|
|
778
|
+
if (!params?.schema) {
|
|
779
|
+
jsonSchema = void 0;
|
|
778
780
|
} else {
|
|
779
|
-
|
|
781
|
+
try {
|
|
782
|
+
jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
|
|
783
|
+
} catch (_) {
|
|
784
|
+
jsonSchema = params.schema;
|
|
785
|
+
}
|
|
780
786
|
}
|
|
781
787
|
} catch (error) {
|
|
782
788
|
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
package/dist/index.d.cts
CHANGED
|
@@ -120,6 +120,7 @@ interface CrawlScrapeOptions {
|
|
|
120
120
|
proxy?: "basic" | "stealth" | "auto";
|
|
121
121
|
storeInCache?: boolean;
|
|
122
122
|
maxAge?: number;
|
|
123
|
+
parsePDF?: boolean;
|
|
123
124
|
}
|
|
124
125
|
type Action = {
|
|
125
126
|
type: "wait";
|
|
@@ -132,6 +133,7 @@ type Action = {
|
|
|
132
133
|
} | {
|
|
133
134
|
type: "screenshot";
|
|
134
135
|
fullPage?: boolean;
|
|
136
|
+
quality?: number;
|
|
135
137
|
} | {
|
|
136
138
|
type: "write";
|
|
137
139
|
text: string;
|
|
@@ -199,6 +201,7 @@ interface CrawlParams {
|
|
|
199
201
|
maxDiscoveryDepth?: number;
|
|
200
202
|
limit?: number;
|
|
201
203
|
allowBackwardLinks?: boolean;
|
|
204
|
+
crawlEntireDomain?: boolean;
|
|
202
205
|
allowExternalLinks?: boolean;
|
|
203
206
|
ignoreSitemap?: boolean;
|
|
204
207
|
scrapeOptions?: CrawlScrapeOptions;
|
|
@@ -216,6 +219,7 @@ interface CrawlParams {
|
|
|
216
219
|
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
217
220
|
*/
|
|
218
221
|
delay?: number;
|
|
222
|
+
allowSubdomains?: boolean;
|
|
219
223
|
maxConcurrency?: number;
|
|
220
224
|
}
|
|
221
225
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -120,6 +120,7 @@ interface CrawlScrapeOptions {
|
|
|
120
120
|
proxy?: "basic" | "stealth" | "auto";
|
|
121
121
|
storeInCache?: boolean;
|
|
122
122
|
maxAge?: number;
|
|
123
|
+
parsePDF?: boolean;
|
|
123
124
|
}
|
|
124
125
|
type Action = {
|
|
125
126
|
type: "wait";
|
|
@@ -132,6 +133,7 @@ type Action = {
|
|
|
132
133
|
} | {
|
|
133
134
|
type: "screenshot";
|
|
134
135
|
fullPage?: boolean;
|
|
136
|
+
quality?: number;
|
|
135
137
|
} | {
|
|
136
138
|
type: "write";
|
|
137
139
|
text: string;
|
|
@@ -199,6 +201,7 @@ interface CrawlParams {
|
|
|
199
201
|
maxDiscoveryDepth?: number;
|
|
200
202
|
limit?: number;
|
|
201
203
|
allowBackwardLinks?: boolean;
|
|
204
|
+
crawlEntireDomain?: boolean;
|
|
202
205
|
allowExternalLinks?: boolean;
|
|
203
206
|
ignoreSitemap?: boolean;
|
|
204
207
|
scrapeOptions?: CrawlScrapeOptions;
|
|
@@ -216,6 +219,7 @@ interface CrawlParams {
|
|
|
216
219
|
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
217
220
|
*/
|
|
218
221
|
delay?: number;
|
|
222
|
+
allowSubdomains?: boolean;
|
|
219
223
|
maxConcurrency?: number;
|
|
220
224
|
}
|
|
221
225
|
/**
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
2
|
import axios, { AxiosError } from "axios";
|
|
3
|
-
import
|
|
3
|
+
import "zod";
|
|
4
4
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
5
5
|
|
|
6
6
|
// node_modules/typescript-event-target/dist/index.mjs
|
|
@@ -29,7 +29,7 @@ var FirecrawlApp = class {
|
|
|
29
29
|
}
|
|
30
30
|
async getVersion() {
|
|
31
31
|
try {
|
|
32
|
-
const packageJson = await import("./package-
|
|
32
|
+
const packageJson = await import("./package-SROKDQ7E.js");
|
|
33
33
|
return packageJson.default.version;
|
|
34
34
|
} catch (error) {
|
|
35
35
|
console.error("Error getting version:", error);
|
|
@@ -604,10 +604,12 @@ var FirecrawlApp = class {
|
|
|
604
604
|
try {
|
|
605
605
|
if (!params?.schema) {
|
|
606
606
|
jsonSchema = void 0;
|
|
607
|
-
} else if (typeof params.schema === "object" && params.schema !== null && Object.getPrototypeOf(params.schema)?.constructor?.name?.startsWith("Zod")) {
|
|
608
|
-
jsonSchema = zodToJsonSchema(params.schema);
|
|
609
607
|
} else {
|
|
610
|
-
|
|
608
|
+
try {
|
|
609
|
+
jsonSchema = zodToJsonSchema(params.schema);
|
|
610
|
+
} catch (_) {
|
|
611
|
+
jsonSchema = params.schema;
|
|
612
|
+
}
|
|
611
613
|
}
|
|
612
614
|
} catch (error) {
|
|
613
615
|
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
|
@@ -664,10 +666,14 @@ var FirecrawlApp = class {
|
|
|
664
666
|
let jsonData = { urls, ...params };
|
|
665
667
|
let jsonSchema;
|
|
666
668
|
try {
|
|
667
|
-
if (params?.schema
|
|
668
|
-
jsonSchema =
|
|
669
|
+
if (!params?.schema) {
|
|
670
|
+
jsonSchema = void 0;
|
|
669
671
|
} else {
|
|
670
|
-
|
|
672
|
+
try {
|
|
673
|
+
jsonSchema = zodToJsonSchema(params.schema);
|
|
674
|
+
} catch (_) {
|
|
675
|
+
jsonSchema = params.schema;
|
|
676
|
+
}
|
|
671
677
|
}
|
|
672
678
|
} catch (error) {
|
|
673
679
|
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
package/package.json
CHANGED
|
@@ -103,6 +103,32 @@ describe('FirecrawlApp E2E Tests', () => {
|
|
|
103
103
|
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
104
104
|
}, 30000); // 30 seconds timeout
|
|
105
105
|
|
|
106
|
+
test.concurrent('should return successful response for valid scrape with PDF file and parsePDF true', async () => {
|
|
107
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
108
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
|
|
109
|
+
parsePDF: true
|
|
110
|
+
});
|
|
111
|
+
if (!response.success) {
|
|
112
|
+
throw new Error(response.error);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
expect(response).not.toBeNull();
|
|
116
|
+
expect(response?.markdown).toContain('We present spectrophotometric observations of the Broad Line Radio Galaxy');
|
|
117
|
+
}, 30000); // 30 seconds timeout
|
|
118
|
+
|
|
119
|
+
test.concurrent('should return successful response for valid scrape with PDF file and parsePDF false', async () => {
|
|
120
|
+
const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
|
|
121
|
+
const response = await app.scrapeUrl('https://arxiv.org/pdf/astro-ph/9301001.pdf', {
|
|
122
|
+
parsePDF: false
|
|
123
|
+
});
|
|
124
|
+
if (!response.success) {
|
|
125
|
+
throw new Error(response.error);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
expect(response).not.toBeNull();
|
|
129
|
+
expect(response?.markdown).toMatch(/^[A-Za-z0-9+/]+=*$/);
|
|
130
|
+
}, 30000); // 30 seconds timeout
|
|
131
|
+
|
|
106
132
|
test.concurrent('should throw error for invalid API key on crawl', async () => {
|
|
107
133
|
if (API_URL.includes('api.firecrawl.dev')) {
|
|
108
134
|
const invalidApp = new FirecrawlApp({ apiKey: "invalid_api_key", apiUrl: API_URL });
|
package/src/index.ts
CHANGED
|
@@ -125,6 +125,7 @@ export interface CrawlScrapeOptions {
|
|
|
125
125
|
proxy?: "basic" | "stealth" | "auto";
|
|
126
126
|
storeInCache?: boolean;
|
|
127
127
|
maxAge?: number;
|
|
128
|
+
parsePDF?: boolean;
|
|
128
129
|
}
|
|
129
130
|
|
|
130
131
|
export type Action = {
|
|
@@ -138,6 +139,7 @@ export type Action = {
|
|
|
138
139
|
} | {
|
|
139
140
|
type: "screenshot",
|
|
140
141
|
fullPage?: boolean,
|
|
142
|
+
quality?: number,
|
|
141
143
|
} | {
|
|
142
144
|
type: "write",
|
|
143
145
|
text: string,
|
|
@@ -209,6 +211,7 @@ export interface CrawlParams {
|
|
|
209
211
|
maxDiscoveryDepth?: number;
|
|
210
212
|
limit?: number;
|
|
211
213
|
allowBackwardLinks?: boolean;
|
|
214
|
+
crawlEntireDomain?: boolean;
|
|
212
215
|
allowExternalLinks?: boolean;
|
|
213
216
|
ignoreSitemap?: boolean;
|
|
214
217
|
scrapeOptions?: CrawlScrapeOptions;
|
|
@@ -226,6 +229,7 @@ export interface CrawlParams {
|
|
|
226
229
|
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
227
230
|
*/
|
|
228
231
|
delay?: number;
|
|
232
|
+
allowSubdomains?: boolean;
|
|
229
233
|
maxConcurrency?: number;
|
|
230
234
|
}
|
|
231
235
|
|
|
@@ -1243,10 +1247,12 @@ export default class FirecrawlApp {
|
|
|
1243
1247
|
try {
|
|
1244
1248
|
if (!params?.schema) {
|
|
1245
1249
|
jsonSchema = undefined;
|
|
1246
|
-
} else if (typeof params.schema === "object" && params.schema !== null && Object.getPrototypeOf(params.schema)?.constructor?.name?.startsWith("Zod")) {
|
|
1247
|
-
jsonSchema = zodToJsonSchema(params.schema as zt.ZodType);
|
|
1248
1250
|
} else {
|
|
1249
|
-
|
|
1251
|
+
try {
|
|
1252
|
+
jsonSchema = zodToJsonSchema(params.schema as zt.ZodType);
|
|
1253
|
+
} catch (_) {
|
|
1254
|
+
jsonSchema = params.schema;
|
|
1255
|
+
}
|
|
1250
1256
|
}
|
|
1251
1257
|
} catch (error: any) {
|
|
1252
1258
|
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
|
@@ -1311,10 +1317,14 @@ export default class FirecrawlApp {
|
|
|
1311
1317
|
let jsonSchema: any;
|
|
1312
1318
|
|
|
1313
1319
|
try {
|
|
1314
|
-
if (params?.schema
|
|
1315
|
-
jsonSchema =
|
|
1320
|
+
if (!params?.schema) {
|
|
1321
|
+
jsonSchema = undefined;
|
|
1316
1322
|
} else {
|
|
1317
|
-
|
|
1323
|
+
try {
|
|
1324
|
+
jsonSchema = zodToJsonSchema(params.schema as zt.ZodType);
|
|
1325
|
+
} catch (_) {
|
|
1326
|
+
jsonSchema = params.schema;
|
|
1327
|
+
}
|
|
1318
1328
|
}
|
|
1319
1329
|
} catch (error: any) {
|
|
1320
1330
|
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|