@mendable/firecrawl 1.24.0 → 1.25.4
This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/dist/index.cjs +107 -12
- package/dist/index.d.cts +16 -1
- package/dist/index.d.ts +16 -1
- package/dist/index.js +34 -12
- package/dist/package-SG22PRGT.js +102 -0
- package/package.json +1 -1
- package/src/index.ts +52 -15
package/dist/index.cjs
CHANGED
|
@@ -5,6 +5,9 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
|
5
5
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
6
|
var __getProtoOf = Object.getPrototypeOf;
|
|
7
7
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __commonJS = (cb, mod) => function __require() {
|
|
9
|
+
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
10
|
+
};
|
|
8
11
|
var __export = (target, all) => {
|
|
9
12
|
for (var name in all)
|
|
10
13
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
@@ -27,6 +30,76 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
27
30
|
));
|
|
28
31
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
32
|
|
|
33
|
+
// package.json
|
|
34
|
+
var require_package = __commonJS({
|
|
35
|
+
"package.json"(exports2, module2) {
|
|
36
|
+
module2.exports = {
|
|
37
|
+
name: "@mendable/firecrawl-js",
|
|
38
|
+
version: "1.25.4",
|
|
39
|
+
description: "JavaScript SDK for Firecrawl API",
|
|
40
|
+
main: "dist/index.js",
|
|
41
|
+
types: "dist/index.d.ts",
|
|
42
|
+
exports: {
|
|
43
|
+
"./package.json": "./package.json",
|
|
44
|
+
".": {
|
|
45
|
+
import: "./dist/index.js",
|
|
46
|
+
default: "./dist/index.cjs"
|
|
47
|
+
}
|
|
48
|
+
},
|
|
49
|
+
type: "module",
|
|
50
|
+
scripts: {
|
|
51
|
+
build: "tsup",
|
|
52
|
+
"build-and-publish": "npm run build && npm publish --access public",
|
|
53
|
+
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
54
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
55
|
+
},
|
|
56
|
+
repository: {
|
|
57
|
+
type: "git",
|
|
58
|
+
url: "git+https://github.com/mendableai/firecrawl.git"
|
|
59
|
+
},
|
|
60
|
+
author: "Mendable.ai",
|
|
61
|
+
license: "MIT",
|
|
62
|
+
dependencies: {
|
|
63
|
+
"typescript-event-target": "^1.1.1",
|
|
64
|
+
zod: "^3.23.8",
|
|
65
|
+
"zod-to-json-schema": "^3.23.0",
|
|
66
|
+
axios: "^1.6.8"
|
|
67
|
+
},
|
|
68
|
+
bugs: {
|
|
69
|
+
url: "https://github.com/mendableai/firecrawl/issues"
|
|
70
|
+
},
|
|
71
|
+
homepage: "https://github.com/mendableai/firecrawl#readme",
|
|
72
|
+
devDependencies: {
|
|
73
|
+
"@jest/globals": "^29.7.0",
|
|
74
|
+
"@types/axios": "^0.14.0",
|
|
75
|
+
"@types/dotenv": "^8.2.0",
|
|
76
|
+
"@types/jest": "^29.5.14",
|
|
77
|
+
"@types/mocha": "^10.0.6",
|
|
78
|
+
"@types/node": "^20.12.12",
|
|
79
|
+
"@types/uuid": "^9.0.8",
|
|
80
|
+
dotenv: "^16.4.5",
|
|
81
|
+
jest: "^29.7.0",
|
|
82
|
+
"ts-jest": "^29.2.2",
|
|
83
|
+
tsup: "^8.2.4",
|
|
84
|
+
typescript: "^5.4.5",
|
|
85
|
+
uuid: "^9.0.1"
|
|
86
|
+
},
|
|
87
|
+
keywords: [
|
|
88
|
+
"firecrawl",
|
|
89
|
+
"mendable",
|
|
90
|
+
"crawler",
|
|
91
|
+
"web",
|
|
92
|
+
"scraper",
|
|
93
|
+
"api",
|
|
94
|
+
"sdk"
|
|
95
|
+
],
|
|
96
|
+
engines: {
|
|
97
|
+
node: ">=22.0.0"
|
|
98
|
+
}
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
});
|
|
102
|
+
|
|
30
103
|
// src/index.ts
|
|
31
104
|
var src_exports = {};
|
|
32
105
|
__export(src_exports, {
|
|
@@ -59,9 +132,22 @@ var FirecrawlError = class extends Error {
|
|
|
59
132
|
var FirecrawlApp = class {
|
|
60
133
|
apiKey;
|
|
61
134
|
apiUrl;
|
|
135
|
+
version = "1.25.1";
|
|
62
136
|
isCloudService(url) {
|
|
63
137
|
return url.includes("api.firecrawl.dev");
|
|
64
138
|
}
|
|
139
|
+
async getVersion() {
|
|
140
|
+
try {
|
|
141
|
+
const packageJson = await Promise.resolve().then(() => __toESM(require_package(), 1));
|
|
142
|
+
return packageJson.default.version;
|
|
143
|
+
} catch (error) {
|
|
144
|
+
console.error("Error getting version:", error);
|
|
145
|
+
return "1.25.1";
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
async init() {
|
|
149
|
+
this.version = await this.getVersion();
|
|
150
|
+
}
|
|
65
151
|
/**
|
|
66
152
|
* Initializes a new instance of the FirecrawlApp class.
|
|
67
153
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
@@ -73,6 +159,7 @@ var FirecrawlApp = class {
|
|
|
73
159
|
}
|
|
74
160
|
this.apiKey = apiKey || "";
|
|
75
161
|
this.apiUrl = baseUrl;
|
|
162
|
+
this.init();
|
|
76
163
|
}
|
|
77
164
|
/**
|
|
78
165
|
* Scrapes a URL using the Firecrawl API.
|
|
@@ -85,7 +172,7 @@ var FirecrawlApp = class {
|
|
|
85
172
|
"Content-Type": "application/json",
|
|
86
173
|
Authorization: `Bearer ${this.apiKey}`
|
|
87
174
|
};
|
|
88
|
-
let jsonData = { url, ...params };
|
|
175
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
89
176
|
if (jsonData?.extract?.schema) {
|
|
90
177
|
let schema = jsonData.extract.schema;
|
|
91
178
|
try {
|
|
@@ -159,7 +246,7 @@ var FirecrawlApp = class {
|
|
|
159
246
|
lang: params?.lang ?? "en",
|
|
160
247
|
country: params?.country ?? "us",
|
|
161
248
|
location: params?.location,
|
|
162
|
-
origin:
|
|
249
|
+
origin: `js-sdk@${this.version}`,
|
|
163
250
|
timeout: params?.timeout ?? 6e4,
|
|
164
251
|
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
|
|
165
252
|
};
|
|
@@ -219,7 +306,7 @@ var FirecrawlApp = class {
|
|
|
219
306
|
*/
|
|
220
307
|
async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
|
|
221
308
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
222
|
-
let jsonData = { url, ...params };
|
|
309
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
223
310
|
try {
|
|
224
311
|
const response = await this.postRequest(
|
|
225
312
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -243,7 +330,7 @@ var FirecrawlApp = class {
|
|
|
243
330
|
}
|
|
244
331
|
async asyncCrawlUrl(url, params, idempotencyKey) {
|
|
245
332
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
246
|
-
let jsonData = { url, ...params };
|
|
333
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
247
334
|
try {
|
|
248
335
|
const response = await this.postRequest(
|
|
249
336
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -402,7 +489,7 @@ var FirecrawlApp = class {
|
|
|
402
489
|
*/
|
|
403
490
|
async mapUrl(url, params) {
|
|
404
491
|
const headers = this.prepareHeaders();
|
|
405
|
-
let jsonData = { url, ...params };
|
|
492
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
406
493
|
try {
|
|
407
494
|
const response = await this.postRequest(
|
|
408
495
|
this.apiUrl + `/v1/map`,
|
|
@@ -431,7 +518,7 @@ var FirecrawlApp = class {
|
|
|
431
518
|
*/
|
|
432
519
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
433
520
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
434
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
521
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
435
522
|
if (jsonData?.extract?.schema) {
|
|
436
523
|
let schema = jsonData.extract.schema;
|
|
437
524
|
try {
|
|
@@ -483,7 +570,7 @@ var FirecrawlApp = class {
|
|
|
483
570
|
}
|
|
484
571
|
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
485
572
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
486
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params
|
|
573
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
487
574
|
try {
|
|
488
575
|
const response = await this.postRequest(
|
|
489
576
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -637,7 +724,7 @@ var FirecrawlApp = class {
|
|
|
637
724
|
try {
|
|
638
725
|
const response = await this.postRequest(
|
|
639
726
|
this.apiUrl + `/v1/extract`,
|
|
640
|
-
{ ...jsonData, schema: jsonSchema, origin:
|
|
727
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
641
728
|
headers
|
|
642
729
|
);
|
|
643
730
|
if (response.status === 200) {
|
|
@@ -697,7 +784,7 @@ var FirecrawlApp = class {
|
|
|
697
784
|
try {
|
|
698
785
|
const response = await this.postRequest(
|
|
699
786
|
this.apiUrl + `/v1/extract`,
|
|
700
|
-
{ ...jsonData, schema: jsonSchema },
|
|
787
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
701
788
|
headers
|
|
702
789
|
);
|
|
703
790
|
if (response.status === 200) {
|
|
@@ -849,6 +936,12 @@ var FirecrawlApp = class {
|
|
|
849
936
|
* @param {string} action - The action being performed when the error occurred.
|
|
850
937
|
*/
|
|
851
938
|
handleError(response, action) {
|
|
939
|
+
if (!response) {
|
|
940
|
+
throw new FirecrawlError(
|
|
941
|
+
`No response received while trying to ${action}. This may be a network error or the server is unreachable.`,
|
|
942
|
+
0
|
|
943
|
+
);
|
|
944
|
+
}
|
|
852
945
|
if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
|
|
853
946
|
const errorMessage = response.data.error || "Unknown error occurred";
|
|
854
947
|
const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
|
|
@@ -930,7 +1023,7 @@ var FirecrawlApp = class {
|
|
|
930
1023
|
*/
|
|
931
1024
|
async asyncDeepResearch(query, params) {
|
|
932
1025
|
const headers = this.prepareHeaders();
|
|
933
|
-
let jsonData = { query, ...params };
|
|
1026
|
+
let jsonData = { query, ...params, origin: `js-sdk@${this.version}` };
|
|
934
1027
|
if (jsonData?.jsonOptions?.schema) {
|
|
935
1028
|
let schema = jsonData.jsonOptions.schema;
|
|
936
1029
|
try {
|
|
@@ -1053,9 +1146,10 @@ var FirecrawlApp = class {
|
|
|
1053
1146
|
async __asyncDeepResearch(topic, params) {
|
|
1054
1147
|
const headers = this.prepareHeaders();
|
|
1055
1148
|
try {
|
|
1149
|
+
let jsonData = { topic, ...params, origin: `js-sdk@${this.version}` };
|
|
1056
1150
|
const response = await this.postRequest(
|
|
1057
1151
|
`${this.apiUrl}/v1/deep-research`,
|
|
1058
|
-
|
|
1152
|
+
jsonData,
|
|
1059
1153
|
headers
|
|
1060
1154
|
);
|
|
1061
1155
|
if (response.status === 200) {
|
|
@@ -1150,10 +1244,11 @@ var FirecrawlApp = class {
|
|
|
1150
1244
|
*/
|
|
1151
1245
|
async asyncGenerateLLMsText(url, params) {
|
|
1152
1246
|
const headers = this.prepareHeaders();
|
|
1247
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
1153
1248
|
try {
|
|
1154
1249
|
const response = await this.postRequest(
|
|
1155
1250
|
`${this.apiUrl}/v1/llmstxt`,
|
|
1156
|
-
|
|
1251
|
+
jsonData,
|
|
1157
1252
|
headers
|
|
1158
1253
|
);
|
|
1159
1254
|
if (response.status === 200) {
|
package/dist/index.d.cts
CHANGED
|
@@ -114,7 +114,8 @@ interface CrawlScrapeOptions {
|
|
|
114
114
|
skipTlsVerification?: boolean;
|
|
115
115
|
removeBase64Images?: boolean;
|
|
116
116
|
blockAds?: boolean;
|
|
117
|
-
proxy?: "basic" | "stealth";
|
|
117
|
+
proxy?: "basic" | "stealth" | "auto";
|
|
118
|
+
storeInCache?: boolean;
|
|
118
119
|
}
|
|
119
120
|
type Action = {
|
|
120
121
|
type: "wait";
|
|
@@ -158,6 +159,7 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
158
159
|
prompt?: string;
|
|
159
160
|
schema?: any;
|
|
160
161
|
modes?: ("json" | "git-diff")[];
|
|
162
|
+
tag?: string | null;
|
|
161
163
|
};
|
|
162
164
|
actions?: ActionsSchema;
|
|
163
165
|
agent?: AgentOptions;
|
|
@@ -205,6 +207,11 @@ interface CrawlParams {
|
|
|
205
207
|
deduplicateSimilarURLs?: boolean;
|
|
206
208
|
ignoreQueryParameters?: boolean;
|
|
207
209
|
regexOnFullURL?: boolean;
|
|
210
|
+
/**
|
|
211
|
+
* Delay in seconds between scrapes. This helps respect website rate limits.
|
|
212
|
+
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
213
|
+
*/
|
|
214
|
+
delay?: number;
|
|
208
215
|
}
|
|
209
216
|
/**
|
|
210
217
|
* Response interface for crawling operations.
|
|
@@ -477,6 +484,11 @@ interface GenerateLLMsTextParams {
|
|
|
477
484
|
* @default false
|
|
478
485
|
*/
|
|
479
486
|
showFullText?: boolean;
|
|
487
|
+
/**
|
|
488
|
+
* Whether to use cached content if available
|
|
489
|
+
* @default true
|
|
490
|
+
*/
|
|
491
|
+
cache?: boolean;
|
|
480
492
|
/**
|
|
481
493
|
* Experimental flag for streaming
|
|
482
494
|
*/
|
|
@@ -509,7 +521,10 @@ interface GenerateLLMsTextStatusResponse {
|
|
|
509
521
|
declare class FirecrawlApp {
|
|
510
522
|
apiKey: string;
|
|
511
523
|
apiUrl: string;
|
|
524
|
+
version: string;
|
|
512
525
|
private isCloudService;
|
|
526
|
+
private getVersion;
|
|
527
|
+
private init;
|
|
513
528
|
/**
|
|
514
529
|
* Initializes a new instance of the FirecrawlApp class.
|
|
515
530
|
* @param config - Configuration options for the FirecrawlApp instance.
|
package/dist/index.d.ts
CHANGED
|
@@ -114,7 +114,8 @@ interface CrawlScrapeOptions {
|
|
|
114
114
|
skipTlsVerification?: boolean;
|
|
115
115
|
removeBase64Images?: boolean;
|
|
116
116
|
blockAds?: boolean;
|
|
117
|
-
proxy?: "basic" | "stealth";
|
|
117
|
+
proxy?: "basic" | "stealth" | "auto";
|
|
118
|
+
storeInCache?: boolean;
|
|
118
119
|
}
|
|
119
120
|
type Action = {
|
|
120
121
|
type: "wait";
|
|
@@ -158,6 +159,7 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
|
|
|
158
159
|
prompt?: string;
|
|
159
160
|
schema?: any;
|
|
160
161
|
modes?: ("json" | "git-diff")[];
|
|
162
|
+
tag?: string | null;
|
|
161
163
|
};
|
|
162
164
|
actions?: ActionsSchema;
|
|
163
165
|
agent?: AgentOptions;
|
|
@@ -205,6 +207,11 @@ interface CrawlParams {
|
|
|
205
207
|
deduplicateSimilarURLs?: boolean;
|
|
206
208
|
ignoreQueryParameters?: boolean;
|
|
207
209
|
regexOnFullURL?: boolean;
|
|
210
|
+
/**
|
|
211
|
+
* Delay in seconds between scrapes. This helps respect website rate limits.
|
|
212
|
+
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
213
|
+
*/
|
|
214
|
+
delay?: number;
|
|
208
215
|
}
|
|
209
216
|
/**
|
|
210
217
|
* Response interface for crawling operations.
|
|
@@ -477,6 +484,11 @@ interface GenerateLLMsTextParams {
|
|
|
477
484
|
* @default false
|
|
478
485
|
*/
|
|
479
486
|
showFullText?: boolean;
|
|
487
|
+
/**
|
|
488
|
+
* Whether to use cached content if available
|
|
489
|
+
* @default true
|
|
490
|
+
*/
|
|
491
|
+
cache?: boolean;
|
|
480
492
|
/**
|
|
481
493
|
* Experimental flag for streaming
|
|
482
494
|
*/
|
|
@@ -509,7 +521,10 @@ interface GenerateLLMsTextStatusResponse {
|
|
|
509
521
|
declare class FirecrawlApp {
|
|
510
522
|
apiKey: string;
|
|
511
523
|
apiUrl: string;
|
|
524
|
+
version: string;
|
|
512
525
|
private isCloudService;
|
|
526
|
+
private getVersion;
|
|
527
|
+
private init;
|
|
513
528
|
/**
|
|
514
529
|
* Initializes a new instance of the FirecrawlApp class.
|
|
515
530
|
* @param config - Configuration options for the FirecrawlApp instance.
|
package/dist/index.js
CHANGED
|
@@ -23,9 +23,22 @@ var FirecrawlError = class extends Error {
|
|
|
23
23
|
var FirecrawlApp = class {
|
|
24
24
|
apiKey;
|
|
25
25
|
apiUrl;
|
|
26
|
+
version = "1.25.1";
|
|
26
27
|
isCloudService(url) {
|
|
27
28
|
return url.includes("api.firecrawl.dev");
|
|
28
29
|
}
|
|
30
|
+
async getVersion() {
|
|
31
|
+
try {
|
|
32
|
+
const packageJson = await import("./package-SG22PRGT.js");
|
|
33
|
+
return packageJson.default.version;
|
|
34
|
+
} catch (error) {
|
|
35
|
+
console.error("Error getting version:", error);
|
|
36
|
+
return "1.25.1";
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
async init() {
|
|
40
|
+
this.version = await this.getVersion();
|
|
41
|
+
}
|
|
29
42
|
/**
|
|
30
43
|
* Initializes a new instance of the FirecrawlApp class.
|
|
31
44
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
@@ -37,6 +50,7 @@ var FirecrawlApp = class {
|
|
|
37
50
|
}
|
|
38
51
|
this.apiKey = apiKey || "";
|
|
39
52
|
this.apiUrl = baseUrl;
|
|
53
|
+
this.init();
|
|
40
54
|
}
|
|
41
55
|
/**
|
|
42
56
|
* Scrapes a URL using the Firecrawl API.
|
|
@@ -49,7 +63,7 @@ var FirecrawlApp = class {
|
|
|
49
63
|
"Content-Type": "application/json",
|
|
50
64
|
Authorization: `Bearer ${this.apiKey}`
|
|
51
65
|
};
|
|
52
|
-
let jsonData = { url, ...params };
|
|
66
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
53
67
|
if (jsonData?.extract?.schema) {
|
|
54
68
|
let schema = jsonData.extract.schema;
|
|
55
69
|
try {
|
|
@@ -123,7 +137,7 @@ var FirecrawlApp = class {
|
|
|
123
137
|
lang: params?.lang ?? "en",
|
|
124
138
|
country: params?.country ?? "us",
|
|
125
139
|
location: params?.location,
|
|
126
|
-
origin:
|
|
140
|
+
origin: `js-sdk@${this.version}`,
|
|
127
141
|
timeout: params?.timeout ?? 6e4,
|
|
128
142
|
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
|
|
129
143
|
};
|
|
@@ -183,7 +197,7 @@ var FirecrawlApp = class {
|
|
|
183
197
|
*/
|
|
184
198
|
async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
|
|
185
199
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
186
|
-
let jsonData = { url, ...params };
|
|
200
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
187
201
|
try {
|
|
188
202
|
const response = await this.postRequest(
|
|
189
203
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -207,7 +221,7 @@ var FirecrawlApp = class {
|
|
|
207
221
|
}
|
|
208
222
|
async asyncCrawlUrl(url, params, idempotencyKey) {
|
|
209
223
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
210
|
-
let jsonData = { url, ...params };
|
|
224
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
211
225
|
try {
|
|
212
226
|
const response = await this.postRequest(
|
|
213
227
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -366,7 +380,7 @@ var FirecrawlApp = class {
|
|
|
366
380
|
*/
|
|
367
381
|
async mapUrl(url, params) {
|
|
368
382
|
const headers = this.prepareHeaders();
|
|
369
|
-
let jsonData = { url, ...params };
|
|
383
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
370
384
|
try {
|
|
371
385
|
const response = await this.postRequest(
|
|
372
386
|
this.apiUrl + `/v1/map`,
|
|
@@ -395,7 +409,7 @@ var FirecrawlApp = class {
|
|
|
395
409
|
*/
|
|
396
410
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
397
411
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
398
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
412
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
399
413
|
if (jsonData?.extract?.schema) {
|
|
400
414
|
let schema = jsonData.extract.schema;
|
|
401
415
|
try {
|
|
@@ -447,7 +461,7 @@ var FirecrawlApp = class {
|
|
|
447
461
|
}
|
|
448
462
|
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
449
463
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
450
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params
|
|
464
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
451
465
|
try {
|
|
452
466
|
const response = await this.postRequest(
|
|
453
467
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -601,7 +615,7 @@ var FirecrawlApp = class {
|
|
|
601
615
|
try {
|
|
602
616
|
const response = await this.postRequest(
|
|
603
617
|
this.apiUrl + `/v1/extract`,
|
|
604
|
-
{ ...jsonData, schema: jsonSchema, origin:
|
|
618
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
605
619
|
headers
|
|
606
620
|
);
|
|
607
621
|
if (response.status === 200) {
|
|
@@ -661,7 +675,7 @@ var FirecrawlApp = class {
|
|
|
661
675
|
try {
|
|
662
676
|
const response = await this.postRequest(
|
|
663
677
|
this.apiUrl + `/v1/extract`,
|
|
664
|
-
{ ...jsonData, schema: jsonSchema },
|
|
678
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
665
679
|
headers
|
|
666
680
|
);
|
|
667
681
|
if (response.status === 200) {
|
|
@@ -813,6 +827,12 @@ var FirecrawlApp = class {
|
|
|
813
827
|
* @param {string} action - The action being performed when the error occurred.
|
|
814
828
|
*/
|
|
815
829
|
handleError(response, action) {
|
|
830
|
+
if (!response) {
|
|
831
|
+
throw new FirecrawlError(
|
|
832
|
+
`No response received while trying to ${action}. This may be a network error or the server is unreachable.`,
|
|
833
|
+
0
|
|
834
|
+
);
|
|
835
|
+
}
|
|
816
836
|
if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
|
|
817
837
|
const errorMessage = response.data.error || "Unknown error occurred";
|
|
818
838
|
const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
|
|
@@ -894,7 +914,7 @@ var FirecrawlApp = class {
|
|
|
894
914
|
*/
|
|
895
915
|
async asyncDeepResearch(query, params) {
|
|
896
916
|
const headers = this.prepareHeaders();
|
|
897
|
-
let jsonData = { query, ...params };
|
|
917
|
+
let jsonData = { query, ...params, origin: `js-sdk@${this.version}` };
|
|
898
918
|
if (jsonData?.jsonOptions?.schema) {
|
|
899
919
|
let schema = jsonData.jsonOptions.schema;
|
|
900
920
|
try {
|
|
@@ -1017,9 +1037,10 @@ var FirecrawlApp = class {
|
|
|
1017
1037
|
async __asyncDeepResearch(topic, params) {
|
|
1018
1038
|
const headers = this.prepareHeaders();
|
|
1019
1039
|
try {
|
|
1040
|
+
let jsonData = { topic, ...params, origin: `js-sdk@${this.version}` };
|
|
1020
1041
|
const response = await this.postRequest(
|
|
1021
1042
|
`${this.apiUrl}/v1/deep-research`,
|
|
1022
|
-
|
|
1043
|
+
jsonData,
|
|
1023
1044
|
headers
|
|
1024
1045
|
);
|
|
1025
1046
|
if (response.status === 200) {
|
|
@@ -1114,10 +1135,11 @@ var FirecrawlApp = class {
|
|
|
1114
1135
|
*/
|
|
1115
1136
|
async asyncGenerateLLMsText(url, params) {
|
|
1116
1137
|
const headers = this.prepareHeaders();
|
|
1138
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
1117
1139
|
try {
|
|
1118
1140
|
const response = await this.postRequest(
|
|
1119
1141
|
`${this.apiUrl}/v1/llmstxt`,
|
|
1120
|
-
|
|
1142
|
+
jsonData,
|
|
1121
1143
|
headers
|
|
1122
1144
|
);
|
|
1123
1145
|
if (response.status === 200) {
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// package.json
|
|
2
|
+
var name = "@mendable/firecrawl-js";
|
|
3
|
+
var version = "1.25.4";
|
|
4
|
+
var description = "JavaScript SDK for Firecrawl API";
|
|
5
|
+
var main = "dist/index.js";
|
|
6
|
+
var types = "dist/index.d.ts";
|
|
7
|
+
var exports = {
|
|
8
|
+
"./package.json": "./package.json",
|
|
9
|
+
".": {
|
|
10
|
+
import: "./dist/index.js",
|
|
11
|
+
default: "./dist/index.cjs"
|
|
12
|
+
}
|
|
13
|
+
};
|
|
14
|
+
var type = "module";
|
|
15
|
+
var scripts = {
|
|
16
|
+
build: "tsup",
|
|
17
|
+
"build-and-publish": "npm run build && npm publish --access public",
|
|
18
|
+
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
19
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
20
|
+
};
|
|
21
|
+
var repository = {
|
|
22
|
+
type: "git",
|
|
23
|
+
url: "git+https://github.com/mendableai/firecrawl.git"
|
|
24
|
+
};
|
|
25
|
+
var author = "Mendable.ai";
|
|
26
|
+
var license = "MIT";
|
|
27
|
+
var dependencies = {
|
|
28
|
+
"typescript-event-target": "^1.1.1",
|
|
29
|
+
zod: "^3.23.8",
|
|
30
|
+
"zod-to-json-schema": "^3.23.0",
|
|
31
|
+
axios: "^1.6.8"
|
|
32
|
+
};
|
|
33
|
+
var bugs = {
|
|
34
|
+
url: "https://github.com/mendableai/firecrawl/issues"
|
|
35
|
+
};
|
|
36
|
+
var homepage = "https://github.com/mendableai/firecrawl#readme";
|
|
37
|
+
var devDependencies = {
|
|
38
|
+
"@jest/globals": "^29.7.0",
|
|
39
|
+
"@types/axios": "^0.14.0",
|
|
40
|
+
"@types/dotenv": "^8.2.0",
|
|
41
|
+
"@types/jest": "^29.5.14",
|
|
42
|
+
"@types/mocha": "^10.0.6",
|
|
43
|
+
"@types/node": "^20.12.12",
|
|
44
|
+
"@types/uuid": "^9.0.8",
|
|
45
|
+
dotenv: "^16.4.5",
|
|
46
|
+
jest: "^29.7.0",
|
|
47
|
+
"ts-jest": "^29.2.2",
|
|
48
|
+
tsup: "^8.2.4",
|
|
49
|
+
typescript: "^5.4.5",
|
|
50
|
+
uuid: "^9.0.1"
|
|
51
|
+
};
|
|
52
|
+
var keywords = [
|
|
53
|
+
"firecrawl",
|
|
54
|
+
"mendable",
|
|
55
|
+
"crawler",
|
|
56
|
+
"web",
|
|
57
|
+
"scraper",
|
|
58
|
+
"api",
|
|
59
|
+
"sdk"
|
|
60
|
+
];
|
|
61
|
+
var engines = {
|
|
62
|
+
node: ">=22.0.0"
|
|
63
|
+
};
|
|
64
|
+
var package_default = {
|
|
65
|
+
name,
|
|
66
|
+
version,
|
|
67
|
+
description,
|
|
68
|
+
main,
|
|
69
|
+
types,
|
|
70
|
+
exports,
|
|
71
|
+
type,
|
|
72
|
+
scripts,
|
|
73
|
+
repository,
|
|
74
|
+
author,
|
|
75
|
+
license,
|
|
76
|
+
dependencies,
|
|
77
|
+
bugs,
|
|
78
|
+
homepage,
|
|
79
|
+
devDependencies,
|
|
80
|
+
keywords,
|
|
81
|
+
engines
|
|
82
|
+
};
|
|
83
|
+
export {
|
|
84
|
+
author,
|
|
85
|
+
bugs,
|
|
86
|
+
package_default as default,
|
|
87
|
+
dependencies,
|
|
88
|
+
description,
|
|
89
|
+
devDependencies,
|
|
90
|
+
engines,
|
|
91
|
+
exports,
|
|
92
|
+
homepage,
|
|
93
|
+
keywords,
|
|
94
|
+
license,
|
|
95
|
+
main,
|
|
96
|
+
name,
|
|
97
|
+
repository,
|
|
98
|
+
scripts,
|
|
99
|
+
type,
|
|
100
|
+
types,
|
|
101
|
+
version
|
|
102
|
+
};
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -119,7 +119,8 @@ export interface CrawlScrapeOptions {
|
|
|
119
119
|
skipTlsVerification?: boolean;
|
|
120
120
|
removeBase64Images?: boolean;
|
|
121
121
|
blockAds?: boolean;
|
|
122
|
-
proxy?: "basic" | "stealth";
|
|
122
|
+
proxy?: "basic" | "stealth" | "auto";
|
|
123
|
+
storeInCache?: boolean;
|
|
123
124
|
}
|
|
124
125
|
|
|
125
126
|
export type Action = {
|
|
@@ -165,6 +166,7 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
|
|
165
166
|
prompt?: string;
|
|
166
167
|
schema?: any;
|
|
167
168
|
modes?: ("json" | "git-diff")[];
|
|
169
|
+
tag?: string | null;
|
|
168
170
|
}
|
|
169
171
|
actions?: ActionsSchema;
|
|
170
172
|
agent?: AgentOptions;
|
|
@@ -215,6 +217,11 @@ export interface CrawlParams {
|
|
|
215
217
|
deduplicateSimilarURLs?: boolean;
|
|
216
218
|
ignoreQueryParameters?: boolean;
|
|
217
219
|
regexOnFullURL?: boolean;
|
|
220
|
+
/**
|
|
221
|
+
* Delay in seconds between scrapes. This helps respect website rate limits.
|
|
222
|
+
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
223
|
+
*/
|
|
224
|
+
delay?: number;
|
|
218
225
|
}
|
|
219
226
|
|
|
220
227
|
/**
|
|
@@ -515,6 +522,11 @@ export interface GenerateLLMsTextParams {
|
|
|
515
522
|
* @default false
|
|
516
523
|
*/
|
|
517
524
|
showFullText?: boolean;
|
|
525
|
+
/**
|
|
526
|
+
* Whether to use cached content if available
|
|
527
|
+
* @default true
|
|
528
|
+
*/
|
|
529
|
+
cache?: boolean;
|
|
518
530
|
/**
|
|
519
531
|
* Experimental flag for streaming
|
|
520
532
|
*/
|
|
@@ -550,11 +562,26 @@ export interface GenerateLLMsTextStatusResponse {
|
|
|
550
562
|
export default class FirecrawlApp {
|
|
551
563
|
public apiKey: string;
|
|
552
564
|
public apiUrl: string;
|
|
553
|
-
|
|
565
|
+
public version: string = "1.25.1";
|
|
566
|
+
|
|
554
567
|
private isCloudService(url: string): boolean {
|
|
555
568
|
return url.includes('api.firecrawl.dev');
|
|
556
569
|
}
|
|
557
570
|
|
|
571
|
+
private async getVersion(): Promise<string> {
|
|
572
|
+
try {
|
|
573
|
+
const packageJson = await import('../package.json', { assert: { type: 'json' } });
|
|
574
|
+
return packageJson.default.version;
|
|
575
|
+
} catch (error) {
|
|
576
|
+
console.error("Error getting version:", error);
|
|
577
|
+
return "1.25.1";
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
private async init() {
|
|
582
|
+
this.version = await this.getVersion();
|
|
583
|
+
}
|
|
584
|
+
|
|
558
585
|
/**
|
|
559
586
|
* Initializes a new instance of the FirecrawlApp class.
|
|
560
587
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
@@ -568,6 +595,7 @@ export default class FirecrawlApp {
|
|
|
568
595
|
|
|
569
596
|
this.apiKey = apiKey || '';
|
|
570
597
|
this.apiUrl = baseUrl;
|
|
598
|
+
this.init();
|
|
571
599
|
}
|
|
572
600
|
|
|
573
601
|
/**
|
|
@@ -584,7 +612,7 @@ export default class FirecrawlApp {
|
|
|
584
612
|
"Content-Type": "application/json",
|
|
585
613
|
Authorization: `Bearer ${this.apiKey}`,
|
|
586
614
|
} as AxiosRequestHeaders;
|
|
587
|
-
let jsonData: any = { url, ...params };
|
|
615
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
588
616
|
if (jsonData?.extract?.schema) {
|
|
589
617
|
let schema = jsonData.extract.schema;
|
|
590
618
|
|
|
@@ -666,7 +694,7 @@ export default class FirecrawlApp {
|
|
|
666
694
|
lang: params?.lang ?? "en",
|
|
667
695
|
country: params?.country ?? "us",
|
|
668
696
|
location: params?.location,
|
|
669
|
-
origin:
|
|
697
|
+
origin: `js-sdk@${this.version}`,
|
|
670
698
|
timeout: params?.timeout ?? 60000,
|
|
671
699
|
scrapeOptions: params?.scrapeOptions ?? { formats: [] },
|
|
672
700
|
};
|
|
@@ -738,7 +766,7 @@ export default class FirecrawlApp {
|
|
|
738
766
|
idempotencyKey?: string
|
|
739
767
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
740
768
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
741
|
-
let jsonData: any = { url, ...params };
|
|
769
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
742
770
|
try {
|
|
743
771
|
const response: AxiosResponse = await this.postRequest(
|
|
744
772
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -767,7 +795,7 @@ export default class FirecrawlApp {
|
|
|
767
795
|
idempotencyKey?: string
|
|
768
796
|
): Promise<CrawlResponse | ErrorResponse> {
|
|
769
797
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
770
|
-
let jsonData: any = { url, ...params };
|
|
798
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
771
799
|
try {
|
|
772
800
|
const response: AxiosResponse = await this.postRequest(
|
|
773
801
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -943,7 +971,7 @@ export default class FirecrawlApp {
|
|
|
943
971
|
*/
|
|
944
972
|
async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
|
|
945
973
|
const headers = this.prepareHeaders();
|
|
946
|
-
let jsonData:
|
|
974
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
947
975
|
|
|
948
976
|
try {
|
|
949
977
|
const response: AxiosResponse = await this.postRequest(
|
|
@@ -981,7 +1009,7 @@ export default class FirecrawlApp {
|
|
|
981
1009
|
ignoreInvalidURLs?: boolean,
|
|
982
1010
|
): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
983
1011
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
984
|
-
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
1012
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
985
1013
|
if (jsonData?.extract?.schema) {
|
|
986
1014
|
let schema = jsonData.extract.schema;
|
|
987
1015
|
|
|
@@ -1046,7 +1074,7 @@ export default class FirecrawlApp {
|
|
|
1046
1074
|
ignoreInvalidURLs?: boolean,
|
|
1047
1075
|
): Promise<BatchScrapeResponse | ErrorResponse> {
|
|
1048
1076
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
1049
|
-
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...
|
|
1077
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
1050
1078
|
try {
|
|
1051
1079
|
const response: AxiosResponse = await this.postRequest(
|
|
1052
1080
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -1220,7 +1248,7 @@ export default class FirecrawlApp {
|
|
|
1220
1248
|
try {
|
|
1221
1249
|
const response: AxiosResponse = await this.postRequest(
|
|
1222
1250
|
this.apiUrl + `/v1/extract`,
|
|
1223
|
-
{ ...jsonData, schema: jsonSchema, origin:
|
|
1251
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
1224
1252
|
headers
|
|
1225
1253
|
);
|
|
1226
1254
|
|
|
@@ -1288,7 +1316,7 @@ export default class FirecrawlApp {
|
|
|
1288
1316
|
try {
|
|
1289
1317
|
const response: AxiosResponse = await this.postRequest(
|
|
1290
1318
|
this.apiUrl + `/v1/extract`,
|
|
1291
|
-
{ ...jsonData, schema: jsonSchema },
|
|
1319
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
1292
1320
|
headers
|
|
1293
1321
|
);
|
|
1294
1322
|
|
|
@@ -1465,6 +1493,13 @@ export default class FirecrawlApp {
|
|
|
1465
1493
|
* @param {string} action - The action being performed when the error occurred.
|
|
1466
1494
|
*/
|
|
1467
1495
|
handleError(response: AxiosResponse, action: string): void {
|
|
1496
|
+
if (!response) {
|
|
1497
|
+
throw new FirecrawlError(
|
|
1498
|
+
`No response received while trying to ${action}. This may be a network error or the server is unreachable.`,
|
|
1499
|
+
0
|
|
1500
|
+
);
|
|
1501
|
+
}
|
|
1502
|
+
|
|
1468
1503
|
if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
|
|
1469
1504
|
const errorMessage: string =
|
|
1470
1505
|
response.data.error || "Unknown error occurred";
|
|
@@ -1579,7 +1614,7 @@ export default class FirecrawlApp {
|
|
|
1579
1614
|
*/
|
|
1580
1615
|
async asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1581
1616
|
const headers = this.prepareHeaders();
|
|
1582
|
-
let jsonData: any = { query, ...params };
|
|
1617
|
+
let jsonData: any = { query, ...params, origin: `js-sdk@${this.version}` };
|
|
1583
1618
|
|
|
1584
1619
|
if (jsonData?.jsonOptions?.schema) {
|
|
1585
1620
|
let schema = jsonData.jsonOptions.schema;
|
|
@@ -1587,7 +1622,7 @@ export default class FirecrawlApp {
|
|
|
1587
1622
|
try {
|
|
1588
1623
|
schema = zodToJsonSchema(schema);
|
|
1589
1624
|
} catch (error) {
|
|
1590
|
-
|
|
1625
|
+
// Ignore error if schema can't be parsed as Zod
|
|
1591
1626
|
}
|
|
1592
1627
|
jsonData = {
|
|
1593
1628
|
...jsonData,
|
|
@@ -1733,9 +1768,10 @@ export default class FirecrawlApp {
|
|
|
1733
1768
|
async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1734
1769
|
const headers = this.prepareHeaders();
|
|
1735
1770
|
try {
|
|
1771
|
+
let jsonData: any = { topic, ...params, origin: `js-sdk@${this.version}` };
|
|
1736
1772
|
const response: AxiosResponse = await this.postRequest(
|
|
1737
1773
|
`${this.apiUrl}/v1/deep-research`,
|
|
1738
|
-
|
|
1774
|
+
jsonData,
|
|
1739
1775
|
headers
|
|
1740
1776
|
);
|
|
1741
1777
|
|
|
@@ -1845,10 +1881,11 @@ export default class FirecrawlApp {
|
|
|
1845
1881
|
*/
|
|
1846
1882
|
async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
|
|
1847
1883
|
const headers = this.prepareHeaders();
|
|
1884
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
1848
1885
|
try {
|
|
1849
1886
|
const response: AxiosResponse = await this.postRequest(
|
|
1850
1887
|
`${this.apiUrl}/v1/llmstxt`,
|
|
1851
|
-
|
|
1888
|
+
jsonData,
|
|
1852
1889
|
headers
|
|
1853
1890
|
);
|
|
1854
1891
|
|