firecrawl 1.24.0 → 1.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +0 -0
- package/LICENSE +0 -0
- package/README.md +0 -0
- package/dist/index.cjs +107 -12
- package/dist/index.d.cts +13 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.js +34 -12
- package/dist/package-DCB4AKMM.js +102 -0
- package/dump.rdb +0 -0
- package/jest.config.js +0 -0
- package/package.json +1 -1
- package/src/__tests__/e2e_withAuth/index.test.ts +0 -0
- package/src/__tests__/fixtures/scrape.json +0 -0
- package/src/__tests__/index.test.ts +0 -0
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +0 -0
- package/src/index.ts +49 -14
- package/tsconfig.json +0 -0
- package/tsup.config.ts +0 -0
package/.env.example
CHANGED
|
File without changes
|
package/LICENSE
CHANGED
|
File without changes
|
package/README.md
CHANGED
|
File without changes
|
package/dist/index.cjs
CHANGED
|
@@ -5,6 +5,9 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
|
5
5
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
6
|
var __getProtoOf = Object.getPrototypeOf;
|
|
7
7
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __commonJS = (cb, mod) => function __require() {
|
|
9
|
+
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
10
|
+
};
|
|
8
11
|
var __export = (target, all) => {
|
|
9
12
|
for (var name in all)
|
|
10
13
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
@@ -27,6 +30,76 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
27
30
|
));
|
|
28
31
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
32
|
|
|
33
|
+
// package.json
|
|
34
|
+
var require_package = __commonJS({
|
|
35
|
+
"package.json"(exports2, module2) {
|
|
36
|
+
module2.exports = {
|
|
37
|
+
name: "firecrawl",
|
|
38
|
+
version: "1.25.1",
|
|
39
|
+
description: "JavaScript SDK for Firecrawl API",
|
|
40
|
+
main: "dist/index.js",
|
|
41
|
+
types: "dist/index.d.ts",
|
|
42
|
+
exports: {
|
|
43
|
+
"./package.json": "./package.json",
|
|
44
|
+
".": {
|
|
45
|
+
import: "./dist/index.js",
|
|
46
|
+
default: "./dist/index.cjs"
|
|
47
|
+
}
|
|
48
|
+
},
|
|
49
|
+
type: "module",
|
|
50
|
+
scripts: {
|
|
51
|
+
build: "tsup",
|
|
52
|
+
"build-and-publish": "npm run build && npm publish --access public",
|
|
53
|
+
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
54
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
55
|
+
},
|
|
56
|
+
repository: {
|
|
57
|
+
type: "git",
|
|
58
|
+
url: "git+https://github.com/mendableai/firecrawl.git"
|
|
59
|
+
},
|
|
60
|
+
author: "Mendable.ai",
|
|
61
|
+
license: "MIT",
|
|
62
|
+
dependencies: {
|
|
63
|
+
"typescript-event-target": "^1.1.1",
|
|
64
|
+
zod: "^3.23.8",
|
|
65
|
+
"zod-to-json-schema": "^3.23.0",
|
|
66
|
+
axios: "^1.6.8"
|
|
67
|
+
},
|
|
68
|
+
bugs: {
|
|
69
|
+
url: "https://github.com/mendableai/firecrawl/issues"
|
|
70
|
+
},
|
|
71
|
+
homepage: "https://github.com/mendableai/firecrawl#readme",
|
|
72
|
+
devDependencies: {
|
|
73
|
+
"@jest/globals": "^29.7.0",
|
|
74
|
+
"@types/axios": "^0.14.0",
|
|
75
|
+
"@types/dotenv": "^8.2.0",
|
|
76
|
+
"@types/jest": "^29.5.14",
|
|
77
|
+
"@types/mocha": "^10.0.6",
|
|
78
|
+
"@types/node": "^20.12.12",
|
|
79
|
+
"@types/uuid": "^9.0.8",
|
|
80
|
+
dotenv: "^16.4.5",
|
|
81
|
+
jest: "^29.7.0",
|
|
82
|
+
"ts-jest": "^29.2.2",
|
|
83
|
+
tsup: "^8.2.4",
|
|
84
|
+
typescript: "^5.4.5",
|
|
85
|
+
uuid: "^9.0.1"
|
|
86
|
+
},
|
|
87
|
+
keywords: [
|
|
88
|
+
"firecrawl",
|
|
89
|
+
"mendable",
|
|
90
|
+
"crawler",
|
|
91
|
+
"web",
|
|
92
|
+
"scraper",
|
|
93
|
+
"api",
|
|
94
|
+
"sdk"
|
|
95
|
+
],
|
|
96
|
+
engines: {
|
|
97
|
+
node: ">=22.0.0"
|
|
98
|
+
}
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
});
|
|
102
|
+
|
|
30
103
|
// src/index.ts
|
|
31
104
|
var src_exports = {};
|
|
32
105
|
__export(src_exports, {
|
|
@@ -59,9 +132,22 @@ var FirecrawlError = class extends Error {
|
|
|
59
132
|
var FirecrawlApp = class {
|
|
60
133
|
apiKey;
|
|
61
134
|
apiUrl;
|
|
135
|
+
version = "1.25.1";
|
|
62
136
|
isCloudService(url) {
|
|
63
137
|
return url.includes("api.firecrawl.dev");
|
|
64
138
|
}
|
|
139
|
+
async getVersion() {
|
|
140
|
+
try {
|
|
141
|
+
const packageJson = await Promise.resolve().then(() => __toESM(require_package(), 1));
|
|
142
|
+
return packageJson.default.version;
|
|
143
|
+
} catch (error) {
|
|
144
|
+
console.error("Error getting version:", error);
|
|
145
|
+
return "1.25.1";
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
async init() {
|
|
149
|
+
this.version = await this.getVersion();
|
|
150
|
+
}
|
|
65
151
|
/**
|
|
66
152
|
* Initializes a new instance of the FirecrawlApp class.
|
|
67
153
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
@@ -73,6 +159,7 @@ var FirecrawlApp = class {
|
|
|
73
159
|
}
|
|
74
160
|
this.apiKey = apiKey || "";
|
|
75
161
|
this.apiUrl = baseUrl;
|
|
162
|
+
this.init();
|
|
76
163
|
}
|
|
77
164
|
/**
|
|
78
165
|
* Scrapes a URL using the Firecrawl API.
|
|
@@ -85,7 +172,7 @@ var FirecrawlApp = class {
|
|
|
85
172
|
"Content-Type": "application/json",
|
|
86
173
|
Authorization: `Bearer ${this.apiKey}`
|
|
87
174
|
};
|
|
88
|
-
let jsonData = { url, ...params };
|
|
175
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
89
176
|
if (jsonData?.extract?.schema) {
|
|
90
177
|
let schema = jsonData.extract.schema;
|
|
91
178
|
try {
|
|
@@ -159,7 +246,7 @@ var FirecrawlApp = class {
|
|
|
159
246
|
lang: params?.lang ?? "en",
|
|
160
247
|
country: params?.country ?? "us",
|
|
161
248
|
location: params?.location,
|
|
162
|
-
origin:
|
|
249
|
+
origin: `js-sdk@${this.version}`,
|
|
163
250
|
timeout: params?.timeout ?? 6e4,
|
|
164
251
|
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
|
|
165
252
|
};
|
|
@@ -219,7 +306,7 @@ var FirecrawlApp = class {
|
|
|
219
306
|
*/
|
|
220
307
|
async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
|
|
221
308
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
222
|
-
let jsonData = { url, ...params };
|
|
309
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
223
310
|
try {
|
|
224
311
|
const response = await this.postRequest(
|
|
225
312
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -243,7 +330,7 @@ var FirecrawlApp = class {
|
|
|
243
330
|
}
|
|
244
331
|
async asyncCrawlUrl(url, params, idempotencyKey) {
|
|
245
332
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
246
|
-
let jsonData = { url, ...params };
|
|
333
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
247
334
|
try {
|
|
248
335
|
const response = await this.postRequest(
|
|
249
336
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -402,7 +489,7 @@ var FirecrawlApp = class {
|
|
|
402
489
|
*/
|
|
403
490
|
async mapUrl(url, params) {
|
|
404
491
|
const headers = this.prepareHeaders();
|
|
405
|
-
let jsonData = { url, ...params };
|
|
492
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
406
493
|
try {
|
|
407
494
|
const response = await this.postRequest(
|
|
408
495
|
this.apiUrl + `/v1/map`,
|
|
@@ -431,7 +518,7 @@ var FirecrawlApp = class {
|
|
|
431
518
|
*/
|
|
432
519
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
433
520
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
434
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
521
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
435
522
|
if (jsonData?.extract?.schema) {
|
|
436
523
|
let schema = jsonData.extract.schema;
|
|
437
524
|
try {
|
|
@@ -483,7 +570,7 @@ var FirecrawlApp = class {
|
|
|
483
570
|
}
|
|
484
571
|
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
485
572
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
486
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params
|
|
573
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
487
574
|
try {
|
|
488
575
|
const response = await this.postRequest(
|
|
489
576
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -637,7 +724,7 @@ var FirecrawlApp = class {
|
|
|
637
724
|
try {
|
|
638
725
|
const response = await this.postRequest(
|
|
639
726
|
this.apiUrl + `/v1/extract`,
|
|
640
|
-
{ ...jsonData, schema: jsonSchema, origin:
|
|
727
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
641
728
|
headers
|
|
642
729
|
);
|
|
643
730
|
if (response.status === 200) {
|
|
@@ -697,7 +784,7 @@ var FirecrawlApp = class {
|
|
|
697
784
|
try {
|
|
698
785
|
const response = await this.postRequest(
|
|
699
786
|
this.apiUrl + `/v1/extract`,
|
|
700
|
-
{ ...jsonData, schema: jsonSchema },
|
|
787
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
701
788
|
headers
|
|
702
789
|
);
|
|
703
790
|
if (response.status === 200) {
|
|
@@ -849,6 +936,12 @@ var FirecrawlApp = class {
|
|
|
849
936
|
* @param {string} action - The action being performed when the error occurred.
|
|
850
937
|
*/
|
|
851
938
|
handleError(response, action) {
|
|
939
|
+
if (!response) {
|
|
940
|
+
throw new FirecrawlError(
|
|
941
|
+
`No response received while trying to ${action}. This may be a network error or the server is unreachable.`,
|
|
942
|
+
0
|
|
943
|
+
);
|
|
944
|
+
}
|
|
852
945
|
if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
|
|
853
946
|
const errorMessage = response.data.error || "Unknown error occurred";
|
|
854
947
|
const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
|
|
@@ -930,7 +1023,7 @@ var FirecrawlApp = class {
|
|
|
930
1023
|
*/
|
|
931
1024
|
async asyncDeepResearch(query, params) {
|
|
932
1025
|
const headers = this.prepareHeaders();
|
|
933
|
-
let jsonData = { query, ...params };
|
|
1026
|
+
let jsonData = { query, ...params, origin: `js-sdk@${this.version}` };
|
|
934
1027
|
if (jsonData?.jsonOptions?.schema) {
|
|
935
1028
|
let schema = jsonData.jsonOptions.schema;
|
|
936
1029
|
try {
|
|
@@ -1053,9 +1146,10 @@ var FirecrawlApp = class {
|
|
|
1053
1146
|
async __asyncDeepResearch(topic, params) {
|
|
1054
1147
|
const headers = this.prepareHeaders();
|
|
1055
1148
|
try {
|
|
1149
|
+
let jsonData = { topic, ...params, origin: `js-sdk@${this.version}` };
|
|
1056
1150
|
const response = await this.postRequest(
|
|
1057
1151
|
`${this.apiUrl}/v1/deep-research`,
|
|
1058
|
-
|
|
1152
|
+
jsonData,
|
|
1059
1153
|
headers
|
|
1060
1154
|
);
|
|
1061
1155
|
if (response.status === 200) {
|
|
@@ -1150,10 +1244,11 @@ var FirecrawlApp = class {
|
|
|
1150
1244
|
*/
|
|
1151
1245
|
async asyncGenerateLLMsText(url, params) {
|
|
1152
1246
|
const headers = this.prepareHeaders();
|
|
1247
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
1153
1248
|
try {
|
|
1154
1249
|
const response = await this.postRequest(
|
|
1155
1250
|
`${this.apiUrl}/v1/llmstxt`,
|
|
1156
|
-
|
|
1251
|
+
jsonData,
|
|
1157
1252
|
headers
|
|
1158
1253
|
);
|
|
1159
1254
|
if (response.status === 200) {
|
package/dist/index.d.cts
CHANGED
|
@@ -205,6 +205,11 @@ interface CrawlParams {
|
|
|
205
205
|
deduplicateSimilarURLs?: boolean;
|
|
206
206
|
ignoreQueryParameters?: boolean;
|
|
207
207
|
regexOnFullURL?: boolean;
|
|
208
|
+
/**
|
|
209
|
+
* Delay in seconds between scrapes. This helps respect website rate limits.
|
|
210
|
+
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
211
|
+
*/
|
|
212
|
+
delay?: number;
|
|
208
213
|
}
|
|
209
214
|
/**
|
|
210
215
|
* Response interface for crawling operations.
|
|
@@ -477,6 +482,11 @@ interface GenerateLLMsTextParams {
|
|
|
477
482
|
* @default false
|
|
478
483
|
*/
|
|
479
484
|
showFullText?: boolean;
|
|
485
|
+
/**
|
|
486
|
+
* Whether to use cached content if available
|
|
487
|
+
* @default true
|
|
488
|
+
*/
|
|
489
|
+
cache?: boolean;
|
|
480
490
|
/**
|
|
481
491
|
* Experimental flag for streaming
|
|
482
492
|
*/
|
|
@@ -509,7 +519,10 @@ interface GenerateLLMsTextStatusResponse {
|
|
|
509
519
|
declare class FirecrawlApp {
|
|
510
520
|
apiKey: string;
|
|
511
521
|
apiUrl: string;
|
|
522
|
+
version: string;
|
|
512
523
|
private isCloudService;
|
|
524
|
+
private getVersion;
|
|
525
|
+
private init;
|
|
513
526
|
/**
|
|
514
527
|
* Initializes a new instance of the FirecrawlApp class.
|
|
515
528
|
* @param config - Configuration options for the FirecrawlApp instance.
|
package/dist/index.d.ts
CHANGED
|
@@ -205,6 +205,11 @@ interface CrawlParams {
|
|
|
205
205
|
deduplicateSimilarURLs?: boolean;
|
|
206
206
|
ignoreQueryParameters?: boolean;
|
|
207
207
|
regexOnFullURL?: boolean;
|
|
208
|
+
/**
|
|
209
|
+
* Delay in seconds between scrapes. This helps respect website rate limits.
|
|
210
|
+
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
211
|
+
*/
|
|
212
|
+
delay?: number;
|
|
208
213
|
}
|
|
209
214
|
/**
|
|
210
215
|
* Response interface for crawling operations.
|
|
@@ -477,6 +482,11 @@ interface GenerateLLMsTextParams {
|
|
|
477
482
|
* @default false
|
|
478
483
|
*/
|
|
479
484
|
showFullText?: boolean;
|
|
485
|
+
/**
|
|
486
|
+
* Whether to use cached content if available
|
|
487
|
+
* @default true
|
|
488
|
+
*/
|
|
489
|
+
cache?: boolean;
|
|
480
490
|
/**
|
|
481
491
|
* Experimental flag for streaming
|
|
482
492
|
*/
|
|
@@ -509,7 +519,10 @@ interface GenerateLLMsTextStatusResponse {
|
|
|
509
519
|
declare class FirecrawlApp {
|
|
510
520
|
apiKey: string;
|
|
511
521
|
apiUrl: string;
|
|
522
|
+
version: string;
|
|
512
523
|
private isCloudService;
|
|
524
|
+
private getVersion;
|
|
525
|
+
private init;
|
|
513
526
|
/**
|
|
514
527
|
* Initializes a new instance of the FirecrawlApp class.
|
|
515
528
|
* @param config - Configuration options for the FirecrawlApp instance.
|
package/dist/index.js
CHANGED
|
@@ -23,9 +23,22 @@ var FirecrawlError = class extends Error {
|
|
|
23
23
|
var FirecrawlApp = class {
|
|
24
24
|
apiKey;
|
|
25
25
|
apiUrl;
|
|
26
|
+
version = "1.25.1";
|
|
26
27
|
isCloudService(url) {
|
|
27
28
|
return url.includes("api.firecrawl.dev");
|
|
28
29
|
}
|
|
30
|
+
async getVersion() {
|
|
31
|
+
try {
|
|
32
|
+
const packageJson = await import("./package-DCB4AKMM.js");
|
|
33
|
+
return packageJson.default.version;
|
|
34
|
+
} catch (error) {
|
|
35
|
+
console.error("Error getting version:", error);
|
|
36
|
+
return "1.25.1";
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
async init() {
|
|
40
|
+
this.version = await this.getVersion();
|
|
41
|
+
}
|
|
29
42
|
/**
|
|
30
43
|
* Initializes a new instance of the FirecrawlApp class.
|
|
31
44
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
@@ -37,6 +50,7 @@ var FirecrawlApp = class {
|
|
|
37
50
|
}
|
|
38
51
|
this.apiKey = apiKey || "";
|
|
39
52
|
this.apiUrl = baseUrl;
|
|
53
|
+
this.init();
|
|
40
54
|
}
|
|
41
55
|
/**
|
|
42
56
|
* Scrapes a URL using the Firecrawl API.
|
|
@@ -49,7 +63,7 @@ var FirecrawlApp = class {
|
|
|
49
63
|
"Content-Type": "application/json",
|
|
50
64
|
Authorization: `Bearer ${this.apiKey}`
|
|
51
65
|
};
|
|
52
|
-
let jsonData = { url, ...params };
|
|
66
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
53
67
|
if (jsonData?.extract?.schema) {
|
|
54
68
|
let schema = jsonData.extract.schema;
|
|
55
69
|
try {
|
|
@@ -123,7 +137,7 @@ var FirecrawlApp = class {
|
|
|
123
137
|
lang: params?.lang ?? "en",
|
|
124
138
|
country: params?.country ?? "us",
|
|
125
139
|
location: params?.location,
|
|
126
|
-
origin:
|
|
140
|
+
origin: `js-sdk@${this.version}`,
|
|
127
141
|
timeout: params?.timeout ?? 6e4,
|
|
128
142
|
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
|
|
129
143
|
};
|
|
@@ -183,7 +197,7 @@ var FirecrawlApp = class {
|
|
|
183
197
|
*/
|
|
184
198
|
async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
|
|
185
199
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
186
|
-
let jsonData = { url, ...params };
|
|
200
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
187
201
|
try {
|
|
188
202
|
const response = await this.postRequest(
|
|
189
203
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -207,7 +221,7 @@ var FirecrawlApp = class {
|
|
|
207
221
|
}
|
|
208
222
|
async asyncCrawlUrl(url, params, idempotencyKey) {
|
|
209
223
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
210
|
-
let jsonData = { url, ...params };
|
|
224
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
211
225
|
try {
|
|
212
226
|
const response = await this.postRequest(
|
|
213
227
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -366,7 +380,7 @@ var FirecrawlApp = class {
|
|
|
366
380
|
*/
|
|
367
381
|
async mapUrl(url, params) {
|
|
368
382
|
const headers = this.prepareHeaders();
|
|
369
|
-
let jsonData = { url, ...params };
|
|
383
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
370
384
|
try {
|
|
371
385
|
const response = await this.postRequest(
|
|
372
386
|
this.apiUrl + `/v1/map`,
|
|
@@ -395,7 +409,7 @@ var FirecrawlApp = class {
|
|
|
395
409
|
*/
|
|
396
410
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
397
411
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
398
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
412
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
399
413
|
if (jsonData?.extract?.schema) {
|
|
400
414
|
let schema = jsonData.extract.schema;
|
|
401
415
|
try {
|
|
@@ -447,7 +461,7 @@ var FirecrawlApp = class {
|
|
|
447
461
|
}
|
|
448
462
|
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
449
463
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
450
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params
|
|
464
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
451
465
|
try {
|
|
452
466
|
const response = await this.postRequest(
|
|
453
467
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -601,7 +615,7 @@ var FirecrawlApp = class {
|
|
|
601
615
|
try {
|
|
602
616
|
const response = await this.postRequest(
|
|
603
617
|
this.apiUrl + `/v1/extract`,
|
|
604
|
-
{ ...jsonData, schema: jsonSchema, origin:
|
|
618
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
605
619
|
headers
|
|
606
620
|
);
|
|
607
621
|
if (response.status === 200) {
|
|
@@ -661,7 +675,7 @@ var FirecrawlApp = class {
|
|
|
661
675
|
try {
|
|
662
676
|
const response = await this.postRequest(
|
|
663
677
|
this.apiUrl + `/v1/extract`,
|
|
664
|
-
{ ...jsonData, schema: jsonSchema },
|
|
678
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
665
679
|
headers
|
|
666
680
|
);
|
|
667
681
|
if (response.status === 200) {
|
|
@@ -813,6 +827,12 @@ var FirecrawlApp = class {
|
|
|
813
827
|
* @param {string} action - The action being performed when the error occurred.
|
|
814
828
|
*/
|
|
815
829
|
handleError(response, action) {
|
|
830
|
+
if (!response) {
|
|
831
|
+
throw new FirecrawlError(
|
|
832
|
+
`No response received while trying to ${action}. This may be a network error or the server is unreachable.`,
|
|
833
|
+
0
|
|
834
|
+
);
|
|
835
|
+
}
|
|
816
836
|
if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
|
|
817
837
|
const errorMessage = response.data.error || "Unknown error occurred";
|
|
818
838
|
const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
|
|
@@ -894,7 +914,7 @@ var FirecrawlApp = class {
|
|
|
894
914
|
*/
|
|
895
915
|
async asyncDeepResearch(query, params) {
|
|
896
916
|
const headers = this.prepareHeaders();
|
|
897
|
-
let jsonData = { query, ...params };
|
|
917
|
+
let jsonData = { query, ...params, origin: `js-sdk@${this.version}` };
|
|
898
918
|
if (jsonData?.jsonOptions?.schema) {
|
|
899
919
|
let schema = jsonData.jsonOptions.schema;
|
|
900
920
|
try {
|
|
@@ -1017,9 +1037,10 @@ var FirecrawlApp = class {
|
|
|
1017
1037
|
async __asyncDeepResearch(topic, params) {
|
|
1018
1038
|
const headers = this.prepareHeaders();
|
|
1019
1039
|
try {
|
|
1040
|
+
let jsonData = { topic, ...params, origin: `js-sdk@${this.version}` };
|
|
1020
1041
|
const response = await this.postRequest(
|
|
1021
1042
|
`${this.apiUrl}/v1/deep-research`,
|
|
1022
|
-
|
|
1043
|
+
jsonData,
|
|
1023
1044
|
headers
|
|
1024
1045
|
);
|
|
1025
1046
|
if (response.status === 200) {
|
|
@@ -1114,10 +1135,11 @@ var FirecrawlApp = class {
|
|
|
1114
1135
|
*/
|
|
1115
1136
|
async asyncGenerateLLMsText(url, params) {
|
|
1116
1137
|
const headers = this.prepareHeaders();
|
|
1138
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
1117
1139
|
try {
|
|
1118
1140
|
const response = await this.postRequest(
|
|
1119
1141
|
`${this.apiUrl}/v1/llmstxt`,
|
|
1120
|
-
|
|
1142
|
+
jsonData,
|
|
1121
1143
|
headers
|
|
1122
1144
|
);
|
|
1123
1145
|
if (response.status === 200) {
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// package.json
|
|
2
|
+
var name = "firecrawl";
|
|
3
|
+
var version = "1.25.1";
|
|
4
|
+
var description = "JavaScript SDK for Firecrawl API";
|
|
5
|
+
var main = "dist/index.js";
|
|
6
|
+
var types = "dist/index.d.ts";
|
|
7
|
+
var exports = {
|
|
8
|
+
"./package.json": "./package.json",
|
|
9
|
+
".": {
|
|
10
|
+
import: "./dist/index.js",
|
|
11
|
+
default: "./dist/index.cjs"
|
|
12
|
+
}
|
|
13
|
+
};
|
|
14
|
+
var type = "module";
|
|
15
|
+
var scripts = {
|
|
16
|
+
build: "tsup",
|
|
17
|
+
"build-and-publish": "npm run build && npm publish --access public",
|
|
18
|
+
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
19
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
20
|
+
};
|
|
21
|
+
var repository = {
|
|
22
|
+
type: "git",
|
|
23
|
+
url: "git+https://github.com/mendableai/firecrawl.git"
|
|
24
|
+
};
|
|
25
|
+
var author = "Mendable.ai";
|
|
26
|
+
var license = "MIT";
|
|
27
|
+
var dependencies = {
|
|
28
|
+
"typescript-event-target": "^1.1.1",
|
|
29
|
+
zod: "^3.23.8",
|
|
30
|
+
"zod-to-json-schema": "^3.23.0",
|
|
31
|
+
axios: "^1.6.8"
|
|
32
|
+
};
|
|
33
|
+
var bugs = {
|
|
34
|
+
url: "https://github.com/mendableai/firecrawl/issues"
|
|
35
|
+
};
|
|
36
|
+
var homepage = "https://github.com/mendableai/firecrawl#readme";
|
|
37
|
+
var devDependencies = {
|
|
38
|
+
"@jest/globals": "^29.7.0",
|
|
39
|
+
"@types/axios": "^0.14.0",
|
|
40
|
+
"@types/dotenv": "^8.2.0",
|
|
41
|
+
"@types/jest": "^29.5.14",
|
|
42
|
+
"@types/mocha": "^10.0.6",
|
|
43
|
+
"@types/node": "^20.12.12",
|
|
44
|
+
"@types/uuid": "^9.0.8",
|
|
45
|
+
dotenv: "^16.4.5",
|
|
46
|
+
jest: "^29.7.0",
|
|
47
|
+
"ts-jest": "^29.2.2",
|
|
48
|
+
tsup: "^8.2.4",
|
|
49
|
+
typescript: "^5.4.5",
|
|
50
|
+
uuid: "^9.0.1"
|
|
51
|
+
};
|
|
52
|
+
var keywords = [
|
|
53
|
+
"firecrawl",
|
|
54
|
+
"mendable",
|
|
55
|
+
"crawler",
|
|
56
|
+
"web",
|
|
57
|
+
"scraper",
|
|
58
|
+
"api",
|
|
59
|
+
"sdk"
|
|
60
|
+
];
|
|
61
|
+
var engines = {
|
|
62
|
+
node: ">=22.0.0"
|
|
63
|
+
};
|
|
64
|
+
var package_default = {
|
|
65
|
+
name,
|
|
66
|
+
version,
|
|
67
|
+
description,
|
|
68
|
+
main,
|
|
69
|
+
types,
|
|
70
|
+
exports,
|
|
71
|
+
type,
|
|
72
|
+
scripts,
|
|
73
|
+
repository,
|
|
74
|
+
author,
|
|
75
|
+
license,
|
|
76
|
+
dependencies,
|
|
77
|
+
bugs,
|
|
78
|
+
homepage,
|
|
79
|
+
devDependencies,
|
|
80
|
+
keywords,
|
|
81
|
+
engines
|
|
82
|
+
};
|
|
83
|
+
export {
|
|
84
|
+
author,
|
|
85
|
+
bugs,
|
|
86
|
+
package_default as default,
|
|
87
|
+
dependencies,
|
|
88
|
+
description,
|
|
89
|
+
devDependencies,
|
|
90
|
+
engines,
|
|
91
|
+
exports,
|
|
92
|
+
homepage,
|
|
93
|
+
keywords,
|
|
94
|
+
license,
|
|
95
|
+
main,
|
|
96
|
+
name,
|
|
97
|
+
repository,
|
|
98
|
+
scripts,
|
|
99
|
+
type,
|
|
100
|
+
types,
|
|
101
|
+
version
|
|
102
|
+
};
|
package/dump.rdb
ADDED
|
Binary file
|
package/jest.config.js
CHANGED
|
File without changes
|
package/package.json
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
package/src/index.ts
CHANGED
|
@@ -215,6 +215,11 @@ export interface CrawlParams {
|
|
|
215
215
|
deduplicateSimilarURLs?: boolean;
|
|
216
216
|
ignoreQueryParameters?: boolean;
|
|
217
217
|
regexOnFullURL?: boolean;
|
|
218
|
+
/**
|
|
219
|
+
* Delay in seconds between scrapes. This helps respect website rate limits.
|
|
220
|
+
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
221
|
+
*/
|
|
222
|
+
delay?: number;
|
|
218
223
|
}
|
|
219
224
|
|
|
220
225
|
/**
|
|
@@ -515,6 +520,11 @@ export interface GenerateLLMsTextParams {
|
|
|
515
520
|
* @default false
|
|
516
521
|
*/
|
|
517
522
|
showFullText?: boolean;
|
|
523
|
+
/**
|
|
524
|
+
* Whether to use cached content if available
|
|
525
|
+
* @default true
|
|
526
|
+
*/
|
|
527
|
+
cache?: boolean;
|
|
518
528
|
/**
|
|
519
529
|
* Experimental flag for streaming
|
|
520
530
|
*/
|
|
@@ -550,11 +560,26 @@ export interface GenerateLLMsTextStatusResponse {
|
|
|
550
560
|
export default class FirecrawlApp {
|
|
551
561
|
public apiKey: string;
|
|
552
562
|
public apiUrl: string;
|
|
553
|
-
|
|
563
|
+
public version: string = "1.25.1";
|
|
564
|
+
|
|
554
565
|
private isCloudService(url: string): boolean {
|
|
555
566
|
return url.includes('api.firecrawl.dev');
|
|
556
567
|
}
|
|
557
568
|
|
|
569
|
+
private async getVersion(): Promise<string> {
|
|
570
|
+
try {
|
|
571
|
+
const packageJson = await import('../package.json', { assert: { type: 'json' } });
|
|
572
|
+
return packageJson.default.version;
|
|
573
|
+
} catch (error) {
|
|
574
|
+
console.error("Error getting version:", error);
|
|
575
|
+
return "1.25.1";
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
private async init() {
|
|
580
|
+
this.version = await this.getVersion();
|
|
581
|
+
}
|
|
582
|
+
|
|
558
583
|
/**
|
|
559
584
|
* Initializes a new instance of the FirecrawlApp class.
|
|
560
585
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
@@ -568,6 +593,7 @@ export default class FirecrawlApp {
|
|
|
568
593
|
|
|
569
594
|
this.apiKey = apiKey || '';
|
|
570
595
|
this.apiUrl = baseUrl;
|
|
596
|
+
this.init();
|
|
571
597
|
}
|
|
572
598
|
|
|
573
599
|
/**
|
|
@@ -584,7 +610,7 @@ export default class FirecrawlApp {
|
|
|
584
610
|
"Content-Type": "application/json",
|
|
585
611
|
Authorization: `Bearer ${this.apiKey}`,
|
|
586
612
|
} as AxiosRequestHeaders;
|
|
587
|
-
let jsonData: any = { url, ...params };
|
|
613
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
588
614
|
if (jsonData?.extract?.schema) {
|
|
589
615
|
let schema = jsonData.extract.schema;
|
|
590
616
|
|
|
@@ -666,7 +692,7 @@ export default class FirecrawlApp {
|
|
|
666
692
|
lang: params?.lang ?? "en",
|
|
667
693
|
country: params?.country ?? "us",
|
|
668
694
|
location: params?.location,
|
|
669
|
-
origin:
|
|
695
|
+
origin: `js-sdk@${this.version}`,
|
|
670
696
|
timeout: params?.timeout ?? 60000,
|
|
671
697
|
scrapeOptions: params?.scrapeOptions ?? { formats: [] },
|
|
672
698
|
};
|
|
@@ -738,7 +764,7 @@ export default class FirecrawlApp {
|
|
|
738
764
|
idempotencyKey?: string
|
|
739
765
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
740
766
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
741
|
-
let jsonData: any = { url, ...params };
|
|
767
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
742
768
|
try {
|
|
743
769
|
const response: AxiosResponse = await this.postRequest(
|
|
744
770
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -767,7 +793,7 @@ export default class FirecrawlApp {
|
|
|
767
793
|
idempotencyKey?: string
|
|
768
794
|
): Promise<CrawlResponse | ErrorResponse> {
|
|
769
795
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
770
|
-
let jsonData: any = { url, ...params };
|
|
796
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
771
797
|
try {
|
|
772
798
|
const response: AxiosResponse = await this.postRequest(
|
|
773
799
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -943,7 +969,7 @@ export default class FirecrawlApp {
|
|
|
943
969
|
*/
|
|
944
970
|
async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
|
|
945
971
|
const headers = this.prepareHeaders();
|
|
946
|
-
let jsonData:
|
|
972
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
947
973
|
|
|
948
974
|
try {
|
|
949
975
|
const response: AxiosResponse = await this.postRequest(
|
|
@@ -981,7 +1007,7 @@ export default class FirecrawlApp {
|
|
|
981
1007
|
ignoreInvalidURLs?: boolean,
|
|
982
1008
|
): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
983
1009
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
984
|
-
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
1010
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
985
1011
|
if (jsonData?.extract?.schema) {
|
|
986
1012
|
let schema = jsonData.extract.schema;
|
|
987
1013
|
|
|
@@ -1046,7 +1072,7 @@ export default class FirecrawlApp {
|
|
|
1046
1072
|
ignoreInvalidURLs?: boolean,
|
|
1047
1073
|
): Promise<BatchScrapeResponse | ErrorResponse> {
|
|
1048
1074
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
1049
|
-
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...
|
|
1075
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
1050
1076
|
try {
|
|
1051
1077
|
const response: AxiosResponse = await this.postRequest(
|
|
1052
1078
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -1220,7 +1246,7 @@ export default class FirecrawlApp {
|
|
|
1220
1246
|
try {
|
|
1221
1247
|
const response: AxiosResponse = await this.postRequest(
|
|
1222
1248
|
this.apiUrl + `/v1/extract`,
|
|
1223
|
-
{ ...jsonData, schema: jsonSchema, origin:
|
|
1249
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
1224
1250
|
headers
|
|
1225
1251
|
);
|
|
1226
1252
|
|
|
@@ -1288,7 +1314,7 @@ export default class FirecrawlApp {
|
|
|
1288
1314
|
try {
|
|
1289
1315
|
const response: AxiosResponse = await this.postRequest(
|
|
1290
1316
|
this.apiUrl + `/v1/extract`,
|
|
1291
|
-
{ ...jsonData, schema: jsonSchema },
|
|
1317
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
1292
1318
|
headers
|
|
1293
1319
|
);
|
|
1294
1320
|
|
|
@@ -1465,6 +1491,13 @@ export default class FirecrawlApp {
|
|
|
1465
1491
|
* @param {string} action - The action being performed when the error occurred.
|
|
1466
1492
|
*/
|
|
1467
1493
|
handleError(response: AxiosResponse, action: string): void {
|
|
1494
|
+
if (!response) {
|
|
1495
|
+
throw new FirecrawlError(
|
|
1496
|
+
`No response received while trying to ${action}. This may be a network error or the server is unreachable.`,
|
|
1497
|
+
0
|
|
1498
|
+
);
|
|
1499
|
+
}
|
|
1500
|
+
|
|
1468
1501
|
if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
|
|
1469
1502
|
const errorMessage: string =
|
|
1470
1503
|
response.data.error || "Unknown error occurred";
|
|
@@ -1579,7 +1612,7 @@ export default class FirecrawlApp {
|
|
|
1579
1612
|
*/
|
|
1580
1613
|
async asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1581
1614
|
const headers = this.prepareHeaders();
|
|
1582
|
-
let jsonData: any = { query, ...params };
|
|
1615
|
+
let jsonData: any = { query, ...params, origin: `js-sdk@${this.version}` };
|
|
1583
1616
|
|
|
1584
1617
|
if (jsonData?.jsonOptions?.schema) {
|
|
1585
1618
|
let schema = jsonData.jsonOptions.schema;
|
|
@@ -1587,7 +1620,7 @@ export default class FirecrawlApp {
|
|
|
1587
1620
|
try {
|
|
1588
1621
|
schema = zodToJsonSchema(schema);
|
|
1589
1622
|
} catch (error) {
|
|
1590
|
-
|
|
1623
|
+
// Ignore error if schema can't be parsed as Zod
|
|
1591
1624
|
}
|
|
1592
1625
|
jsonData = {
|
|
1593
1626
|
...jsonData,
|
|
@@ -1733,9 +1766,10 @@ export default class FirecrawlApp {
|
|
|
1733
1766
|
async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1734
1767
|
const headers = this.prepareHeaders();
|
|
1735
1768
|
try {
|
|
1769
|
+
let jsonData: any = { topic, ...params, origin: `js-sdk@${this.version}` };
|
|
1736
1770
|
const response: AxiosResponse = await this.postRequest(
|
|
1737
1771
|
`${this.apiUrl}/v1/deep-research`,
|
|
1738
|
-
|
|
1772
|
+
jsonData,
|
|
1739
1773
|
headers
|
|
1740
1774
|
);
|
|
1741
1775
|
|
|
@@ -1845,10 +1879,11 @@ export default class FirecrawlApp {
|
|
|
1845
1879
|
*/
|
|
1846
1880
|
async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
|
|
1847
1881
|
const headers = this.prepareHeaders();
|
|
1882
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
1848
1883
|
try {
|
|
1849
1884
|
const response: AxiosResponse = await this.postRequest(
|
|
1850
1885
|
`${this.apiUrl}/v1/llmstxt`,
|
|
1851
|
-
|
|
1886
|
+
jsonData,
|
|
1852
1887
|
headers
|
|
1853
1888
|
);
|
|
1854
1889
|
|
package/tsconfig.json
CHANGED
|
File without changes
|
package/tsup.config.ts
CHANGED
|
File without changes
|