firecrawl 1.24.0 → 1.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +0 -0
- package/LICENSE +0 -0
- package/README.md +0 -0
- package/dist/index.cjs +101 -12
- package/dist/index.d.cts +13 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.js +28 -12
- package/dist/package-5F6D6QMY.js +102 -0
- package/dump.rdb +0 -0
- package/jest.config.js +0 -0
- package/package.json +1 -1
- package/src/__tests__/e2e_withAuth/index.test.ts +0 -0
- package/src/__tests__/fixtures/scrape.json +0 -0
- package/src/__tests__/index.test.ts +0 -0
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +0 -0
- package/src/index.ts +42 -14
- package/tsconfig.json +0 -0
- package/tsup.config.ts +0 -0
package/.env.example
CHANGED
|
File without changes
|
package/LICENSE
CHANGED
|
File without changes
|
package/README.md
CHANGED
|
File without changes
|
package/dist/index.cjs
CHANGED
|
@@ -5,6 +5,9 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
|
5
5
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
6
|
var __getProtoOf = Object.getPrototypeOf;
|
|
7
7
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
var __commonJS = (cb, mod) => function __require() {
|
|
9
|
+
return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
|
|
10
|
+
};
|
|
8
11
|
var __export = (target, all) => {
|
|
9
12
|
for (var name in all)
|
|
10
13
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
@@ -27,6 +30,76 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
27
30
|
));
|
|
28
31
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
32
|
|
|
33
|
+
// package.json
|
|
34
|
+
var require_package = __commonJS({
|
|
35
|
+
"package.json"(exports2, module2) {
|
|
36
|
+
module2.exports = {
|
|
37
|
+
name: "firecrawl",
|
|
38
|
+
version: "1.25.0",
|
|
39
|
+
description: "JavaScript SDK for Firecrawl API",
|
|
40
|
+
main: "dist/index.js",
|
|
41
|
+
types: "dist/index.d.ts",
|
|
42
|
+
exports: {
|
|
43
|
+
"./package.json": "./package.json",
|
|
44
|
+
".": {
|
|
45
|
+
import: "./dist/index.js",
|
|
46
|
+
default: "./dist/index.cjs"
|
|
47
|
+
}
|
|
48
|
+
},
|
|
49
|
+
type: "module",
|
|
50
|
+
scripts: {
|
|
51
|
+
build: "tsup",
|
|
52
|
+
"build-and-publish": "npm run build && npm publish --access public",
|
|
53
|
+
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
54
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
55
|
+
},
|
|
56
|
+
repository: {
|
|
57
|
+
type: "git",
|
|
58
|
+
url: "git+https://github.com/mendableai/firecrawl.git"
|
|
59
|
+
},
|
|
60
|
+
author: "Mendable.ai",
|
|
61
|
+
license: "MIT",
|
|
62
|
+
dependencies: {
|
|
63
|
+
"typescript-event-target": "^1.1.1",
|
|
64
|
+
zod: "^3.23.8",
|
|
65
|
+
"zod-to-json-schema": "^3.23.0",
|
|
66
|
+
axios: "^1.6.8"
|
|
67
|
+
},
|
|
68
|
+
bugs: {
|
|
69
|
+
url: "https://github.com/mendableai/firecrawl/issues"
|
|
70
|
+
},
|
|
71
|
+
homepage: "https://github.com/mendableai/firecrawl#readme",
|
|
72
|
+
devDependencies: {
|
|
73
|
+
"@jest/globals": "^29.7.0",
|
|
74
|
+
"@types/axios": "^0.14.0",
|
|
75
|
+
"@types/dotenv": "^8.2.0",
|
|
76
|
+
"@types/jest": "^29.5.14",
|
|
77
|
+
"@types/mocha": "^10.0.6",
|
|
78
|
+
"@types/node": "^20.12.12",
|
|
79
|
+
"@types/uuid": "^9.0.8",
|
|
80
|
+
dotenv: "^16.4.5",
|
|
81
|
+
jest: "^29.7.0",
|
|
82
|
+
"ts-jest": "^29.2.2",
|
|
83
|
+
tsup: "^8.2.4",
|
|
84
|
+
typescript: "^5.4.5",
|
|
85
|
+
uuid: "^9.0.1"
|
|
86
|
+
},
|
|
87
|
+
keywords: [
|
|
88
|
+
"firecrawl",
|
|
89
|
+
"mendable",
|
|
90
|
+
"crawler",
|
|
91
|
+
"web",
|
|
92
|
+
"scraper",
|
|
93
|
+
"api",
|
|
94
|
+
"sdk"
|
|
95
|
+
],
|
|
96
|
+
engines: {
|
|
97
|
+
node: ">=22.0.0"
|
|
98
|
+
}
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
});
|
|
102
|
+
|
|
30
103
|
// src/index.ts
|
|
31
104
|
var src_exports = {};
|
|
32
105
|
__export(src_exports, {
|
|
@@ -59,9 +132,22 @@ var FirecrawlError = class extends Error {
|
|
|
59
132
|
var FirecrawlApp = class {
|
|
60
133
|
apiKey;
|
|
61
134
|
apiUrl;
|
|
135
|
+
version = "1.19.1";
|
|
62
136
|
isCloudService(url) {
|
|
63
137
|
return url.includes("api.firecrawl.dev");
|
|
64
138
|
}
|
|
139
|
+
async getVersion() {
|
|
140
|
+
try {
|
|
141
|
+
const packageJson = await Promise.resolve().then(() => __toESM(require_package(), 1));
|
|
142
|
+
return packageJson.default.version;
|
|
143
|
+
} catch (error) {
|
|
144
|
+
console.error("Error getting version:", error);
|
|
145
|
+
return "1.19.1";
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
async init() {
|
|
149
|
+
this.version = await this.getVersion();
|
|
150
|
+
}
|
|
65
151
|
/**
|
|
66
152
|
* Initializes a new instance of the FirecrawlApp class.
|
|
67
153
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
@@ -73,6 +159,7 @@ var FirecrawlApp = class {
|
|
|
73
159
|
}
|
|
74
160
|
this.apiKey = apiKey || "";
|
|
75
161
|
this.apiUrl = baseUrl;
|
|
162
|
+
this.init();
|
|
76
163
|
}
|
|
77
164
|
/**
|
|
78
165
|
* Scrapes a URL using the Firecrawl API.
|
|
@@ -85,7 +172,7 @@ var FirecrawlApp = class {
|
|
|
85
172
|
"Content-Type": "application/json",
|
|
86
173
|
Authorization: `Bearer ${this.apiKey}`
|
|
87
174
|
};
|
|
88
|
-
let jsonData = { url, ...params };
|
|
175
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
89
176
|
if (jsonData?.extract?.schema) {
|
|
90
177
|
let schema = jsonData.extract.schema;
|
|
91
178
|
try {
|
|
@@ -159,7 +246,7 @@ var FirecrawlApp = class {
|
|
|
159
246
|
lang: params?.lang ?? "en",
|
|
160
247
|
country: params?.country ?? "us",
|
|
161
248
|
location: params?.location,
|
|
162
|
-
origin:
|
|
249
|
+
origin: `js-sdk@${this.version}`,
|
|
163
250
|
timeout: params?.timeout ?? 6e4,
|
|
164
251
|
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
|
|
165
252
|
};
|
|
@@ -219,7 +306,7 @@ var FirecrawlApp = class {
|
|
|
219
306
|
*/
|
|
220
307
|
async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
|
|
221
308
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
222
|
-
let jsonData = { url, ...params };
|
|
309
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
223
310
|
try {
|
|
224
311
|
const response = await this.postRequest(
|
|
225
312
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -243,7 +330,7 @@ var FirecrawlApp = class {
|
|
|
243
330
|
}
|
|
244
331
|
async asyncCrawlUrl(url, params, idempotencyKey) {
|
|
245
332
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
246
|
-
let jsonData = { url, ...params };
|
|
333
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
247
334
|
try {
|
|
248
335
|
const response = await this.postRequest(
|
|
249
336
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -402,7 +489,7 @@ var FirecrawlApp = class {
|
|
|
402
489
|
*/
|
|
403
490
|
async mapUrl(url, params) {
|
|
404
491
|
const headers = this.prepareHeaders();
|
|
405
|
-
let jsonData = { url, ...params };
|
|
492
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
406
493
|
try {
|
|
407
494
|
const response = await this.postRequest(
|
|
408
495
|
this.apiUrl + `/v1/map`,
|
|
@@ -431,7 +518,7 @@ var FirecrawlApp = class {
|
|
|
431
518
|
*/
|
|
432
519
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
433
520
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
434
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
521
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
435
522
|
if (jsonData?.extract?.schema) {
|
|
436
523
|
let schema = jsonData.extract.schema;
|
|
437
524
|
try {
|
|
@@ -483,7 +570,7 @@ var FirecrawlApp = class {
|
|
|
483
570
|
}
|
|
484
571
|
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
485
572
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
486
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params
|
|
573
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
487
574
|
try {
|
|
488
575
|
const response = await this.postRequest(
|
|
489
576
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -637,7 +724,7 @@ var FirecrawlApp = class {
|
|
|
637
724
|
try {
|
|
638
725
|
const response = await this.postRequest(
|
|
639
726
|
this.apiUrl + `/v1/extract`,
|
|
640
|
-
{ ...jsonData, schema: jsonSchema, origin:
|
|
727
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
641
728
|
headers
|
|
642
729
|
);
|
|
643
730
|
if (response.status === 200) {
|
|
@@ -697,7 +784,7 @@ var FirecrawlApp = class {
|
|
|
697
784
|
try {
|
|
698
785
|
const response = await this.postRequest(
|
|
699
786
|
this.apiUrl + `/v1/extract`,
|
|
700
|
-
{ ...jsonData, schema: jsonSchema },
|
|
787
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
701
788
|
headers
|
|
702
789
|
);
|
|
703
790
|
if (response.status === 200) {
|
|
@@ -930,7 +1017,7 @@ var FirecrawlApp = class {
|
|
|
930
1017
|
*/
|
|
931
1018
|
async asyncDeepResearch(query, params) {
|
|
932
1019
|
const headers = this.prepareHeaders();
|
|
933
|
-
let jsonData = { query, ...params };
|
|
1020
|
+
let jsonData = { query, ...params, origin: `js-sdk@${this.version}` };
|
|
934
1021
|
if (jsonData?.jsonOptions?.schema) {
|
|
935
1022
|
let schema = jsonData.jsonOptions.schema;
|
|
936
1023
|
try {
|
|
@@ -1053,9 +1140,10 @@ var FirecrawlApp = class {
|
|
|
1053
1140
|
async __asyncDeepResearch(topic, params) {
|
|
1054
1141
|
const headers = this.prepareHeaders();
|
|
1055
1142
|
try {
|
|
1143
|
+
let jsonData = { topic, ...params, origin: `js-sdk@${this.version}` };
|
|
1056
1144
|
const response = await this.postRequest(
|
|
1057
1145
|
`${this.apiUrl}/v1/deep-research`,
|
|
1058
|
-
|
|
1146
|
+
jsonData,
|
|
1059
1147
|
headers
|
|
1060
1148
|
);
|
|
1061
1149
|
if (response.status === 200) {
|
|
@@ -1150,10 +1238,11 @@ var FirecrawlApp = class {
|
|
|
1150
1238
|
*/
|
|
1151
1239
|
async asyncGenerateLLMsText(url, params) {
|
|
1152
1240
|
const headers = this.prepareHeaders();
|
|
1241
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
1153
1242
|
try {
|
|
1154
1243
|
const response = await this.postRequest(
|
|
1155
1244
|
`${this.apiUrl}/v1/llmstxt`,
|
|
1156
|
-
|
|
1245
|
+
jsonData,
|
|
1157
1246
|
headers
|
|
1158
1247
|
);
|
|
1159
1248
|
if (response.status === 200) {
|
package/dist/index.d.cts
CHANGED
|
@@ -205,6 +205,11 @@ interface CrawlParams {
|
|
|
205
205
|
deduplicateSimilarURLs?: boolean;
|
|
206
206
|
ignoreQueryParameters?: boolean;
|
|
207
207
|
regexOnFullURL?: boolean;
|
|
208
|
+
/**
|
|
209
|
+
* Delay in seconds between scrapes. This helps respect website rate limits.
|
|
210
|
+
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
211
|
+
*/
|
|
212
|
+
delay?: number;
|
|
208
213
|
}
|
|
209
214
|
/**
|
|
210
215
|
* Response interface for crawling operations.
|
|
@@ -477,6 +482,11 @@ interface GenerateLLMsTextParams {
|
|
|
477
482
|
* @default false
|
|
478
483
|
*/
|
|
479
484
|
showFullText?: boolean;
|
|
485
|
+
/**
|
|
486
|
+
* Whether to use cached content if available
|
|
487
|
+
* @default true
|
|
488
|
+
*/
|
|
489
|
+
cache?: boolean;
|
|
480
490
|
/**
|
|
481
491
|
* Experimental flag for streaming
|
|
482
492
|
*/
|
|
@@ -509,7 +519,10 @@ interface GenerateLLMsTextStatusResponse {
|
|
|
509
519
|
declare class FirecrawlApp {
|
|
510
520
|
apiKey: string;
|
|
511
521
|
apiUrl: string;
|
|
522
|
+
version: string;
|
|
512
523
|
private isCloudService;
|
|
524
|
+
private getVersion;
|
|
525
|
+
private init;
|
|
513
526
|
/**
|
|
514
527
|
* Initializes a new instance of the FirecrawlApp class.
|
|
515
528
|
* @param config - Configuration options for the FirecrawlApp instance.
|
package/dist/index.d.ts
CHANGED
|
@@ -205,6 +205,11 @@ interface CrawlParams {
|
|
|
205
205
|
deduplicateSimilarURLs?: boolean;
|
|
206
206
|
ignoreQueryParameters?: boolean;
|
|
207
207
|
regexOnFullURL?: boolean;
|
|
208
|
+
/**
|
|
209
|
+
* Delay in seconds between scrapes. This helps respect website rate limits.
|
|
210
|
+
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
211
|
+
*/
|
|
212
|
+
delay?: number;
|
|
208
213
|
}
|
|
209
214
|
/**
|
|
210
215
|
* Response interface for crawling operations.
|
|
@@ -477,6 +482,11 @@ interface GenerateLLMsTextParams {
|
|
|
477
482
|
* @default false
|
|
478
483
|
*/
|
|
479
484
|
showFullText?: boolean;
|
|
485
|
+
/**
|
|
486
|
+
* Whether to use cached content if available
|
|
487
|
+
* @default true
|
|
488
|
+
*/
|
|
489
|
+
cache?: boolean;
|
|
480
490
|
/**
|
|
481
491
|
* Experimental flag for streaming
|
|
482
492
|
*/
|
|
@@ -509,7 +519,10 @@ interface GenerateLLMsTextStatusResponse {
|
|
|
509
519
|
declare class FirecrawlApp {
|
|
510
520
|
apiKey: string;
|
|
511
521
|
apiUrl: string;
|
|
522
|
+
version: string;
|
|
512
523
|
private isCloudService;
|
|
524
|
+
private getVersion;
|
|
525
|
+
private init;
|
|
513
526
|
/**
|
|
514
527
|
* Initializes a new instance of the FirecrawlApp class.
|
|
515
528
|
* @param config - Configuration options for the FirecrawlApp instance.
|
package/dist/index.js
CHANGED
|
@@ -23,9 +23,22 @@ var FirecrawlError = class extends Error {
|
|
|
23
23
|
var FirecrawlApp = class {
|
|
24
24
|
apiKey;
|
|
25
25
|
apiUrl;
|
|
26
|
+
version = "1.19.1";
|
|
26
27
|
isCloudService(url) {
|
|
27
28
|
return url.includes("api.firecrawl.dev");
|
|
28
29
|
}
|
|
30
|
+
async getVersion() {
|
|
31
|
+
try {
|
|
32
|
+
const packageJson = await import("./package-5F6D6QMY.js");
|
|
33
|
+
return packageJson.default.version;
|
|
34
|
+
} catch (error) {
|
|
35
|
+
console.error("Error getting version:", error);
|
|
36
|
+
return "1.19.1";
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
async init() {
|
|
40
|
+
this.version = await this.getVersion();
|
|
41
|
+
}
|
|
29
42
|
/**
|
|
30
43
|
* Initializes a new instance of the FirecrawlApp class.
|
|
31
44
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
@@ -37,6 +50,7 @@ var FirecrawlApp = class {
|
|
|
37
50
|
}
|
|
38
51
|
this.apiKey = apiKey || "";
|
|
39
52
|
this.apiUrl = baseUrl;
|
|
53
|
+
this.init();
|
|
40
54
|
}
|
|
41
55
|
/**
|
|
42
56
|
* Scrapes a URL using the Firecrawl API.
|
|
@@ -49,7 +63,7 @@ var FirecrawlApp = class {
|
|
|
49
63
|
"Content-Type": "application/json",
|
|
50
64
|
Authorization: `Bearer ${this.apiKey}`
|
|
51
65
|
};
|
|
52
|
-
let jsonData = { url, ...params };
|
|
66
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
53
67
|
if (jsonData?.extract?.schema) {
|
|
54
68
|
let schema = jsonData.extract.schema;
|
|
55
69
|
try {
|
|
@@ -123,7 +137,7 @@ var FirecrawlApp = class {
|
|
|
123
137
|
lang: params?.lang ?? "en",
|
|
124
138
|
country: params?.country ?? "us",
|
|
125
139
|
location: params?.location,
|
|
126
|
-
origin:
|
|
140
|
+
origin: `js-sdk@${this.version}`,
|
|
127
141
|
timeout: params?.timeout ?? 6e4,
|
|
128
142
|
scrapeOptions: params?.scrapeOptions ?? { formats: [] }
|
|
129
143
|
};
|
|
@@ -183,7 +197,7 @@ var FirecrawlApp = class {
|
|
|
183
197
|
*/
|
|
184
198
|
async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
|
|
185
199
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
186
|
-
let jsonData = { url, ...params };
|
|
200
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
187
201
|
try {
|
|
188
202
|
const response = await this.postRequest(
|
|
189
203
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -207,7 +221,7 @@ var FirecrawlApp = class {
|
|
|
207
221
|
}
|
|
208
222
|
async asyncCrawlUrl(url, params, idempotencyKey) {
|
|
209
223
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
210
|
-
let jsonData = { url, ...params };
|
|
224
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
211
225
|
try {
|
|
212
226
|
const response = await this.postRequest(
|
|
213
227
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -366,7 +380,7 @@ var FirecrawlApp = class {
|
|
|
366
380
|
*/
|
|
367
381
|
async mapUrl(url, params) {
|
|
368
382
|
const headers = this.prepareHeaders();
|
|
369
|
-
let jsonData = { url, ...params };
|
|
383
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
370
384
|
try {
|
|
371
385
|
const response = await this.postRequest(
|
|
372
386
|
this.apiUrl + `/v1/map`,
|
|
@@ -395,7 +409,7 @@ var FirecrawlApp = class {
|
|
|
395
409
|
*/
|
|
396
410
|
async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
397
411
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
398
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
412
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
399
413
|
if (jsonData?.extract?.schema) {
|
|
400
414
|
let schema = jsonData.extract.schema;
|
|
401
415
|
try {
|
|
@@ -447,7 +461,7 @@ var FirecrawlApp = class {
|
|
|
447
461
|
}
|
|
448
462
|
async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
|
|
449
463
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
450
|
-
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params
|
|
464
|
+
let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
451
465
|
try {
|
|
452
466
|
const response = await this.postRequest(
|
|
453
467
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -601,7 +615,7 @@ var FirecrawlApp = class {
|
|
|
601
615
|
try {
|
|
602
616
|
const response = await this.postRequest(
|
|
603
617
|
this.apiUrl + `/v1/extract`,
|
|
604
|
-
{ ...jsonData, schema: jsonSchema, origin:
|
|
618
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
605
619
|
headers
|
|
606
620
|
);
|
|
607
621
|
if (response.status === 200) {
|
|
@@ -661,7 +675,7 @@ var FirecrawlApp = class {
|
|
|
661
675
|
try {
|
|
662
676
|
const response = await this.postRequest(
|
|
663
677
|
this.apiUrl + `/v1/extract`,
|
|
664
|
-
{ ...jsonData, schema: jsonSchema },
|
|
678
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
665
679
|
headers
|
|
666
680
|
);
|
|
667
681
|
if (response.status === 200) {
|
|
@@ -894,7 +908,7 @@ var FirecrawlApp = class {
|
|
|
894
908
|
*/
|
|
895
909
|
async asyncDeepResearch(query, params) {
|
|
896
910
|
const headers = this.prepareHeaders();
|
|
897
|
-
let jsonData = { query, ...params };
|
|
911
|
+
let jsonData = { query, ...params, origin: `js-sdk@${this.version}` };
|
|
898
912
|
if (jsonData?.jsonOptions?.schema) {
|
|
899
913
|
let schema = jsonData.jsonOptions.schema;
|
|
900
914
|
try {
|
|
@@ -1017,9 +1031,10 @@ var FirecrawlApp = class {
|
|
|
1017
1031
|
async __asyncDeepResearch(topic, params) {
|
|
1018
1032
|
const headers = this.prepareHeaders();
|
|
1019
1033
|
try {
|
|
1034
|
+
let jsonData = { topic, ...params, origin: `js-sdk@${this.version}` };
|
|
1020
1035
|
const response = await this.postRequest(
|
|
1021
1036
|
`${this.apiUrl}/v1/deep-research`,
|
|
1022
|
-
|
|
1037
|
+
jsonData,
|
|
1023
1038
|
headers
|
|
1024
1039
|
);
|
|
1025
1040
|
if (response.status === 200) {
|
|
@@ -1114,10 +1129,11 @@ var FirecrawlApp = class {
|
|
|
1114
1129
|
*/
|
|
1115
1130
|
async asyncGenerateLLMsText(url, params) {
|
|
1116
1131
|
const headers = this.prepareHeaders();
|
|
1132
|
+
let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
1117
1133
|
try {
|
|
1118
1134
|
const response = await this.postRequest(
|
|
1119
1135
|
`${this.apiUrl}/v1/llmstxt`,
|
|
1120
|
-
|
|
1136
|
+
jsonData,
|
|
1121
1137
|
headers
|
|
1122
1138
|
);
|
|
1123
1139
|
if (response.status === 200) {
|
package/dist/package-5F6D6QMY.js
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// package.json
|
|
2
|
+
var name = "firecrawl";
|
|
3
|
+
var version = "1.25.0";
|
|
4
|
+
var description = "JavaScript SDK for Firecrawl API";
|
|
5
|
+
var main = "dist/index.js";
|
|
6
|
+
var types = "dist/index.d.ts";
|
|
7
|
+
var exports = {
|
|
8
|
+
"./package.json": "./package.json",
|
|
9
|
+
".": {
|
|
10
|
+
import: "./dist/index.js",
|
|
11
|
+
default: "./dist/index.cjs"
|
|
12
|
+
}
|
|
13
|
+
};
|
|
14
|
+
var type = "module";
|
|
15
|
+
var scripts = {
|
|
16
|
+
build: "tsup",
|
|
17
|
+
"build-and-publish": "npm run build && npm publish --access public",
|
|
18
|
+
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
19
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
20
|
+
};
|
|
21
|
+
var repository = {
|
|
22
|
+
type: "git",
|
|
23
|
+
url: "git+https://github.com/mendableai/firecrawl.git"
|
|
24
|
+
};
|
|
25
|
+
var author = "Mendable.ai";
|
|
26
|
+
var license = "MIT";
|
|
27
|
+
var dependencies = {
|
|
28
|
+
"typescript-event-target": "^1.1.1",
|
|
29
|
+
zod: "^3.23.8",
|
|
30
|
+
"zod-to-json-schema": "^3.23.0",
|
|
31
|
+
axios: "^1.6.8"
|
|
32
|
+
};
|
|
33
|
+
var bugs = {
|
|
34
|
+
url: "https://github.com/mendableai/firecrawl/issues"
|
|
35
|
+
};
|
|
36
|
+
var homepage = "https://github.com/mendableai/firecrawl#readme";
|
|
37
|
+
var devDependencies = {
|
|
38
|
+
"@jest/globals": "^29.7.0",
|
|
39
|
+
"@types/axios": "^0.14.0",
|
|
40
|
+
"@types/dotenv": "^8.2.0",
|
|
41
|
+
"@types/jest": "^29.5.14",
|
|
42
|
+
"@types/mocha": "^10.0.6",
|
|
43
|
+
"@types/node": "^20.12.12",
|
|
44
|
+
"@types/uuid": "^9.0.8",
|
|
45
|
+
dotenv: "^16.4.5",
|
|
46
|
+
jest: "^29.7.0",
|
|
47
|
+
"ts-jest": "^29.2.2",
|
|
48
|
+
tsup: "^8.2.4",
|
|
49
|
+
typescript: "^5.4.5",
|
|
50
|
+
uuid: "^9.0.1"
|
|
51
|
+
};
|
|
52
|
+
var keywords = [
|
|
53
|
+
"firecrawl",
|
|
54
|
+
"mendable",
|
|
55
|
+
"crawler",
|
|
56
|
+
"web",
|
|
57
|
+
"scraper",
|
|
58
|
+
"api",
|
|
59
|
+
"sdk"
|
|
60
|
+
];
|
|
61
|
+
var engines = {
|
|
62
|
+
node: ">=22.0.0"
|
|
63
|
+
};
|
|
64
|
+
var package_default = {
|
|
65
|
+
name,
|
|
66
|
+
version,
|
|
67
|
+
description,
|
|
68
|
+
main,
|
|
69
|
+
types,
|
|
70
|
+
exports,
|
|
71
|
+
type,
|
|
72
|
+
scripts,
|
|
73
|
+
repository,
|
|
74
|
+
author,
|
|
75
|
+
license,
|
|
76
|
+
dependencies,
|
|
77
|
+
bugs,
|
|
78
|
+
homepage,
|
|
79
|
+
devDependencies,
|
|
80
|
+
keywords,
|
|
81
|
+
engines
|
|
82
|
+
};
|
|
83
|
+
export {
|
|
84
|
+
author,
|
|
85
|
+
bugs,
|
|
86
|
+
package_default as default,
|
|
87
|
+
dependencies,
|
|
88
|
+
description,
|
|
89
|
+
devDependencies,
|
|
90
|
+
engines,
|
|
91
|
+
exports,
|
|
92
|
+
homepage,
|
|
93
|
+
keywords,
|
|
94
|
+
license,
|
|
95
|
+
main,
|
|
96
|
+
name,
|
|
97
|
+
repository,
|
|
98
|
+
scripts,
|
|
99
|
+
type,
|
|
100
|
+
types,
|
|
101
|
+
version
|
|
102
|
+
};
|
package/dump.rdb
ADDED
|
Binary file
|
package/jest.config.js
CHANGED
|
File without changes
|
package/package.json
CHANGED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
package/src/index.ts
CHANGED
|
@@ -215,6 +215,11 @@ export interface CrawlParams {
|
|
|
215
215
|
deduplicateSimilarURLs?: boolean;
|
|
216
216
|
ignoreQueryParameters?: boolean;
|
|
217
217
|
regexOnFullURL?: boolean;
|
|
218
|
+
/**
|
|
219
|
+
* Delay in seconds between scrapes. This helps respect website rate limits.
|
|
220
|
+
* If not provided, the crawler may use the robots.txt crawl delay if available.
|
|
221
|
+
*/
|
|
222
|
+
delay?: number;
|
|
218
223
|
}
|
|
219
224
|
|
|
220
225
|
/**
|
|
@@ -515,6 +520,11 @@ export interface GenerateLLMsTextParams {
|
|
|
515
520
|
* @default false
|
|
516
521
|
*/
|
|
517
522
|
showFullText?: boolean;
|
|
523
|
+
/**
|
|
524
|
+
* Whether to use cached content if available
|
|
525
|
+
* @default true
|
|
526
|
+
*/
|
|
527
|
+
cache?: boolean;
|
|
518
528
|
/**
|
|
519
529
|
* Experimental flag for streaming
|
|
520
530
|
*/
|
|
@@ -550,11 +560,26 @@ export interface GenerateLLMsTextStatusResponse {
|
|
|
550
560
|
export default class FirecrawlApp {
|
|
551
561
|
public apiKey: string;
|
|
552
562
|
public apiUrl: string;
|
|
553
|
-
|
|
563
|
+
public version: string = "1.19.1";
|
|
564
|
+
|
|
554
565
|
private isCloudService(url: string): boolean {
|
|
555
566
|
return url.includes('api.firecrawl.dev');
|
|
556
567
|
}
|
|
557
568
|
|
|
569
|
+
private async getVersion(): Promise<string> {
|
|
570
|
+
try {
|
|
571
|
+
const packageJson = await import('../package.json', { assert: { type: 'json' } });
|
|
572
|
+
return packageJson.default.version;
|
|
573
|
+
} catch (error) {
|
|
574
|
+
console.error("Error getting version:", error);
|
|
575
|
+
return "1.19.1";
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
private async init() {
|
|
580
|
+
this.version = await this.getVersion();
|
|
581
|
+
}
|
|
582
|
+
|
|
558
583
|
/**
|
|
559
584
|
* Initializes a new instance of the FirecrawlApp class.
|
|
560
585
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
@@ -568,6 +593,7 @@ export default class FirecrawlApp {
|
|
|
568
593
|
|
|
569
594
|
this.apiKey = apiKey || '';
|
|
570
595
|
this.apiUrl = baseUrl;
|
|
596
|
+
this.init();
|
|
571
597
|
}
|
|
572
598
|
|
|
573
599
|
/**
|
|
@@ -584,7 +610,7 @@ export default class FirecrawlApp {
|
|
|
584
610
|
"Content-Type": "application/json",
|
|
585
611
|
Authorization: `Bearer ${this.apiKey}`,
|
|
586
612
|
} as AxiosRequestHeaders;
|
|
587
|
-
let jsonData: any = { url, ...params };
|
|
613
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
588
614
|
if (jsonData?.extract?.schema) {
|
|
589
615
|
let schema = jsonData.extract.schema;
|
|
590
616
|
|
|
@@ -666,7 +692,7 @@ export default class FirecrawlApp {
|
|
|
666
692
|
lang: params?.lang ?? "en",
|
|
667
693
|
country: params?.country ?? "us",
|
|
668
694
|
location: params?.location,
|
|
669
|
-
origin:
|
|
695
|
+
origin: `js-sdk@${this.version}`,
|
|
670
696
|
timeout: params?.timeout ?? 60000,
|
|
671
697
|
scrapeOptions: params?.scrapeOptions ?? { formats: [] },
|
|
672
698
|
};
|
|
@@ -738,7 +764,7 @@ export default class FirecrawlApp {
|
|
|
738
764
|
idempotencyKey?: string
|
|
739
765
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
740
766
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
741
|
-
let jsonData: any = { url, ...params };
|
|
767
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
742
768
|
try {
|
|
743
769
|
const response: AxiosResponse = await this.postRequest(
|
|
744
770
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -767,7 +793,7 @@ export default class FirecrawlApp {
|
|
|
767
793
|
idempotencyKey?: string
|
|
768
794
|
): Promise<CrawlResponse | ErrorResponse> {
|
|
769
795
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
770
|
-
let jsonData: any = { url, ...params };
|
|
796
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
771
797
|
try {
|
|
772
798
|
const response: AxiosResponse = await this.postRequest(
|
|
773
799
|
this.apiUrl + `/v1/crawl`,
|
|
@@ -943,7 +969,7 @@ export default class FirecrawlApp {
|
|
|
943
969
|
*/
|
|
944
970
|
async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
|
|
945
971
|
const headers = this.prepareHeaders();
|
|
946
|
-
let jsonData:
|
|
972
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
947
973
|
|
|
948
974
|
try {
|
|
949
975
|
const response: AxiosResponse = await this.postRequest(
|
|
@@ -981,7 +1007,7 @@ export default class FirecrawlApp {
|
|
|
981
1007
|
ignoreInvalidURLs?: boolean,
|
|
982
1008
|
): Promise<BatchScrapeStatusResponse | ErrorResponse> {
|
|
983
1009
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
984
|
-
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
|
|
1010
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
985
1011
|
if (jsonData?.extract?.schema) {
|
|
986
1012
|
let schema = jsonData.extract.schema;
|
|
987
1013
|
|
|
@@ -1046,7 +1072,7 @@ export default class FirecrawlApp {
|
|
|
1046
1072
|
ignoreInvalidURLs?: boolean,
|
|
1047
1073
|
): Promise<BatchScrapeResponse | ErrorResponse> {
|
|
1048
1074
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
1049
|
-
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...
|
|
1075
|
+
let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
|
|
1050
1076
|
try {
|
|
1051
1077
|
const response: AxiosResponse = await this.postRequest(
|
|
1052
1078
|
this.apiUrl + `/v1/batch/scrape`,
|
|
@@ -1220,7 +1246,7 @@ export default class FirecrawlApp {
|
|
|
1220
1246
|
try {
|
|
1221
1247
|
const response: AxiosResponse = await this.postRequest(
|
|
1222
1248
|
this.apiUrl + `/v1/extract`,
|
|
1223
|
-
{ ...jsonData, schema: jsonSchema, origin:
|
|
1249
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
1224
1250
|
headers
|
|
1225
1251
|
);
|
|
1226
1252
|
|
|
@@ -1288,7 +1314,7 @@ export default class FirecrawlApp {
|
|
|
1288
1314
|
try {
|
|
1289
1315
|
const response: AxiosResponse = await this.postRequest(
|
|
1290
1316
|
this.apiUrl + `/v1/extract`,
|
|
1291
|
-
{ ...jsonData, schema: jsonSchema },
|
|
1317
|
+
{ ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
|
|
1292
1318
|
headers
|
|
1293
1319
|
);
|
|
1294
1320
|
|
|
@@ -1579,7 +1605,7 @@ export default class FirecrawlApp {
|
|
|
1579
1605
|
*/
|
|
1580
1606
|
async asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1581
1607
|
const headers = this.prepareHeaders();
|
|
1582
|
-
let jsonData: any = { query, ...params };
|
|
1608
|
+
let jsonData: any = { query, ...params, origin: `js-sdk@${this.version}` };
|
|
1583
1609
|
|
|
1584
1610
|
if (jsonData?.jsonOptions?.schema) {
|
|
1585
1611
|
let schema = jsonData.jsonOptions.schema;
|
|
@@ -1587,7 +1613,7 @@ export default class FirecrawlApp {
|
|
|
1587
1613
|
try {
|
|
1588
1614
|
schema = zodToJsonSchema(schema);
|
|
1589
1615
|
} catch (error) {
|
|
1590
|
-
|
|
1616
|
+
// Ignore error if schema can't be parsed as Zod
|
|
1591
1617
|
}
|
|
1592
1618
|
jsonData = {
|
|
1593
1619
|
...jsonData,
|
|
@@ -1733,9 +1759,10 @@ export default class FirecrawlApp {
|
|
|
1733
1759
|
async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
|
|
1734
1760
|
const headers = this.prepareHeaders();
|
|
1735
1761
|
try {
|
|
1762
|
+
let jsonData: any = { topic, ...params, origin: `js-sdk@${this.version}` };
|
|
1736
1763
|
const response: AxiosResponse = await this.postRequest(
|
|
1737
1764
|
`${this.apiUrl}/v1/deep-research`,
|
|
1738
|
-
|
|
1765
|
+
jsonData,
|
|
1739
1766
|
headers
|
|
1740
1767
|
);
|
|
1741
1768
|
|
|
@@ -1845,10 +1872,11 @@ export default class FirecrawlApp {
|
|
|
1845
1872
|
*/
|
|
1846
1873
|
async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
|
|
1847
1874
|
const headers = this.prepareHeaders();
|
|
1875
|
+
let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
|
|
1848
1876
|
try {
|
|
1849
1877
|
const response: AxiosResponse = await this.postRequest(
|
|
1850
1878
|
`${this.apiUrl}/v1/llmstxt`,
|
|
1851
|
-
|
|
1879
|
+
jsonData,
|
|
1852
1880
|
headers
|
|
1853
1881
|
);
|
|
1854
1882
|
|
package/tsconfig.json
CHANGED
|
File without changes
|
package/tsup.config.ts
CHANGED
|
File without changes
|