@mendable/firecrawl 1.23.9 → 1.25.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +107 -12
- package/dist/index.d.cts +31 -2
- package/dist/index.d.ts +31 -2
- package/dist/index.js +34 -12
- package/dist/package-Y5V6L2WQ.js +102 -0
- package/package.json +1 -1
- package/src/index.ts +68 -15
package/dist/index.cjs
CHANGED

@@ -5,6 +5,9 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
 var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __commonJS = (cb, mod) => function __require() {
+  return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
+};
 var __export = (target, all) => {
   for (var name in all)
     __defProp(target, name, { get: all[name], enumerable: true });
@@ -27,6 +30,76 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
 ));
 var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
 
+// package.json
+var require_package = __commonJS({
+  "package.json"(exports2, module2) {
+    module2.exports = {
+      name: "@mendable/firecrawl-js",
+      version: "1.25.3",
+      description: "JavaScript SDK for Firecrawl API",
+      main: "dist/index.js",
+      types: "dist/index.d.ts",
+      exports: {
+        "./package.json": "./package.json",
+        ".": {
+          import: "./dist/index.js",
+          default: "./dist/index.cjs"
+        }
+      },
+      type: "module",
+      scripts: {
+        build: "tsup",
+        "build-and-publish": "npm run build && npm publish --access public",
+        "publish-beta": "npm run build && npm publish --access public --tag beta",
+        test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
+      },
+      repository: {
+        type: "git",
+        url: "git+https://github.com/mendableai/firecrawl.git"
+      },
+      author: "Mendable.ai",
+      license: "MIT",
+      dependencies: {
+        "typescript-event-target": "^1.1.1",
+        zod: "^3.23.8",
+        "zod-to-json-schema": "^3.23.0",
+        axios: "^1.6.8"
+      },
+      bugs: {
+        url: "https://github.com/mendableai/firecrawl/issues"
+      },
+      homepage: "https://github.com/mendableai/firecrawl#readme",
+      devDependencies: {
+        "@jest/globals": "^29.7.0",
+        "@types/axios": "^0.14.0",
+        "@types/dotenv": "^8.2.0",
+        "@types/jest": "^29.5.14",
+        "@types/mocha": "^10.0.6",
+        "@types/node": "^20.12.12",
+        "@types/uuid": "^9.0.8",
+        dotenv: "^16.4.5",
+        jest: "^29.7.0",
+        "ts-jest": "^29.2.2",
+        tsup: "^8.2.4",
+        typescript: "^5.4.5",
+        uuid: "^9.0.1"
+      },
+      keywords: [
+        "firecrawl",
+        "mendable",
+        "crawler",
+        "web",
+        "scraper",
+        "api",
+        "sdk"
+      ],
+      engines: {
+        node: ">=22.0.0"
+      }
+    };
+  }
+});
+
 // src/index.ts
 var src_exports = {};
 __export(src_exports, {
@@ -59,9 +132,22 @@ var FirecrawlError = class extends Error {
 var FirecrawlApp = class {
   apiKey;
   apiUrl;
+  version = "1.25.1";
   isCloudService(url) {
     return url.includes("api.firecrawl.dev");
   }
+  async getVersion() {
+    try {
+      const packageJson = await Promise.resolve().then(() => __toESM(require_package(), 1));
+      return packageJson.default.version;
+    } catch (error) {
+      console.error("Error getting version:", error);
+      return "1.25.1";
+    }
+  }
+  async init() {
+    this.version = await this.getVersion();
+  }
   /**
    * Initializes a new instance of the FirecrawlApp class.
    * @param config - Configuration options for the FirecrawlApp instance.
@@ -73,6 +159,7 @@ var FirecrawlApp = class {
     }
     this.apiKey = apiKey || "";
     this.apiUrl = baseUrl;
+    this.init();
   }
   /**
    * Scrapes a URL using the Firecrawl API.
@@ -85,7 +172,7 @@ var FirecrawlApp = class {
       "Content-Type": "application/json",
       Authorization: `Bearer ${this.apiKey}`
     };
-    let jsonData = { url, ...params };
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     if (jsonData?.extract?.schema) {
       let schema = jsonData.extract.schema;
       try {
@@ -159,7 +246,7 @@ var FirecrawlApp = class {
       lang: params?.lang ?? "en",
       country: params?.country ?? "us",
       location: params?.location,
-      origin:
+      origin: `js-sdk@${this.version}`,
       timeout: params?.timeout ?? 6e4,
       scrapeOptions: params?.scrapeOptions ?? { formats: [] }
     };
@@ -219,7 +306,7 @@ var FirecrawlApp = class {
    */
   async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { url, ...params };
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/crawl`,
@@ -243,7 +330,7 @@ var FirecrawlApp = class {
   }
   async asyncCrawlUrl(url, params, idempotencyKey) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { url, ...params };
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/crawl`,
@@ -402,7 +489,7 @@ var FirecrawlApp = class {
    */
   async mapUrl(url, params) {
     const headers = this.prepareHeaders();
-    let jsonData = { url, ...params };
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/map`,
@@ -431,7 +518,7 @@ var FirecrawlApp = class {
    */
   async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
+    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
     if (jsonData?.extract?.schema) {
       let schema = jsonData.extract.schema;
       try {
@@ -483,7 +570,7 @@ var FirecrawlApp = class {
   }
   async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params
+    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/batch/scrape`,
@@ -637,7 +724,7 @@ var FirecrawlApp = class {
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema, origin:
+        { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
         headers
       );
       if (response.status === 200) {
@@ -697,7 +784,7 @@ var FirecrawlApp = class {
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema },
+        { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
         headers
       );
       if (response.status === 200) {
@@ -849,6 +936,12 @@ var FirecrawlApp = class {
    * @param {string} action - The action being performed when the error occurred.
    */
   handleError(response, action) {
+    if (!response) {
+      throw new FirecrawlError(
+        `No response received while trying to ${action}. This may be a network error or the server is unreachable.`,
+        0
+      );
+    }
     if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
       const errorMessage = response.data.error || "Unknown error occurred";
       const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
@@ -930,7 +1023,7 @@ var FirecrawlApp = class {
    */
   async asyncDeepResearch(query, params) {
     const headers = this.prepareHeaders();
-    let jsonData = { query, ...params };
+    let jsonData = { query, ...params, origin: `js-sdk@${this.version}` };
     if (jsonData?.jsonOptions?.schema) {
       let schema = jsonData.jsonOptions.schema;
       try {
@@ -1053,9 +1146,10 @@ var FirecrawlApp = class {
   async __asyncDeepResearch(topic, params) {
     const headers = this.prepareHeaders();
     try {
+      let jsonData = { topic, ...params, origin: `js-sdk@${this.version}` };
       const response = await this.postRequest(
         `${this.apiUrl}/v1/deep-research`,
-
+        jsonData,
         headers
       );
       if (response.status === 200) {
@@ -1150,10 +1244,11 @@ var FirecrawlApp = class {
    */
   async asyncGenerateLLMsText(url, params) {
     const headers = this.prepareHeaders();
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         `${this.apiUrl}/v1/llmstxt`,
-
+        jsonData,
         headers
       );
       if (response.status === 200) {
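For orientation, the runtime effect of the new bundle code above can be reduced to the following simplified sketch (this is not the shipped bundle: the real build goes through the __commonJS/__toESM helpers and require_package(), and the stand-in package object here is illustrative). The key behavior is that the constructor fires init() without awaiting it, so version starts at the baked-in fallback "1.25.1" and is replaced by the value read from the inlined package.json once the promise settles.

// Simplified TypeScript sketch of the version-resolution flow added above (assumed names).
const FALLBACK_VERSION = "1.25.1";

class VersionedClient {
  version = FALLBACK_VERSION;

  constructor() {
    // fire-and-forget, mirroring `this.init();` in the constructor above
    this.init();
  }

  private async getVersion(): Promise<string> {
    try {
      // stands in for the inlined package.json lookup in the bundle
      const pkg = { version: "1.25.3" };
      return pkg.version;
    } catch (error) {
      console.error("Error getting version:", error);
      return FALLBACK_VERSION;
    }
  }

  private async init(): Promise<void> {
    this.version = await this.getVersion();
  }
}

const client = new VersionedClient();
console.log(client.version);                      // "1.25.1" until init() settles
setTimeout(() => console.log(client.version), 0); // "1.25.3" afterwards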
package/dist/index.d.cts
CHANGED

@@ -114,7 +114,8 @@ interface CrawlScrapeOptions {
     skipTlsVerification?: boolean;
     removeBase64Images?: boolean;
     blockAds?: boolean;
-    proxy?: "basic" | "stealth";
+    proxy?: "basic" | "stealth" | "auto";
+    storeInCache?: boolean;
 }
 type Action = {
     type: "wait";
@@ -160,6 +161,7 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
         modes?: ("json" | "git-diff")[];
     };
     actions?: ActionsSchema;
+    agent?: AgentOptions;
 }
 interface ActionsResult {
     screenshots: string[];
@@ -204,6 +206,11 @@ interface CrawlParams {
     deduplicateSimilarURLs?: boolean;
     ignoreQueryParameters?: boolean;
     regexOnFullURL?: boolean;
+    /**
+     * Delay in seconds between scrapes. This helps respect website rate limits.
+     * If not provided, the crawler may use the robots.txt crawl delay if available.
+     */
+    delay?: number;
 }
 /**
  * Response interface for crawling operations.
@@ -279,6 +286,19 @@ interface MapResponse {
  * Parameters for extracting information from URLs.
  * Defines options for extracting information from URLs.
  */
+interface AgentOptions {
+    model?: string;
+    prompt?: string;
+    sessionId?: string;
+}
+/**
+ * Parameters for extracting information from URLs.
+ * Defines options for extracting information from URLs.
+ */
+interface AgentOptionsExtract {
+    model?: string;
+    sessionId?: string;
+}
 interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
     prompt?: string;
     schema?: LLMSchema | object;
@@ -289,6 +309,7 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
     origin?: string;
     showSources?: boolean;
     scrapeOptions?: CrawlScrapeOptions;
+    agent?: AgentOptionsExtract;
 }
 /**
  * Response interface for extracting information from URLs.
@@ -462,6 +483,11 @@ interface GenerateLLMsTextParams {
      * @default false
      */
     showFullText?: boolean;
+    /**
+     * Whether to use cached content if available
+     * @default true
+     */
+    cache?: boolean;
     /**
      * Experimental flag for streaming
      */
@@ -494,7 +520,10 @@ interface GenerateLLMsTextStatusResponse {
 declare class FirecrawlApp {
     apiKey: string;
     apiUrl: string;
+    version: string;
     private isCloudService;
+    private getVersion;
+    private init;
     /**
      * Initializes a new instance of the FirecrawlApp class.
      * @param config - Configuration options for the FirecrawlApp instance.
@@ -765,4 +794,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
     close(): void;
 }
 
-export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type DeepResearchParams, type DeepResearchResponse, type DeepResearchStatusResponse, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type GenerateLLMsTextParams, type GenerateLLMsTextResponse, type GenerateLLMsTextStatusResponse, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
+export { type Action, type ActionsResult, type AgentOptions, type AgentOptionsExtract, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type DeepResearchParams, type DeepResearchResponse, type DeepResearchStatusResponse, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type GenerateLLMsTextParams, type GenerateLLMsTextResponse, type GenerateLLMsTextStatusResponse, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.d.ts
CHANGED

@@ -114,7 +114,8 @@ interface CrawlScrapeOptions {
     skipTlsVerification?: boolean;
     removeBase64Images?: boolean;
     blockAds?: boolean;
-    proxy?: "basic" | "stealth";
+    proxy?: "basic" | "stealth" | "auto";
+    storeInCache?: boolean;
 }
 type Action = {
     type: "wait";
@@ -160,6 +161,7 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
         modes?: ("json" | "git-diff")[];
     };
     actions?: ActionsSchema;
+    agent?: AgentOptions;
 }
 interface ActionsResult {
     screenshots: string[];
@@ -204,6 +206,11 @@ interface CrawlParams {
     deduplicateSimilarURLs?: boolean;
     ignoreQueryParameters?: boolean;
     regexOnFullURL?: boolean;
+    /**
+     * Delay in seconds between scrapes. This helps respect website rate limits.
+     * If not provided, the crawler may use the robots.txt crawl delay if available.
+     */
+    delay?: number;
 }
 /**
  * Response interface for crawling operations.
@@ -279,6 +286,19 @@ interface MapResponse {
  * Parameters for extracting information from URLs.
  * Defines options for extracting information from URLs.
  */
+interface AgentOptions {
+    model?: string;
+    prompt?: string;
+    sessionId?: string;
+}
+/**
+ * Parameters for extracting information from URLs.
+ * Defines options for extracting information from URLs.
+ */
+interface AgentOptionsExtract {
+    model?: string;
+    sessionId?: string;
+}
 interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
     prompt?: string;
     schema?: LLMSchema | object;
@@ -289,6 +309,7 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
     origin?: string;
     showSources?: boolean;
     scrapeOptions?: CrawlScrapeOptions;
+    agent?: AgentOptionsExtract;
 }
 /**
  * Response interface for extracting information from URLs.
@@ -462,6 +483,11 @@ interface GenerateLLMsTextParams {
      * @default false
      */
     showFullText?: boolean;
+    /**
+     * Whether to use cached content if available
+     * @default true
+     */
+    cache?: boolean;
     /**
      * Experimental flag for streaming
      */
@@ -494,7 +520,10 @@ interface GenerateLLMsTextStatusResponse {
 declare class FirecrawlApp {
     apiKey: string;
     apiUrl: string;
+    version: string;
     private isCloudService;
+    private getVersion;
+    private init;
     /**
      * Initializes a new instance of the FirecrawlApp class.
      * @param config - Configuration options for the FirecrawlApp instance.
@@ -765,4 +794,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
     close(): void;
 }
 
-export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type DeepResearchParams, type DeepResearchResponse, type DeepResearchStatusResponse, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type GenerateLLMsTextParams, type GenerateLLMsTextResponse, type GenerateLLMsTextStatusResponse, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
+export { type Action, type ActionsResult, type AgentOptions, type AgentOptionsExtract, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type DeepResearchParams, type DeepResearchResponse, type DeepResearchStatusResponse, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type GenerateLLMsTextParams, type GenerateLLMsTextResponse, type GenerateLLMsTextStatusResponse, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
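Taken together, the declaration changes above surface a few new request options: proxy now accepts "auto", scrape options gain storeInCache and agent, crawls gain delay, and llms.txt generation gains cache. The following minimal TypeScript sketch shows how they could be passed through the SDK. The field names come from the declarations above and the method names (crawlUrl, asyncGenerateLLMsText) appear elsewhere in this diff, but the example URL, values, and surrounding setup are illustrative assumptions rather than documented usage.

import FirecrawlApp, { type CrawlParams, type GenerateLLMsTextParams } from "@mendable/firecrawl-js";

async function main() {
  const app = new FirecrawlApp({ apiKey: process.env.FIRECRAWL_API_KEY });

  // Crawl with the new per-request pacing and scrape-level options.
  const crawlParams: CrawlParams = {
    delay: 2, // seconds between scrapes (new in this release)
    scrapeOptions: {
      formats: ["markdown"],
      proxy: "auto",      // "auto" newly allowed alongside "basic" | "stealth"
      storeInCache: true, // new CrawlScrapeOptions flag
    },
  };
  const crawlResult = await app.crawlUrl("https://example.com", crawlParams);
  console.log(crawlResult);

  // llms.txt generation can now opt out of cached content.
  const llmsTxtParams: GenerateLLMsTextParams = {
    cache: false, // defaults to true per the new JSDoc
  };
  const llmsTxtJob = await app.asyncGenerateLLMsText("https://example.com", llmsTxtParams);
  console.log(llmsTxtJob);
}

main().catch(console.error);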
package/dist/index.js
CHANGED

@@ -23,9 +23,22 @@ var FirecrawlError = class extends Error {
 var FirecrawlApp = class {
   apiKey;
   apiUrl;
+  version = "1.25.1";
   isCloudService(url) {
     return url.includes("api.firecrawl.dev");
   }
+  async getVersion() {
+    try {
+      const packageJson = await import("./package-Y5V6L2WQ.js");
+      return packageJson.default.version;
+    } catch (error) {
+      console.error("Error getting version:", error);
+      return "1.25.1";
+    }
+  }
+  async init() {
+    this.version = await this.getVersion();
+  }
   /**
    * Initializes a new instance of the FirecrawlApp class.
    * @param config - Configuration options for the FirecrawlApp instance.
@@ -37,6 +50,7 @@ var FirecrawlApp = class {
     }
     this.apiKey = apiKey || "";
     this.apiUrl = baseUrl;
+    this.init();
   }
   /**
    * Scrapes a URL using the Firecrawl API.
@@ -49,7 +63,7 @@ var FirecrawlApp = class {
       "Content-Type": "application/json",
       Authorization: `Bearer ${this.apiKey}`
     };
-    let jsonData = { url, ...params };
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     if (jsonData?.extract?.schema) {
       let schema = jsonData.extract.schema;
       try {
@@ -123,7 +137,7 @@ var FirecrawlApp = class {
       lang: params?.lang ?? "en",
       country: params?.country ?? "us",
       location: params?.location,
-      origin:
+      origin: `js-sdk@${this.version}`,
       timeout: params?.timeout ?? 6e4,
       scrapeOptions: params?.scrapeOptions ?? { formats: [] }
     };
@@ -183,7 +197,7 @@ var FirecrawlApp = class {
    */
   async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { url, ...params };
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/crawl`,
@@ -207,7 +221,7 @@ var FirecrawlApp = class {
   }
   async asyncCrawlUrl(url, params, idempotencyKey) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { url, ...params };
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/crawl`,
@@ -366,7 +380,7 @@ var FirecrawlApp = class {
    */
   async mapUrl(url, params) {
     const headers = this.prepareHeaders();
-    let jsonData = { url, ...params };
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/map`,
@@ -395,7 +409,7 @@ var FirecrawlApp = class {
    */
   async batchScrapeUrls(urls, params, pollInterval = 2, idempotencyKey, webhook, ignoreInvalidURLs) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params };
+    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
     if (jsonData?.extract?.schema) {
       let schema = jsonData.extract.schema;
       try {
@@ -447,7 +461,7 @@ var FirecrawlApp = class {
   }
   async asyncBatchScrapeUrls(urls, params, idempotencyKey, webhook, ignoreInvalidURLs) {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params
+    let jsonData = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/batch/scrape`,
@@ -601,7 +615,7 @@ var FirecrawlApp = class {
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema, origin:
+        { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
         headers
       );
       if (response.status === 200) {
@@ -661,7 +675,7 @@ var FirecrawlApp = class {
     try {
       const response = await this.postRequest(
         this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema },
+        { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
        headers
      );
      if (response.status === 200) {
@@ -813,6 +827,12 @@ var FirecrawlApp = class {
    * @param {string} action - The action being performed when the error occurred.
    */
   handleError(response, action) {
+    if (!response) {
+      throw new FirecrawlError(
+        `No response received while trying to ${action}. This may be a network error or the server is unreachable.`,
+        0
+      );
+    }
     if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
       const errorMessage = response.data.error || "Unknown error occurred";
       const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
@@ -894,7 +914,7 @@ var FirecrawlApp = class {
    */
   async asyncDeepResearch(query, params) {
     const headers = this.prepareHeaders();
-    let jsonData = { query, ...params };
+    let jsonData = { query, ...params, origin: `js-sdk@${this.version}` };
     if (jsonData?.jsonOptions?.schema) {
       let schema = jsonData.jsonOptions.schema;
       try {
@@ -1017,9 +1037,10 @@ var FirecrawlApp = class {
   async __asyncDeepResearch(topic, params) {
     const headers = this.prepareHeaders();
     try {
+      let jsonData = { topic, ...params, origin: `js-sdk@${this.version}` };
       const response = await this.postRequest(
         `${this.apiUrl}/v1/deep-research`,
-
+        jsonData,
         headers
       );
       if (response.status === 200) {
@@ -1114,10 +1135,11 @@ var FirecrawlApp = class {
    */
   async asyncGenerateLLMsText(url, params) {
     const headers = this.prepareHeaders();
+    let jsonData = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response = await this.postRequest(
         `${this.apiUrl}/v1/llmstxt`,
-
+        jsonData,
         headers
       );
       if (response.status === 200) {
package/dist/package-Y5V6L2WQ.js
ADDED

@@ -0,0 +1,102 @@
+// package.json
+var name = "@mendable/firecrawl-js";
+var version = "1.25.3";
+var description = "JavaScript SDK for Firecrawl API";
+var main = "dist/index.js";
+var types = "dist/index.d.ts";
+var exports = {
+  "./package.json": "./package.json",
+  ".": {
+    import: "./dist/index.js",
+    default: "./dist/index.cjs"
+  }
+};
+var type = "module";
+var scripts = {
+  build: "tsup",
+  "build-and-publish": "npm run build && npm publish --access public",
+  "publish-beta": "npm run build && npm publish --access public --tag beta",
+  test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
+};
+var repository = {
+  type: "git",
+  url: "git+https://github.com/mendableai/firecrawl.git"
+};
+var author = "Mendable.ai";
+var license = "MIT";
+var dependencies = {
+  "typescript-event-target": "^1.1.1",
+  zod: "^3.23.8",
+  "zod-to-json-schema": "^3.23.0",
+  axios: "^1.6.8"
+};
+var bugs = {
+  url: "https://github.com/mendableai/firecrawl/issues"
+};
+var homepage = "https://github.com/mendableai/firecrawl#readme";
+var devDependencies = {
+  "@jest/globals": "^29.7.0",
+  "@types/axios": "^0.14.0",
+  "@types/dotenv": "^8.2.0",
+  "@types/jest": "^29.5.14",
+  "@types/mocha": "^10.0.6",
+  "@types/node": "^20.12.12",
+  "@types/uuid": "^9.0.8",
+  dotenv: "^16.4.5",
+  jest: "^29.7.0",
+  "ts-jest": "^29.2.2",
+  tsup: "^8.2.4",
+  typescript: "^5.4.5",
+  uuid: "^9.0.1"
+};
+var keywords = [
+  "firecrawl",
+  "mendable",
+  "crawler",
+  "web",
+  "scraper",
+  "api",
+  "sdk"
+];
+var engines = {
+  node: ">=22.0.0"
+};
+var package_default = {
+  name,
+  version,
+  description,
+  main,
+  types,
+  exports,
+  type,
+  scripts,
+  repository,
+  author,
+  license,
+  dependencies,
+  bugs,
+  homepage,
+  devDependencies,
+  keywords,
+  engines
+};
+export {
+  author,
+  bugs,
+  package_default as default,
+  dependencies,
+  description,
+  devDependencies,
+  engines,
+  exports,
+  homepage,
+  keywords,
+  license,
+  main,
+  name,
+  repository,
+  scripts,
+  type,
+  types,
+  version
+};
package/package.json
CHANGED
package/src/index.ts
CHANGED

@@ -119,7 +119,8 @@ export interface CrawlScrapeOptions {
   skipTlsVerification?: boolean;
   removeBase64Images?: boolean;
   blockAds?: boolean;
-  proxy?: "basic" | "stealth";
+  proxy?: "basic" | "stealth" | "auto";
+  storeInCache?: boolean;
 }
 
 export type Action = {
@@ -167,6 +168,7 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
     modes?: ("json" | "git-diff")[];
   }
   actions?: ActionsSchema;
+  agent?: AgentOptions;
 }
 
 export interface ActionsResult {
@@ -214,6 +216,11 @@ export interface CrawlParams {
   deduplicateSimilarURLs?: boolean;
   ignoreQueryParameters?: boolean;
   regexOnFullURL?: boolean;
+  /**
+   * Delay in seconds between scrapes. This helps respect website rate limits.
+   * If not provided, the crawler may use the robots.txt crawl delay if available.
+   */
+  delay?: number;
 }
 
 /**
@@ -296,6 +303,21 @@ export interface MapResponse {
  * Parameters for extracting information from URLs.
  * Defines options for extracting information from URLs.
  */
+export interface AgentOptions {
+  model?: string;
+  prompt?: string;
+  sessionId?: string;
+}
+
+/**
+ * Parameters for extracting information from URLs.
+ * Defines options for extracting information from URLs.
+ */
+export interface AgentOptionsExtract {
+  model?: string;
+  sessionId?: string;
+}
+
 export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
   prompt?: string;
   schema?: LLMSchema | object;
@@ -306,6 +328,7 @@ export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
   origin?: string;
   showSources?: boolean;
   scrapeOptions?: CrawlScrapeOptions;
+  agent?: AgentOptionsExtract;
 }
 
 /**
@@ -498,6 +521,11 @@ export interface GenerateLLMsTextParams {
    * @default false
    */
   showFullText?: boolean;
+  /**
+   * Whether to use cached content if available
+   * @default true
+   */
+  cache?: boolean;
   /**
    * Experimental flag for streaming
    */
@@ -533,11 +561,26 @@ export interface GenerateLLMsTextStatusResponse {
 export default class FirecrawlApp {
   public apiKey: string;
   public apiUrl: string;
-
+  public version: string = "1.25.1";
+
   private isCloudService(url: string): boolean {
     return url.includes('api.firecrawl.dev');
   }
 
+  private async getVersion(): Promise<string> {
+    try {
+      const packageJson = await import('../package.json', { assert: { type: 'json' } });
+      return packageJson.default.version;
+    } catch (error) {
+      console.error("Error getting version:", error);
+      return "1.25.1";
+    }
+  }
+
+  private async init() {
+    this.version = await this.getVersion();
+  }
+
   /**
    * Initializes a new instance of the FirecrawlApp class.
    * @param config - Configuration options for the FirecrawlApp instance.
@@ -551,6 +594,7 @@ export default class FirecrawlApp {
 
     this.apiKey = apiKey || '';
     this.apiUrl = baseUrl;
+    this.init();
   }
 
   /**
@@ -567,7 +611,7 @@ export default class FirecrawlApp {
       "Content-Type": "application/json",
       Authorization: `Bearer ${this.apiKey}`,
     } as AxiosRequestHeaders;
-    let jsonData: any = { url, ...params };
+    let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
     if (jsonData?.extract?.schema) {
       let schema = jsonData.extract.schema;
 
@@ -649,7 +693,7 @@ export default class FirecrawlApp {
       lang: params?.lang ?? "en",
       country: params?.country ?? "us",
       location: params?.location,
-      origin:
+      origin: `js-sdk@${this.version}`,
       timeout: params?.timeout ?? 60000,
       scrapeOptions: params?.scrapeOptions ?? { formats: [] },
     };
@@ -721,7 +765,7 @@ export default class FirecrawlApp {
     idempotencyKey?: string
   ): Promise<CrawlStatusResponse | ErrorResponse> {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData: any = { url, ...params };
+    let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response: AxiosResponse = await this.postRequest(
         this.apiUrl + `/v1/crawl`,
@@ -750,7 +794,7 @@ export default class FirecrawlApp {
     idempotencyKey?: string
   ): Promise<CrawlResponse | ErrorResponse> {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData: any = { url, ...params };
+    let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response: AxiosResponse = await this.postRequest(
         this.apiUrl + `/v1/crawl`,
@@ -926,7 +970,7 @@ export default class FirecrawlApp {
    */
   async mapUrl(url: string, params?: MapParams): Promise<MapResponse | ErrorResponse> {
     const headers = this.prepareHeaders();
-    let jsonData:
+    let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
 
     try {
       const response: AxiosResponse = await this.postRequest(
@@ -964,7 +1008,7 @@ export default class FirecrawlApp {
     ignoreInvalidURLs?: boolean,
   ): Promise<BatchScrapeStatusResponse | ErrorResponse> {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params };
+    let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
     if (jsonData?.extract?.schema) {
       let schema = jsonData.extract.schema;
 
@@ -1029,7 +1073,7 @@ export default class FirecrawlApp {
     ignoreInvalidURLs?: boolean,
   ): Promise<BatchScrapeResponse | ErrorResponse> {
     const headers = this.prepareHeaders(idempotencyKey);
-    let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...
+    let jsonData: any = { urls, webhook, ignoreInvalidURLs, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response: AxiosResponse = await this.postRequest(
         this.apiUrl + `/v1/batch/scrape`,
@@ -1203,7 +1247,7 @@ export default class FirecrawlApp {
     try {
       const response: AxiosResponse = await this.postRequest(
         this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema, origin:
+        { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
         headers
       );
 
@@ -1271,7 +1315,7 @@ export default class FirecrawlApp {
     try {
       const response: AxiosResponse = await this.postRequest(
         this.apiUrl + `/v1/extract`,
-        { ...jsonData, schema: jsonSchema },
+        { ...jsonData, schema: jsonSchema, origin: `js-sdk@${this.version}` },
         headers
       );
 
@@ -1448,6 +1492,13 @@ export default class FirecrawlApp {
    * @param {string} action - The action being performed when the error occurred.
    */
   handleError(response: AxiosResponse, action: string): void {
+    if (!response) {
+      throw new FirecrawlError(
+        `No response received while trying to ${action}. This may be a network error or the server is unreachable.`,
+        0
+      );
+    }
+
     if ([400, 402, 403, 408, 409, 500].includes(response.status)) {
       const errorMessage: string =
         response.data.error || "Unknown error occurred";
@@ -1562,7 +1613,7 @@ export default class FirecrawlApp {
    */
   async asyncDeepResearch(query: string, params: DeepResearchParams<zt.ZodSchema>): Promise<DeepResearchResponse | ErrorResponse> {
     const headers = this.prepareHeaders();
-    let jsonData: any = { query, ...params };
+    let jsonData: any = { query, ...params, origin: `js-sdk@${this.version}` };
 
     if (jsonData?.jsonOptions?.schema) {
       let schema = jsonData.jsonOptions.schema;
@@ -1570,7 +1621,7 @@ export default class FirecrawlApp {
       try {
         schema = zodToJsonSchema(schema);
       } catch (error) {
-
+        // Ignore error if schema can't be parsed as Zod
       }
       jsonData = {
         ...jsonData,
@@ -1716,9 +1767,10 @@ export default class FirecrawlApp {
   async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
     const headers = this.prepareHeaders();
     try {
+      let jsonData: any = { topic, ...params, origin: `js-sdk@${this.version}` };
       const response: AxiosResponse = await this.postRequest(
         `${this.apiUrl}/v1/deep-research`,
-
+        jsonData,
         headers
       );
 
@@ -1828,10 +1880,11 @@ export default class FirecrawlApp {
    */
   async asyncGenerateLLMsText(url: string, params?: GenerateLLMsTextParams): Promise<GenerateLLMsTextResponse | ErrorResponse> {
     const headers = this.prepareHeaders();
+    let jsonData: any = { url, ...params, origin: `js-sdk@${this.version}` };
     try {
       const response: AxiosResponse = await this.postRequest(
         `${this.apiUrl}/v1/llmstxt`,
-
+        jsonData,
         headers
       );
 
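The change common to nearly every hunk in this release is that each request body now carries an origin field of the form js-sdk@<version>. Because the origin key is spread after ...params, the SDK's tag takes precedence over any caller-supplied origin. The following small TypeScript sketch isolates that pattern; buildBody is a hypothetical helper name, since the SDK inlines the object spread directly at each call site.

// Hedged sketch of the request-body construction pattern introduced above.
type WithOrigin<T> = T & { origin: string };

function buildBody<T extends object>(params: T, sdkVersion: string): WithOrigin<T> {
  // `origin` is spread last, so it takes precedence over any caller-supplied `origin`.
  return { ...params, origin: `js-sdk@${sdkVersion}` };
}

const body = buildBody({ url: "https://example.com", origin: "my-app" }, "1.25.3");
console.log(body.origin); // "js-sdk@1.25.3" — the SDK's tag wins over the caller's value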