@mendable/firecrawl-js 0.0.35 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/cjs/index.js +216 -147
- package/build/esm/index.js +216 -147
- package/package.json +10 -4
- package/src/__tests__/e2e_withAuth/index.test.ts +299 -128
- package/src/__tests__/index.test.ts +1 -1
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +312 -0
- package/src/index.ts +385 -108
- package/tsconfig.json +3 -3
- package/types/index.d.ts +232 -53
package/build/cjs/index.js
CHANGED
|
@@ -1,13 +1,4 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
-
});
|
|
10
|
-
};
|
|
11
2
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
12
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
13
4
|
};
|
|
@@ -17,148 +8,179 @@ const zod_1 = require("zod");
|
|
|
17
8
|
const zod_to_json_schema_1 = require("zod-to-json-schema");
|
|
18
9
|
/**
|
|
19
10
|
* Main class for interacting with the Firecrawl API.
|
|
11
|
+
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
20
12
|
*/
|
|
21
13
|
class FirecrawlApp {
|
|
22
14
|
/**
|
|
23
15
|
* Initializes a new instance of the FirecrawlApp class.
|
|
24
|
-
* @param
|
|
16
|
+
* @param config - Configuration options for the FirecrawlApp instance.
|
|
25
17
|
*/
|
|
26
|
-
constructor({ apiKey = null, apiUrl = null }) {
|
|
18
|
+
constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
|
|
27
19
|
this.apiKey = apiKey || "";
|
|
28
20
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
21
|
+
this.version = version;
|
|
29
22
|
if (!this.apiKey) {
|
|
30
23
|
throw new Error("No API key provided");
|
|
31
24
|
}
|
|
32
25
|
}
|
|
33
26
|
/**
|
|
34
27
|
* Scrapes a URL using the Firecrawl API.
|
|
35
|
-
* @param
|
|
36
|
-
* @param
|
|
37
|
-
* @returns
|
|
28
|
+
* @param url - The URL to scrape.
|
|
29
|
+
* @param params - Additional parameters for the scrape request.
|
|
30
|
+
* @returns The response from the scrape operation.
|
|
38
31
|
*/
|
|
39
|
-
scrapeUrl(
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
let
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
if (schema instanceof zod_1.z.ZodSchema) {
|
|
51
|
-
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
|
52
|
-
}
|
|
53
|
-
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
|
32
|
+
async scrapeUrl(url, params) {
|
|
33
|
+
const headers = {
|
|
34
|
+
"Content-Type": "application/json",
|
|
35
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
36
|
+
};
|
|
37
|
+
let jsonData = { url, ...params };
|
|
38
|
+
if (jsonData?.extractorOptions?.extractionSchema) {
|
|
39
|
+
let schema = jsonData.extractorOptions.extractionSchema;
|
|
40
|
+
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
41
|
+
if (schema instanceof zod_1.z.ZodSchema) {
|
|
42
|
+
schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
|
|
54
43
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
44
|
+
jsonData = {
|
|
45
|
+
...jsonData,
|
|
46
|
+
extractorOptions: {
|
|
47
|
+
...jsonData.extractorOptions,
|
|
48
|
+
extractionSchema: schema,
|
|
49
|
+
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
50
|
+
},
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
try {
|
|
54
|
+
const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
|
55
|
+
if (response.status === 200) {
|
|
56
|
+
const responseData = response.data;
|
|
57
|
+
if (responseData.success) {
|
|
58
|
+
return (this.version === 'v0' ? responseData : {
|
|
59
|
+
success: true,
|
|
60
|
+
warning: responseData.warning,
|
|
61
|
+
error: responseData.error,
|
|
62
|
+
...responseData.data
|
|
63
|
+
});
|
|
65
64
|
}
|
|
66
65
|
else {
|
|
67
|
-
|
|
66
|
+
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
68
67
|
}
|
|
69
68
|
}
|
|
70
|
-
|
|
71
|
-
|
|
69
|
+
else {
|
|
70
|
+
this.handleError(response, "scrape URL");
|
|
72
71
|
}
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
}
|
|
73
|
+
catch (error) {
|
|
74
|
+
throw new Error(error.message);
|
|
75
|
+
}
|
|
76
|
+
return { success: false, error: "Internal server error." };
|
|
75
77
|
}
|
|
76
78
|
/**
|
|
77
79
|
* Searches for a query using the Firecrawl API.
|
|
78
|
-
* @param
|
|
79
|
-
* @param
|
|
80
|
-
* @returns
|
|
80
|
+
* @param query - The query to search for.
|
|
81
|
+
* @param params - Additional parameters for the search request.
|
|
82
|
+
* @returns The response from the search operation.
|
|
81
83
|
*/
|
|
82
|
-
search(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
101
|
-
}
|
|
84
|
+
async search(query, params) {
|
|
85
|
+
if (this.version === "v1") {
|
|
86
|
+
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
87
|
+
}
|
|
88
|
+
const headers = {
|
|
89
|
+
"Content-Type": "application/json",
|
|
90
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
91
|
+
};
|
|
92
|
+
let jsonData = { query };
|
|
93
|
+
if (params) {
|
|
94
|
+
jsonData = { ...jsonData, ...params };
|
|
95
|
+
}
|
|
96
|
+
try {
|
|
97
|
+
const response = await axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
98
|
+
if (response.status === 200) {
|
|
99
|
+
const responseData = response.data;
|
|
100
|
+
if (responseData.success) {
|
|
101
|
+
return responseData;
|
|
102
102
|
}
|
|
103
103
|
else {
|
|
104
|
-
|
|
104
|
+
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
105
105
|
}
|
|
106
106
|
}
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
else {
|
|
108
|
+
this.handleError(response, "search");
|
|
109
109
|
}
|
|
110
|
-
|
|
111
|
-
|
|
110
|
+
}
|
|
111
|
+
catch (error) {
|
|
112
|
+
throw new Error(error.message);
|
|
113
|
+
}
|
|
114
|
+
return { success: false, error: "Internal server error." };
|
|
112
115
|
}
|
|
113
116
|
/**
|
|
114
117
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
115
|
-
* @param
|
|
116
|
-
* @param
|
|
117
|
-
* @param
|
|
118
|
-
* @param
|
|
119
|
-
* @param
|
|
120
|
-
* @returns
|
|
118
|
+
* @param url - The URL to crawl.
|
|
119
|
+
* @param params - Additional parameters for the crawl request.
|
|
120
|
+
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
121
|
+
* @param pollInterval - Time in seconds for job status checks.
|
|
122
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
123
|
+
* @returns The response from the crawl operation.
|
|
121
124
|
*/
|
|
122
|
-
crawlUrl(
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
if (waitUntilDone) {
|
|
134
|
-
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
135
|
-
}
|
|
136
|
-
else {
|
|
137
|
-
return { success: true, jobId };
|
|
125
|
+
async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
126
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
127
|
+
let jsonData = { url, ...params };
|
|
128
|
+
try {
|
|
129
|
+
const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
|
|
130
|
+
if (response.status === 200) {
|
|
131
|
+
const id = this.version === 'v0' ? response.data.jobId : response.data.id;
|
|
132
|
+
let checkUrl = undefined;
|
|
133
|
+
if (waitUntilDone) {
|
|
134
|
+
if (this.version === 'v1') {
|
|
135
|
+
checkUrl = response.data.url;
|
|
138
136
|
}
|
|
137
|
+
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
|
139
138
|
}
|
|
140
139
|
else {
|
|
141
|
-
this.
|
|
140
|
+
if (this.version === 'v0') {
|
|
141
|
+
return {
|
|
142
|
+
success: true,
|
|
143
|
+
jobId: id
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
else {
|
|
147
|
+
return {
|
|
148
|
+
success: true,
|
|
149
|
+
id: id
|
|
150
|
+
};
|
|
151
|
+
}
|
|
142
152
|
}
|
|
143
153
|
}
|
|
144
|
-
|
|
145
|
-
|
|
154
|
+
else {
|
|
155
|
+
this.handleError(response, "start crawl job");
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
catch (error) {
|
|
159
|
+
if (error.response?.data?.error) {
|
|
160
|
+
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
|
161
|
+
}
|
|
162
|
+
else {
|
|
146
163
|
throw new Error(error.message);
|
|
147
164
|
}
|
|
148
|
-
|
|
149
|
-
}
|
|
165
|
+
}
|
|
166
|
+
return { success: false, error: "Internal server error." };
|
|
150
167
|
}
|
|
151
168
|
/**
|
|
152
169
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
153
|
-
* @param
|
|
154
|
-
* @returns
|
|
170
|
+
* @param id - The ID of the crawl operation.
|
|
171
|
+
* @returns The response containing the job status.
|
|
155
172
|
*/
|
|
156
|
-
checkCrawlStatus(
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
173
|
+
async checkCrawlStatus(id) {
|
|
174
|
+
if (!id) {
|
|
175
|
+
throw new Error("No crawl ID provided");
|
|
176
|
+
}
|
|
177
|
+
const headers = this.prepareHeaders();
|
|
178
|
+
try {
|
|
179
|
+
const response = await this.getRequest(this.version === 'v1' ?
|
|
180
|
+
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
|
181
|
+
`${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
|
|
182
|
+
if (response.status === 200) {
|
|
183
|
+
if (this.version === 'v0') {
|
|
162
184
|
return {
|
|
163
185
|
success: true,
|
|
164
186
|
status: response.data.status,
|
|
@@ -173,13 +195,28 @@ class FirecrawlApp {
|
|
|
173
195
|
};
|
|
174
196
|
}
|
|
175
197
|
else {
|
|
176
|
-
|
|
198
|
+
return {
|
|
199
|
+
success: true,
|
|
200
|
+
status: response.data.status,
|
|
201
|
+
total: response.data.total,
|
|
202
|
+
completed: response.data.completed,
|
|
203
|
+
creditsUsed: response.data.creditsUsed,
|
|
204
|
+
expiresAt: new Date(response.data.expiresAt),
|
|
205
|
+
next: response.data.next,
|
|
206
|
+
data: response.data.data,
|
|
207
|
+
error: response.data.error
|
|
208
|
+
};
|
|
177
209
|
}
|
|
178
210
|
}
|
|
179
|
-
|
|
180
|
-
|
|
211
|
+
else {
|
|
212
|
+
this.handleError(response, "check crawl status");
|
|
181
213
|
}
|
|
182
|
-
|
|
214
|
+
}
|
|
215
|
+
catch (error) {
|
|
216
|
+
throw new Error(error.message);
|
|
217
|
+
}
|
|
218
|
+
return this.version === 'v0' ?
|
|
219
|
+
{
|
|
183
220
|
success: false,
|
|
184
221
|
status: "unknown",
|
|
185
222
|
current: 0,
|
|
@@ -187,71 +224,103 @@ class FirecrawlApp {
|
|
|
187
224
|
current_step: "",
|
|
188
225
|
total: 0,
|
|
189
226
|
error: "Internal server error.",
|
|
227
|
+
} :
|
|
228
|
+
{
|
|
229
|
+
success: false,
|
|
230
|
+
error: "Internal server error.",
|
|
190
231
|
};
|
|
191
|
-
|
|
232
|
+
}
|
|
233
|
+
async mapUrl(url, params) {
|
|
234
|
+
if (this.version == 'v0') {
|
|
235
|
+
throw new Error("Map is not supported in v0");
|
|
236
|
+
}
|
|
237
|
+
const headers = this.prepareHeaders();
|
|
238
|
+
let jsonData = { url, ...params };
|
|
239
|
+
try {
|
|
240
|
+
const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
|
|
241
|
+
if (response.status === 200) {
|
|
242
|
+
return response.data;
|
|
243
|
+
}
|
|
244
|
+
else {
|
|
245
|
+
this.handleError(response, "map");
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
catch (error) {
|
|
249
|
+
throw new Error(error.message);
|
|
250
|
+
}
|
|
251
|
+
return { success: false, error: "Internal server error." };
|
|
192
252
|
}
|
|
193
253
|
/**
|
|
194
254
|
* Prepares the headers for an API request.
|
|
195
|
-
* @
|
|
255
|
+
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
256
|
+
* @returns The prepared headers.
|
|
196
257
|
*/
|
|
197
258
|
prepareHeaders(idempotencyKey) {
|
|
198
|
-
return
|
|
259
|
+
return {
|
|
260
|
+
"Content-Type": "application/json",
|
|
261
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
262
|
+
...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
|
|
263
|
+
};
|
|
199
264
|
}
|
|
200
265
|
/**
|
|
201
266
|
* Sends a POST request to the specified URL.
|
|
202
|
-
* @param
|
|
203
|
-
* @param
|
|
204
|
-
* @param
|
|
205
|
-
* @returns
|
|
267
|
+
* @param url - The URL to send the request to.
|
|
268
|
+
* @param data - The data to send in the request.
|
|
269
|
+
* @param headers - The headers for the request.
|
|
270
|
+
* @returns The response from the POST request.
|
|
206
271
|
*/
|
|
207
272
|
postRequest(url, data, headers) {
|
|
208
273
|
return axios_1.default.post(url, data, { headers });
|
|
209
274
|
}
|
|
210
275
|
/**
|
|
211
276
|
* Sends a GET request to the specified URL.
|
|
212
|
-
* @param
|
|
213
|
-
* @param
|
|
214
|
-
* @returns
|
|
277
|
+
* @param url - The URL to send the request to.
|
|
278
|
+
* @param headers - The headers for the request.
|
|
279
|
+
* @returns The response from the GET request.
|
|
215
280
|
*/
|
|
216
281
|
getRequest(url, headers) {
|
|
217
282
|
return axios_1.default.get(url, { headers });
|
|
218
283
|
}
|
|
219
284
|
/**
|
|
220
285
|
* Monitors the status of a crawl job until completion or failure.
|
|
221
|
-
* @param
|
|
222
|
-
* @param
|
|
223
|
-
* @param
|
|
224
|
-
* @
|
|
286
|
+
* @param id - The ID of the crawl operation.
|
|
287
|
+
* @param headers - The headers for the request.
|
|
288
|
+
* @param checkInterval - Interval in seconds for job status checks.
|
|
289
|
+
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
|
290
|
+
* @returns The final job status or data.
|
|
225
291
|
*/
|
|
226
|
-
monitorJobStatus(
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
if (checkInterval < 2) {
|
|
242
|
-
checkInterval = 2;
|
|
243
|
-
}
|
|
244
|
-
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
292
|
+
async monitorJobStatus(id, headers, checkInterval, checkUrl) {
|
|
293
|
+
let apiUrl = '';
|
|
294
|
+
while (true) {
|
|
295
|
+
if (this.version === 'v1') {
|
|
296
|
+
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
|
297
|
+
}
|
|
298
|
+
else if (this.version === 'v0') {
|
|
299
|
+
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
|
300
|
+
}
|
|
301
|
+
const statusResponse = await this.getRequest(apiUrl, headers);
|
|
302
|
+
if (statusResponse.status === 200) {
|
|
303
|
+
const statusData = statusResponse.data;
|
|
304
|
+
if (statusData.status === "completed") {
|
|
305
|
+
if ("data" in statusData) {
|
|
306
|
+
return this.version === 'v0' ? statusData.data : statusData;
|
|
245
307
|
}
|
|
246
308
|
else {
|
|
247
|
-
throw new Error(
|
|
309
|
+
throw new Error("Crawl job completed but no data was returned");
|
|
248
310
|
}
|
|
249
311
|
}
|
|
312
|
+
else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) {
|
|
313
|
+
checkInterval = Math.max(checkInterval, 2);
|
|
314
|
+
await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000));
|
|
315
|
+
}
|
|
250
316
|
else {
|
|
251
|
-
|
|
317
|
+
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
|
252
318
|
}
|
|
253
319
|
}
|
|
254
|
-
|
|
320
|
+
else {
|
|
321
|
+
this.handleError(statusResponse, "check crawl status");
|
|
322
|
+
}
|
|
323
|
+
}
|
|
255
324
|
}
|
|
256
325
|
/**
|
|
257
326
|
* Handles errors from API responses.
|