@mendable/firecrawl-js 0.0.36 → 1.0.1
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/build/cjs/index.js +216 -147
- package/build/esm/index.js +216 -147
- package/package.json +2 -2
- package/src/__tests__/e2e_withAuth/index.test.ts +299 -128
- package/src/__tests__/index.test.ts +1 -1
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +312 -0
- package/src/index.ts +385 -108
- package/tsconfig.json +3 -3
- package/types/index.d.ts +232 -53
package/build/esm/index.js
CHANGED
|
@@ -1,159 +1,181 @@
|
|
|
1
|
-
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
-
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
-
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
-
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
-
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
-
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
-
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
-
});
|
|
9
|
-
};
|
|
10
1
|
import axios from "axios";
|
|
11
2
|
import { z } from "zod";
|
|
12
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
13
4
|
/**
|
|
14
5
|
* Main class for interacting with the Firecrawl API.
|
|
6
|
+
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
15
7
|
*/
|
|
16
8
|
export default class FirecrawlApp {
|
|
17
9
|
/**
|
|
18
10
|
* Initializes a new instance of the FirecrawlApp class.
|
|
19
|
-
* @param
|
|
11
|
+
* @param config - Configuration options for the FirecrawlApp instance.
|
|
20
12
|
*/
|
|
21
|
-
constructor({ apiKey = null, apiUrl = null }) {
|
|
13
|
+
constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
|
|
22
14
|
this.apiKey = apiKey || "";
|
|
23
15
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
16
|
+
this.version = version;
|
|
24
17
|
if (!this.apiKey) {
|
|
25
18
|
throw new Error("No API key provided");
|
|
26
19
|
}
|
|
27
20
|
}
|
|
28
21
|
/**
|
|
29
22
|
* Scrapes a URL using the Firecrawl API.
|
|
30
|
-
* @param
|
|
31
|
-
* @param
|
|
32
|
-
* @returns
|
|
23
|
+
* @param url - The URL to scrape.
|
|
24
|
+
* @param params - Additional parameters for the scrape request.
|
|
25
|
+
* @returns The response from the scrape operation.
|
|
33
26
|
*/
|
|
34
|
-
scrapeUrl(
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
let
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
if (schema instanceof z.ZodSchema) {
|
|
46
|
-
schema = zodToJsonSchema(schema);
|
|
47
|
-
}
|
|
48
|
-
jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
|
|
27
|
+
async scrapeUrl(url, params) {
|
|
28
|
+
const headers = {
|
|
29
|
+
"Content-Type": "application/json",
|
|
30
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
31
|
+
};
|
|
32
|
+
let jsonData = { url, ...params };
|
|
33
|
+
if (jsonData?.extractorOptions?.extractionSchema) {
|
|
34
|
+
let schema = jsonData.extractorOptions.extractionSchema;
|
|
35
|
+
// Check if schema is an instance of ZodSchema to correctly identify Zod schemas
|
|
36
|
+
if (schema instanceof z.ZodSchema) {
|
|
37
|
+
schema = zodToJsonSchema(schema);
|
|
49
38
|
}
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
39
|
+
jsonData = {
|
|
40
|
+
...jsonData,
|
|
41
|
+
extractorOptions: {
|
|
42
|
+
...jsonData.extractorOptions,
|
|
43
|
+
extractionSchema: schema,
|
|
44
|
+
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
45
|
+
},
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
try {
|
|
49
|
+
const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
|
|
50
|
+
if (response.status === 200) {
|
|
51
|
+
const responseData = response.data;
|
|
52
|
+
if (responseData.success) {
|
|
53
|
+
return (this.version === 'v0' ? responseData : {
|
|
54
|
+
success: true,
|
|
55
|
+
warning: responseData.warning,
|
|
56
|
+
error: responseData.error,
|
|
57
|
+
...responseData.data
|
|
58
|
+
});
|
|
60
59
|
}
|
|
61
60
|
else {
|
|
62
|
-
|
|
61
|
+
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
63
62
|
}
|
|
64
63
|
}
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
else {
|
|
65
|
+
this.handleError(response, "scrape URL");
|
|
67
66
|
}
|
|
68
|
-
|
|
69
|
-
|
|
67
|
+
}
|
|
68
|
+
catch (error) {
|
|
69
|
+
throw new Error(error.message);
|
|
70
|
+
}
|
|
71
|
+
return { success: false, error: "Internal server error." };
|
|
70
72
|
}
|
|
71
73
|
/**
|
|
72
74
|
* Searches for a query using the Firecrawl API.
|
|
73
|
-
* @param
|
|
74
|
-
* @param
|
|
75
|
-
* @returns
|
|
75
|
+
* @param query - The query to search for.
|
|
76
|
+
* @param params - Additional parameters for the search request.
|
|
77
|
+
* @returns The response from the search operation.
|
|
76
78
|
*/
|
|
77
|
-
search(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
96
|
-
}
|
|
79
|
+
async search(query, params) {
|
|
80
|
+
if (this.version === "v1") {
|
|
81
|
+
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
82
|
+
}
|
|
83
|
+
const headers = {
|
|
84
|
+
"Content-Type": "application/json",
|
|
85
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
86
|
+
};
|
|
87
|
+
let jsonData = { query };
|
|
88
|
+
if (params) {
|
|
89
|
+
jsonData = { ...jsonData, ...params };
|
|
90
|
+
}
|
|
91
|
+
try {
|
|
92
|
+
const response = await axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
|
|
93
|
+
if (response.status === 200) {
|
|
94
|
+
const responseData = response.data;
|
|
95
|
+
if (responseData.success) {
|
|
96
|
+
return responseData;
|
|
97
97
|
}
|
|
98
98
|
else {
|
|
99
|
-
|
|
99
|
+
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
|
-
|
|
103
|
-
|
|
102
|
+
else {
|
|
103
|
+
this.handleError(response, "search");
|
|
104
104
|
}
|
|
105
|
-
|
|
106
|
-
|
|
105
|
+
}
|
|
106
|
+
catch (error) {
|
|
107
|
+
throw new Error(error.message);
|
|
108
|
+
}
|
|
109
|
+
return { success: false, error: "Internal server error." };
|
|
107
110
|
}
|
|
108
111
|
/**
|
|
109
112
|
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
110
|
-
* @param
|
|
111
|
-
* @param
|
|
112
|
-
* @param
|
|
113
|
-
* @param
|
|
114
|
-
* @param
|
|
115
|
-
* @returns
|
|
113
|
+
* @param url - The URL to crawl.
|
|
114
|
+
* @param params - Additional parameters for the crawl request.
|
|
115
|
+
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
116
|
+
* @param pollInterval - Time in seconds for job status checks.
|
|
117
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
118
|
+
* @returns The response from the crawl operation.
|
|
116
119
|
*/
|
|
117
|
-
crawlUrl(
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if (waitUntilDone) {
|
|
129
|
-
return this.monitorJobStatus(jobId, headers, pollInterval);
|
|
130
|
-
}
|
|
131
|
-
else {
|
|
132
|
-
return { success: true, jobId };
|
|
120
|
+
async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
121
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
122
|
+
let jsonData = { url, ...params };
|
|
123
|
+
try {
|
|
124
|
+
const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
|
|
125
|
+
if (response.status === 200) {
|
|
126
|
+
const id = this.version === 'v0' ? response.data.jobId : response.data.id;
|
|
127
|
+
let checkUrl = undefined;
|
|
128
|
+
if (waitUntilDone) {
|
|
129
|
+
if (this.version === 'v1') {
|
|
130
|
+
checkUrl = response.data.url;
|
|
133
131
|
}
|
|
132
|
+
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
|
134
133
|
}
|
|
135
134
|
else {
|
|
136
|
-
this.
|
|
135
|
+
if (this.version === 'v0') {
|
|
136
|
+
return {
|
|
137
|
+
success: true,
|
|
138
|
+
jobId: id
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
return {
|
|
143
|
+
success: true,
|
|
144
|
+
id: id
|
|
145
|
+
};
|
|
146
|
+
}
|
|
137
147
|
}
|
|
138
148
|
}
|
|
139
|
-
|
|
140
|
-
|
|
149
|
+
else {
|
|
150
|
+
this.handleError(response, "start crawl job");
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
catch (error) {
|
|
154
|
+
if (error.response?.data?.error) {
|
|
155
|
+
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
|
156
|
+
}
|
|
157
|
+
else {
|
|
141
158
|
throw new Error(error.message);
|
|
142
159
|
}
|
|
143
|
-
|
|
144
|
-
}
|
|
160
|
+
}
|
|
161
|
+
return { success: false, error: "Internal server error." };
|
|
145
162
|
}
|
|
146
163
|
/**
|
|
147
164
|
* Checks the status of a crawl job using the Firecrawl API.
|
|
148
|
-
* @param
|
|
149
|
-
* @returns
|
|
165
|
+
* @param id - The ID of the crawl operation.
|
|
166
|
+
* @returns The response containing the job status.
|
|
150
167
|
*/
|
|
151
|
-
checkCrawlStatus(
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
168
|
+
async checkCrawlStatus(id) {
|
|
169
|
+
if (!id) {
|
|
170
|
+
throw new Error("No crawl ID provided");
|
|
171
|
+
}
|
|
172
|
+
const headers = this.prepareHeaders();
|
|
173
|
+
try {
|
|
174
|
+
const response = await this.getRequest(this.version === 'v1' ?
|
|
175
|
+
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
|
176
|
+
`${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
|
|
177
|
+
if (response.status === 200) {
|
|
178
|
+
if (this.version === 'v0') {
|
|
157
179
|
return {
|
|
158
180
|
success: true,
|
|
159
181
|
status: response.data.status,
|
|
@@ -168,13 +190,28 @@ export default class FirecrawlApp {
|
|
|
168
190
|
};
|
|
169
191
|
}
|
|
170
192
|
else {
|
|
171
|
-
|
|
193
|
+
return {
|
|
194
|
+
success: true,
|
|
195
|
+
status: response.data.status,
|
|
196
|
+
total: response.data.total,
|
|
197
|
+
completed: response.data.completed,
|
|
198
|
+
creditsUsed: response.data.creditsUsed,
|
|
199
|
+
expiresAt: new Date(response.data.expiresAt),
|
|
200
|
+
next: response.data.next,
|
|
201
|
+
data: response.data.data,
|
|
202
|
+
error: response.data.error
|
|
203
|
+
};
|
|
172
204
|
}
|
|
173
205
|
}
|
|
174
|
-
|
|
175
|
-
|
|
206
|
+
else {
|
|
207
|
+
this.handleError(response, "check crawl status");
|
|
176
208
|
}
|
|
177
|
-
|
|
209
|
+
}
|
|
210
|
+
catch (error) {
|
|
211
|
+
throw new Error(error.message);
|
|
212
|
+
}
|
|
213
|
+
return this.version === 'v0' ?
|
|
214
|
+
{
|
|
178
215
|
success: false,
|
|
179
216
|
status: "unknown",
|
|
180
217
|
current: 0,
|
|
@@ -182,71 +219,103 @@ export default class FirecrawlApp {
|
|
|
182
219
|
current_step: "",
|
|
183
220
|
total: 0,
|
|
184
221
|
error: "Internal server error.",
|
|
222
|
+
} :
|
|
223
|
+
{
|
|
224
|
+
success: false,
|
|
225
|
+
error: "Internal server error.",
|
|
185
226
|
};
|
|
186
|
-
|
|
227
|
+
}
|
|
228
|
+
async mapUrl(url, params) {
|
|
229
|
+
if (this.version == 'v0') {
|
|
230
|
+
throw new Error("Map is not supported in v0");
|
|
231
|
+
}
|
|
232
|
+
const headers = this.prepareHeaders();
|
|
233
|
+
let jsonData = { url, ...params };
|
|
234
|
+
try {
|
|
235
|
+
const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
|
|
236
|
+
if (response.status === 200) {
|
|
237
|
+
return response.data;
|
|
238
|
+
}
|
|
239
|
+
else {
|
|
240
|
+
this.handleError(response, "map");
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
catch (error) {
|
|
244
|
+
throw new Error(error.message);
|
|
245
|
+
}
|
|
246
|
+
return { success: false, error: "Internal server error." };
|
|
187
247
|
}
|
|
188
248
|
/**
|
|
189
249
|
* Prepares the headers for an API request.
|
|
190
|
-
* @
|
|
250
|
+
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
251
|
+
* @returns The prepared headers.
|
|
191
252
|
*/
|
|
192
253
|
prepareHeaders(idempotencyKey) {
|
|
193
|
-
return
|
|
254
|
+
return {
|
|
255
|
+
"Content-Type": "application/json",
|
|
256
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
257
|
+
...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
|
|
258
|
+
};
|
|
194
259
|
}
|
|
195
260
|
/**
|
|
196
261
|
* Sends a POST request to the specified URL.
|
|
197
|
-
* @param
|
|
198
|
-
* @param
|
|
199
|
-
* @param
|
|
200
|
-
* @returns
|
|
262
|
+
* @param url - The URL to send the request to.
|
|
263
|
+
* @param data - The data to send in the request.
|
|
264
|
+
* @param headers - The headers for the request.
|
|
265
|
+
* @returns The response from the POST request.
|
|
201
266
|
*/
|
|
202
267
|
postRequest(url, data, headers) {
|
|
203
268
|
return axios.post(url, data, { headers });
|
|
204
269
|
}
|
|
205
270
|
/**
|
|
206
271
|
* Sends a GET request to the specified URL.
|
|
207
|
-
* @param
|
|
208
|
-
* @param
|
|
209
|
-
* @returns
|
|
272
|
+
* @param url - The URL to send the request to.
|
|
273
|
+
* @param headers - The headers for the request.
|
|
274
|
+
* @returns The response from the GET request.
|
|
210
275
|
*/
|
|
211
276
|
getRequest(url, headers) {
|
|
212
277
|
return axios.get(url, { headers });
|
|
213
278
|
}
|
|
214
279
|
/**
|
|
215
280
|
* Monitors the status of a crawl job until completion or failure.
|
|
216
|
-
* @param
|
|
217
|
-
* @param
|
|
218
|
-
* @param
|
|
219
|
-
* @
|
|
281
|
+
* @param id - The ID of the crawl operation.
|
|
282
|
+
* @param headers - The headers for the request.
|
|
283
|
+
* @param checkInterval - Interval in seconds for job status checks.
|
|
284
|
+
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
|
285
|
+
* @returns The final job status or data.
|
|
220
286
|
*/
|
|
221
|
-
monitorJobStatus(
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
if (checkInterval < 2) {
|
|
237
|
-
checkInterval = 2;
|
|
238
|
-
}
|
|
239
|
-
yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
|
|
287
|
+
async monitorJobStatus(id, headers, checkInterval, checkUrl) {
|
|
288
|
+
let apiUrl = '';
|
|
289
|
+
while (true) {
|
|
290
|
+
if (this.version === 'v1') {
|
|
291
|
+
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
|
292
|
+
}
|
|
293
|
+
else if (this.version === 'v0') {
|
|
294
|
+
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
|
295
|
+
}
|
|
296
|
+
const statusResponse = await this.getRequest(apiUrl, headers);
|
|
297
|
+
if (statusResponse.status === 200) {
|
|
298
|
+
const statusData = statusResponse.data;
|
|
299
|
+
if (statusData.status === "completed") {
|
|
300
|
+
if ("data" in statusData) {
|
|
301
|
+
return this.version === 'v0' ? statusData.data : statusData;
|
|
240
302
|
}
|
|
241
303
|
else {
|
|
242
|
-
throw new Error(
|
|
304
|
+
throw new Error("Crawl job completed but no data was returned");
|
|
243
305
|
}
|
|
244
306
|
}
|
|
307
|
+
else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) {
|
|
308
|
+
checkInterval = Math.max(checkInterval, 2);
|
|
309
|
+
await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000));
|
|
310
|
+
}
|
|
245
311
|
else {
|
|
246
|
-
|
|
312
|
+
throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
|
|
247
313
|
}
|
|
248
314
|
}
|
|
249
|
-
|
|
315
|
+
else {
|
|
316
|
+
this.handleError(statusResponse, "check crawl status");
|
|
317
|
+
}
|
|
318
|
+
}
|
|
250
319
|
}
|
|
251
320
|
/**
|
|
252
321
|
* Handles errors from API responses.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl-js",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "build/cjs/index.js",
|
|
6
6
|
"types": "types/index.d.ts",
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
"build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json",
|
|
20
20
|
"build-and-publish": "npm run build && npm publish --access public",
|
|
21
21
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
22
|
-
"test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/**/*.test.ts"
|
|
22
|
+
"test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
23
23
|
},
|
|
24
24
|
"repository": {
|
|
25
25
|
"type": "git",
|