@mendable/firecrawl-js 1.0.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -9
- package/build/cjs/index.js +140 -133
- package/build/esm/index.js +138 -133
- package/package.json +3 -1
- package/src/__tests__/e2e_withAuth/index.test.ts +0 -1
- package/src/index.ts +223 -306
- package/types/index.d.ts +54 -162
package/build/esm/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import axios from "axios";
|
|
2
|
-
import { z } from "zod";
|
|
3
2
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
3
|
+
import { WebSocket } from "isows";
|
|
4
|
+
import { TypedEventTarget } from "typescript-event-target";
|
|
4
5
|
/**
|
|
5
6
|
* Main class for interacting with the Firecrawl API.
|
|
6
7
|
* Provides methods for scraping, searching, crawling, and mapping web content.
|
|
@@ -10,13 +11,9 @@ export default class FirecrawlApp {
|
|
|
10
11
|
* Initializes a new instance of the FirecrawlApp class.
|
|
11
12
|
* @param config - Configuration options for the FirecrawlApp instance.
|
|
12
13
|
*/
|
|
13
|
-
constructor({ apiKey = null, apiUrl = null
|
|
14
|
+
constructor({ apiKey = null, apiUrl = null }) {
|
|
14
15
|
this.apiKey = apiKey || "";
|
|
15
16
|
this.apiUrl = apiUrl || "https://api.firecrawl.dev";
|
|
16
|
-
this.version = version;
|
|
17
|
-
if (!this.apiKey) {
|
|
18
|
-
throw new Error("No API key provided");
|
|
19
|
-
}
|
|
20
17
|
}
|
|
21
18
|
/**
|
|
22
19
|
* Scrapes a URL using the Firecrawl API.
|
|
@@ -30,32 +27,33 @@ export default class FirecrawlApp {
|
|
|
30
27
|
Authorization: `Bearer ${this.apiKey}`,
|
|
31
28
|
};
|
|
32
29
|
let jsonData = { url, ...params };
|
|
33
|
-
if (jsonData?.
|
|
34
|
-
let schema = jsonData.
|
|
35
|
-
//
|
|
36
|
-
|
|
30
|
+
if (jsonData?.extract?.schema) {
|
|
31
|
+
let schema = jsonData.extract.schema;
|
|
32
|
+
// Try parsing the schema as a Zod schema
|
|
33
|
+
try {
|
|
37
34
|
schema = zodToJsonSchema(schema);
|
|
38
35
|
}
|
|
36
|
+
catch (error) {
|
|
37
|
+
}
|
|
39
38
|
jsonData = {
|
|
40
39
|
...jsonData,
|
|
41
|
-
|
|
42
|
-
...jsonData.
|
|
43
|
-
|
|
44
|
-
mode: jsonData.extractorOptions.mode || "llm-extraction",
|
|
40
|
+
extract: {
|
|
41
|
+
...jsonData.extract,
|
|
42
|
+
schema: schema,
|
|
45
43
|
},
|
|
46
44
|
};
|
|
47
45
|
}
|
|
48
46
|
try {
|
|
49
|
-
const response = await axios.post(this.apiUrl +
|
|
47
|
+
const response = await axios.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
|
|
50
48
|
if (response.status === 200) {
|
|
51
49
|
const responseData = response.data;
|
|
52
50
|
if (responseData.success) {
|
|
53
|
-
return
|
|
51
|
+
return {
|
|
54
52
|
success: true,
|
|
55
53
|
warning: responseData.warning,
|
|
56
54
|
error: responseData.error,
|
|
57
55
|
...responseData.data
|
|
58
|
-
}
|
|
56
|
+
};
|
|
59
57
|
}
|
|
60
58
|
else {
|
|
61
59
|
throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
|
|
@@ -71,80 +69,52 @@ export default class FirecrawlApp {
|
|
|
71
69
|
return { success: false, error: "Internal server error." };
|
|
72
70
|
}
|
|
73
71
|
/**
|
|
74
|
-
*
|
|
75
|
-
* @param query - The query
|
|
76
|
-
* @param params - Additional parameters for the search
|
|
77
|
-
* @returns
|
|
72
|
+
* This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
|
|
73
|
+
* @param query - The search query string.
|
|
74
|
+
* @param params - Additional parameters for the search.
|
|
75
|
+
* @returns Throws an error advising to use version 0 of the API.
|
|
78
76
|
*/
|
|
79
77
|
async search(query, params) {
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
78
|
+
throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
82
|
+
* @param url - The URL to crawl.
|
|
83
|
+
* @param params - Additional parameters for the crawl request.
|
|
84
|
+
* @param pollInterval - Time in seconds for job status checks.
|
|
85
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
86
|
+
* @returns The response from the crawl operation.
|
|
87
|
+
*/
|
|
88
|
+
async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
|
|
89
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
90
|
+
let jsonData = { url, ...params };
|
|
91
91
|
try {
|
|
92
|
-
const response = await
|
|
92
|
+
const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
|
|
93
93
|
if (response.status === 200) {
|
|
94
|
-
const
|
|
95
|
-
|
|
96
|
-
return responseData;
|
|
97
|
-
}
|
|
98
|
-
else {
|
|
99
|
-
throw new Error(`Failed to search. Error: ${responseData.error}`);
|
|
100
|
-
}
|
|
94
|
+
const id = response.data.id;
|
|
95
|
+
return this.monitorJobStatus(id, headers, pollInterval);
|
|
101
96
|
}
|
|
102
97
|
else {
|
|
103
|
-
this.handleError(response, "
|
|
98
|
+
this.handleError(response, "start crawl job");
|
|
104
99
|
}
|
|
105
100
|
}
|
|
106
101
|
catch (error) {
|
|
107
|
-
|
|
102
|
+
if (error.response?.data?.error) {
|
|
103
|
+
throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
|
|
104
|
+
}
|
|
105
|
+
else {
|
|
106
|
+
throw new Error(error.message);
|
|
107
|
+
}
|
|
108
108
|
}
|
|
109
109
|
return { success: false, error: "Internal server error." };
|
|
110
110
|
}
|
|
111
|
-
|
|
112
|
-
* Initiates a crawl job for a URL using the Firecrawl API.
|
|
113
|
-
* @param url - The URL to crawl.
|
|
114
|
-
* @param params - Additional parameters for the crawl request.
|
|
115
|
-
* @param waitUntilDone - Whether to wait for the crawl job to complete.
|
|
116
|
-
* @param pollInterval - Time in seconds for job status checks.
|
|
117
|
-
* @param idempotencyKey - Optional idempotency key for the request.
|
|
118
|
-
* @returns The response from the crawl operation.
|
|
119
|
-
*/
|
|
120
|
-
async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
|
|
111
|
+
async asyncCrawlUrl(url, params, idempotencyKey) {
|
|
121
112
|
const headers = this.prepareHeaders(idempotencyKey);
|
|
122
113
|
let jsonData = { url, ...params };
|
|
123
114
|
try {
|
|
124
|
-
const response = await this.postRequest(this.apiUrl +
|
|
115
|
+
const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
|
|
125
116
|
if (response.status === 200) {
|
|
126
|
-
|
|
127
|
-
let checkUrl = undefined;
|
|
128
|
-
if (waitUntilDone) {
|
|
129
|
-
if (this.version === 'v1') {
|
|
130
|
-
checkUrl = response.data.url;
|
|
131
|
-
}
|
|
132
|
-
return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
|
|
133
|
-
}
|
|
134
|
-
else {
|
|
135
|
-
if (this.version === 'v0') {
|
|
136
|
-
return {
|
|
137
|
-
success: true,
|
|
138
|
-
jobId: id
|
|
139
|
-
};
|
|
140
|
-
}
|
|
141
|
-
else {
|
|
142
|
-
return {
|
|
143
|
-
success: true,
|
|
144
|
-
id: id
|
|
145
|
-
};
|
|
146
|
-
}
|
|
147
|
-
}
|
|
117
|
+
return response.data;
|
|
148
118
|
}
|
|
149
119
|
else {
|
|
150
120
|
this.handleError(response, "start crawl job");
|
|
@@ -171,37 +141,19 @@ export default class FirecrawlApp {
|
|
|
171
141
|
}
|
|
172
142
|
const headers = this.prepareHeaders();
|
|
173
143
|
try {
|
|
174
|
-
const response = await this.getRequest(this.
|
|
175
|
-
`${this.apiUrl}/${this.version}/crawl/${id}` :
|
|
176
|
-
`${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
|
|
144
|
+
const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
|
|
177
145
|
if (response.status === 200) {
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
: undefined,
|
|
190
|
-
};
|
|
191
|
-
}
|
|
192
|
-
else {
|
|
193
|
-
return {
|
|
194
|
-
success: true,
|
|
195
|
-
status: response.data.status,
|
|
196
|
-
total: response.data.total,
|
|
197
|
-
completed: response.data.completed,
|
|
198
|
-
creditsUsed: response.data.creditsUsed,
|
|
199
|
-
expiresAt: new Date(response.data.expiresAt),
|
|
200
|
-
next: response.data.next,
|
|
201
|
-
data: response.data.data,
|
|
202
|
-
error: response.data.error
|
|
203
|
-
};
|
|
204
|
-
}
|
|
146
|
+
return ({
|
|
147
|
+
success: true,
|
|
148
|
+
status: response.data.status,
|
|
149
|
+
total: response.data.total,
|
|
150
|
+
completed: response.data.completed,
|
|
151
|
+
creditsUsed: response.data.creditsUsed,
|
|
152
|
+
expiresAt: new Date(response.data.expiresAt),
|
|
153
|
+
next: response.data.next,
|
|
154
|
+
data: response.data.data,
|
|
155
|
+
error: response.data.error
|
|
156
|
+
});
|
|
205
157
|
}
|
|
206
158
|
else {
|
|
207
159
|
this.handleError(response, "check crawl status");
|
|
@@ -210,29 +162,21 @@ export default class FirecrawlApp {
|
|
|
210
162
|
catch (error) {
|
|
211
163
|
throw new Error(error.message);
|
|
212
164
|
}
|
|
213
|
-
return
|
|
214
|
-
{
|
|
215
|
-
success: false,
|
|
216
|
-
status: "unknown",
|
|
217
|
-
current: 0,
|
|
218
|
-
current_url: "",
|
|
219
|
-
current_step: "",
|
|
220
|
-
total: 0,
|
|
221
|
-
error: "Internal server error.",
|
|
222
|
-
} :
|
|
223
|
-
{
|
|
224
|
-
success: false,
|
|
225
|
-
error: "Internal server error.",
|
|
226
|
-
};
|
|
165
|
+
return { success: false, error: "Internal server error." };
|
|
227
166
|
}
|
|
228
|
-
async
|
|
229
|
-
|
|
230
|
-
|
|
167
|
+
async crawlUrlAndWatch(url, params, idempotencyKey) {
|
|
168
|
+
const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
|
|
169
|
+
if (crawl.success && crawl.id) {
|
|
170
|
+
const id = crawl.id;
|
|
171
|
+
return new CrawlWatcher(id, this);
|
|
231
172
|
}
|
|
173
|
+
throw new Error("Crawl job failed to start");
|
|
174
|
+
}
|
|
175
|
+
async mapUrl(url, params) {
|
|
232
176
|
const headers = this.prepareHeaders();
|
|
233
177
|
let jsonData = { url, ...params };
|
|
234
178
|
try {
|
|
235
|
-
const response = await this.postRequest(this.apiUrl +
|
|
179
|
+
const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
|
|
236
180
|
if (response.status === 200) {
|
|
237
181
|
return response.data;
|
|
238
182
|
}
|
|
@@ -284,21 +228,14 @@ export default class FirecrawlApp {
|
|
|
284
228
|
* @param checkUrl - Optional URL to check the status (used for v1 API)
|
|
285
229
|
* @returns The final job status or data.
|
|
286
230
|
*/
|
|
287
|
-
async monitorJobStatus(id, headers, checkInterval
|
|
288
|
-
let apiUrl = '';
|
|
231
|
+
async monitorJobStatus(id, headers, checkInterval) {
|
|
289
232
|
while (true) {
|
|
290
|
-
|
|
291
|
-
apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
|
|
292
|
-
}
|
|
293
|
-
else if (this.version === 'v0') {
|
|
294
|
-
apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
|
|
295
|
-
}
|
|
296
|
-
const statusResponse = await this.getRequest(apiUrl, headers);
|
|
233
|
+
const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
|
|
297
234
|
if (statusResponse.status === 200) {
|
|
298
235
|
const statusData = statusResponse.data;
|
|
299
236
|
if (statusData.status === "completed") {
|
|
300
237
|
if ("data" in statusData) {
|
|
301
|
-
return
|
|
238
|
+
return statusData;
|
|
302
239
|
}
|
|
303
240
|
else {
|
|
304
241
|
throw new Error("Crawl job completed but no data was returned");
|
|
@@ -332,3 +269,71 @@ export default class FirecrawlApp {
|
|
|
332
269
|
}
|
|
333
270
|
}
|
|
334
271
|
}
|
|
272
|
+
export class CrawlWatcher extends TypedEventTarget {
|
|
273
|
+
constructor(id, app) {
|
|
274
|
+
super();
|
|
275
|
+
this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
|
|
276
|
+
this.status = "scraping";
|
|
277
|
+
this.data = [];
|
|
278
|
+
const messageHandler = (msg) => {
|
|
279
|
+
if (msg.type === "done") {
|
|
280
|
+
this.status = "completed";
|
|
281
|
+
this.dispatchTypedEvent("done", new CustomEvent("done", {
|
|
282
|
+
detail: {
|
|
283
|
+
status: this.status,
|
|
284
|
+
data: this.data,
|
|
285
|
+
},
|
|
286
|
+
}));
|
|
287
|
+
}
|
|
288
|
+
else if (msg.type === "error") {
|
|
289
|
+
this.status = "failed";
|
|
290
|
+
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
|
291
|
+
detail: {
|
|
292
|
+
status: this.status,
|
|
293
|
+
data: this.data,
|
|
294
|
+
error: msg.error,
|
|
295
|
+
},
|
|
296
|
+
}));
|
|
297
|
+
}
|
|
298
|
+
else if (msg.type === "catchup") {
|
|
299
|
+
this.status = msg.data.status;
|
|
300
|
+
this.data.push(...(msg.data.data ?? []));
|
|
301
|
+
for (const doc of this.data) {
|
|
302
|
+
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
|
303
|
+
detail: doc,
|
|
304
|
+
}));
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
else if (msg.type === "document") {
|
|
308
|
+
this.dispatchTypedEvent("document", new CustomEvent("document", {
|
|
309
|
+
detail: msg.data,
|
|
310
|
+
}));
|
|
311
|
+
}
|
|
312
|
+
};
|
|
313
|
+
this.ws.onmessage = ((ev) => {
|
|
314
|
+
if (typeof ev.data !== "string") {
|
|
315
|
+
this.ws.close();
|
|
316
|
+
return;
|
|
317
|
+
}
|
|
318
|
+
const msg = JSON.parse(ev.data);
|
|
319
|
+
messageHandler(msg);
|
|
320
|
+
}).bind(this);
|
|
321
|
+
this.ws.onclose = ((ev) => {
|
|
322
|
+
const msg = JSON.parse(ev.reason);
|
|
323
|
+
messageHandler(msg);
|
|
324
|
+
}).bind(this);
|
|
325
|
+
this.ws.onerror = ((_) => {
|
|
326
|
+
this.status = "failed";
|
|
327
|
+
this.dispatchTypedEvent("error", new CustomEvent("error", {
|
|
328
|
+
detail: {
|
|
329
|
+
status: this.status,
|
|
330
|
+
data: this.data,
|
|
331
|
+
error: "WebSocket error",
|
|
332
|
+
},
|
|
333
|
+
}));
|
|
334
|
+
}).bind(this);
|
|
335
|
+
}
|
|
336
|
+
close() {
|
|
337
|
+
this.ws.close();
|
|
338
|
+
}
|
|
339
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl-js",
|
|
3
|
-
"version": "1.0
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "build/cjs/index.js",
|
|
6
6
|
"types": "types/index.d.ts",
|
|
@@ -30,6 +30,8 @@
|
|
|
30
30
|
"dependencies": {
|
|
31
31
|
"axios": "^1.6.8",
|
|
32
32
|
"dotenv": "^16.4.5",
|
|
33
|
+
"isows": "^1.0.4",
|
|
34
|
+
"typescript-event-target": "^1.1.1",
|
|
33
35
|
"uuid": "^9.0.1",
|
|
34
36
|
"zod": "^3.23.8",
|
|
35
37
|
"zod-to-json-schema": "^3.23.0"
|