@mendable/firecrawl-js 1.0.4 → 1.2.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import axios from "axios";
2
- import { z } from "zod";
3
2
  import { zodToJsonSchema } from "zod-to-json-schema";
3
+ import { WebSocket } from "isows";
4
+ import { TypedEventTarget } from "typescript-event-target";
4
5
  /**
5
6
  * Main class for interacting with the Firecrawl API.
6
7
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -10,13 +11,9 @@ export default class FirecrawlApp {
10
11
  * Initializes a new instance of the FirecrawlApp class.
11
12
  * @param config - Configuration options for the FirecrawlApp instance.
12
13
  */
13
- constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
14
+ constructor({ apiKey = null, apiUrl = null }) {
14
15
  this.apiKey = apiKey || "";
15
16
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
16
- this.version = version;
17
- if (!this.apiKey) {
18
- throw new Error("No API key provided");
19
- }
20
17
  }
21
18
  /**
22
19
  * Scrapes a URL using the Firecrawl API.
@@ -30,32 +27,33 @@ export default class FirecrawlApp {
30
27
  Authorization: `Bearer ${this.apiKey}`,
31
28
  };
32
29
  let jsonData = { url, ...params };
33
- if (jsonData?.extractorOptions?.extractionSchema) {
34
- let schema = jsonData.extractorOptions.extractionSchema;
35
- // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
36
- if (schema instanceof z.ZodSchema) {
30
+ if (jsonData?.extract?.schema) {
31
+ let schema = jsonData.extract.schema;
32
+ // Try parsing the schema as a Zod schema
33
+ try {
37
34
  schema = zodToJsonSchema(schema);
38
35
  }
36
+ catch (error) {
37
+ }
39
38
  jsonData = {
40
39
  ...jsonData,
41
- extractorOptions: {
42
- ...jsonData.extractorOptions,
43
- extractionSchema: schema,
44
- mode: jsonData.extractorOptions.mode || "llm-extraction",
40
+ extract: {
41
+ ...jsonData.extract,
42
+ schema: schema,
45
43
  },
46
44
  };
47
45
  }
48
46
  try {
49
- const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
47
+ const response = await axios.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
50
48
  if (response.status === 200) {
51
49
  const responseData = response.data;
52
50
  if (responseData.success) {
53
- return (this.version === 'v0' ? responseData : {
51
+ return {
54
52
  success: true,
55
53
  warning: responseData.warning,
56
54
  error: responseData.error,
57
55
  ...responseData.data
58
- });
56
+ };
59
57
  }
60
58
  else {
61
59
  throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
@@ -71,80 +69,52 @@ export default class FirecrawlApp {
71
69
  return { success: false, error: "Internal server error." };
72
70
  }
73
71
  /**
74
- * Searches for a query using the Firecrawl API.
75
- * @param query - The query to search for.
76
- * @param params - Additional parameters for the search request.
77
- * @returns The response from the search operation.
72
+ * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
73
+ * @param query - The search query string.
74
+ * @param params - Additional parameters for the search.
75
+ * @returns Throws an error advising to use version 0 of the API.
78
76
  */
79
77
  async search(query, params) {
80
- if (this.version === "v1") {
81
- throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
82
- }
83
- const headers = {
84
- "Content-Type": "application/json",
85
- Authorization: `Bearer ${this.apiKey}`,
86
- };
87
- let jsonData = { query };
88
- if (params) {
89
- jsonData = { ...jsonData, ...params };
90
- }
78
+ throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
79
+ }
80
+ /**
81
+ * Initiates a crawl job for a URL using the Firecrawl API.
82
+ * @param url - The URL to crawl.
83
+ * @param params - Additional parameters for the crawl request.
84
+ * @param pollInterval - Time in seconds for job status checks.
85
+ * @param idempotencyKey - Optional idempotency key for the request.
86
+ * @returns The response from the crawl operation.
87
+ */
88
+ async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
89
+ const headers = this.prepareHeaders(idempotencyKey);
90
+ let jsonData = { url, ...params };
91
91
  try {
92
- const response = await axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
92
+ const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
93
93
  if (response.status === 200) {
94
- const responseData = response.data;
95
- if (responseData.success) {
96
- return responseData;
97
- }
98
- else {
99
- throw new Error(`Failed to search. Error: ${responseData.error}`);
100
- }
94
+ const id = response.data.id;
95
+ return this.monitorJobStatus(id, headers, pollInterval);
101
96
  }
102
97
  else {
103
- this.handleError(response, "search");
98
+ this.handleError(response, "start crawl job");
104
99
  }
105
100
  }
106
101
  catch (error) {
107
- throw new Error(error.message);
102
+ if (error.response?.data?.error) {
103
+ throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
104
+ }
105
+ else {
106
+ throw new Error(error.message);
107
+ }
108
108
  }
109
109
  return { success: false, error: "Internal server error." };
110
110
  }
111
- /**
112
- * Initiates a crawl job for a URL using the Firecrawl API.
113
- * @param url - The URL to crawl.
114
- * @param params - Additional parameters for the crawl request.
115
- * @param waitUntilDone - Whether to wait for the crawl job to complete.
116
- * @param pollInterval - Time in seconds for job status checks.
117
- * @param idempotencyKey - Optional idempotency key for the request.
118
- * @returns The response from the crawl operation.
119
- */
120
- async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
111
+ async asyncCrawlUrl(url, params, idempotencyKey) {
121
112
  const headers = this.prepareHeaders(idempotencyKey);
122
113
  let jsonData = { url, ...params };
123
114
  try {
124
- const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
115
+ const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
125
116
  if (response.status === 200) {
126
- const id = this.version === 'v0' ? response.data.jobId : response.data.id;
127
- let checkUrl = undefined;
128
- if (waitUntilDone) {
129
- if (this.version === 'v1') {
130
- checkUrl = response.data.url;
131
- }
132
- return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
133
- }
134
- else {
135
- if (this.version === 'v0') {
136
- return {
137
- success: true,
138
- jobId: id
139
- };
140
- }
141
- else {
142
- return {
143
- success: true,
144
- id: id
145
- };
146
- }
147
- }
117
+ return response.data;
148
118
  }
149
119
  else {
150
120
  this.handleError(response, "start crawl job");
@@ -171,37 +141,19 @@ export default class FirecrawlApp {
171
141
  }
172
142
  const headers = this.prepareHeaders();
173
143
  try {
174
- const response = await this.getRequest(this.version === 'v1' ?
175
- `${this.apiUrl}/${this.version}/crawl/${id}` :
176
- `${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
144
+ const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
177
145
  if (response.status === 200) {
178
- if (this.version === 'v0') {
179
- return {
180
- success: true,
181
- status: response.data.status,
182
- current: response.data.current,
183
- current_url: response.data.current_url,
184
- current_step: response.data.current_step,
185
- total: response.data.total,
186
- data: response.data.data,
187
- partial_data: !response.data.data
188
- ? response.data.partial_data
189
- : undefined,
190
- };
191
- }
192
- else {
193
- return {
194
- success: true,
195
- status: response.data.status,
196
- total: response.data.total,
197
- completed: response.data.completed,
198
- creditsUsed: response.data.creditsUsed,
199
- expiresAt: new Date(response.data.expiresAt),
200
- next: response.data.next,
201
- data: response.data.data,
202
- error: response.data.error
203
- };
204
- }
146
+ return ({
147
+ success: true,
148
+ status: response.data.status,
149
+ total: response.data.total,
150
+ completed: response.data.completed,
151
+ creditsUsed: response.data.creditsUsed,
152
+ expiresAt: new Date(response.data.expiresAt),
153
+ next: response.data.next,
154
+ data: response.data.data,
155
+ error: response.data.error
156
+ });
205
157
  }
206
158
  else {
207
159
  this.handleError(response, "check crawl status");
@@ -210,29 +162,21 @@ export default class FirecrawlApp {
210
162
  catch (error) {
211
163
  throw new Error(error.message);
212
164
  }
213
- return this.version === 'v0' ?
214
- {
215
- success: false,
216
- status: "unknown",
217
- current: 0,
218
- current_url: "",
219
- current_step: "",
220
- total: 0,
221
- error: "Internal server error.",
222
- } :
223
- {
224
- success: false,
225
- error: "Internal server error.",
226
- };
165
+ return { success: false, error: "Internal server error." };
227
166
  }
228
- async mapUrl(url, params) {
229
- if (this.version == 'v0') {
230
- throw new Error("Map is not supported in v0");
167
+ async crawlUrlAndWatch(url, params, idempotencyKey) {
168
+ const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
169
+ if (crawl.success && crawl.id) {
170
+ const id = crawl.id;
171
+ return new CrawlWatcher(id, this);
231
172
  }
173
+ throw new Error("Crawl job failed to start");
174
+ }
175
+ async mapUrl(url, params) {
232
176
  const headers = this.prepareHeaders();
233
177
  let jsonData = { url, ...params };
234
178
  try {
235
- const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
179
+ const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
236
180
  if (response.status === 200) {
237
181
  return response.data;
238
182
  }
@@ -284,21 +228,14 @@ export default class FirecrawlApp {
284
228
  * @param checkUrl - Optional URL to check the status (used for v1 API)
285
229
  * @returns The final job status or data.
286
230
  */
287
- async monitorJobStatus(id, headers, checkInterval, checkUrl) {
288
- let apiUrl = '';
231
+ async monitorJobStatus(id, headers, checkInterval) {
289
232
  while (true) {
290
- if (this.version === 'v1') {
291
- apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
292
- }
293
- else if (this.version === 'v0') {
294
- apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
295
- }
296
- const statusResponse = await this.getRequest(apiUrl, headers);
233
+ const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
297
234
  if (statusResponse.status === 200) {
298
235
  const statusData = statusResponse.data;
299
236
  if (statusData.status === "completed") {
300
237
  if ("data" in statusData) {
301
- return this.version === 'v0' ? statusData.data : statusData;
238
+ return statusData;
302
239
  }
303
240
  else {
304
241
  throw new Error("Crawl job completed but no data was returned");
@@ -332,3 +269,71 @@ export default class FirecrawlApp {
332
269
  }
333
270
  }
334
271
  }
272
+ export class CrawlWatcher extends TypedEventTarget {
273
+ constructor(id, app) {
274
+ super();
275
+ this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
276
+ this.status = "scraping";
277
+ this.data = [];
278
+ const messageHandler = (msg) => {
279
+ if (msg.type === "done") {
280
+ this.status = "completed";
281
+ this.dispatchTypedEvent("done", new CustomEvent("done", {
282
+ detail: {
283
+ status: this.status,
284
+ data: this.data,
285
+ },
286
+ }));
287
+ }
288
+ else if (msg.type === "error") {
289
+ this.status = "failed";
290
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
291
+ detail: {
292
+ status: this.status,
293
+ data: this.data,
294
+ error: msg.error,
295
+ },
296
+ }));
297
+ }
298
+ else if (msg.type === "catchup") {
299
+ this.status = msg.data.status;
300
+ this.data.push(...(msg.data.data ?? []));
301
+ for (const doc of this.data) {
302
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
303
+ detail: doc,
304
+ }));
305
+ }
306
+ }
307
+ else if (msg.type === "document") {
308
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
309
+ detail: msg.data,
310
+ }));
311
+ }
312
+ };
313
+ this.ws.onmessage = ((ev) => {
314
+ if (typeof ev.data !== "string") {
315
+ this.ws.close();
316
+ return;
317
+ }
318
+ const msg = JSON.parse(ev.data);
319
+ messageHandler(msg);
320
+ }).bind(this);
321
+ this.ws.onclose = ((ev) => {
322
+ const msg = JSON.parse(ev.reason);
323
+ messageHandler(msg);
324
+ }).bind(this);
325
+ this.ws.onerror = ((_) => {
326
+ this.status = "failed";
327
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
328
+ detail: {
329
+ status: this.status,
330
+ data: this.data,
331
+ error: "WebSocket error",
332
+ },
333
+ }));
334
+ }).bind(this);
335
+ }
336
+ close() {
337
+ this.ws.close();
338
+ }
339
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mendable/firecrawl-js",
3
- "version": "1.0.4",
3
+ "version": "1.2.0",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "build/cjs/index.js",
6
6
  "types": "types/index.d.ts",
@@ -30,6 +30,8 @@
30
30
  "dependencies": {
31
31
  "axios": "^1.6.8",
32
32
  "dotenv": "^16.4.5",
33
+ "isows": "^1.0.4",
34
+ "typescript-event-target": "^1.1.1",
33
35
  "uuid": "^9.0.1",
34
36
  "zod": "^3.23.8",
35
37
  "zod-to-json-schema": "^3.23.0"
@@ -189,7 +189,6 @@ describe('FirecrawlApp<"v0"> E2E Tests', () => {
189
189
  10
190
190
  )) as FirecrawlDocumentV0[];
191
191
  expect(response).not.toBeNull();
192
- console.log({ response });
193
192
  expect(response[0].content).toContain("_Roast_");
194
193
  },
195
194
  60000