@mendable/firecrawl-js 1.0.4 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,11 +37,9 @@ const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
37
37
  scrapeOptions: {
38
38
  formats: ['markdown', 'html'],
39
39
  }
40
- } as CrawlParams, true, 30) as CrawlStatusResponse;
40
+ })
41
41
 
42
- if (crawlResponse) {
43
- console.log(crawlResponse)
44
- }
42
+ console.log(crawlResponse)
45
43
  ```
46
44
 
47
45
  ### Scraping a URL
@@ -63,16 +61,21 @@ const crawlResponse = await app.crawlUrl('https://firecrawl.dev', {
63
61
  scrapeOptions: {
64
62
  formats: ['markdown', 'html'],
65
63
  }
66
- } as CrawlParams, true, 30) as CrawlStatusResponse;
64
+ })
65
+ ```
67
66
 
68
- if (crawlResponse) {
69
- console.log(crawlResponse)
70
- }
67
+
68
+ ### Asynchronous Crawl
69
+
70
+ To initiate an asynchronous crawl of a website, utilize the `asyncCrawlUrl` method. This method requires the starting URL and optional parameters as inputs. The params argument enables you to define various settings for the asynchronous crawl, such as the maximum number of pages to crawl, permitted domains, and the output format. Upon successful initiation, this method returns an ID, which is essential for subsequently checking the status of the crawl.
71
+
72
+ ```js
73
+ const asyncCrawlResult = await app.asyncCrawlUrl('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
71
74
  ```
72
75
 
73
76
  ### Checking Crawl Status
74
77
 
75
- To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
78
+ To check the status of a crawl job with error handling, use the `checkCrawlStatus` method. It takes the job ID as a parameter and returns the current status of the crawl job.
76
79
 
77
80
  ```js
78
81
  const status = await app.checkCrawlStatus(id);
@@ -121,6 +124,27 @@ const mapResult = await app.mapUrl('https://example.com') as MapResponse;
121
124
  console.log(mapResult)
122
125
  ```
123
126
 
127
+ ### Crawl a website with WebSockets
128
+
129
+ To crawl a website with WebSockets, use the `crawlUrlAndWatch` method. It takes the starting URL and optional parameters as arguments. The `params` argument allows you to specify additional options for the crawl job, such as the maximum number of pages to crawl, allowed domains, and the output format.
130
+
131
+ ```js
132
+ // Crawl a website with WebSockets:
133
+ const watch = await app.crawlUrlAndWatch('mendable.ai', { excludePaths: ['blog/*'], limit: 5});
134
+
135
+ watch.addEventListener("document", doc => {
136
+ console.log("DOC", doc.detail);
137
+ });
138
+
139
+ watch.addEventListener("error", err => {
140
+ console.error("ERR", err.detail.error);
141
+ });
142
+
143
+ watch.addEventListener("done", state => {
144
+ console.log("DONE", state.detail.status);
145
+ });
146
+ ```
147
+
124
148
  ## Error Handling
125
149
 
126
150
  The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message. The examples above demonstrate how to handle these errors using `try/catch` blocks.
@@ -3,9 +3,11 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.CrawlWatcher = void 0;
6
7
  const axios_1 = __importDefault(require("axios"));
7
- const zod_1 = require("zod");
8
8
  const zod_to_json_schema_1 = require("zod-to-json-schema");
9
+ const isows_1 = require("isows");
10
+ const typescript_event_target_1 = require("typescript-event-target");
9
11
  /**
10
12
  * Main class for interacting with the Firecrawl API.
11
13
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -15,13 +17,9 @@ class FirecrawlApp {
15
17
  * Initializes a new instance of the FirecrawlApp class.
16
18
  * @param config - Configuration options for the FirecrawlApp instance.
17
19
  */
18
- constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
20
+ constructor({ apiKey = null, apiUrl = null }) {
19
21
  this.apiKey = apiKey || "";
20
22
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
21
- this.version = version;
22
- if (!this.apiKey) {
23
- throw new Error("No API key provided");
24
- }
25
23
  }
26
24
  /**
27
25
  * Scrapes a URL using the Firecrawl API.
@@ -35,32 +33,33 @@ class FirecrawlApp {
35
33
  Authorization: `Bearer ${this.apiKey}`,
36
34
  };
37
35
  let jsonData = { url, ...params };
38
- if (jsonData?.extractorOptions?.extractionSchema) {
39
- let schema = jsonData.extractorOptions.extractionSchema;
40
- // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
41
- if (schema instanceof zod_1.z.ZodSchema) {
36
+ if (jsonData?.extract?.schema) {
37
+ let schema = jsonData.extract.schema;
38
+ // Try parsing the schema as a Zod schema
39
+ try {
42
40
  schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
43
41
  }
42
+ catch (error) {
43
+ }
44
44
  jsonData = {
45
45
  ...jsonData,
46
- extractorOptions: {
47
- ...jsonData.extractorOptions,
48
- extractionSchema: schema,
49
- mode: jsonData.extractorOptions.mode || "llm-extraction",
46
+ extract: {
47
+ ...jsonData.extract,
48
+ schema: schema,
50
49
  },
51
50
  };
52
51
  }
53
52
  try {
54
- const response = await axios_1.default.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
53
+ const response = await axios_1.default.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
55
54
  if (response.status === 200) {
56
55
  const responseData = response.data;
57
56
  if (responseData.success) {
58
- return (this.version === 'v0' ? responseData : {
57
+ return {
59
58
  success: true,
60
59
  warning: responseData.warning,
61
60
  error: responseData.error,
62
61
  ...responseData.data
63
- });
62
+ };
64
63
  }
65
64
  else {
66
65
  throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
@@ -76,80 +75,52 @@ class FirecrawlApp {
76
75
  return { success: false, error: "Internal server error." };
77
76
  }
78
77
  /**
79
- * Searches for a query using the Firecrawl API.
80
- * @param query - The query to search for.
81
- * @param params - Additional parameters for the search request.
82
- * @returns The response from the search operation.
78
+ * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
79
+ * @param query - The search query string.
80
+ * @param params - Additional parameters for the search.
81
+ * @returns Throws an error advising to use version 0 of the API.
83
82
  */
84
83
  async search(query, params) {
85
- if (this.version === "v1") {
86
- throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
87
- }
88
- const headers = {
89
- "Content-Type": "application/json",
90
- Authorization: `Bearer ${this.apiKey}`,
91
- };
92
- let jsonData = { query };
93
- if (params) {
94
- jsonData = { ...jsonData, ...params };
95
- }
84
+ throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
85
+ }
86
+ /**
87
+ * Initiates a crawl job for a URL using the Firecrawl API.
88
+ * @param url - The URL to crawl.
89
+ * @param params - Additional parameters for the crawl request.
90
+ * @param pollInterval - Time in seconds for job status checks.
91
+ * @param idempotencyKey - Optional idempotency key for the request.
92
+ * @returns The response from the crawl operation.
93
+ */
94
+ async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
95
+ const headers = this.prepareHeaders(idempotencyKey);
96
+ let jsonData = { url, ...params };
96
97
  try {
97
- const response = await axios_1.default.post(this.apiUrl + "/v0/search", jsonData, { headers });
98
+ const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
98
99
  if (response.status === 200) {
99
- const responseData = response.data;
100
- if (responseData.success) {
101
- return responseData;
102
- }
103
- else {
104
- throw new Error(`Failed to search. Error: ${responseData.error}`);
105
- }
100
+ const id = response.data.id;
101
+ return this.monitorJobStatus(id, headers, pollInterval);
106
102
  }
107
103
  else {
108
- this.handleError(response, "search");
104
+ this.handleError(response, "start crawl job");
109
105
  }
110
106
  }
111
107
  catch (error) {
112
- throw new Error(error.message);
108
+ if (error.response?.data?.error) {
109
+ throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
110
+ }
111
+ else {
112
+ throw new Error(error.message);
113
+ }
113
114
  }
114
115
  return { success: false, error: "Internal server error." };
115
116
  }
116
- /**
117
- * Initiates a crawl job for a URL using the Firecrawl API.
118
- * @param url - The URL to crawl.
119
- * @param params - Additional parameters for the crawl request.
120
- * @param waitUntilDone - Whether to wait for the crawl job to complete.
121
- * @param pollInterval - Time in seconds for job status checks.
122
- * @param idempotencyKey - Optional idempotency key for the request.
123
- * @returns The response from the crawl operation.
124
- */
125
- async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
117
+ async asyncCrawlUrl(url, params, idempotencyKey) {
126
118
  const headers = this.prepareHeaders(idempotencyKey);
127
119
  let jsonData = { url, ...params };
128
120
  try {
129
- const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
121
+ const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
130
122
  if (response.status === 200) {
131
- const id = this.version === 'v0' ? response.data.jobId : response.data.id;
132
- let checkUrl = undefined;
133
- if (waitUntilDone) {
134
- if (this.version === 'v1') {
135
- checkUrl = response.data.url;
136
- }
137
- return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
138
- }
139
- else {
140
- if (this.version === 'v0') {
141
- return {
142
- success: true,
143
- jobId: id
144
- };
145
- }
146
- else {
147
- return {
148
- success: true,
149
- id: id
150
- };
151
- }
152
- }
123
+ return response.data;
153
124
  }
154
125
  else {
155
126
  this.handleError(response, "start crawl job");
@@ -176,37 +147,19 @@ class FirecrawlApp {
176
147
  }
177
148
  const headers = this.prepareHeaders();
178
149
  try {
179
- const response = await this.getRequest(this.version === 'v1' ?
180
- `${this.apiUrl}/${this.version}/crawl/${id}` :
181
- `${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
150
+ const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
182
151
  if (response.status === 200) {
183
- if (this.version === 'v0') {
184
- return {
185
- success: true,
186
- status: response.data.status,
187
- current: response.data.current,
188
- current_url: response.data.current_url,
189
- current_step: response.data.current_step,
190
- total: response.data.total,
191
- data: response.data.data,
192
- partial_data: !response.data.data
193
- ? response.data.partial_data
194
- : undefined,
195
- };
196
- }
197
- else {
198
- return {
199
- success: true,
200
- status: response.data.status,
201
- total: response.data.total,
202
- completed: response.data.completed,
203
- creditsUsed: response.data.creditsUsed,
204
- expiresAt: new Date(response.data.expiresAt),
205
- next: response.data.next,
206
- data: response.data.data,
207
- error: response.data.error
208
- };
209
- }
152
+ return ({
153
+ success: true,
154
+ status: response.data.status,
155
+ total: response.data.total,
156
+ completed: response.data.completed,
157
+ creditsUsed: response.data.creditsUsed,
158
+ expiresAt: new Date(response.data.expiresAt),
159
+ next: response.data.next,
160
+ data: response.data.data,
161
+ error: response.data.error
162
+ });
210
163
  }
211
164
  else {
212
165
  this.handleError(response, "check crawl status");
@@ -215,29 +168,21 @@ class FirecrawlApp {
215
168
  catch (error) {
216
169
  throw new Error(error.message);
217
170
  }
218
- return this.version === 'v0' ?
219
- {
220
- success: false,
221
- status: "unknown",
222
- current: 0,
223
- current_url: "",
224
- current_step: "",
225
- total: 0,
226
- error: "Internal server error.",
227
- } :
228
- {
229
- success: false,
230
- error: "Internal server error.",
231
- };
171
+ return { success: false, error: "Internal server error." };
232
172
  }
233
- async mapUrl(url, params) {
234
- if (this.version == 'v0') {
235
- throw new Error("Map is not supported in v0");
173
+ async crawlUrlAndWatch(url, params, idempotencyKey) {
174
+ const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
175
+ if (crawl.success && crawl.id) {
176
+ const id = crawl.id;
177
+ return new CrawlWatcher(id, this);
236
178
  }
179
+ throw new Error("Crawl job failed to start");
180
+ }
181
+ async mapUrl(url, params) {
237
182
  const headers = this.prepareHeaders();
238
183
  let jsonData = { url, ...params };
239
184
  try {
240
- const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
185
+ const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
241
186
  if (response.status === 200) {
242
187
  return response.data;
243
188
  }
@@ -289,21 +234,14 @@ class FirecrawlApp {
289
234
  * @param checkUrl - Optional URL to check the status (used for v1 API)
290
235
  * @returns The final job status or data.
291
236
  */
292
- async monitorJobStatus(id, headers, checkInterval, checkUrl) {
293
- let apiUrl = '';
237
+ async monitorJobStatus(id, headers, checkInterval) {
294
238
  while (true) {
295
- if (this.version === 'v1') {
296
- apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
297
- }
298
- else if (this.version === 'v0') {
299
- apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
300
- }
301
- const statusResponse = await this.getRequest(apiUrl, headers);
239
+ const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
302
240
  if (statusResponse.status === 200) {
303
241
  const statusData = statusResponse.data;
304
242
  if (statusData.status === "completed") {
305
243
  if ("data" in statusData) {
306
- return this.version === 'v0' ? statusData.data : statusData;
244
+ return statusData;
307
245
  }
308
246
  else {
309
247
  throw new Error("Crawl job completed but no data was returned");
@@ -338,3 +276,72 @@ class FirecrawlApp {
338
276
  }
339
277
  }
340
278
  exports.default = FirecrawlApp;
279
+ class CrawlWatcher extends typescript_event_target_1.TypedEventTarget {
280
+ constructor(id, app) {
281
+ super();
282
+ this.ws = new isows_1.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
283
+ this.status = "scraping";
284
+ this.data = [];
285
+ const messageHandler = (msg) => {
286
+ if (msg.type === "done") {
287
+ this.status = "completed";
288
+ this.dispatchTypedEvent("done", new CustomEvent("done", {
289
+ detail: {
290
+ status: this.status,
291
+ data: this.data,
292
+ },
293
+ }));
294
+ }
295
+ else if (msg.type === "error") {
296
+ this.status = "failed";
297
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
298
+ detail: {
299
+ status: this.status,
300
+ data: this.data,
301
+ error: msg.error,
302
+ },
303
+ }));
304
+ }
305
+ else if (msg.type === "catchup") {
306
+ this.status = msg.data.status;
307
+ this.data.push(...(msg.data.data ?? []));
308
+ for (const doc of this.data) {
309
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
310
+ detail: doc,
311
+ }));
312
+ }
313
+ }
314
+ else if (msg.type === "document") {
315
+ this.dispatchTypedEvent("document", new CustomEvent("document", {
316
+ detail: msg.data,
317
+ }));
318
+ }
319
+ };
320
+ this.ws.onmessage = ((ev) => {
321
+ if (typeof ev.data !== "string") {
322
+ this.ws.close();
323
+ return;
324
+ }
325
+ const msg = JSON.parse(ev.data);
326
+ messageHandler(msg);
327
+ }).bind(this);
328
+ this.ws.onclose = ((ev) => {
329
+ const msg = JSON.parse(ev.reason);
330
+ messageHandler(msg);
331
+ }).bind(this);
332
+ this.ws.onerror = ((_) => {
333
+ this.status = "failed";
334
+ this.dispatchTypedEvent("error", new CustomEvent("error", {
335
+ detail: {
336
+ status: this.status,
337
+ data: this.data,
338
+ error: "WebSocket error",
339
+ },
340
+ }));
341
+ }).bind(this);
342
+ }
343
+ close() {
344
+ this.ws.close();
345
+ }
346
+ }
347
+ exports.CrawlWatcher = CrawlWatcher;