@mendable/firecrawl-js 0.0.36 → 1.0.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -1,159 +1,181 @@
1
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
- return new (P || (P = Promise))(function (resolve, reject) {
4
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
- step((generator = generator.apply(thisArg, _arguments || [])).next());
8
- });
9
- };
10
1
  import axios from "axios";
11
2
  import { z } from "zod";
12
3
  import { zodToJsonSchema } from "zod-to-json-schema";
13
4
  /**
14
5
  * Main class for interacting with the Firecrawl API.
6
+ * Provides methods for scraping, searching, crawling, and mapping web content.
15
7
  */
16
8
  export default class FirecrawlApp {
17
9
  /**
18
10
  * Initializes a new instance of the FirecrawlApp class.
19
- * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
11
+ * @param config - Configuration options for the FirecrawlApp instance.
20
12
  */
21
- constructor({ apiKey = null, apiUrl = null }) {
13
+ constructor({ apiKey = null, apiUrl = null, version = "v1" }) {
22
14
  this.apiKey = apiKey || "";
23
15
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
16
+ this.version = version;
24
17
  if (!this.apiKey) {
25
18
  throw new Error("No API key provided");
26
19
  }
27
20
  }
28
21
  /**
29
22
  * Scrapes a URL using the Firecrawl API.
30
- * @param {string} url - The URL to scrape.
31
- * @param {Params | null} params - Additional parameters for the scrape request.
32
- * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
23
+ * @param url - The URL to scrape.
24
+ * @param params - Additional parameters for the scrape request.
25
+ * @returns The response from the scrape operation.
33
26
  */
34
- scrapeUrl(url_1) {
35
- return __awaiter(this, arguments, void 0, function* (url, params = null) {
36
- var _a;
37
- const headers = {
38
- "Content-Type": "application/json",
39
- Authorization: `Bearer ${this.apiKey}`,
40
- };
41
- let jsonData = Object.assign({ url }, params);
42
- if ((_a = params === null || params === void 0 ? void 0 : params.extractorOptions) === null || _a === void 0 ? void 0 : _a.extractionSchema) {
43
- let schema = params.extractorOptions.extractionSchema;
44
- // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
45
- if (schema instanceof z.ZodSchema) {
46
- schema = zodToJsonSchema(schema);
47
- }
48
- jsonData = Object.assign(Object.assign({}, jsonData), { extractorOptions: Object.assign(Object.assign({}, params.extractorOptions), { extractionSchema: schema, mode: params.extractorOptions.mode || "llm-extraction" }) });
27
+ async scrapeUrl(url, params) {
28
+ const headers = {
29
+ "Content-Type": "application/json",
30
+ Authorization: `Bearer ${this.apiKey}`,
31
+ };
32
+ let jsonData = { url, ...params };
33
+ if (jsonData?.extractorOptions?.extractionSchema) {
34
+ let schema = jsonData.extractorOptions.extractionSchema;
35
+ // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
36
+ if (schema instanceof z.ZodSchema) {
37
+ schema = zodToJsonSchema(schema);
49
38
  }
50
- try {
51
- const response = yield axios.post(this.apiUrl + "/v0/scrape", jsonData, { headers });
52
- if (response.status === 200) {
53
- const responseData = response.data;
54
- if (responseData.success) {
55
- return responseData;
56
- }
57
- else {
58
- throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
59
- }
39
+ jsonData = {
40
+ ...jsonData,
41
+ extractorOptions: {
42
+ ...jsonData.extractorOptions,
43
+ extractionSchema: schema,
44
+ mode: jsonData.extractorOptions.mode || "llm-extraction",
45
+ },
46
+ };
47
+ }
48
+ try {
49
+ const response = await axios.post(this.apiUrl + `/${this.version}/scrape`, jsonData, { headers });
50
+ if (response.status === 200) {
51
+ const responseData = response.data;
52
+ if (responseData.success) {
53
+ return (this.version === 'v0' ? responseData : {
54
+ success: true,
55
+ warning: responseData.warning,
56
+ error: responseData.error,
57
+ ...responseData.data
58
+ });
60
59
  }
61
60
  else {
62
- this.handleError(response, "scrape URL");
61
+ throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
63
62
  }
64
63
  }
65
- catch (error) {
66
- throw new Error(error.message);
64
+ else {
65
+ this.handleError(response, "scrape URL");
67
66
  }
68
- return { success: false, error: "Internal server error." };
69
- });
67
+ }
68
+ catch (error) {
69
+ throw new Error(error.message);
70
+ }
71
+ return { success: false, error: "Internal server error." };
70
72
  }
71
73
  /**
72
74
  * Searches for a query using the Firecrawl API.
73
- * @param {string} query - The query to search for.
74
- * @param {Params | null} params - Additional parameters for the search request.
75
- * @returns {Promise<SearchResponse>} The response from the search operation.
75
+ * @param query - The query to search for.
76
+ * @param params - Additional parameters for the search request.
77
+ * @returns The response from the search operation.
76
78
  */
77
- search(query_1) {
78
- return __awaiter(this, arguments, void 0, function* (query, params = null) {
79
- const headers = {
80
- "Content-Type": "application/json",
81
- Authorization: `Bearer ${this.apiKey}`,
82
- };
83
- let jsonData = { query };
84
- if (params) {
85
- jsonData = Object.assign(Object.assign({}, jsonData), params);
86
- }
87
- try {
88
- const response = yield axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
89
- if (response.status === 200) {
90
- const responseData = response.data;
91
- if (responseData.success) {
92
- return responseData;
93
- }
94
- else {
95
- throw new Error(`Failed to search. Error: ${responseData.error}`);
96
- }
79
+ async search(query, params) {
80
+ if (this.version === "v1") {
81
+ throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
82
+ }
83
+ const headers = {
84
+ "Content-Type": "application/json",
85
+ Authorization: `Bearer ${this.apiKey}`,
86
+ };
87
+ let jsonData = { query };
88
+ if (params) {
89
+ jsonData = { ...jsonData, ...params };
90
+ }
91
+ try {
92
+ const response = await axios.post(this.apiUrl + "/v0/search", jsonData, { headers });
93
+ if (response.status === 200) {
94
+ const responseData = response.data;
95
+ if (responseData.success) {
96
+ return responseData;
97
97
  }
98
98
  else {
99
- this.handleError(response, "search");
99
+ throw new Error(`Failed to search. Error: ${responseData.error}`);
100
100
  }
101
101
  }
102
- catch (error) {
103
- throw new Error(error.message);
102
+ else {
103
+ this.handleError(response, "search");
104
104
  }
105
- return { success: false, error: "Internal server error." };
106
- });
105
+ }
106
+ catch (error) {
107
+ throw new Error(error.message);
108
+ }
109
+ return { success: false, error: "Internal server error." };
107
110
  }
108
111
  /**
109
112
  * Initiates a crawl job for a URL using the Firecrawl API.
110
- * @param {string} url - The URL to crawl.
111
- * @param {Params | null} params - Additional parameters for the crawl request.
112
- * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
113
- * @param {number} pollInterval - Time in seconds for job status checks.
114
- * @param {string} idempotencyKey - Optional idempotency key for the request.
115
- * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
113
+ * @param url - The URL to crawl.
114
+ * @param params - Additional parameters for the crawl request.
115
+ * @param waitUntilDone - Whether to wait for the crawl job to complete.
116
+ * @param pollInterval - Time in seconds for job status checks.
117
+ * @param idempotencyKey - Optional idempotency key for the request.
118
+ * @returns The response from the crawl operation.
116
119
  */
117
- crawlUrl(url_1) {
118
- return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
119
- const headers = this.prepareHeaders(idempotencyKey);
120
- let jsonData = { url };
121
- if (params) {
122
- jsonData = Object.assign(Object.assign({}, jsonData), params);
123
- }
124
- try {
125
- const response = yield this.postRequest(this.apiUrl + "/v0/crawl", jsonData, headers);
126
- if (response.status === 200) {
127
- const jobId = response.data.jobId;
128
- if (waitUntilDone) {
129
- return this.monitorJobStatus(jobId, headers, pollInterval);
130
- }
131
- else {
132
- return { success: true, jobId };
120
+ async crawlUrl(url, params, waitUntilDone = true, pollInterval = 2, idempotencyKey) {
121
+ const headers = this.prepareHeaders(idempotencyKey);
122
+ let jsonData = { url, ...params };
123
+ try {
124
+ const response = await this.postRequest(this.apiUrl + `/${this.version}/crawl`, jsonData, headers);
125
+ if (response.status === 200) {
126
+ const id = this.version === 'v0' ? response.data.jobId : response.data.id;
127
+ let checkUrl = undefined;
128
+ if (waitUntilDone) {
129
+ if (this.version === 'v1') {
130
+ checkUrl = response.data.url;
133
131
  }
132
+ return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
134
133
  }
135
134
  else {
136
- this.handleError(response, "start crawl job");
135
+ if (this.version === 'v0') {
136
+ return {
137
+ success: true,
138
+ jobId: id
139
+ };
140
+ }
141
+ else {
142
+ return {
143
+ success: true,
144
+ id: id
145
+ };
146
+ }
137
147
  }
138
148
  }
139
- catch (error) {
140
- console.log(error);
149
+ else {
150
+ this.handleError(response, "start crawl job");
151
+ }
152
+ }
153
+ catch (error) {
154
+ if (error.response?.data?.error) {
155
+ throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
156
+ }
157
+ else {
141
158
  throw new Error(error.message);
142
159
  }
143
- return { success: false, error: "Internal server error." };
144
- });
160
+ }
161
+ return { success: false, error: "Internal server error." };
145
162
  }
146
163
  /**
147
164
  * Checks the status of a crawl job using the Firecrawl API.
148
- * @param {string} jobId - The job ID of the crawl operation.
149
- * @returns {Promise<JobStatusResponse>} The response containing the job status.
165
+ * @param id - The ID of the crawl operation.
166
+ * @returns The response containing the job status.
150
167
  */
151
- checkCrawlStatus(jobId) {
152
- return __awaiter(this, void 0, void 0, function* () {
153
- const headers = this.prepareHeaders();
154
- try {
155
- const response = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
156
- if (response.status === 200) {
168
+ async checkCrawlStatus(id) {
169
+ if (!id) {
170
+ throw new Error("No crawl ID provided");
171
+ }
172
+ const headers = this.prepareHeaders();
173
+ try {
174
+ const response = await this.getRequest(this.version === 'v1' ?
175
+ `${this.apiUrl}/${this.version}/crawl/${id}` :
176
+ `${this.apiUrl}/${this.version}/crawl/status/${id}`, headers);
177
+ if (response.status === 200) {
178
+ if (this.version === 'v0') {
157
179
  return {
158
180
  success: true,
159
181
  status: response.data.status,
@@ -168,13 +190,28 @@ export default class FirecrawlApp {
168
190
  };
169
191
  }
170
192
  else {
171
- this.handleError(response, "check crawl status");
193
+ return {
194
+ success: true,
195
+ status: response.data.status,
196
+ total: response.data.total,
197
+ completed: response.data.completed,
198
+ creditsUsed: response.data.creditsUsed,
199
+ expiresAt: new Date(response.data.expiresAt),
200
+ next: response.data.next,
201
+ data: response.data.data,
202
+ error: response.data.error
203
+ };
172
204
  }
173
205
  }
174
- catch (error) {
175
- throw new Error(error.message);
206
+ else {
207
+ this.handleError(response, "check crawl status");
176
208
  }
177
- return {
209
+ }
210
+ catch (error) {
211
+ throw new Error(error.message);
212
+ }
213
+ return this.version === 'v0' ?
214
+ {
178
215
  success: false,
179
216
  status: "unknown",
180
217
  current: 0,
@@ -182,71 +219,103 @@ export default class FirecrawlApp {
182
219
  current_step: "",
183
220
  total: 0,
184
221
  error: "Internal server error.",
222
+ } :
223
+ {
224
+ success: false,
225
+ error: "Internal server error.",
185
226
  };
186
- });
227
+ }
228
+ async mapUrl(url, params) {
229
+ if (this.version == 'v0') {
230
+ throw new Error("Map is not supported in v0");
231
+ }
232
+ const headers = this.prepareHeaders();
233
+ let jsonData = { url, ...params };
234
+ try {
235
+ const response = await this.postRequest(this.apiUrl + `/${this.version}/map`, jsonData, headers);
236
+ if (response.status === 200) {
237
+ return response.data;
238
+ }
239
+ else {
240
+ this.handleError(response, "map");
241
+ }
242
+ }
243
+ catch (error) {
244
+ throw new Error(error.message);
245
+ }
246
+ return { success: false, error: "Internal server error." };
187
247
  }
188
248
  /**
189
249
  * Prepares the headers for an API request.
190
- * @returns {AxiosRequestHeaders} The prepared headers.
250
+ * @param idempotencyKey - Optional key to ensure idempotency.
251
+ * @returns The prepared headers.
191
252
  */
192
253
  prepareHeaders(idempotencyKey) {
193
- return Object.assign({ "Content-Type": "application/json", Authorization: `Bearer ${this.apiKey}` }, (idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}));
254
+ return {
255
+ "Content-Type": "application/json",
256
+ Authorization: `Bearer ${this.apiKey}`,
257
+ ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
258
+ };
194
259
  }
195
260
  /**
196
261
  * Sends a POST request to the specified URL.
197
- * @param {string} url - The URL to send the request to.
198
- * @param {Params} data - The data to send in the request.
199
- * @param {AxiosRequestHeaders} headers - The headers for the request.
200
- * @returns {Promise<AxiosResponse>} The response from the POST request.
262
+ * @param url - The URL to send the request to.
263
+ * @param data - The data to send in the request.
264
+ * @param headers - The headers for the request.
265
+ * @returns The response from the POST request.
201
266
  */
202
267
  postRequest(url, data, headers) {
203
268
  return axios.post(url, data, { headers });
204
269
  }
205
270
  /**
206
271
  * Sends a GET request to the specified URL.
207
- * @param {string} url - The URL to send the request to.
208
- * @param {AxiosRequestHeaders} headers - The headers for the request.
209
- * @returns {Promise<AxiosResponse>} The response from the GET request.
272
+ * @param url - The URL to send the request to.
273
+ * @param headers - The headers for the request.
274
+ * @returns The response from the GET request.
210
275
  */
211
276
  getRequest(url, headers) {
212
277
  return axios.get(url, { headers });
213
278
  }
214
279
  /**
215
280
  * Monitors the status of a crawl job until completion or failure.
216
- * @param {string} jobId - The job ID of the crawl operation.
217
- * @param {AxiosRequestHeaders} headers - The headers for the request.
218
- * @param {number} timeout - Timeout in seconds for job status checks.
219
- * @returns {Promise<any>} The final job status or data.
281
+ * @param id - The ID of the crawl operation.
282
+ * @param headers - The headers for the request.
283
+ * @param checkInterval - Interval in seconds for job status checks.
284
+ * @param checkUrl - Optional URL to check the status (used for v1 API)
285
+ * @returns The final job status or data.
220
286
  */
221
- monitorJobStatus(jobId, headers, checkInterval) {
222
- return __awaiter(this, void 0, void 0, function* () {
223
- while (true) {
224
- const statusResponse = yield this.getRequest(this.apiUrl + `/v0/crawl/status/${jobId}`, headers);
225
- if (statusResponse.status === 200) {
226
- const statusData = statusResponse.data;
227
- if (statusData.status === "completed") {
228
- if ("data" in statusData) {
229
- return statusData.data;
230
- }
231
- else {
232
- throw new Error("Crawl job completed but no data was returned");
233
- }
234
- }
235
- else if (["active", "paused", "pending", "queued"].includes(statusData.status)) {
236
- if (checkInterval < 2) {
237
- checkInterval = 2;
238
- }
239
- yield new Promise((resolve) => setTimeout(resolve, checkInterval * 1000)); // Wait for the specified timeout before checking again
287
+ async monitorJobStatus(id, headers, checkInterval, checkUrl) {
288
+ let apiUrl = '';
289
+ while (true) {
290
+ if (this.version === 'v1') {
291
+ apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
292
+ }
293
+ else if (this.version === 'v0') {
294
+ apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
295
+ }
296
+ const statusResponse = await this.getRequest(apiUrl, headers);
297
+ if (statusResponse.status === 200) {
298
+ const statusData = statusResponse.data;
299
+ if (statusData.status === "completed") {
300
+ if ("data" in statusData) {
301
+ return this.version === 'v0' ? statusData.data : statusData;
240
302
  }
241
303
  else {
242
- throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
304
+ throw new Error("Crawl job completed but no data was returned");
243
305
  }
244
306
  }
307
+ else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) {
308
+ checkInterval = Math.max(checkInterval, 2);
309
+ await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000));
310
+ }
245
311
  else {
246
- this.handleError(statusResponse, "check crawl status");
312
+ throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
247
313
  }
248
314
  }
249
- });
315
+ else {
316
+ this.handleError(statusResponse, "check crawl status");
317
+ }
318
+ }
250
319
  }
251
320
  /**
252
321
  * Handles errors from API responses.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mendable/firecrawl-js",
3
- "version": "0.0.36",
3
+ "version": "1.0.1",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "build/cjs/index.js",
6
6
  "types": "types/index.d.ts",
@@ -19,7 +19,7 @@
19
19
  "build": "tsc --module commonjs --moduleResolution node10 --outDir build/cjs/ && echo '{\"type\": \"commonjs\"}' > build/cjs/package.json && npx tsc --module NodeNext --moduleResolution NodeNext --outDir build/esm/ && echo '{\"type\": \"module\"}' > build/esm/package.json",
20
20
  "build-and-publish": "npm run build && npm publish --access public",
21
21
  "publish-beta": "npm run build && npm publish --access public --tag beta",
22
- "test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/**/*.test.ts"
22
+ "test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
23
23
  },
24
24
  "repository": {
25
25
  "type": "git",