firecrawl 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,347 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.CrawlWatcher = void 0;
7
- const axios_1 = __importDefault(require("axios"));
8
- const zod_to_json_schema_1 = require("zod-to-json-schema");
9
- const isows_1 = require("isows");
10
- const typescript_event_target_1 = require("typescript-event-target");
11
- /**
12
- * Main class for interacting with the Firecrawl API.
13
- * Provides methods for scraping, searching, crawling, and mapping web content.
14
- */
15
- class FirecrawlApp {
16
- /**
17
- * Initializes a new instance of the FirecrawlApp class.
18
- * @param config - Configuration options for the FirecrawlApp instance.
19
- */
20
- constructor({ apiKey = null, apiUrl = null }) {
21
- this.apiKey = apiKey || "";
22
- this.apiUrl = apiUrl || "https://api.firecrawl.dev";
23
- }
24
- /**
25
- * Scrapes a URL using the Firecrawl API.
26
- * @param url - The URL to scrape.
27
- * @param params - Additional parameters for the scrape request.
28
- * @returns The response from the scrape operation.
29
- */
30
- async scrapeUrl(url, params) {
31
- const headers = {
32
- "Content-Type": "application/json",
33
- Authorization: `Bearer ${this.apiKey}`,
34
- };
35
- let jsonData = { url, ...params };
36
- if (jsonData?.extract?.schema) {
37
- let schema = jsonData.extract.schema;
38
- // Try parsing the schema as a Zod schema
39
- try {
40
- schema = (0, zod_to_json_schema_1.zodToJsonSchema)(schema);
41
- }
42
- catch (error) {
43
- }
44
- jsonData = {
45
- ...jsonData,
46
- extract: {
47
- ...jsonData.extract,
48
- schema: schema,
49
- },
50
- };
51
- }
52
- try {
53
- const response = await axios_1.default.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
54
- if (response.status === 200) {
55
- const responseData = response.data;
56
- if (responseData.success) {
57
- return {
58
- success: true,
59
- warning: responseData.warning,
60
- error: responseData.error,
61
- ...responseData.data
62
- };
63
- }
64
- else {
65
- throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
66
- }
67
- }
68
- else {
69
- this.handleError(response, "scrape URL");
70
- }
71
- }
72
- catch (error) {
73
- throw new Error(error.message);
74
- }
75
- return { success: false, error: "Internal server error." };
76
- }
77
- /**
78
- * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
79
- * @param query - The search query string.
80
- * @param params - Additional parameters for the search.
81
- * @returns Throws an error advising to use version 0 of the API.
82
- */
83
- async search(query, params) {
84
- throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
85
- }
86
- /**
87
- * Initiates a crawl job for a URL using the Firecrawl API.
88
- * @param url - The URL to crawl.
89
- * @param params - Additional parameters for the crawl request.
90
- * @param pollInterval - Time in seconds for job status checks.
91
- * @param idempotencyKey - Optional idempotency key for the request.
92
- * @returns The response from the crawl operation.
93
- */
94
- async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
95
- const headers = this.prepareHeaders(idempotencyKey);
96
- let jsonData = { url, ...params };
97
- try {
98
- const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
99
- if (response.status === 200) {
100
- const id = response.data.id;
101
- return this.monitorJobStatus(id, headers, pollInterval);
102
- }
103
- else {
104
- this.handleError(response, "start crawl job");
105
- }
106
- }
107
- catch (error) {
108
- if (error.response?.data?.error) {
109
- throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
110
- }
111
- else {
112
- throw new Error(error.message);
113
- }
114
- }
115
- return { success: false, error: "Internal server error." };
116
- }
117
- async asyncCrawlUrl(url, params, idempotencyKey) {
118
- const headers = this.prepareHeaders(idempotencyKey);
119
- let jsonData = { url, ...params };
120
- try {
121
- const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
122
- if (response.status === 200) {
123
- return response.data;
124
- }
125
- else {
126
- this.handleError(response, "start crawl job");
127
- }
128
- }
129
- catch (error) {
130
- if (error.response?.data?.error) {
131
- throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
132
- }
133
- else {
134
- throw new Error(error.message);
135
- }
136
- }
137
- return { success: false, error: "Internal server error." };
138
- }
139
- /**
140
- * Checks the status of a crawl job using the Firecrawl API.
141
- * @param id - The ID of the crawl operation.
142
- * @returns The response containing the job status.
143
- */
144
- async checkCrawlStatus(id) {
145
- if (!id) {
146
- throw new Error("No crawl ID provided");
147
- }
148
- const headers = this.prepareHeaders();
149
- try {
150
- const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
151
- if (response.status === 200) {
152
- return ({
153
- success: true,
154
- status: response.data.status,
155
- total: response.data.total,
156
- completed: response.data.completed,
157
- creditsUsed: response.data.creditsUsed,
158
- expiresAt: new Date(response.data.expiresAt),
159
- next: response.data.next,
160
- data: response.data.data,
161
- error: response.data.error
162
- });
163
- }
164
- else {
165
- this.handleError(response, "check crawl status");
166
- }
167
- }
168
- catch (error) {
169
- throw new Error(error.message);
170
- }
171
- return { success: false, error: "Internal server error." };
172
- }
173
- async crawlUrlAndWatch(url, params, idempotencyKey) {
174
- const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
175
- if (crawl.success && crawl.id) {
176
- const id = crawl.id;
177
- return new CrawlWatcher(id, this);
178
- }
179
- throw new Error("Crawl job failed to start");
180
- }
181
- async mapUrl(url, params) {
182
- const headers = this.prepareHeaders();
183
- let jsonData = { url, ...params };
184
- try {
185
- const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
186
- if (response.status === 200) {
187
- return response.data;
188
- }
189
- else {
190
- this.handleError(response, "map");
191
- }
192
- }
193
- catch (error) {
194
- throw new Error(error.message);
195
- }
196
- return { success: false, error: "Internal server error." };
197
- }
198
- /**
199
- * Prepares the headers for an API request.
200
- * @param idempotencyKey - Optional key to ensure idempotency.
201
- * @returns The prepared headers.
202
- */
203
- prepareHeaders(idempotencyKey) {
204
- return {
205
- "Content-Type": "application/json",
206
- Authorization: `Bearer ${this.apiKey}`,
207
- ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
208
- };
209
- }
210
- /**
211
- * Sends a POST request to the specified URL.
212
- * @param url - The URL to send the request to.
213
- * @param data - The data to send in the request.
214
- * @param headers - The headers for the request.
215
- * @returns The response from the POST request.
216
- */
217
- postRequest(url, data, headers) {
218
- return axios_1.default.post(url, data, { headers });
219
- }
220
- /**
221
- * Sends a GET request to the specified URL.
222
- * @param url - The URL to send the request to.
223
- * @param headers - The headers for the request.
224
- * @returns The response from the GET request.
225
- */
226
- getRequest(url, headers) {
227
- return axios_1.default.get(url, { headers });
228
- }
229
- /**
230
- * Monitors the status of a crawl job until completion or failure.
231
- * @param id - The ID of the crawl operation.
232
- * @param headers - The headers for the request.
233
- * @param checkInterval - Interval in seconds for job status checks.
234
- * @param checkUrl - Optional URL to check the status (used for v1 API)
235
- * @returns The final job status or data.
236
- */
237
- async monitorJobStatus(id, headers, checkInterval) {
238
- while (true) {
239
- const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
240
- if (statusResponse.status === 200) {
241
- const statusData = statusResponse.data;
242
- if (statusData.status === "completed") {
243
- if ("data" in statusData) {
244
- return statusData;
245
- }
246
- else {
247
- throw new Error("Crawl job completed but no data was returned");
248
- }
249
- }
250
- else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) {
251
- checkInterval = Math.max(checkInterval, 2);
252
- await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000));
253
- }
254
- else {
255
- throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
256
- }
257
- }
258
- else {
259
- this.handleError(statusResponse, "check crawl status");
260
- }
261
- }
262
- }
263
- /**
264
- * Handles errors from API responses.
265
- * @param {AxiosResponse} response - The response from the API.
266
- * @param {string} action - The action being performed when the error occurred.
267
- */
268
- handleError(response, action) {
269
- if ([402, 408, 409, 500].includes(response.status)) {
270
- const errorMessage = response.data.error || "Unknown error occurred";
271
- throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
272
- }
273
- else {
274
- throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
275
- }
276
- }
277
- }
278
- exports.default = FirecrawlApp;
279
- class CrawlWatcher extends typescript_event_target_1.TypedEventTarget {
280
- constructor(id, app) {
281
- super();
282
- this.ws = new isows_1.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
283
- this.status = "scraping";
284
- this.data = [];
285
- const messageHandler = (msg) => {
286
- if (msg.type === "done") {
287
- this.status = "completed";
288
- this.dispatchTypedEvent("done", new CustomEvent("done", {
289
- detail: {
290
- status: this.status,
291
- data: this.data,
292
- },
293
- }));
294
- }
295
- else if (msg.type === "error") {
296
- this.status = "failed";
297
- this.dispatchTypedEvent("error", new CustomEvent("error", {
298
- detail: {
299
- status: this.status,
300
- data: this.data,
301
- error: msg.error,
302
- },
303
- }));
304
- }
305
- else if (msg.type === "catchup") {
306
- this.status = msg.data.status;
307
- this.data.push(...(msg.data.data ?? []));
308
- for (const doc of this.data) {
309
- this.dispatchTypedEvent("document", new CustomEvent("document", {
310
- detail: doc,
311
- }));
312
- }
313
- }
314
- else if (msg.type === "document") {
315
- this.dispatchTypedEvent("document", new CustomEvent("document", {
316
- detail: msg.data,
317
- }));
318
- }
319
- };
320
- this.ws.onmessage = ((ev) => {
321
- if (typeof ev.data !== "string") {
322
- this.ws.close();
323
- return;
324
- }
325
- const msg = JSON.parse(ev.data);
326
- messageHandler(msg);
327
- }).bind(this);
328
- this.ws.onclose = ((ev) => {
329
- const msg = JSON.parse(ev.reason);
330
- messageHandler(msg);
331
- }).bind(this);
332
- this.ws.onerror = ((_) => {
333
- this.status = "failed";
334
- this.dispatchTypedEvent("error", new CustomEvent("error", {
335
- detail: {
336
- status: this.status,
337
- data: this.data,
338
- error: "WebSocket error",
339
- },
340
- }));
341
- }).bind(this);
342
- }
343
- close() {
344
- this.ws.close();
345
- }
346
- }
347
- exports.CrawlWatcher = CrawlWatcher;
@@ -1 +0,0 @@
1
- {"type": "commonjs"}
@@ -1,339 +0,0 @@
1
- import axios from "axios";
2
- import { zodToJsonSchema } from "zod-to-json-schema";
3
- import { WebSocket } from "isows";
4
- import { TypedEventTarget } from "typescript-event-target";
5
- /**
6
- * Main class for interacting with the Firecrawl API.
7
- * Provides methods for scraping, searching, crawling, and mapping web content.
8
- */
9
- export default class FirecrawlApp {
10
- /**
11
- * Initializes a new instance of the FirecrawlApp class.
12
- * @param config - Configuration options for the FirecrawlApp instance.
13
- */
14
- constructor({ apiKey = null, apiUrl = null }) {
15
- this.apiKey = apiKey || "";
16
- this.apiUrl = apiUrl || "https://api.firecrawl.dev";
17
- }
18
- /**
19
- * Scrapes a URL using the Firecrawl API.
20
- * @param url - The URL to scrape.
21
- * @param params - Additional parameters for the scrape request.
22
- * @returns The response from the scrape operation.
23
- */
24
- async scrapeUrl(url, params) {
25
- const headers = {
26
- "Content-Type": "application/json",
27
- Authorization: `Bearer ${this.apiKey}`,
28
- };
29
- let jsonData = { url, ...params };
30
- if (jsonData?.extract?.schema) {
31
- let schema = jsonData.extract.schema;
32
- // Try parsing the schema as a Zod schema
33
- try {
34
- schema = zodToJsonSchema(schema);
35
- }
36
- catch (error) {
37
- }
38
- jsonData = {
39
- ...jsonData,
40
- extract: {
41
- ...jsonData.extract,
42
- schema: schema,
43
- },
44
- };
45
- }
46
- try {
47
- const response = await axios.post(this.apiUrl + `/v1/scrape`, jsonData, { headers });
48
- if (response.status === 200) {
49
- const responseData = response.data;
50
- if (responseData.success) {
51
- return {
52
- success: true,
53
- warning: responseData.warning,
54
- error: responseData.error,
55
- ...responseData.data
56
- };
57
- }
58
- else {
59
- throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
60
- }
61
- }
62
- else {
63
- this.handleError(response, "scrape URL");
64
- }
65
- }
66
- catch (error) {
67
- throw new Error(error.message);
68
- }
69
- return { success: false, error: "Internal server error." };
70
- }
71
- /**
72
- * This method is intended to search for a query using the Firecrawl API. However, it is not supported in version 1 of the API.
73
- * @param query - The search query string.
74
- * @param params - Additional parameters for the search.
75
- * @returns Throws an error advising to use version 0 of the API.
76
- */
77
- async search(query, params) {
78
- throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
79
- }
80
- /**
81
- * Initiates a crawl job for a URL using the Firecrawl API.
82
- * @param url - The URL to crawl.
83
- * @param params - Additional parameters for the crawl request.
84
- * @param pollInterval - Time in seconds for job status checks.
85
- * @param idempotencyKey - Optional idempotency key for the request.
86
- * @returns The response from the crawl operation.
87
- */
88
- async crawlUrl(url, params, pollInterval = 2, idempotencyKey) {
89
- const headers = this.prepareHeaders(idempotencyKey);
90
- let jsonData = { url, ...params };
91
- try {
92
- const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
93
- if (response.status === 200) {
94
- const id = response.data.id;
95
- return this.monitorJobStatus(id, headers, pollInterval);
96
- }
97
- else {
98
- this.handleError(response, "start crawl job");
99
- }
100
- }
101
- catch (error) {
102
- if (error.response?.data?.error) {
103
- throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
104
- }
105
- else {
106
- throw new Error(error.message);
107
- }
108
- }
109
- return { success: false, error: "Internal server error." };
110
- }
111
- async asyncCrawlUrl(url, params, idempotencyKey) {
112
- const headers = this.prepareHeaders(idempotencyKey);
113
- let jsonData = { url, ...params };
114
- try {
115
- const response = await this.postRequest(this.apiUrl + `/v1/crawl`, jsonData, headers);
116
- if (response.status === 200) {
117
- return response.data;
118
- }
119
- else {
120
- this.handleError(response, "start crawl job");
121
- }
122
- }
123
- catch (error) {
124
- if (error.response?.data?.error) {
125
- throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
126
- }
127
- else {
128
- throw new Error(error.message);
129
- }
130
- }
131
- return { success: false, error: "Internal server error." };
132
- }
133
- /**
134
- * Checks the status of a crawl job using the Firecrawl API.
135
- * @param id - The ID of the crawl operation.
136
- * @returns The response containing the job status.
137
- */
138
- async checkCrawlStatus(id) {
139
- if (!id) {
140
- throw new Error("No crawl ID provided");
141
- }
142
- const headers = this.prepareHeaders();
143
- try {
144
- const response = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
145
- if (response.status === 200) {
146
- return ({
147
- success: true,
148
- status: response.data.status,
149
- total: response.data.total,
150
- completed: response.data.completed,
151
- creditsUsed: response.data.creditsUsed,
152
- expiresAt: new Date(response.data.expiresAt),
153
- next: response.data.next,
154
- data: response.data.data,
155
- error: response.data.error
156
- });
157
- }
158
- else {
159
- this.handleError(response, "check crawl status");
160
- }
161
- }
162
- catch (error) {
163
- throw new Error(error.message);
164
- }
165
- return { success: false, error: "Internal server error." };
166
- }
167
- async crawlUrlAndWatch(url, params, idempotencyKey) {
168
- const crawl = await this.asyncCrawlUrl(url, params, idempotencyKey);
169
- if (crawl.success && crawl.id) {
170
- const id = crawl.id;
171
- return new CrawlWatcher(id, this);
172
- }
173
- throw new Error("Crawl job failed to start");
174
- }
175
- async mapUrl(url, params) {
176
- const headers = this.prepareHeaders();
177
- let jsonData = { url, ...params };
178
- try {
179
- const response = await this.postRequest(this.apiUrl + `/v1/map`, jsonData, headers);
180
- if (response.status === 200) {
181
- return response.data;
182
- }
183
- else {
184
- this.handleError(response, "map");
185
- }
186
- }
187
- catch (error) {
188
- throw new Error(error.message);
189
- }
190
- return { success: false, error: "Internal server error." };
191
- }
192
- /**
193
- * Prepares the headers for an API request.
194
- * @param idempotencyKey - Optional key to ensure idempotency.
195
- * @returns The prepared headers.
196
- */
197
- prepareHeaders(idempotencyKey) {
198
- return {
199
- "Content-Type": "application/json",
200
- Authorization: `Bearer ${this.apiKey}`,
201
- ...(idempotencyKey ? { "x-idempotency-key": idempotencyKey } : {}),
202
- };
203
- }
204
- /**
205
- * Sends a POST request to the specified URL.
206
- * @param url - The URL to send the request to.
207
- * @param data - The data to send in the request.
208
- * @param headers - The headers for the request.
209
- * @returns The response from the POST request.
210
- */
211
- postRequest(url, data, headers) {
212
- return axios.post(url, data, { headers });
213
- }
214
- /**
215
- * Sends a GET request to the specified URL.
216
- * @param url - The URL to send the request to.
217
- * @param headers - The headers for the request.
218
- * @returns The response from the GET request.
219
- */
220
- getRequest(url, headers) {
221
- return axios.get(url, { headers });
222
- }
223
- /**
224
- * Monitors the status of a crawl job until completion or failure.
225
- * @param id - The ID of the crawl operation.
226
- * @param headers - The headers for the request.
227
- * @param checkInterval - Interval in seconds for job status checks.
228
- * @param checkUrl - Optional URL to check the status (used for v1 API)
229
- * @returns The final job status or data.
230
- */
231
- async monitorJobStatus(id, headers, checkInterval) {
232
- while (true) {
233
- const statusResponse = await this.getRequest(`${this.apiUrl}/v1/crawl/${id}`, headers);
234
- if (statusResponse.status === 200) {
235
- const statusData = statusResponse.data;
236
- if (statusData.status === "completed") {
237
- if ("data" in statusData) {
238
- return statusData;
239
- }
240
- else {
241
- throw new Error("Crawl job completed but no data was returned");
242
- }
243
- }
244
- else if (["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)) {
245
- checkInterval = Math.max(checkInterval, 2);
246
- await new Promise((resolve) => setTimeout(resolve, checkInterval * 1000));
247
- }
248
- else {
249
- throw new Error(`Crawl job failed or was stopped. Status: ${statusData.status}`);
250
- }
251
- }
252
- else {
253
- this.handleError(statusResponse, "check crawl status");
254
- }
255
- }
256
- }
257
- /**
258
- * Handles errors from API responses.
259
- * @param {AxiosResponse} response - The response from the API.
260
- * @param {string} action - The action being performed when the error occurred.
261
- */
262
- handleError(response, action) {
263
- if ([402, 408, 409, 500].includes(response.status)) {
264
- const errorMessage = response.data.error || "Unknown error occurred";
265
- throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`);
266
- }
267
- else {
268
- throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`);
269
- }
270
- }
271
- }
272
- export class CrawlWatcher extends TypedEventTarget {
273
- constructor(id, app) {
274
- super();
275
- this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
276
- this.status = "scraping";
277
- this.data = [];
278
- const messageHandler = (msg) => {
279
- if (msg.type === "done") {
280
- this.status = "completed";
281
- this.dispatchTypedEvent("done", new CustomEvent("done", {
282
- detail: {
283
- status: this.status,
284
- data: this.data,
285
- },
286
- }));
287
- }
288
- else if (msg.type === "error") {
289
- this.status = "failed";
290
- this.dispatchTypedEvent("error", new CustomEvent("error", {
291
- detail: {
292
- status: this.status,
293
- data: this.data,
294
- error: msg.error,
295
- },
296
- }));
297
- }
298
- else if (msg.type === "catchup") {
299
- this.status = msg.data.status;
300
- this.data.push(...(msg.data.data ?? []));
301
- for (const doc of this.data) {
302
- this.dispatchTypedEvent("document", new CustomEvent("document", {
303
- detail: doc,
304
- }));
305
- }
306
- }
307
- else if (msg.type === "document") {
308
- this.dispatchTypedEvent("document", new CustomEvent("document", {
309
- detail: msg.data,
310
- }));
311
- }
312
- };
313
- this.ws.onmessage = ((ev) => {
314
- if (typeof ev.data !== "string") {
315
- this.ws.close();
316
- return;
317
- }
318
- const msg = JSON.parse(ev.data);
319
- messageHandler(msg);
320
- }).bind(this);
321
- this.ws.onclose = ((ev) => {
322
- const msg = JSON.parse(ev.reason);
323
- messageHandler(msg);
324
- }).bind(this);
325
- this.ws.onerror = ((_) => {
326
- this.status = "failed";
327
- this.dispatchTypedEvent("error", new CustomEvent("error", {
328
- detail: {
329
- status: this.status,
330
- data: this.data,
331
- error: "WebSocket error",
332
- },
333
- }));
334
- }).bind(this);
335
- }
336
- close() {
337
- this.ws.close();
338
- }
339
- }
@@ -1 +0,0 @@
1
- {"type": "module"}