firecrawl 1.2.0 → 1.4.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import FirecrawlApp, { CrawlParams, CrawlResponse, CrawlStatusResponse, MapResponse, ScrapeParams, ScrapeResponse } from '../../../index';
1
+ import FirecrawlApp, { type CrawlParams, type CrawlResponse, type CrawlStatusResponse, type MapResponse, type ScrapeResponse } from '../../../index';
2
2
  import { v4 as uuidv4 } from 'uuid';
3
3
  import dotenv from 'dotenv';
4
4
  import { describe, test, expect } from '@jest/globals';
@@ -6,7 +6,7 @@ import { describe, test, expect } from '@jest/globals';
6
6
  dotenv.config();
7
7
 
8
8
  const TEST_API_KEY = process.env.TEST_API_KEY;
9
- const API_URL = "http://127.0.0.1:3002";
9
+ const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
10
10
 
11
11
  describe('FirecrawlApp E2E Tests', () => {
12
12
  test.concurrent('should throw error for no API key', async () => {
@@ -71,6 +71,7 @@ describe('FirecrawlApp E2E Tests', () => {
71
71
  expect(response.links?.length).toBeGreaterThan(0);
72
72
  expect(response.links?.[0]).toContain("https://");
73
73
  expect(response.metadata).not.toBeNull();
74
+ expect(response.metadata).not.toBeUndefined();
74
75
  expect(response.metadata).toHaveProperty("title");
75
76
  expect(response.metadata).toHaveProperty("description");
76
77
  expect(response.metadata).toHaveProperty("keywords");
@@ -85,19 +86,21 @@ describe('FirecrawlApp E2E Tests', () => {
85
86
  expect(response.metadata).not.toHaveProperty("pageStatusCode");
86
87
  expect(response.metadata).toHaveProperty("statusCode");
87
88
  expect(response.metadata).not.toHaveProperty("pageError");
88
- expect(response.metadata.error).toBeUndefined();
89
- expect(response.metadata.title).toBe("Roast My Website");
90
- expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
91
- expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
92
- expect(response.metadata.robots).toBe("follow, index");
93
- expect(response.metadata.ogTitle).toBe("Roast My Website");
94
- expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
95
- expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
96
- expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
97
- expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
98
- expect(response.metadata.ogSiteName).toBe("Roast My Website");
99
- expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
100
- expect(response.metadata.statusCode).toBe(200);
89
+ if (response.metadata !== undefined) {
90
+ expect(response.metadata.error).toBeUndefined();
91
+ expect(response.metadata.title).toBe("Roast My Website");
92
+ expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
93
+ expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
94
+ expect(response.metadata.robots).toBe("follow, index");
95
+ expect(response.metadata.ogTitle).toBe("Roast My Website");
96
+ expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
97
+ expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
98
+ expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
99
+ expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
100
+ expect(response.metadata.ogSiteName).toBe("Roast My Website");
101
+ expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
102
+ expect(response.metadata.statusCode).toBe(200);
103
+ }
101
104
  }, 30000); // 30 seconds timeout
102
105
 
103
106
  test.concurrent('should return successful response for valid scrape with PDF file', async () => {
@@ -127,7 +130,7 @@ describe('FirecrawlApp E2E Tests', () => {
127
130
 
128
131
  test.concurrent('should return successful response for crawl and wait for completion', async () => {
129
132
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
130
- const response = await app.crawlUrl('https://roastmywebsite.ai', {}, true, 30) as CrawlStatusResponse;
133
+ const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse;
131
134
  expect(response).not.toBeNull();
132
135
  expect(response).toHaveProperty("total");
133
136
  expect(response.total).toBeGreaterThan(0);
@@ -138,21 +141,25 @@ describe('FirecrawlApp E2E Tests', () => {
138
141
  expect(response).toHaveProperty("status");
139
142
  expect(response.status).toBe("completed");
140
143
  expect(response).not.toHaveProperty("next"); // wait until done
141
- expect(response.data?.length).toBeGreaterThan(0);
142
- expect(response.data?.[0]).toHaveProperty("markdown");
143
- expect(response.data?.[0].markdown).toContain("_Roast_");
144
- expect(response.data?.[0]).not.toHaveProperty('content'); // v0
145
- expect(response.data?.[0]).not.toHaveProperty("html");
146
- expect(response.data?.[0]).not.toHaveProperty("rawHtml");
147
- expect(response.data?.[0]).not.toHaveProperty("screenshot");
148
- expect(response.data?.[0]).not.toHaveProperty("links");
149
- expect(response.data?.[0]).toHaveProperty("metadata");
150
- expect(response.data?.[0].metadata).toHaveProperty("title");
151
- expect(response.data?.[0].metadata).toHaveProperty("description");
152
- expect(response.data?.[0].metadata).toHaveProperty("language");
153
- expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
154
- expect(response.data?.[0].metadata).toHaveProperty("statusCode");
155
- expect(response.data?.[0].metadata).not.toHaveProperty("error");
144
+ expect(response.data.length).toBeGreaterThan(0);
145
+ expect(response.data[0]).not.toBeNull();
146
+ expect(response.data[0]).not.toBeUndefined();
147
+ if (response.data[0]) {
148
+ expect(response.data[0]).toHaveProperty("markdown");
149
+ expect(response.data[0].markdown).toContain("_Roast_");
150
+ expect(response.data[0]).not.toHaveProperty('content'); // v0
151
+ expect(response.data[0]).not.toHaveProperty("html");
152
+ expect(response.data[0]).not.toHaveProperty("rawHtml");
153
+ expect(response.data[0]).not.toHaveProperty("screenshot");
154
+ expect(response.data[0]).not.toHaveProperty("links");
155
+ expect(response.data[0]).toHaveProperty("metadata");
156
+ expect(response.data[0].metadata).toHaveProperty("title");
157
+ expect(response.data[0].metadata).toHaveProperty("description");
158
+ expect(response.data[0].metadata).toHaveProperty("language");
159
+ expect(response.data[0].metadata).toHaveProperty("sourceURL");
160
+ expect(response.data[0].metadata).toHaveProperty("statusCode");
161
+ expect(response.data[0].metadata).not.toHaveProperty("error");
162
+ }
156
163
  }, 60000); // 60 seconds timeout
157
164
 
158
165
  test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
@@ -173,7 +180,7 @@ describe('FirecrawlApp E2E Tests', () => {
173
180
  onlyMainContent: true,
174
181
  waitFor: 1000
175
182
  }
176
- } as CrawlParams, true, 30) as CrawlStatusResponse;
183
+ } as CrawlParams, 30) as CrawlStatusResponse;
177
184
  expect(response).not.toBeNull();
178
185
  expect(response).toHaveProperty("total");
179
186
  expect(response.total).toBeGreaterThan(0);
@@ -184,41 +191,45 @@ describe('FirecrawlApp E2E Tests', () => {
184
191
  expect(response).toHaveProperty("status");
185
192
  expect(response.status).toBe("completed");
186
193
  expect(response).not.toHaveProperty("next");
187
- expect(response.data?.length).toBeGreaterThan(0);
188
- expect(response.data?.[0]).toHaveProperty("markdown");
189
- expect(response.data?.[0].markdown).toContain("_Roast_");
190
- expect(response.data?.[0]).not.toHaveProperty('content'); // v0
191
- expect(response.data?.[0]).toHaveProperty("html");
192
- expect(response.data?.[0].html).toContain("<h1");
193
- expect(response.data?.[0]).toHaveProperty("rawHtml");
194
- expect(response.data?.[0].rawHtml).toContain("<h1");
195
- expect(response.data?.[0]).toHaveProperty("screenshot");
196
- expect(response.data?.[0].screenshot).toContain("https://");
197
- expect(response.data?.[0]).toHaveProperty("links");
198
- expect(response.data?.[0].links).not.toBeNull();
199
- expect(response.data?.[0].links?.length).toBeGreaterThan(0);
200
- expect(response.data?.[0]).toHaveProperty("metadata");
201
- expect(response.data?.[0].metadata).toHaveProperty("title");
202
- expect(response.data?.[0].metadata).toHaveProperty("description");
203
- expect(response.data?.[0].metadata).toHaveProperty("language");
204
- expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
205
- expect(response.data?.[0].metadata).toHaveProperty("statusCode");
206
- expect(response.data?.[0].metadata).not.toHaveProperty("error");
194
+ expect(response.data.length).toBeGreaterThan(0);
195
+ expect(response.data[0]).not.toBeNull();
196
+ expect(response.data[0]).not.toBeUndefined();
197
+ if (response.data[0]) {
198
+ expect(response.data[0]).toHaveProperty("markdown");
199
+ expect(response.data[0].markdown).toContain("_Roast_");
200
+ expect(response.data[0]).not.toHaveProperty('content'); // v0
201
+ expect(response.data[0]).toHaveProperty("html");
202
+ expect(response.data[0].html).toContain("<h1");
203
+ expect(response.data[0]).toHaveProperty("rawHtml");
204
+ expect(response.data[0].rawHtml).toContain("<h1");
205
+ expect(response.data[0]).toHaveProperty("screenshot");
206
+ expect(response.data[0].screenshot).toContain("https://");
207
+ expect(response.data[0]).toHaveProperty("links");
208
+ expect(response.data[0].links).not.toBeNull();
209
+ expect(response.data[0].links?.length).toBeGreaterThan(0);
210
+ expect(response.data[0]).toHaveProperty("metadata");
211
+ expect(response.data[0].metadata).toHaveProperty("title");
212
+ expect(response.data[0].metadata).toHaveProperty("description");
213
+ expect(response.data[0].metadata).toHaveProperty("language");
214
+ expect(response.data[0].metadata).toHaveProperty("sourceURL");
215
+ expect(response.data[0].metadata).toHaveProperty("statusCode");
216
+ expect(response.data[0].metadata).not.toHaveProperty("error");
217
+ }
207
218
  }, 60000); // 60 seconds timeout
208
219
 
209
220
  test.concurrent('should handle idempotency key for crawl', async () => {
210
221
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
211
222
  const uniqueIdempotencyKey = uuidv4();
212
- const response = await app.crawlUrl('https://roastmywebsite.ai', {}, false, 2, uniqueIdempotencyKey) as CrawlResponse;
223
+ const response = await app.asyncCrawlUrl('https://roastmywebsite.ai', {}, uniqueIdempotencyKey) as CrawlResponse;
213
224
  expect(response).not.toBeNull();
214
225
  expect(response.id).toBeDefined();
215
226
 
216
- await expect(app.crawlUrl('https://roastmywebsite.ai', {}, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
227
+ await expect(app.crawlUrl('https://roastmywebsite.ai', {}, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
217
228
  });
218
229
 
219
230
  test.concurrent('should check crawl status', async () => {
220
231
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
221
- const response = await app.crawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams, false) as CrawlResponse;
232
+ const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
222
233
  expect(response).not.toBeNull();
223
234
  expect(response.id).toBeDefined();
224
235
 
@@ -226,7 +237,8 @@ describe('FirecrawlApp E2E Tests', () => {
226
237
  const maxChecks = 15;
227
238
  let checks = 0;
228
239
 
229
- while (statusResponse.status === 'scraping' && checks < maxChecks) {
240
+ expect(statusResponse.success).toBe(true);
241
+ while ((statusResponse as any).status === 'scraping' && checks < maxChecks) {
230
242
  await new Promise(resolve => setTimeout(resolve, 5000));
231
243
  expect(statusResponse).not.toHaveProperty("partial_data"); // v0
232
244
  expect(statusResponse).not.toHaveProperty("current"); // v0
@@ -236,44 +248,55 @@ describe('FirecrawlApp E2E Tests', () => {
236
248
  expect(statusResponse).toHaveProperty("expiresAt");
237
249
  expect(statusResponse).toHaveProperty("status");
238
250
  expect(statusResponse).toHaveProperty("next");
239
- expect(statusResponse.total).toBeGreaterThan(0);
240
- expect(statusResponse.creditsUsed).toBeGreaterThan(0);
241
- expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
242
- expect(statusResponse.status).toBe("scraping");
243
- expect(statusResponse.next).toContain("/v1/crawl/");
251
+ expect(statusResponse.success).toBe(true);
252
+ if (statusResponse.success === true) {
253
+ expect(statusResponse.total).toBeGreaterThan(0);
254
+ expect(statusResponse.creditsUsed).toBeGreaterThan(0);
255
+ expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
256
+ expect(statusResponse.status).toBe("scraping");
257
+ expect(statusResponse.next).toContain("/v1/crawl/");
258
+ }
244
259
  statusResponse = await app.checkCrawlStatus(response.id) as CrawlStatusResponse;
260
+ expect(statusResponse.success).toBe(true);
245
261
  checks++;
246
262
  }
247
263
 
248
264
  expect(statusResponse).not.toBeNull();
249
265
  expect(statusResponse).toHaveProperty("total");
250
- expect(statusResponse.total).toBeGreaterThan(0);
251
- expect(statusResponse).toHaveProperty("creditsUsed");
252
- expect(statusResponse.creditsUsed).toBeGreaterThan(0);
253
- expect(statusResponse).toHaveProperty("expiresAt");
254
- expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
255
- expect(statusResponse).toHaveProperty("status");
256
- expect(statusResponse.status).toBe("completed");
257
- expect(statusResponse.data?.length).toBeGreaterThan(0);
258
- expect(statusResponse.data?.[0]).toHaveProperty("markdown");
259
- expect(statusResponse.data?.[0].markdown?.length).toBeGreaterThan(10);
260
- expect(statusResponse.data?.[0]).not.toHaveProperty('content'); // v0
261
- expect(statusResponse.data?.[0]).toHaveProperty("html");
262
- expect(statusResponse.data?.[0].html).toContain("<div");
263
- expect(statusResponse.data?.[0]).toHaveProperty("rawHtml");
264
- expect(statusResponse.data?.[0].rawHtml).toContain("<div");
265
- expect(statusResponse.data?.[0]).toHaveProperty("screenshot");
266
- expect(statusResponse.data?.[0].screenshot).toContain("https://");
267
- expect(statusResponse.data?.[0]).toHaveProperty("links");
268
- expect(statusResponse.data?.[0].links).not.toBeNull();
269
- expect(statusResponse.data?.[0].links?.length).toBeGreaterThan(0);
270
- expect(statusResponse.data?.[0]).toHaveProperty("metadata");
271
- expect(statusResponse.data?.[0].metadata).toHaveProperty("title");
272
- expect(statusResponse.data?.[0].metadata).toHaveProperty("description");
273
- expect(statusResponse.data?.[0].metadata).toHaveProperty("language");
274
- expect(statusResponse.data?.[0].metadata).toHaveProperty("sourceURL");
275
- expect(statusResponse.data?.[0].metadata).toHaveProperty("statusCode");
276
- expect(statusResponse.data?.[0].metadata).not.toHaveProperty("error");
266
+ expect(statusResponse.success).toBe(true);
267
+ if (statusResponse.success === true) {
268
+ expect(statusResponse.total).toBeGreaterThan(0);
269
+ expect(statusResponse).toHaveProperty("creditsUsed");
270
+ expect(statusResponse.creditsUsed).toBeGreaterThan(0);
271
+ expect(statusResponse).toHaveProperty("expiresAt");
272
+ expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
273
+ expect(statusResponse).toHaveProperty("status");
274
+ expect(statusResponse.status).toBe("completed");
275
+ expect(statusResponse.data.length).toBeGreaterThan(0);
276
+ expect(statusResponse.data[0]).not.toBeNull();
277
+ expect(statusResponse.data[0]).not.toBeUndefined();
278
+ if (statusResponse.data[0]) {
279
+ expect(statusResponse.data[0]).toHaveProperty("markdown");
280
+ expect(statusResponse.data[0].markdown?.length).toBeGreaterThan(10);
281
+ expect(statusResponse.data[0]).not.toHaveProperty('content'); // v0
282
+ expect(statusResponse.data[0]).toHaveProperty("html");
283
+ expect(statusResponse.data[0].html).toContain("<div");
284
+ expect(statusResponse.data[0]).toHaveProperty("rawHtml");
285
+ expect(statusResponse.data[0].rawHtml).toContain("<div");
286
+ expect(statusResponse.data[0]).toHaveProperty("screenshot");
287
+ expect(statusResponse.data[0].screenshot).toContain("https://");
288
+ expect(statusResponse.data[0]).toHaveProperty("links");
289
+ expect(statusResponse.data[0].links).not.toBeNull();
290
+ expect(statusResponse.data[0].links?.length).toBeGreaterThan(0);
291
+ expect(statusResponse.data[0]).toHaveProperty("metadata");
292
+ expect(statusResponse.data[0].metadata).toHaveProperty("title");
293
+ expect(statusResponse.data[0].metadata).toHaveProperty("description");
294
+ expect(statusResponse.data[0].metadata).toHaveProperty("language");
295
+ expect(statusResponse.data[0].metadata).toHaveProperty("sourceURL");
296
+ expect(statusResponse.data[0].metadata).toHaveProperty("statusCode");
297
+ expect(statusResponse.data[0].metadata).not.toHaveProperty("error");
298
+ }
299
+ }
277
300
  }, 60000); // 60 seconds timeout
278
301
 
279
302
  test.concurrent('should throw error for invalid API key on map', async () => {
package/src/index.ts CHANGED
@@ -1,5 +1,5 @@
1
- import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
2
- import { z } from "zod";
1
+ import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
2
+ import type * as zt from "zod";
3
3
  import { zodToJsonSchema } from "zod-to-json-schema";
4
4
  import { WebSocket } from "isows";
5
5
  import { TypedEventTarget } from "typescript-event-target";
@@ -58,13 +58,13 @@ export interface FirecrawlDocumentMetadata {
58
58
  * Document interface for Firecrawl.
59
59
  * Represents a document retrieved or processed by Firecrawl.
60
60
  */
61
- export interface FirecrawlDocument {
61
+ export interface FirecrawlDocument<T> {
62
62
  url?: string;
63
63
  markdown?: string;
64
64
  html?: string;
65
65
  rawHtml?: string;
66
66
  links?: string[];
67
- extract?: Record<any, any>;
67
+ extract?: T;
68
68
  screenshot?: string;
69
69
  metadata?: FirecrawlDocumentMetadata;
70
70
  }
@@ -73,26 +73,29 @@ export interface FirecrawlDocument {
73
73
  * Parameters for scraping operations.
74
74
  * Defines the options and configurations available for scraping web content.
75
75
  */
76
- export interface ScrapeParams {
76
+ export interface CrawlScrapeOptions {
77
77
  formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
78
78
  headers?: Record<string, string>;
79
79
  includeTags?: string[];
80
80
  excludeTags?: string[];
81
81
  onlyMainContent?: boolean;
82
+ waitFor?: number;
83
+ timeout?: number;
84
+ }
85
+
86
+ export interface ScrapeParams<LLMSchema extends zt.ZodSchema> extends CrawlScrapeOptions {
82
87
  extract?: {
83
88
  prompt?: string;
84
- schema?: z.ZodSchema | any;
89
+ schema?: LLMSchema;
85
90
  systemPrompt?: string;
86
91
  };
87
- waitFor?: number;
88
- timeout?: number;
89
92
  }
90
93
 
91
94
  /**
92
95
  * Response interface for scraping operations.
93
96
  * Defines the structure of the response received after a scraping operation.
94
97
  */
95
- export interface ScrapeResponse extends FirecrawlDocument {
98
+ export interface ScrapeResponse<LLMResult> extends FirecrawlDocument<LLMResult> {
96
99
  success: true;
97
100
  warning?: string;
98
101
  error?: string;
@@ -110,7 +113,8 @@ export interface CrawlParams {
110
113
  allowBackwardLinks?: boolean;
111
114
  allowExternalLinks?: boolean;
112
115
  ignoreSitemap?: boolean;
113
- scrapeOptions?: ScrapeParams;
116
+ scrapeOptions?: CrawlScrapeOptions;
117
+ webhook?: string;
114
118
  }
115
119
 
116
120
  /**
@@ -130,15 +134,14 @@ export interface CrawlResponse {
130
134
  */
131
135
  export interface CrawlStatusResponse {
132
136
  success: true;
133
- total: number;
137
+ status: "scraping" | "completed" | "failed" | "cancelled";
134
138
  completed: number;
139
+ total: number;
135
140
  creditsUsed: number;
136
141
  expiresAt: Date;
137
- status: "scraping" | "completed" | "failed";
138
- next: string;
139
- data?: FirecrawlDocument[];
140
- error?: string;
141
- }
142
+ next?: string;
143
+ data: FirecrawlDocument<undefined>[];
144
+ };
142
145
 
143
146
  /**
144
147
  * Parameters for mapping operations.
@@ -183,7 +186,11 @@ export default class FirecrawlApp {
183
186
  * @param config - Configuration options for the FirecrawlApp instance.
184
187
  */
185
188
  constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
186
- this.apiKey = apiKey || "";
189
+ if (typeof apiKey !== "string") {
190
+ throw new Error("No API key provided");
191
+ }
192
+
193
+ this.apiKey = apiKey;
187
194
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
188
195
  }
189
196
 
@@ -193,10 +200,10 @@ export default class FirecrawlApp {
193
200
  * @param params - Additional parameters for the scrape request.
194
201
  * @returns The response from the scrape operation.
195
202
  */
196
- async scrapeUrl(
203
+ async scrapeUrl<T extends zt.ZodSchema>(
197
204
  url: string,
198
- params?: ScrapeParams
199
- ): Promise<ScrapeResponse | ErrorResponse> {
205
+ params?: ScrapeParams<T>
206
+ ): Promise<ScrapeResponse<zt.infer<T>> | ErrorResponse> {
200
207
  const headers: AxiosRequestHeaders = {
201
208
  "Content-Type": "application/json",
202
209
  Authorization: `Bearer ${this.apiKey}`,
@@ -328,9 +335,10 @@ export default class FirecrawlApp {
328
335
  /**
329
336
  * Checks the status of a crawl job using the Firecrawl API.
330
337
  * @param id - The ID of the crawl operation.
338
+ * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
331
339
  * @returns The response containing the job status.
332
340
  */
333
- async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
341
+ async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
334
342
  if (!id) {
335
343
  throw new Error("No crawl ID provided");
336
344
  }
@@ -342,16 +350,28 @@ export default class FirecrawlApp {
342
350
  headers
343
351
  );
344
352
  if (response.status === 200) {
353
+ let allData = response.data.data;
354
+ if (getAllData && response.data.status === "completed") {
355
+ let statusData = response.data
356
+ if ("data" in statusData) {
357
+ let data = statusData.data;
358
+ while ('next' in statusData) {
359
+ statusData = (await this.getRequest(statusData.next, headers)).data;
360
+ data = data.concat(statusData.data);
361
+ }
362
+ allData = data;
363
+ }
364
+ }
345
365
  return ({
346
- success: true,
366
+ success: response.data.success,
347
367
  status: response.data.status,
348
368
  total: response.data.total,
349
369
  completed: response.data.completed,
350
370
  creditsUsed: response.data.creditsUsed,
351
371
  expiresAt: new Date(response.data.expiresAt),
352
372
  next: response.data.next,
353
- data: response.data.data,
354
- error: response.data.error
373
+ data: allData,
374
+ error: response.data.error,
355
375
  })
356
376
  } else {
357
377
  this.handleError(response, "check crawl status");
@@ -451,22 +471,29 @@ export default class FirecrawlApp {
451
471
  id: string,
452
472
  headers: AxiosRequestHeaders,
453
473
  checkInterval: number
454
- ): Promise<CrawlStatusResponse> {
474
+ ): Promise<CrawlStatusResponse | ErrorResponse> {
455
475
  while (true) {
456
- const statusResponse: AxiosResponse = await this.getRequest(
476
+ let statusResponse: AxiosResponse = await this.getRequest(
457
477
  `${this.apiUrl}/v1/crawl/${id}`,
458
478
  headers
459
479
  );
460
480
  if (statusResponse.status === 200) {
461
- const statusData = statusResponse.data;
462
- if (statusData.status === "completed") {
463
- if ("data" in statusData) {
464
- return statusData;
465
- } else {
466
- throw new Error("Crawl job completed but no data was returned");
467
- }
468
- } else if (
469
- ["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
481
+ let statusData = statusResponse.data;
482
+ if (statusData.status === "completed") {
483
+ if ("data" in statusData) {
484
+ let data = statusData.data;
485
+ while ('next' in statusData) {
486
+ statusResponse = await this.getRequest(statusData.next, headers);
487
+ statusData = statusResponse.data;
488
+ data = data.concat(statusData.data);
489
+ }
490
+ statusData.data = data;
491
+ return statusData;
492
+ } else {
493
+ throw new Error("Crawl job completed but no data was returned");
494
+ }
495
+ } else if (
496
+ ["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
470
497
  ) {
471
498
  checkInterval = Math.max(checkInterval, 2);
472
499
  await new Promise((resolve) =>
@@ -504,21 +531,21 @@ export default class FirecrawlApp {
504
531
  }
505
532
 
506
533
  interface CrawlWatcherEvents {
507
- document: CustomEvent<FirecrawlDocument>,
534
+ document: CustomEvent<FirecrawlDocument<undefined>>,
508
535
  done: CustomEvent<{
509
536
  status: CrawlStatusResponse["status"];
510
- data: FirecrawlDocument[];
537
+ data: FirecrawlDocument<undefined>[];
511
538
  }>,
512
539
  error: CustomEvent<{
513
540
  status: CrawlStatusResponse["status"],
514
- data: FirecrawlDocument[],
541
+ data: FirecrawlDocument<undefined>[],
515
542
  error: string,
516
543
  }>,
517
544
  }
518
545
 
519
546
  export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
520
547
  private ws: WebSocket;
521
- public data: FirecrawlDocument[];
548
+ public data: FirecrawlDocument<undefined>[];
522
549
  public status: CrawlStatusResponse["status"];
523
550
 
524
551
  constructor(id: string, app: FirecrawlApp) {
@@ -539,7 +566,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
539
566
 
540
567
  type DocumentMessage = {
541
568
  type: "document",
542
- data: FirecrawlDocument,
569
+ data: FirecrawlDocument<undefined>,
543
570
  }
544
571
 
545
572
  type DoneMessage = { type: "done" }