@mendable/firecrawl-js 1.2.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import FirecrawlApp, { CrawlParams, CrawlResponse, CrawlStatusResponse, MapResponse, ScrapeParams, ScrapeResponse } from '../../../index';
1
+ import FirecrawlApp, { type CrawlParams, type CrawlResponse, type CrawlStatusResponse, type MapResponse, type ScrapeResponse } from '../../../index';
2
2
  import { v4 as uuidv4 } from 'uuid';
3
3
  import dotenv from 'dotenv';
4
4
  import { describe, test, expect } from '@jest/globals';
@@ -6,7 +6,7 @@ import { describe, test, expect } from '@jest/globals';
6
6
  dotenv.config();
7
7
 
8
8
  const TEST_API_KEY = process.env.TEST_API_KEY;
9
- const API_URL = "http://127.0.0.1:3002";
9
+ const API_URL = process.env.API_URL ?? "https://api.firecrawl.dev";
10
10
 
11
11
  describe('FirecrawlApp E2E Tests', () => {
12
12
  test.concurrent('should throw error for no API key', async () => {
@@ -71,6 +71,7 @@ describe('FirecrawlApp E2E Tests', () => {
71
71
  expect(response.links?.length).toBeGreaterThan(0);
72
72
  expect(response.links?.[0]).toContain("https://");
73
73
  expect(response.metadata).not.toBeNull();
74
+ expect(response.metadata).not.toBeUndefined();
74
75
  expect(response.metadata).toHaveProperty("title");
75
76
  expect(response.metadata).toHaveProperty("description");
76
77
  expect(response.metadata).toHaveProperty("keywords");
@@ -85,19 +86,21 @@ describe('FirecrawlApp E2E Tests', () => {
85
86
  expect(response.metadata).not.toHaveProperty("pageStatusCode");
86
87
  expect(response.metadata).toHaveProperty("statusCode");
87
88
  expect(response.metadata).not.toHaveProperty("pageError");
88
- expect(response.metadata.error).toBeUndefined();
89
- expect(response.metadata.title).toBe("Roast My Website");
90
- expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
91
- expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
92
- expect(response.metadata.robots).toBe("follow, index");
93
- expect(response.metadata.ogTitle).toBe("Roast My Website");
94
- expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
95
- expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
96
- expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
97
- expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
98
- expect(response.metadata.ogSiteName).toBe("Roast My Website");
99
- expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
100
- expect(response.metadata.statusCode).toBe(200);
89
+ if (response.metadata !== undefined) {
90
+ expect(response.metadata.error).toBeUndefined();
91
+ expect(response.metadata.title).toBe("Roast My Website");
92
+ expect(response.metadata.description).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
93
+ expect(response.metadata.keywords).toBe("Roast My Website,Roast,Website,GitHub,Firecrawl");
94
+ expect(response.metadata.robots).toBe("follow, index");
95
+ expect(response.metadata.ogTitle).toBe("Roast My Website");
96
+ expect(response.metadata.ogDescription).toBe("Welcome to Roast My Website, the ultimate tool for putting your website through the wringer! This repository harnesses the power of Firecrawl to scrape and capture screenshots of websites, and then unleashes the latest LLM vision models to mercilessly roast them. 🌶️");
97
+ expect(response.metadata.ogUrl).toBe("https://www.roastmywebsite.ai");
98
+ expect(response.metadata.ogImage).toBe("https://www.roastmywebsite.ai/og.png");
99
+ expect(response.metadata.ogLocaleAlternate).toStrictEqual([]);
100
+ expect(response.metadata.ogSiteName).toBe("Roast My Website");
101
+ expect(response.metadata.sourceURL).toBe("https://roastmywebsite.ai");
102
+ expect(response.metadata.statusCode).toBe(200);
103
+ }
101
104
  }, 30000); // 30 seconds timeout
102
105
 
103
106
  test.concurrent('should return successful response for valid scrape with PDF file', async () => {
@@ -127,7 +130,7 @@ describe('FirecrawlApp E2E Tests', () => {
127
130
 
128
131
  test.concurrent('should return successful response for crawl and wait for completion', async () => {
129
132
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
130
- const response = await app.crawlUrl('https://roastmywebsite.ai', {}, true, 30) as CrawlStatusResponse;
133
+ const response = await app.crawlUrl('https://roastmywebsite.ai', {}, 30) as CrawlStatusResponse;
131
134
  expect(response).not.toBeNull();
132
135
  expect(response).toHaveProperty("total");
133
136
  expect(response.total).toBeGreaterThan(0);
@@ -138,21 +141,25 @@ describe('FirecrawlApp E2E Tests', () => {
138
141
  expect(response).toHaveProperty("status");
139
142
  expect(response.status).toBe("completed");
140
143
  expect(response).not.toHaveProperty("next"); // wait until done
141
- expect(response.data?.length).toBeGreaterThan(0);
142
- expect(response.data?.[0]).toHaveProperty("markdown");
143
- expect(response.data?.[0].markdown).toContain("_Roast_");
144
- expect(response.data?.[0]).not.toHaveProperty('content'); // v0
145
- expect(response.data?.[0]).not.toHaveProperty("html");
146
- expect(response.data?.[0]).not.toHaveProperty("rawHtml");
147
- expect(response.data?.[0]).not.toHaveProperty("screenshot");
148
- expect(response.data?.[0]).not.toHaveProperty("links");
149
- expect(response.data?.[0]).toHaveProperty("metadata");
150
- expect(response.data?.[0].metadata).toHaveProperty("title");
151
- expect(response.data?.[0].metadata).toHaveProperty("description");
152
- expect(response.data?.[0].metadata).toHaveProperty("language");
153
- expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
154
- expect(response.data?.[0].metadata).toHaveProperty("statusCode");
155
- expect(response.data?.[0].metadata).not.toHaveProperty("error");
144
+ expect(response.data.length).toBeGreaterThan(0);
145
+ expect(response.data[0]).not.toBeNull();
146
+ expect(response.data[0]).not.toBeUndefined();
147
+ if (response.data[0]) {
148
+ expect(response.data[0]).toHaveProperty("markdown");
149
+ expect(response.data[0].markdown).toContain("_Roast_");
150
+ expect(response.data[0]).not.toHaveProperty('content'); // v0
151
+ expect(response.data[0]).not.toHaveProperty("html");
152
+ expect(response.data[0]).not.toHaveProperty("rawHtml");
153
+ expect(response.data[0]).not.toHaveProperty("screenshot");
154
+ expect(response.data[0]).not.toHaveProperty("links");
155
+ expect(response.data[0]).toHaveProperty("metadata");
156
+ expect(response.data[0].metadata).toHaveProperty("title");
157
+ expect(response.data[0].metadata).toHaveProperty("description");
158
+ expect(response.data[0].metadata).toHaveProperty("language");
159
+ expect(response.data[0].metadata).toHaveProperty("sourceURL");
160
+ expect(response.data[0].metadata).toHaveProperty("statusCode");
161
+ expect(response.data[0].metadata).not.toHaveProperty("error");
162
+ }
156
163
  }, 60000); // 60 seconds timeout
157
164
 
158
165
  test.concurrent('should return successful response for crawl with options and wait for completion', async () => {
@@ -173,7 +180,7 @@ describe('FirecrawlApp E2E Tests', () => {
173
180
  onlyMainContent: true,
174
181
  waitFor: 1000
175
182
  }
176
- } as CrawlParams, true, 30) as CrawlStatusResponse;
183
+ } as CrawlParams, 30) as CrawlStatusResponse;
177
184
  expect(response).not.toBeNull();
178
185
  expect(response).toHaveProperty("total");
179
186
  expect(response.total).toBeGreaterThan(0);
@@ -184,41 +191,45 @@ describe('FirecrawlApp E2E Tests', () => {
184
191
  expect(response).toHaveProperty("status");
185
192
  expect(response.status).toBe("completed");
186
193
  expect(response).not.toHaveProperty("next");
187
- expect(response.data?.length).toBeGreaterThan(0);
188
- expect(response.data?.[0]).toHaveProperty("markdown");
189
- expect(response.data?.[0].markdown).toContain("_Roast_");
190
- expect(response.data?.[0]).not.toHaveProperty('content'); // v0
191
- expect(response.data?.[0]).toHaveProperty("html");
192
- expect(response.data?.[0].html).toContain("<h1");
193
- expect(response.data?.[0]).toHaveProperty("rawHtml");
194
- expect(response.data?.[0].rawHtml).toContain("<h1");
195
- expect(response.data?.[0]).toHaveProperty("screenshot");
196
- expect(response.data?.[0].screenshot).toContain("https://");
197
- expect(response.data?.[0]).toHaveProperty("links");
198
- expect(response.data?.[0].links).not.toBeNull();
199
- expect(response.data?.[0].links?.length).toBeGreaterThan(0);
200
- expect(response.data?.[0]).toHaveProperty("metadata");
201
- expect(response.data?.[0].metadata).toHaveProperty("title");
202
- expect(response.data?.[0].metadata).toHaveProperty("description");
203
- expect(response.data?.[0].metadata).toHaveProperty("language");
204
- expect(response.data?.[0].metadata).toHaveProperty("sourceURL");
205
- expect(response.data?.[0].metadata).toHaveProperty("statusCode");
206
- expect(response.data?.[0].metadata).not.toHaveProperty("error");
194
+ expect(response.data.length).toBeGreaterThan(0);
195
+ expect(response.data[0]).not.toBeNull();
196
+ expect(response.data[0]).not.toBeUndefined();
197
+ if (response.data[0]) {
198
+ expect(response.data[0]).toHaveProperty("markdown");
199
+ expect(response.data[0].markdown).toContain("_Roast_");
200
+ expect(response.data[0]).not.toHaveProperty('content'); // v0
201
+ expect(response.data[0]).toHaveProperty("html");
202
+ expect(response.data[0].html).toContain("<h1");
203
+ expect(response.data[0]).toHaveProperty("rawHtml");
204
+ expect(response.data[0].rawHtml).toContain("<h1");
205
+ expect(response.data[0]).toHaveProperty("screenshot");
206
+ expect(response.data[0].screenshot).toContain("https://");
207
+ expect(response.data[0]).toHaveProperty("links");
208
+ expect(response.data[0].links).not.toBeNull();
209
+ expect(response.data[0].links?.length).toBeGreaterThan(0);
210
+ expect(response.data[0]).toHaveProperty("metadata");
211
+ expect(response.data[0].metadata).toHaveProperty("title");
212
+ expect(response.data[0].metadata).toHaveProperty("description");
213
+ expect(response.data[0].metadata).toHaveProperty("language");
214
+ expect(response.data[0].metadata).toHaveProperty("sourceURL");
215
+ expect(response.data[0].metadata).toHaveProperty("statusCode");
216
+ expect(response.data[0].metadata).not.toHaveProperty("error");
217
+ }
207
218
  }, 60000); // 60 seconds timeout
208
219
 
209
220
  test.concurrent('should handle idempotency key for crawl', async () => {
210
221
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
211
222
  const uniqueIdempotencyKey = uuidv4();
212
- const response = await app.crawlUrl('https://roastmywebsite.ai', {}, false, 2, uniqueIdempotencyKey) as CrawlResponse;
223
+ const response = await app.asyncCrawlUrl('https://roastmywebsite.ai', {}, uniqueIdempotencyKey) as CrawlResponse;
213
224
  expect(response).not.toBeNull();
214
225
  expect(response.id).toBeDefined();
215
226
 
216
- await expect(app.crawlUrl('https://roastmywebsite.ai', {}, true, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
227
+ await expect(app.crawlUrl('https://roastmywebsite.ai', {}, 2, uniqueIdempotencyKey)).rejects.toThrow("Request failed with status code 409");
217
228
  });
218
229
 
219
230
  test.concurrent('should check crawl status', async () => {
220
231
  const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL });
221
- const response = await app.crawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams, false) as CrawlResponse;
232
+ const response = await app.asyncCrawlUrl('https://firecrawl.dev', { scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse;
222
233
  expect(response).not.toBeNull();
223
234
  expect(response.id).toBeDefined();
224
235
 
@@ -226,7 +237,8 @@ describe('FirecrawlApp E2E Tests', () => {
226
237
  const maxChecks = 15;
227
238
  let checks = 0;
228
239
 
229
- while (statusResponse.status === 'scraping' && checks < maxChecks) {
240
+ expect(statusResponse.success).toBe(true);
241
+ while ((statusResponse as any).status === 'scraping' && checks < maxChecks) {
230
242
  await new Promise(resolve => setTimeout(resolve, 5000));
231
243
  expect(statusResponse).not.toHaveProperty("partial_data"); // v0
232
244
  expect(statusResponse).not.toHaveProperty("current"); // v0
@@ -236,44 +248,55 @@ describe('FirecrawlApp E2E Tests', () => {
236
248
  expect(statusResponse).toHaveProperty("expiresAt");
237
249
  expect(statusResponse).toHaveProperty("status");
238
250
  expect(statusResponse).toHaveProperty("next");
239
- expect(statusResponse.total).toBeGreaterThan(0);
240
- expect(statusResponse.creditsUsed).toBeGreaterThan(0);
241
- expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
242
- expect(statusResponse.status).toBe("scraping");
243
- expect(statusResponse.next).toContain("/v1/crawl/");
251
+ expect(statusResponse.success).toBe(true);
252
+ if (statusResponse.success === true) {
253
+ expect(statusResponse.total).toBeGreaterThan(0);
254
+ expect(statusResponse.creditsUsed).toBeGreaterThan(0);
255
+ expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
256
+ expect(statusResponse.status).toBe("scraping");
257
+ expect(statusResponse.next).toContain("/v1/crawl/");
258
+ }
244
259
  statusResponse = await app.checkCrawlStatus(response.id) as CrawlStatusResponse;
260
+ expect(statusResponse.success).toBe(true);
245
261
  checks++;
246
262
  }
247
263
 
248
264
  expect(statusResponse).not.toBeNull();
249
265
  expect(statusResponse).toHaveProperty("total");
250
- expect(statusResponse.total).toBeGreaterThan(0);
251
- expect(statusResponse).toHaveProperty("creditsUsed");
252
- expect(statusResponse.creditsUsed).toBeGreaterThan(0);
253
- expect(statusResponse).toHaveProperty("expiresAt");
254
- expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
255
- expect(statusResponse).toHaveProperty("status");
256
- expect(statusResponse.status).toBe("completed");
257
- expect(statusResponse.data?.length).toBeGreaterThan(0);
258
- expect(statusResponse.data?.[0]).toHaveProperty("markdown");
259
- expect(statusResponse.data?.[0].markdown?.length).toBeGreaterThan(10);
260
- expect(statusResponse.data?.[0]).not.toHaveProperty('content'); // v0
261
- expect(statusResponse.data?.[0]).toHaveProperty("html");
262
- expect(statusResponse.data?.[0].html).toContain("<div");
263
- expect(statusResponse.data?.[0]).toHaveProperty("rawHtml");
264
- expect(statusResponse.data?.[0].rawHtml).toContain("<div");
265
- expect(statusResponse.data?.[0]).toHaveProperty("screenshot");
266
- expect(statusResponse.data?.[0].screenshot).toContain("https://");
267
- expect(statusResponse.data?.[0]).toHaveProperty("links");
268
- expect(statusResponse.data?.[0].links).not.toBeNull();
269
- expect(statusResponse.data?.[0].links?.length).toBeGreaterThan(0);
270
- expect(statusResponse.data?.[0]).toHaveProperty("metadata");
271
- expect(statusResponse.data?.[0].metadata).toHaveProperty("title");
272
- expect(statusResponse.data?.[0].metadata).toHaveProperty("description");
273
- expect(statusResponse.data?.[0].metadata).toHaveProperty("language");
274
- expect(statusResponse.data?.[0].metadata).toHaveProperty("sourceURL");
275
- expect(statusResponse.data?.[0].metadata).toHaveProperty("statusCode");
276
- expect(statusResponse.data?.[0].metadata).not.toHaveProperty("error");
266
+ expect(statusResponse.success).toBe(true);
267
+ if (statusResponse.success === true) {
268
+ expect(statusResponse.total).toBeGreaterThan(0);
269
+ expect(statusResponse).toHaveProperty("creditsUsed");
270
+ expect(statusResponse.creditsUsed).toBeGreaterThan(0);
271
+ expect(statusResponse).toHaveProperty("expiresAt");
272
+ expect(statusResponse.expiresAt.getTime()).toBeGreaterThan(Date.now());
273
+ expect(statusResponse).toHaveProperty("status");
274
+ expect(statusResponse.status).toBe("completed");
275
+ expect(statusResponse.data.length).toBeGreaterThan(0);
276
+ expect(statusResponse.data[0]).not.toBeNull();
277
+ expect(statusResponse.data[0]).not.toBeUndefined();
278
+ if (statusResponse.data[0]) {
279
+ expect(statusResponse.data[0]).toHaveProperty("markdown");
280
+ expect(statusResponse.data[0].markdown?.length).toBeGreaterThan(10);
281
+ expect(statusResponse.data[0]).not.toHaveProperty('content'); // v0
282
+ expect(statusResponse.data[0]).toHaveProperty("html");
283
+ expect(statusResponse.data[0].html).toContain("<div");
284
+ expect(statusResponse.data[0]).toHaveProperty("rawHtml");
285
+ expect(statusResponse.data[0].rawHtml).toContain("<div");
286
+ expect(statusResponse.data[0]).toHaveProperty("screenshot");
287
+ expect(statusResponse.data[0].screenshot).toContain("https://");
288
+ expect(statusResponse.data[0]).toHaveProperty("links");
289
+ expect(statusResponse.data[0].links).not.toBeNull();
290
+ expect(statusResponse.data[0].links?.length).toBeGreaterThan(0);
291
+ expect(statusResponse.data[0]).toHaveProperty("metadata");
292
+ expect(statusResponse.data[0].metadata).toHaveProperty("title");
293
+ expect(statusResponse.data[0].metadata).toHaveProperty("description");
294
+ expect(statusResponse.data[0].metadata).toHaveProperty("language");
295
+ expect(statusResponse.data[0].metadata).toHaveProperty("sourceURL");
296
+ expect(statusResponse.data[0].metadata).toHaveProperty("statusCode");
297
+ expect(statusResponse.data[0].metadata).not.toHaveProperty("error");
298
+ }
299
+ }
277
300
  }, 60000); // 60 seconds timeout
278
301
 
279
302
  test.concurrent('should throw error for invalid API key on map', async () => {
package/src/index.ts CHANGED
@@ -1,5 +1,5 @@
1
- import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
2
- import { z } from "zod";
1
+ import axios, { type AxiosResponse, type AxiosRequestHeaders } from "axios";
2
+ import type { infer as ZodInfer, ZodSchema } from "zod";
3
3
  import { zodToJsonSchema } from "zod-to-json-schema";
4
4
  import { WebSocket } from "isows";
5
5
  import { TypedEventTarget } from "typescript-event-target";
@@ -58,13 +58,13 @@ export interface FirecrawlDocumentMetadata {
58
58
  * Document interface for Firecrawl.
59
59
  * Represents a document retrieved or processed by Firecrawl.
60
60
  */
61
- export interface FirecrawlDocument {
61
+ export interface FirecrawlDocument<T> {
62
62
  url?: string;
63
63
  markdown?: string;
64
64
  html?: string;
65
65
  rawHtml?: string;
66
66
  links?: string[];
67
- extract?: Record<any, any>;
67
+ extract?: T;
68
68
  screenshot?: string;
69
69
  metadata?: FirecrawlDocumentMetadata;
70
70
  }
@@ -73,26 +73,29 @@ export interface FirecrawlDocument {
73
73
  * Parameters for scraping operations.
74
74
  * Defines the options and configurations available for scraping web content.
75
75
  */
76
- export interface ScrapeParams {
76
+ export interface CrawlScrapeOptions {
77
77
  formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "extract" | "full@scrennshot")[];
78
78
  headers?: Record<string, string>;
79
79
  includeTags?: string[];
80
80
  excludeTags?: string[];
81
81
  onlyMainContent?: boolean;
82
+ waitFor?: number;
83
+ timeout?: number;
84
+ }
85
+
86
+ export interface ScrapeParams<LLMSchema extends ZodSchema> extends CrawlScrapeOptions {
82
87
  extract?: {
83
88
  prompt?: string;
84
- schema?: z.ZodSchema | any;
89
+ schema?: LLMSchema;
85
90
  systemPrompt?: string;
86
91
  };
87
- waitFor?: number;
88
- timeout?: number;
89
92
  }
90
93
 
91
94
  /**
92
95
  * Response interface for scraping operations.
93
96
  * Defines the structure of the response received after a scraping operation.
94
97
  */
95
- export interface ScrapeResponse extends FirecrawlDocument {
98
+ export interface ScrapeResponse<LLMResult> extends FirecrawlDocument<LLMResult> {
96
99
  success: true;
97
100
  warning?: string;
98
101
  error?: string;
@@ -110,7 +113,7 @@ export interface CrawlParams {
110
113
  allowBackwardLinks?: boolean;
111
114
  allowExternalLinks?: boolean;
112
115
  ignoreSitemap?: boolean;
113
- scrapeOptions?: ScrapeParams;
116
+ scrapeOptions?: CrawlScrapeOptions;
114
117
  webhook?: string;
115
118
  }
116
119
 
@@ -131,15 +134,14 @@ export interface CrawlResponse {
131
134
  */
132
135
  export interface CrawlStatusResponse {
133
136
  success: true;
134
- total: number;
137
+ status: "scraping" | "completed" | "failed" | "cancelled";
135
138
  completed: number;
139
+ total: number;
136
140
  creditsUsed: number;
137
141
  expiresAt: Date;
138
- status: "scraping" | "completed" | "failed";
139
- next: string;
140
- data?: FirecrawlDocument[];
141
- error?: string;
142
- }
142
+ next?: string;
143
+ data: FirecrawlDocument<undefined>[];
144
+ };
143
145
 
144
146
  /**
145
147
  * Parameters for mapping operations.
@@ -184,7 +186,11 @@ export default class FirecrawlApp {
184
186
  * @param config - Configuration options for the FirecrawlApp instance.
185
187
  */
186
188
  constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
187
- this.apiKey = apiKey || "";
189
+ if (typeof apiKey !== "string") {
190
+ throw new Error("No API key provided");
191
+ }
192
+
193
+ this.apiKey = apiKey;
188
194
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
189
195
  }
190
196
 
@@ -194,10 +200,10 @@ export default class FirecrawlApp {
194
200
  * @param params - Additional parameters for the scrape request.
195
201
  * @returns The response from the scrape operation.
196
202
  */
197
- async scrapeUrl(
203
+ async scrapeUrl<T extends ZodSchema>(
198
204
  url: string,
199
- params?: ScrapeParams
200
- ): Promise<ScrapeResponse | ErrorResponse> {
205
+ params?: ScrapeParams<T>
206
+ ): Promise<ScrapeResponse<ZodInfer<T>> | ErrorResponse> {
201
207
  const headers: AxiosRequestHeaders = {
202
208
  "Content-Type": "application/json",
203
209
  Authorization: `Bearer ${this.apiKey}`,
@@ -329,9 +335,10 @@ export default class FirecrawlApp {
329
335
  /**
330
336
  * Checks the status of a crawl job using the Firecrawl API.
331
337
  * @param id - The ID of the crawl operation.
338
+ * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
332
339
  * @returns The response containing the job status.
333
340
  */
334
- async checkCrawlStatus(id?: string): Promise<CrawlStatusResponse | ErrorResponse> {
341
+ async checkCrawlStatus(id?: string, getAllData = false): Promise<CrawlStatusResponse | ErrorResponse> {
335
342
  if (!id) {
336
343
  throw new Error("No crawl ID provided");
337
344
  }
@@ -343,16 +350,28 @@ export default class FirecrawlApp {
343
350
  headers
344
351
  );
345
352
  if (response.status === 200) {
353
+ let allData = response.data.data;
354
+ if (getAllData && response.data.status === "completed") {
355
+ let statusData = response.data
356
+ if ("data" in statusData) {
357
+ let data = statusData.data;
358
+ while ('next' in statusData) {
359
+ statusData = (await this.getRequest(statusData.next, headers)).data;
360
+ data = data.concat(statusData.data);
361
+ }
362
+ allData = data;
363
+ }
364
+ }
346
365
  return ({
347
- success: true,
366
+ success: response.data.success,
348
367
  status: response.data.status,
349
368
  total: response.data.total,
350
369
  completed: response.data.completed,
351
370
  creditsUsed: response.data.creditsUsed,
352
371
  expiresAt: new Date(response.data.expiresAt),
353
372
  next: response.data.next,
354
- data: response.data.data,
355
- error: response.data.error
373
+ data: allData,
374
+ error: response.data.error,
356
375
  })
357
376
  } else {
358
377
  this.handleError(response, "check crawl status");
@@ -452,7 +471,7 @@ export default class FirecrawlApp {
452
471
  id: string,
453
472
  headers: AxiosRequestHeaders,
454
473
  checkInterval: number
455
- ): Promise<CrawlStatusResponse> {
474
+ ): Promise<CrawlStatusResponse | ErrorResponse> {
456
475
  while (true) {
457
476
  let statusResponse: AxiosResponse = await this.getRequest(
458
477
  `${this.apiUrl}/v1/crawl/${id}`,
@@ -460,20 +479,20 @@ export default class FirecrawlApp {
460
479
  );
461
480
  if (statusResponse.status === 200) {
462
481
  let statusData = statusResponse.data;
463
- if (statusData.status === "completed") {
464
- if ("data" in statusData) {
465
- let data = statusData.data;
466
- while ('next' in statusData) {
467
- statusResponse = await this.getRequest(statusData.next, headers);
468
- statusData = statusResponse.data;
469
- data = data.concat(statusData.data);
482
+ if (statusData.status === "completed") {
483
+ if ("data" in statusData) {
484
+ let data = statusData.data;
485
+ while ('next' in statusData) {
486
+ statusResponse = await this.getRequest(statusData.next, headers);
487
+ statusData = statusResponse.data;
488
+ data = data.concat(statusData.data);
489
+ }
490
+ statusData.data = data;
491
+ return statusData;
492
+ } else {
493
+ throw new Error("Crawl job completed but no data was returned");
470
494
  }
471
- statusData.data = data;
472
- return statusData;
473
- } else {
474
- throw new Error("Crawl job completed but no data was returned");
475
- }
476
- } else if (
495
+ } else if (
477
496
  ["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
478
497
  ) {
479
498
  checkInterval = Math.max(checkInterval, 2);
@@ -512,21 +531,21 @@ export default class FirecrawlApp {
512
531
  }
513
532
 
514
533
  interface CrawlWatcherEvents {
515
- document: CustomEvent<FirecrawlDocument>,
534
+ document: CustomEvent<FirecrawlDocument<undefined>>,
516
535
  done: CustomEvent<{
517
536
  status: CrawlStatusResponse["status"];
518
- data: FirecrawlDocument[];
537
+ data: FirecrawlDocument<undefined>[];
519
538
  }>,
520
539
  error: CustomEvent<{
521
540
  status: CrawlStatusResponse["status"],
522
- data: FirecrawlDocument[],
541
+ data: FirecrawlDocument<undefined>[],
523
542
  error: string,
524
543
  }>,
525
544
  }
526
545
 
527
546
  export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
528
547
  private ws: WebSocket;
529
- public data: FirecrawlDocument[];
548
+ public data: FirecrawlDocument<undefined>[];
530
549
  public status: CrawlStatusResponse["status"];
531
550
 
532
551
  constructor(id: string, app: FirecrawlApp) {
@@ -547,7 +566,7 @@ export class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
547
566
 
548
567
  type DocumentMessage = {
549
568
  type: "document",
550
- data: FirecrawlDocument,
569
+ data: FirecrawlDocument<undefined>,
551
570
  }
552
571
 
553
572
  type DoneMessage = { type: "done" }