@mendable/firecrawl-js 0.0.36 → 1.0.1

This diff shows the changes between two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/src/index.ts CHANGED
@@ -1,16 +1,22 @@
1
1
  import axios, { AxiosResponse, AxiosRequestHeaders } from "axios";
2
2
  import { z } from "zod";
3
3
  import { zodToJsonSchema } from "zod-to-json-schema";
4
+
4
5
  /**
5
6
  * Configuration interface for FirecrawlApp.
7
+ * @param apiKey - Optional API key for authentication.
8
+ * @param apiUrl - Optional base URL of the API; defaults to 'https://api.firecrawl.dev'.
9
+ * @param version - API version, either 'v0' or 'v1'.
6
10
  */
7
11
  export interface FirecrawlAppConfig {
8
12
  apiKey?: string | null;
9
13
  apiUrl?: string | null;
14
+ version?: "v0" | "v1";
10
15
  }
11
16
 
12
17
  /**
13
18
  * Metadata for a Firecrawl document.
19
+ * Includes various optional properties for document metadata.
14
20
  */
15
21
  export interface FirecrawlDocumentMetadata {
16
22
  title?: string;
@@ -43,6 +49,17 @@ export interface FirecrawlDocumentMetadata {
43
49
  articleTag?: string;
44
50
  articleSection?: string;
45
51
  sourceURL?: string;
52
+ statusCode?: number;
53
+ error?: string;
54
+ [key: string]: any; // Allows for additional metadata properties not explicitly defined.
55
+ }
56
+
57
+ /**
58
+ * Metadata for a Firecrawl document on v0.
59
+ * Similar to FirecrawlDocumentMetadata but includes properties specific to API version v0.
60
+ */
61
+ export interface FirecrawlDocumentMetadataV0 {
62
+ // Properties similar to FirecrawlDocumentMetadata, with additional v0-specific adjustments
46
63
  pageStatusCode?: number;
47
64
  pageError?: string;
48
65
  [key: string]: any;
@@ -50,8 +67,23 @@ export interface FirecrawlDocumentMetadata {
50
67
 
51
68
  /**
52
69
  * Document interface for Firecrawl.
70
+ * Represents a document retrieved or processed by Firecrawl.
53
71
  */
54
72
  export interface FirecrawlDocument {
73
+ url?: string;
74
+ markdown?: string;
75
+ html?: string;
76
+ rawHtml?: string;
77
+ links?: string[];
78
+ screenshot?: string;
79
+ metadata: FirecrawlDocumentMetadata;
80
+ }
81
+
82
+ /**
83
+ * Document interface for Firecrawl on v0.
84
+ * Represents a document specifically for API version v0 with additional properties.
85
+ */
86
+ export interface FirecrawlDocumentV0 {
55
87
  id?: string;
56
88
  url?: string;
57
89
  content: string;
@@ -61,79 +93,242 @@ export interface FirecrawlDocument {
61
93
  createdAt?: Date;
62
94
  updatedAt?: Date;
63
95
  type?: string;
64
- metadata: FirecrawlDocumentMetadata;
96
+ metadata: FirecrawlDocumentMetadataV0;
65
97
  childrenLinks?: string[];
66
98
  provider?: string;
67
99
  warning?: string;
68
-
69
100
  index?: number;
70
101
  }
71
102
 
103
+ /**
104
+ * Parameters for scraping operations.
105
+ * Defines the options and configurations available for scraping web content.
106
+ */
107
+ export interface ScrapeParams {
108
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot")[];
109
+ headers?: Record<string, string>;
110
+ includeTags?: string[];
111
+ excludeTags?: string[];
112
+ onlyMainContent?: boolean;
113
+ screenshotMode?: "desktop" | "full-desktop" | "mobile" | "full-mobile";
114
+ waitFor?: number;
115
+ timeout?: number;
116
+ }
117
+
118
+ /**
119
+ * Parameters for scraping operations on v0.
120
+ * Includes page and extractor options specific to API version v0.
121
+ */
122
+ export interface ScrapeParamsV0 {
123
+ pageOptions?: {
124
+ headers?: Record<string, string>;
125
+ includeHtml?: boolean;
126
+ includeRawHtml?: boolean;
127
+ onlyIncludeTags?: string[];
128
+ onlyMainContent?: boolean;
129
+ removeTags?: string[];
130
+ replaceAllPathsWithAbsolutePaths?: boolean;
131
+ screenshot?: boolean;
132
+ fullPageScreenshot?: boolean;
133
+ waitFor?: number;
134
+ };
135
+ extractorOptions?: {
136
+ mode?: "markdown" | "llm-extraction" | "llm-extraction-from-raw-html" | "llm-extraction-from-markdown";
137
+ extractionPrompt?: string;
138
+ extractionSchema?: Record<string, any> | z.ZodSchema | any;
139
+ };
140
+ timeout?: number;
141
+ }
142
+
72
143
  /**
73
144
  * Response interface for scraping operations.
145
+ * Defines the structure of the response received after a scraping operation.
74
146
  */
75
- export interface ScrapeResponse {
147
+ export interface ScrapeResponse extends FirecrawlDocument {
76
148
  success: boolean;
77
- data?: FirecrawlDocument;
149
+ warning?: string;
78
150
  error?: string;
79
151
  }
152
+
80
153
  /**
81
- * Response interface for searching operations.
154
+ * Response interface for scraping operations on v0.
155
+ * Similar to ScrapeResponse but tailored for responses from API version v0.
82
156
  */
83
- export interface SearchResponse {
157
+ export interface ScrapeResponseV0 {
84
158
  success: boolean;
85
- data?: FirecrawlDocument[];
159
+ data?: FirecrawlDocumentV0;
86
160
  error?: string;
87
161
  }
162
+
163
+ /**
164
+ * Parameters for crawling operations.
165
+ * Combines scrape options (scrapeOptions) with crawler options such as path filters, depth, and limits.
166
+ */
167
+ export interface CrawlParams {
168
+ scrapeOptions?: ScrapeParams;
169
+ crawlerOptions?: {
170
+ includePaths?: string[]
171
+ excludePaths?: string[]
172
+ maxDepth?: number
173
+ limit?: number
174
+ allowBackwardLinks?: boolean
175
+ allowExternalLinks?: boolean
176
+ ignoreSitemap?: boolean
177
+ };
178
+ }
179
+
180
+ /**
181
+ * Parameters for crawling operations on v0.
182
+ * Tailored for API version v0, includes specific options for crawling.
183
+ */
184
+ export interface CrawlParamsV0 {
185
+ crawlerOptions?: {
186
+ includes?: string[];
187
+ excludes?: string[];
188
+ generateImgAltText?: boolean;
189
+ returnOnlyUrls?: boolean;
190
+ maxDepth?: number;
191
+ mode?: "default" | "fast";
192
+ ignoreSitemap?: boolean;
193
+ limit?: number;
194
+ allowBackwardCrawling?: boolean;
195
+ allowExternalContentLinks?: boolean;
196
+ };
197
+ pageOptions?: {
198
+ headers?: Record<string, string>;
199
+ includeHtml?: boolean;
200
+ includeRawHtml?: boolean;
201
+ onlyIncludeTags?: string[];
202
+ onlyMainContent?: boolean;
203
+ removeTags?: string[];
204
+ replaceAllPathsWithAbsolutePaths?: boolean;
205
+ screenshot?: boolean;
206
+ fullPageScreenshot?: boolean;
207
+ waitFor?: number;
208
+ };
209
+ }
210
+
88
211
  /**
89
212
  * Response interface for crawling operations.
213
+ * Defines the structure of the response received after initiating a crawl.
90
214
  */
91
215
  export interface CrawlResponse {
216
+ id?: string;
217
+ url?: string;
92
218
  success: boolean;
219
+ error?: string;
220
+ }
221
+
222
+ /**
223
+ * Response interface for crawling operations on v0.
224
+ * Similar to CrawlResponse but tailored for responses from API version v0.
225
+ */
226
+ export interface CrawlResponseV0 {
93
227
  jobId?: string;
94
- data?: FirecrawlDocument[];
228
+ success: boolean;
95
229
  error?: string;
96
230
  }
231
+
97
232
  /**
98
233
  * Response interface for job status checks.
234
+ * Provides detailed status of a crawl job including progress and results.
99
235
  */
100
- export interface JobStatusResponse {
236
+ export interface CrawlStatusResponse {
237
+ success: boolean;
238
+ total: number;
239
+ completed: number;
240
+ creditsUsed: number;
241
+ expiresAt: Date;
242
+ status: "scraping" | "completed" | "failed";
243
+ next: string;
244
+ data?: FirecrawlDocument[];
245
+ error?: string;
246
+ }
247
+
248
+ /**
249
+ * Response interface for job status checks on v0.
250
+ * Tailored for API version v0, provides status and partial data of a crawl job.
251
+ */
252
+ export interface CrawlStatusResponseV0 {
101
253
  success: boolean;
102
254
  status: string;
103
255
  current?: number;
104
256
  current_url?: string;
105
257
  current_step?: string;
106
258
  total?: number;
107
- jobId?: string;
108
- data?: FirecrawlDocument[];
109
- partial_data?: FirecrawlDocument[];
259
+ data?: FirecrawlDocumentV0[];
260
+ partial_data?: FirecrawlDocumentV0[];
110
261
  error?: string;
111
262
  }
263
+
264
+
112
265
  /**
113
- * Generic parameter interface.
266
+ * Parameters for mapping operations.
267
+ * Defines options for mapping URLs during a crawl.
114
268
  */
115
- export interface Params {
116
- [key: string]: any;
117
- extractorOptions?: {
118
- extractionSchema: z.ZodSchema | any;
119
- mode?: "llm-extraction";
120
- extractionPrompt?: string;
269
+ export interface MapParams {
270
+ includePaths?: string[]
271
+ excludePaths?: string[]
272
+ maxDepth?: number
273
+ limit?: number
274
+ allowBackwardLinks?: boolean
275
+ allowExternalLinks?: boolean
276
+ ignoreSitemap?: boolean
277
+ }
278
+
279
+ /**
280
+ * Response interface for mapping operations.
281
+ * Defines the structure of the response received after a mapping operation.
282
+ */
283
+ export interface MapResponse {
284
+ success: boolean;
285
+ links?: string[];
286
+ error?: string;
287
+ }
288
+
289
+ /**
290
+ * Parameters for searching operations on v0.
291
+ * Tailored for API version v0, includes specific options for searching content.
292
+ */
293
+ export interface SearchParamsV0 {
294
+ pageOptions?: {
295
+ onlyMainContent?: boolean;
296
+ fetchPageContent?: boolean;
297
+ includeHtml?: boolean;
298
+ includeRawHtml?: boolean;
121
299
  };
300
+ searchOptions?: {
301
+ limit?: number;
302
+ };
303
+ }
304
+
305
+ /**
306
+ * Response interface for searching operations on v0.
307
+ * Defines the structure of the response received after a search operation on v0.
308
+ */
309
+ export interface SearchResponseV0 {
310
+ success: boolean;
311
+ data?: FirecrawlDocumentV0[];
312
+ error?: string;
122
313
  }
314
+
123
315
  /**
124
316
  * Main class for interacting with the Firecrawl API.
317
+ * Provides methods for scraping, searching, crawling, and mapping web content.
125
318
  */
126
- export default class FirecrawlApp {
319
+ export default class FirecrawlApp<T extends "v0" | "v1"> {
127
320
  private apiKey: string;
128
321
  private apiUrl: string;
322
+ public version: T;
129
323
 
130
324
  /**
131
325
  * Initializes a new instance of the FirecrawlApp class.
132
- * @param {FirecrawlAppConfig} config - Configuration options for the FirecrawlApp instance.
326
+ * @param config - Configuration options for the FirecrawlApp instance.
133
327
  */
134
- constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
328
+ constructor({ apiKey = null, apiUrl = null, version = "v1" }: FirecrawlAppConfig) {
135
329
  this.apiKey = apiKey || "";
136
330
  this.apiUrl = apiUrl || "https://api.firecrawl.dev";
331
+ this.version = version as T;
137
332
  if (!this.apiKey) {
138
333
  throw new Error("No API key provided");
139
334
  }
@@ -141,21 +336,21 @@ export default class FirecrawlApp {
141
336
 
142
337
  /**
143
338
  * Scrapes a URL using the Firecrawl API.
144
- * @param {string} url - The URL to scrape.
145
- * @param {Params | null} params - Additional parameters for the scrape request.
146
- * @returns {Promise<ScrapeResponse>} The response from the scrape operation.
339
+ * @param url - The URL to scrape.
340
+ * @param params - Additional parameters for the scrape request.
341
+ * @returns The response from the scrape operation.
147
342
  */
148
343
  async scrapeUrl(
149
344
  url: string,
150
- params: Params | null = null
151
- ): Promise<ScrapeResponse> {
345
+ params?: ScrapeParams | ScrapeParamsV0
346
+ ): Promise<this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse> {
152
347
  const headers: AxiosRequestHeaders = {
153
348
  "Content-Type": "application/json",
154
349
  Authorization: `Bearer ${this.apiKey}`,
155
350
  } as AxiosRequestHeaders;
156
- let jsonData: Params = { url, ...params };
157
- if (params?.extractorOptions?.extractionSchema) {
158
- let schema = params.extractorOptions.extractionSchema;
351
+ let jsonData: any = { url, ...params };
352
+ if (jsonData?.extractorOptions?.extractionSchema) {
353
+ let schema = jsonData.extractorOptions.extractionSchema;
159
354
  // Check if schema is an instance of ZodSchema to correctly identify Zod schemas
160
355
  if (schema instanceof z.ZodSchema) {
161
356
  schema = zodToJsonSchema(schema);
@@ -163,22 +358,27 @@ export default class FirecrawlApp {
163
358
  jsonData = {
164
359
  ...jsonData,
165
360
  extractorOptions: {
166
- ...params.extractorOptions,
361
+ ...jsonData.extractorOptions,
167
362
  extractionSchema: schema,
168
- mode: params.extractorOptions.mode || "llm-extraction",
363
+ mode: jsonData.extractorOptions.mode || "llm-extraction",
169
364
  },
170
365
  };
171
366
  }
172
367
  try {
173
368
  const response: AxiosResponse = await axios.post(
174
- this.apiUrl + "/v0/scrape",
369
+ this.apiUrl + `/${this.version}/scrape`,
175
370
  jsonData,
176
371
  { headers }
177
372
  );
178
373
  if (response.status === 200) {
179
374
  const responseData = response.data;
180
375
  if (responseData.success) {
181
- return responseData;
376
+ return (this.version === 'v0' ? responseData as ScrapeResponseV0 : {
377
+ success: true,
378
+ warning: responseData.warning,
379
+ error: responseData.error,
380
+ ...responseData.data
381
+ }) as ScrapeResponse;
182
382
  } else {
183
383
  throw new Error(`Failed to scrape URL. Error: ${responseData.error}`);
184
384
  }
@@ -188,24 +388,28 @@ export default class FirecrawlApp {
188
388
  } catch (error: any) {
189
389
  throw new Error(error.message);
190
390
  }
191
- return { success: false, error: "Internal server error." };
391
+ return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? ScrapeResponseV0 : ScrapeResponse;
192
392
  }
193
393
 
194
394
  /**
195
395
  * Searches for a query using the Firecrawl API.
196
- * @param {string} query - The query to search for.
197
- * @param {Params | null} params - Additional parameters for the search request.
198
- * @returns {Promise<SearchResponse>} The response from the search operation.
396
+ * @param query - The query to search for.
397
+ * @param params - Additional parameters for the search request.
398
+ * @returns The response from the search operation.
199
399
  */
200
400
  async search(
201
401
  query: string,
202
- params: Params | null = null
203
- ): Promise<SearchResponse> {
402
+ params?: SearchParamsV0
403
+ ): Promise<SearchResponseV0> {
404
+ if (this.version === "v1") {
405
+ throw new Error("Search is not supported in v1, please update FirecrawlApp() initialization to use v0.");
406
+ }
407
+
204
408
  const headers: AxiosRequestHeaders = {
205
409
  "Content-Type": "application/json",
206
410
  Authorization: `Bearer ${this.apiKey}`,
207
411
  } as AxiosRequestHeaders;
208
- let jsonData: Params = { query };
412
+ let jsonData: any = { query };
209
413
  if (params) {
210
414
  jsonData = { ...jsonData, ...params };
211
415
  }
@@ -233,93 +437,160 @@ export default class FirecrawlApp {
233
437
 
234
438
  /**
235
439
  * Initiates a crawl job for a URL using the Firecrawl API.
236
- * @param {string} url - The URL to crawl.
237
- * @param {Params | null} params - Additional parameters for the crawl request.
238
- * @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
239
- * @param {number} pollInterval - Time in seconds for job status checks.
240
- * @param {string} idempotencyKey - Optional idempotency key for the request.
241
- * @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
440
+ * @param url - The URL to crawl.
441
+ * @param params - Additional parameters for the crawl request.
442
+ * @param waitUntilDone - Whether to wait for the crawl job to complete.
443
+ * @param pollInterval - Time in seconds for job status checks.
444
+ * @param idempotencyKey - Optional idempotency key for the request.
445
+ * @returns The response from the crawl operation.
242
446
  */
243
447
  async crawlUrl(
244
448
  url: string,
245
- params: Params | null = null,
449
+ params?: this['version'] extends 'v0' ? CrawlParamsV0 : CrawlParams,
246
450
  waitUntilDone: boolean = true,
247
451
  pollInterval: number = 2,
248
452
  idempotencyKey?: string
249
- ): Promise<CrawlResponse | any> {
453
+ ): Promise<
454
+ this['version'] extends 'v0'
455
+ ? CrawlResponseV0 | CrawlStatusResponseV0 | FirecrawlDocumentV0[]
456
+ : CrawlResponse | CrawlStatusResponse
457
+ > {
250
458
  const headers = this.prepareHeaders(idempotencyKey);
251
- let jsonData: Params = { url };
252
- if (params) {
253
- jsonData = { ...jsonData, ...params };
254
- }
459
+ let jsonData: any = { url, ...params };
255
460
  try {
256
461
  const response: AxiosResponse = await this.postRequest(
257
- this.apiUrl + "/v0/crawl",
462
+ this.apiUrl + `/${this.version}/crawl`,
258
463
  jsonData,
259
464
  headers
260
465
  );
261
466
  if (response.status === 200) {
262
- const jobId: string = response.data.jobId;
467
+ const id: string = this.version === 'v0' ? response.data.jobId : response.data.id;
468
+ let checkUrl: string | undefined = undefined;
263
469
  if (waitUntilDone) {
264
- return this.monitorJobStatus(jobId, headers, pollInterval);
470
+ if (this.version === 'v1') { checkUrl = response.data.url }
471
+ return this.monitorJobStatus(id, headers, pollInterval, checkUrl);
265
472
  } else {
266
- return { success: true, jobId };
473
+ if (this.version === 'v0') {
474
+ return {
475
+ success: true,
476
+ jobId: id
477
+ } as CrawlResponseV0;
478
+ } else {
479
+ return {
480
+ success: true,
481
+ id: id
482
+ } as CrawlResponse;
483
+ }
267
484
  }
268
485
  } else {
269
486
  this.handleError(response, "start crawl job");
270
487
  }
271
488
  } catch (error: any) {
272
- console.log(error);
273
- throw new Error(error.message);
489
+ if (error.response?.data?.error) {
490
+ throw new Error(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`);
491
+ } else {
492
+ throw new Error(error.message);
493
+ }
274
494
  }
275
- return { success: false, error: "Internal server error." };
495
+ return { success: false, error: "Internal server error." } as this['version'] extends 'v0' ? CrawlResponseV0 : CrawlResponse;
276
496
  }
277
497
 
278
498
  /**
279
499
  * Checks the status of a crawl job using the Firecrawl API.
280
- * @param {string} jobId - The job ID of the crawl operation.
281
- * @returns {Promise<JobStatusResponse>} The response containing the job status.
500
+ * @param id - The ID of the crawl operation.
501
+ * @returns The response containing the job status.
282
502
  */
283
- async checkCrawlStatus(jobId: string): Promise<JobStatusResponse> {
503
+ async checkCrawlStatus(id?: string): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse> {
504
+ if (!id) {
505
+ throw new Error("No crawl ID provided");
506
+ }
507
+
284
508
  const headers: AxiosRequestHeaders = this.prepareHeaders();
285
509
  try {
286
510
  const response: AxiosResponse = await this.getRequest(
287
- this.apiUrl + `/v0/crawl/status/${jobId}`,
511
+ this.version === 'v1' ?
512
+ `${this.apiUrl}/${this.version}/crawl/${id}` :
513
+ `${this.apiUrl}/${this.version}/crawl/status/${id}`,
288
514
  headers
289
515
  );
290
516
  if (response.status === 200) {
291
- return {
292
- success: true,
293
- status: response.data.status,
294
- current: response.data.current,
295
- current_url: response.data.current_url,
296
- current_step: response.data.current_step,
297
- total: response.data.total,
298
- data: response.data.data,
299
- partial_data: !response.data.data
300
- ? response.data.partial_data
301
- : undefined,
302
- };
517
+ if (this.version === 'v0') {
518
+ return ({
519
+ success: true,
520
+ status: response.data.status,
521
+ current: response.data.current,
522
+ current_url: response.data.current_url,
523
+ current_step: response.data.current_step,
524
+ total: response.data.total,
525
+ data: response.data.data,
526
+ partial_data: !response.data.data
527
+ ? response.data.partial_data
528
+ : undefined,
529
+ } as CrawlStatusResponseV0) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
530
+ } else {
531
+ return ({
532
+ success: true,
533
+ status: response.data.status,
534
+ total: response.data.total,
535
+ completed: response.data.completed,
536
+ creditsUsed: response.data.creditsUsed,
537
+ expiresAt: new Date(response.data.expiresAt),
538
+ next: response.data.next,
539
+ data: response.data.data,
540
+ error: response.data.error
541
+ } as CrawlStatusResponse) as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse;
542
+ }
303
543
  } else {
304
544
  this.handleError(response, "check crawl status");
305
545
  }
306
546
  } catch (error: any) {
307
547
  throw new Error(error.message);
308
548
  }
309
- return {
310
- success: false,
311
- status: "unknown",
312
- current: 0,
313
- current_url: "",
314
- current_step: "",
315
- total: 0,
316
- error: "Internal server error.",
317
- };
549
+
550
+ return this.version === 'v0' ?
551
+ ({
552
+ success: false,
553
+ status: "unknown",
554
+ current: 0,
555
+ current_url: "",
556
+ current_step: "",
557
+ total: 0,
558
+ error: "Internal server error.",
559
+ } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse) :
560
+ ({
561
+ success: false,
562
+ error: "Internal server error.",
563
+ } as this['version'] extends 'v0' ? CrawlStatusResponseV0 : CrawlStatusResponse);
564
+ }
565
+
566
+ async mapUrl(url: string, params?: MapParams): Promise<MapResponse> {
567
+ if (this.version == 'v0') {
568
+ throw new Error("Map is not supported in v0");
569
+ }
570
+ const headers = this.prepareHeaders();
571
+ let jsonData: { url: string } & MapParams = { url, ...params };
572
+
573
+ try {
574
+ const response: AxiosResponse = await this.postRequest(
575
+ this.apiUrl + `/${this.version}/map`,
576
+ jsonData,
577
+ headers
578
+ );
579
+ if (response.status === 200) {
580
+ return response.data as MapResponse;
581
+ } else {
582
+ this.handleError(response, "map");
583
+ }
584
+ } catch (error: any) {
585
+ throw new Error(error.message);
586
+ }
587
+ return { success: false, error: "Internal server error." } as MapResponse;
318
588
  }
319
589
 
320
590
  /**
321
591
  * Prepares the headers for an API request.
322
- * @returns {AxiosRequestHeaders} The prepared headers.
592
+ * @param idempotencyKey - Optional key to ensure idempotency.
593
+ * @returns The prepared headers.
323
594
  */
324
595
  prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
325
596
  return {
@@ -331,14 +602,14 @@ export default class FirecrawlApp {
331
602
 
332
603
  /**
333
604
  * Sends a POST request to the specified URL.
334
- * @param {string} url - The URL to send the request to.
335
- * @param {Params} data - The data to send in the request.
336
- * @param {AxiosRequestHeaders} headers - The headers for the request.
337
- * @returns {Promise<AxiosResponse>} The response from the POST request.
605
+ * @param url - The URL to send the request to.
606
+ * @param data - The data to send in the request.
607
+ * @param headers - The headers for the request.
608
+ * @returns The response from the POST request.
338
609
  */
339
610
  postRequest(
340
611
  url: string,
341
- data: Params,
612
+ data: any,
342
613
  headers: AxiosRequestHeaders
343
614
  ): Promise<AxiosResponse> {
344
615
  return axios.post(url, data, { headers });
@@ -346,9 +617,9 @@ export default class FirecrawlApp {
346
617
 
347
618
  /**
348
619
  * Sends a GET request to the specified URL.
349
- * @param {string} url - The URL to send the request to.
350
- * @param {AxiosRequestHeaders} headers - The headers for the request.
351
- * @returns {Promise<AxiosResponse>} The response from the GET request.
620
+ * @param url - The URL to send the request to.
621
+ * @param headers - The headers for the request.
622
+ * @returns The response from the GET request.
352
623
  */
353
624
  getRequest(
354
625
  url: string,
@@ -359,38 +630,44 @@ export default class FirecrawlApp {
359
630
 
360
631
  /**
361
632
  * Monitors the status of a crawl job until completion or failure.
362
- * @param {string} jobId - The job ID of the crawl operation.
363
- * @param {AxiosRequestHeaders} headers - The headers for the request.
364
- * @param {number} timeout - Timeout in seconds for job status checks.
365
- * @returns {Promise<any>} The final job status or data.
633
+ * @param id - The ID of the crawl operation.
634
+ * @param headers - The headers for the request.
635
+ * @param checkInterval - Interval in seconds for job status checks.
636
+ * @param checkUrl - Optional URL used to check the status (v1 API only).
637
+ * @returns The final job status or data.
366
638
  */
367
639
  async monitorJobStatus(
368
- jobId: string,
640
+ id: string,
369
641
  headers: AxiosRequestHeaders,
370
- checkInterval: number
371
- ): Promise<any> {
642
+ checkInterval: number,
643
+ checkUrl?: string
644
+ ): Promise<this['version'] extends 'v0' ? CrawlStatusResponseV0 | FirecrawlDocumentV0[] : CrawlStatusResponse> {
645
+ let apiUrl: string = '';
372
646
  while (true) {
647
+ if (this.version === 'v1') {
648
+ apiUrl = checkUrl ?? `${this.apiUrl}/v1/crawl/${id}`;
649
+ } else if (this.version === 'v0') {
650
+ apiUrl = `${this.apiUrl}/v0/crawl/status/${id}`;
651
+ }
373
652
  const statusResponse: AxiosResponse = await this.getRequest(
374
- this.apiUrl + `/v0/crawl/status/${jobId}`,
653
+ apiUrl,
375
654
  headers
376
655
  );
377
656
  if (statusResponse.status === 200) {
378
657
  const statusData = statusResponse.data;
379
658
  if (statusData.status === "completed") {
380
659
  if ("data" in statusData) {
381
- return statusData.data;
660
+ return this.version === 'v0' ? statusData.data : statusData;
382
661
  } else {
383
662
  throw new Error("Crawl job completed but no data was returned");
384
663
  }
385
664
  } else if (
386
- ["active", "paused", "pending", "queued"].includes(statusData.status)
665
+ ["active", "paused", "pending", "queued", "scraping"].includes(statusData.status)
387
666
  ) {
388
- if (checkInterval < 2) {
389
- checkInterval = 2;
390
- }
667
+ checkInterval = Math.max(checkInterval, 2);
391
668
  await new Promise((resolve) =>
392
669
  setTimeout(resolve, checkInterval * 1000)
393
- ); // Wait for the specified timeout before checking again
670
+ );
394
671
  } else {
395
672
  throw new Error(
396
673
  `Crawl job failed or was stopped. Status: ${statusData.status}`