firecrawl 1.16.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -42,11 +42,9 @@ var import_isows = require("isows");
42
42
  var import_typescript_event_target = require("typescript-event-target");
43
43
  var FirecrawlError = class extends Error {
44
44
  statusCode;
45
- details;
46
- constructor(message, statusCode, details) {
45
+ constructor(message, statusCode) {
47
46
  super(message);
48
47
  this.statusCode = statusCode;
49
- this.details = details;
50
48
  }
51
49
  };
52
50
  var FirecrawlApp = class {
@@ -93,20 +91,6 @@ var FirecrawlApp = class {
93
91
  }
94
92
  };
95
93
  }
96
- if (jsonData?.jsonOptions?.schema) {
97
- let schema = jsonData.jsonOptions.schema;
98
- try {
99
- schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
100
- } catch (error) {
101
- }
102
- jsonData = {
103
- ...jsonData,
104
- jsonOptions: {
105
- ...jsonData.jsonOptions,
106
- schema
107
- }
108
- };
109
- }
110
94
  try {
111
95
  const response = await import_axios.default.post(
112
96
  this.apiUrl + `/v1/scrape`,
@@ -261,26 +245,16 @@ var FirecrawlApp = class {
261
245
  * Checks the status of a crawl job using the Firecrawl API.
262
246
  * @param id - The ID of the crawl operation.
263
247
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
264
- * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
265
- * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
266
- * @param limit - How many entries to return. Only used when `getAllData = false`.
267
248
  * @returns The response containing the job status.
268
249
  */
269
- async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) {
250
+ async checkCrawlStatus(id, getAllData = false) {
270
251
  if (!id) {
271
252
  throw new FirecrawlError("No crawl ID provided", 400);
272
253
  }
273
254
  const headers = this.prepareHeaders();
274
- const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`);
275
- if (skip !== void 0) {
276
- targetURL.searchParams.set("skip", skip.toString());
277
- }
278
- if (limit !== void 0) {
279
- targetURL.searchParams.set("limit", limit.toString());
280
- }
281
255
  try {
282
256
  const response = await this.getRequest(
283
- targetURL.href,
257
+ `${this.apiUrl}/v1/crawl/${id}`,
284
258
  headers
285
259
  );
286
260
  if (response.status === 200) {
@@ -305,7 +279,6 @@ var FirecrawlApp = class {
305
279
  total: response.data.total,
306
280
  completed: response.data.completed,
307
281
  creditsUsed: response.data.creditsUsed,
308
- next: getAllData ? void 0 : response.data.next,
309
282
  expiresAt: new Date(response.data.expiresAt),
310
283
  data: allData
311
284
  };
@@ -328,28 +301,6 @@ var FirecrawlApp = class {
328
301
  }
329
302
  return { success: false, error: "Internal server error." };
330
303
  }
331
- /**
332
- * Returns information about crawl errors.
333
- * @param id - The ID of the crawl operation.
334
- * @returns Information about crawl errors.
335
- */
336
- async checkCrawlErrors(id) {
337
- const headers = this.prepareHeaders();
338
- try {
339
- const response = await this.deleteRequest(
340
- `${this.apiUrl}/v1/crawl/${id}/errors`,
341
- headers
342
- );
343
- if (response.status === 200) {
344
- return response.data;
345
- } else {
346
- this.handleError(response, "check crawl errors");
347
- }
348
- } catch (error) {
349
- throw new FirecrawlError(error.message, 500);
350
- }
351
- return { success: false, error: "Internal server error." };
352
- }
353
304
  /**
354
305
  * Cancels a crawl job using the Firecrawl API.
355
306
  * @param id - The ID of the crawl operation.
@@ -438,20 +389,6 @@ var FirecrawlApp = class {
438
389
  }
439
390
  };
440
391
  }
441
- if (jsonData?.jsonOptions?.schema) {
442
- let schema = jsonData.jsonOptions.schema;
443
- try {
444
- schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
445
- } catch (error) {
446
- }
447
- jsonData = {
448
- ...jsonData,
449
- jsonOptions: {
450
- ...jsonData.jsonOptions,
451
- schema
452
- }
453
- };
454
- }
455
392
  try {
456
393
  const response = await this.postRequest(
457
394
  this.apiUrl + `/v1/batch/scrape`,
@@ -515,26 +452,16 @@ var FirecrawlApp = class {
515
452
  * Checks the status of a batch scrape job using the Firecrawl API.
516
453
  * @param id - The ID of the batch scrape operation.
517
454
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
518
- * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
519
- * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
520
- * @param limit - How many entries to return. Only used when `getAllData = false`.
521
455
  * @returns The response containing the job status.
522
456
  */
523
- async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) {
457
+ async checkBatchScrapeStatus(id, getAllData = false) {
524
458
  if (!id) {
525
459
  throw new FirecrawlError("No batch scrape ID provided", 400);
526
460
  }
527
461
  const headers = this.prepareHeaders();
528
- const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`);
529
- if (skip !== void 0) {
530
- targetURL.searchParams.set("skip", skip.toString());
531
- }
532
- if (limit !== void 0) {
533
- targetURL.searchParams.set("limit", limit.toString());
534
- }
535
462
  try {
536
463
  const response = await this.getRequest(
537
- targetURL.href,
464
+ `${this.apiUrl}/v1/batch/scrape/${id}`,
538
465
  headers
539
466
  );
540
467
  if (response.status === 200) {
@@ -559,7 +486,6 @@ var FirecrawlApp = class {
559
486
  total: response.data.total,
560
487
  completed: response.data.completed,
561
488
  creditsUsed: response.data.creditsUsed,
562
- next: getAllData ? void 0 : response.data.next,
563
489
  expiresAt: new Date(response.data.expiresAt),
564
490
  data: allData
565
491
  };
@@ -582,28 +508,6 @@ var FirecrawlApp = class {
582
508
  }
583
509
  return { success: false, error: "Internal server error." };
584
510
  }
585
- /**
586
- * Returns information about batch scrape errors.
587
- * @param id - The ID of the batch scrape operation.
588
- * @returns Information about batch scrape errors.
589
- */
590
- async checkBatchScrapeErrors(id) {
591
- const headers = this.prepareHeaders();
592
- try {
593
- const response = await this.deleteRequest(
594
- `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
595
- headers
596
- );
597
- if (response.status === 200) {
598
- return response.data;
599
- } else {
600
- this.handleError(response, "check batch scrape errors");
601
- }
602
- } catch (error) {
603
- throw new FirecrawlError(error.message, 500);
604
- }
605
- return { success: false, error: "Internal server error." };
606
- }
607
511
  /**
608
512
  * Extracts information from URLs using the Firecrawl API.
609
513
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -626,65 +530,6 @@ var FirecrawlApp = class {
626
530
  } catch (error) {
627
531
  throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
628
532
  }
629
- try {
630
- const response = await this.postRequest(
631
- this.apiUrl + `/v1/extract`,
632
- { ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" },
633
- headers
634
- );
635
- if (response.status === 200) {
636
- const jobId = response.data.id;
637
- let extractStatus;
638
- do {
639
- const statusResponse = await this.getRequest(
640
- `${this.apiUrl}/v1/extract/${jobId}`,
641
- headers
642
- );
643
- extractStatus = statusResponse.data;
644
- if (extractStatus.status === "completed") {
645
- if (extractStatus.success) {
646
- return {
647
- success: true,
648
- data: extractStatus.data,
649
- warning: extractStatus.warning,
650
- error: extractStatus.error
651
- };
652
- } else {
653
- throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
654
- }
655
- } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
656
- throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
657
- }
658
- await new Promise((resolve) => setTimeout(resolve, 1e3));
659
- } while (extractStatus.status !== "completed");
660
- } else {
661
- this.handleError(response, "extract");
662
- }
663
- } catch (error) {
664
- throw new FirecrawlError(error.message, 500, error.response?.data?.details);
665
- }
666
- return { success: false, error: "Internal server error." };
667
- }
668
- /**
669
- * Initiates an asynchronous extract job for a URL using the Firecrawl API.
670
- * @param url - The URL to extract data from.
671
- * @param params - Additional parameters for the extract request.
672
- * @param idempotencyKey - Optional idempotency key for the request.
673
- * @returns The response from the extract operation.
674
- */
675
- async asyncExtract(urls, params, idempotencyKey) {
676
- const headers = this.prepareHeaders(idempotencyKey);
677
- let jsonData = { urls, ...params };
678
- let jsonSchema;
679
- try {
680
- if (params?.schema instanceof zt.ZodType) {
681
- jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
682
- } else {
683
- jsonSchema = params?.schema;
684
- }
685
- } catch (error) {
686
- throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
687
- }
688
533
  try {
689
534
  const response = await this.postRequest(
690
535
  this.apiUrl + `/v1/extract`,
@@ -692,34 +537,24 @@ var FirecrawlApp = class {
692
537
  headers
693
538
  );
694
539
  if (response.status === 200) {
695
- return response.data;
696
- } else {
697
- this.handleError(response, "start extract job");
698
- }
699
- } catch (error) {
700
- throw new FirecrawlError(error.message, 500, error.response?.data?.details);
701
- }
702
- return { success: false, error: "Internal server error." };
703
- }
704
- /**
705
- * Retrieves the status of an extract job.
706
- * @param jobId - The ID of the extract job.
707
- * @returns The status of the extract job.
708
- */
709
- async getExtractStatus(jobId) {
710
- try {
711
- const response = await this.getRequest(
712
- `${this.apiUrl}/v1/extract/${jobId}`,
713
- this.prepareHeaders()
714
- );
715
- if (response.status === 200) {
716
- return response.data;
540
+ const responseData = response.data;
541
+ if (responseData.success) {
542
+ return {
543
+ success: true,
544
+ data: responseData.data,
545
+ warning: responseData.warning,
546
+ error: responseData.error
547
+ };
548
+ } else {
549
+ throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
550
+ }
717
551
  } else {
718
- this.handleError(response, "get extract status");
552
+ this.handleError(response, "extract");
719
553
  }
720
554
  } catch (error) {
721
555
  throw new FirecrawlError(error.message, 500);
722
556
  }
557
+ return { success: false, error: "Internal server error." };
723
558
  }
724
559
  /**
725
560
  * Prepares the headers for an API request.
@@ -835,13 +670,11 @@ var FirecrawlApp = class {
835
670
  * @param {string} action - The action being performed when the error occurred.
836
671
  */
837
672
  handleError(response, action) {
838
- if ([400, 402, 408, 409, 500].includes(response.status)) {
673
+ if ([402, 408, 409, 500].includes(response.status)) {
839
674
  const errorMessage = response.data.error || "Unknown error occurred";
840
- const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
841
675
  throw new FirecrawlError(
842
- `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}${details}`,
843
- response.status,
844
- response?.data?.details
676
+ `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`,
677
+ response.status
845
678
  );
846
679
  } else {
847
680
  throw new FirecrawlError(
@@ -859,8 +692,7 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
859
692
  constructor(id, app) {
860
693
  super();
861
694
  this.id = id;
862
- const wsUrl = app.apiUrl.replace(/^http/, "ws");
863
- this.ws = new import_isows.WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
695
+ this.ws = new import_isows.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
864
696
  this.status = "scraping";
865
697
  this.data = [];
866
698
  const messageHandler = (msg) => {
package/dist/index.d.cts CHANGED
@@ -61,7 +61,6 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
61
61
  rawHtml?: string;
62
62
  links?: string[];
63
63
  extract?: T;
64
- json?: T;
65
64
  screenshot?: string;
66
65
  metadata?: FirecrawlDocumentMetadata;
67
66
  actions: ActionsSchema;
@@ -73,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
73
72
  * Defines the options and configurations available for scraping web content.
74
73
  */
75
74
  interface CrawlScrapeOptions {
76
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
75
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
77
76
  headers?: Record<string, string>;
78
77
  includeTags?: string[];
79
78
  excludeTags?: string[];
@@ -87,7 +86,6 @@ interface CrawlScrapeOptions {
87
86
  mobile?: boolean;
88
87
  skipTlsVerification?: boolean;
89
88
  removeBase64Images?: boolean;
90
- blockAds?: boolean;
91
89
  }
92
90
  type Action = {
93
91
  type: "wait";
@@ -121,11 +119,6 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
121
119
  schema?: LLMSchema;
122
120
  systemPrompt?: string;
123
121
  };
124
- jsonOptions?: {
125
- prompt?: string;
126
- schema?: LLMSchema;
127
- systemPrompt?: string;
128
- };
129
122
  actions?: ActionsSchema;
130
123
  }
131
124
  interface ActionsResult {
@@ -157,7 +150,6 @@ interface CrawlParams {
157
150
  url: string;
158
151
  headers?: Record<string, string>;
159
152
  metadata?: Record<string, string>;
160
- events?: ["completed", "failed", "page", "started"][number][];
161
153
  };
162
154
  deduplicateSimilarURLs?: boolean;
163
155
  ignoreQueryParameters?: boolean;
@@ -221,7 +213,6 @@ interface MapParams {
221
213
  includeSubdomains?: boolean;
222
214
  sitemapOnly?: boolean;
223
215
  limit?: number;
224
- timeout?: number;
225
216
  }
226
217
  /**
227
218
  * Response interface for mapping operations.
@@ -241,9 +232,7 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
241
232
  schema?: LLMSchema | object;
242
233
  systemPrompt?: string;
243
234
  allowExternalLinks?: boolean;
244
- enableWebSearch?: boolean;
245
235
  includeSubdomains?: boolean;
246
- origin?: string;
247
236
  }
248
237
  /**
249
238
  * Response interface for extracting information from URLs.
@@ -269,8 +258,7 @@ interface ErrorResponse {
269
258
  */
270
259
  declare class FirecrawlError extends Error {
271
260
  statusCode: number;
272
- details?: any;
273
- constructor(message: string, statusCode: number, details?: any);
261
+ constructor(message: string, statusCode: number);
274
262
  }
275
263
  /**
276
264
  * Parameters for search operations.
@@ -297,24 +285,6 @@ interface SearchResponse {
297
285
  warning?: string;
298
286
  error?: string;
299
287
  }
300
- /**
301
- * Response interface for crawl/batch scrape error monitoring.
302
- */
303
- interface CrawlErrorsResponse {
304
- /**
305
- * Scrapes that errored out + error details
306
- */
307
- errors: {
308
- id: string;
309
- timestamp?: string;
310
- url: string;
311
- error: string;
312
- }[];
313
- /**
314
- * URLs blocked by robots.txt
315
- */
316
- robotsBlocked: string[];
317
- }
318
288
  /**
319
289
  * Main class for interacting with the Firecrawl API.
320
290
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -356,18 +326,9 @@ declare class FirecrawlApp {
356
326
  * Checks the status of a crawl job using the Firecrawl API.
357
327
  * @param id - The ID of the crawl operation.
358
328
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
359
- * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
360
- * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
361
- * @param limit - How many entries to return. Only used when `getAllData = false`.
362
329
  * @returns The response containing the job status.
363
330
  */
364
- checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
365
- /**
366
- * Returns information about crawl errors.
367
- * @param id - The ID of the crawl operation.
368
- * @returns Information about crawl errors.
369
- */
370
- checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
331
+ checkCrawlStatus(id?: string, getAllData?: boolean): Promise<CrawlStatusResponse | ErrorResponse>;
371
332
  /**
372
333
  * Cancels a crawl job using the Firecrawl API.
373
334
  * @param id - The ID of the crawl operation.
@@ -412,18 +373,9 @@ declare class FirecrawlApp {
412
373
  * Checks the status of a batch scrape job using the Firecrawl API.
413
374
  * @param id - The ID of the batch scrape operation.
414
375
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
415
- * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
416
- * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
417
- * @param limit - How many entries to return. Only used when `getAllData = false`.
418
376
  * @returns The response containing the job status.
419
377
  */
420
- checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
421
- /**
422
- * Returns information about batch scrape errors.
423
- * @param id - The ID of the batch scrape operation.
424
- * @returns Information about batch scrape errors.
425
- */
426
- checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
378
+ checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
427
379
  /**
428
380
  * Extracts information from URLs using the Firecrawl API.
429
381
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -432,20 +384,6 @@ declare class FirecrawlApp {
432
384
  * @returns The response from the extract operation.
433
385
  */
434
386
  extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
435
- /**
436
- * Initiates an asynchronous extract job for a URL using the Firecrawl API.
437
- * @param url - The URL to extract data from.
438
- * @param params - Additional parameters for the extract request.
439
- * @param idempotencyKey - Optional idempotency key for the request.
440
- * @returns The response from the extract operation.
441
- */
442
- asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
443
- /**
444
- * Retrieves the status of an extract job.
445
- * @param jobId - The ID of the extract job.
446
- * @returns The status of the extract job.
447
- */
448
- getExtractStatus(jobId: string): Promise<any>;
449
387
  /**
450
388
  * Prepares the headers for an API request.
451
389
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -511,4 +449,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
511
449
  close(): void;
512
450
  }
513
451
 
514
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
452
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.d.ts CHANGED
@@ -61,7 +61,6 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
61
61
  rawHtml?: string;
62
62
  links?: string[];
63
63
  extract?: T;
64
- json?: T;
65
64
  screenshot?: string;
66
65
  metadata?: FirecrawlDocumentMetadata;
67
66
  actions: ActionsSchema;
@@ -73,7 +72,7 @@ interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult | neve
73
72
  * Defines the options and configurations available for scraping web content.
74
73
  */
75
74
  interface CrawlScrapeOptions {
76
- formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json")[];
75
+ formats: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract")[];
77
76
  headers?: Record<string, string>;
78
77
  includeTags?: string[];
79
78
  excludeTags?: string[];
@@ -87,7 +86,6 @@ interface CrawlScrapeOptions {
87
86
  mobile?: boolean;
88
87
  skipTlsVerification?: boolean;
89
88
  removeBase64Images?: boolean;
90
- blockAds?: boolean;
91
89
  }
92
90
  type Action = {
93
91
  type: "wait";
@@ -121,11 +119,6 @@ interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchema exten
121
119
  schema?: LLMSchema;
122
120
  systemPrompt?: string;
123
121
  };
124
- jsonOptions?: {
125
- prompt?: string;
126
- schema?: LLMSchema;
127
- systemPrompt?: string;
128
- };
129
122
  actions?: ActionsSchema;
130
123
  }
131
124
  interface ActionsResult {
@@ -157,7 +150,6 @@ interface CrawlParams {
157
150
  url: string;
158
151
  headers?: Record<string, string>;
159
152
  metadata?: Record<string, string>;
160
- events?: ["completed", "failed", "page", "started"][number][];
161
153
  };
162
154
  deduplicateSimilarURLs?: boolean;
163
155
  ignoreQueryParameters?: boolean;
@@ -221,7 +213,6 @@ interface MapParams {
221
213
  includeSubdomains?: boolean;
222
214
  sitemapOnly?: boolean;
223
215
  limit?: number;
224
- timeout?: number;
225
216
  }
226
217
  /**
227
218
  * Response interface for mapping operations.
@@ -241,9 +232,7 @@ interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
241
232
  schema?: LLMSchema | object;
242
233
  systemPrompt?: string;
243
234
  allowExternalLinks?: boolean;
244
- enableWebSearch?: boolean;
245
235
  includeSubdomains?: boolean;
246
- origin?: string;
247
236
  }
248
237
  /**
249
238
  * Response interface for extracting information from URLs.
@@ -269,8 +258,7 @@ interface ErrorResponse {
269
258
  */
270
259
  declare class FirecrawlError extends Error {
271
260
  statusCode: number;
272
- details?: any;
273
- constructor(message: string, statusCode: number, details?: any);
261
+ constructor(message: string, statusCode: number);
274
262
  }
275
263
  /**
276
264
  * Parameters for search operations.
@@ -297,24 +285,6 @@ interface SearchResponse {
297
285
  warning?: string;
298
286
  error?: string;
299
287
  }
300
- /**
301
- * Response interface for crawl/batch scrape error monitoring.
302
- */
303
- interface CrawlErrorsResponse {
304
- /**
305
- * Scrapes that errored out + error details
306
- */
307
- errors: {
308
- id: string;
309
- timestamp?: string;
310
- url: string;
311
- error: string;
312
- }[];
313
- /**
314
- * URLs blocked by robots.txt
315
- */
316
- robotsBlocked: string[];
317
- }
318
288
  /**
319
289
  * Main class for interacting with the Firecrawl API.
320
290
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -356,18 +326,9 @@ declare class FirecrawlApp {
356
326
  * Checks the status of a crawl job using the Firecrawl API.
357
327
  * @param id - The ID of the crawl operation.
358
328
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
359
- * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
360
- * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
361
- * @param limit - How many entries to return. Only used when `getAllData = false`.
362
329
  * @returns The response containing the job status.
363
330
  */
364
- checkCrawlStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<CrawlStatusResponse | ErrorResponse>;
365
- /**
366
- * Returns information about crawl errors.
367
- * @param id - The ID of the crawl operation.
368
- * @returns Information about crawl errors.
369
- */
370
- checkCrawlErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
331
+ checkCrawlStatus(id?: string, getAllData?: boolean): Promise<CrawlStatusResponse | ErrorResponse>;
371
332
  /**
372
333
  * Cancels a crawl job using the Firecrawl API.
373
334
  * @param id - The ID of the crawl operation.
@@ -412,18 +373,9 @@ declare class FirecrawlApp {
412
373
  * Checks the status of a batch scrape job using the Firecrawl API.
413
374
  * @param id - The ID of the batch scrape operation.
414
375
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
415
- * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
416
- * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
417
- * @param limit - How many entries to return. Only used when `getAllData = false`.
418
376
  * @returns The response containing the job status.
419
377
  */
420
- checkBatchScrapeStatus(id?: string, getAllData?: boolean, nextURL?: string, skip?: number, limit?: number): Promise<BatchScrapeStatusResponse | ErrorResponse>;
421
- /**
422
- * Returns information about batch scrape errors.
423
- * @param id - The ID of the batch scrape operation.
424
- * @returns Information about batch scrape errors.
425
- */
426
- checkBatchScrapeErrors(id: string): Promise<CrawlErrorsResponse | ErrorResponse>;
378
+ checkBatchScrapeStatus(id?: string, getAllData?: boolean): Promise<BatchScrapeStatusResponse | ErrorResponse>;
427
379
  /**
428
380
  * Extracts information from URLs using the Firecrawl API.
429
381
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -432,20 +384,6 @@ declare class FirecrawlApp {
432
384
  * @returns The response from the extract operation.
433
385
  */
434
386
  extract<T extends zt.ZodSchema = any>(urls: string[], params?: ExtractParams<T>): Promise<ExtractResponse<zt.infer<T>> | ErrorResponse>;
435
- /**
436
- * Initiates an asynchronous extract job for a URL using the Firecrawl API.
437
- * @param url - The URL to extract data from.
438
- * @param params - Additional parameters for the extract request.
439
- * @param idempotencyKey - Optional idempotency key for the request.
440
- * @returns The response from the extract operation.
441
- */
442
- asyncExtract(urls: string[], params?: ExtractParams, idempotencyKey?: string): Promise<ExtractResponse | ErrorResponse>;
443
- /**
444
- * Retrieves the status of an extract job.
445
- * @param jobId - The ID of the extract job.
446
- * @returns The status of the extract job.
447
- */
448
- getExtractStatus(jobId: string): Promise<any>;
449
387
  /**
450
388
  * Prepares the headers for an API request.
451
389
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -511,4 +449,4 @@ declare class CrawlWatcher extends TypedEventTarget<CrawlWatcherEvents> {
511
449
  close(): void;
512
450
  }
513
451
 
514
- export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlErrorsResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
452
+ export { type Action, type ActionsResult, type BatchScrapeResponse, type BatchScrapeStatusResponse, type CrawlParams, type CrawlResponse, type CrawlScrapeOptions, type CrawlStatusResponse, CrawlWatcher, type ErrorResponse, type ExtractParams, type ExtractResponse, type FirecrawlAppConfig, type FirecrawlDocument, type FirecrawlDocumentMetadata, FirecrawlError, type MapParams, type MapResponse, type ScrapeParams, type ScrapeResponse, type SearchParams, type SearchResponse, FirecrawlApp as default };
package/dist/index.js CHANGED
@@ -6,11 +6,9 @@ import { WebSocket } from "isows";
6
6
  import { TypedEventTarget } from "typescript-event-target";
7
7
  var FirecrawlError = class extends Error {
8
8
  statusCode;
9
- details;
10
- constructor(message, statusCode, details) {
9
+ constructor(message, statusCode) {
11
10
  super(message);
12
11
  this.statusCode = statusCode;
13
- this.details = details;
14
12
  }
15
13
  };
16
14
  var FirecrawlApp = class {
@@ -57,20 +55,6 @@ var FirecrawlApp = class {
57
55
  }
58
56
  };
59
57
  }
60
- if (jsonData?.jsonOptions?.schema) {
61
- let schema = jsonData.jsonOptions.schema;
62
- try {
63
- schema = zodToJsonSchema(schema);
64
- } catch (error) {
65
- }
66
- jsonData = {
67
- ...jsonData,
68
- jsonOptions: {
69
- ...jsonData.jsonOptions,
70
- schema
71
- }
72
- };
73
- }
74
58
  try {
75
59
  const response = await axios.post(
76
60
  this.apiUrl + `/v1/scrape`,
@@ -225,26 +209,16 @@ var FirecrawlApp = class {
225
209
  * Checks the status of a crawl job using the Firecrawl API.
226
210
  * @param id - The ID of the crawl operation.
227
211
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
228
- * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
229
- * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
230
- * @param limit - How many entries to return. Only used when `getAllData = false`.
231
212
  * @returns The response containing the job status.
232
213
  */
233
- async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) {
214
+ async checkCrawlStatus(id, getAllData = false) {
234
215
  if (!id) {
235
216
  throw new FirecrawlError("No crawl ID provided", 400);
236
217
  }
237
218
  const headers = this.prepareHeaders();
238
- const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`);
239
- if (skip !== void 0) {
240
- targetURL.searchParams.set("skip", skip.toString());
241
- }
242
- if (limit !== void 0) {
243
- targetURL.searchParams.set("limit", limit.toString());
244
- }
245
219
  try {
246
220
  const response = await this.getRequest(
247
- targetURL.href,
221
+ `${this.apiUrl}/v1/crawl/${id}`,
248
222
  headers
249
223
  );
250
224
  if (response.status === 200) {
@@ -269,7 +243,6 @@ var FirecrawlApp = class {
269
243
  total: response.data.total,
270
244
  completed: response.data.completed,
271
245
  creditsUsed: response.data.creditsUsed,
272
- next: getAllData ? void 0 : response.data.next,
273
246
  expiresAt: new Date(response.data.expiresAt),
274
247
  data: allData
275
248
  };
@@ -292,28 +265,6 @@ var FirecrawlApp = class {
292
265
  }
293
266
  return { success: false, error: "Internal server error." };
294
267
  }
295
- /**
296
- * Returns information about crawl errors.
297
- * @param id - The ID of the crawl operation.
298
- * @returns Information about crawl errors.
299
- */
300
- async checkCrawlErrors(id) {
301
- const headers = this.prepareHeaders();
302
- try {
303
- const response = await this.deleteRequest(
304
- `${this.apiUrl}/v1/crawl/${id}/errors`,
305
- headers
306
- );
307
- if (response.status === 200) {
308
- return response.data;
309
- } else {
310
- this.handleError(response, "check crawl errors");
311
- }
312
- } catch (error) {
313
- throw new FirecrawlError(error.message, 500);
314
- }
315
- return { success: false, error: "Internal server error." };
316
- }
317
268
  /**
318
269
  * Cancels a crawl job using the Firecrawl API.
319
270
  * @param id - The ID of the crawl operation.
@@ -402,20 +353,6 @@ var FirecrawlApp = class {
402
353
  }
403
354
  };
404
355
  }
405
- if (jsonData?.jsonOptions?.schema) {
406
- let schema = jsonData.jsonOptions.schema;
407
- try {
408
- schema = zodToJsonSchema(schema);
409
- } catch (error) {
410
- }
411
- jsonData = {
412
- ...jsonData,
413
- jsonOptions: {
414
- ...jsonData.jsonOptions,
415
- schema
416
- }
417
- };
418
- }
419
356
  try {
420
357
  const response = await this.postRequest(
421
358
  this.apiUrl + `/v1/batch/scrape`,
@@ -479,26 +416,16 @@ var FirecrawlApp = class {
479
416
  * Checks the status of a batch scrape job using the Firecrawl API.
480
417
  * @param id - The ID of the batch scrape operation.
481
418
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
482
- * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
483
- * @param skip - How many entries to skip to paginate. Only used when `getAllData = false`.
484
- * @param limit - How many entries to return. Only used when `getAllData = false`.
485
419
  * @returns The response containing the job status.
486
420
  */
487
- async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) {
421
+ async checkBatchScrapeStatus(id, getAllData = false) {
488
422
  if (!id) {
489
423
  throw new FirecrawlError("No batch scrape ID provided", 400);
490
424
  }
491
425
  const headers = this.prepareHeaders();
492
- const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`);
493
- if (skip !== void 0) {
494
- targetURL.searchParams.set("skip", skip.toString());
495
- }
496
- if (limit !== void 0) {
497
- targetURL.searchParams.set("limit", limit.toString());
498
- }
499
426
  try {
500
427
  const response = await this.getRequest(
501
- targetURL.href,
428
+ `${this.apiUrl}/v1/batch/scrape/${id}`,
502
429
  headers
503
430
  );
504
431
  if (response.status === 200) {
@@ -523,7 +450,6 @@ var FirecrawlApp = class {
523
450
  total: response.data.total,
524
451
  completed: response.data.completed,
525
452
  creditsUsed: response.data.creditsUsed,
526
- next: getAllData ? void 0 : response.data.next,
527
453
  expiresAt: new Date(response.data.expiresAt),
528
454
  data: allData
529
455
  };
@@ -546,28 +472,6 @@ var FirecrawlApp = class {
546
472
  }
547
473
  return { success: false, error: "Internal server error." };
548
474
  }
549
- /**
550
- * Returns information about batch scrape errors.
551
- * @param id - The ID of the batch scrape operation.
552
- * @returns Information about batch scrape errors.
553
- */
554
- async checkBatchScrapeErrors(id) {
555
- const headers = this.prepareHeaders();
556
- try {
557
- const response = await this.deleteRequest(
558
- `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
559
- headers
560
- );
561
- if (response.status === 200) {
562
- return response.data;
563
- } else {
564
- this.handleError(response, "check batch scrape errors");
565
- }
566
- } catch (error) {
567
- throw new FirecrawlError(error.message, 500);
568
- }
569
- return { success: false, error: "Internal server error." };
570
- }
571
475
  /**
572
476
  * Extracts information from URLs using the Firecrawl API.
573
477
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -590,65 +494,6 @@ var FirecrawlApp = class {
590
494
  } catch (error) {
591
495
  throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
592
496
  }
593
- try {
594
- const response = await this.postRequest(
595
- this.apiUrl + `/v1/extract`,
596
- { ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" },
597
- headers
598
- );
599
- if (response.status === 200) {
600
- const jobId = response.data.id;
601
- let extractStatus;
602
- do {
603
- const statusResponse = await this.getRequest(
604
- `${this.apiUrl}/v1/extract/${jobId}`,
605
- headers
606
- );
607
- extractStatus = statusResponse.data;
608
- if (extractStatus.status === "completed") {
609
- if (extractStatus.success) {
610
- return {
611
- success: true,
612
- data: extractStatus.data,
613
- warning: extractStatus.warning,
614
- error: extractStatus.error
615
- };
616
- } else {
617
- throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
618
- }
619
- } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
620
- throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
621
- }
622
- await new Promise((resolve) => setTimeout(resolve, 1e3));
623
- } while (extractStatus.status !== "completed");
624
- } else {
625
- this.handleError(response, "extract");
626
- }
627
- } catch (error) {
628
- throw new FirecrawlError(error.message, 500, error.response?.data?.details);
629
- }
630
- return { success: false, error: "Internal server error." };
631
- }
632
- /**
633
- * Initiates an asynchronous extract job for a URL using the Firecrawl API.
634
- * @param url - The URL to extract data from.
635
- * @param params - Additional parameters for the extract request.
636
- * @param idempotencyKey - Optional idempotency key for the request.
637
- * @returns The response from the extract operation.
638
- */
639
- async asyncExtract(urls, params, idempotencyKey) {
640
- const headers = this.prepareHeaders(idempotencyKey);
641
- let jsonData = { urls, ...params };
642
- let jsonSchema;
643
- try {
644
- if (params?.schema instanceof zt.ZodType) {
645
- jsonSchema = zodToJsonSchema(params.schema);
646
- } else {
647
- jsonSchema = params?.schema;
648
- }
649
- } catch (error) {
650
- throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
651
- }
652
497
  try {
653
498
  const response = await this.postRequest(
654
499
  this.apiUrl + `/v1/extract`,
@@ -656,34 +501,24 @@ var FirecrawlApp = class {
656
501
  headers
657
502
  );
658
503
  if (response.status === 200) {
659
- return response.data;
660
- } else {
661
- this.handleError(response, "start extract job");
662
- }
663
- } catch (error) {
664
- throw new FirecrawlError(error.message, 500, error.response?.data?.details);
665
- }
666
- return { success: false, error: "Internal server error." };
667
- }
668
- /**
669
- * Retrieves the status of an extract job.
670
- * @param jobId - The ID of the extract job.
671
- * @returns The status of the extract job.
672
- */
673
- async getExtractStatus(jobId) {
674
- try {
675
- const response = await this.getRequest(
676
- `${this.apiUrl}/v1/extract/${jobId}`,
677
- this.prepareHeaders()
678
- );
679
- if (response.status === 200) {
680
- return response.data;
504
+ const responseData = response.data;
505
+ if (responseData.success) {
506
+ return {
507
+ success: true,
508
+ data: responseData.data,
509
+ warning: responseData.warning,
510
+ error: responseData.error
511
+ };
512
+ } else {
513
+ throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
514
+ }
681
515
  } else {
682
- this.handleError(response, "get extract status");
516
+ this.handleError(response, "extract");
683
517
  }
684
518
  } catch (error) {
685
519
  throw new FirecrawlError(error.message, 500);
686
520
  }
521
+ return { success: false, error: "Internal server error." };
687
522
  }
688
523
  /**
689
524
  * Prepares the headers for an API request.
@@ -799,13 +634,11 @@ var FirecrawlApp = class {
799
634
  * @param {string} action - The action being performed when the error occurred.
800
635
  */
801
636
  handleError(response, action) {
802
- if ([400, 402, 408, 409, 500].includes(response.status)) {
637
+ if ([402, 408, 409, 500].includes(response.status)) {
803
638
  const errorMessage = response.data.error || "Unknown error occurred";
804
- const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
805
639
  throw new FirecrawlError(
806
- `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}${details}`,
807
- response.status,
808
- response?.data?.details
640
+ `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`,
641
+ response.status
809
642
  );
810
643
  } else {
811
644
  throw new FirecrawlError(
@@ -823,8 +656,7 @@ var CrawlWatcher = class extends TypedEventTarget {
823
656
  constructor(id, app) {
824
657
  super();
825
658
  this.id = id;
826
- const wsUrl = app.apiUrl.replace(/^http/, "ws");
827
- this.ws = new WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
659
+ this.ws = new WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
828
660
  this.status = "scraping";
829
661
  this.data = [];
830
662
  const messageHandler = (msg) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "firecrawl",
3
- "version": "1.16.0",
3
+ "version": "1.18.0",
4
4
  "description": "JavaScript SDK for Firecrawl API",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
package/src/index.ts CHANGED
@@ -94,6 +94,7 @@ export interface CrawlScrapeOptions {
94
94
  skipTlsVerification?: boolean;
95
95
  removeBase64Images?: boolean;
96
96
  blockAds?: boolean;
97
+ proxy?: "basic" | "stealth";
97
98
  }
98
99
 
99
100
  export type Action = {
@@ -262,6 +263,8 @@ export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
262
263
  enableWebSearch?: boolean;
263
264
  includeSubdomains?: boolean;
264
265
  origin?: string;
266
+ showSources?: boolean;
267
+ scrapeOptions?: CrawlScrapeOptions;
265
268
  }
266
269
 
267
270
  /**
@@ -273,6 +276,7 @@ export interface ExtractResponse<LLMSchema extends zt.ZodSchema = any> {
273
276
  data: LLMSchema;
274
277
  error?: string;
275
278
  warning?: string;
279
+ sources?: string[];
276
280
  }
277
281
 
278
282
  /**
@@ -345,6 +349,70 @@ export interface CrawlErrorsResponse {
345
349
  robotsBlocked: string[];
346
350
  };
347
351
 
352
+ /**
353
+ * Parameters for deep research operations.
354
+ * Defines options for conducting deep research on a topic.
355
+ */
356
+ export interface DeepResearchParams {
357
+ /**
358
+ * Maximum depth of research iterations (1-10)
359
+ * @default 7
360
+ */
361
+ maxDepth?: number;
362
+ /**
363
+ * Time limit in seconds (30-300)
364
+ * @default 270
365
+ */
366
+ timeLimit?: number;
367
+ /**
368
+ * Experimental flag for streaming steps
369
+ */
370
+ __experimental_streamSteps?: boolean;
371
+ }
372
+
373
+ /**
374
+ * Response interface for deep research operations.
375
+ */
376
+ export interface DeepResearchResponse {
377
+ success: boolean;
378
+ id: string;
379
+ }
380
+
381
+ /**
382
+ * Status response interface for deep research operations.
383
+ */
384
+ export interface DeepResearchStatusResponse {
385
+ success: boolean;
386
+ data: {
387
+ findings: Array<{
388
+ text: string;
389
+ source: string;
390
+ }>;
391
+ finalAnalysis: string;
392
+ analysis: string;
393
+ completedSteps: number;
394
+ totalSteps: number;
395
+ };
396
+ status: "processing" | "completed" | "failed";
397
+ error?: string;
398
+ expiresAt: string;
399
+ currentDepth: number;
400
+ maxDepth: number;
401
+ activities: Array<{
402
+ type: string;
403
+ status: string;
404
+ message: string;
405
+ timestamp: string;
406
+ depth: number;
407
+ }>;
408
+ sources: Array<{
409
+ url: string;
410
+ title: string;
411
+ description: string;
412
+ }>;
413
+ summaries: string[];
414
+ }
415
+
348
416
  /**
349
417
  * Main class for interacting with the Firecrawl API.
350
418
  * Provides methods for scraping, searching, crawling, and mapping web content.
@@ -1041,7 +1109,8 @@ export default class FirecrawlApp {
1041
1109
  success: true,
1042
1110
  data: extractStatus.data,
1043
1111
  warning: extractStatus.warning,
1044
- error: extractStatus.error
1112
+ error: extractStatus.error,
1113
+ sources: extractStatus?.sources || undefined,
1045
1114
  };
1046
1115
  } else {
1047
1116
  throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
@@ -1277,6 +1346,119 @@ export default class FirecrawlApp {
1277
1346
  );
1278
1347
  }
1279
1348
  }
1349
+
1350
+ /**
1351
+ * Initiates a deep research operation on a given topic and polls until completion.
1352
+ * @param params - Parameters for the deep research operation.
1353
+ * @returns The final research results.
1354
+ */
1355
+ async __deepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchStatusResponse | ErrorResponse> {
1356
+ try {
1357
+ const response = await this.__asyncDeepResearch(topic, params);
1358
+
1359
+ if (!response.success || 'error' in response) {
1360
+ return { success: false, error: 'error' in response ? response.error : 'Unknown error' };
1361
+ }
1362
+
1363
+ if (!response.id) {
1364
+ throw new FirecrawlError(`Failed to start research. No job ID returned.`, 500);
1365
+ }
1366
+
1367
+ const jobId = response.id;
1368
+ let researchStatus;
1369
+
1370
+ while (true) {
1371
+ // console.log("Checking research status...");
1372
+ researchStatus = await this.__checkDeepResearchStatus(jobId);
1373
+ // console.log("Research status:", researchStatus);
1374
+
1375
+ if ('error' in researchStatus && !researchStatus.success) {
1376
+ return researchStatus;
1377
+ }
1378
+
1379
+ if (researchStatus.status === "completed") {
1380
+ return researchStatus;
1381
+ }
1382
+
1383
+ if (researchStatus.status === "failed") {
1384
+ throw new FirecrawlError(
1385
+ `Research job ${researchStatus.status}. Error: ${researchStatus.error}`,
1386
+ 500
1387
+ );
1388
+ }
1389
+
1390
+ if (researchStatus.status !== "processing") {
1391
+ break;
1392
+ }
1393
+
1394
+ await new Promise(resolve => setTimeout(resolve, 2000));
1395
+ }
1396
+ // console.log("Research status finished:", researchStatus);
1397
+
1398
+ return { success: false, error: "Research job terminated unexpectedly" };
1399
+ } catch (error: any) {
1400
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
1401
+ }
1402
+ }
1403
+
1404
+ /**
1405
+ * Initiates a deep research operation on a given topic without polling.
1406
+ * @param params - Parameters for the deep research operation.
1407
+ * @returns The response containing the research job ID.
1408
+ */
1409
+ async __asyncDeepResearch(topic: string, params: DeepResearchParams): Promise<DeepResearchResponse | ErrorResponse> {
1410
+ const headers = this.prepareHeaders();
1411
+ try {
1412
+ const response: AxiosResponse = await this.postRequest(
1413
+ `${this.apiUrl}/v1/deep-research`,
1414
+ { topic, ...params },
1415
+ headers
1416
+ );
1417
+
1418
+ if (response.status === 200) {
1419
+ return response.data;
1420
+ } else {
1421
+ this.handleError(response, "start deep research");
1422
+ }
1423
+ } catch (error: any) {
1424
+ if (error.response?.data?.error) {
1425
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
1426
+ } else {
1427
+ throw new FirecrawlError(error.message, 500);
1428
+ }
1429
+ }
1430
+ return { success: false, error: "Internal server error." };
1431
+ }
1432
+
1433
+ /**
1434
+ * Checks the status of a deep research operation.
1435
+ * @param id - The ID of the deep research operation.
1436
+ * @returns The current status and results of the research operation.
1437
+ */
1438
+ async __checkDeepResearchStatus(id: string): Promise<DeepResearchStatusResponse | ErrorResponse> {
1439
+ const headers = this.prepareHeaders();
1440
+ try {
1441
+ const response: AxiosResponse = await this.getRequest(
1442
+ `${this.apiUrl}/v1/deep-research/${id}`,
1443
+ headers
1444
+ );
1445
+
1446
+ if (response.status === 200) {
1447
+ return response.data;
1448
+ } else if (response.status === 404) {
1449
+ throw new FirecrawlError("Deep research job not found", 404);
1450
+ } else {
1451
+ this.handleError(response, "check deep research status");
1452
+ }
1453
+ } catch (error: any) {
1454
+ if (error.response?.data?.error) {
1455
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ''}`, error.response.status);
1456
+ } else {
1457
+ throw new FirecrawlError(error.message, 500);
1458
+ }
1459
+ }
1460
+ return { success: false, error: "Internal server error." };
1461
+ }
1280
1462
  }
1281
1463
 
1282
1464
  interface CrawlWatcherEvents {
package/dump.rdb DELETED
Binary file