firecrawl 1.18.1 → 1.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -42,9 +42,11 @@ var import_isows = require("isows");
42
42
  var import_typescript_event_target = require("typescript-event-target");
43
43
  var FirecrawlError = class extends Error {
44
44
  statusCode;
45
- constructor(message, statusCode) {
45
+ details;
46
+ constructor(message, statusCode, details) {
46
47
  super(message);
47
48
  this.statusCode = statusCode;
49
+ this.details = details;
48
50
  }
49
51
  };
50
52
  var FirecrawlApp = class {
@@ -91,6 +93,20 @@ var FirecrawlApp = class {
91
93
  }
92
94
  };
93
95
  }
96
+ if (jsonData?.jsonOptions?.schema) {
97
+ let schema = jsonData.jsonOptions.schema;
98
+ try {
99
+ schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
100
+ } catch (error) {
101
+ }
102
+ jsonData = {
103
+ ...jsonData,
104
+ jsonOptions: {
105
+ ...jsonData.jsonOptions,
106
+ schema
107
+ }
108
+ };
109
+ }
94
110
  try {
95
111
  const response = await import_axios.default.post(
96
112
  this.apiUrl + `/v1/scrape`,
@@ -245,16 +261,26 @@ var FirecrawlApp = class {
245
261
  * Checks the status of a crawl job using the Firecrawl API.
246
262
  * @param id - The ID of the crawl operation.
247
263
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
264
+ * @param nextURL - The `next` URL from the previous crawl status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
265
+ * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
266
+ * @param limit - How many entries to return. Only used when `getAllData = false`.
248
267
  * @returns The response containing the job status.
249
268
  */
250
- async checkCrawlStatus(id, getAllData = false) {
269
+ async checkCrawlStatus(id, getAllData = false, nextURL, skip, limit) {
251
270
  if (!id) {
252
271
  throw new FirecrawlError("No crawl ID provided", 400);
253
272
  }
254
273
  const headers = this.prepareHeaders();
274
+ const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/crawl/${id}`);
275
+ if (skip !== void 0) {
276
+ targetURL.searchParams.set("skip", skip.toString());
277
+ }
278
+ if (limit !== void 0) {
279
+ targetURL.searchParams.set("limit", limit.toString());
280
+ }
255
281
  try {
256
282
  const response = await this.getRequest(
257
- `${this.apiUrl}/v1/crawl/${id}`,
283
+ targetURL.href,
258
284
  headers
259
285
  );
260
286
  if (response.status === 200) {
@@ -279,6 +305,7 @@ var FirecrawlApp = class {
279
305
  total: response.data.total,
280
306
  completed: response.data.completed,
281
307
  creditsUsed: response.data.creditsUsed,
308
+ next: getAllData ? void 0 : response.data.next,
282
309
  expiresAt: new Date(response.data.expiresAt),
283
310
  data: allData
284
311
  };
@@ -301,6 +328,28 @@ var FirecrawlApp = class {
301
328
  }
302
329
  return { success: false, error: "Internal server error." };
303
330
  }
331
+ /**
332
+ * Returns information about crawl errors.
333
+ * @param id - The ID of the crawl operation.
334
+ * @returns Information about crawl errors.
335
+ */
336
+ async checkCrawlErrors(id) {
337
+ const headers = this.prepareHeaders();
338
+ try {
339
+ const response = await this.deleteRequest(
340
+ `${this.apiUrl}/v1/crawl/${id}/errors`,
341
+ headers
342
+ );
343
+ if (response.status === 200) {
344
+ return response.data;
345
+ } else {
346
+ this.handleError(response, "check crawl errors");
347
+ }
348
+ } catch (error) {
349
+ throw new FirecrawlError(error.message, 500);
350
+ }
351
+ return { success: false, error: "Internal server error." };
352
+ }
304
353
  /**
305
354
  * Cancels a crawl job using the Firecrawl API.
306
355
  * @param id - The ID of the crawl operation.
@@ -389,6 +438,20 @@ var FirecrawlApp = class {
389
438
  }
390
439
  };
391
440
  }
441
+ if (jsonData?.jsonOptions?.schema) {
442
+ let schema = jsonData.jsonOptions.schema;
443
+ try {
444
+ schema = (0, import_zod_to_json_schema.zodToJsonSchema)(schema);
445
+ } catch (error) {
446
+ }
447
+ jsonData = {
448
+ ...jsonData,
449
+ jsonOptions: {
450
+ ...jsonData.jsonOptions,
451
+ schema
452
+ }
453
+ };
454
+ }
392
455
  try {
393
456
  const response = await this.postRequest(
394
457
  this.apiUrl + `/v1/batch/scrape`,
@@ -452,16 +515,26 @@ var FirecrawlApp = class {
452
515
  * Checks the status of a batch scrape job using the Firecrawl API.
453
516
  * @param id - The ID of the batch scrape operation.
454
517
  * @param getAllData - Paginate through all the pages of documents, returning the full list of all documents. (default: `false`)
518
+ * @param nextURL - The `next` URL from the previous batch scrape status. Only required if you're not manually increasing `skip`. Only used when `getAllData = false`.
519
+ * @param skip - How many entries to skip to paginate. Only required if you're not providing `nextURL`. Only used when `getAllData = false`.
520
+ * @param limit - How many entries to return. Only used when `getAllData = false`.
455
521
  * @returns The response containing the job status.
456
522
  */
457
- async checkBatchScrapeStatus(id, getAllData = false) {
523
+ async checkBatchScrapeStatus(id, getAllData = false, nextURL, skip, limit) {
458
524
  if (!id) {
459
525
  throw new FirecrawlError("No batch scrape ID provided", 400);
460
526
  }
461
527
  const headers = this.prepareHeaders();
528
+ const targetURL = new URL(nextURL ?? `${this.apiUrl}/v1/batch/scrape/${id}`);
529
+ if (skip !== void 0) {
530
+ targetURL.searchParams.set("skip", skip.toString());
531
+ }
532
+ if (limit !== void 0) {
533
+ targetURL.searchParams.set("limit", limit.toString());
534
+ }
462
535
  try {
463
536
  const response = await this.getRequest(
464
- `${this.apiUrl}/v1/batch/scrape/${id}`,
537
+ targetURL.href,
465
538
  headers
466
539
  );
467
540
  if (response.status === 200) {
@@ -486,6 +559,7 @@ var FirecrawlApp = class {
486
559
  total: response.data.total,
487
560
  completed: response.data.completed,
488
561
  creditsUsed: response.data.creditsUsed,
562
+ next: getAllData ? void 0 : response.data.next,
489
563
  expiresAt: new Date(response.data.expiresAt),
490
564
  data: allData
491
565
  };
@@ -508,6 +582,28 @@ var FirecrawlApp = class {
508
582
  }
509
583
  return { success: false, error: "Internal server error." };
510
584
  }
585
+ /**
586
+ * Returns information about batch scrape errors.
587
+ * @param id - The ID of the batch scrape operation.
588
+ * @returns Information about batch scrape errors.
589
+ */
590
+ async checkBatchScrapeErrors(id) {
591
+ const headers = this.prepareHeaders();
592
+ try {
593
+ const response = await this.deleteRequest(
594
+ `${this.apiUrl}/v1/batch/scrape/${id}/errors`,
595
+ headers
596
+ );
597
+ if (response.status === 200) {
598
+ return response.data;
599
+ } else {
600
+ this.handleError(response, "check batch scrape errors");
601
+ }
602
+ } catch (error) {
603
+ throw new FirecrawlError(error.message, 500);
604
+ }
605
+ return { success: false, error: "Internal server error." };
606
+ }
511
607
  /**
512
608
  * Extracts information from URLs using the Firecrawl API.
513
609
  * Currently in Beta. Expect breaking changes on future minor versions.
@@ -533,29 +629,99 @@ var FirecrawlApp = class {
533
629
  try {
534
630
  const response = await this.postRequest(
535
631
  this.apiUrl + `/v1/extract`,
536
- { ...jsonData, schema: jsonSchema },
632
+ { ...jsonData, schema: jsonSchema, origin: params?.origin || "api-sdk" },
537
633
  headers
538
634
  );
539
635
  if (response.status === 200) {
540
- const responseData = response.data;
541
- if (responseData.success) {
542
- return {
543
- success: true,
544
- data: responseData.data,
545
- warning: responseData.warning,
546
- error: responseData.error
547
- };
548
- } else {
549
- throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
550
- }
636
+ const jobId = response.data.id;
637
+ let extractStatus;
638
+ do {
639
+ const statusResponse = await this.getRequest(
640
+ `${this.apiUrl}/v1/extract/${jobId}`,
641
+ headers
642
+ );
643
+ extractStatus = statusResponse.data;
644
+ if (extractStatus.status === "completed") {
645
+ if (extractStatus.success) {
646
+ return {
647
+ success: true,
648
+ data: extractStatus.data,
649
+ warning: extractStatus.warning,
650
+ error: extractStatus.error,
651
+ sources: extractStatus?.sources || void 0
652
+ };
653
+ } else {
654
+ throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
655
+ }
656
+ } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
657
+ throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
658
+ }
659
+ await new Promise((resolve) => setTimeout(resolve, 1e3));
660
+ } while (extractStatus.status !== "completed");
551
661
  } else {
552
662
  this.handleError(response, "extract");
553
663
  }
554
664
  } catch (error) {
555
- throw new FirecrawlError(error.message, 500);
665
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
666
+ }
667
+ return { success: false, error: "Internal server error." };
668
+ }
669
+ /**
670
+ * Initiates an asynchronous extract job for the given URLs using the Firecrawl API.
671
+ * @param urls - The URLs to extract data from.
672
+ * @param params - Additional parameters for the extract request.
673
+ * @param idempotencyKey - Optional idempotency key for the request.
674
+ * @returns The response from the extract operation.
675
+ */
676
+ async asyncExtract(urls, params, idempotencyKey) {
677
+ const headers = this.prepareHeaders(idempotencyKey);
678
+ let jsonData = { urls, ...params };
679
+ let jsonSchema;
680
+ try {
681
+ if (params?.schema instanceof zt.ZodType) {
682
+ jsonSchema = (0, import_zod_to_json_schema.zodToJsonSchema)(params.schema);
683
+ } else {
684
+ jsonSchema = params?.schema;
685
+ }
686
+ } catch (error) {
687
+ throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
688
+ }
689
+ try {
690
+ const response = await this.postRequest(
691
+ this.apiUrl + `/v1/extract`,
692
+ { ...jsonData, schema: jsonSchema },
693
+ headers
694
+ );
695
+ if (response.status === 200) {
696
+ return response.data;
697
+ } else {
698
+ this.handleError(response, "start extract job");
699
+ }
700
+ } catch (error) {
701
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
556
702
  }
557
703
  return { success: false, error: "Internal server error." };
558
704
  }
705
+ /**
706
+ * Retrieves the status of an extract job.
707
+ * @param jobId - The ID of the extract job.
708
+ * @returns The status of the extract job.
709
+ */
710
+ async getExtractStatus(jobId) {
711
+ try {
712
+ const response = await this.getRequest(
713
+ `${this.apiUrl}/v1/extract/${jobId}`,
714
+ this.prepareHeaders()
715
+ );
716
+ if (response.status === 200) {
717
+ return response.data;
718
+ } else {
719
+ this.handleError(response, "get extract status");
720
+ }
721
+ } catch (error) {
722
+ throw new FirecrawlError(error.message, 500);
723
+ }
724
+ }
559
725
  /**
560
726
  * Prepares the headers for an API request.
561
727
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -670,11 +836,13 @@ var FirecrawlApp = class {
670
836
  * @param {string} action - The action being performed when the error occurred.
671
837
  */
672
838
  handleError(response, action) {
673
- if ([402, 408, 409, 500].includes(response.status)) {
839
+ if ([400, 402, 408, 409, 500].includes(response.status)) {
674
840
  const errorMessage = response.data.error || "Unknown error occurred";
841
+ const details = response.data.details ? ` - ${JSON.stringify(response.data.details)}` : "";
675
842
  throw new FirecrawlError(
676
- `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`,
677
- response.status
843
+ `Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}${details}`,
844
+ response.status,
845
+ response?.data?.details
678
846
  );
679
847
  } else {
680
848
  throw new FirecrawlError(
@@ -683,6 +851,198 @@ var FirecrawlApp = class {
683
851
  );
684
852
  }
685
853
  }
854
+ /**
855
+ * Initiates a deep research operation on a given topic and polls until completion.
856
+ * @param params - Parameters for the deep research operation.
857
+ * @returns The final research results.
858
+ */
859
+ async __deepResearch(topic, params) {
860
+ try {
861
+ const response = await this.__asyncDeepResearch(topic, params);
862
+ if (!response.success || "error" in response) {
863
+ return { success: false, error: "error" in response ? response.error : "Unknown error" };
864
+ }
865
+ if (!response.id) {
866
+ throw new FirecrawlError(`Failed to start research. No job ID returned.`, 500);
867
+ }
868
+ const jobId = response.id;
869
+ let researchStatus;
870
+ while (true) {
871
+ researchStatus = await this.__checkDeepResearchStatus(jobId);
872
+ if ("error" in researchStatus && !researchStatus.success) {
873
+ return researchStatus;
874
+ }
875
+ if (researchStatus.status === "completed") {
876
+ return researchStatus;
877
+ }
878
+ if (researchStatus.status === "failed") {
879
+ throw new FirecrawlError(
880
+ `Research job ${researchStatus.status}. Error: ${researchStatus.error}`,
881
+ 500
882
+ );
883
+ }
884
+ if (researchStatus.status !== "processing") {
885
+ break;
886
+ }
887
+ await new Promise((resolve) => setTimeout(resolve, 2e3));
888
+ }
889
+ return { success: false, error: "Research job terminated unexpectedly" };
890
+ } catch (error) {
891
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
892
+ }
893
+ }
894
+ /**
895
+ * Initiates a deep research operation on a given topic without polling.
896
+ * @param params - Parameters for the deep research operation.
897
+ * @returns The response containing the research job ID.
898
+ */
899
+ async __asyncDeepResearch(topic, params) {
900
+ const headers = this.prepareHeaders();
901
+ try {
902
+ const response = await this.postRequest(
903
+ `${this.apiUrl}/v1/deep-research`,
904
+ { topic, ...params },
905
+ headers
906
+ );
907
+ if (response.status === 200) {
908
+ return response.data;
909
+ } else {
910
+ this.handleError(response, "start deep research");
911
+ }
912
+ } catch (error) {
913
+ if (error.response?.data?.error) {
914
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
915
+ } else {
916
+ throw new FirecrawlError(error.message, 500);
917
+ }
918
+ }
919
+ return { success: false, error: "Internal server error." };
920
+ }
921
+ /**
922
+ * Checks the status of a deep research operation.
923
+ * @param id - The ID of the deep research operation.
924
+ * @returns The current status and results of the research operation.
925
+ */
926
+ async __checkDeepResearchStatus(id) {
927
+ const headers = this.prepareHeaders();
928
+ try {
929
+ const response = await this.getRequest(
930
+ `${this.apiUrl}/v1/deep-research/${id}`,
931
+ headers
932
+ );
933
+ if (response.status === 200) {
934
+ return response.data;
935
+ } else if (response.status === 404) {
936
+ throw new FirecrawlError("Deep research job not found", 404);
937
+ } else {
938
+ this.handleError(response, "check deep research status");
939
+ }
940
+ } catch (error) {
941
+ if (error.response?.data?.error) {
942
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
943
+ } else {
944
+ throw new FirecrawlError(error.message, 500);
945
+ }
946
+ }
947
+ return { success: false, error: "Internal server error." };
948
+ }
949
+ /**
950
+ * Generates LLMs.txt for a given URL and polls until completion.
951
+ * @param url - The URL to generate LLMs.txt from.
952
+ * @param params - Parameters for the LLMs.txt generation operation.
953
+ * @returns The final generation results.
954
+ */
955
+ async generateLLMsText(url, params) {
956
+ try {
957
+ const response = await this.asyncGenerateLLMsText(url, params);
958
+ if (!response.success || "error" in response) {
959
+ return { success: false, error: "error" in response ? response.error : "Unknown error" };
960
+ }
961
+ if (!response.id) {
962
+ throw new FirecrawlError(`Failed to start LLMs.txt generation. No job ID returned.`, 500);
963
+ }
964
+ const jobId = response.id;
965
+ let generationStatus;
966
+ while (true) {
967
+ generationStatus = await this.checkGenerateLLMsTextStatus(jobId);
968
+ if ("error" in generationStatus && !generationStatus.success) {
969
+ return generationStatus;
970
+ }
971
+ if (generationStatus.status === "completed") {
972
+ return generationStatus;
973
+ }
974
+ if (generationStatus.status === "failed") {
975
+ throw new FirecrawlError(
976
+ `LLMs.txt generation job ${generationStatus.status}. Error: ${generationStatus.error}`,
977
+ 500
978
+ );
979
+ }
980
+ if (generationStatus.status !== "processing") {
981
+ break;
982
+ }
983
+ await new Promise((resolve) => setTimeout(resolve, 2e3));
984
+ }
985
+ return { success: false, error: "LLMs.txt generation job terminated unexpectedly" };
986
+ } catch (error) {
987
+ throw new FirecrawlError(error.message, 500, error.response?.data?.details);
988
+ }
989
+ }
990
+ /**
991
+ * Initiates an LLMs.txt generation operation without polling.
992
+ * @param url - The URL to generate LLMs.txt from.
993
+ * @param params - Parameters for the LLMs.txt generation operation.
994
+ * @returns The response containing the generation job ID.
995
+ */
996
+ async asyncGenerateLLMsText(url, params) {
997
+ const headers = this.prepareHeaders();
998
+ try {
999
+ const response = await this.postRequest(
1000
+ `${this.apiUrl}/v1/llmstxt`,
1001
+ { url, ...params },
1002
+ headers
1003
+ );
1004
+ if (response.status === 200) {
1005
+ return response.data;
1006
+ } else {
1007
+ this.handleError(response, "start LLMs.txt generation");
1008
+ }
1009
+ } catch (error) {
1010
+ if (error.response?.data?.error) {
1011
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
1012
+ } else {
1013
+ throw new FirecrawlError(error.message, 500);
1014
+ }
1015
+ }
1016
+ return { success: false, error: "Internal server error." };
1017
+ }
1018
+ /**
1019
+ * Checks the status of an LLMs.txt generation operation.
1020
+ * @param id - The ID of the LLMs.txt generation operation.
1021
+ * @returns The current status and results of the generation operation.
1022
+ */
1023
+ async checkGenerateLLMsTextStatus(id) {
1024
+ const headers = this.prepareHeaders();
1025
+ try {
1026
+ const response = await this.getRequest(
1027
+ `${this.apiUrl}/v1/llmstxt/${id}`,
1028
+ headers
1029
+ );
1030
+ if (response.status === 200) {
1031
+ return response.data;
1032
+ } else if (response.status === 404) {
1033
+ throw new FirecrawlError("LLMs.txt generation job not found", 404);
1034
+ } else {
1035
+ this.handleError(response, "check LLMs.txt generation status");
1036
+ }
1037
+ } catch (error) {
1038
+ if (error.response?.data?.error) {
1039
+ throw new FirecrawlError(`Request failed with status code ${error.response.status}. Error: ${error.response.data.error} ${error.response.data.details ? ` - ${JSON.stringify(error.response.data.details)}` : ""}`, error.response.status);
1040
+ } else {
1041
+ throw new FirecrawlError(error.message, 500);
1042
+ }
1043
+ }
1044
+ return { success: false, error: "Internal server error." };
1045
+ }
686
1046
  };
687
1047
  var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget {
688
1048
  ws;
@@ -692,7 +1052,8 @@ var CrawlWatcher = class extends import_typescript_event_target.TypedEventTarget
692
1052
  constructor(id, app) {
693
1053
  super();
694
1054
  this.id = id;
695
- this.ws = new import_isows.WebSocket(`${app.apiUrl}/v1/crawl/${id}`, app.apiKey);
1055
+ const wsUrl = app.apiUrl.replace(/^http/, "ws");
1056
+ this.ws = new import_isows.WebSocket(`${wsUrl}/v1/crawl/${id}`, app.apiKey);
696
1057
  this.status = "scraping";
697
1058
  this.data = [];
698
1059
  const messageHandler = (msg) => {