firecrawl 1.11.2 → 1.12.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -565,23 +565,39 @@ export default class FirecrawlApp {
565
565
  if ("data" in statusData) {
566
566
  let data = statusData.data;
567
567
  while (typeof statusData === 'object' && 'next' in statusData) {
568
+ if (data.length === 0) {
569
+ break
570
+ }
568
571
  statusData = (await this.getRequest(statusData.next, headers)).data;
569
572
  data = data.concat(statusData.data);
570
573
  }
571
574
  allData = data;
572
575
  }
573
576
  }
574
- return ({
577
+
578
+ let resp: CrawlStatusResponse | ErrorResponse = {
575
579
  success: response.data.success,
576
580
  status: response.data.status,
577
581
  total: response.data.total,
578
582
  completed: response.data.completed,
579
583
  creditsUsed: response.data.creditsUsed,
580
584
  expiresAt: new Date(response.data.expiresAt),
581
- next: response.data.next,
582
- data: allData,
583
- error: response.data.error,
584
- })
585
+ data: allData
586
+ }
587
+
588
+ if (!response.data.success && response.data.error) {
589
+ resp = {
590
+ ...resp,
591
+ success: false,
592
+ error: response.data.error
593
+ } as ErrorResponse;
594
+ }
595
+
596
+ if (response.data.next) {
597
+ (resp as CrawlStatusResponse).next = response.data.next;
598
+ }
599
+
600
+ return resp;
585
601
  } else {
586
602
  this.handleError(response, "check crawl status");
587
603
  }
@@ -799,23 +815,39 @@ export default class FirecrawlApp {
799
815
  if ("data" in statusData) {
800
816
  let data = statusData.data;
801
817
  while (typeof statusData === 'object' && 'next' in statusData) {
818
+ if (data.length === 0) {
819
+ break
820
+ }
802
821
  statusData = (await this.getRequest(statusData.next, headers)).data;
803
822
  data = data.concat(statusData.data);
804
823
  }
805
824
  allData = data;
806
825
  }
807
826
  }
808
- return ({
827
+
828
+ let resp: BatchScrapeStatusResponse | ErrorResponse = {
809
829
  success: response.data.success,
810
830
  status: response.data.status,
811
831
  total: response.data.total,
812
832
  completed: response.data.completed,
813
833
  creditsUsed: response.data.creditsUsed,
814
834
  expiresAt: new Date(response.data.expiresAt),
815
- next: response.data.next,
816
- data: allData,
817
- error: response.data.error,
818
- })
835
+ data: allData
836
+ }
837
+
838
+ if (!response.data.success && response.data.error) {
839
+ resp = {
840
+ ...resp,
841
+ success: false,
842
+ error: response.data.error
843
+ } as ErrorResponse;
844
+ }
845
+
846
+ if (response.data.next) {
847
+ (resp as BatchScrapeStatusResponse).next = response.data.next;
848
+ }
849
+
850
+ return resp;
819
851
  } else {
820
852
  this.handleError(response, "check batch scrape status");
821
853
  }
@@ -852,21 +884,35 @@ export default class FirecrawlApp {
852
884
  try {
853
885
  const response: AxiosResponse = await this.postRequest(
854
886
  this.apiUrl + `/v1/extract`,
855
- { ...jsonData, schema: jsonSchema },
887
+ { ...jsonData, schema: jsonSchema, origin: "api-sdk" },
856
888
  headers
857
889
  );
890
+
858
891
  if (response.status === 200) {
859
- const responseData = response.data as ExtractResponse<T>;
860
- if (responseData.success) {
861
- return {
862
- success: true,
863
- data: responseData.data,
864
- warning: responseData.warning,
865
- error: responseData.error
866
- };
867
- } else {
868
- throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
869
- }
892
+ const jobId = response.data.id;
893
+ let extractStatus;
894
+ do {
895
+ const statusResponse: AxiosResponse = await this.getRequest(
896
+ `${this.apiUrl}/v1/extract/${jobId}`,
897
+ headers
898
+ );
899
+ extractStatus = statusResponse.data;
900
+ if (extractStatus.status === "completed") {
901
+ if (extractStatus.success) {
902
+ return {
903
+ success: true,
904
+ data: extractStatus.data,
905
+ warning: extractStatus.warning,
906
+ error: extractStatus.error
907
+ };
908
+ } else {
909
+ throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
910
+ }
911
+ } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
912
+ throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
913
+ }
914
+ await new Promise(resolve => setTimeout(resolve, 1000)); // Polling interval
915
+ } while (extractStatus.status !== "completed");
870
916
  } else {
871
917
  this.handleError(response, "extract");
872
918
  }
@@ -876,6 +922,72 @@ export default class FirecrawlApp {
876
922
  return { success: false, error: "Internal server error." };
877
923
  }
878
924
 
925
/**
 * Starts an extract job by POSTing to `/v1/extract` and returns the API's
 * immediate response; this method does not poll for the job's result.
 *
 * @param url - The URL to extract data from; sent in the request body as `url`.
 * @param params - Additional parameters for the extract request, spread into
 *   the request body. `params.schema` may be a Zod schema, which is converted
 *   to JSON Schema before sending; any other value is sent unchanged.
 * @param idempotencyKey - Optional idempotency key for the request.
 * @returns The response body when the HTTP status is 200.
 * @throws FirecrawlError with status 400 when converting the schema throws.
 *   A FirecrawlError raised while sending or handling the request is rethrown
 *   unchanged so its status code survives; any other failure is wrapped in a
 *   FirecrawlError with status 500.
 */
async asyncExtract(
  url: string,
  params?: ExtractParams,
  idempotencyKey?: string
): Promise<ExtractResponse | ErrorResponse> {
  const headers = this.prepareHeaders(idempotencyKey);

  let jsonSchema: unknown;
  try {
    jsonSchema =
      params?.schema instanceof zt.ZodType
        ? zodToJsonSchema(params.schema)
        : params?.schema;
  } catch (error: unknown) {
    throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
  }

  try {
    // NOTE(review): the synchronous `extract` call in this file also sends
    // `origin: "api-sdk"`; this request does not. Confirm whether that is
    // intentional before adding it here.
    const response: AxiosResponse = await this.postRequest(
      this.apiUrl + `/v1/extract`,
      { url, ...params, schema: jsonSchema },
      headers
    );

    if (response.status === 200) {
      return response.data;
    }
    this.handleError(response, "start extract job");
  } catch (error: unknown) {
    // Do not re-wrap errors that already carry a status code; wrapping them
    // would replace the real status with 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    const message = error instanceof Error ? error.message : String(error);
    throw new FirecrawlError(message, 500);
  }
  // Reached only if handleError returns instead of throwing.
  return { success: false, error: "Internal server error." };
}
968
+
969
/**
 * Retrieves the status of an extract job with a GET to
 * `/v1/extract/{jobId}`.
 *
 * @param jobId - The ID of the extract job.
 * @returns The response body when the HTTP status is 200. If `handleError`
 *   returns rather than throws for a non-200 response, the promise resolves
 *   to `undefined`.
 * @throws FirecrawlError with status 400 when `jobId` is empty. A
 *   FirecrawlError raised while sending or handling the request is rethrown
 *   unchanged so its status code survives; any other failure is wrapped in a
 *   FirecrawlError with status 500.
 */
async getExtractStatus(jobId: string): Promise<any> {
  // An empty ID would otherwise produce a request to `/v1/extract/`.
  if (!jobId) {
    throw new FirecrawlError("No extract job ID provided", 400);
  }

  try {
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/extract/${jobId}`,
      this.prepareHeaders()
    );

    if (response.status === 200) {
      return response.data;
    }
    this.handleError(response, "get extract status");
  } catch (error: unknown) {
    // Do not re-wrap errors that already carry a status code; wrapping them
    // would replace the real status with 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    const message = error instanceof Error ? error.message : String(error);
    throw new FirecrawlError(message, 500);
  }
}
990
+
879
991
  /**
880
992
  * Prepares the headers for an API request.
881
993
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -971,6 +1083,9 @@ export default class FirecrawlApp {
971
1083
  if ("data" in statusData) {
972
1084
  let data = statusData.data;
973
1085
  while (typeof statusData === 'object' && 'next' in statusData) {
1086
+ if (data.length === 0) {
1087
+ break
1088
+ }
974
1089
  statusResponse = await this.getRequest(statusData.next, headers);
975
1090
  statusData = statusResponse.data;
976
1091
  data = data.concat(statusData.data);