firecrawl 1.11.1 → 1.12.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import axios, { type AxiosResponse, type AxiosRequestHeaders, AxiosError } from "axios";
2
- import type * as zt from "zod";
2
+ import * as zt from "zod";
3
3
  import { zodToJsonSchema } from "zod-to-json-schema";
4
4
  import { WebSocket } from "isows";
5
5
  import { TypedEventTarget } from "typescript-event-target";
@@ -247,7 +247,7 @@ export interface MapResponse {
247
247
  */
248
248
  export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
249
249
  prompt?: string;
250
- schema?: LLMSchema;
250
+ schema?: LLMSchema | object;
251
251
  systemPrompt?: string;
252
252
  allowExternalLinks?: boolean;
253
253
  includeSubdomains?: boolean;
@@ -565,23 +565,39 @@ export default class FirecrawlApp {
565
565
  if ("data" in statusData) {
566
566
  let data = statusData.data;
567
567
  while (typeof statusData === 'object' && 'next' in statusData) {
568
+ if (data.length === 0) {
569
+ break
570
+ }
568
571
  statusData = (await this.getRequest(statusData.next, headers)).data;
569
572
  data = data.concat(statusData.data);
570
573
  }
571
574
  allData = data;
572
575
  }
573
576
  }
574
- return ({
577
+
578
+ let resp: CrawlStatusResponse | ErrorResponse = {
575
579
  success: response.data.success,
576
580
  status: response.data.status,
577
581
  total: response.data.total,
578
582
  completed: response.data.completed,
579
583
  creditsUsed: response.data.creditsUsed,
580
584
  expiresAt: new Date(response.data.expiresAt),
581
- next: response.data.next,
582
- data: allData,
583
- error: response.data.error,
584
- })
585
+ data: allData
586
+ }
587
+
588
+ if (!response.data.success && response.data.error) {
589
+ resp = {
590
+ ...resp,
591
+ success: false,
592
+ error: response.data.error
593
+ } as ErrorResponse;
594
+ }
595
+
596
+ if (response.data.next) {
597
+ (resp as CrawlStatusResponse).next = response.data.next;
598
+ }
599
+
600
+ return resp;
585
601
  } else {
586
602
  this.handleError(response, "check crawl status");
587
603
  }
@@ -799,23 +815,39 @@ export default class FirecrawlApp {
799
815
  if ("data" in statusData) {
800
816
  let data = statusData.data;
801
817
  while (typeof statusData === 'object' && 'next' in statusData) {
818
+ if (data.length === 0) {
819
+ break
820
+ }
802
821
  statusData = (await this.getRequest(statusData.next, headers)).data;
803
822
  data = data.concat(statusData.data);
804
823
  }
805
824
  allData = data;
806
825
  }
807
826
  }
808
- return ({
827
+
828
+ let resp: BatchScrapeStatusResponse | ErrorResponse = {
809
829
  success: response.data.success,
810
830
  status: response.data.status,
811
831
  total: response.data.total,
812
832
  completed: response.data.completed,
813
833
  creditsUsed: response.data.creditsUsed,
814
834
  expiresAt: new Date(response.data.expiresAt),
815
- next: response.data.next,
816
- data: allData,
817
- error: response.data.error,
818
- })
835
+ data: allData
836
+ }
837
+
838
+ if (!response.data.success && response.data.error) {
839
+ resp = {
840
+ ...resp,
841
+ success: false,
842
+ error: response.data.error
843
+ } as ErrorResponse;
844
+ }
845
+
846
+ if (response.data.next) {
847
+ (resp as BatchScrapeStatusResponse).next = response.data.next;
848
+ }
849
+
850
+ return resp;
819
851
  } else {
820
852
  this.handleError(response, "check batch scrape status");
821
853
  }
@@ -838,29 +870,49 @@ export default class FirecrawlApp {
838
870
  let jsonData: { urls: string[] } & ExtractParams<T> = { urls, ...params };
839
871
  let jsonSchema: any;
840
872
  try {
841
- jsonSchema = params?.schema ? zodToJsonSchema(params.schema) : undefined;
873
+ if (!params?.schema) {
874
+ jsonSchema = undefined;
875
+ } else if (params.schema instanceof zt.ZodType) {
876
+ jsonSchema = zodToJsonSchema(params.schema);
877
+ } else {
878
+ jsonSchema = params.schema;
879
+ }
842
880
  } catch (error: any) {
843
- throw new FirecrawlError("Invalid schema. Use a valid Zod schema.", 400);
881
+ throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
844
882
  }
845
883
 
846
884
  try {
847
885
  const response: AxiosResponse = await this.postRequest(
848
886
  this.apiUrl + `/v1/extract`,
849
- { ...jsonData, schema: jsonSchema },
887
+ { ...jsonData, schema: jsonSchema, origin: "api-sdk" },
850
888
  headers
851
889
  );
890
+
852
891
  if (response.status === 200) {
853
- const responseData = response.data as ExtractResponse<T>;
854
- if (responseData.success) {
855
- return {
856
- success: true,
857
- data: responseData.data,
858
- warning: responseData.warning,
859
- error: responseData.error
860
- };
861
- } else {
862
- throw new FirecrawlError(`Failed to scrape URL. Error: ${responseData.error}`, response.status);
863
- }
892
+ const jobId = response.data.id;
893
+ let extractStatus;
894
+ do {
895
+ const statusResponse: AxiosResponse = await this.getRequest(
896
+ `${this.apiUrl}/v1/extract/${jobId}`,
897
+ headers
898
+ );
899
+ extractStatus = statusResponse.data;
900
+ if (extractStatus.status === "completed") {
901
+ if (extractStatus.success) {
902
+ return {
903
+ success: true,
904
+ data: extractStatus.data,
905
+ warning: extractStatus.warning,
906
+ error: extractStatus.error
907
+ };
908
+ } else {
909
+ throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
910
+ }
911
+ } else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
912
+ throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
913
+ }
914
+ await new Promise(resolve => setTimeout(resolve, 1000)); // Polling interval
915
+ } while (extractStatus.status !== "completed");
864
916
  } else {
865
917
  this.handleError(response, "extract");
866
918
  }
@@ -870,6 +922,72 @@ export default class FirecrawlApp {
870
922
  return { success: false, error: "Internal server error." };
871
923
  }
872
924
 
925
+ /**
926
+ * Starts an extract job by POSTing to `/v1/extract` and returns the server's response body as-is; unlike the polling extract code path in this file, it does not wait for the job to finish. Throws FirecrawlError (400) if converting a Zod schema fails, and FirecrawlError (500) carrying the original message for any error raised while sending the request.
927
+ * @param url - The URL to extract data from. NOTE(review): sent under a singular `url` key, while the polling extract path in this file sends `urls` (an array) to the same endpoint — confirm the API accepts this shape.
928
+ * @param params - Additional parameters spread into the request body. `params.schema` may be a Zod schema (converted with zodToJsonSchema) or any other value, which is forwarded unchanged.
929
+ * @param idempotencyKey - Optional idempotency key, passed to prepareHeaders when building the request headers.
930
+ * @returns `response.data` when the HTTP status is 200. Otherwise handleError is invoked; the `{ success: false, error: "Internal server error." }` fallback is returned only if handleError returns without throwing.
931
+ */
932
+ async asyncExtract(
933
+ url: string,
934
+ params?: ExtractParams,
935
+ idempotencyKey?: string
936
+ ): Promise<ExtractResponse | ErrorResponse> {
937
+ const headers = this.prepareHeaders(idempotencyKey);
938
+ let jsonData: any = { url, ...params }; // NOTE(review): no `origin` field is added here, whereas the polling extract path sends origin: "api-sdk" — confirm this is intentional
939
+ let jsonSchema: any;
940
+
941
+ try {
942
+ if (params?.schema instanceof zt.ZodType) { // Zod schemas are converted to JSON Schema; anything else (plain object or undefined) is forwarded unchanged
943
+ jsonSchema = zodToJsonSchema(params.schema);
944
+ } else {
945
+ jsonSchema = params?.schema;
946
+ }
947
+ } catch (error: any) {
948
+ throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
949
+ }
950
+
951
+ try {
952
+ const response: AxiosResponse = await this.postRequest(
953
+ this.apiUrl + `/v1/extract`,
954
+ { ...jsonData, schema: jsonSchema }, // `schema` is listed last so the converted value overrides the raw params.schema spread in via jsonData
955
+ headers
956
+ );
957
+
958
+ if (response.status === 200) {
959
+ return response.data;
960
+ } else {
961
+ this.handleError(response, "start extract job");
962
+ }
963
+ } catch (error: any) {
964
+ throw new FirecrawlError(error.message, 500); // re-wraps anything thrown inside the try (including by handleError, if it throws) with status 500
965
+ }
966
+ return { success: false, error: "Internal server error." }; // reached only when the status was not 200 and handleError returned without throwing
967
+ }
968
+
969
+ /**
970
+ * Fetches the current state of an extract job with a single GET to `/v1/extract/<jobId>`, using default headers from prepareHeaders(); it does not poll. Any error raised during the request is rethrown as FirecrawlError (500) carrying the original message.
971
+ * @param jobId - The ID of the extract job; interpolated into the URL path as-is (no encoding is applied here). Presumably the `id` returned when the job was started — verify against caller.
972
+ * @returns `response.data` when the HTTP status is 200 (untyped; the polling extract path in this file reads `status`, `success`, `data`, `warning` and `error` from the same endpoint's payload). For other statuses handleError is invoked, and the promise resolves to undefined if handleError returns without throwing.
973
+ */
974
+ async getExtractStatus(jobId: string): Promise<any> {
975
+ try {
976
+ const response: AxiosResponse = await this.getRequest(
977
+ `${this.apiUrl}/v1/extract/${jobId}`,
978
+ this.prepareHeaders()
979
+ );
980
+
981
+ if (response.status === 200) {
982
+ return response.data;
983
+ } else {
984
+ this.handleError(response, "get extract status");
985
+ }
986
+ } catch (error: any) {
987
+ throw new FirecrawlError(error.message, 500); // also re-wraps anything thrown by handleError above, replacing its status with 500
988
+ }
989
+ }
990
+
873
991
  /**
874
992
  * Prepares the headers for an API request.
875
993
  * @param idempotencyKey - Optional key to ensure idempotency.
@@ -965,6 +1083,9 @@ export default class FirecrawlApp {
965
1083
  if ("data" in statusData) {
966
1084
  let data = statusData.data;
967
1085
  while (typeof statusData === 'object' && 'next' in statusData) {
1086
+ if (data.length === 0) {
1087
+ break
1088
+ }
968
1089
  statusResponse = await this.getRequest(statusData.next, headers);
969
1090
  statusData = statusResponse.data;
970
1091
  data = data.concat(statusData.data);