firecrawl 1.11.2 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +116 -20
- package/dist/index.d.cts +14 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.js +116 -20
- package/package.json +1 -1
- package/src/index.js +1002 -0
- package/src/index.ts +137 -22
package/src/index.ts
CHANGED
|
@@ -565,23 +565,39 @@ export default class FirecrawlApp {
|
|
|
565
565
|
if ("data" in statusData) {
|
|
566
566
|
let data = statusData.data;
|
|
567
567
|
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
568
|
+
if (data.length === 0) {
|
|
569
|
+
break
|
|
570
|
+
}
|
|
568
571
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
569
572
|
data = data.concat(statusData.data);
|
|
570
573
|
}
|
|
571
574
|
allData = data;
|
|
572
575
|
}
|
|
573
576
|
}
|
|
574
|
-
|
|
577
|
+
|
|
578
|
+
let resp: CrawlStatusResponse | ErrorResponse = {
|
|
575
579
|
success: response.data.success,
|
|
576
580
|
status: response.data.status,
|
|
577
581
|
total: response.data.total,
|
|
578
582
|
completed: response.data.completed,
|
|
579
583
|
creditsUsed: response.data.creditsUsed,
|
|
580
584
|
expiresAt: new Date(response.data.expiresAt),
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
+
data: allData
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
if (!response.data.success && response.data.error) {
|
|
589
|
+
resp = {
|
|
590
|
+
...resp,
|
|
591
|
+
success: false,
|
|
592
|
+
error: response.data.error
|
|
593
|
+
} as ErrorResponse;
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
if (response.data.next) {
|
|
597
|
+
(resp as CrawlStatusResponse).next = response.data.next;
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
return resp;
|
|
585
601
|
} else {
|
|
586
602
|
this.handleError(response, "check crawl status");
|
|
587
603
|
}
|
|
@@ -799,23 +815,39 @@ export default class FirecrawlApp {
|
|
|
799
815
|
if ("data" in statusData) {
|
|
800
816
|
let data = statusData.data;
|
|
801
817
|
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
818
|
+
if (data.length === 0) {
|
|
819
|
+
break
|
|
820
|
+
}
|
|
802
821
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
803
822
|
data = data.concat(statusData.data);
|
|
804
823
|
}
|
|
805
824
|
allData = data;
|
|
806
825
|
}
|
|
807
826
|
}
|
|
808
|
-
|
|
827
|
+
|
|
828
|
+
let resp: BatchScrapeStatusResponse | ErrorResponse = {
|
|
809
829
|
success: response.data.success,
|
|
810
830
|
status: response.data.status,
|
|
811
831
|
total: response.data.total,
|
|
812
832
|
completed: response.data.completed,
|
|
813
833
|
creditsUsed: response.data.creditsUsed,
|
|
814
834
|
expiresAt: new Date(response.data.expiresAt),
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
835
|
+
data: allData
|
|
836
|
+
}
|
|
837
|
+
|
|
838
|
+
if (!response.data.success && response.data.error) {
|
|
839
|
+
resp = {
|
|
840
|
+
...resp,
|
|
841
|
+
success: false,
|
|
842
|
+
error: response.data.error
|
|
843
|
+
} as ErrorResponse;
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
if (response.data.next) {
|
|
847
|
+
(resp as BatchScrapeStatusResponse).next = response.data.next;
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
return resp;
|
|
819
851
|
} else {
|
|
820
852
|
this.handleError(response, "check batch scrape status");
|
|
821
853
|
}
|
|
@@ -852,21 +884,35 @@ export default class FirecrawlApp {
|
|
|
852
884
|
try {
|
|
853
885
|
const response: AxiosResponse = await this.postRequest(
|
|
854
886
|
this.apiUrl + `/v1/extract`,
|
|
855
|
-
{ ...jsonData, schema: jsonSchema },
|
|
887
|
+
{ ...jsonData, schema: jsonSchema, origin: "api-sdk" },
|
|
856
888
|
headers
|
|
857
889
|
);
|
|
890
|
+
|
|
858
891
|
if (response.status === 200) {
|
|
859
|
-
const
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
892
|
+
const jobId = response.data.id;
|
|
893
|
+
let extractStatus;
|
|
894
|
+
do {
|
|
895
|
+
const statusResponse: AxiosResponse = await this.getRequest(
|
|
896
|
+
`${this.apiUrl}/v1/extract/${jobId}`,
|
|
897
|
+
headers
|
|
898
|
+
);
|
|
899
|
+
extractStatus = statusResponse.data;
|
|
900
|
+
if (extractStatus.status === "completed") {
|
|
901
|
+
if (extractStatus.success) {
|
|
902
|
+
return {
|
|
903
|
+
success: true,
|
|
904
|
+
data: extractStatus.data,
|
|
905
|
+
warning: extractStatus.warning,
|
|
906
|
+
error: extractStatus.error
|
|
907
|
+
};
|
|
908
|
+
} else {
|
|
909
|
+
throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
|
|
910
|
+
}
|
|
911
|
+
} else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
|
|
912
|
+
throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
|
|
913
|
+
}
|
|
914
|
+
await new Promise(resolve => setTimeout(resolve, 1000)); // Polling interval
|
|
915
|
+
} while (extractStatus.status !== "completed");
|
|
870
916
|
} else {
|
|
871
917
|
this.handleError(response, "extract");
|
|
872
918
|
}
|
|
@@ -876,6 +922,72 @@ export default class FirecrawlApp {
|
|
|
876
922
|
return { success: false, error: "Internal server error." };
|
|
877
923
|
}
|
|
878
924
|
|
|
925
|
+
/**
|
|
926
|
+
* Initiates an asynchronous extract job for a URL using the Firecrawl API.
|
|
927
|
+
* @param url - The URL to extract data from.
|
|
928
|
+
* @param params - Additional parameters for the extract request.
|
|
929
|
+
* @param idempotencyKey - Optional idempotency key for the request.
|
|
930
|
+
* @returns The response from the extract operation.
|
|
931
|
+
*/
|
|
932
|
+
async asyncExtract(
|
|
933
|
+
url: string,
|
|
934
|
+
params?: ExtractParams,
|
|
935
|
+
idempotencyKey?: string
|
|
936
|
+
): Promise<ExtractResponse | ErrorResponse> {
|
|
937
|
+
const headers = this.prepareHeaders(idempotencyKey);
|
|
938
|
+
let jsonData: any = { url, ...params };
|
|
939
|
+
let jsonSchema: any;
|
|
940
|
+
|
|
941
|
+
try {
|
|
942
|
+
if (params?.schema instanceof zt.ZodType) {
|
|
943
|
+
jsonSchema = zodToJsonSchema(params.schema);
|
|
944
|
+
} else {
|
|
945
|
+
jsonSchema = params?.schema;
|
|
946
|
+
}
|
|
947
|
+
} catch (error: any) {
|
|
948
|
+
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
|
949
|
+
}
|
|
950
|
+
|
|
951
|
+
try {
|
|
952
|
+
const response: AxiosResponse = await this.postRequest(
|
|
953
|
+
this.apiUrl + `/v1/extract`,
|
|
954
|
+
{ ...jsonData, schema: jsonSchema },
|
|
955
|
+
headers
|
|
956
|
+
);
|
|
957
|
+
|
|
958
|
+
if (response.status === 200) {
|
|
959
|
+
return response.data;
|
|
960
|
+
} else {
|
|
961
|
+
this.handleError(response, "start extract job");
|
|
962
|
+
}
|
|
963
|
+
} catch (error: any) {
|
|
964
|
+
throw new FirecrawlError(error.message, 500);
|
|
965
|
+
}
|
|
966
|
+
return { success: false, error: "Internal server error." };
|
|
967
|
+
}
|
|
968
|
+
|
|
969
|
+
/**
|
|
970
|
+
* Retrieves the status of an extract job.
|
|
971
|
+
* @param jobId - The ID of the extract job.
|
|
972
|
+
* @returns The status of the extract job.
|
|
973
|
+
*/
|
|
974
|
+
async getExtractStatus(jobId: string): Promise<any> {
|
|
975
|
+
try {
|
|
976
|
+
const response: AxiosResponse = await this.getRequest(
|
|
977
|
+
`${this.apiUrl}/v1/extract/${jobId}`,
|
|
978
|
+
this.prepareHeaders()
|
|
979
|
+
);
|
|
980
|
+
|
|
981
|
+
if (response.status === 200) {
|
|
982
|
+
return response.data;
|
|
983
|
+
} else {
|
|
984
|
+
this.handleError(response, "get extract status");
|
|
985
|
+
}
|
|
986
|
+
} catch (error: any) {
|
|
987
|
+
throw new FirecrawlError(error.message, 500);
|
|
988
|
+
}
|
|
989
|
+
}
|
|
990
|
+
|
|
879
991
|
/**
|
|
880
992
|
* Prepares the headers for an API request.
|
|
881
993
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
@@ -971,6 +1083,9 @@ export default class FirecrawlApp {
|
|
|
971
1083
|
if ("data" in statusData) {
|
|
972
1084
|
let data = statusData.data;
|
|
973
1085
|
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
1086
|
+
if (data.length === 0) {
|
|
1087
|
+
break
|
|
1088
|
+
}
|
|
974
1089
|
statusResponse = await this.getRequest(statusData.next, headers);
|
|
975
1090
|
statusData = statusResponse.data;
|
|
976
1091
|
data = data.concat(statusData.data);
|