firecrawl 1.11.1 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +125 -22
- package/dist/index.d.cts +15 -1
- package/dist/index.d.ts +15 -1
- package/dist/index.js +125 -22
- package/package.json +1 -1
- package/src/index.js +1002 -0
- package/src/index.ts +147 -26
package/src/index.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import axios, { type AxiosResponse, type AxiosRequestHeaders, AxiosError } from "axios";
|
|
2
|
-
import
|
|
2
|
+
import * as zt from "zod";
|
|
3
3
|
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
4
|
import { WebSocket } from "isows";
|
|
5
5
|
import { TypedEventTarget } from "typescript-event-target";
|
|
@@ -247,7 +247,7 @@ export interface MapResponse {
|
|
|
247
247
|
*/
|
|
248
248
|
export interface ExtractParams<LLMSchema extends zt.ZodSchema = any> {
|
|
249
249
|
prompt?: string;
|
|
250
|
-
schema?: LLMSchema;
|
|
250
|
+
schema?: LLMSchema | object;
|
|
251
251
|
systemPrompt?: string;
|
|
252
252
|
allowExternalLinks?: boolean;
|
|
253
253
|
includeSubdomains?: boolean;
|
|
@@ -565,23 +565,39 @@ export default class FirecrawlApp {
|
|
|
565
565
|
if ("data" in statusData) {
|
|
566
566
|
let data = statusData.data;
|
|
567
567
|
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
568
|
+
if (data.length === 0) {
|
|
569
|
+
break
|
|
570
|
+
}
|
|
568
571
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
569
572
|
data = data.concat(statusData.data);
|
|
570
573
|
}
|
|
571
574
|
allData = data;
|
|
572
575
|
}
|
|
573
576
|
}
|
|
574
|
-
|
|
577
|
+
|
|
578
|
+
let resp: CrawlStatusResponse | ErrorResponse = {
|
|
575
579
|
success: response.data.success,
|
|
576
580
|
status: response.data.status,
|
|
577
581
|
total: response.data.total,
|
|
578
582
|
completed: response.data.completed,
|
|
579
583
|
creditsUsed: response.data.creditsUsed,
|
|
580
584
|
expiresAt: new Date(response.data.expiresAt),
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
+
data: allData
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
if (!response.data.success && response.data.error) {
|
|
589
|
+
resp = {
|
|
590
|
+
...resp,
|
|
591
|
+
success: false,
|
|
592
|
+
error: response.data.error
|
|
593
|
+
} as ErrorResponse;
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
if (response.data.next) {
|
|
597
|
+
(resp as CrawlStatusResponse).next = response.data.next;
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
return resp;
|
|
585
601
|
} else {
|
|
586
602
|
this.handleError(response, "check crawl status");
|
|
587
603
|
}
|
|
@@ -799,23 +815,39 @@ export default class FirecrawlApp {
|
|
|
799
815
|
if ("data" in statusData) {
|
|
800
816
|
let data = statusData.data;
|
|
801
817
|
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
818
|
+
if (data.length === 0) {
|
|
819
|
+
break
|
|
820
|
+
}
|
|
802
821
|
statusData = (await this.getRequest(statusData.next, headers)).data;
|
|
803
822
|
data = data.concat(statusData.data);
|
|
804
823
|
}
|
|
805
824
|
allData = data;
|
|
806
825
|
}
|
|
807
826
|
}
|
|
808
|
-
|
|
827
|
+
|
|
828
|
+
let resp: BatchScrapeStatusResponse | ErrorResponse = {
|
|
809
829
|
success: response.data.success,
|
|
810
830
|
status: response.data.status,
|
|
811
831
|
total: response.data.total,
|
|
812
832
|
completed: response.data.completed,
|
|
813
833
|
creditsUsed: response.data.creditsUsed,
|
|
814
834
|
expiresAt: new Date(response.data.expiresAt),
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
835
|
+
data: allData
|
|
836
|
+
}
|
|
837
|
+
|
|
838
|
+
if (!response.data.success && response.data.error) {
|
|
839
|
+
resp = {
|
|
840
|
+
...resp,
|
|
841
|
+
success: false,
|
|
842
|
+
error: response.data.error
|
|
843
|
+
} as ErrorResponse;
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
if (response.data.next) {
|
|
847
|
+
(resp as BatchScrapeStatusResponse).next = response.data.next;
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
return resp;
|
|
819
851
|
} else {
|
|
820
852
|
this.handleError(response, "check batch scrape status");
|
|
821
853
|
}
|
|
@@ -838,29 +870,49 @@ export default class FirecrawlApp {
|
|
|
838
870
|
let jsonData: { urls: string[] } & ExtractParams<T> = { urls, ...params };
|
|
839
871
|
let jsonSchema: any;
|
|
840
872
|
try {
|
|
841
|
-
|
|
873
|
+
if (!params?.schema) {
|
|
874
|
+
jsonSchema = undefined;
|
|
875
|
+
} else if (params.schema instanceof zt.ZodType) {
|
|
876
|
+
jsonSchema = zodToJsonSchema(params.schema);
|
|
877
|
+
} else {
|
|
878
|
+
jsonSchema = params.schema;
|
|
879
|
+
}
|
|
842
880
|
} catch (error: any) {
|
|
843
|
-
throw new FirecrawlError("Invalid schema.
|
|
881
|
+
throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
|
|
844
882
|
}
|
|
845
883
|
|
|
846
884
|
try {
|
|
847
885
|
const response: AxiosResponse = await this.postRequest(
|
|
848
886
|
this.apiUrl + `/v1/extract`,
|
|
849
|
-
{ ...jsonData, schema: jsonSchema },
|
|
887
|
+
{ ...jsonData, schema: jsonSchema, origin: "api-sdk" },
|
|
850
888
|
headers
|
|
851
889
|
);
|
|
890
|
+
|
|
852
891
|
if (response.status === 200) {
|
|
853
|
-
const
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
892
|
+
const jobId = response.data.id;
|
|
893
|
+
let extractStatus;
|
|
894
|
+
do {
|
|
895
|
+
const statusResponse: AxiosResponse = await this.getRequest(
|
|
896
|
+
`${this.apiUrl}/v1/extract/${jobId}`,
|
|
897
|
+
headers
|
|
898
|
+
);
|
|
899
|
+
extractStatus = statusResponse.data;
|
|
900
|
+
if (extractStatus.status === "completed") {
|
|
901
|
+
if (extractStatus.success) {
|
|
902
|
+
return {
|
|
903
|
+
success: true,
|
|
904
|
+
data: extractStatus.data,
|
|
905
|
+
warning: extractStatus.warning,
|
|
906
|
+
error: extractStatus.error
|
|
907
|
+
};
|
|
908
|
+
} else {
|
|
909
|
+
throw new FirecrawlError(`Failed to extract data. Error: ${extractStatus.error}`, statusResponse.status);
|
|
910
|
+
}
|
|
911
|
+
} else if (extractStatus.status === "failed" || extractStatus.status === "cancelled") {
|
|
912
|
+
throw new FirecrawlError(`Extract job ${extractStatus.status}. Error: ${extractStatus.error}`, statusResponse.status);
|
|
913
|
+
}
|
|
914
|
+
await new Promise(resolve => setTimeout(resolve, 1000)); // Polling interval
|
|
915
|
+
} while (extractStatus.status !== "completed");
|
|
864
916
|
} else {
|
|
865
917
|
this.handleError(response, "extract");
|
|
866
918
|
}
|
|
@@ -870,6 +922,72 @@ export default class FirecrawlApp {
|
|
|
870
922
|
return { success: false, error: "Internal server error." };
|
|
871
923
|
}
|
|
872
924
|
|
|
925
|
+
/**
 * Starts an extract job through the Firecrawl API and returns the server's
 * reply to the start request; this method does not poll for the job's result.
 *
 * NOTE(review): the request body carries a single `url` field and no `origin`,
 * whereas the blocking extract call in this class posts `urls` (an array) plus
 * `origin: "api-sdk"` to the same `/v1/extract` endpoint — confirm which
 * payload shape the API expects before relying on this method.
 *
 * @param url - The URL to extract data from.
 * @param params - Additional parameters for the extract request. A Zod schema
 *   in `params.schema` is converted to JSON schema; any other value is
 *   forwarded unchanged.
 * @param idempotencyKey - Optional idempotency key for the request.
 * @returns The body of the start request's response when its status is 200.
 * @throws FirecrawlError with status 400 when the schema conversion throws;
 *   a FirecrawlError raised while sending the request is propagated unchanged
 *   (keeping its original status); any other failure is wrapped in a
 *   FirecrawlError with status 500.
 */
async asyncExtract(
  url: string,
  params?: ExtractParams,
  idempotencyKey?: string
): Promise<ExtractResponse | ErrorResponse> {
  const headers = this.prepareHeaders(idempotencyKey);
  const jsonData: any = { url, ...params };
  let jsonSchema: any;

  try {
    if (params?.schema instanceof zt.ZodType) {
      // Zod schemas must be serialized to JSON schema before they can be sent.
      jsonSchema = zodToJsonSchema(params.schema);
    } else {
      // Anything else (including undefined) is passed through as-is.
      jsonSchema = params?.schema;
    }
  } catch {
    throw new FirecrawlError("Invalid schema. Schema must be either a valid Zod schema or JSON schema object.", 400);
  }

  try {
    const response: AxiosResponse = await this.postRequest(
      this.apiUrl + `/v1/extract`,
      { ...jsonData, schema: jsonSchema },
      headers
    );

    if (response.status === 200) {
      return response.data;
    } else {
      // presumably throws a FirecrawlError describing the failed response — confirm against handleError
      this.handleError(response, "start extract job");
    }
  } catch (error: unknown) {
    // Do not rewrap errors that already carry a status code; rewrapping would
    // report every failure as a 500 and hide the real cause from callers.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    const message = error instanceof Error ? error.message : String(error);
    throw new FirecrawlError(message, 500);
  }
  // Only reached if handleError returns instead of throwing.
  return { success: false, error: "Internal server error." };
}
|
|
968
|
+
|
|
969
|
+
/**
 * Retrieves the status of an extract job.
 *
 * Issues a GET to `/v1/extract/{jobId}` with the default prepared headers.
 *
 * @param jobId - The ID of the extract job.
 * @returns The response body when the request returns status 200. If
 *   `handleError` returns instead of throwing, the result is `undefined`.
 * @throws FirecrawlError — an error that is already a FirecrawlError is
 *   propagated unchanged (keeping its original status); any other failure is
 *   wrapped in a FirecrawlError with status 500.
 */
async getExtractStatus(jobId: string): Promise<any> {
  try {
    const response: AxiosResponse = await this.getRequest(
      `${this.apiUrl}/v1/extract/${jobId}`,
      this.prepareHeaders()
    );

    if (response.status === 200) {
      return response.data;
    } else {
      // presumably throws a FirecrawlError describing the failed response — confirm against handleError
      this.handleError(response, "get extract status");
    }
  } catch (error: unknown) {
    // Keep the status code of errors that already carry one instead of
    // flattening every failure to a 500.
    if (error instanceof FirecrawlError) {
      throw error;
    }
    const message = error instanceof Error ? error.message : String(error);
    throw new FirecrawlError(message, 500);
  }
}
|
|
990
|
+
|
|
873
991
|
/**
|
|
874
992
|
* Prepares the headers for an API request.
|
|
875
993
|
* @param idempotencyKey - Optional key to ensure idempotency.
|
|
@@ -965,6 +1083,9 @@ export default class FirecrawlApp {
|
|
|
965
1083
|
if ("data" in statusData) {
|
|
966
1084
|
let data = statusData.data;
|
|
967
1085
|
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
1086
|
+
if (data.length === 0) {
|
|
1087
|
+
break
|
|
1088
|
+
}
|
|
968
1089
|
statusResponse = await this.getRequest(statusData.next, headers);
|
|
969
1090
|
statusData = statusResponse.data;
|
|
970
1091
|
data = data.concat(statusData.data);
|