@mendable/firecrawl 1.29.1 → 1.29.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +62 -18
- package/dist/index.d.cts +6 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +43 -6
- package/dist/{package-TKKTR5R7.js → package-Z6F7JDXI.js} +20 -11
- package/package.json +17 -10
- package/src/__tests__/v1/e2e_withAuth/index.test.ts +1 -0
- package/src/__tests__/v1/unit/monitor-job-status-retry.test.ts +154 -0
- package/src/index.ts +83 -20
package/dist/index.cjs
CHANGED
|
@@ -35,7 +35,7 @@ var require_package = __commonJS({
|
|
|
35
35
|
"package.json"(exports2, module2) {
|
|
36
36
|
module2.exports = {
|
|
37
37
|
name: "@mendable/firecrawl-js",
|
|
38
|
-
version: "1.29.1",
|
|
38
|
+
version: "1.29.3",
|
|
39
39
|
description: "JavaScript SDK for Firecrawl API",
|
|
40
40
|
main: "dist/index.js",
|
|
41
41
|
types: "dist/index.d.ts",
|
|
@@ -51,7 +51,8 @@ var require_package = __commonJS({
|
|
|
51
51
|
build: "tsup",
|
|
52
52
|
"build-and-publish": "npm run build && npm publish --access public",
|
|
53
53
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
54
|
-
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
54
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts",
|
|
55
|
+
"test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/unit/*.test.ts"
|
|
55
56
|
},
|
|
56
57
|
repository: {
|
|
57
58
|
type: "git",
|
|
@@ -60,27 +61,26 @@ var require_package = __commonJS({
|
|
|
60
61
|
author: "Mendable.ai",
|
|
61
62
|
license: "MIT",
|
|
62
63
|
dependencies: {
|
|
64
|
+
axios: "^1.11.0",
|
|
63
65
|
"typescript-event-target": "^1.1.1",
|
|
64
66
|
zod: "^3.23.8",
|
|
65
|
-
"zod-to-json-schema": "^3.23.0"
|
|
66
|
-
axios: "^1.6.8"
|
|
67
|
+
"zod-to-json-schema": "^3.23.0"
|
|
67
68
|
},
|
|
68
69
|
bugs: {
|
|
69
70
|
url: "https://github.com/mendableai/firecrawl/issues"
|
|
70
71
|
},
|
|
71
72
|
homepage: "https://github.com/mendableai/firecrawl#readme",
|
|
72
73
|
devDependencies: {
|
|
73
|
-
"@jest/globals": "^
|
|
74
|
-
"@types/axios": "^0.14.0",
|
|
74
|
+
"@jest/globals": "^30.0.5",
|
|
75
75
|
"@types/dotenv": "^8.2.0",
|
|
76
|
-
"@types/jest": "^
|
|
76
|
+
"@types/jest": "^30.0.0",
|
|
77
77
|
"@types/mocha": "^10.0.6",
|
|
78
78
|
"@types/node": "^20.12.12",
|
|
79
79
|
"@types/uuid": "^9.0.8",
|
|
80
80
|
dotenv: "^16.4.5",
|
|
81
|
-
jest: "^
|
|
82
|
-
"ts-jest": "^29.
|
|
83
|
-
tsup: "^8.
|
|
81
|
+
jest: "^30.0.5",
|
|
82
|
+
"ts-jest": "^29.4.0",
|
|
83
|
+
tsup: "^8.5.0",
|
|
84
84
|
typescript: "^5.4.5",
|
|
85
85
|
uuid: "^9.0.1"
|
|
86
86
|
},
|
|
@@ -95,19 +95,26 @@ var require_package = __commonJS({
|
|
|
95
95
|
],
|
|
96
96
|
engines: {
|
|
97
97
|
node: ">=22.0.0"
|
|
98
|
+
},
|
|
99
|
+
pnpm: {
|
|
100
|
+
overrides: {
|
|
101
|
+
"@babel/helpers@<7.26.10": ">=7.26.10",
|
|
102
|
+
"brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12",
|
|
103
|
+
"brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2"
|
|
104
|
+
}
|
|
98
105
|
}
|
|
99
106
|
};
|
|
100
107
|
}
|
|
101
108
|
});
|
|
102
109
|
|
|
103
110
|
// src/index.ts
|
|
104
|
-
var
|
|
105
|
-
__export(
|
|
111
|
+
var index_exports = {};
|
|
112
|
+
__export(index_exports, {
|
|
106
113
|
CrawlWatcher: () => CrawlWatcher,
|
|
107
114
|
FirecrawlError: () => FirecrawlError,
|
|
108
115
|
default: () => FirecrawlApp
|
|
109
116
|
});
|
|
110
|
-
module.exports = __toCommonJS(
|
|
117
|
+
module.exports = __toCommonJS(index_exports);
|
|
111
118
|
var import_axios = __toESM(require("axios"), 1);
|
|
112
119
|
var zt = require("zod");
|
|
113
120
|
var import_zod_to_json_schema = require("zod-to-json-schema");
|
|
@@ -888,15 +895,18 @@ var FirecrawlApp = class {
|
|
|
888
895
|
* @returns The final job status or data.
|
|
889
896
|
*/
|
|
890
897
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
898
|
+
let failedTries = 0;
|
|
899
|
+
let networkRetries = 0;
|
|
900
|
+
const maxNetworkRetries = 3;
|
|
901
|
+
while (true) {
|
|
902
|
+
try {
|
|
894
903
|
let statusResponse = await this.getRequest(
|
|
895
904
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
896
905
|
headers
|
|
897
906
|
);
|
|
898
907
|
if (statusResponse.status === 200) {
|
|
899
908
|
failedTries = 0;
|
|
909
|
+
networkRetries = 0;
|
|
900
910
|
let statusData = statusResponse.data;
|
|
901
911
|
if (statusData.status === "completed") {
|
|
902
912
|
if ("data" in statusData) {
|
|
@@ -931,10 +941,44 @@ var FirecrawlApp = class {
|
|
|
931
941
|
this.handleError(statusResponse, "check crawl status");
|
|
932
942
|
}
|
|
933
943
|
}
|
|
944
|
+
} catch (error) {
|
|
945
|
+
if (this.isRetryableError(error) && networkRetries < maxNetworkRetries) {
|
|
946
|
+
networkRetries++;
|
|
947
|
+
const backoffDelay = Math.min(1e3 * Math.pow(2, networkRetries - 1), 1e4);
|
|
948
|
+
await new Promise((resolve) => setTimeout(resolve, backoffDelay));
|
|
949
|
+
continue;
|
|
950
|
+
}
|
|
951
|
+
throw new FirecrawlError(error, 500);
|
|
952
|
+
}
|
|
953
|
+
}
|
|
954
|
+
}
|
|
955
|
+
/**
|
|
956
|
+
* Determines if an error is retryable (transient network error)
|
|
957
|
+
* @param error - The error to check
|
|
958
|
+
* @returns True if the error should be retried
|
|
959
|
+
*/
|
|
960
|
+
isRetryableError(error) {
|
|
961
|
+
if (error instanceof import_axios.AxiosError) {
|
|
962
|
+
if (!error.response) {
|
|
963
|
+
const code = error.code;
|
|
964
|
+
const message = error.message?.toLowerCase() || "";
|
|
965
|
+
return code === "ECONNRESET" || code === "ETIMEDOUT" || code === "ENOTFOUND" || code === "ECONNREFUSED" || message.includes("socket hang up") || message.includes("network error") || message.includes("timeout");
|
|
966
|
+
}
|
|
967
|
+
if (error.response?.status === 408 || error.response?.status === 504) {
|
|
968
|
+
return true;
|
|
969
|
+
}
|
|
970
|
+
}
|
|
971
|
+
if (error && typeof error === "object") {
|
|
972
|
+
const code = error.code;
|
|
973
|
+
const message = error.message?.toLowerCase() || "";
|
|
974
|
+
if (code === "ECONNRESET" || code === "ETIMEDOUT" || code === "ENOTFOUND" || code === "ECONNREFUSED" || message.includes("socket hang up") || message.includes("network error") || message.includes("timeout")) {
|
|
975
|
+
return true;
|
|
976
|
+
}
|
|
977
|
+
if (error.response?.status === 408 || error.response?.status === 504) {
|
|
978
|
+
return true;
|
|
934
979
|
}
|
|
935
|
-
} catch (error) {
|
|
936
|
-
throw new FirecrawlError(error, 500);
|
|
937
980
|
}
|
|
981
|
+
return false;
|
|
938
982
|
}
|
|
939
983
|
/**
|
|
940
984
|
* Handles errors from API responses.
|
package/dist/index.d.cts
CHANGED
|
@@ -698,6 +698,12 @@ declare class FirecrawlApp {
|
|
|
698
698
|
* @returns The final job status or data.
|
|
699
699
|
*/
|
|
700
700
|
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
701
|
+
/**
|
|
702
|
+
* Determines if an error is retryable (transient network error)
|
|
703
|
+
* @param error - The error to check
|
|
704
|
+
* @returns True if the error should be retried
|
|
705
|
+
*/
|
|
706
|
+
private isRetryableError;
|
|
701
707
|
/**
|
|
702
708
|
* Handles errors from API responses.
|
|
703
709
|
* @param {AxiosResponse} response - The response from the API.
|
package/dist/index.d.ts
CHANGED
|
@@ -698,6 +698,12 @@ declare class FirecrawlApp {
|
|
|
698
698
|
* @returns The final job status or data.
|
|
699
699
|
*/
|
|
700
700
|
monitorJobStatus(id: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse | ErrorResponse>;
|
|
701
|
+
/**
|
|
702
|
+
* Determines if an error is retryable (transient network error)
|
|
703
|
+
* @param error - The error to check
|
|
704
|
+
* @returns True if the error should be retried
|
|
705
|
+
*/
|
|
706
|
+
private isRetryableError;
|
|
701
707
|
/**
|
|
702
708
|
* Handles errors from API responses.
|
|
703
709
|
* @param {AxiosResponse} response - The response from the API.
|
package/dist/index.js
CHANGED
|
@@ -29,7 +29,7 @@ var FirecrawlApp = class {
|
|
|
29
29
|
}
|
|
30
30
|
async getVersion() {
|
|
31
31
|
try {
|
|
32
|
-
const packageJson = await import("./package-TKKTR5R7.js");
|
|
32
|
+
const packageJson = await import("./package-Z6F7JDXI.js");
|
|
33
33
|
return packageJson.default.version;
|
|
34
34
|
} catch (error) {
|
|
35
35
|
console.error("Error getting version:", error);
|
|
@@ -779,15 +779,18 @@ var FirecrawlApp = class {
|
|
|
779
779
|
* @returns The final job status or data.
|
|
780
780
|
*/
|
|
781
781
|
async monitorJobStatus(id, headers, checkInterval) {
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
782
|
+
let failedTries = 0;
|
|
783
|
+
let networkRetries = 0;
|
|
784
|
+
const maxNetworkRetries = 3;
|
|
785
|
+
while (true) {
|
|
786
|
+
try {
|
|
785
787
|
let statusResponse = await this.getRequest(
|
|
786
788
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
787
789
|
headers
|
|
788
790
|
);
|
|
789
791
|
if (statusResponse.status === 200) {
|
|
790
792
|
failedTries = 0;
|
|
793
|
+
networkRetries = 0;
|
|
791
794
|
let statusData = statusResponse.data;
|
|
792
795
|
if (statusData.status === "completed") {
|
|
793
796
|
if ("data" in statusData) {
|
|
@@ -822,10 +825,44 @@ var FirecrawlApp = class {
|
|
|
822
825
|
this.handleError(statusResponse, "check crawl status");
|
|
823
826
|
}
|
|
824
827
|
}
|
|
828
|
+
} catch (error) {
|
|
829
|
+
if (this.isRetryableError(error) && networkRetries < maxNetworkRetries) {
|
|
830
|
+
networkRetries++;
|
|
831
|
+
const backoffDelay = Math.min(1e3 * Math.pow(2, networkRetries - 1), 1e4);
|
|
832
|
+
await new Promise((resolve) => setTimeout(resolve, backoffDelay));
|
|
833
|
+
continue;
|
|
834
|
+
}
|
|
835
|
+
throw new FirecrawlError(error, 500);
|
|
836
|
+
}
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
/**
|
|
840
|
+
* Determines if an error is retryable (transient network error)
|
|
841
|
+
* @param error - The error to check
|
|
842
|
+
* @returns True if the error should be retried
|
|
843
|
+
*/
|
|
844
|
+
isRetryableError(error) {
|
|
845
|
+
if (error instanceof AxiosError) {
|
|
846
|
+
if (!error.response) {
|
|
847
|
+
const code = error.code;
|
|
848
|
+
const message = error.message?.toLowerCase() || "";
|
|
849
|
+
return code === "ECONNRESET" || code === "ETIMEDOUT" || code === "ENOTFOUND" || code === "ECONNREFUSED" || message.includes("socket hang up") || message.includes("network error") || message.includes("timeout");
|
|
850
|
+
}
|
|
851
|
+
if (error.response?.status === 408 || error.response?.status === 504) {
|
|
852
|
+
return true;
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
if (error && typeof error === "object") {
|
|
856
|
+
const code = error.code;
|
|
857
|
+
const message = error.message?.toLowerCase() || "";
|
|
858
|
+
if (code === "ECONNRESET" || code === "ETIMEDOUT" || code === "ENOTFOUND" || code === "ECONNREFUSED" || message.includes("socket hang up") || message.includes("network error") || message.includes("timeout")) {
|
|
859
|
+
return true;
|
|
860
|
+
}
|
|
861
|
+
if (error.response?.status === 408 || error.response?.status === 504) {
|
|
862
|
+
return true;
|
|
825
863
|
}
|
|
826
|
-
} catch (error) {
|
|
827
|
-
throw new FirecrawlError(error, 500);
|
|
828
864
|
}
|
|
865
|
+
return false;
|
|
829
866
|
}
|
|
830
867
|
/**
|
|
831
868
|
* Handles errors from API responses.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// package.json
|
|
2
2
|
var name = "@mendable/firecrawl-js";
|
|
3
|
-
var version = "1.29.1";
|
|
3
|
+
var version = "1.29.3";
|
|
4
4
|
var description = "JavaScript SDK for Firecrawl API";
|
|
5
5
|
var main = "dist/index.js";
|
|
6
6
|
var types = "dist/index.d.ts";
|
|
@@ -16,7 +16,8 @@ var scripts = {
|
|
|
16
16
|
build: "tsup",
|
|
17
17
|
"build-and-publish": "npm run build && npm publish --access public",
|
|
18
18
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
19
|
-
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
19
|
+
test: "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts",
|
|
20
|
+
"test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/unit/*.test.ts"
|
|
20
21
|
};
|
|
21
22
|
var repository = {
|
|
22
23
|
type: "git",
|
|
@@ -25,27 +26,26 @@ var repository = {
|
|
|
25
26
|
var author = "Mendable.ai";
|
|
26
27
|
var license = "MIT";
|
|
27
28
|
var dependencies = {
|
|
29
|
+
axios: "^1.11.0",
|
|
28
30
|
"typescript-event-target": "^1.1.1",
|
|
29
31
|
zod: "^3.23.8",
|
|
30
|
-
"zod-to-json-schema": "^3.23.0"
|
|
31
|
-
axios: "^1.6.8"
|
|
32
|
+
"zod-to-json-schema": "^3.23.0"
|
|
32
33
|
};
|
|
33
34
|
var bugs = {
|
|
34
35
|
url: "https://github.com/mendableai/firecrawl/issues"
|
|
35
36
|
};
|
|
36
37
|
var homepage = "https://github.com/mendableai/firecrawl#readme";
|
|
37
38
|
var devDependencies = {
|
|
38
|
-
"@jest/globals": "^
|
|
39
|
-
"@types/axios": "^0.14.0",
|
|
39
|
+
"@jest/globals": "^30.0.5",
|
|
40
40
|
"@types/dotenv": "^8.2.0",
|
|
41
|
-
"@types/jest": "^
|
|
41
|
+
"@types/jest": "^30.0.0",
|
|
42
42
|
"@types/mocha": "^10.0.6",
|
|
43
43
|
"@types/node": "^20.12.12",
|
|
44
44
|
"@types/uuid": "^9.0.8",
|
|
45
45
|
dotenv: "^16.4.5",
|
|
46
|
-
jest: "^
|
|
47
|
-
"ts-jest": "^29.
|
|
48
|
-
tsup: "^8.
|
|
46
|
+
jest: "^30.0.5",
|
|
47
|
+
"ts-jest": "^29.4.0",
|
|
48
|
+
tsup: "^8.5.0",
|
|
49
49
|
typescript: "^5.4.5",
|
|
50
50
|
uuid: "^9.0.1"
|
|
51
51
|
};
|
|
@@ -61,6 +61,13 @@ var keywords = [
|
|
|
61
61
|
var engines = {
|
|
62
62
|
node: ">=22.0.0"
|
|
63
63
|
};
|
|
64
|
+
var pnpm = {
|
|
65
|
+
overrides: {
|
|
66
|
+
"@babel/helpers@<7.26.10": ">=7.26.10",
|
|
67
|
+
"brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12",
|
|
68
|
+
"brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2"
|
|
69
|
+
}
|
|
70
|
+
};
|
|
64
71
|
var package_default = {
|
|
65
72
|
name,
|
|
66
73
|
version,
|
|
@@ -78,7 +85,8 @@ var package_default = {
|
|
|
78
85
|
homepage,
|
|
79
86
|
devDependencies,
|
|
80
87
|
keywords,
|
|
81
|
-
engines
|
|
88
|
+
engines,
|
|
89
|
+
pnpm
|
|
82
90
|
};
|
|
83
91
|
export {
|
|
84
92
|
author,
|
|
@@ -94,6 +102,7 @@ export {
|
|
|
94
102
|
license,
|
|
95
103
|
main,
|
|
96
104
|
name,
|
|
105
|
+
pnpm,
|
|
97
106
|
repository,
|
|
98
107
|
scripts,
|
|
99
108
|
type,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl",
|
|
3
|
-
"version": "1.29.
|
|
3
|
+
"version": "1.29.3",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -16,7 +16,8 @@
|
|
|
16
16
|
"build": "tsup",
|
|
17
17
|
"build-and-publish": "npm run build && npm publish --access public",
|
|
18
18
|
"publish-beta": "npm run build && npm publish --access public --tag beta",
|
|
19
|
-
"test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts"
|
|
19
|
+
"test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/**/*.test.ts",
|
|
20
|
+
"test:unit": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/v1/unit/*.test.ts"
|
|
20
21
|
},
|
|
21
22
|
"repository": {
|
|
22
23
|
"type": "git",
|
|
@@ -25,27 +26,26 @@
|
|
|
25
26
|
"author": "Mendable.ai",
|
|
26
27
|
"license": "MIT",
|
|
27
28
|
"dependencies": {
|
|
29
|
+
"axios": "^1.11.0",
|
|
28
30
|
"typescript-event-target": "^1.1.1",
|
|
29
31
|
"zod": "^3.23.8",
|
|
30
|
-
"zod-to-json-schema": "^3.23.0"
|
|
31
|
-
"axios": "^1.6.8"
|
|
32
|
+
"zod-to-json-schema": "^3.23.0"
|
|
32
33
|
},
|
|
33
34
|
"bugs": {
|
|
34
35
|
"url": "https://github.com/mendableai/firecrawl/issues"
|
|
35
36
|
},
|
|
36
37
|
"homepage": "https://github.com/mendableai/firecrawl#readme",
|
|
37
38
|
"devDependencies": {
|
|
38
|
-
"@jest/globals": "^
|
|
39
|
-
"@types/axios": "^0.14.0",
|
|
39
|
+
"@jest/globals": "^30.0.5",
|
|
40
40
|
"@types/dotenv": "^8.2.0",
|
|
41
|
-
"@types/jest": "^
|
|
41
|
+
"@types/jest": "^30.0.0",
|
|
42
42
|
"@types/mocha": "^10.0.6",
|
|
43
43
|
"@types/node": "^20.12.12",
|
|
44
44
|
"@types/uuid": "^9.0.8",
|
|
45
45
|
"dotenv": "^16.4.5",
|
|
46
|
-
"jest": "^
|
|
47
|
-
"ts-jest": "^29.
|
|
48
|
-
"tsup": "^8.
|
|
46
|
+
"jest": "^30.0.5",
|
|
47
|
+
"ts-jest": "^29.4.0",
|
|
48
|
+
"tsup": "^8.5.0",
|
|
49
49
|
"typescript": "^5.4.5",
|
|
50
50
|
"uuid": "^9.0.1"
|
|
51
51
|
},
|
|
@@ -60,5 +60,12 @@
|
|
|
60
60
|
],
|
|
61
61
|
"engines": {
|
|
62
62
|
"node": ">=22.0.0"
|
|
63
|
+
},
|
|
64
|
+
"pnpm": {
|
|
65
|
+
"overrides": {
|
|
66
|
+
"@babel/helpers@<7.26.10": ">=7.26.10",
|
|
67
|
+
"brace-expansion@>=1.0.0 <=1.1.11": ">=1.1.12",
|
|
68
|
+
"brace-expansion@>=2.0.0 <=2.0.1": ">=2.0.2"
|
|
69
|
+
}
|
|
63
70
|
}
|
|
64
71
|
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import FirecrawlApp from '../../../index';
|
|
2
|
+
import { describe, test, expect, jest, beforeEach, afterEach } from '@jest/globals';
|
|
3
|
+
|
|
4
|
+
describe('monitorJobStatus retry logic', () => {
|
|
5
|
+
let app: FirecrawlApp;
|
|
6
|
+
let originalConsoleWarn: typeof console.warn;
|
|
7
|
+
|
|
8
|
+
beforeEach(() => {
|
|
9
|
+
app = new FirecrawlApp({ apiKey: 'test-key', apiUrl: 'https://test.com' });
|
|
10
|
+
originalConsoleWarn = console.warn;
|
|
11
|
+
console.warn = jest.fn();
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
afterEach(() => {
|
|
15
|
+
console.warn = originalConsoleWarn;
|
|
16
|
+
jest.clearAllMocks();
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
test('should retry on socket hang up error', async () => {
|
|
20
|
+
const socketHangUpError = new Error('socket hang up') as any;
|
|
21
|
+
socketHangUpError.code = 'ECONNRESET';
|
|
22
|
+
|
|
23
|
+
const successResponse = {
|
|
24
|
+
status: 200,
|
|
25
|
+
data: { status: 'completed', data: [{ url: 'test.com', markdown: 'test' }] }
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
const originalGetRequest = app.getRequest;
|
|
29
|
+
let callCount = 0;
|
|
30
|
+
|
|
31
|
+
app.getRequest = async function(url: string, headers: any) {
|
|
32
|
+
callCount++;
|
|
33
|
+
if (callCount === 1) {
|
|
34
|
+
throw socketHangUpError;
|
|
35
|
+
}
|
|
36
|
+
return successResponse;
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
const result = await app.monitorJobStatus('test-id', {}, 1);
|
|
40
|
+
|
|
41
|
+
expect(callCount).toBe(2);
|
|
42
|
+
expect(result).toEqual(successResponse.data);
|
|
43
|
+
expect(console.warn).toHaveBeenCalledWith(
|
|
44
|
+
expect.stringContaining('Network error during job status check (attempt 1/3): socket hang up')
|
|
45
|
+
);
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
test('should retry on ETIMEDOUT error', async () => {
|
|
49
|
+
const timeoutError = new Error('timeout') as any;
|
|
50
|
+
timeoutError.code = 'ETIMEDOUT';
|
|
51
|
+
|
|
52
|
+
const successResponse = {
|
|
53
|
+
status: 200,
|
|
54
|
+
data: { status: 'completed', data: [{ url: 'test.com', markdown: 'test' }] }
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
const originalGetRequest = app.getRequest;
|
|
58
|
+
let callCount = 0;
|
|
59
|
+
|
|
60
|
+
app.getRequest = async function(url: string, headers: any) {
|
|
61
|
+
callCount++;
|
|
62
|
+
if (callCount === 1) {
|
|
63
|
+
throw timeoutError;
|
|
64
|
+
}
|
|
65
|
+
return successResponse;
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
const result = await app.monitorJobStatus('test-id', {}, 1);
|
|
69
|
+
|
|
70
|
+
expect(callCount).toBe(2);
|
|
71
|
+
expect(result).toEqual(successResponse.data);
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
test('should fail after max retries exceeded', async () => {
|
|
75
|
+
const socketHangUpError = new Error('socket hang up') as any;
|
|
76
|
+
socketHangUpError.code = 'ECONNRESET';
|
|
77
|
+
|
|
78
|
+
app.getRequest = async function(url: string, headers: any) {
|
|
79
|
+
throw socketHangUpError;
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
await expect(app.monitorJobStatus('test-id', {}, 1)).rejects.toThrow('socket hang up');
|
|
83
|
+
|
|
84
|
+
expect(console.warn).toHaveBeenCalledTimes(3);
|
|
85
|
+
}, 15000);
|
|
86
|
+
|
|
87
|
+
test('should not retry on non-retryable errors', async () => {
|
|
88
|
+
const authError = new Error('Unauthorized') as any;
|
|
89
|
+
authError.response = { status: 401, data: { error: 'Unauthorized' } };
|
|
90
|
+
|
|
91
|
+
app.getRequest = async function(url: string, headers: any) {
|
|
92
|
+
throw authError;
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
await expect(app.monitorJobStatus('test-id', {}, 1)).rejects.toThrow('Unauthorized');
|
|
96
|
+
|
|
97
|
+
expect(console.warn).not.toHaveBeenCalled();
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
test('should retry on HTTP timeout status codes', async () => {
|
|
101
|
+
const timeoutError = new Error('Request timeout') as any;
|
|
102
|
+
timeoutError.response = { status: 408, data: { error: 'Request timeout' } };
|
|
103
|
+
|
|
104
|
+
const successResponse = {
|
|
105
|
+
status: 200,
|
|
106
|
+
data: { status: 'completed', data: [{ url: 'test.com', markdown: 'test' }] }
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
const originalGetRequest = app.getRequest;
|
|
110
|
+
let callCount = 0;
|
|
111
|
+
|
|
112
|
+
app.getRequest = async function(url: string, headers: any) {
|
|
113
|
+
callCount++;
|
|
114
|
+
if (callCount === 1) {
|
|
115
|
+
throw timeoutError;
|
|
116
|
+
}
|
|
117
|
+
return successResponse;
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
const result = await app.monitorJobStatus('test-id', {}, 1);
|
|
121
|
+
|
|
122
|
+
expect(callCount).toBe(2);
|
|
123
|
+
expect(result).toEqual(successResponse.data);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test('should use exponential backoff for retries', async () => {
|
|
127
|
+
const socketHangUpError = new Error('socket hang up') as any;
|
|
128
|
+
socketHangUpError.code = 'ECONNRESET';
|
|
129
|
+
|
|
130
|
+
const successResponse = {
|
|
131
|
+
status: 200,
|
|
132
|
+
data: { status: 'completed', data: [{ url: 'test.com', markdown: 'test' }] }
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
const originalGetRequest = app.getRequest;
|
|
136
|
+
let callCount = 0;
|
|
137
|
+
|
|
138
|
+
app.getRequest = async function(url: string, headers: any) {
|
|
139
|
+
callCount++;
|
|
140
|
+
if (callCount <= 2) {
|
|
141
|
+
throw socketHangUpError;
|
|
142
|
+
}
|
|
143
|
+
return successResponse;
|
|
144
|
+
};
|
|
145
|
+
|
|
146
|
+
const startTime = Date.now();
|
|
147
|
+
const result = await app.monitorJobStatus('test-id', {}, 1);
|
|
148
|
+
const endTime = Date.now();
|
|
149
|
+
|
|
150
|
+
expect(callCount).toBe(3);
|
|
151
|
+
expect(result).toEqual(successResponse.data);
|
|
152
|
+
expect(endTime - startTime).toBeGreaterThan(3000);
|
|
153
|
+
});
|
|
154
|
+
});
|
package/src/index.ts
CHANGED
|
@@ -1455,33 +1455,39 @@ export default class FirecrawlApp {
|
|
|
1455
1455
|
headers: AxiosRequestHeaders,
|
|
1456
1456
|
checkInterval: number
|
|
1457
1457
|
): Promise<CrawlStatusResponse | ErrorResponse> {
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1458
|
+
let failedTries = 0;
|
|
1459
|
+
let networkRetries = 0;
|
|
1460
|
+
const maxNetworkRetries = 3;
|
|
1461
|
+
|
|
1462
|
+
while (true) {
|
|
1463
|
+
try {
|
|
1461
1464
|
let statusResponse: AxiosResponse = await this.getRequest(
|
|
1462
1465
|
`${this.apiUrl}/v1/crawl/${id}`,
|
|
1463
1466
|
headers
|
|
1464
1467
|
);
|
|
1468
|
+
|
|
1465
1469
|
if (statusResponse.status === 200) {
|
|
1466
1470
|
failedTries = 0;
|
|
1471
|
+
networkRetries = 0;
|
|
1467
1472
|
let statusData = statusResponse.data;
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
statusResponse = await this.getRequest(statusData.next, headers);
|
|
1476
|
-
statusData = statusResponse.data;
|
|
1477
|
-
data = data.concat(statusData.data);
|
|
1473
|
+
|
|
1474
|
+
if (statusData.status === "completed") {
|
|
1475
|
+
if ("data" in statusData) {
|
|
1476
|
+
let data = statusData.data;
|
|
1477
|
+
while (typeof statusData === 'object' && 'next' in statusData) {
|
|
1478
|
+
if (data.length === 0) {
|
|
1479
|
+
break
|
|
1478
1480
|
}
|
|
1479
|
-
statusData.
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
throw new FirecrawlError("Crawl job completed but no data was returned", 500);
|
|
1481
|
+
statusResponse = await this.getRequest(statusData.next, headers);
|
|
1482
|
+
statusData = statusResponse.data;
|
|
1483
|
+
data = data.concat(statusData.data);
|
|
1483
1484
|
}
|
|
1484
|
-
|
|
1485
|
+
statusData.data = data;
|
|
1486
|
+
return statusData;
|
|
1487
|
+
} else {
|
|
1488
|
+
throw new FirecrawlError("Crawl job completed but no data was returned", 500);
|
|
1489
|
+
}
|
|
1490
|
+
} else if (
|
|
1485
1491
|
["active", "paused", "pending", "queued", "waiting", "scraping"].includes(statusData.status)
|
|
1486
1492
|
) {
|
|
1487
1493
|
checkInterval = Math.max(checkInterval, 2);
|
|
@@ -1500,12 +1506,69 @@ export default class FirecrawlApp {
|
|
|
1500
1506
|
this.handleError(statusResponse, "check crawl status");
|
|
1501
1507
|
}
|
|
1502
1508
|
}
|
|
1509
|
+
} catch (error: any) {
|
|
1510
|
+
if (this.isRetryableError(error) && networkRetries < maxNetworkRetries) {
|
|
1511
|
+
networkRetries++;
|
|
1512
|
+
const backoffDelay = Math.min(1000 * Math.pow(2, networkRetries - 1), 10000);
|
|
1513
|
+
|
|
1514
|
+
await new Promise((resolve) => setTimeout(resolve, backoffDelay));
|
|
1515
|
+
continue;
|
|
1516
|
+
}
|
|
1517
|
+
|
|
1518
|
+
throw new FirecrawlError(error, 500);
|
|
1503
1519
|
}
|
|
1504
|
-
} catch (error: any) {
|
|
1505
|
-
throw new FirecrawlError(error, 500);
|
|
1506
1520
|
}
|
|
1507
1521
|
}
|
|
1508
1522
|
|
|
1523
|
+
/**
|
|
1524
|
+
* Determines if an error is retryable (transient network error)
|
|
1525
|
+
* @param error - The error to check
|
|
1526
|
+
* @returns True if the error should be retried
|
|
1527
|
+
*/
|
|
1528
|
+
private isRetryableError(error: any): boolean {
|
|
1529
|
+
if (error instanceof AxiosError) {
|
|
1530
|
+
if (!error.response) {
|
|
1531
|
+
const code = error.code;
|
|
1532
|
+
const message = error.message?.toLowerCase() || '';
|
|
1533
|
+
|
|
1534
|
+
return (
|
|
1535
|
+
code === 'ECONNRESET' ||
|
|
1536
|
+
code === 'ETIMEDOUT' ||
|
|
1537
|
+
code === 'ENOTFOUND' ||
|
|
1538
|
+
code === 'ECONNREFUSED' ||
|
|
1539
|
+
message.includes('socket hang up') ||
|
|
1540
|
+
message.includes('network error') ||
|
|
1541
|
+
message.includes('timeout')
|
|
1542
|
+
);
|
|
1543
|
+
}
|
|
1544
|
+
|
|
1545
|
+
if (error.response?.status === 408 || error.response?.status === 504) {
|
|
1546
|
+
return true;
|
|
1547
|
+
}
|
|
1548
|
+
}
|
|
1549
|
+
|
|
1550
|
+
if (error && typeof error === 'object') {
|
|
1551
|
+
const code = error.code;
|
|
1552
|
+
const message = error.message?.toLowerCase() || '';
|
|
1553
|
+
|
|
1554
|
+
if (code === 'ECONNRESET' ||
|
|
1555
|
+
code === 'ETIMEDOUT' ||
|
|
1556
|
+
code === 'ENOTFOUND' ||
|
|
1557
|
+
code === 'ECONNREFUSED' ||
|
|
1558
|
+
message.includes('socket hang up') ||
|
|
1559
|
+
message.includes('network error') ||
|
|
1560
|
+
message.includes('timeout')) {
|
|
1561
|
+
return true;
|
|
1562
|
+
}
|
|
1563
|
+
|
|
1564
|
+
if (error.response?.status === 408 || error.response?.status === 504) {
|
|
1565
|
+
return true;
|
|
1566
|
+
}
|
|
1567
|
+
}
|
|
1568
|
+
|
|
1569
|
+
return false;
|
|
1570
|
+
}
|
|
1571
|
+
|
|
1509
1572
|
/**
|
|
1510
1573
|
* Handles errors from API responses.
|
|
1511
1574
|
* @param {AxiosResponse} response - The response from the API.
|