@socialgouv/fiches-travail-data-types 4.916.0 → 4.917.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -19,8 +19,23 @@ async function scrap(urls) {
 const inputs = urls.map(({ id, url }) => limit(() => (0, scrapUrl_1.scrapUrl)(id, url)));
 const results = await Promise.allSettled(inputs);
 const failedPromise = results.filter(({ status }) => status === "rejected");
-
-
+// Separate 403 errors from other errors
+const forbiddenErrors = failedPromise.filter(({ reason }) => reason.isForbidden);
+const otherErrors = failedPromise.filter(({ reason }) => !reason.isForbidden);
+// Log 403 errors as warnings and save them to a file for GitHub Actions
+if (forbiddenErrors.length > 0) {
+    const forbiddenUrls = forbiddenErrors.map(({ reason }) => reason.url);
+    console.warn("WARNING: The following pages returned 403 Forbidden and were skipped:", forbiddenUrls);
+    // Write forbidden URLs to a file for GitHub Actions to use
+    fs_1.default.writeFileSync(path_1.default.join(__dirname, "../../forbidden-urls.json"), JSON.stringify({
+        urls: forbiddenUrls,
+        timestamp: new Date().toISOString(),
+        count: forbiddenUrls.length,
+    }, null, 2));
+}
+// Only fail if there are non-403 errors
+if (otherErrors.length > 0) {
+    console.error("scrap fail", otherErrors.map(({ reason }) => reason));
     throw new Error("Error - fetching pages fail. Some pages are missing");
 }
 const resolvedPromise = results.flatMap(({ status, value }) => status === "fulfilled" ? [value] : []);
|
|
@@ -39,6 +39,10 @@ async function scrapUrl(id, url) {
 }
 else if (error instanceof got_1.default.HTTPError) {
     err = new Error(`HTTP Error: ${error.response.statusCode} - ${url} - ${error.message}`);
+    // Add a property to identify 403 errors specifically
+    if (error.response.statusCode === 403) {
+        err.isForbidden = true;
+    }
 }
 else {
     err = new Error(error.message);
|