@tricoteuses/assemblee 1.6.1 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -27
- package/lib/bugs/acteur-00010/plugin.test.js +4 -4
- package/lib/bugs/acteur-00010/plugin.test.mjs +1 -1
- package/lib/bugs/acteur-00010.js +15 -16
- package/lib/bugs/acteur-00010.mjs +1 -1
- package/lib/bugs/agenda-00002/plugin.test.js +4 -4
- package/lib/bugs/agenda-00002/plugin.test.mjs +1 -1
- package/lib/bugs/agenda-00002.js +27 -28
- package/lib/bugs/agenda-00002.mjs +3 -3
- package/lib/bugs/agenda-00008/plugin.test.js +4 -4
- package/lib/bugs/agenda-00008/plugin.test.mjs +1 -1
- package/lib/bugs/agenda-00008.js +15 -16
- package/lib/bugs/agenda-00008.mjs +1 -1
- package/lib/bugs/agenda-00011/plugin.test.js +5 -5
- package/lib/bugs/agenda-00011/plugin.test.mjs +1 -1
- package/lib/bugs/agenda-00011.js +21 -22
- package/lib/bugs/agenda-00011.mjs +3 -3
- package/lib/bugs.js +17 -19
- package/lib/bugs.mjs +3 -3
- package/lib/cleaners/actes_legislatifs.js +5 -5
- package/lib/cleaners/acteurs.js +5 -5
- package/lib/cleaners/acteurs.mjs +1 -1
- package/lib/cleaners/amendements.js +5 -5
- package/lib/cleaners/documents.js +5 -5
- package/lib/cleaners/dossiers_legislatifs.js +5 -5
- package/lib/cleaners/organes.js +2 -2
- package/lib/cleaners/reunions.js +5 -5
- package/lib/cleaners/scrutins.js +23 -10
- package/lib/cleaners/scrutins.mjs +16 -3
- package/lib/cleaners/xml.js +8 -8
- package/lib/cleaners/xml.mjs +1 -1
- package/lib/datasets.d.ts +2 -2
- package/lib/datasets.js +53 -132
- package/lib/datasets.mjs +40 -118
- package/lib/dates.js +1 -2
- package/lib/dossiers_legislatifs.js +8 -8
- package/lib/file_systems.js +15 -14
- package/lib/file_systems.mjs +3 -3
- package/lib/git.d.ts +1 -0
- package/lib/git.js +21 -5
- package/lib/git.mjs +16 -1
- package/lib/inserters.js +3 -3
- package/lib/loaders.d.ts +14 -40
- package/lib/loaders.js +586 -1091
- package/lib/loaders.mjs +115 -357
- package/lib/parsers/documents.js +10 -10
- package/lib/parsers/documents.mjs +1 -1
- package/lib/parsers/recherche_amendements.js +17 -15
- package/lib/parsers/recherche_amendements.mjs +1 -1
- package/lib/raw_types/acteurs_et_organes.js +9 -9
- package/lib/raw_types/acteurs_et_organes.mjs +1 -1
- package/lib/raw_types/agendas.js +9 -9
- package/lib/raw_types/agendas.mjs +1 -1
- package/lib/raw_types/amendements.js +9 -9
- package/lib/raw_types/amendements.mjs +1 -1
- package/lib/raw_types/debats.d.ts +26 -78
- package/lib/raw_types/debats.js +49 -242
- package/lib/raw_types/debats.mjs +41 -234
- package/lib/raw_types/dossiers_legislatifs.js +9 -9
- package/lib/raw_types/dossiers_legislatifs.mjs +1 -1
- package/lib/raw_types/questions.js +9 -9
- package/lib/raw_types/questions.mjs +1 -1
- package/lib/raw_types/scrutins.d.ts +21 -9
- package/lib/raw_types/scrutins.js +46 -20
- package/lib/raw_types/scrutins.mjs +37 -11
- package/lib/scripts/bugs_helper.js +17 -18
- package/lib/scripts/bugs_helper.mjs +3 -3
- package/lib/scripts/clean_reorganized_data.js +183 -561
- package/lib/scripts/clean_reorganized_data.mjs +111 -410
- package/lib/scripts/diff_amendements.js +11 -24
- package/lib/scripts/diff_amendements.mjs +3 -17
- package/lib/scripts/document_dossiers_legislatifs.js +24 -27
- package/lib/scripts/document_dossiers_legislatifs.mjs +3 -3
- package/lib/scripts/get_today_reunions.js +5 -8
- package/lib/scripts/get_today_reunions.mjs +3 -7
- package/lib/scripts/merge_scrutins.js +11 -24
- package/lib/scripts/merge_scrutins.mjs +3 -17
- package/lib/scripts/parse_textes_lois.js +8 -38
- package/lib/scripts/parse_textes_lois.mjs +3 -33
- package/lib/scripts/raw_types_from_amendements.js +14 -27
- package/lib/scripts/raw_types_from_amendements.mjs +3 -17
- package/lib/scripts/reorganize_data.js +204 -456
- package/lib/scripts/reorganize_data.mjs +68 -260
- package/lib/scripts/retrieve_deputes_photos.d.ts +3 -1
- package/lib/scripts/retrieve_deputes_photos.js +146 -172
- package/lib/scripts/retrieve_deputes_photos.mjs +74 -98
- package/lib/scripts/retrieve_documents.js +207 -156
- package/lib/scripts/retrieve_documents.mjs +116 -128
- package/lib/scripts/retrieve_open_data.js +272 -249
- package/lib/scripts/retrieve_open_data.mjs +109 -123
- package/lib/scripts/retrieve_pending_amendments.js +16 -67
- package/lib/scripts/retrieve_pending_amendments.mjs +4 -55
- package/lib/scripts/retrieve_senateurs_photos.js +20 -77
- package/lib/scripts/retrieve_senateurs_photos.mjs +5 -61
- package/lib/scripts/retrieve_textes_lois.js +18 -46
- package/lib/scripts/retrieve_textes_lois.mjs +3 -33
- package/lib/scripts/shared/cli_helpers.d.ts +66 -0
- package/lib/scripts/shared/cli_helpers.js +90 -0
- package/lib/scripts/shared/cli_helpers.mjs +72 -0
- package/lib/scripts/test_iter_load.d.ts +1 -0
- package/lib/scripts/test_iter_load.js +154 -0
- package/lib/scripts/test_iter_load.mjs +71 -0
- package/lib/scripts/test_load.js +4 -15
- package/lib/scripts/test_load.mjs +3 -14
- package/lib/scripts/test_load_big_files.js +4 -15
- package/lib/scripts/test_load_big_files.mjs +3 -14
- package/lib/scripts/validate_json.js +15 -16
- package/lib/scripts/validate_json.mjs +3 -3
- package/lib/types/acteurs_et_organes.js +9 -9
- package/lib/types/acteurs_et_organes.mjs +1 -1
- package/lib/types/agendas.js +9 -9
- package/lib/types/agendas.mjs +1 -1
- package/lib/types/amendements.js +9 -9
- package/lib/types/amendements.mjs +1 -1
- package/lib/types/debats.d.ts +27 -79
- package/lib/types/debats.js +48 -241
- package/lib/types/debats.mjs +40 -233
- package/lib/types/dossiers_legislatifs.js +9 -9
- package/lib/types/dossiers_legislatifs.mjs +1 -1
- package/lib/types/legislatures.d.ts +4 -4
- package/lib/types/legislatures.js +5 -5
- package/lib/types/legislatures.mjs +5 -5
- package/lib/types/questions.js +9 -9
- package/lib/types/questions.mjs +1 -1
- package/lib/types/scrutins.d.ts +13 -4
- package/lib/types/scrutins.js +29 -15
- package/lib/types/scrutins.mjs +20 -6
- package/lib/urls.js +2 -2
- package/lib/urls.mjs +1 -1
- package/package.json +9 -7
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import assert from "assert";
|
|
2
|
-
import { execSync } from "child_process";
|
|
3
2
|
import commandLineArgs from "command-line-args";
|
|
4
3
|
import { createHash } from "crypto";
|
|
5
4
|
import { differenceInDays } from "date-fns";
|
|
@@ -9,78 +8,57 @@ import { walkDocumentAndDivisions } from "../dossiers_legislatifs.mjs";
|
|
|
9
8
|
import * as git from "../git.mjs";
|
|
10
9
|
import { iterLoadAssembleeDocuments, pathFromDocumentUid } from "../loaders.mjs";
|
|
11
10
|
import { DocumentUrlFormat, iterDocumentOrDivisionUrls } from "../urls.mjs";
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
help: "commit documents",
|
|
15
|
-
name: "commit",
|
|
16
|
-
type: Boolean
|
|
17
|
-
}, {
|
|
11
|
+
import { commitOption, dataDirDefaultOption, legislatureOption, pullOption, remoteOption, silentOption, verboseOption } from "./shared/cli_helpers.mjs";
|
|
12
|
+
const optionsDefinitions = [commitOption, legislatureOption, remoteOption, silentOption, verboseOption, dataDirDefaultOption, pullOption, {
|
|
18
13
|
alias: "f",
|
|
19
14
|
help: "retrieve all documents, even already retrieved ones",
|
|
20
15
|
name: "full",
|
|
21
16
|
type: Boolean
|
|
22
|
-
}, {
|
|
23
|
-
alias: "l",
|
|
24
|
-
defaultValue: "16",
|
|
25
|
-
help: 'legislatures to retrieve, "*" for all',
|
|
26
|
-
name: "legislature",
|
|
27
|
-
type: String
|
|
28
17
|
}, {
|
|
29
18
|
alias: "n",
|
|
30
19
|
help: "try to also retrieve documents that were previously not found",
|
|
31
20
|
name: "not-found",
|
|
32
21
|
type: Boolean
|
|
33
|
-
}, {
|
|
34
|
-
alias: "p",
|
|
35
|
-
help: "pull repositories before proceeding",
|
|
36
|
-
name: "pull",
|
|
37
|
-
type: Boolean
|
|
38
|
-
}, {
|
|
39
|
-
alias: "r",
|
|
40
|
-
help: "push commit to given remote",
|
|
41
|
-
multiple: true,
|
|
42
|
-
name: "remote",
|
|
43
|
-
type: String
|
|
44
|
-
}, {
|
|
45
|
-
alias: "s",
|
|
46
|
-
help: "don't log anything",
|
|
47
|
-
name: "silent",
|
|
48
|
-
type: Boolean
|
|
49
22
|
}, {
|
|
50
23
|
alias: "u",
|
|
51
24
|
help: "UID of first Assemblée's document or division to retrieve",
|
|
52
25
|
name: "uid",
|
|
53
26
|
type: String
|
|
54
27
|
}, {
|
|
55
|
-
alias: "
|
|
56
|
-
help: "
|
|
57
|
-
name: "
|
|
58
|
-
type:
|
|
28
|
+
alias: "F",
|
|
29
|
+
help: "format of documents to retrieve",
|
|
30
|
+
name: "format",
|
|
31
|
+
type: String
|
|
59
32
|
}, {
|
|
60
|
-
|
|
61
|
-
help: "
|
|
62
|
-
|
|
33
|
+
alias: "T",
|
|
34
|
+
help: "type of documents to retrieve (for example: PION)",
|
|
35
|
+
multiple: true,
|
|
36
|
+
name: "document-type",
|
|
63
37
|
type: String
|
|
64
38
|
}];
|
|
65
39
|
const options = commandLineArgs(optionsDefinitions);
|
|
66
40
|
const today = new Date();
|
|
41
|
+
async function fetchWithRetry(url, retries = 3, backoff = 300) {
|
|
42
|
+
for (let attempt = 0; attempt < retries; attempt++) {
|
|
43
|
+
try {
|
|
44
|
+
return await fetch(url);
|
|
45
|
+
} catch (error) {
|
|
46
|
+
if (attempt === retries - 1) {
|
|
47
|
+
throw error;
|
|
48
|
+
}
|
|
49
|
+
console.warn(`Fetch attempt ${attempt + 1} for ${url} failed. Retrying in ${backoff}ms...`);
|
|
50
|
+
await new Promise(resolve => setTimeout(resolve, backoff));
|
|
51
|
+
backoff *= 2;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
throw new Error(`Failed to fetch ${url} after ${retries} attempts`);
|
|
55
|
+
}
|
|
67
56
|
async function retrieveDocuments() {
|
|
68
57
|
assert(!options.commit || !options.uid, 'Options "commit" & "uid" are incompatible');
|
|
69
58
|
const dataDir = options.dataDir;
|
|
70
59
|
const documentsDir = path.join(dataDir, "Documents");
|
|
71
60
|
if (options.pull) {
|
|
72
|
-
|
|
73
|
-
cwd: documentsDir,
|
|
74
|
-
env: process.env,
|
|
75
|
-
encoding: "utf-8",
|
|
76
|
-
stdio: ["ignore", "ignore", "pipe"]
|
|
77
|
-
});
|
|
78
|
-
execSync(`git pull --rebase`, {
|
|
79
|
-
cwd: documentsDir,
|
|
80
|
-
env: process.env,
|
|
81
|
-
encoding: "utf-8",
|
|
82
|
-
stdio: ["ignore", "ignore", "pipe"]
|
|
83
|
-
});
|
|
61
|
+
git.resetAndPull(documentsDir);
|
|
84
62
|
}
|
|
85
63
|
fs.ensureDirSync(documentsDir);
|
|
86
64
|
if (options.full && !options.uid) {
|
|
@@ -108,95 +86,105 @@ async function retrieveDocuments() {
|
|
|
108
86
|
continue;
|
|
109
87
|
}
|
|
110
88
|
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
89
|
+
await processDocumentOrDivision(documentOrDivision, documentsDir, options);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
if (options.commit) {
|
|
93
|
+
return git.commitAndPush(documentsDir, "Nouvelle moisson", options.remote);
|
|
94
|
+
}
|
|
95
|
+
return 0;
|
|
96
|
+
}
|
|
97
|
+
async function processDocumentOrDivision(documentOrDivision, documentsDir, options) {
|
|
98
|
+
const documentDir = pathFromDocumentUid(documentsDir, documentOrDivision.uid);
|
|
99
|
+
fs.ensureDirSync(documentDir);
|
|
100
|
+
const filenameBySha256 = {};
|
|
101
|
+
const indexPath = path.join(documentDir, "index.json");
|
|
102
|
+
const index = fs.pathExistsSync(indexPath) ? fs.readJsonSync(indexPath) : {};
|
|
103
|
+
for (const {
|
|
104
|
+
format,
|
|
105
|
+
type,
|
|
106
|
+
url
|
|
107
|
+
} of iterDocumentOrDivisionUrls(documentOrDivision)) {
|
|
108
|
+
const filename = `${type}.${format === DocumentUrlFormat.Pdf ? "pdf" : "html"}`;
|
|
109
|
+
|
|
110
|
+
// Filter by format if option is passed
|
|
111
|
+
if (options.format && options.format !== format) {
|
|
112
|
+
continue;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Filter by document type if option is passed
|
|
116
|
+
const documentType = documentOrDivision.classification?.type?.code;
|
|
117
|
+
if (options["document-type"] !== undefined && !options["document-type"].includes(documentType)) {
|
|
118
|
+
continue;
|
|
119
|
+
}
|
|
120
|
+
let formatFilesInfos = index[format] ?? (index[format] = []);
|
|
121
|
+
let fileInfos = formatFilesInfos.find(file => file.url === url) ?? {};
|
|
122
|
+
if (!formatFilesInfos.includes(fileInfos)) {
|
|
123
|
+
formatFilesInfos.push(fileInfos);
|
|
124
|
+
}
|
|
125
|
+
fileInfos.url = url;
|
|
126
|
+
if (fileInfos.status === 200 && !options.full) {
|
|
127
|
+
filenameBySha256[fileInfos.sha256] = filename;
|
|
128
|
+
continue;
|
|
129
|
+
}
|
|
130
|
+
if (fileInfos.status === 404 && !options["not-found"] && differenceInDays(today, documentOrDivision.cycleDeVie.chrono.dateCreation ?? documentOrDivision.cycleDeVie.chrono.dateDepot) > 10) {
|
|
131
|
+
continue;
|
|
132
|
+
}
|
|
133
|
+
if (!options.silent) {
|
|
134
|
+
console.log(`Retrieving document or division ${documentOrDivision.uid} at ${url}…`);
|
|
135
|
+
}
|
|
136
|
+
const response = await fetchWithRetry(url);
|
|
137
|
+
const filePath = path.join(documentDir, filename);
|
|
138
|
+
if (response.ok) {
|
|
139
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
140
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
141
|
+
if (format === DocumentUrlFormat.Pdf && !buffer.subarray(0, 4).toString().startsWith("%PDF")) {
|
|
142
|
+
// Instead of a PDF, the received data may be an HTML page with a message like
|
|
143
|
+
// "Document non encore publié".
|
|
139
144
|
if (!options.silent) {
|
|
140
|
-
console.
|
|
145
|
+
console.warn(` PDF "${url}" not found.`);
|
|
141
146
|
}
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
fs.removeSync(filePath);
|
|
154
|
-
delete fileInfos.filename;
|
|
155
|
-
delete fileInfos.sha256;
|
|
156
|
-
fileInfos.status = 404;
|
|
157
|
-
} else {
|
|
158
|
-
const sha256 = createHash("sha256").update(buffer).digest("hex");
|
|
159
|
-
const existingFilename = filenameBySha256[sha256];
|
|
160
|
-
if (existingFilename === undefined) {
|
|
161
|
-
fs.createWriteStream(filePath).write(buffer);
|
|
162
|
-
fileInfos.filename = filename;
|
|
163
|
-
filenameBySha256[sha256] = filename;
|
|
164
|
-
} else {
|
|
165
|
-
fileInfos.filename = existingFilename;
|
|
166
|
-
}
|
|
167
|
-
fileInfos.sha256 = sha256;
|
|
168
|
-
fileInfos.status = response.status;
|
|
169
|
-
}
|
|
147
|
+
fs.removeSync(filePath);
|
|
148
|
+
delete fileInfos.filename;
|
|
149
|
+
delete fileInfos.sha256;
|
|
150
|
+
fileInfos.status = 404;
|
|
151
|
+
} else {
|
|
152
|
+
const sha256 = createHash("sha256").update(buffer).digest("hex");
|
|
153
|
+
const existingFilename = filenameBySha256[sha256];
|
|
154
|
+
if (existingFilename === undefined) {
|
|
155
|
+
fs.createWriteStream(filePath).write(buffer);
|
|
156
|
+
fileInfos.filename = filename;
|
|
157
|
+
filenameBySha256[sha256] = filename;
|
|
170
158
|
} else {
|
|
171
|
-
|
|
172
|
-
if (!options.silent) {
|
|
173
|
-
console.warn(` Page "${url}" not found.`);
|
|
174
|
-
}
|
|
175
|
-
} else {
|
|
176
|
-
console.error(` Error:\n${JSON.stringify({
|
|
177
|
-
code: response.status,
|
|
178
|
-
message: response.statusText
|
|
179
|
-
}, null, 2)}`);
|
|
180
|
-
}
|
|
181
|
-
fs.removeSync(filePath);
|
|
182
|
-
delete fileInfos.filename;
|
|
183
|
-
delete fileInfos.sha256;
|
|
184
|
-
fileInfos.status = response.status;
|
|
159
|
+
fileInfos.filename = existingFilename;
|
|
185
160
|
}
|
|
161
|
+
fileInfos.sha256 = sha256;
|
|
162
|
+
fileInfos.status = response.status;
|
|
186
163
|
}
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
164
|
+
} else {
|
|
165
|
+
if (response.status === 404) {
|
|
166
|
+
if (!options.silent) {
|
|
167
|
+
console.warn(` Page "${url}" not found.`);
|
|
168
|
+
}
|
|
169
|
+
} else {
|
|
170
|
+
console.error(` Error:\n${JSON.stringify({
|
|
171
|
+
code: response.status,
|
|
172
|
+
message: response.statusText
|
|
173
|
+
}, null, 2)}`);
|
|
174
|
+
}
|
|
175
|
+
fs.removeSync(filePath);
|
|
176
|
+
delete fileInfos.filename;
|
|
177
|
+
delete fileInfos.sha256;
|
|
178
|
+
fileInfos.status = response.status;
|
|
191
179
|
}
|
|
192
180
|
}
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
181
|
+
fs.writeJsonSync(indexPath, index, {
|
|
182
|
+
encoding: "utf-8",
|
|
183
|
+
spaces: 2
|
|
184
|
+
});
|
|
197
185
|
}
|
|
198
186
|
retrieveDocuments().then(exitCode => process.exit(exitCode)).catch(error => {
|
|
199
187
|
console.log(error);
|
|
200
188
|
process.exit(1);
|
|
201
189
|
});
|
|
202
|
-
//# sourceMappingURL=data:application/json;charset=utf-8;base64,{"version":3,"names":["assert","execSync","commandLineArgs","createHash","differenceInDays","fs","path","walkDocumentAndDivisions","git","iterLoadAssembleeDocuments","pathFromDocumentUid","DocumentUrlFormat","iterDocumentOrDivisionUrls","optionsDefinitions","alias","help","name","type","Boolean","defaultValue","String","multiple","defaultOption","options","today","Date","retrieveDocuments","commit","uid","dataDir","documentsDir","join","pull","cwd","env","process","encoding","stdio","ensureDirSync","full","filename","readdirSync","removeSync","firstUid","skip","document","legislature","documentOrDivision","substring","documentDir","filenameBySha256","indexPath","index","pathExistsSync","readJsonSync","format","url","formatFilesInfos","undefined","fileInfos","find","push","Pdf","status","sha256","cycleDeVie","chrono","dateCreation","dateDepot","silent","console","log","response","fetch","filePath","ok","arrayBuffer","buffer","Buffer","from","subarray","toString","startsWith","warn","update","digest","existingFilename","createWriteStream","write","error","JSON","stringify","code","message","statusText","writeJsonSync","spaces","commitAndPush","remote","then","exitCode","exit","catch"],"sources":["../../src/scripts/retrieve_documents.ts"],"sourcesContent":["import assert from \"assert\"\nimport { execSync } from \"child_process\"\nimport commandLineArgs from \"command-line-args\"\nimport { createHash } from \"crypto\"\nimport { differenceInDays } from \"date-fns\"\nimport fs from \"fs-extra\"\nimport path from \"path\"\n\nimport {\n  DocumentFileInfos,\n  DocumentFilesIndex,\n  walkDocumentAndDivisions,\n} from \"../dossiers_legislatifs\"\nimport * as git from \"../git\"\nimport { iterLoadAssembleeDocuments, pathFromDocumentUid } from \"../loaders\"\nimport { DocumentUrlFormat, iterDocumentOrDivisionUrls } from \"../urls\"\n\nconst optionsDefinitions = [\n  {\n    alias: \"c\",\n    help: \"commit documents\",\n    name: \"commit\",\n    type: Boolean,\n  },\n  {\n    alias: \"f\",\n    help: \"retrieve all documents, even already retrieved ones\",\n    name: \"full\",\n    type: Boolean,\n  },\n  {\n    alias: \"l\",\n    defaultValue: \"16\",\n    help: 'legislatures to retrieve, \"*\" for all',\n    name: \"legislature\",\n    type: String,\n  },\n  {\n    alias: \"n\",\n    help: \"try to also retrieve documents that were previously not found\",\n    name: \"not-found\",\n    type: Boolean,\n  },\n  {\n    alias: \"p\",\n    help: \"pull repositories before proceeding\",\n    name: \"pull\",\n    type: Boolean,\n  },\n  {\n    alias: \"r\",\n    help: \"push commit to given remote\",\n    multiple: true,\n    name: \"remote\",\n    type: String,\n  },\n  {\n    alias: \"s\",\n    help: \"don't log anything\",\n    name: \"silent\",\n    type: Boolean,\n  },\n  {\n    alias: \"u\",\n    help: \"UID of first Assemblée's document or division to retrieve\",\n    name: \"uid\",\n    type: String,\n  },\n  {\n    alias: \"v\",\n    help: \"verbose logs\",\n    name: \"verbose\",\n    type: Boolean,\n  },\n  {\n    defaultOption: true,\n    help: \"directory containing Assemblée open data files\",\n    name: \"dataDir\",\n    type: String,\n  },\n]\nconst options = commandLineArgs(optionsDefinitions)\n\nconst today = new Date()\n\nasync function retrieveDocuments(): Promise<number> {\n  assert(\n    !options.commit || !options.uid,\n    'Options \"commit\" & \"uid\" are incompatible',\n  )\n\n  const dataDir = options.dataDir\n  const documentsDir = path.join(dataDir, \"Documents\")\n  if (options.pull) {\n    execSync(`git reset --hard origin/master`, {\n      cwd: documentsDir,\n      env: process.env,\n      encoding: \"utf-8\",\n      stdio: [\"ignore\", \"ignore\", \"pipe\"],\n    })\n    execSync(`git pull --rebase`, {\n      cwd: documentsDir,\n      env: process.env,\n      encoding: \"utf-8\",\n      stdio: [\"ignore\", \"ignore\", \"pipe\"],\n    })\n  }\n\n  fs.ensureDirSync(documentsDir)\n  if (options.full && !options.uid) {\n    for (const filename of fs.readdirSync(documentsDir)) {\n      if (filename[0] === \".\") {\n        continue\n      }\n      fs.removeSync(path.join(documentsDir, filename))\n    }\n  }\n\n  const firstUid = options.uid\n  let skip = Boolean(firstUid)\n  for (const { document } of iterLoadAssembleeDocuments(\n    dataDir,\n    options.legislature,\n  )) {\n    for (const documentOrDivision of walkDocumentAndDivisions(document)) {\n      // Ignore documents from Sénat.\n      if (documentOrDivision.uid.substring(4, 6) === \"SN\") {\n        continue\n      }\n\n      if (skip) {\n        if (documentOrDivision.uid === firstUid) {\n          skip = false\n        } else {\n          continue\n        }\n      }\n\n      const documentDir = pathFromDocumentUid(\n        documentsDir,\n        documentOrDivision.uid,\n      )\n      fs.ensureDirSync(documentDir)\n\n      const filenameBySha256: { [digest: string]: string } = {}\n      const indexPath = path.join(documentDir, \"index.json\")\n      const index = (\n        fs.pathExistsSync(indexPath) ? fs.readJsonSync(indexPath) : {}\n      ) as DocumentFilesIndex\n      for (const { format, type, url } of iterDocumentOrDivisionUrls(\n        documentOrDivision,\n      )) {\n        let formatFilesInfos = index[format]\n        if (formatFilesInfos === undefined) {\n          formatFilesInfos = index[format] = []\n        }\n        let fileInfos = formatFilesInfos.find(\n          (fileInfos) => fileInfos.url === url,\n        )\n        if (fileInfos === undefined) {\n          fileInfos = {} as DocumentFileInfos\n          formatFilesInfos.push(fileInfos)\n        }\n        fileInfos.url = url\n\n        const filename = `${type}.${\n          format === DocumentUrlFormat.Pdf ? \"pdf\" : \"html\"\n        }`\n        if (fileInfos.status === 200 && !options.full) {\n          filenameBySha256[fileInfos.sha256 as string] = filename\n          continue\n        }\n        if (\n          fileInfos.status === 404 &&\n          !options[\"not-found\"] &&\n          differenceInDays(\n            today,\n            documentOrDivision.cycleDeVie.chrono.dateCreation ??\n              (documentOrDivision.cycleDeVie.chrono.dateDepot as Date | string),\n          ) > 10\n        ) {\n          continue\n        }\n\n        if (!options.silent) {\n          console.log(\n            `Retrieving document or division ${documentOrDivision.uid} at ${url}…`,\n          )\n        }\n\n        const response = await fetch(url)\n        const filePath = path.join(documentDir, filename)\n        if (response.ok) {\n          const arrayBuffer = await response.arrayBuffer()\n          const buffer = Buffer.from(arrayBuffer)\n          if (\n            format === DocumentUrlFormat.Pdf &&\n            !buffer.subarray(0, 4).toString().startsWith(\"%PDF\")\n          ) {\n            // Instead of a PDF, the received data may be an HTML page with a message like\n            // \"Document non encore publié\".\n            if (!options.silent) {\n              console.warn(`  PDF \"${url}\" not found.`)\n            }\n            fs.removeSync(filePath)\n            delete fileInfos.filename\n            delete fileInfos.sha256\n            fileInfos.status = 404\n          } else {\n            const sha256 = createHash(\"sha256\").update(buffer).digest(\"hex\")\n            const existingFilename = filenameBySha256[sha256]\n            if (existingFilename === undefined) {\n              fs.createWriteStream(filePath).write(buffer)\n              fileInfos.filename = filename\n              filenameBySha256[sha256] = filename\n            } else {\n              fileInfos.filename = existingFilename\n            }\n            fileInfos.sha256 = sha256\n            fileInfos.status = response.status\n          }\n        } else {\n          if (response.status === 404) {\n            if (!options.silent) {\n              console.warn(`  Page \"${url}\" not found.`)\n            }\n          } else {\n            console.error(\n              `  Error:\\n${JSON.stringify(\n                { code: response.status, message: response.statusText },\n                null,\n                2,\n              )}`,\n            )\n          }\n          fs.removeSync(filePath)\n          delete fileInfos.filename\n          delete fileInfos.sha256\n          fileInfos.status = response.status\n        }\n      }\n      fs.writeJsonSync(indexPath, index, { encoding: \"utf-8\", spaces: 2 })\n    }\n  }\n\n  if (options.commit) {\n    return git.commitAndPush(documentsDir, \"Nouvelle moisson\", options.remote)\n  }\n  return 0\n}\n\nretrieveDocuments()\n  .then((exitCode) => process.exit(exitCode))\n  .catch((error) => {\n    console.log(error)\n    process.exit(1)\n  })\n"],"mappings":"AAAA,OAAOA,MAAM,MAAM,QAAQ;AAC3B,SAASC,QAAQ,QAAQ,eAAe;AACxC,OAAOC,eAAe,MAAM,mBAAmB;AAC/C,SAASC,UAAU,QAAQ,QAAQ;AACnC,SAASC,gBAAgB,QAAQ,UAAU;AAC3C,OAAOC,EAAE,MAAM,UAAU;AACzB,OAAOC,IAAI,MAAM,MAAM;AAAA,SAKrBC,wBAAwB;AAAA,OAEnB,KAAKC,GAAG;AAAA,SACNC,0BAA0B,EAAEC,mBAAmB;AAAA,SAC/CC,iBAAiB,EAAEC,0BAA0B;AAEtD,MAAMC,kBAAkB,GAAG,CACzB;EACEC,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,kBAAkB;EACxBC,IAAI,EAAE,QAAQ;EACdC,IAAI,EAAEC;AACR,CAAC,EACD;EACEJ,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,qDAAqD;EAC3DC,IAAI,EAAE,MAAM;EACZC,IAAI,EAAEC;AACR,CAAC,EACD;EACEJ,KAAK,EAAE,GAAG;EACVK,YAAY,EAAE,IAAI;EAClBJ,IAAI,EAAE,uCAAuC;EAC7CC,IAAI,EAAE,aAAa;EACnBC,IAAI,EAAEG;AACR,CAAC,EACD;EACEN,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,+DAA+D;EACrEC,IAAI,EAAE,WAAW;EACjBC,IAAI,EAAEC;AACR,CAAC,EACD;EACEJ,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,qCAAqC;EAC3CC,IAAI,EAAE,MAAM;EACZC,IAAI,EAAEC;AACR,CAAC,EACD;EACEJ,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,6BAA6B;EACnCM,QAAQ,EAAE,IAAI;EACdL,IAAI,EAAE,QAAQ;EACdC,IAAI,EAAEG;AACR,CAAC,EACD;EACEN,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,oBAAoB;EAC1BC,IAAI,EAAE,QAAQ;EACdC,IAAI,EAAEC;AACR,CAAC,EACD;EACEJ,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,2DAA2D;EACjEC,IAAI,EAAE,KAAK;EACXC,IAAI,EAAEG;AACR,CAAC,EACD;EACEN,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,cAAc;EACpBC,IAAI,EAAE,SAAS;EACfC,IAAI,EAAEC;AACR,CAAC,EACD;EACEI,aAAa,EAAE,IAAI;EACnBP,IAAI,EAAE,gDAAgD;EACtDC,IAAI,EAAE,SAAS;EACfC,IAAI,EAAEG;AACR,CAAC,CACF;AACD,MAAMG,OAAO,GAAGrB,eAAe,CAACW,kBAAkB,CAAC;AAEnD,MAAMW,KAAK,GAAG,IAAIC,IAAI,CAAC,CAAC;AAExB,eAAeC,iBAAiBA,CAAA,EAAoB;EAClD1B,MAAM,CACJ,CAACuB,OAAO,CAACI,MAAM,IAAI,CAACJ,OAAO,CAACK,GAAG,EAC/B,2CACF,CAAC;EAED,MAAMC,OAAO,GAAGN,OAAO,CAACM,OAAO;EAC/B,MAAMC,YAAY,GAAGxB,IAAI,CAACyB,IAAI,CAACF,OAAO,EAAE,WAAW,CAAC;EACpD,IAAIN,OAAO,CAACS,IAAI,EAAE;IAChB/B,QAAQ,CAAE,gCAA+B,EAAE;MACzCgC,GAAG,EAAEH,YAAY;MACjBI,GAAG,EAAEC,OAAO,CAACD,GAAG;MAChBE,QAAQ,EAAE,OAAO;MACjBC,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM;IACpC,CAAC,CAAC;IACFpC,QAAQ,CAAE,mBAAkB,EAAE;MAC5BgC,GAAG,EAAEH,YAAY;MACjBI,GAAG,EAAEC,OAAO,CAACD,GAAG;MAChBE,QAAQ,EAAE,OAAO;MACjBC,KAAK,EAAE,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM;IACpC,CAAC,CAAC;EACJ;EAEAhC,EAAE,CAACiC,aAAa,CAACR,YAAY,CAAC;EAC9B,IAAIP,OAAO,CAACgB,IAAI,IAAI,CAAChB,OAAO,CAACK,GAAG,EAAE;IAChC,KAAK,MAAMY,QAAQ,IAAInC,EAAE,CAACoC,WAAW,CAACX,YAAY,CAAC,EAAE;MACnD,IAAIU,QAAQ,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE;QACvB;MACF;MACAnC,EAAE,CAACqC,UAAU,CAACpC,IAAI,CAACyB,IAAI,CAACD,YAAY,EAAEU,QAAQ,CAAC,CAAC;IAClD;EACF;EAEA,MAAMG,QAAQ,GAAGpB,OAAO,CAACK,GAAG;EAC5B,IAAIgB,IAAI,GAAG1B,OAAO,CAACyB,QAAQ,CAAC;EAC5B,KAAK,MAAM;IAAEE;EAAS,CAAC,IAAIpC,0BAA0B,CACnDoB,OAAO,EACPN,OAAO,CAACuB,WACV,CAAC,EAAE;IACD,KAAK,MAAMC,kBAAkB,IAAIxC,wBAAwB,CAACsC,QAAQ,CAAC,EAAE;MACnE;MACA,IAAIE,kBAAkB,CAACnB,GAAG,CAACoB,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;QACnD;MACF;MAEA,IAAIJ,IAAI,EAAE;QACR,IAAIG,kBAAkB,CAACnB,GAAG,KAAKe,QAAQ,EAAE;UACvCC,IAAI,GAAG,KAAK;QACd,CAAC,MAAM;UACL;QACF;MACF;MAEA,MAAMK,WAAW,GAAGvC,mBAAmB,CACrCoB,YAAY,EACZiB,kBAAkB,CAACnB,GACrB,CAAC;MACDvB,EAAE,CAACiC,aAAa,CAACW,WAAW,CAAC;MAE7B,MAAMC,gBAA8C,GAAG,CAAC,CAAC;MACzD,MAAMC,SAAS,GAAG7C,IAAI,CAACyB,IAAI,CAACkB,WAAW,EAAE,YAAY,CAAC;MACtD,MAAMG,KAAK,GACT/C,EAAE,CAACgD,cAAc,CAACF,SAAS,CAAC,GAAG9C,EAAE,CAACiD,YAAY,CAACH,SAAS,CAAC,GAAG,CAAC,CACxC;MACvB,KAAK,MAAM;QAAEI,MAAM;QAAEtC,IAAI;QAAEuC;MAAI,CAAC,IAAI5C,0BAA0B,CAC5DmC,kBACF,CAAC,EAAE;QACD,IAAIU,gBAAgB,GAAGL,KAAK,CAACG,MAAM,CAAC;QACpC,IAAIE,gBAAgB,KAAKC,SAAS,EAAE;UAClCD,gBAAgB,GAAGL,KAAK,CAACG,MAAM,CAAC,GAAG,EAAE;QACvC;QACA,IAAII,SAAS,GAAGF,gBAAgB,CAACG,IAAI,CAClCD,SAAS,IAAKA,SAAS,CAACH,GAAG,KAAKA,GACnC,CAAC;QACD,IAAIG,SAAS,KAAKD,SAAS,EAAE;UAC3BC,SAAS,GAAG,CAAC,CAAsB;UACnCF,gBAAgB,CAACI,IAAI,CAACF,SAAS,CAAC;QAClC;QACAA,SAAS,CAACH,GAAG,GAAGA,GAAG;QAEnB,MAAMhB,QAAQ,GAAI,GAAEvB,IAAK,IACvBsC,MAAM,KAAK5C,iBAAiB,CAACmD,GAAG,GAAG,KAAK,GAAG,MAC5C,EAAC;QACF,IAAIH,SAAS,CAACI,MAAM,KAAK,GAAG,IAAI,CAACxC,OAAO,CAACgB,IAAI,EAAE;UAC7CW,gBAAgB,CAACS,SAAS,CAACK,MAAM,CAAW,GAAGxB,QAAQ;UACvD;QACF;QACA,IACEmB,SAAS,CAACI,MAAM,KAAK,GAAG,IACxB,CAACxC,OAAO,CAAC,WAAW,CAAC,IACrBnB,gBAAgB,CACdoB,KAAK,EACLuB,kBAAkB,CAACkB,UAAU,CAACC,MAAM,CAACC,YAAY,IAC9CpB,kBAAkB,CAACkB,UAAU,CAACC,MAAM,CAACE,SAC1C,CAAC,GAAG,EAAE,EACN;UACA;QACF;QAEA,IAAI,CAAC7C,OAAO,CAAC8C,MAAM,EAAE;UACnBC,OAAO,CAACC,GAAG,CACR,mCAAkCxB,kBAAkB,CAACnB,GAAI,OAAM4B,GAAI,GACtE,CAAC;QACH;QAEA,MAAMgB,QAAQ,GAAG,MAAMC,KAAK,CAACjB,GAAG,CAAC;QACjC,MAAMkB,QAAQ,GAAGpE,IAAI,CAACyB,IAAI,CAACkB,WAAW,EAAET,QAAQ,CAAC;QACjD,IAAIgC,QAAQ,CAACG,EAAE,EAAE;UACf,MAAMC,WAAW,GAAG,MAAMJ,QAAQ,CAACI,WAAW,CAAC,CAAC;UAChD,MAAMC,MAAM,GAAGC,MAAM,CAACC,IAAI,CAACH,WAAW,CAAC;UACvC,IACErB,MAAM,KAAK5C,iBAAiB,CAACmD,GAAG,IAChC,CAACe,MAAM,CAACG,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAACC,QAAQ,CAAC,CAAC,CAACC,UAAU,CAAC,MAAM,CAAC,EACpD;YACA;YACA;YACA,IAAI,CAAC3D,OAAO,CAAC8C,MAAM,EAAE;cACnBC,OAAO,CAACa,IAAI,CAAE,UAAS3B,GAAI,cAAa,CAAC;YAC3C;YACAnD,EAAE,CAACqC,UAAU,CAACgC,QAAQ,CAAC;YACvB,OAAOf,SAAS,CAACnB,QAAQ;YACzB,OAAOmB,SAAS,CAACK,MAAM;YACvBL,SAAS,CAACI,MAAM,GAAG,GAAG;UACxB,CAAC,MAAM;YACL,MAAMC,MAAM,GAAG7D,UAAU,CAAC,QAAQ,CAAC,CAACiF,MAAM,CAACP,MAAM,CAAC,CAACQ,MAAM,CAAC,KAAK,CAAC;YAChE,MAAMC,gBAAgB,GAAGpC,gBAAgB,CAACc,MAAM,CAAC;YACjD,IAAIsB,gBAAgB,KAAK5B,SAAS,EAAE;cAClCrD,EAAE,CAACkF,iBAAiB,CAACb,QAAQ,CAAC,CAACc,KAAK,CAACX,MAAM,CAAC;cAC5ClB,SAAS,CAACnB,QAAQ,GAAGA,QAAQ;cAC7BU,gBAAgB,CAACc,MAAM,CAAC,GAAGxB,QAAQ;YACrC,CAAC,MAAM;cACLmB,SAAS,CAACnB,QAAQ,GAAG8C,gBAAgB;YACvC;YACA3B,SAAS,CAACK,MAAM,GAAGA,MAAM;YACzBL,SAAS,CAACI,MAAM,GAAGS,QAAQ,CAACT,MAAM;UACpC;QACF,CAAC,MAAM;UACL,IAAIS,QAAQ,CAACT,MAAM,KAAK,GAAG,EAAE;YAC3B,IAAI,CAACxC,OAAO,CAAC8C,MAAM,EAAE;cACnBC,OAAO,CAACa,IAAI,CAAE,WAAU3B,GAAI,cAAa,CAAC;YAC5C;UACF,CAAC,MAAM;YACLc,OAAO,CAACmB,KAAK,CACV,aAAYC,IAAI,CAACC,SAAS,CACzB;cAAEC,IAAI,EAAEpB,QAAQ,CAACT,MAAM;cAAE8B,OAAO,EAAErB,QAAQ,CAACsB;YAAW,CAAC,EACvD,IAAI,EACJ,CACF,CAAE,EACJ,CAAC;UACH;UACAzF,EAAE,CAACqC,UAAU,CAACgC,QAAQ,CAAC;UACvB,OAAOf,SAAS,CAACnB,QAAQ;UACzB,OAAOmB,SAAS,CAACK,MAAM;UACvBL,SAAS,CAACI,MAAM,GAAGS,QAAQ,CAACT,MAAM;QACpC;MACF;MACA1D,EAAE,CAAC0F,aAAa,CAAC5C,SAAS,EAAEC,KAAK,EAAE;QAAEhB,QAAQ,EAAE,OAAO;QAAE4D,MAAM,EAAE;MAAE,CAAC,CAAC;IACtE;EACF;EAEA,IAAIzE,OAAO,CAACI,MAAM,EAAE;IAClB,OAAOnB,GAAG,CAACyF,aAAa,CAACnE,YAAY,EAAE,kBAAkB,EAAEP,OAAO,CAAC2E,MAAM,CAAC;EAC5E;EACA,OAAO,CAAC;AACV;AAEAxE,iBAAiB,CAAC,CAAC,CAChByE,IAAI,CAAEC,QAAQ,IAAKjE,OAAO,CAACkE,IAAI,CAACD,QAAQ,CAAC,CAAC,CAC1CE,KAAK,CAAEb,KAAK,IAAK;EAChBnB,OAAO,CAACC,GAAG,CAACkB,KAAK,CAAC;EAClBtD,OAAO,CAACkE,IAAI,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC","ignoreList":[]}
|
|
190
|
+
//# sourceMappingURL=data:application/json;charset=utf-8;base64,{"version":3,"names":["assert","commandLineArgs","createHash","differenceInDays","fs","path","walkDocumentAndDivisions","git","iterLoadAssembleeDocuments","pathFromDocumentUid","DocumentUrlFormat","iterDocumentOrDivisionUrls","commitOption","dataDirDefaultOption","legislatureOption","pullOption","remoteOption","silentOption","verboseOption","optionsDefinitions","alias","help","name","type","Boolean","String","multiple","options","today","Date","fetchWithRetry","url","retries","backoff","attempt","fetch","error","console","warn","Promise","resolve","setTimeout","Error","retrieveDocuments","commit","uid","dataDir","documentsDir","join","pull","resetAndPull","ensureDirSync","full","filename","readdirSync","removeSync","firstUid","skip","document","legislature","documentOrDivision","substring","processDocumentOrDivision","commitAndPush","remote","documentDir","filenameBySha256","indexPath","index","pathExistsSync","readJsonSync","format","Pdf","documentType","classification","code","undefined","includes","formatFilesInfos","fileInfos","find","file","push","status","sha256","cycleDeVie","chrono","dateCreation","dateDepot","silent","log","response","filePath","ok","arrayBuffer","buffer","Buffer","from","subarray","toString","startsWith","update","digest","existingFilename","createWriteStream","write","JSON","stringify","message","statusText","writeJsonSync","encoding","spaces","then","exitCode","process","exit","catch"],"sources":["../../src/scripts/retrieve_documents.ts"],"sourcesContent":["import assert from \"assert\"\nimport commandLineArgs from \"command-line-args\"\nimport { createHash } from \"crypto\"\nimport { differenceInDays } from \"date-fns\"\nimport fs from \"fs-extra\"\nimport path from \"path\"\n\nimport {\n  DocumentFileInfos,\n  DocumentFilesIndex,\n  walkDocumentAndDivisions,\n} from \"../dossiers_legislatifs\"\nimport * as git from \"../git\"\nimport { iterLoadAssembleeDocuments, pathFromDocumentUid } from \"../loaders\"\nimport { DocumentUrlFormat, iterDocumentOrDivisionUrls } from \"../urls\"\nimport {\n  commitOption,\n  dataDirDefaultOption,\n  legislatureOption,\n  pullOption,\n  remoteOption,\n  silentOption,\n  verboseOption,\n} from \"./shared/cli_helpers\"\n\nconst optionsDefinitions = [\n  commitOption,\n  legislatureOption,\n  remoteOption,\n  silentOption,\n  verboseOption,\n  dataDirDefaultOption,\n  pullOption,\n  {\n    alias: \"f\",\n    help: \"retrieve all documents, even already retrieved ones\",\n    name: \"full\",\n    type: Boolean,\n  },\n  {\n    alias: \"n\",\n    help: \"try to also retrieve documents that were previously not found\",\n    name: \"not-found\",\n    type: Boolean,\n  },\n  {\n    alias: \"u\",\n    help: \"UID of first Assemblée's document or division to retrieve\",\n    name: \"uid\",\n    type: String,\n  },\n  {\n    alias: \"F\",\n    help: \"format of documents to retrieve\",\n    name: \"format\",\n    type: String,\n  },\n  {\n    alias: \"T\",\n    help: \"type of documents to retrieve (for example: PION)\",\n    multiple: true,\n    name: \"document-type\",\n    type: String,\n  },\n]\nconst options = commandLineArgs(optionsDefinitions)\nconst today = new Date()\n\nasync function fetchWithRetry(\n  url: string,\n  retries: number = 3,\n  backoff: number = 300,\n): Promise<Response> {\n  for (let attempt = 0; attempt < retries; attempt++) {\n    try {\n      return await fetch(url)\n    } catch (error) {\n      if (attempt === retries - 1) {\n        throw error\n      }\n      console.warn(\n        `Fetch attempt ${attempt + 1} for ${url} failed. Retrying in ${backoff}ms...`,\n      )\n      await new Promise((resolve) => setTimeout(resolve, backoff))\n      backoff *= 2\n    }\n  }\n  throw new Error(`Failed to fetch ${url} after ${retries} attempts`)\n}\n\nasync function retrieveDocuments(): Promise<number> {\n  assert(\n    !options.commit || !options.uid,\n    'Options \"commit\" & \"uid\" are incompatible',\n  )\n\n  const dataDir = options.dataDir\n  const documentsDir = path.join(dataDir, \"Documents\")\n  if (options.pull) {\n    git.resetAndPull(documentsDir)\n  }\n  fs.ensureDirSync(documentsDir)\n\n  if (options.full && !options.uid) {\n    for (const filename of fs.readdirSync(documentsDir)) {\n      if (filename[0] === \".\") {\n        continue\n      }\n      fs.removeSync(path.join(documentsDir, filename))\n    }\n  }\n\n  const firstUid = options.uid\n  let skip = Boolean(firstUid)\n  for (const { document } of iterLoadAssembleeDocuments(\n    dataDir,\n    options.legislature,\n  )) {\n    for (const documentOrDivision of walkDocumentAndDivisions(document)) {\n      // Ignore documents from Sénat.\n      if (documentOrDivision.uid.substring(4, 6) === \"SN\") {\n        continue\n      }\n\n      if (skip) {\n        if (documentOrDivision.uid === firstUid) {\n          skip = false\n        } else {\n          continue\n        }\n      }\n\n      await processDocumentOrDivision(documentOrDivision, documentsDir, options)\n    }\n  }\n\n  if (options.commit) {\n    return git.commitAndPush(documentsDir, \"Nouvelle moisson\", options.remote)\n  }\n  return 0\n}\n\nasync function processDocumentOrDivision(\n  documentOrDivision: any,\n  documentsDir: string,\n  options: any,\n): Promise<void> {\n  const documentDir = pathFromDocumentUid(documentsDir, documentOrDivision.uid)\n  fs.ensureDirSync(documentDir)\n\n  const filenameBySha256: { [digest: string]: string } = {}\n  const indexPath = path.join(documentDir, \"index.json\")\n  const index = (\n    fs.pathExistsSync(indexPath) ? fs.readJsonSync(indexPath) : {}\n  ) as DocumentFilesIndex\n\n  for (const { format, type, url } of iterDocumentOrDivisionUrls(\n    documentOrDivision,\n  )) {\n    const filename = `${type}.${\n      format === DocumentUrlFormat.Pdf ? \"pdf\" : \"html\"\n    }`\n\n    // Filter by format if option is passed\n    if (options.format && options.format !== format) {\n      continue\n    }\n\n    // Filter by document type if option is passed\n    const documentType = documentOrDivision.classification?.type?.code\n    if (\n      options[\"document-type\"] !== undefined &&\n      !options[\"document-type\"].includes(documentType)\n    ) {\n      continue\n    }\n\n    let formatFilesInfos = index[format] ?? (index[format] = [])\n    let fileInfos =\n      formatFilesInfos.find((file) => file.url === url) ??\n      ({} as DocumentFileInfos)\n    if (!formatFilesInfos.includes(fileInfos)) {\n      formatFilesInfos.push(fileInfos)\n    }\n    fileInfos.url = url\n\n    if (fileInfos.status === 200 && !options.full) {\n      filenameBySha256[fileInfos.sha256 as string] = filename\n      continue\n    }\n    if (\n      fileInfos.status === 404 &&\n      !options[\"not-found\"] &&\n      differenceInDays(\n        today,\n        documentOrDivision.cycleDeVie.chrono.dateCreation ??\n          (documentOrDivision.cycleDeVie.chrono.dateDepot as Date | string),\n      ) > 10\n    ) {\n      continue\n    }\n\n    if (!options.silent) {\n      console.log(\n        `Retrieving document or division ${documentOrDivision.uid} at ${url}…`,\n      )\n    }\n\n    const response = await fetchWithRetry(url)\n    const filePath = path.join(documentDir, filename)\n\n    if (response.ok) {\n      const arrayBuffer = await response.arrayBuffer()\n      const buffer = Buffer.from(arrayBuffer)\n      if (\n        format === DocumentUrlFormat.Pdf &&\n        !buffer.subarray(0, 4).toString().startsWith(\"%PDF\")\n      ) {\n        // Instead of a PDF, the received data may be an HTML page with a message like\n        // \"Document non encore publié\".\n        if (!options.silent) {\n          console.warn(`  PDF \"${url}\" not found.`)\n        }\n        fs.removeSync(filePath)\n        delete fileInfos.filename\n        delete fileInfos.sha256\n        fileInfos.status = 404\n      } else {\n        const sha256 = createHash(\"sha256\").update(buffer).digest(\"hex\")\n        const existingFilename = filenameBySha256[sha256]\n        if (existingFilename === undefined) {\n          fs.createWriteStream(filePath).write(buffer)\n          fileInfos.filename = filename\n          filenameBySha256[sha256] = filename\n        } else {\n          fileInfos.filename = existingFilename\n        }\n        fileInfos.sha256 = sha256\n        fileInfos.status = response.status\n      }\n    } else {\n      if (response.status === 404) {\n        if (!options.silent) {\n          console.warn(`  Page \"${url}\" not found.`)\n        }\n      } else {\n        console.error(\n          `  Error:\\n${JSON.stringify(\n            { code: response.status, message: response.statusText },\n            null,\n            2,\n          )}`,\n        )\n      }\n      fs.removeSync(filePath)\n      delete fileInfos.filename\n      delete fileInfos.sha256\n      fileInfos.status = response.status\n    }\n  }\n\n  fs.writeJsonSync(indexPath, index, { encoding: \"utf-8\", spaces: 2 })\n}\n\nretrieveDocuments()\n  .then((exitCode) => process.exit(exitCode))\n  .catch((error) => {\n    console.log(error)\n    process.exit(1)\n  })\n"],"mappings":"AAAA,OAAOA,MAAM,MAAM,QAAQ;AAC3B,OAAOC,eAAe,MAAM,mBAAmB;AAC/C,SAASC,UAAU,QAAQ,QAAQ;AACnC,SAASC,gBAAgB,QAAQ,UAAU;AAC3C,OAAOC,EAAE,MAAM,UAAU;AACzB,OAAOC,IAAI,MAAM,MAAM;AAAA,SAKrBC,wBAAwB;AAAA,OAEnB,KAAKC,GAAG;AAAA,SACNC,0BAA0B,EAAEC,mBAAmB;AAAA,SAC/CC,iBAAiB,EAAEC,0BAA0B;AAAA,SAEpDC,YAAY,EACZC,oBAAoB,EACpBC,iBAAiB,EACjBC,UAAU,EACVC,YAAY,EACZC,YAAY,EACZC,aAAa;AAGf,MAAMC,kBAAkB,GAAG,CACzBP,YAAY,EACZE,iBAAiB,EACjBE,YAAY,EACZC,YAAY,EACZC,aAAa,EACbL,oBAAoB,EACpBE,UAAU,EACV;EACEK,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,qDAAqD;EAC3DC,IAAI,EAAE,MAAM;EACZC,IAAI,EAAEC;AACR,CAAC,EACD;EACEJ,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,+DAA+D;EACrEC,IAAI,EAAE,WAAW;EACjBC,IAAI,EAAEC;AACR,CAAC,EACD;EACEJ,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,2DAA2D;EACjEC,IAAI,EAAE,KAAK;EACXC,IAAI,EAAEE;AACR,CAAC,EACD;EACEL,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,iCAAiC;EACvCC,IAAI,EAAE,QAAQ;EACdC,IAAI,EAAEE;AACR,CAAC,EACD;EACEL,KAAK,EAAE,GAAG;EACVC,IAAI,EAAE,mDAAmD;EACzDK,QAAQ,EAAE,IAAI;EACdJ,IAAI,EAAE,eAAe;EACrBC,IAAI,EAAEE;AACR,CAAC,CACF;AACD,MAAME,OAAO,GAAG1B,eAAe,CAACkB,kBAAkB,CAAC;AACnD,MAAMS,KAAK,GAAG,IAAIC,IAAI,CAAC,CAAC;AAExB,eAAeC,cAAcA,CAC3BC,GAAW,EACXC,OAAe,GAAG,CAAC,EACnBC,OAAe,GAAG,GAAG,EACF;EACnB,KAAK,IAAIC,OAAO,GAAG,CAAC,EAAEA,OAAO,GAAGF,OAAO,EAAEE,OAAO,EAAE,EAAE;IAClD,IAAI;MACF,OAAO,MAAMC,KAAK,CAACJ,GAAG,CAAC;IACzB,CAAC,CAAC,OAAOK,KAAK,EAAE;MACd,IAAIF,OAAO,KAAKF,OAAO,GAAG,CAAC,EAAE;QAC3B,MAAMI,KAAK;MACb;MACAC,OAAO,CAACC,IAAI,CACV,iBAAiBJ,OAAO,GAAG,CAAC,QAAQH,GAAG,wBAAwBE,OAAO,OACxE,CAAC;MACD,MAAM,IAAIM,OAAO,CAAEC,OAAO,IAAKC,UAAU,CAACD,OAAO,EAAEP,OAAO,CAAC,CAAC;MAC5DA,OAAO,IAAI,CAAC;IACd;EACF;EACA,MAAM,IAAIS,KAAK,CAAC,mBAAmBX,GAAG,UAAUC,OAAO,WAAW,CAAC;AACrE;AAEA,eAAeW,iBAAiBA,CAAA,EAAoB;EAClD3C,MAAM,CACJ,CAAC2B,OAAO,CAACiB,MAAM,IAAI,CAACjB,OAAO,CAACkB,GAAG,EAC/B,2CACF,CAAC;EAED,MAAMC,OAAO,GAAGnB,OAAO,CAACmB,OAAO;EAC/B,MAAMC,YAAY,GAAG1C,IAAI,CAAC2C,IAAI,CAACF,OAAO,EAAE,WAAW,CAAC;EACpD,IAAInB,OAAO,CAACsB,IAAI,EAAE;IAChB1C,GAAG,CAAC2C,YAAY,CAACH,YAAY,CAAC;EAChC;EACA3C,EAAE,CAAC+C,aAAa,CAACJ,YAAY,CAAC;EAE9B,IAAIpB,OAAO,CAACyB,IAAI,IAAI,CAACzB,OAAO,CAACkB,GAAG,EAAE;IAChC,KAAK,MAAMQ,QAAQ,IAAIjD,EAAE,CAACkD,WAAW,CAACP,YAAY,CAAC,EAAE;MACnD,IAAIM,QAAQ,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE;QACvB;MACF;MACAjD,EAAE,CAACmD,UAAU,CAAClD,IAAI,CAAC2C,IAAI,CAACD,YAAY,EAAEM,QAAQ,CAAC,CAAC;IAClD;EACF;EAEA,MAAMG,QAAQ,GAAG7B,OAAO,CAACkB,GAAG;EAC5B,IAAIY,IAAI,GAAGjC,OAAO,CAACgC,QAAQ,CAAC;EAC5B,KAAK,MAAM;IAAEE;EAAS,CAAC,IAAIlD,0BAA0B,CACnDsC,OAAO,EACPnB,OAAO,CAACgC,WACV,CAAC,EAAE;IACD,KAAK,MAAMC,kBAAkB,IAAItD,wBAAwB,CAACoD,QAAQ,CAAC,EAAE;MACnE;MACA,IAAIE,kBAAkB,CAACf,GAAG,CAACgB,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,IAAI,EAAE;QACnD;MACF;MAEA,IAAIJ,IAAI,EAAE;QACR,IAAIG,kBAAkB,CAACf,GAAG,KAAKW,QAAQ,EAAE;UACvCC,IAAI,GAAG,KAAK;QACd,CAAC,MAAM;UACL;QACF;MACF;MAEA,MAAMK,yBAAyB,CAACF,kBAAkB,EAAEb,YAAY,EAAEpB,OAAO,CAAC;IAC5E;EACF;EAEA,IAAIA,OAAO,CAACiB,MAAM,EAAE;IAClB,OAAOrC,GAAG,CAACwD,aAAa,CAAChB,YAAY,EAAE,kBAAkB,EAAEpB,OAAO,CAACqC,MAAM,CAAC;EAC5E;EACA,OAAO,CAAC;AACV;AAEA,eAAeF,yBAAyBA,CACtCF,kBAAuB,EACvBb,YAAoB,EACpBpB,OAAY,EACG;EACf,MAAMsC,WAAW,GAAGxD,mBAAmB,CAACsC,YAAY,EAAEa,kBAAkB,CAACf,GAAG,CAAC;EAC7EzC,EAAE,CAAC+C,aAAa,CAACc,WAAW,CAAC;EAE7B,MAAMC,gBAA8C,GAAG,CAAC,CAAC;EACzD,MAAMC,SAAS,GAAG9D,IAAI,CAAC2C,IAAI,CAACiB,WAAW,EAAE,YAAY,CAAC;EACtD,MAAMG,KAAK,GACThE,EAAE,CAACiE,cAAc,CAACF,SAAS,CAAC,GAAG/D,EAAE,CAACkE,YAAY,CAACH,SAAS,CAAC,GAAG,CAAC,CACxC;EAEvB,KAAK,MAAM;IAAEI,MAAM;IAAEhD,IAAI;IAAEQ;EAAI,CAAC,IAAIpB,0BAA0B,CAC5DiD,kBACF,CAAC,EAAE;IACD,MAAMP,QAAQ,GAAG,GAAG9B,IAAI,IACtBgD,MAAM,KAAK7D,iBAAiB,CAAC8D,GAAG,GAAG,KAAK,GAAG,MAAM,EACjD;;IAEF;IACA,IAAI7C,OAAO,CAAC4C,MAAM,IAAI5C,OAAO,CAAC4C,MAAM,KAAKA,MAAM,EAAE;MAC/C;IACF;;IAEA;IACA,MAAME,YAAY,GAAGb,kBAAkB,CAACc,cAAc,EAAEnD,IAAI,EAAEoD,IAAI;IAClE,IACEhD,OAAO,CAAC,eAAe,CAAC,KAAKiD,SAAS,IACtC,CAACjD,OAAO,CAAC,eAAe,CAAC,CAACkD,QAAQ,CAACJ,YAAY,CAAC,EAChD;MACA;IACF;IAEA,IAAIK,gBAAgB,GAAGV,KAAK,CAACG,MAAM,CAAC,KAAKH,KAAK,CAACG,MAAM,CAAC,GAAG,EAAE,CAAC;IAC5D,IAAIQ,SAAS,GACXD,gBAAgB,CAACE,IAAI,CAAEC,IAAI,IAAKA,IAAI,CAAClD,GAAG,KAAKA,GAAG,CAAC,IAChD,CAAC,CAAuB;IAC3B,IAAI,CAAC+C,gBAAgB,CAACD,QAAQ,CAACE,SAAS,CAAC,EAAE;MACzCD,gBAAgB,CAACI,IAAI,CAACH,SAAS,CAAC;IAClC;IACAA,SAAS,CAAChD,GAAG,GAAGA,GAAG;IAEnB,IAAIgD,SAAS,CAACI,MAAM,KAAK,GAAG,IAAI,CAACxD,OAAO,CAACyB,IAAI,EAAE;MAC7Cc,gBAAgB,CAACa,SAAS,CAACK,MAAM,CAAW,GAAG/B,QAAQ;MACvD;IACF;IACA,IACE0B,SAAS,CAACI,MAAM,KAAK,GAAG,IACxB,CAACxD,OAAO,CAAC,WAAW,CAAC,IACrBxB,gBAAgB,CACdyB,KAAK,EACLgC,kBAAkB,CAACyB,UAAU,CAACC,MAAM,CAACC,YAAY,IAC9C3B,kBAAkB,CAACyB,UAAU,CAACC,MAAM,CAACE,SAC1C,CAAC,GAAG,EAAE,EACN;MACA;IACF;IAEA,IAAI,CAAC7D,OAAO,CAAC8D,MAAM,EAAE;MACnBpD,OAAO,CAACqD,GAAG,CACT,mCAAmC9B,kBAAkB,CAACf,GAAG,OAAOd,GAAG,GACrE,CAAC;IACH;IAEA,MAAM4D,QAAQ,GAAG,MAAM7D,cAAc,CAACC,GAAG,CAAC;IAC1C,MAAM6D,QAAQ,GAAGvF,IAAI,CAAC2C,IAAI,CAACiB,WAAW,EAAEZ,QAAQ,CAAC;IAEjD,IAAIsC,QAAQ,CAACE,EAAE,EAAE;MACf,MAAMC,WAAW,GAAG,MAAMH,QAAQ,CAACG,WAAW,CAAC,CAAC;MAChD,MAAMC,MAAM,GAAGC,MAAM,CAACC,IAAI,CAACH,WAAW,CAAC;MACvC,IACEvB,MAAM,KAAK7D,iBAAiB,CAAC8D,GAAG,IAChC,CAACuB,MAAM,CAACG,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAACC,QAAQ,CAAC,CAAC,CAACC,UAAU,CAAC,MAAM,CAAC,EACpD;QACA;QACA;QACA,IAAI,CAACzE,OAAO,CAAC8D,MAAM,EAAE;UACnBpD,OAAO,CAACC,IAAI,CAAC,UAAUP,GAAG,cAAc,CAAC;QAC3C;QACA3B,EAAE,CAACmD,UAAU,CAACqC,QAAQ,CAAC;QACvB,OAAOb,SAAS,CAAC1B,QAAQ;QACzB,OAAO0B,SAAS,CAACK,MAAM;QACvBL,SAAS,CAACI,MAAM,GAAG,GAAG;MACxB,CAAC,MAAM;QACL,MAAMC,MAAM,GAAGlF,UAAU,CAAC,QAAQ,CAAC,CAACmG,MAAM,CAACN,MAAM,CAAC,CAACO,MAAM,CAAC,KAAK,CAAC;QAChE,MAAMC,gBAAgB,GAAGrC,gBAAgB,CAACkB,MAAM,CAAC;QACjD,IAAImB,gBAAgB,KAAK3B,SAAS,EAAE;UAClCxE,EAAE,CAACoG,iBAAiB,CAACZ,QAAQ,CAAC,CAACa,KAAK,CAACV,MAAM,CAAC;UAC5ChB,SAAS,CAAC1B,QAAQ,GAAGA,QAAQ;UAC7Ba,gBAAgB,CAACkB,MAAM,CAAC,GAAG/B,QAAQ;QACrC,CAAC,MAAM;UACL0B,SAAS,CAAC1B,QAAQ,GAAGkD,gBAAgB;QACvC;QACAxB,SAAS,CAACK,MAAM,GAAGA,MAAM;QACzBL,SAAS,CAACI,MAAM,GAAGQ,QAAQ,CAACR,MAAM;MACpC;IACF,CAAC,MAAM;MACL,IAAIQ,QAAQ,CAACR,MAAM,KAAK,GAAG,EAAE;QAC3B,IAAI,CAACxD,OAAO,CAAC8D,MAAM,EAAE;UACnBpD,OAAO,CAACC,IAAI,CAAC,WAAWP,GAAG,cAAc,CAAC;QAC5C;MACF,CAAC,MAAM;QACLM,OAAO,CAACD,KAAK,CACX,aAAasE,IAAI,CAACC,SAAS,CACzB;UAAEhC,IAAI,EAAEgB,QAAQ,CAACR,MAAM;UAAEyB,OAAO,EAAEjB,QAAQ,CAACkB;QAAW,CAAC,EACvD,IAAI,EACJ,CACF,CAAC,EACH,CAAC;MACH;MACAzG,EAAE,CAACmD,UAAU,CAACqC,QAAQ,CAAC;MACvB,OAAOb,SAAS,CAAC1B,QAAQ;MACzB,OAAO0B,SAAS,CAACK,MAAM;MACvBL,SAAS,CAACI,MAAM,GAAGQ,QAAQ,CAACR,MAAM;IACpC;EACF;EAEA/E,EAAE,CAAC0G,aAAa,CAAC3C,SAAS,EAAEC,KAAK,EAAE;IAAE2C,QAAQ,EAAE,OAAO;IAAEC,MAAM,EAAE;EAAE,CAAC,CAAC;AACtE;AAEArE,iBAAiB,CAAC,CAAC,CAChBsE,IAAI,CAAEC,QAAQ,IAAKC,OAAO,CAACC,IAAI,CAACF,QAAQ,CAAC,CAAC,CAC1CG,KAAK,CAAEjF,KAAK,IAAK;EAChBC,OAAO,CAACqD,GAAG,CAACtD,KAAK,CAAC;EAClB+E,OAAO,CAACC,IAAI,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC","ignoreList":[]}
|