@tricoteuses/senat 1.3.1 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/config.d.ts +1 -0
- package/lib/config.js +14 -45
- package/lib/databases.js +86 -143
- package/lib/datasets.js +78 -83
- package/lib/index.d.ts +12 -4
- package/lib/index.js +42 -419
- package/lib/loaders.js +149 -654
- package/lib/model/ameli.js +83 -21
- package/lib/model/debats.js +0 -1
- package/lib/model/dosleg.d.ts +1 -1
- package/lib/model/dosleg.js +179 -73
- package/lib/model/index.d.ts +3 -3
- package/lib/model/index.js +12 -46
- package/lib/model/questions.js +68 -39
- package/lib/model/sens.d.ts +1 -1
- package/lib/model/sens.js +383 -113
- package/lib/model/texte.js +220 -290
- package/lib/model/util.js +9 -26
- package/lib/raw_types/ameli.js +5 -6
- package/lib/raw_types/debats.js +5 -6
- package/lib/raw_types/dosleg.js +5 -6
- package/lib/raw_types/questions.js +5 -6
- package/lib/raw_types/sens.js +5 -6
- package/lib/raw_types_schemats/ameli.js +1 -43
- package/lib/raw_types_schemats/debats.js +1 -22
- package/lib/raw_types_schemats/dosleg.js +1 -96
- package/lib/raw_types_schemats/questions.js +1 -22
- package/lib/raw_types_schemats/sens.js +1 -112
- package/lib/scripts/convert_data.js +181 -631
- package/lib/scripts/datautil.js +17 -60
- package/lib/scripts/parse_textes.js +46 -129
- package/lib/scripts/retrieve_documents.js +247 -513
- package/lib/scripts/retrieve_open_data.js +211 -368
- package/lib/scripts/retrieve_senateurs_photos.js +144 -239
- package/lib/scripts/shared/cli_helpers.js +30 -30
- package/lib/scripts/shared/util.js +28 -94
- package/lib/strings.js +20 -45
- package/lib/types/ameli.d.ts +1 -1
- package/lib/types/ameli.js +14 -25
- package/lib/types/debats.d.ts +1 -1
- package/lib/types/debats.js +3 -21
- package/lib/types/dosleg.d.ts +1 -1
- package/lib/types/dosleg.js +152 -119
- package/lib/types/questions.d.ts +1 -1
- package/lib/types/questions.js +1 -13
- package/lib/types/sens.d.ts +1 -1
- package/lib/types/sens.js +1 -13
- package/lib/types/sessions.js +44 -49
- package/lib/types/texte.js +17 -22
- package/lib/validators/config.js +47 -111
- package/lib/validators/senat.js +1 -5
- package/package.json +18 -40
- package/lib/aggregates.d.ts +0 -52
- package/lib/aggregates.mjs +0 -930
- package/lib/aggregates.ts +0 -833
- package/lib/config.mjs +0 -16
- package/lib/config.ts +0 -26
- package/lib/data/legislatures.json +0 -38
- package/lib/databases.mjs +0 -57
- package/lib/databases.ts +0 -71
- package/lib/datasets.mjs +0 -78
- package/lib/datasets.ts +0 -118
- package/lib/fields.d.ts +0 -10
- package/lib/fields.mjs +0 -68
- package/lib/fields.ts +0 -29
- package/lib/index.mjs +0 -4
- package/lib/index.ts +0 -42
- package/lib/inserters.d.ts +0 -98
- package/lib/inserters.mjs +0 -500
- package/lib/inserters.ts +0 -521
- package/lib/loaders.mjs +0 -158
- package/lib/loaders.ts +0 -271
- package/lib/model/ameli.mjs +0 -84
- package/lib/model/ameli.ts +0 -100
- package/lib/model/debats.mjs +0 -1
- package/lib/model/debats.ts +0 -0
- package/lib/model/dosleg.mjs +0 -196
- package/lib/model/dosleg.ts +0 -240
- package/lib/model/index.mjs +0 -4
- package/lib/model/index.ts +0 -14
- package/lib/model/questions.mjs +0 -71
- package/lib/model/questions.ts +0 -93
- package/lib/model/sens.mjs +0 -415
- package/lib/model/sens.ts +0 -516
- package/lib/model/texte.mjs +0 -208
- package/lib/model/texte.ts +0 -229
- package/lib/model/util.mjs +0 -19
- package/lib/model/util.ts +0 -32
- package/lib/raw_types/ameli.mjs +0 -5
- package/lib/raw_types/ameli.ts +0 -951
- package/lib/raw_types/debats.mjs +0 -5
- package/lib/raw_types/debats.ts +0 -222
- package/lib/raw_types/dosleg.mjs +0 -5
- package/lib/raw_types/dosleg.ts +0 -3625
- package/lib/raw_types/questions.mjs +0 -5
- package/lib/raw_types/questions.ts +0 -427
- package/lib/raw_types/sens.mjs +0 -5
- package/lib/raw_types/sens.ts +0 -4499
- package/lib/raw_types_kysely/ameli.d.ts +0 -6
- package/lib/raw_types_kysely/ameli.mjs +0 -7
- package/lib/raw_types_kysely/ameli.ts +0 -6
- package/lib/raw_types_kysely/debats.d.ts +0 -6
- package/lib/raw_types_kysely/debats.mjs +0 -7
- package/lib/raw_types_kysely/debats.ts +0 -6
- package/lib/raw_types_kysely/dosleg.d.ts +0 -6
- package/lib/raw_types_kysely/dosleg.mjs +0 -7
- package/lib/raw_types_kysely/dosleg.ts +0 -6
- package/lib/raw_types_kysely/questions.d.ts +0 -6
- package/lib/raw_types_kysely/questions.mjs +0 -7
- package/lib/raw_types_kysely/questions.ts +0 -6
- package/lib/raw_types_kysely/sens.d.ts +0 -6
- package/lib/raw_types_kysely/sens.mjs +0 -7
- package/lib/raw_types_kysely/sens.ts +0 -6
- package/lib/raw_types_kysely/texte.d.ts +0 -45
- package/lib/raw_types_kysely/texte.mjs +0 -7
- package/lib/raw_types_kysely/texte.ts +0 -53
- package/lib/raw_types_schemats/ameli.mjs +0 -2
- package/lib/raw_types_schemats/ameli.ts +0 -601
- package/lib/raw_types_schemats/debats.mjs +0 -2
- package/lib/raw_types_schemats/debats.ts +0 -145
- package/lib/raw_types_schemats/dosleg.mjs +0 -2
- package/lib/raw_types_schemats/dosleg.ts +0 -2195
- package/lib/raw_types_schemats/questions.mjs +0 -2
- package/lib/raw_types_schemats/questions.ts +0 -251
- package/lib/raw_types_schemats/sens.mjs +0 -2
- package/lib/raw_types_schemats/sens.ts +0 -2907
- package/lib/scripts/convert_data.mjs +0 -181
- package/lib/scripts/convert_data.ts +0 -243
- package/lib/scripts/datautil.mjs +0 -16
- package/lib/scripts/datautil.ts +0 -19
- package/lib/scripts/images/transparent_150x192.jpg +0 -0
- package/lib/scripts/images/transparent_155x225.jpg +0 -0
- package/lib/scripts/parse_textes.mjs +0 -46
- package/lib/scripts/parse_textes.ts +0 -65
- package/lib/scripts/retrieve_documents.mjs +0 -249
- package/lib/scripts/retrieve_documents.ts +0 -298
- package/lib/scripts/retrieve_open_data.mjs +0 -217
- package/lib/scripts/retrieve_open_data.ts +0 -274
- package/lib/scripts/retrieve_senateurs_photos.mjs +0 -147
- package/lib/scripts/retrieve_senateurs_photos.ts +0 -177
- package/lib/scripts/retrieve_textes.d.ts +0 -1
- package/lib/scripts/retrieve_textes.mjs +0 -328
- package/lib/scripts/retrieve_textes.ts +0 -143
- package/lib/scripts/shared/cli_helpers.ts +0 -36
- package/lib/scripts/shared/util.ts +0 -33
- package/lib/src/aggregates.d.ts +0 -52
- package/lib/src/aggregates.mjs +0 -726
- package/lib/src/config.d.ts +0 -2
- package/lib/src/config.mjs +0 -16
- package/lib/src/databases.d.ts +0 -18
- package/lib/src/databases.mjs +0 -55
- package/lib/src/datasets.d.ts +0 -28
- package/lib/src/datasets.mjs +0 -78
- package/lib/src/fields.d.ts +0 -10
- package/lib/src/fields.mjs +0 -22
- package/lib/src/index.d.ts +0 -8
- package/lib/src/index.mjs +0 -7
- package/lib/src/inserters.d.ts +0 -98
- package/lib/src/inserters.mjs +0 -360
- package/lib/src/loaders.d.ts +0 -36
- package/lib/src/loaders.mjs +0 -107
- package/lib/src/model/ameli.d.ts +0 -4
- package/lib/src/model/ameli.js +0 -57
- package/lib/src/model/debats.d.ts +0 -4
- package/lib/src/model/debats.js +0 -43
- package/lib/src/model/dosleg.d.ts +0 -197
- package/lib/src/model/dosleg.js +0 -169
- package/lib/src/model/index.d.ts +0 -4
- package/lib/src/model/index.js +0 -4
- package/lib/src/model/questions.d.ts +0 -89
- package/lib/src/model/questions.js +0 -76
- package/lib/src/model/sens.d.ts +0 -390
- package/lib/src/model/sens.js +0 -339
- package/lib/src/model/texte.d.ts +0 -7
- package/lib/src/model/texte.js +0 -183
- package/lib/src/raw_types_kysely/ameli.d.ts +0 -915
- package/lib/src/raw_types_kysely/ameli.js +0 -5
- package/lib/src/raw_types_kysely/debats.d.ts +0 -207
- package/lib/src/raw_types_kysely/debats.js +0 -5
- package/lib/src/raw_types_kysely/dosleg.d.ts +0 -3532
- package/lib/src/raw_types_kysely/dosleg.js +0 -5
- package/lib/src/raw_types_kysely/questions.d.ts +0 -414
- package/lib/src/raw_types_kysely/questions.js +0 -5
- package/lib/src/raw_types_kysely/sens.d.ts +0 -4394
- package/lib/src/raw_types_kysely/sens.js +0 -5
- package/lib/src/raw_types_schemats/ameli.d.ts +0 -541
- package/lib/src/raw_types_schemats/ameli.js +0 -2
- package/lib/src/raw_types_schemats/debats.d.ts +0 -127
- package/lib/src/raw_types_schemats/debats.js +0 -2
- package/lib/src/raw_types_schemats/dosleg.d.ts +0 -2027
- package/lib/src/raw_types_schemats/dosleg.js +0 -2
- package/lib/src/raw_types_schemats/questions.d.ts +0 -231
- package/lib/src/raw_types_schemats/questions.js +0 -2
- package/lib/src/raw_types_schemats/sens.d.ts +0 -2709
- package/lib/src/raw_types_schemats/sens.js +0 -2
- package/lib/src/scripts/convert_data.d.ts +0 -1
- package/lib/src/scripts/convert_data.js +0 -95
- package/lib/src/scripts/datautil.d.ts +0 -5
- package/lib/src/scripts/datautil.js +0 -16
- package/lib/src/scripts/parse_textes.d.ts +0 -1
- package/lib/src/scripts/parse_textes.js +0 -47
- package/lib/src/scripts/retrieve_documents.d.ts +0 -1
- package/lib/src/scripts/retrieve_documents.js +0 -258
- package/lib/src/scripts/retrieve_open_data.d.ts +0 -1
- package/lib/src/scripts/retrieve_open_data.js +0 -214
- package/lib/src/scripts/retrieve_senateurs_photos.d.ts +0 -1
- package/lib/src/scripts/retrieve_senateurs_photos.js +0 -147
- package/lib/src/scripts/shared/cli_helpers.d.ts +0 -44
- package/lib/src/scripts/shared/cli_helpers.js +0 -32
- package/lib/src/scripts/shared/util.d.ts +0 -3
- package/lib/src/scripts/shared/util.js +0 -28
- package/lib/src/strings.d.ts +0 -1
- package/lib/src/strings.mjs +0 -18
- package/lib/src/types/ameli.d.ts +0 -10
- package/lib/src/types/ameli.js +0 -13
- package/lib/src/types/debats.d.ts +0 -4
- package/lib/src/types/debats.js +0 -2
- package/lib/src/types/dosleg.d.ts +0 -98
- package/lib/src/types/dosleg.js +0 -151
- package/lib/src/types/questions.d.ts +0 -2
- package/lib/src/types/questions.js +0 -1
- package/lib/src/types/sens.d.ts +0 -10
- package/lib/src/types/sens.js +0 -1
- package/lib/src/types/sessions.d.ts +0 -42
- package/lib/src/types/sessions.js +0 -43
- package/lib/src/types/texte.d.ts +0 -61
- package/lib/src/types/texte.js +0 -16
- package/lib/src/validators/config.d.ts +0 -1
- package/lib/src/validators/config.js +0 -54
- package/lib/src/validators/senat.d.ts +0 -0
- package/lib/src/validators/senat.js +0 -24
- package/lib/strings.mjs +0 -18
- package/lib/strings.ts +0 -26
- package/lib/types/ameli.mjs +0 -13
- package/lib/types/ameli.ts +0 -21
- package/lib/types/debats.mjs +0 -2
- package/lib/types/debats.ts +0 -6
- package/lib/types/dosleg.mjs +0 -151
- package/lib/types/dosleg.ts +0 -284
- package/lib/types/questions.mjs +0 -1
- package/lib/types/questions.ts +0 -3
- package/lib/types/sens.mjs +0 -1
- package/lib/types/sens.ts +0 -12
- package/lib/types/sessions.mjs +0 -43
- package/lib/types/sessions.ts +0 -42
- package/lib/types/texte.mjs +0 -16
- package/lib/types/texte.ts +0 -76
- package/lib/typings/windows-1252.d.js +0 -2
- package/lib/typings/windows-1252.d.mjs +0 -2
- package/lib/typings/windows-1252.d.ts +0 -11
- package/lib/validators/config.mjs +0 -54
- package/lib/validators/config.ts +0 -79
- package/lib/validators/senat.mjs +0 -24
- package/lib/validators/senat.ts +0 -26
|
@@ -1,521 +1,255 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
require("
|
|
7
|
-
require("
|
|
8
|
-
require("
|
|
9
|
-
require("
|
|
10
|
-
require("
|
|
11
|
-
require("
|
|
12
|
-
require("
|
|
13
|
-
require("
|
|
14
|
-
require("
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
help: "formats of documents to retrieve (xml/html/pdf for textes, html/pdf for rapports); leave empty for all",
|
|
59
|
-
multiple: true,
|
|
60
|
-
name: "formats",
|
|
61
|
-
type: String
|
|
62
|
-
}, {
|
|
63
|
-
help: "types of documents to retrieve (textes/rapports); leave empty for all",
|
|
64
|
-
multiple: true,
|
|
65
|
-
name: "types",
|
|
66
|
-
type: String
|
|
67
|
-
}, {
|
|
68
|
-
help: "force retrieve all documents, even already retrieved ones",
|
|
69
|
-
name: "force",
|
|
70
|
-
type: Boolean
|
|
71
|
-
}]);
|
|
72
|
-
var options = (0, _commandLineArgs["default"])(optionsDefinitions);
|
|
73
|
-
var textDecoder = new TextDecoder("utf8");
|
|
74
|
-
function retrieveDocument(_x) {
|
|
75
|
-
return _retrieveDocument.apply(this, arguments);
|
|
76
|
-
}
|
|
77
|
-
function _retrieveDocument() {
|
|
78
|
-
_retrieveDocument = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime().mark(function _callee(documentUrl) {
|
|
79
|
-
var response;
|
|
80
|
-
return _regeneratorRuntime().wrap(function _callee$(_context) {
|
|
81
|
-
while (1) switch (_context.prev = _context.next) {
|
|
82
|
-
case 0:
|
|
83
|
-
if (!options.silent) {
|
|
84
|
-
console.log("Retrieving document ".concat(documentUrl, "\u2026"));
|
|
85
|
-
}
|
|
86
|
-
_context.prev = 1;
|
|
87
|
-
_context.next = 4;
|
|
88
|
-
return (0, _util.fetchWithRetry)(documentUrl);
|
|
89
|
-
case 4:
|
|
90
|
-
response = _context.sent;
|
|
91
|
-
if (response.ok) {
|
|
92
|
-
_context.next = 8;
|
|
93
|
-
break;
|
|
94
|
-
}
|
|
95
|
-
if (response.status === 404) {
|
|
96
|
-
console.warn("Texte ".concat(documentUrl, " not found"));
|
|
97
|
-
} else {
|
|
98
|
-
console.error("An error occurred while retrieving texte ".concat(documentUrl, ": ").concat(response.status));
|
|
99
|
-
}
|
|
100
|
-
return _context.abrupt("return", null);
|
|
101
|
-
case 8:
|
|
102
|
-
return _context.abrupt("return", response.arrayBuffer());
|
|
103
|
-
case 11:
|
|
104
|
-
_context.prev = 11;
|
|
105
|
-
_context.t0 = _context["catch"](1);
|
|
106
|
-
console.error(_context.t0.message);
|
|
107
|
-
return _context.abrupt("return", null);
|
|
108
|
-
case 15:
|
|
109
|
-
case "end":
|
|
110
|
-
return _context.stop();
|
|
111
|
-
}
|
|
112
|
-
}, _callee, null, [[1, 11]]);
|
|
113
|
-
}));
|
|
114
|
-
return _retrieveDocument.apply(this, arguments);
|
|
115
|
-
}
|
|
116
|
-
function retrieveTextes(_x2) {
|
|
117
|
-
return _retrieveTextes.apply(this, arguments);
|
|
118
|
-
}
|
|
119
|
-
function _retrieveTextes() {
|
|
120
|
-
_retrieveTextes = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime().mark(function _callee2(dataDir) {
|
|
121
|
-
var textesDir, originalTextesDir, transformedTextesDir, retrievedTextesCount, texteUrlsNotFoundOrError, texteUrlsParseError, _iterator, _step, session, _iterator2, _step2, _texteMetadata$sessio, texteMetadata, texteDir, exposeDesMotifsContent, exposeDesMotifsPath, textePath, texteBuffer, _texteMetadata$sessio2, parsedTexte, texteXml, exposeDesMotifsHtml, transformedTexteDir, _textePath, _texteBuffer, _textePath2, _texteBuffer2;
|
|
122
|
-
return _regeneratorRuntime().wrap(function _callee2$(_context2) {
|
|
123
|
-
while (1) switch (_context2.prev = _context2.next) {
|
|
124
|
-
case 0:
|
|
125
|
-
textesDir = _path["default"].join(dataDir, _loaders.TEXTE_FOLDER);
|
|
126
|
-
_fsExtra["default"].ensureDirSync(textesDir);
|
|
127
|
-
originalTextesDir = _path["default"].join(textesDir, _loaders.TEXTE_ORIGINAL_FOLDER);
|
|
128
|
-
transformedTextesDir = _path["default"].join(textesDir, _loaders.TEXTE_TRANSFORMED_FOLDER);
|
|
129
|
-
if (options.parseDocuments) {
|
|
130
|
-
(0, _util.ensureAndClearDir)(transformedTextesDir);
|
|
131
|
-
}
|
|
132
|
-
retrievedTextesCount = 0;
|
|
133
|
-
texteUrlsNotFoundOrError = [];
|
|
134
|
-
texteUrlsParseError = [];
|
|
135
|
-
_iterator = _createForOfIteratorHelper(options.sessions);
|
|
136
|
-
_context2.prev = 9;
|
|
137
|
-
_iterator.s();
|
|
138
|
-
case 11:
|
|
139
|
-
if ((_step = _iterator.n()).done) {
|
|
140
|
-
_context2.next = 103;
|
|
141
|
-
break;
|
|
142
|
-
}
|
|
143
|
-
session = _step.value;
|
|
144
|
-
_iterator2 = _createForOfIteratorHelper((0, _loaders.iterLoadSenatDossiersLegislatifsTexteUrls)(dataDir, session));
|
|
145
|
-
_context2.prev = 14;
|
|
146
|
-
_iterator2.s();
|
|
147
|
-
case 16:
|
|
148
|
-
if ((_step2 = _iterator2.n()).done) {
|
|
149
|
-
_context2.next = 93;
|
|
150
|
-
break;
|
|
151
|
-
}
|
|
152
|
-
texteMetadata = _step2.value.item;
|
|
153
|
-
texteDir = _path["default"].join(originalTextesDir, "".concat((_texteMetadata$sessio = texteMetadata.session) !== null && _texteMetadata$sessio !== void 0 ? _texteMetadata$sessio : _datautil.UNDEFINED_SESSION), texteMetadata.name);
|
|
154
|
-
_fsExtra["default"].ensureDirSync(texteDir);
|
|
155
|
-
exposeDesMotifsContent = null;
|
|
156
|
-
if (!texteMetadata.url_expose_des_motifs) {
|
|
157
|
-
_context2.next = 30;
|
|
158
|
-
break;
|
|
159
|
-
}
|
|
160
|
-
if (!options.silent) {
|
|
161
|
-
console.log("Retrieving exposé des motifs…");
|
|
162
|
-
}
|
|
163
|
-
exposeDesMotifsPath = _path["default"].join(texteDir, "".concat(texteMetadata.name, "-expose.html"));
|
|
164
|
-
_context2.next = 26;
|
|
165
|
-
return retrieveDocument(texteMetadata.url_expose_des_motifs.toString());
|
|
166
|
-
case 26:
|
|
167
|
-
exposeDesMotifsContent = _context2.sent;
|
|
168
|
-
if (exposeDesMotifsContent) {
|
|
169
|
-
_context2.next = 29;
|
|
170
|
-
break;
|
|
171
|
-
}
|
|
172
|
-
return _context2.abrupt("continue", 91);
|
|
173
|
-
case 29:
|
|
174
|
-
_fsExtra["default"].writeFileSync(exposeDesMotifsPath, Buffer.from(exposeDesMotifsContent));
|
|
175
|
-
case 30:
|
|
176
|
-
if (!(0, _util.isOptionEmptyOrHasValue)(options.formats, "xml")) {
|
|
177
|
-
_context2.next = 63;
|
|
178
|
-
break;
|
|
179
|
-
}
|
|
180
|
-
textePath = _path["default"].join(texteDir, "".concat(texteMetadata.name, ".xml"));
|
|
181
|
-
texteBuffer = null;
|
|
182
|
-
if (!(!options.force && _fsExtra["default"].existsSync(textePath))) {
|
|
183
|
-
_context2.next = 37;
|
|
184
|
-
break;
|
|
185
|
-
}
|
|
186
|
-
if (!options.silent) {
|
|
187
|
-
console.info("Already retrieved texte ".concat(textePath, "\u2026"));
|
|
188
|
-
}
|
|
189
|
-
_context2.next = 45;
|
|
190
|
-
break;
|
|
191
|
-
case 37:
|
|
192
|
-
_context2.next = 39;
|
|
193
|
-
return retrieveDocument(texteMetadata.url_xml.toString());
|
|
194
|
-
case 39:
|
|
195
|
-
texteBuffer = _context2.sent;
|
|
196
|
-
if (texteBuffer) {
|
|
197
|
-
_context2.next = 43;
|
|
198
|
-
break;
|
|
199
|
-
}
|
|
200
|
-
texteUrlsNotFoundOrError.push(texteMetadata.url_xml);
|
|
201
|
-
return _context2.abrupt("continue", 91);
|
|
202
|
-
case 43:
|
|
203
|
-
_fsExtra["default"].writeFileSync(textePath, Buffer.from(texteBuffer));
|
|
204
|
-
retrievedTextesCount++;
|
|
205
|
-
case 45:
|
|
206
|
-
if (!options.parseDocuments) {
|
|
207
|
-
_context2.next = 63;
|
|
208
|
-
break;
|
|
209
|
-
}
|
|
210
|
-
if (!options.silent) {
|
|
211
|
-
console.log("Parsing texte ".concat(texteMetadata.name, ".xml\u2026"));
|
|
212
|
-
}
|
|
213
|
-
parsedTexte = null;
|
|
214
|
-
if (!texteBuffer) {
|
|
215
|
-
_context2.next = 53;
|
|
216
|
-
break;
|
|
217
|
-
}
|
|
218
|
-
texteXml = textDecoder.decode(texteBuffer);
|
|
219
|
-
parsedTexte = (0, _texte.parseTexte)(texteXml);
|
|
220
|
-
_context2.next = 56;
|
|
221
|
-
break;
|
|
222
|
-
case 53:
|
|
223
|
-
_context2.next = 55;
|
|
224
|
-
return (0, _texte.parseTexteFromFile)(textePath);
|
|
225
|
-
case 55:
|
|
226
|
-
parsedTexte = _context2.sent;
|
|
227
|
-
case 56:
|
|
228
|
-
if (parsedTexte) {
|
|
229
|
-
_context2.next = 59;
|
|
230
|
-
break;
|
|
231
|
-
}
|
|
232
|
-
texteUrlsParseError.push(texteMetadata.url_xml);
|
|
233
|
-
return _context2.abrupt("continue", 91);
|
|
234
|
-
case 59:
|
|
235
|
-
if (exposeDesMotifsContent) {
|
|
236
|
-
if (!options.silent) {
|
|
237
|
-
console.log("Parsing exposé des motifs…");
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
const assert_1 = __importDefault(require("assert"));
|
|
7
|
+
const command_line_args_1 = __importDefault(require("command-line-args"));
|
|
8
|
+
const fs_extra_1 = __importDefault(require("fs-extra"));
|
|
9
|
+
const path_1 = __importDefault(require("path"));
|
|
10
|
+
const loaders_1 = require("../loaders");
|
|
11
|
+
const texte_1 = require("../model/texte");
|
|
12
|
+
const datautil_1 = require("./datautil");
|
|
13
|
+
const cli_helpers_1 = require("./shared/cli_helpers");
|
|
14
|
+
const util_1 = require("./shared/util");
|
|
15
|
+
const optionsDefinitions = [
|
|
16
|
+
...cli_helpers_1.commonOptions,
|
|
17
|
+
{
|
|
18
|
+
help: "sessions of textes to retrieve; leave empty for all",
|
|
19
|
+
multiple: true,
|
|
20
|
+
name: "sessions",
|
|
21
|
+
type: String,
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
help: "parse and convert documents into JSON (textes only for now, requires format xml)",
|
|
25
|
+
name: "parseDocuments",
|
|
26
|
+
type: Boolean,
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
alias: "F",
|
|
30
|
+
help: "formats of documents to retrieve (xml/html/pdf for textes, html/pdf for rapports); leave empty for all",
|
|
31
|
+
multiple: true,
|
|
32
|
+
name: "formats",
|
|
33
|
+
type: String,
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
help: "types of documents to retrieve (textes/rapports); leave empty for all",
|
|
37
|
+
multiple: true,
|
|
38
|
+
name: "types",
|
|
39
|
+
type: String,
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
help: "force retrieve all documents, even already retrieved ones",
|
|
43
|
+
name: "force",
|
|
44
|
+
type: Boolean,
|
|
45
|
+
},
|
|
46
|
+
];
|
|
47
|
+
const options = (0, command_line_args_1.default)(optionsDefinitions);
|
|
48
|
+
const textDecoder = new TextDecoder("utf8");
|
|
49
|
+
async function retrieveDocument(documentUrl) {
|
|
50
|
+
if (!options["silent"]) {
|
|
51
|
+
console.log(`Retrieving document ${documentUrl}…`);
|
|
52
|
+
}
|
|
53
|
+
try {
|
|
54
|
+
const response = await (0, util_1.fetchWithRetry)(documentUrl);
|
|
55
|
+
if (!response.ok) {
|
|
56
|
+
if (response.status === 404) {
|
|
57
|
+
console.warn(`Texte ${documentUrl} not found`);
|
|
238
58
|
}
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
}
|
|
242
|
-
transformedTexteDir = _path["default"].join(transformedTextesDir, "".concat((_texteMetadata$sessio2 = texteMetadata.session) !== null && _texteMetadata$sessio2 !== void 0 ? _texteMetadata$sessio2 : _datautil.UNDEFINED_SESSION), texteMetadata.name);
|
|
243
|
-
_fsExtra["default"].ensureDirSync(transformedTexteDir);
|
|
244
|
-
_fsExtra["default"].writeJSONSync(_path["default"].join(transformedTexteDir, "".concat(texteMetadata.name, ".json")), parsedTexte, {
|
|
245
|
-
spaces: 2
|
|
246
|
-
});
|
|
247
|
-
case 63:
|
|
248
|
-
if (!(0, _util.isOptionEmptyOrHasValue)(options.formats, "html")) {
|
|
249
|
-
_context2.next = 77;
|
|
250
|
-
break;
|
|
251
|
-
}
|
|
252
|
-
_textePath = _path["default"].join(texteDir, "".concat(texteMetadata.name, ".html"));
|
|
253
|
-
if (!(!options.force && _fsExtra["default"].existsSync(_textePath))) {
|
|
254
|
-
_context2.next = 69;
|
|
255
|
-
break;
|
|
256
|
-
}
|
|
257
|
-
if (!options.silent) {
|
|
258
|
-
console.info("Already retrieved texte ".concat(_textePath, "\u2026"));
|
|
259
|
-
}
|
|
260
|
-
_context2.next = 77;
|
|
261
|
-
break;
|
|
262
|
-
case 69:
|
|
263
|
-
_context2.next = 71;
|
|
264
|
-
return retrieveDocument(texteMetadata.url_html.toString());
|
|
265
|
-
case 71:
|
|
266
|
-
_texteBuffer = _context2.sent;
|
|
267
|
-
if (_texteBuffer) {
|
|
268
|
-
_context2.next = 75;
|
|
269
|
-
break;
|
|
270
|
-
}
|
|
271
|
-
texteUrlsNotFoundOrError.push(texteMetadata.url_html);
|
|
272
|
-
return _context2.abrupt("continue", 91);
|
|
273
|
-
case 75:
|
|
274
|
-
_fsExtra["default"].writeFileSync(_textePath, Buffer.from(_texteBuffer));
|
|
275
|
-
retrievedTextesCount++;
|
|
276
|
-
case 77:
|
|
277
|
-
if (!(0, _util.isOptionEmptyOrHasValue)(options.formats, "pdf")) {
|
|
278
|
-
_context2.next = 91;
|
|
279
|
-
break;
|
|
280
|
-
}
|
|
281
|
-
_textePath2 = _path["default"].join(texteDir, "".concat(texteMetadata.name, ".pdf"));
|
|
282
|
-
if (!(!options.force && _fsExtra["default"].existsSync(_textePath2))) {
|
|
283
|
-
_context2.next = 83;
|
|
284
|
-
break;
|
|
285
|
-
}
|
|
286
|
-
if (!options.silent) {
|
|
287
|
-
console.info("Already retrieved texte ".concat(_textePath2, "\u2026"));
|
|
288
|
-
}
|
|
289
|
-
_context2.next = 91;
|
|
290
|
-
break;
|
|
291
|
-
case 83:
|
|
292
|
-
_context2.next = 85;
|
|
293
|
-
return retrieveDocument(texteMetadata.url_pdf.toString());
|
|
294
|
-
case 85:
|
|
295
|
-
_texteBuffer2 = _context2.sent;
|
|
296
|
-
if (_texteBuffer2) {
|
|
297
|
-
_context2.next = 89;
|
|
298
|
-
break;
|
|
299
|
-
}
|
|
300
|
-
texteUrlsNotFoundOrError.push(texteMetadata.url_pdf);
|
|
301
|
-
return _context2.abrupt("continue", 91);
|
|
302
|
-
case 89:
|
|
303
|
-
_fsExtra["default"].writeFileSync(_textePath2, Buffer.from(_texteBuffer2));
|
|
304
|
-
retrievedTextesCount++;
|
|
305
|
-
case 91:
|
|
306
|
-
_context2.next = 16;
|
|
307
|
-
break;
|
|
308
|
-
case 93:
|
|
309
|
-
_context2.next = 98;
|
|
310
|
-
break;
|
|
311
|
-
case 95:
|
|
312
|
-
_context2.prev = 95;
|
|
313
|
-
_context2.t0 = _context2["catch"](14);
|
|
314
|
-
_iterator2.e(_context2.t0);
|
|
315
|
-
case 98:
|
|
316
|
-
_context2.prev = 98;
|
|
317
|
-
_iterator2.f();
|
|
318
|
-
return _context2.finish(98);
|
|
319
|
-
case 101:
|
|
320
|
-
_context2.next = 11;
|
|
321
|
-
break;
|
|
322
|
-
case 103:
|
|
323
|
-
_context2.next = 108;
|
|
324
|
-
break;
|
|
325
|
-
case 105:
|
|
326
|
-
_context2.prev = 105;
|
|
327
|
-
_context2.t1 = _context2["catch"](9);
|
|
328
|
-
_iterator.e(_context2.t1);
|
|
329
|
-
case 108:
|
|
330
|
-
_context2.prev = 108;
|
|
331
|
-
_iterator.f();
|
|
332
|
-
return _context2.finish(108);
|
|
333
|
-
case 111:
|
|
334
|
-
if (options.verbose) {
|
|
335
|
-
console.log("".concat(retrievedTextesCount, " textes retrieved"));
|
|
336
|
-
console.log("".concat(texteUrlsNotFoundOrError.length, " textes failed to be retrieved with URLs ").concat(texteUrlsNotFoundOrError.join(", ")));
|
|
337
|
-
if (options.parseDocuments) {
|
|
338
|
-
console.log("".concat(texteUrlsParseError.length, " textes failed to be parsed with URLs ").concat(texteUrlsParseError.join(", ")));
|
|
59
|
+
else {
|
|
60
|
+
console.error(`An error occurred while retrieving texte ${documentUrl}: ${response.status}`);
|
|
339
61
|
}
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
62
|
+
return null;
|
|
63
|
+
}
|
|
64
|
+
return response.arrayBuffer();
|
|
65
|
+
}
|
|
66
|
+
catch (error) {
|
|
67
|
+
console.error(error.message);
|
|
68
|
+
return null;
|
|
69
|
+
}
|
|
348
70
|
}
|
|
349
|
-
function
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
case 62:
|
|
466
|
-
if (options.verbose) {
|
|
467
|
-
console.log("".concat(retrievedRapportsCount, " rapports retrieved"));
|
|
468
|
-
console.log("".concat(rapportUrlsNotFoundOrError.length, " rapports failed with URLs ").concat(rapportUrlsNotFoundOrError.join(", ")));
|
|
469
|
-
}
|
|
470
|
-
case 63:
|
|
471
|
-
case "end":
|
|
472
|
-
return _context3.stop();
|
|
473
|
-
}
|
|
474
|
-
}, _callee3, null, [[5, 56, 59, 62], [10, 46, 49, 52]]);
|
|
475
|
-
}));
|
|
476
|
-
return _retrieveRapports.apply(this, arguments);
|
|
71
|
+
async function retrieveTextes(dataDir) {
|
|
72
|
+
const textesDir = path_1.default.join(dataDir, loaders_1.TEXTE_FOLDER);
|
|
73
|
+
fs_extra_1.default.ensureDirSync(textesDir);
|
|
74
|
+
const originalTextesDir = path_1.default.join(textesDir, loaders_1.TEXTE_ORIGINAL_FOLDER);
|
|
75
|
+
const transformedTextesDir = path_1.default.join(textesDir, loaders_1.TEXTE_TRANSFORMED_FOLDER);
|
|
76
|
+
if (options["parseDocuments"]) {
|
|
77
|
+
(0, util_1.ensureAndClearDir)(transformedTextesDir);
|
|
78
|
+
}
|
|
79
|
+
let retrievedTextesCount = 0;
|
|
80
|
+
const texteUrlsNotFoundOrError = [];
|
|
81
|
+
const texteUrlsParseError = [];
|
|
82
|
+
for (const session of options["sessions"]) {
|
|
83
|
+
for (const { item: texteMetadata, } of (0, loaders_1.iterLoadSenatDossiersLegislatifsTexteUrls)(dataDir, session)) {
|
|
84
|
+
const texteDir = path_1.default.join(originalTextesDir, `${texteMetadata.session ?? datautil_1.UNDEFINED_SESSION}`, texteMetadata.name);
|
|
85
|
+
fs_extra_1.default.ensureDirSync(texteDir);
|
|
86
|
+
let exposeDesMotifsContent = null;
|
|
87
|
+
if (texteMetadata.url_expose_des_motifs) {
|
|
88
|
+
if (!options["silent"]) {
|
|
89
|
+
console.log("Retrieving exposé des motifs…");
|
|
90
|
+
}
|
|
91
|
+
const exposeDesMotifsPath = path_1.default.join(texteDir, `${texteMetadata.name}-expose.html`);
|
|
92
|
+
exposeDesMotifsContent = await retrieveDocument(texteMetadata.url_expose_des_motifs.toString());
|
|
93
|
+
if (!exposeDesMotifsContent) {
|
|
94
|
+
continue;
|
|
95
|
+
}
|
|
96
|
+
fs_extra_1.default.writeFileSync(exposeDesMotifsPath, Buffer.from(exposeDesMotifsContent));
|
|
97
|
+
}
|
|
98
|
+
if ((0, util_1.isOptionEmptyOrHasValue)(options["formats"], "xml")) {
|
|
99
|
+
const textePath = path_1.default.join(texteDir, `${texteMetadata.name}.xml`);
|
|
100
|
+
let texteBuffer = null;
|
|
101
|
+
if (!options["force"] && fs_extra_1.default.existsSync(textePath)) {
|
|
102
|
+
if (!options["silent"]) {
|
|
103
|
+
console.info(`Already retrieved texte ${textePath}…`);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
else {
|
|
107
|
+
texteBuffer = await retrieveDocument(texteMetadata.url_xml.toString());
|
|
108
|
+
if (!texteBuffer) {
|
|
109
|
+
texteUrlsNotFoundOrError.push(texteMetadata.url_xml);
|
|
110
|
+
continue;
|
|
111
|
+
}
|
|
112
|
+
fs_extra_1.default.writeFileSync(textePath, Buffer.from(texteBuffer));
|
|
113
|
+
retrievedTextesCount++;
|
|
114
|
+
}
|
|
115
|
+
if (options["parseDocuments"]) {
|
|
116
|
+
if (!options["silent"]) {
|
|
117
|
+
console.log(`Parsing texte ${texteMetadata.name}.xml…`);
|
|
118
|
+
}
|
|
119
|
+
let parsedTexte = null;
|
|
120
|
+
if (texteBuffer) {
|
|
121
|
+
const texteXml = textDecoder.decode(texteBuffer);
|
|
122
|
+
parsedTexte = (0, texte_1.parseTexte)(texteXml);
|
|
123
|
+
}
|
|
124
|
+
else {
|
|
125
|
+
parsedTexte = await (0, texte_1.parseTexteFromFile)(textePath);
|
|
126
|
+
}
|
|
127
|
+
if (!parsedTexte) {
|
|
128
|
+
texteUrlsParseError.push(texteMetadata.url_xml);
|
|
129
|
+
continue;
|
|
130
|
+
}
|
|
131
|
+
if (exposeDesMotifsContent) {
|
|
132
|
+
if (!options["silent"]) {
|
|
133
|
+
console.log("Parsing exposé des motifs…");
|
|
134
|
+
}
|
|
135
|
+
const exposeDesMotifsHtml = textDecoder.decode(exposeDesMotifsContent);
|
|
136
|
+
parsedTexte.exposeDesMotifs =
|
|
137
|
+
(0, texte_1.parseExposeDesMotifs)(exposeDesMotifsHtml);
|
|
138
|
+
}
|
|
139
|
+
const transformedTexteDir = path_1.default.join(transformedTextesDir, `${texteMetadata.session ?? datautil_1.UNDEFINED_SESSION}`, texteMetadata.name);
|
|
140
|
+
fs_extra_1.default.ensureDirSync(transformedTexteDir);
|
|
141
|
+
fs_extra_1.default.writeJSONSync(path_1.default.join(transformedTexteDir, `${texteMetadata.name}.json`), parsedTexte, { spaces: 2 });
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
if ((0, util_1.isOptionEmptyOrHasValue)(options["formats"], "html")) {
|
|
145
|
+
const textePath = path_1.default.join(texteDir, `${texteMetadata.name}.html`);
|
|
146
|
+
if (!options["force"] && fs_extra_1.default.existsSync(textePath)) {
|
|
147
|
+
if (!options["silent"]) {
|
|
148
|
+
console.info(`Already retrieved texte ${textePath}…`);
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
else {
|
|
152
|
+
const texteBuffer = await retrieveDocument(texteMetadata.url_html.toString());
|
|
153
|
+
if (!texteBuffer) {
|
|
154
|
+
texteUrlsNotFoundOrError.push(texteMetadata.url_html);
|
|
155
|
+
continue;
|
|
156
|
+
}
|
|
157
|
+
fs_extra_1.default.writeFileSync(textePath, Buffer.from(texteBuffer));
|
|
158
|
+
retrievedTextesCount++;
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
if ((0, util_1.isOptionEmptyOrHasValue)(options["formats"], "pdf")) {
|
|
162
|
+
const textePath = path_1.default.join(texteDir, `${texteMetadata.name}.pdf`);
|
|
163
|
+
if (!options["force"] && fs_extra_1.default.existsSync(textePath)) {
|
|
164
|
+
if (!options["silent"]) {
|
|
165
|
+
console.info(`Already retrieved texte ${textePath}…`);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
else {
|
|
169
|
+
const texteBuffer = await retrieveDocument(texteMetadata.url_pdf.toString());
|
|
170
|
+
if (!texteBuffer) {
|
|
171
|
+
texteUrlsNotFoundOrError.push(texteMetadata.url_pdf);
|
|
172
|
+
continue;
|
|
173
|
+
}
|
|
174
|
+
fs_extra_1.default.writeFileSync(textePath, Buffer.from(texteBuffer));
|
|
175
|
+
retrievedTextesCount++;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
if (options["verbose"]) {
|
|
181
|
+
console.log(`${retrievedTextesCount} textes retrieved`);
|
|
182
|
+
console.log(`${texteUrlsNotFoundOrError.length} textes failed to be retrieved with URLs ${texteUrlsNotFoundOrError.join(", ")}`);
|
|
183
|
+
if (options["parseDocuments"]) {
|
|
184
|
+
console.log(`${texteUrlsParseError.length} textes failed to be parsed with URLs ${texteUrlsParseError.join(", ")}`);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
477
187
|
}
|
|
478
|
-
function
|
|
479
|
-
|
|
188
|
+
/**
 * Downloads the HTML and/or PDF version of every rapport listed for the
 * requested sessions and writes them to
 * `<dataDir>/<RAPPORT_FOLDER>/<session>/<rapport name>/<rapport name>.<format>`.
 *
 * Reads the module-level `options` object: `sessions` (iterated), `formats`
 * (filtered through `isOptionEmptyOrHasValue`), `force`, `silent` and `verbose`.
 *
 * Each format is handled independently with respect to files already on disk:
 * an existing HTML file no longer prevents the PDF from being retrieved.
 * A failed download still skips the remaining formats of that rapport,
 * matching how `retrieveTextes` treats failures.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function retrieveRapports(dataDir) {
    const rapportsDir = path_1.default.join(dataDir, loaders_1.RAPPORT_FOLDER);
    fs_extra_1.default.ensureDirSync(rapportsDir);
    let retrievedRapportsCount = 0;
    const rapportUrlsNotFoundOrError = [];
    for (const session of options["sessions"]) {
        rapports: for (const { item: rapportMetadata } of (0, loaders_1.iterLoadSenatDossiersLegislatifsRapportUrls)(dataDir, session)) {
            const rapportDir = path_1.default.join(rapportsDir, `${rapportMetadata.session ?? datautil_1.UNDEFINED_SESSION}`, rapportMetadata.name);
            fs_extra_1.default.ensureDirSync(rapportDir);
            for (const format of ["html", "pdf"]) {
                if (!(0, util_1.isOptionEmptyOrHasValue)(options["formats"], format)) {
                    continue;
                }
                const rapportPath = path_1.default.join(rapportDir, `${rapportMetadata.name}.${format}`);
                if (!options["force"] && fs_extra_1.default.existsSync(rapportPath)) {
                    if (!options["silent"]) {
                        console.info(`Already retrieved rapport ${rapportPath}…`);
                    }
                    // Only this format is done; the other one may still be missing.
                    continue;
                }
                const rapportUrl = format === "html" ? rapportMetadata.url_html : rapportMetadata.url_pdf;
                // A falsy result from retrieveDocument is treated as "not found or error".
                const rapportBuffer = await retrieveDocument(rapportUrl.toString());
                if (!rapportBuffer) {
                    rapportUrlsNotFoundOrError.push(rapportUrl);
                    // Give up on this rapport entirely, as before.
                    continue rapports;
                }
                fs_extra_1.default.writeFileSync(rapportPath, Buffer.from(rapportBuffer));
                retrievedRapportsCount++;
            }
        }
    }
    if (options["verbose"]) {
        console.log(`${retrievedRapportsCount} rapports retrieved`);
        console.log(`${rapportUrlsNotFoundOrError.length} rapports failed with URLs ${rapportUrlsNotFoundOrError.join(", ")}`);
    }
}
|
|
481
|
-
function
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
_context4.next = 6;
|
|
495
|
-
return retrieveTextes(dataDir);
|
|
496
|
-
case 6:
|
|
497
|
-
if (!(0, _util.isOptionEmptyOrHasValue)(options.types, "rapports")) {
|
|
498
|
-
_context4.next = 9;
|
|
499
|
-
break;
|
|
500
|
-
}
|
|
501
|
-
_context4.next = 9;
|
|
502
|
-
return retrieveRapports(dataDir);
|
|
503
|
-
case 9:
|
|
504
|
-
if (!options.silent) {
|
|
505
|
-
console.timeEnd("documents processing time");
|
|
506
|
-
}
|
|
507
|
-
case 10:
|
|
508
|
-
case "end":
|
|
509
|
-
return _context4.stop();
|
|
510
|
-
}
|
|
511
|
-
}, _callee4);
|
|
512
|
-
}));
|
|
513
|
-
return _main.apply(this, arguments);
|
|
236
|
+
/**
 * Script entry point: checks that a data directory was supplied, then runs
 * the retrievers selected by `options["types"]` one after the other.
 * The elapsed time is printed at the end unless `options["silent"]` is set.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const { dataDir } = options;
    (0, assert_1.default)(dataDir, "Missing argument: data directory");
    const timerLabel = "documents processing time";
    console.time(timerLabel);
    // Order matters: textes are retrieved before rapports.
    const retrieversByType = [
        ["textes", retrieveTextes],
        ["rapports", retrieveRapports],
    ];
    for (const [documentType, retrieve] of retrieversByType) {
        if ((0, util_1.isOptionEmptyOrHasValue)(options["types"], documentType)) {
            await retrieve(dataDir);
        }
    }
    if (options["silent"]) {
        return;
    }
    console.timeEnd(timerLabel);
}
|
|
515
|
-
main()
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
250
|
+
// Run the script and map its outcome to the process exit code:
// 0 when main() completes, 1 (after logging the error) when it fails.
void (async () => {
    try {
        await main();
        process.exit(0);
    }
    catch (error) {
        console.log(error);
        process.exit(1);
    }
})();
|
|
521
|
-
//# sourceMappingURL=data:application/json;charset=utf-8;base64,
|