@tricoteuses/senat 3.1.0 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. package/lib/src/loaders.d.ts +3 -3
  2. package/lib/src/loaders.js +1 -1
  3. package/lib/src/model/agenda.d.ts +1 -1
  4. package/lib/src/model/commission.d.ts +2 -2
  5. package/lib/src/model/seance.d.ts +1 -1
  6. package/lib/src/types/ameli.d.ts +4 -1761
  7. package/lib/src/types/ameli.js +1 -1074
  8. package/lib/src/types/debats.d.ts +2 -380
  9. package/lib/src/types/debats.js +1 -266
  10. package/lib/src/types/dosleg.d.ts +69 -2953
  11. package/lib/src/types/dosleg.js +1 -2005
  12. package/lib/src/types/questions.d.ts +2 -699
  13. package/lib/src/types/questions.js +1 -493
  14. package/lib/src/types/sens.d.ts +7 -7842
  15. package/lib/src/types/sens.js +1 -4691
  16. package/lib/src/utils/nvs-parsing.d.ts +1 -1
  17. package/lib/src/utils/nvs-parsing.js +9 -1
  18. package/lib/src/videos/pipeline.d.ts +3 -3
  19. package/lib/src/videos/pipeline.js +2 -2
  20. package/package.json +1 -1
  21. package/lib/add-js-extensions-v2.d.ts +0 -1
  22. package/lib/add-js-extensions-v2.js +0 -23
  23. package/lib/add-js-extensions.d.ts +0 -1
  24. package/lib/add-js-extensions.js +0 -17
  25. package/lib/aggregates.d.ts +0 -52
  26. package/lib/aggregates.js +0 -930
  27. package/lib/aggregates.mjs +0 -713
  28. package/lib/aggregates.ts +0 -833
  29. package/lib/config.d.ts +0 -10
  30. package/lib/config.js +0 -16
  31. package/lib/config.mjs +0 -16
  32. package/lib/config.ts +0 -26
  33. package/lib/databases.d.ts +0 -2
  34. package/lib/databases.js +0 -26
  35. package/lib/databases.mjs +0 -57
  36. package/lib/databases.ts +0 -71
  37. package/lib/datasets.d.ts +0 -34
  38. package/lib/datasets.js +0 -233
  39. package/lib/datasets.mjs +0 -78
  40. package/lib/datasets.ts +0 -118
  41. package/lib/fields.d.ts +0 -10
  42. package/lib/fields.js +0 -68
  43. package/lib/fields.mjs +0 -22
  44. package/lib/fields.ts +0 -29
  45. package/lib/git.d.ts +0 -26
  46. package/lib/git.js +0 -167
  47. package/lib/index.d.ts +0 -13
  48. package/lib/index.js +0 -1
  49. package/lib/index.mjs +0 -7
  50. package/lib/index.ts +0 -64
  51. package/lib/inserters.d.ts +0 -98
  52. package/lib/inserters.js +0 -500
  53. package/lib/inserters.mjs +0 -360
  54. package/lib/inserters.ts +0 -521
  55. package/lib/legislatures.json +0 -38
  56. package/lib/loaders.d.ts +0 -58
  57. package/lib/loaders.js +0 -286
  58. package/lib/loaders.mjs +0 -158
  59. package/lib/loaders.ts +0 -271
  60. package/lib/model/agenda.d.ts +0 -6
  61. package/lib/model/agenda.js +0 -148
  62. package/lib/model/ameli.d.ts +0 -51
  63. package/lib/model/ameli.js +0 -149
  64. package/lib/model/ameli.mjs +0 -84
  65. package/lib/model/ameli.ts +0 -100
  66. package/lib/model/commission.d.ts +0 -18
  67. package/lib/model/commission.js +0 -269
  68. package/lib/model/debats.d.ts +0 -67
  69. package/lib/model/debats.js +0 -95
  70. package/lib/model/debats.mjs +0 -43
  71. package/lib/model/debats.ts +0 -68
  72. package/lib/model/documents.d.ts +0 -12
  73. package/lib/model/documents.js +0 -151
  74. package/lib/model/dosleg.d.ts +0 -7
  75. package/lib/model/dosleg.js +0 -326
  76. package/lib/model/dosleg.mjs +0 -196
  77. package/lib/model/dosleg.ts +0 -240
  78. package/lib/model/index.d.ts +0 -7
  79. package/lib/model/index.js +0 -7
  80. package/lib/model/index.mjs +0 -5
  81. package/lib/model/index.ts +0 -15
  82. package/lib/model/questions.d.ts +0 -45
  83. package/lib/model/questions.js +0 -89
  84. package/lib/model/questions.mjs +0 -71
  85. package/lib/model/questions.ts +0 -93
  86. package/lib/model/scrutins.d.ts +0 -13
  87. package/lib/model/scrutins.js +0 -114
  88. package/lib/model/seance.d.ts +0 -3
  89. package/lib/model/seance.js +0 -267
  90. package/lib/model/sens.d.ts +0 -146
  91. package/lib/model/sens.js +0 -454
  92. package/lib/model/sens.mjs +0 -415
  93. package/lib/model/sens.ts +0 -516
  94. package/lib/model/texte.d.ts +0 -7
  95. package/lib/model/texte.js +0 -256
  96. package/lib/model/texte.mjs +0 -208
  97. package/lib/model/texte.ts +0 -229
  98. package/lib/model/util.d.ts +0 -9
  99. package/lib/model/util.js +0 -38
  100. package/lib/model/util.mjs +0 -19
  101. package/lib/model/util.ts +0 -32
  102. package/lib/parsers/texte.d.ts +0 -7
  103. package/lib/parsers/texte.js +0 -228
  104. package/lib/raw_types/ameli.d.ts +0 -914
  105. package/lib/raw_types/ameli.js +0 -5
  106. package/lib/raw_types/ameli.mjs +0 -163
  107. package/lib/raw_types/debats.d.ts +0 -207
  108. package/lib/raw_types/debats.js +0 -5
  109. package/lib/raw_types/debats.mjs +0 -58
  110. package/lib/raw_types/dosleg.d.ts +0 -1619
  111. package/lib/raw_types/dosleg.js +0 -5
  112. package/lib/raw_types/dosleg.mjs +0 -438
  113. package/lib/raw_types/questions.d.ts +0 -419
  114. package/lib/raw_types/questions.js +0 -5
  115. package/lib/raw_types/questions.mjs +0 -11
  116. package/lib/raw_types/senat.d.ts +0 -11368
  117. package/lib/raw_types/senat.js +0 -5
  118. package/lib/raw_types/sens.d.ts +0 -8248
  119. package/lib/raw_types/sens.js +0 -5
  120. package/lib/raw_types/sens.mjs +0 -508
  121. package/lib/raw_types_kysely/ameli.d.ts +0 -915
  122. package/lib/raw_types_kysely/ameli.js +0 -7
  123. package/lib/raw_types_kysely/ameli.mjs +0 -5
  124. package/lib/raw_types_kysely/ameli.ts +0 -951
  125. package/lib/raw_types_kysely/debats.d.ts +0 -207
  126. package/lib/raw_types_kysely/debats.js +0 -7
  127. package/lib/raw_types_kysely/debats.mjs +0 -5
  128. package/lib/raw_types_kysely/debats.ts +0 -222
  129. package/lib/raw_types_kysely/dosleg.d.ts +0 -3532
  130. package/lib/raw_types_kysely/dosleg.js +0 -7
  131. package/lib/raw_types_kysely/dosleg.mjs +0 -5
  132. package/lib/raw_types_kysely/dosleg.ts +0 -3621
  133. package/lib/raw_types_kysely/questions.d.ts +0 -414
  134. package/lib/raw_types_kysely/questions.js +0 -7
  135. package/lib/raw_types_kysely/questions.mjs +0 -5
  136. package/lib/raw_types_kysely/questions.ts +0 -426
  137. package/lib/raw_types_kysely/sens.d.ts +0 -4394
  138. package/lib/raw_types_kysely/sens.js +0 -7
  139. package/lib/raw_types_kysely/sens.mjs +0 -5
  140. package/lib/raw_types_kysely/sens.ts +0 -4499
  141. package/lib/raw_types_schemats/ameli.d.ts +0 -539
  142. package/lib/raw_types_schemats/ameli.js +0 -2
  143. package/lib/raw_types_schemats/ameli.mjs +0 -2
  144. package/lib/raw_types_schemats/ameli.ts +0 -601
  145. package/lib/raw_types_schemats/debats.d.ts +0 -127
  146. package/lib/raw_types_schemats/debats.js +0 -2
  147. package/lib/raw_types_schemats/debats.mjs +0 -2
  148. package/lib/raw_types_schemats/debats.ts +0 -145
  149. package/lib/raw_types_schemats/dosleg.d.ts +0 -977
  150. package/lib/raw_types_schemats/dosleg.js +0 -2
  151. package/lib/raw_types_schemats/dosleg.mjs +0 -2
  152. package/lib/raw_types_schemats/dosleg.ts +0 -2193
  153. package/lib/raw_types_schemats/questions.d.ts +0 -235
  154. package/lib/raw_types_schemats/questions.js +0 -2
  155. package/lib/raw_types_schemats/questions.mjs +0 -2
  156. package/lib/raw_types_schemats/questions.ts +0 -249
  157. package/lib/raw_types_schemats/sens.d.ts +0 -6915
  158. package/lib/raw_types_schemats/sens.js +0 -2
  159. package/lib/raw_types_schemats/sens.mjs +0 -2
  160. package/lib/raw_types_schemats/sens.ts +0 -2907
  161. package/lib/scripts/convert_data.d.ts +0 -1
  162. package/lib/scripts/convert_data.js +0 -354
  163. package/lib/scripts/convert_data.mjs +0 -181
  164. package/lib/scripts/convert_data.ts +0 -243
  165. package/lib/scripts/data-download.d.ts +0 -1
  166. package/lib/scripts/data-download.js +0 -12
  167. package/lib/scripts/datautil.d.ts +0 -8
  168. package/lib/scripts/datautil.js +0 -34
  169. package/lib/scripts/datautil.mjs +0 -16
  170. package/lib/scripts/datautil.ts +0 -19
  171. package/lib/scripts/images/transparent_150x192.jpg +0 -0
  172. package/lib/scripts/images/transparent_155x225.jpg +0 -0
  173. package/lib/scripts/parse_textes.d.ts +0 -1
  174. package/lib/scripts/parse_textes.js +0 -44
  175. package/lib/scripts/parse_textes.mjs +0 -46
  176. package/lib/scripts/parse_textes.ts +0 -65
  177. package/lib/scripts/retrieve_agenda.d.ts +0 -1
  178. package/lib/scripts/retrieve_agenda.js +0 -132
  179. package/lib/scripts/retrieve_cr_commission.d.ts +0 -1
  180. package/lib/scripts/retrieve_cr_commission.js +0 -364
  181. package/lib/scripts/retrieve_cr_seance.d.ts +0 -6
  182. package/lib/scripts/retrieve_cr_seance.js +0 -347
  183. package/lib/scripts/retrieve_documents.d.ts +0 -3
  184. package/lib/scripts/retrieve_documents.js +0 -219
  185. package/lib/scripts/retrieve_documents.mjs +0 -249
  186. package/lib/scripts/retrieve_documents.ts +0 -298
  187. package/lib/scripts/retrieve_open_data.d.ts +0 -1
  188. package/lib/scripts/retrieve_open_data.js +0 -315
  189. package/lib/scripts/retrieve_open_data.mjs +0 -217
  190. package/lib/scripts/retrieve_open_data.ts +0 -268
  191. package/lib/scripts/retrieve_senateurs_photos.d.ts +0 -1
  192. package/lib/scripts/retrieve_senateurs_photos.js +0 -147
  193. package/lib/scripts/retrieve_senateurs_photos.mjs +0 -147
  194. package/lib/scripts/retrieve_senateurs_photos.ts +0 -177
  195. package/lib/scripts/retrieve_videos.d.ts +0 -1
  196. package/lib/scripts/retrieve_videos.js +0 -461
  197. package/lib/scripts/shared/cli_helpers.d.ts +0 -95
  198. package/lib/scripts/shared/cli_helpers.js +0 -91
  199. package/lib/scripts/shared/cli_helpers.ts +0 -36
  200. package/lib/scripts/shared/util.d.ts +0 -4
  201. package/lib/scripts/shared/util.js +0 -35
  202. package/lib/scripts/shared/util.ts +0 -33
  203. package/lib/scripts/test_iter_load.d.ts +0 -1
  204. package/lib/scripts/test_iter_load.js +0 -12
  205. package/lib/src/ameli.d.ts +0 -66
  206. package/lib/src/ameli.js +0 -1
  207. package/lib/src/databases.d.ts +0 -3
  208. package/lib/src/databases.js +0 -26
  209. package/lib/src/db_types/ameli.d.ts +0 -1762
  210. package/lib/src/db_types/ameli.js +0 -1074
  211. package/lib/src/db_types/debats.d.ts +0 -380
  212. package/lib/src/db_types/debats.js +0 -266
  213. package/lib/src/db_types/dosleg.d.ts +0 -2954
  214. package/lib/src/db_types/dosleg.js +0 -2005
  215. package/lib/src/db_types/questions.d.ts +0 -699
  216. package/lib/src/db_types/questions.js +0 -493
  217. package/lib/src/db_types/sens.d.ts +0 -7843
  218. package/lib/src/db_types/sens.js +0 -4691
  219. package/lib/src/debats.d.ts +0 -38
  220. package/lib/src/debats.js +0 -1
  221. package/lib/src/dosleg.d.ts +0 -142
  222. package/lib/src/dosleg.js +0 -193
  223. package/lib/src/model/ameli_postgres.d.ts +0 -67
  224. package/lib/src/model/ameli_postgres.js +0 -150
  225. package/lib/src/other_types/questions.d.ts +0 -2
  226. package/lib/src/other_types/questions.js +0 -1
  227. package/lib/src/questions.d.ts +0 -53
  228. package/lib/src/questions.js +0 -1
  229. package/lib/src/raw_types/senat.d.ts +0 -11372
  230. package/lib/src/raw_types/senat.js +0 -5
  231. package/lib/src/rich_types/agenda.d.ts +0 -45
  232. package/lib/src/rich_types/agenda.js +0 -1
  233. package/lib/src/rich_types/compte_rendu.d.ts +0 -83
  234. package/lib/src/rich_types/compte_rendu.js +0 -1
  235. package/lib/src/rich_types/sessions.d.ts +0 -6
  236. package/lib/src/rich_types/sessions.js +0 -19
  237. package/lib/src/rich_types/texte.d.ts +0 -72
  238. package/lib/src/rich_types/texte.js +0 -15
  239. package/lib/src/scripts/test_iter_load.d.ts +0 -1
  240. package/lib/src/scripts/test_iter_load.js +0 -12
  241. package/lib/src/sens.d.ts +0 -104
  242. package/lib/src/sens.js +0 -1
  243. package/lib/strings.d.ts +0 -1
  244. package/lib/strings.js +0 -18
  245. package/lib/strings.mjs +0 -18
  246. package/lib/strings.ts +0 -26
  247. package/lib/tsconfig.tsbuildinfo +0 -1
  248. package/lib/types/agenda.d.ts +0 -44
  249. package/lib/types/agenda.js +0 -1
  250. package/lib/types/ameli.d.ts +0 -5
  251. package/lib/types/ameli.js +0 -1
  252. package/lib/types/ameli.mjs +0 -13
  253. package/lib/types/ameli.ts +0 -21
  254. package/lib/types/compte_rendu.d.ts +0 -83
  255. package/lib/types/compte_rendu.js +0 -1
  256. package/lib/types/debats.d.ts +0 -2
  257. package/lib/types/debats.js +0 -1
  258. package/lib/types/debats.mjs +0 -2
  259. package/lib/types/debats.ts +0 -6
  260. package/lib/types/dosleg.d.ts +0 -70
  261. package/lib/types/dosleg.js +0 -1
  262. package/lib/types/dosleg.mjs +0 -151
  263. package/lib/types/dosleg.ts +0 -284
  264. package/lib/types/questions.d.ts +0 -2
  265. package/lib/types/questions.js +0 -1
  266. package/lib/types/questions.mjs +0 -1
  267. package/lib/types/questions.ts +0 -3
  268. package/lib/types/sens.d.ts +0 -10
  269. package/lib/types/sens.js +0 -1
  270. package/lib/types/sens.mjs +0 -1
  271. package/lib/types/sens.ts +0 -12
  272. package/lib/types/sessions.d.ts +0 -5
  273. package/lib/types/sessions.js +0 -84
  274. package/lib/types/sessions.mjs +0 -43
  275. package/lib/types/sessions.ts +0 -42
  276. package/lib/types/texte.d.ts +0 -74
  277. package/lib/types/texte.js +0 -16
  278. package/lib/types/texte.mjs +0 -16
  279. package/lib/types/texte.ts +0 -76
  280. package/lib/typings/windows-1252.d.js +0 -2
  281. package/lib/typings/windows-1252.d.mjs +0 -2
  282. package/lib/typings/windows-1252.d.ts +0 -11
  283. package/lib/utils/cr_spliting.d.ts +0 -28
  284. package/lib/utils/cr_spliting.js +0 -265
  285. package/lib/utils/date.d.ts +0 -10
  286. package/lib/utils/date.js +0 -100
  287. package/lib/utils/nvs-timecode.d.ts +0 -7
  288. package/lib/utils/nvs-timecode.js +0 -79
  289. package/lib/utils/reunion_grouping.d.ts +0 -9
  290. package/lib/utils/reunion_grouping.js +0 -361
  291. package/lib/utils/reunion_odj_building.d.ts +0 -5
  292. package/lib/utils/reunion_odj_building.js +0 -154
  293. package/lib/utils/reunion_parsing.d.ts +0 -23
  294. package/lib/utils/reunion_parsing.js +0 -209
  295. package/lib/utils/scoring.d.ts +0 -14
  296. package/lib/utils/scoring.js +0 -147
  297. package/lib/utils/string_cleaning.d.ts +0 -7
  298. package/lib/utils/string_cleaning.js +0 -57
  299. package/lib/validators/config.d.ts +0 -9
  300. package/lib/validators/config.js +0 -10
  301. package/lib/validators/config.mjs +0 -54
  302. package/lib/validators/config.ts +0 -79
  303. package/lib/validators/senat.d.ts +0 -0
  304. package/lib/validators/senat.js +0 -28
  305. package/lib/validators/senat.mjs +0 -24
  306. package/lib/validators/senat.ts +0 -26
@@ -1,249 +0,0 @@
1
- import assert from "assert";
2
- import commandLineArgs from "command-line-args";
3
- import fs from "fs-extra";
4
- import path from "path";
5
- import { iterLoadSenatDossiersLegislatifsRapportUrls, iterLoadSenatDossiersLegislatifsTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER, TEXTE_TRANSFORMED_FOLDER, } from "../loaders";
6
- import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../model/texte";
7
- import { UNDEFINED_SESSION } from "./datautil";
8
- import { commonOptions } from "./shared/cli_helpers";
9
- import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util";
10
- const optionsDefinitions = [
11
- ...commonOptions,
12
- {
13
- help: "sessions of textes to retrieve; leave empty for all",
14
- multiple: true,
15
- name: "sessions",
16
- type: String,
17
- },
18
- {
19
- help: "parse and convert documents into JSON (textes only for now, requires format xml)",
20
- name: "parseDocuments",
21
- type: Boolean,
22
- },
23
- {
24
- alias: "F",
25
- help: "formats of documents to retrieve (xml/html/pdf for textes, html/pdf for rapports); leave empty for all",
26
- multiple: true,
27
- name: "formats",
28
- type: String,
29
- },
30
- {
31
- help: "types of documents to retrieve (textes/rapports); leave empty for all",
32
- multiple: true,
33
- name: "types",
34
- type: String,
35
- },
36
- {
37
- help: "force retrieve all documents, even already retrieved ones",
38
- name: "force",
39
- type: Boolean,
40
- },
41
- ];
42
- const options = commandLineArgs(optionsDefinitions);
43
- const textDecoder = new TextDecoder("utf8");
44
- async function retrieveDocument(documentUrl) {
45
- if (!options.silent) {
46
- console.log(`Retrieving document ${documentUrl}…`);
47
- }
48
- try {
49
- const response = await fetchWithRetry(documentUrl);
50
- if (!response.ok) {
51
- if (response.status === 404) {
52
- console.warn(`Texte ${documentUrl} not found`);
53
- }
54
- else {
55
- console.error(`An error occurred while retrieving texte ${documentUrl}: ${response.status}`);
56
- }
57
- return null;
58
- }
59
- return response.arrayBuffer();
60
- }
61
- catch (error) {
62
- console.error(error.message);
63
- return null;
64
- }
65
- }
66
- async function retrieveTextes(dataDir) {
67
- const textesDir = path.join(dataDir, TEXTE_FOLDER);
68
- fs.ensureDirSync(textesDir);
69
- const originalTextesDir = path.join(textesDir, TEXTE_ORIGINAL_FOLDER);
70
- const transformedTextesDir = path.join(textesDir, TEXTE_TRANSFORMED_FOLDER);
71
- if (options.parseDocuments) {
72
- ensureAndClearDir(transformedTextesDir);
73
- }
74
- let retrievedTextesCount = 0;
75
- const texteUrlsNotFoundOrError = [];
76
- const texteUrlsParseError = [];
77
- for (const session of options.sessions) {
78
- for (const { item: texteMetadata } of iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session)) {
79
- const texteDir = path.join(originalTextesDir, `${texteMetadata.session ?? UNDEFINED_SESSION}`, texteMetadata.name);
80
- fs.ensureDirSync(texteDir);
81
- let exposeDesMotifsContent = null;
82
- if (texteMetadata.url_expose_des_motifs) {
83
- if (!options.silent) {
84
- console.log("Retrieving exposé des motifs…");
85
- }
86
- const exposeDesMotifsPath = path.join(texteDir, `${texteMetadata.name}-expose.html`);
87
- exposeDesMotifsContent = await retrieveDocument(texteMetadata.url_expose_des_motifs.toString());
88
- if (!exposeDesMotifsContent) {
89
- continue;
90
- }
91
- fs.writeFileSync(exposeDesMotifsPath, Buffer.from(exposeDesMotifsContent));
92
- }
93
- if (isOptionEmptyOrHasValue(options.formats, "xml")) {
94
- const textePath = path.join(texteDir, `${texteMetadata.name}.xml`);
95
- let texteBuffer = null;
96
- if (!options.force && fs.existsSync(textePath)) {
97
- if (!options.silent) {
98
- console.info(`Already retrieved texte ${textePath}…`);
99
- }
100
- }
101
- else {
102
- texteBuffer = await retrieveDocument(texteMetadata.url_xml.toString());
103
- if (!texteBuffer) {
104
- texteUrlsNotFoundOrError.push(texteMetadata.url_xml);
105
- continue;
106
- }
107
- fs.writeFileSync(textePath, Buffer.from(texteBuffer));
108
- retrievedTextesCount++;
109
- }
110
- if (options.parseDocuments) {
111
- if (!options.silent) {
112
- console.log(`Parsing texte ${texteMetadata.name}.xml…`);
113
- }
114
- let parsedTexte = null;
115
- if (texteBuffer) {
116
- const texteXml = textDecoder.decode(texteBuffer);
117
- parsedTexte = parseTexte(texteXml);
118
- }
119
- else {
120
- parsedTexte = await parseTexteFromFile(textePath);
121
- }
122
- if (!parsedTexte) {
123
- texteUrlsParseError.push(texteMetadata.url_xml);
124
- continue;
125
- }
126
- if (exposeDesMotifsContent) {
127
- if (!options.silent) {
128
- console.log("Parsing exposé des motifs…");
129
- }
130
- const exposeDesMotifsHtml = textDecoder.decode(exposeDesMotifsContent);
131
- parsedTexte.exposeDesMotifs = parseExposeDesMotifs(exposeDesMotifsHtml);
132
- }
133
- const transformedTexteDir = path.join(transformedTextesDir, `${texteMetadata.session ?? UNDEFINED_SESSION}`, texteMetadata.name);
134
- fs.ensureDirSync(transformedTexteDir);
135
- fs.writeJSONSync(path.join(transformedTexteDir, `${texteMetadata.name}.json`), parsedTexte, { spaces: 2 });
136
- }
137
- }
138
- if (isOptionEmptyOrHasValue(options.formats, "html")) {
139
- const textePath = path.join(texteDir, `${texteMetadata.name}.html`);
140
- if (!options.force && fs.existsSync(textePath)) {
141
- if (!options.silent) {
142
- console.info(`Already retrieved texte ${textePath}…`);
143
- }
144
- }
145
- else {
146
- const texteBuffer = await retrieveDocument(texteMetadata.url_html.toString());
147
- if (!texteBuffer) {
148
- texteUrlsNotFoundOrError.push(texteMetadata.url_html);
149
- continue;
150
- }
151
- fs.writeFileSync(textePath, Buffer.from(texteBuffer));
152
- retrievedTextesCount++;
153
- }
154
- }
155
- if (isOptionEmptyOrHasValue(options.formats, "pdf")) {
156
- const textePath = path.join(texteDir, `${texteMetadata.name}.pdf`);
157
- if (!options.force && fs.existsSync(textePath)) {
158
- if (!options.silent) {
159
- console.info(`Already retrieved texte ${textePath}…`);
160
- }
161
- }
162
- else {
163
- const texteBuffer = await retrieveDocument(texteMetadata.url_pdf.toString());
164
- if (!texteBuffer) {
165
- texteUrlsNotFoundOrError.push(texteMetadata.url_pdf);
166
- continue;
167
- }
168
- fs.writeFileSync(textePath, Buffer.from(texteBuffer));
169
- retrievedTextesCount++;
170
- }
171
- }
172
- }
173
- }
174
- if (options.verbose) {
175
- console.log(`${retrievedTextesCount} textes retrieved`);
176
- console.log(`${texteUrlsNotFoundOrError.length} textes failed to be retrieved with URLs ${texteUrlsNotFoundOrError.join(", ")}`);
177
- if (options.parseDocuments) {
178
- console.log(`${texteUrlsParseError.length} textes failed to be parsed with URLs ${texteUrlsParseError.join(", ")}`);
179
- }
180
- }
181
- }
182
- async function retrieveRapports(dataDir) {
183
- const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
184
- fs.ensureDirSync(rapportsDir);
185
- let retrievedRapportsCount = 0;
186
- const rapportUrlsNotFoundOrError = [];
187
- for (const session of options.sessions) {
188
- for (const { item: rapportMetadata } of iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session)) {
189
- const rapportDir = path.join(rapportsDir, `${rapportMetadata.session ?? UNDEFINED_SESSION}`, rapportMetadata.name);
190
- fs.ensureDirSync(rapportDir);
191
- if (isOptionEmptyOrHasValue(options.formats, "html")) {
192
- const rapportPath = path.join(rapportDir, `${rapportMetadata.name}.html`);
193
- if (!options.force && fs.existsSync(rapportPath)) {
194
- if (!options.silent) {
195
- console.info(`Already retrieved rapport ${rapportPath}…`);
196
- }
197
- continue;
198
- }
199
- const rapportBuffer = await retrieveDocument(rapportMetadata.url_html.toString());
200
- if (!rapportBuffer) {
201
- rapportUrlsNotFoundOrError.push(rapportMetadata.url_html);
202
- continue;
203
- }
204
- fs.writeFileSync(rapportPath, Buffer.from(rapportBuffer));
205
- retrievedRapportsCount++;
206
- }
207
- if (isOptionEmptyOrHasValue(options.formats, "pdf")) {
208
- const rapportPath = path.join(rapportDir, `${rapportMetadata.name}.pdf`);
209
- if (!options.force && fs.existsSync(rapportPath)) {
210
- if (!options.silent) {
211
- console.info(`Already retrieved rapport ${rapportPath}…`);
212
- }
213
- continue;
214
- }
215
- const rapportBuffer = await retrieveDocument(rapportMetadata.url_pdf.toString());
216
- if (!rapportBuffer) {
217
- rapportUrlsNotFoundOrError.push(rapportMetadata.url_pdf);
218
- continue;
219
- }
220
- fs.writeFileSync(rapportPath, Buffer.from(rapportBuffer));
221
- retrievedRapportsCount++;
222
- }
223
- }
224
- }
225
- if (options.verbose) {
226
- console.log(`${retrievedRapportsCount} rapports retrieved`);
227
- console.log(`${rapportUrlsNotFoundOrError.length} rapports failed with URLs ${rapportUrlsNotFoundOrError.join(", ")}`);
228
- }
229
- }
230
- async function main() {
231
- const dataDir = options.dataDir;
232
- assert(dataDir, "Missing argument: data directory");
233
- console.time("documents processing time");
234
- if (isOptionEmptyOrHasValue(options.types, "textes")) {
235
- await retrieveTextes(dataDir);
236
- }
237
- if (isOptionEmptyOrHasValue(options.types, "rapports")) {
238
- await retrieveRapports(dataDir);
239
- }
240
- if (!options.silent) {
241
- console.timeEnd("documents processing time");
242
- }
243
- }
244
- main()
245
- .then(() => process.exit(0))
246
- .catch((error) => {
247
- console.log(error);
248
- process.exit(1);
249
- });
@@ -1,298 +0,0 @@
1
- import assert from "assert"
2
- import commandLineArgs from "command-line-args"
3
- import fs from "fs-extra"
4
- import path from "path"
5
-
6
- import {
7
- iterLoadSenatDossiersLegislatifsRapportUrls,
8
- iterLoadSenatDossiersLegislatifsTexteUrls,
9
- RAPPORT_FOLDER,
10
- TEXTE_FOLDER,
11
- TEXTE_ORIGINAL_FOLDER,
12
- TEXTE_TRANSFORMED_FOLDER,
13
- } from "../loaders"
14
- import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../model/texte"
15
- import { UNDEFINED_SESSION } from "./datautil"
16
- import { commonOptions } from "./shared/cli_helpers"
17
- import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util"
18
-
19
- const optionsDefinitions = [
20
- ...commonOptions,
21
- {
22
- help: "sessions of textes to retrieve; leave empty for all",
23
- multiple: true,
24
- name: "sessions",
25
- type: String,
26
- },
27
- {
28
- help: "parse and convert documents into JSON (textes only for now, requires format xml)",
29
- name: "parseDocuments",
30
- type: Boolean,
31
- },
32
- {
33
- alias: "F",
34
- help: "formats of documents to retrieve (xml/html/pdf for textes, html/pdf for rapports); leave empty for all",
35
- multiple: true,
36
- name: "formats",
37
- type: String,
38
- },
39
- {
40
- help: "types of documents to retrieve (textes/rapports); leave empty for all",
41
- multiple: true,
42
- name: "types",
43
- type: String,
44
- },
45
- {
46
- help: "force retrieve all documents, even already retrieved ones",
47
- name: "force",
48
- type: Boolean,
49
- },
50
- ]
51
- const options = commandLineArgs(optionsDefinitions)
52
-
53
- const textDecoder = new TextDecoder("utf8")
54
-
55
- async function retrieveDocument (documentUrl: string): Promise<ArrayBuffer | null> {
56
- if (!options.silent) {
57
- console.log(`Retrieving document ${documentUrl}…`)
58
- }
59
-
60
- try {
61
- const response = await fetchWithRetry(documentUrl)
62
- if (!response.ok) {
63
- if (response.status === 404) {
64
- console.warn(`Texte ${documentUrl} not found`)
65
- } else {
66
- console.error(`An error occurred while retrieving texte ${documentUrl}: ${response.status}`)
67
- }
68
- return null
69
- }
70
- return response.arrayBuffer()
71
- } catch (error: any) {
72
- console.error(error.message)
73
- return null
74
- }
75
- }
76
-
77
- async function retrieveTextes (dataDir: string) {
78
- const textesDir = path.join(dataDir, TEXTE_FOLDER)
79
- fs.ensureDirSync(textesDir)
80
-
81
- const originalTextesDir = path.join(textesDir, TEXTE_ORIGINAL_FOLDER)
82
- const transformedTextesDir = path.join(textesDir, TEXTE_TRANSFORMED_FOLDER)
83
- if (options.parseDocuments) {
84
- ensureAndClearDir(transformedTextesDir)
85
- }
86
-
87
- let retrievedTextesCount = 0
88
- const texteUrlsNotFoundOrError = []
89
- const texteUrlsParseError = []
90
-
91
- for (const session of options.sessions) {
92
- for (const { item: texteMetadata } of iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session)) {
93
- const texteDir = path.join(originalTextesDir, `${texteMetadata.session ?? UNDEFINED_SESSION}`, texteMetadata.name)
94
- fs.ensureDirSync(texteDir)
95
-
96
- let exposeDesMotifsContent = null
97
- if (texteMetadata.url_expose_des_motifs) {
98
- if (!options.silent) {
99
- console.log("Retrieving exposé des motifs…")
100
- }
101
-
102
- const exposeDesMotifsPath = path.join(texteDir, `${texteMetadata.name}-expose.html`)
103
-
104
- exposeDesMotifsContent = await retrieveDocument(texteMetadata.url_expose_des_motifs.toString())
105
- if (!exposeDesMotifsContent) {
106
- continue
107
- }
108
- fs.writeFileSync(exposeDesMotifsPath, Buffer.from(exposeDesMotifsContent))
109
- }
110
-
111
- if (isOptionEmptyOrHasValue(options.formats, "xml")) {
112
- const textePath = path.join(texteDir, `${texteMetadata.name}.xml`)
113
- let texteBuffer = null
114
-
115
- if (!options.force && fs.existsSync(textePath)) {
116
- if (!options.silent) {
117
- console.info(`Already retrieved texte ${textePath}…`)
118
- }
119
- } else {
120
- texteBuffer = await retrieveDocument(texteMetadata.url_xml.toString())
121
- if (!texteBuffer) {
122
- texteUrlsNotFoundOrError.push(texteMetadata.url_xml)
123
- continue
124
- }
125
- fs.writeFileSync(textePath, Buffer.from(texteBuffer))
126
- retrievedTextesCount++
127
- }
128
-
129
- if (options.parseDocuments) {
130
- if (!options.silent) {
131
- console.log(`Parsing texte ${texteMetadata.name}.xml…`)
132
- }
133
-
134
- let parsedTexte = null
135
- if (texteBuffer) {
136
- const texteXml = textDecoder.decode(texteBuffer)
137
- parsedTexte = parseTexte(texteXml)
138
- } else {
139
- parsedTexte = await parseTexteFromFile(textePath)
140
- }
141
-
142
- if (!parsedTexte) {
143
- texteUrlsParseError.push(texteMetadata.url_xml)
144
- continue
145
- }
146
-
147
- if (exposeDesMotifsContent) {
148
- if (!options.silent) {
149
- console.log("Parsing exposé des motifs…")
150
- }
151
-
152
- const exposeDesMotifsHtml = textDecoder.decode(exposeDesMotifsContent)
153
- parsedTexte.exposeDesMotifs = parseExposeDesMotifs(exposeDesMotifsHtml)
154
- }
155
-
156
- const transformedTexteDir
157
- = path.join(transformedTextesDir, `${texteMetadata.session ?? UNDEFINED_SESSION}`, texteMetadata.name)
158
- fs.ensureDirSync(transformedTexteDir)
159
-
160
- fs.writeJSONSync(path.join(transformedTexteDir, `${texteMetadata.name}.json`), parsedTexte, { spaces: 2 })
161
- }
162
- }
163
-
164
- if (isOptionEmptyOrHasValue(options.formats, "html")) {
165
- const textePath = path.join(texteDir, `${texteMetadata.name}.html`)
166
-
167
- if (!options.force && fs.existsSync(textePath)) {
168
- if (!options.silent) {
169
- console.info(`Already retrieved texte ${textePath}…`)
170
- }
171
- } else {
172
-
173
- const texteBuffer = await retrieveDocument(texteMetadata.url_html.toString())
174
- if (!texteBuffer) {
175
- texteUrlsNotFoundOrError.push(texteMetadata.url_html)
176
- continue
177
- }
178
- fs.writeFileSync(textePath, Buffer.from(texteBuffer))
179
- retrievedTextesCount++
180
- }
181
- }
182
-
183
- if (isOptionEmptyOrHasValue(options.formats, "pdf")) {
184
- const textePath = path.join(texteDir, `${texteMetadata.name}.pdf`)
185
-
186
- if (!options.force && fs.existsSync(textePath)) {
187
- if (!options.silent) {
188
- console.info(`Already retrieved texte ${textePath}…`)
189
- }
190
- } else {
191
-
192
- const texteBuffer = await retrieveDocument(texteMetadata.url_pdf.toString())
193
- if (!texteBuffer) {
194
- texteUrlsNotFoundOrError.push(texteMetadata.url_pdf)
195
- continue
196
- }
197
- fs.writeFileSync(textePath, Buffer.from(texteBuffer))
198
- retrievedTextesCount++
199
- }
200
- }
201
- }
202
- }
203
-
204
- if (options.verbose) {
205
- console.log(`${retrievedTextesCount} textes retrieved`)
206
- console.log(
207
- `${texteUrlsNotFoundOrError.length} textes failed to be retrieved with URLs ${texteUrlsNotFoundOrError.join(", ")}`
208
- )
209
- if (options.parseDocuments) {
210
- console.log(`${texteUrlsParseError.length} textes failed to be parsed with URLs ${texteUrlsParseError.join(", ")}`)
211
- }
212
- }
213
- }
214
-
215
- async function retrieveRapports (dataDir: string) {
216
- const rapportsDir = path.join(dataDir, RAPPORT_FOLDER)
217
- fs.ensureDirSync(rapportsDir)
218
-
219
- let retrievedRapportsCount = 0
220
- const rapportUrlsNotFoundOrError = []
221
-
222
- for (const session of options.sessions) {
223
- for (const { item: rapportMetadata } of iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session)) {
224
- const rapportDir = path.join(rapportsDir, `${rapportMetadata.session ?? UNDEFINED_SESSION}`, rapportMetadata.name)
225
- fs.ensureDirSync(rapportDir)
226
-
227
- if (isOptionEmptyOrHasValue(options.formats, "html")) {
228
- const rapportPath = path.join(rapportDir, `${rapportMetadata.name}.html`)
229
-
230
- if (!options.force && fs.existsSync(rapportPath)) {
231
- if (!options.silent) {
232
- console.info(`Already retrieved rapport ${rapportPath}…`)
233
- }
234
- continue
235
- }
236
-
237
- const rapportBuffer = await retrieveDocument(rapportMetadata.url_html.toString())
238
- if (!rapportBuffer) {
239
- rapportUrlsNotFoundOrError.push(rapportMetadata.url_html)
240
- continue
241
- }
242
- fs.writeFileSync(rapportPath, Buffer.from(rapportBuffer))
243
- retrievedRapportsCount++
244
- }
245
-
246
- if (isOptionEmptyOrHasValue(options.formats, "pdf")) {
247
- const rapportPath = path.join(rapportDir, `${rapportMetadata.name}.pdf`)
248
-
249
- if (!options.force && fs.existsSync(rapportPath)) {
250
- if (!options.silent) {
251
- console.info(`Already retrieved rapport ${rapportPath}…`)
252
- }
253
- continue
254
- }
255
-
256
- const rapportBuffer = await retrieveDocument(rapportMetadata.url_pdf.toString())
257
- if (!rapportBuffer) {
258
- rapportUrlsNotFoundOrError.push(rapportMetadata.url_pdf)
259
- continue
260
- }
261
- fs.writeFileSync(rapportPath, Buffer.from(rapportBuffer))
262
- retrievedRapportsCount++
263
- }
264
- }
265
- }
266
-
267
- if (options.verbose) {
268
- console.log(`${retrievedRapportsCount} rapports retrieved`)
269
- console.log(
270
- `${rapportUrlsNotFoundOrError.length} rapports failed with URLs ${rapportUrlsNotFoundOrError.join(", ")}`
271
- )
272
- }
273
- }
274
-
275
- async function main() {
276
- const dataDir = options.dataDir
277
- assert(dataDir, "Missing argument: data directory")
278
-
279
- console.time("documents processing time")
280
-
281
- if (isOptionEmptyOrHasValue(options.types, "textes")) {
282
- await retrieveTextes(dataDir)
283
- }
284
- if (isOptionEmptyOrHasValue(options.types, "rapports")) {
285
- await retrieveRapports(dataDir)
286
- }
287
-
288
- if (!options.silent) {
289
- console.timeEnd("documents processing time")
290
- }
291
- }
292
-
293
- main()
294
- .then(() => process.exit(0))
295
- .catch((error) => {
296
- console.log(error)
297
- process.exit(1)
298
- })
@@ -1 +0,0 @@
1
- export {};