@tricoteuses/senat 1.1.2 → 1.3.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (259)
  1. package/README.md +53 -15
  2. package/lib/aggregates.d.ts +4 -6
  3. package/lib/aggregates.mjs +908 -780
  4. package/lib/aggregates.ts +8 -122
  5. package/lib/data/legislatures.json +38 -0
  6. package/lib/databases.d.ts +9 -0
  7. package/lib/databases.js +41 -11
  8. package/lib/databases.mjs +32 -1
  9. package/lib/databases.ts +40 -1
  10. package/lib/fields.mjs +64 -18
  11. package/lib/index.d.ts +5 -7
  12. package/lib/index.js +8 -112
  13. package/lib/index.mjs +4 -7
  14. package/lib/index.ts +9 -30
  15. package/lib/inserters.d.ts +0 -4
  16. package/lib/inserters.mjs +461 -337
  17. package/lib/inserters.ts +0 -24
  18. package/lib/legislatures.json +38 -0
  19. package/lib/loaders.d.ts +64 -0
  20. package/lib/loaders.js +682 -0
  21. package/lib/loaders.mjs +158 -0
  22. package/lib/loaders.ts +271 -0
  23. package/lib/model/ameli.d.ts +105 -4
  24. package/lib/model/ameli.js +20 -160
  25. package/lib/model/ameli.mjs +84 -57
  26. package/lib/model/ameli.ts +94 -80
  27. package/lib/model/debats.d.ts +0 -4
  28. package/lib/model/debats.js +1 -122
  29. package/lib/model/debats.mjs +1 -43
  30. package/lib/model/debats.ts +0 -68
  31. package/lib/model/dosleg.d.ts +227 -29
  32. package/lib/model/dosleg.js +92 -832
  33. package/lib/model/dosleg.mjs +196 -337
  34. package/lib/model/dosleg.ts +213 -531
  35. package/lib/model/index.d.ts +4 -5
  36. package/lib/model/index.js +14 -15
  37. package/lib/model/index.mjs +4 -5
  38. package/lib/model/index.ts +8 -5
  39. package/lib/model/questions.d.ts +88 -2
  40. package/lib/model/questions.js +40 -45
  41. package/lib/model/questions.mjs +71 -8
  42. package/lib/model/questions.ts +90 -11
  43. package/lib/model/sens.d.ts +524 -2
  44. package/lib/model/sens.js +172 -49
  45. package/lib/model/sens.mjs +415 -9
  46. package/lib/model/sens.ts +514 -16
  47. package/lib/model/texte.d.ts +7 -0
  48. package/lib/model/texte.js +321 -0
  49. package/lib/model/texte.mjs +208 -0
  50. package/lib/model/texte.ts +229 -0
  51. package/lib/model/util.d.ts +7 -1
  52. package/lib/model/util.js +37 -53
  53. package/lib/model/util.mjs +19 -10
  54. package/lib/model/util.ts +30 -14
  55. package/lib/raw_types/ameli.d.ts +912 -538
  56. package/lib/raw_types/ameli.js +1 -39
  57. package/lib/raw_types/ameli.mjs +4 -1
  58. package/lib/raw_types/ameli.ts +947 -597
  59. package/lib/raw_types/debats.d.ts +204 -124
  60. package/lib/raw_types/debats.js +1 -18
  61. package/lib/raw_types/debats.mjs +4 -1
  62. package/lib/raw_types/debats.ts +218 -141
  63. package/lib/raw_types/dosleg.d.ts +3533 -2024
  64. package/lib/raw_types/dosleg.js +1 -92
  65. package/lib/raw_types/dosleg.mjs +4 -1
  66. package/lib/raw_types/dosleg.ts +3621 -2189
  67. package/lib/raw_types/questions.d.ts +404 -220
  68. package/lib/raw_types/questions.js +1 -18
  69. package/lib/raw_types/questions.mjs +4 -1
  70. package/lib/raw_types/questions.ts +404 -226
  71. package/lib/raw_types/sens.d.ts +4391 -2706
  72. package/lib/raw_types/sens.js +1 -108
  73. package/lib/raw_types/sens.mjs +4 -1
  74. package/lib/raw_types/sens.ts +4495 -2903
  75. package/lib/raw_types_kysely/ameli.d.ts +6 -0
  76. package/lib/raw_types_kysely/ameli.mjs +7 -0
  77. package/lib/raw_types_kysely/ameli.ts +6 -0
  78. package/lib/raw_types_kysely/debats.d.ts +6 -0
  79. package/lib/raw_types_kysely/debats.mjs +7 -0
  80. package/lib/raw_types_kysely/debats.ts +6 -0
  81. package/lib/raw_types_kysely/dosleg.d.ts +6 -0
  82. package/lib/raw_types_kysely/dosleg.mjs +7 -0
  83. package/lib/raw_types_kysely/dosleg.ts +6 -0
  84. package/lib/raw_types_kysely/questions.d.ts +6 -0
  85. package/lib/raw_types_kysely/questions.mjs +7 -0
  86. package/lib/raw_types_kysely/questions.ts +6 -0
  87. package/lib/raw_types_kysely/sens.d.ts +6 -0
  88. package/lib/raw_types_kysely/sens.mjs +7 -0
  89. package/lib/raw_types_kysely/sens.ts +6 -0
  90. package/lib/raw_types_kysely/texte.d.ts +45 -0
  91. package/lib/raw_types_kysely/texte.mjs +7 -0
  92. package/lib/raw_types_kysely/texte.ts +53 -0
  93. package/lib/raw_types_schemats/ameli.d.ts +541 -0
  94. package/lib/raw_types_schemats/ameli.js +45 -0
  95. package/lib/raw_types_schemats/ameli.mjs +2 -0
  96. package/lib/raw_types_schemats/ameli.ts +601 -0
  97. package/lib/raw_types_schemats/debats.d.ts +127 -0
  98. package/lib/raw_types_schemats/debats.js +24 -0
  99. package/lib/raw_types_schemats/debats.mjs +2 -0
  100. package/lib/raw_types_schemats/debats.ts +145 -0
  101. package/lib/raw_types_schemats/dosleg.d.ts +2029 -0
  102. package/lib/raw_types_schemats/dosleg.js +98 -0
  103. package/lib/raw_types_schemats/dosleg.mjs +2 -0
  104. package/lib/raw_types_schemats/dosleg.ts +2195 -0
  105. package/lib/raw_types_schemats/questions.d.ts +233 -0
  106. package/lib/raw_types_schemats/questions.js +24 -0
  107. package/lib/raw_types_schemats/questions.mjs +2 -0
  108. package/lib/raw_types_schemats/questions.ts +251 -0
  109. package/lib/raw_types_schemats/sens.d.ts +2709 -0
  110. package/lib/raw_types_schemats/sens.js +114 -0
  111. package/lib/raw_types_schemats/sens.mjs +2 -0
  112. package/lib/raw_types_schemats/sens.ts +2907 -0
  113. package/lib/scripts/convert_data.js +574 -215
  114. package/lib/scripts/convert_data.mjs +134 -99
  115. package/lib/scripts/convert_data.ts +173 -112
  116. package/lib/scripts/datautil.d.ts +5 -0
  117. package/lib/scripts/datautil.js +64 -0
  118. package/lib/scripts/datautil.mjs +16 -0
  119. package/lib/scripts/datautil.ts +19 -0
  120. package/lib/scripts/parse_textes.js +132 -0
  121. package/lib/scripts/parse_textes.mjs +46 -0
  122. package/lib/scripts/parse_textes.ts +65 -0
  123. package/lib/scripts/retrieve_documents.d.ts +1 -0
  124. package/lib/scripts/retrieve_documents.js +521 -0
  125. package/lib/scripts/retrieve_documents.mjs +249 -0
  126. package/lib/scripts/retrieve_documents.ts +298 -0
  127. package/lib/scripts/retrieve_open_data.js +77 -69
  128. package/lib/scripts/retrieve_open_data.mjs +48 -49
  129. package/lib/scripts/retrieve_open_data.ts +74 -58
  130. package/lib/scripts/retrieve_senateurs_photos.js +45 -63
  131. package/lib/scripts/retrieve_senateurs_photos.mjs +4 -21
  132. package/lib/scripts/retrieve_senateurs_photos.ts +6 -29
  133. package/lib/scripts/retrieve_textes.mjs +325 -74
  134. package/lib/scripts/retrieve_textes.ts +111 -63
  135. package/lib/scripts/shared/cli_helpers.d.ts +44 -0
  136. package/lib/scripts/shared/cli_helpers.js +35 -0
  137. package/lib/scripts/shared/cli_helpers.ts +36 -0
  138. package/lib/scripts/shared/util.d.ts +3 -0
  139. package/lib/scripts/shared/util.js +102 -0
  140. package/lib/scripts/shared/util.ts +33 -0
  141. package/lib/src/aggregates.d.ts +52 -0
  142. package/lib/src/aggregates.mjs +726 -0
  143. package/lib/src/config.d.ts +2 -0
  144. package/lib/src/config.mjs +16 -0
  145. package/lib/src/databases.d.ts +18 -0
  146. package/lib/src/databases.mjs +55 -0
  147. package/lib/src/datasets.d.ts +28 -0
  148. package/lib/src/datasets.mjs +78 -0
  149. package/lib/src/fields.d.ts +10 -0
  150. package/lib/src/fields.mjs +22 -0
  151. package/lib/src/index.d.ts +8 -0
  152. package/lib/src/index.mjs +7 -0
  153. package/lib/src/inserters.d.ts +98 -0
  154. package/lib/src/inserters.mjs +360 -0
  155. package/lib/src/loaders.d.ts +36 -0
  156. package/lib/src/loaders.mjs +107 -0
  157. package/lib/src/model/ameli.d.ts +4 -0
  158. package/lib/src/model/ameli.js +57 -0
  159. package/lib/src/model/debats.d.ts +4 -0
  160. package/lib/src/model/debats.js +43 -0
  161. package/lib/src/model/dosleg.d.ts +197 -0
  162. package/lib/src/model/dosleg.js +169 -0
  163. package/lib/src/model/index.d.ts +4 -0
  164. package/lib/src/model/index.js +4 -0
  165. package/lib/src/model/questions.d.ts +89 -0
  166. package/lib/src/model/questions.js +76 -0
  167. package/lib/src/model/sens.d.ts +390 -0
  168. package/lib/src/model/sens.js +339 -0
  169. package/lib/src/model/texte.d.ts +7 -0
  170. package/lib/src/model/texte.js +183 -0
  171. package/lib/src/raw_types_kysely/ameli.d.ts +915 -0
  172. package/lib/src/raw_types_kysely/ameli.js +5 -0
  173. package/lib/src/raw_types_kysely/debats.d.ts +207 -0
  174. package/lib/src/raw_types_kysely/debats.js +5 -0
  175. package/lib/src/raw_types_kysely/dosleg.d.ts +3532 -0
  176. package/lib/src/raw_types_kysely/dosleg.js +5 -0
  177. package/lib/src/raw_types_kysely/questions.d.ts +414 -0
  178. package/lib/src/raw_types_kysely/questions.js +5 -0
  179. package/lib/src/raw_types_kysely/sens.d.ts +4394 -0
  180. package/lib/src/raw_types_kysely/sens.js +5 -0
  181. package/lib/src/raw_types_schemats/ameli.d.ts +541 -0
  182. package/lib/src/raw_types_schemats/ameli.js +2 -0
  183. package/lib/src/raw_types_schemats/debats.d.ts +127 -0
  184. package/lib/src/raw_types_schemats/debats.js +2 -0
  185. package/lib/src/raw_types_schemats/dosleg.d.ts +2027 -0
  186. package/lib/src/raw_types_schemats/dosleg.js +2 -0
  187. package/lib/src/raw_types_schemats/questions.d.ts +231 -0
  188. package/lib/src/raw_types_schemats/questions.js +2 -0
  189. package/lib/src/raw_types_schemats/sens.d.ts +2709 -0
  190. package/lib/src/raw_types_schemats/sens.js +2 -0
  191. package/lib/src/scripts/convert_data.d.ts +1 -0
  192. package/lib/src/scripts/convert_data.js +95 -0
  193. package/lib/src/scripts/datautil.d.ts +5 -0
  194. package/lib/src/scripts/datautil.js +16 -0
  195. package/lib/src/scripts/parse_textes.d.ts +1 -0
  196. package/lib/src/scripts/parse_textes.js +47 -0
  197. package/lib/src/scripts/retrieve_documents.d.ts +1 -0
  198. package/lib/src/scripts/retrieve_documents.js +258 -0
  199. package/lib/src/scripts/retrieve_open_data.d.ts +1 -0
  200. package/lib/src/scripts/retrieve_open_data.js +214 -0
  201. package/lib/src/scripts/retrieve_senateurs_photos.d.ts +1 -0
  202. package/lib/src/scripts/retrieve_senateurs_photos.js +147 -0
  203. package/lib/src/scripts/shared/cli_helpers.d.ts +44 -0
  204. package/lib/src/scripts/shared/cli_helpers.js +32 -0
  205. package/lib/src/scripts/shared/util.d.ts +3 -0
  206. package/lib/src/scripts/shared/util.js +28 -0
  207. package/lib/src/strings.d.ts +1 -0
  208. package/lib/src/strings.mjs +18 -0
  209. package/lib/src/types/ameli.d.ts +10 -0
  210. package/lib/src/types/ameli.js +13 -0
  211. package/lib/src/types/debats.d.ts +4 -0
  212. package/lib/src/types/debats.js +2 -0
  213. package/lib/src/types/dosleg.d.ts +98 -0
  214. package/lib/src/types/dosleg.js +151 -0
  215. package/lib/src/types/questions.d.ts +2 -0
  216. package/lib/src/types/questions.js +1 -0
  217. package/lib/src/types/sens.d.ts +10 -0
  218. package/lib/src/types/sens.js +1 -0
  219. package/lib/src/types/sessions.d.ts +42 -0
  220. package/lib/src/types/sessions.js +43 -0
  221. package/lib/src/types/texte.d.ts +61 -0
  222. package/lib/src/types/texte.js +16 -0
  223. package/lib/src/validators/config.d.ts +1 -0
  224. package/lib/src/validators/config.js +54 -0
  225. package/lib/src/validators/senat.d.ts +0 -0
  226. package/lib/src/validators/senat.js +24 -0
  227. package/lib/types/ameli.d.ts +1 -1
  228. package/lib/types/ameli.js +2 -2
  229. package/lib/types/ameli.ts +1 -1
  230. package/lib/types/debats.d.ts +1 -1
  231. package/lib/types/debats.js +2 -2
  232. package/lib/types/debats.ts +1 -1
  233. package/lib/types/dosleg.d.ts +2 -2
  234. package/lib/types/dosleg.js +2 -2
  235. package/lib/types/dosleg.ts +2 -2
  236. package/lib/types/questions.d.ts +1 -1
  237. package/lib/types/questions.js +2 -2
  238. package/lib/types/questions.ts +1 -1
  239. package/lib/types/sens.d.ts +1 -3
  240. package/lib/types/sens.js +2 -5
  241. package/lib/types/sens.mjs +1 -23
  242. package/lib/types/sens.ts +1 -25
  243. package/lib/types/sessions.d.ts +42 -0
  244. package/lib/types/sessions.js +51 -0
  245. package/lib/types/sessions.mjs +43 -0
  246. package/lib/types/sessions.ts +42 -0
  247. package/lib/types/texte.d.ts +65 -0
  248. package/lib/types/texte.js +24 -0
  249. package/lib/types/texte.mjs +16 -0
  250. package/lib/types/texte.ts +76 -0
  251. package/package.json +35 -10
  252. package/lib/aggregates.js +0 -1122
  253. package/lib/fields.js +0 -68
  254. package/lib/inserters.js +0 -518
  255. package/lib/scripts/fix_db.js +0 -144
  256. package/lib/scripts/fix_db.mjs +0 -64
  257. package/lib/scripts/fix_db.ts +0 -75
  258. package/lib/scripts/retrieve_textes.js +0 -179
  259. package/lib/scripts/{fix_db.d.ts → parse_textes.d.ts} +0 -0
@@ -1,116 +1,89 @@
1
- import fs from 'fs-extra';
2
- import path from 'path';
3
- import commandLineArgs from 'command-line-args';
4
- import { datasets, EnabledDatasets, getEnabledDatasets, } from '../datasets';
5
- import { Aggregator, allFollows, getAllLois, getAllQuestions, getAllSens } from '../model';
6
- import { insertLoiReferences, insertQuestionReferences, insertSenReferences } from '../inserters';
1
+ import assert from "assert";
2
+ import commandLineArgs from "command-line-args";
3
+ import fs from "fs-extra";
4
+ import path from "path";
5
+ import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
6
+ import { DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER, } from "../loaders";
7
+ import { findAllAmendements, findAllCirconscriptions, findAllLois, findAllOrganismes, findAllQuestions, findAllSens, } from "../model";
8
+ import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg";
9
+ import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION } from "./datautil";
10
+ import { commonOptions } from "./shared/cli_helpers";
11
+ import { ensureAndClearDir } from "./shared/util";
7
12
  const optionsDefinitions = [
8
- {
9
- alias: 'k',
10
- defaultValue: ['All'],
11
- help: 'categories of datasets to reorganize',
12
- multiple: true,
13
- name: 'categories',
14
- type: String,
15
- },
16
- {
17
- alias: 's',
18
- help: 'don\'t log anything',
19
- name: 'silent',
20
- type: Boolean,
21
- },
22
- {
23
- alias: "v",
24
- help: "verbose logs",
25
- name: "verbose",
26
- type: Boolean,
27
- },
28
- {
29
- defaultOption: true,
30
- help: 'directory containing Sénat open data files',
31
- name: 'dataDir',
32
- type: String,
33
- },
13
+ ...commonOptions,
34
14
  ];
35
15
  const options = commandLineArgs(optionsDefinitions);
36
- const SIGNET_STRUCTURE_REGEXP = /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/;
37
- function ensureAndClearDir(path) {
38
- if (!fs.existsSync(path)) {
39
- fs.mkdirSync(path);
40
- }
41
- else {
42
- fs.emptyDirSync(path);
43
- }
44
- }
16
+ const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
17
+ const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
18
+ const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
19
+ const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
45
20
  async function convertData() {
46
21
  const enabledDatasets = getEnabledDatasets(options.categories);
47
22
  const dataDir = options.dataDir;
48
- if (enabledDatasets & EnabledDatasets.DosLeg) {
49
- const dataset = datasets.dosleg;
23
+ assert(dataDir, "Missing argument: data directory");
24
+ console.time("data transformation time");
25
+ if (enabledDatasets & EnabledDatasets.Ameli) {
26
+ const dataset = datasets.ameli;
50
27
  if (!options.silent) {
51
28
  console.log(`Converting database ${dataset.database} data into files…`);
52
29
  }
53
- const lois = await getAllLois();
54
- const aggregator = new Aggregator(new Set(allFollows));
55
- for (const loi of lois) {
56
- aggregator.addLoi(loi);
57
- }
58
- if (options.verbose) {
59
- console.log("Starting getting all the data…");
60
- console.time("get all data");
30
+ const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
31
+ ensureAndClearDir(ameliReorganizedRootDir);
32
+ for await (const amendement of findAllAmendements()) {
33
+ if (options.verbose) {
34
+ console.log(`Converting ${amendement.numero} file…`);
35
+ }
36
+ const session = String(amendement.session) || UNDEFINED_SESSION;
37
+ const signetDossierLegislatif = amendement.signet_dossier_legislatif
38
+ || `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase();
39
+ const ameliReorganizedDir = path.join(ameliReorganizedRootDir, session, signetDossierLegislatif);
40
+ fs.ensureDirSync(ameliReorganizedDir);
41
+ const amendementFileName = `${amendement.numero}.json`;
42
+ fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 });
61
43
  }
62
- await aggregator.getAll();
63
- if (options.verbose) {
64
- console.timeEnd("get all data");
44
+ }
45
+ if (enabledDatasets & EnabledDatasets.DosLeg) {
46
+ const dataset = datasets.dosleg;
47
+ if (!options.silent) {
48
+ console.log(`Converting database ${dataset.database} data into files…`);
65
49
  }
66
- const allData = aggregator.toJson();
67
- const loiReorganizedRootDir = path.join(dataDir, dataset.database);
68
- ensureAndClearDir(loiReorganizedRootDir);
69
- for (const loi of lois) {
50
+ const doslegReorganizedRootDir = path.join(dataDir, dataset.database);
51
+ const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER);
52
+ ensureAndClearDir(doslegReorganizedRootDir);
53
+ ensureAndClearDir(dossiersReorganizedDir);
54
+ for await (const loi of findAllLois()) {
70
55
  if (options.verbose) {
71
56
  console.log(`Converting ${loi.signet} file…`);
72
57
  }
73
- insertLoiReferences(loi, allData, {});
74
- let loiFileName = `${loi.loicod}.json`;
75
- let loiReorganizedDir = path.join(loiReorganizedRootDir, loi.typloicod);
76
- if (loi.signet) {
77
- loiFileName = `${loi.signet}.json`;
78
- let signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
79
- if (signetParts) {
80
- const { session } = signetParts;
81
- loiReorganizedDir = path.join(loiReorganizedDir, session);
82
- }
83
- else {
84
- loiReorganizedDir = path.join(loiReorganizedDir, "non defini");
85
- }
86
- fs.ensureDirSync(loiReorganizedDir);
58
+ let loiReorganizedDir = path.join(dossiersReorganizedDir, UNDEFINED_SESSION);
59
+ const signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
60
+ if (signetParts) {
61
+ const { session } = signetParts;
62
+ const formattedSession = formatToFourDigitSession(session);
63
+ loiReorganizedDir = path.join(dossiersReorganizedDir, formattedSession);
87
64
  }
65
+ fs.ensureDirSync(loiReorganizedDir);
66
+ const loiFileName = `${loi.signet}.json`;
88
67
  fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, { spaces: 2 });
89
68
  }
69
+ await convertTexteUrls(dataDir);
70
+ await convertRapportUrls(dataDir);
90
71
  }
91
72
  if (enabledDatasets & EnabledDatasets.Questions) {
92
73
  const dataset = datasets.questions;
93
74
  if (!options.silent) {
94
75
  console.log(`Converting database ${dataset.database} data into files…`);
95
76
  }
96
- const questions = await getAllQuestions();
97
- const aggregator = new Aggregator(new Set(allFollows));
98
- for (const question of questions) {
99
- aggregator.addQuestion(question);
100
- }
101
- await aggregator.getAll();
102
- const allData = aggregator.toJson();
103
- const questionReorganizedRootDir = path.join(dataDir, dataset.database);
104
- ensureAndClearDir(questionReorganizedRootDir);
105
- for (const question of questions) {
77
+ const questionsReorganizedRootDir = path.join(dataDir, dataset.database);
78
+ ensureAndClearDir(questionsReorganizedRootDir);
79
+ for await (const question of findAllQuestions()) {
106
80
  if (options.verbose) {
107
- console.log(`Converting ${question.id} file…`);
81
+ console.log(`Converting ${question.reference} file…`);
108
82
  }
109
- insertQuestionReferences(question, allData, {});
110
83
  const legislature = question.legislature ? question.legislature : 0;
111
- const questionReorganizedDir = path.join(questionReorganizedRootDir, String(legislature));
84
+ const questionReorganizedDir = path.join(questionsReorganizedRootDir, String(legislature));
112
85
  fs.ensureDirSync(questionReorganizedDir);
113
- let questionFileName = `${question.reference}.json`;
86
+ const questionFileName = `${question.reference}.json`;
114
87
  fs.writeJSONSync(path.join(questionReorganizedDir, questionFileName), question, { spaces: 2 });
115
88
  }
116
89
  }
@@ -119,24 +92,86 @@ async function convertData() {
119
92
  if (!options.silent) {
120
93
  console.log(`Converting database ${dataset.database} data into files…`);
121
94
  }
122
- const sens = await getAllSens();
123
- const aggregator = new Aggregator(new Set(allFollows));
124
- for (const sen of sens) {
125
- aggregator.addSen(sen);
95
+ const sensReorganizedRootDir = path.join(dataDir, dataset.database);
96
+ const senateursReorganizedDir = path.join(sensReorganizedRootDir, SENS_SENATEURS_FOLDER);
97
+ const circonscriptionsReorganizedDir = path.join(sensReorganizedRootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
98
+ const organismesReorganizedDir = path.join(sensReorganizedRootDir, SENS_ORGANISMES_FOLDER);
99
+ ensureAndClearDir(sensReorganizedRootDir);
100
+ ensureAndClearDir(senateursReorganizedDir);
101
+ ensureAndClearDir(circonscriptionsReorganizedDir);
102
+ ensureAndClearDir(organismesReorganizedDir);
103
+ for await (const sen of findAllSens()) {
104
+ if (options.verbose) {
105
+ console.log(`Converting ${sen.matricule} file…`);
106
+ }
107
+ const senFileName = `${sen.matricule}.json`;
108
+ fs.writeJSONSync(path.join(senateursReorganizedDir, senFileName), sen, { spaces: 2 });
109
+ }
110
+ for await (const circonscription of findAllCirconscriptions()) {
111
+ if (options.verbose) {
112
+ console.log(`Converting ${circonscription.identifiant} file…`);
113
+ }
114
+ const circonscriptionFileName = `${circonscription.identifiant}.json`;
115
+ fs.writeJSONSync(path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, { spaces: 2 });
126
116
  }
127
- await aggregator.getAll();
128
- const allData = aggregator.toJson();
129
- const senReorganizedRootDir = path.join(dataDir, dataset.database);
130
- ensureAndClearDir(senReorganizedRootDir);
131
- for (const sen of sens) {
117
+ for await (const organisme of findAllOrganismes()) {
132
118
  if (options.verbose) {
133
- console.log(`Converting ${sen.senmat} file…`);
119
+ console.log(`Converting ${organisme.code} file…`);
134
120
  }
135
- insertSenReferences(sen, allData, {});
136
- let senFileName = `${sen.senmat}.json`;
137
- fs.writeJSONSync(path.join(senReorganizedRootDir, senFileName), sen, { spaces: 2 });
121
+ const organismeFileName = `${organisme.code}.json`;
122
+ fs.writeJSONSync(path.join(organismesReorganizedDir, organismeFileName), organisme, { spaces: 2 });
138
123
  }
139
124
  }
125
+ if (!options.silent) {
126
+ console.timeEnd("data transformation time");
127
+ }
128
+ }
129
+ async function convertTexteUrls(dataDir) {
130
+ const textesDir = path.join(dataDir, TEXTE_FOLDER);
131
+ fs.ensureDirSync(textesDir);
132
+ const originalTextesDir = path.join(textesDir, TEXTE_ORIGINAL_FOLDER);
133
+ for await (const texte of findSenatTexteUrls(options.sessions)) {
134
+ const texteName = path.parse(texte.url).name;
135
+ const texteDir = path.join(originalTextesDir, `${texte.session ?? UNDEFINED_SESSION}`, texteName);
136
+ fs.ensureDirSync(texteDir);
137
+ const metadata = {
138
+ name: texteName,
139
+ session: texte.session,
140
+ url_expose_des_motifs: texte.hasExposeDesMotifs ?
141
+ new URL(`${texteName}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL) : undefined,
142
+ url_xml: new URL(`${texteName}.akn.xml`, SENAT_TEXTE_XML_BASE_URL),
143
+ url_html: new URL(`${texteName}.html`, SENAT_TEXTE_BASE_URL),
144
+ url_pdf: new URL(`${texteName}.pdf`, SENAT_TEXTE_BASE_URL),
145
+ };
146
+ fs.writeJSONSync(path.join(texteDir, DOCUMENT_METADATA_FILE), metadata, { spaces: 2 });
147
+ }
148
+ }
149
+ async function convertRapportUrls(dataDir) {
150
+ const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
151
+ fs.ensureDirSync(rapportsDir);
152
+ for await (const rapport of findSenatRapportUrls(options.sessions)) {
153
+ const parsedRapportUrl = path.parse(rapport.url);
154
+ const rapportName = parsedRapportUrl.name;
155
+ const rapportDir = path.join(rapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName);
156
+ fs.ensureDirSync(rapportDir);
157
+ const rapportHtmlUrlBase = `${rapportName}_mono.html`;
158
+ const rapportHtmlUrl = path.format({
159
+ dir: parsedRapportUrl.dir,
160
+ base: rapportHtmlUrlBase,
161
+ });
162
+ const rapportPdfUrlBase = `${rapportName}1.pdf`;
163
+ const rapportPdfUrl = path.format({
164
+ dir: parsedRapportUrl.dir,
165
+ base: rapportPdfUrlBase,
166
+ });
167
+ const metadata = {
168
+ name: rapportName,
169
+ session: rapport.session,
170
+ url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
171
+ url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
172
+ };
173
+ fs.writeJSONSync(path.join(rapportDir, DOCUMENT_METADATA_FILE), metadata, { spaces: 2 });
174
+ }
140
175
  }
141
176
  convertData()
142
177
  .then(() => process.exit(0))
@@ -1,113 +1,108 @@
1
- import fs from 'fs-extra'
2
- import path from 'path'
3
- import commandLineArgs from 'command-line-args'
1
+ import assert from "assert"
2
+ import commandLineArgs from "command-line-args"
3
+ import fs from "fs-extra"
4
+ import path from "path"
4
5
 
5
- import { datasets, EnabledDatasets, getEnabledDatasets, } from '../datasets'
6
+ import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets"
6
7
  import {
7
- Aggregator,
8
- allFollows,
9
- getAllLois,
10
- getAllQuestions,
11
- getAllSens
12
- } from '../model'
8
+ DOCUMENT_METADATA_FILE,
9
+ DOSLEG_DOSSIERS_FOLDER,
10
+ RAPPORT_FOLDER,
11
+ RapportMetadata,
12
+ SENS_CIRCONSCRIPTIONS_FOLDER,
13
+ SENS_ORGANISMES_FOLDER,
14
+ SENS_SENATEURS_FOLDER,
15
+ TEXTE_FOLDER,
16
+ TEXTE_ORIGINAL_FOLDER,
17
+ TexteMetadata,
18
+ } from "../loaders"
13
19
  import {
14
- insertLoiReferences,
15
- insertQuestionReferences,
16
- insertSenReferences
17
- } from '../inserters'
20
+ findAllAmendements,
21
+ findAllCirconscriptions,
22
+ findAllLois,
23
+ findAllOrganismes,
24
+ findAllQuestions,
25
+ findAllSens,
26
+ } from "../model"
27
+ import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg"
28
+ import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION } from "./datautil"
29
+ import { commonOptions } from "./shared/cli_helpers"
30
+ import { ensureAndClearDir } from "./shared/util"
18
31
 
19
32
  const optionsDefinitions = [
20
- {
21
- alias: 'k',
22
- defaultValue: ['All'],
23
- help: 'categories of datasets to reorganize',
24
- multiple: true,
25
- name: 'categories',
26
- type: String,
27
- },
28
- {
29
- alias: 's',
30
- help: 'don\'t log anything',
31
- name: 'silent',
32
- type: Boolean,
33
- },
34
- {
35
- alias: "v",
36
- help: "verbose logs",
37
- name: "verbose",
38
- type: Boolean,
39
- },
40
- {
41
- defaultOption: true,
42
- help: 'directory containing Sénat open data files',
43
- name: 'dataDir',
44
- type: String,
45
- },
33
+ ...commonOptions,
46
34
  ]
47
35
  const options = commandLineArgs(optionsDefinitions)
48
36
 
49
- const SIGNET_STRUCTURE_REGEXP = /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/
37
+ const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/"
38
+ const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/"
39
+ const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/"
40
+ const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/"
50
41
 
51
- function ensureAndClearDir(path: string) {
52
- if (!fs.existsSync(path)) {
53
- fs.mkdirSync(path)
54
- } else {
55
- fs.emptyDirSync(path)
56
- }
57
- }
58
-
59
- async function convertData() {
42
+ async function convertData () {
60
43
  const enabledDatasets = getEnabledDatasets(options.categories)
61
44
  const dataDir: string = options.dataDir
45
+ assert(dataDir, "Missing argument: data directory")
62
46
 
63
- if (enabledDatasets & EnabledDatasets.DosLeg) {
64
- const dataset = datasets.dosleg
47
+ console.time("data transformation time")
48
+
49
+ if (enabledDatasets & EnabledDatasets.Ameli) {
50
+ const dataset = datasets.ameli
65
51
 
66
52
  if (!options.silent) {
67
53
  console.log(`Converting database ${dataset.database} data into files…`)
68
54
  }
69
55
 
70
- const lois = await getAllLois()
71
- const aggregator = new Aggregator(new Set(allFollows))
72
- for (const loi of lois) {
73
- aggregator.addLoi(loi)
74
- }
75
- if (options.verbose) {
76
- console.log("Starting getting all the data…")
77
- console.time("get all data")
56
+ const ameliReorganizedRootDir = path.join(dataDir, dataset.database)
57
+ ensureAndClearDir(ameliReorganizedRootDir)
58
+
59
+ for await (const amendement of findAllAmendements()) {
60
+ if (options.verbose) {
61
+ console.log(`Converting ${amendement.numero} file…`)
62
+ }
63
+
64
+ const session = String(amendement.session) || UNDEFINED_SESSION
65
+ const signetDossierLegislatif = amendement.signet_dossier_legislatif
66
+ || `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase()
67
+ const ameliReorganizedDir = path.join(ameliReorganizedRootDir, session, signetDossierLegislatif)
68
+ fs.ensureDirSync(ameliReorganizedDir)
69
+ const amendementFileName = `${amendement.numero}.json`
70
+ fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 })
78
71
  }
79
- await aggregator.getAll()
80
- if (options.verbose) {
81
- console.timeEnd("get all data")
72
+ }
73
+
74
+ if (enabledDatasets & EnabledDatasets.DosLeg) {
75
+ const dataset = datasets.dosleg
76
+
77
+ if (!options.silent) {
78
+ console.log(`Converting database ${dataset.database} data into files…`)
82
79
  }
83
- const allData = aggregator.toJson()
84
80
 
85
- const loiReorganizedRootDir = path.join(dataDir, dataset.database)
86
- ensureAndClearDir(loiReorganizedRootDir)
81
+ const doslegReorganizedRootDir = path.join(dataDir, dataset.database)
82
+ const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER)
83
+ ensureAndClearDir(doslegReorganizedRootDir)
84
+ ensureAndClearDir(dossiersReorganizedDir)
87
85
 
88
- for (const loi of lois) {
86
+ for await (const loi of findAllLois()) {
89
87
  if (options.verbose) {
90
88
  console.log(`Converting ${loi.signet} file…`)
91
89
  }
92
90
 
93
- insertLoiReferences(loi, allData, {})
94
-
95
- let loiFileName = `${loi.loicod}.json`
96
- let loiReorganizedDir = path.join(loiReorganizedRootDir, loi.typloicod)
97
- if (loi.signet) {
98
- loiFileName = `${loi.signet}.json`
99
- let signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups
100
- if (signetParts) {
101
- const { session} = signetParts
102
- loiReorganizedDir = path.join(loiReorganizedDir, session)
103
- } else {
104
- loiReorganizedDir = path.join(loiReorganizedDir, "non defini")
105
- }
106
- fs.ensureDirSync(loiReorganizedDir)
91
+ let loiReorganizedDir = path.join(dossiersReorganizedDir, UNDEFINED_SESSION)
92
+ const signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups
93
+ if (signetParts) {
94
+ const { session } = signetParts
95
+ const formattedSession = formatToFourDigitSession(session)
96
+ loiReorganizedDir = path.join(dossiersReorganizedDir, formattedSession)
107
97
  }
98
+ fs.ensureDirSync(loiReorganizedDir)
108
99
 
109
- fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, {spaces: 2})
100
+ const loiFileName = `${loi.signet}.json`
101
+ fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, { spaces: 2 })
110
102
  }
103
+
104
+ await convertTexteUrls(dataDir)
105
+ await convertRapportUrls(dataDir)
111
106
  }
112
107
 
113
108
  if (enabledDatasets & EnabledDatasets.Questions) {
@@ -117,29 +112,19 @@ async function convertData() {
117
112
  console.log(`Converting database ${dataset.database} data into files…`)
118
113
  }
119
114
 
120
- const questions = await getAllQuestions()
121
- const aggregator = new Aggregator(new Set(allFollows))
122
- for (const question of questions) {
123
- aggregator.addQuestion(question)
124
- }
125
- await aggregator.getAll()
126
- const allData = aggregator.toJson()
115
+ const questionsReorganizedRootDir = path.join(dataDir, dataset.database)
116
+ ensureAndClearDir(questionsReorganizedRootDir)
127
117
 
128
- const questionReorganizedRootDir = path.join(dataDir, dataset.database)
129
- ensureAndClearDir(questionReorganizedRootDir)
130
-
131
- for (const question of questions) {
118
+ for await (const question of findAllQuestions()) {
132
119
  if (options.verbose) {
133
- console.log(`Converting ${question.id} file…`)
120
+ console.log(`Converting ${question.reference} file…`)
134
121
  }
135
122
 
136
- insertQuestionReferences(question, allData, {})
137
-
138
123
  const legislature = question.legislature ? question.legislature : 0
139
- const questionReorganizedDir = path.join(questionReorganizedRootDir, String(legislature))
124
+ const questionReorganizedDir = path.join(questionsReorganizedRootDir, String(legislature))
140
125
  fs.ensureDirSync(questionReorganizedDir)
141
- let questionFileName = `${question.reference}.json`
142
- fs.writeJSONSync(path.join(questionReorganizedDir, questionFileName), question, {spaces: 2})
126
+ const questionFileName = `${question.reference}.json`
127
+ fs.writeJSONSync(path.join(questionReorganizedDir, questionFileName), question, { spaces: 2 })
143
128
  }
144
129
  }
145
130
 
@@ -150,27 +135,103 @@ async function convertData() {
150
135
  console.log(`Converting database ${dataset.database} data into files…`)
151
136
  }
152
137
 
153
- const sens = await getAllSens()
154
- const aggregator = new Aggregator(new Set(allFollows))
155
- for (const sen of sens) {
156
- aggregator.addSen(sen)
138
+ const sensReorganizedRootDir = path.join(dataDir, dataset.database)
139
+ const senateursReorganizedDir = path.join(sensReorganizedRootDir, SENS_SENATEURS_FOLDER)
140
+ const circonscriptionsReorganizedDir = path.join(sensReorganizedRootDir, SENS_CIRCONSCRIPTIONS_FOLDER)
141
+ const organismesReorganizedDir = path.join(sensReorganizedRootDir, SENS_ORGANISMES_FOLDER)
142
+ ensureAndClearDir(sensReorganizedRootDir)
143
+ ensureAndClearDir(senateursReorganizedDir)
144
+ ensureAndClearDir(circonscriptionsReorganizedDir)
145
+ ensureAndClearDir(organismesReorganizedDir)
146
+
147
+ for await (const sen of findAllSens()) {
148
+ if (options.verbose) {
149
+ console.log(`Converting ${sen.matricule} file…`)
150
+ }
151
+
152
+ const senFileName = `${sen.matricule}.json`
153
+ fs.writeJSONSync(path.join(senateursReorganizedDir, senFileName), sen, { spaces: 2 })
157
154
  }
158
- await aggregator.getAll()
159
- const allData = aggregator.toJson()
160
155
 
161
- const senReorganizedRootDir = path.join(dataDir, dataset.database)
162
- ensureAndClearDir(senReorganizedRootDir)
156
+ for await (const circonscription of findAllCirconscriptions()) {
157
+ if (options.verbose) {
158
+ console.log(`Converting ${circonscription.identifiant} file…`)
159
+ }
160
+
161
+ const circonscriptionFileName = `${circonscription.identifiant}.json`
162
+ fs.writeJSONSync(
163
+ path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, { spaces: 2 }
164
+ )
165
+ }
163
166
 
164
- for (const sen of sens) {
167
+ for await (const organisme of findAllOrganismes()) {
165
168
  if (options.verbose) {
166
- console.log(`Converting ${sen.senmat} file…`)
169
+ console.log(`Converting ${organisme.code} file…`)
167
170
  }
168
171
 
169
- insertSenReferences(sen, allData, {})
172
+ const organismeFileName = `${organisme.code}.json`
173
+ fs.writeJSONSync(path.join(organismesReorganizedDir, organismeFileName), organisme, { spaces: 2 })
174
+ }
175
+ }
176
+
177
+ if (!options.silent) {
178
+ console.timeEnd("data transformation time")
179
+ }
180
+ }
181
+
182
+ async function convertTexteUrls (dataDir: string) {
183
+ const textesDir = path.join(dataDir, TEXTE_FOLDER)
184
+ fs.ensureDirSync(textesDir)
185
+
186
+ const originalTextesDir = path.join(textesDir, TEXTE_ORIGINAL_FOLDER)
187
+
188
+ for await (const texte of findSenatTexteUrls(options.sessions)) {
189
+ const texteName = path.parse(texte.url).name
190
+ const texteDir = path.join(originalTextesDir, `${texte.session ?? UNDEFINED_SESSION}`, texteName)
191
+ fs.ensureDirSync(texteDir)
192
+
193
+ const metadata: TexteMetadata = {
194
+ name: texteName,
195
+ session: texte.session,
196
+ url_expose_des_motifs: texte.hasExposeDesMotifs ?
197
+ new URL(`${texteName}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL) : undefined,
198
+ url_xml: new URL(`${texteName}.akn.xml`, SENAT_TEXTE_XML_BASE_URL),
199
+ url_html: new URL(`${texteName}.html`, SENAT_TEXTE_BASE_URL),
200
+ url_pdf: new URL(`${texteName}.pdf`, SENAT_TEXTE_BASE_URL),
201
+ }
202
+ fs.writeJSONSync(path.join(texteDir, DOCUMENT_METADATA_FILE), metadata, { spaces: 2 })
203
+ }
204
+ }
170
205
 
171
- let senFileName = `${sen.senmat}.json`
172
- fs.writeJSONSync(path.join(senReorganizedRootDir, senFileName), sen, {spaces: 2})
206
+ async function convertRapportUrls (dataDir: string) {
207
+ const rapportsDir = path.join(dataDir, RAPPORT_FOLDER)
208
+ fs.ensureDirSync(rapportsDir)
209
+
210
+ for await (const rapport of findSenatRapportUrls(options.sessions)) {
211
+ const parsedRapportUrl = path.parse(rapport.url)
212
+ const rapportName = parsedRapportUrl.name
213
+ const rapportDir = path.join(rapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName)
214
+ fs.ensureDirSync(rapportDir)
215
+
216
+ const rapportHtmlUrlBase = `${rapportName}_mono.html`
217
+ const rapportHtmlUrl = path.format({
218
+ dir: parsedRapportUrl.dir,
219
+ base: rapportHtmlUrlBase,
220
+ })
221
+
222
+ const rapportPdfUrlBase = `${rapportName}1.pdf`
223
+ const rapportPdfUrl = path.format({
224
+ dir: parsedRapportUrl.dir,
225
+ base: rapportPdfUrlBase,
226
+ })
227
+
228
+ const metadata: RapportMetadata = {
229
+ name: rapportName,
230
+ session: rapport.session,
231
+ url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
232
+ url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
173
233
  }
234
+ fs.writeJSONSync(path.join(rapportDir, DOCUMENT_METADATA_FILE), metadata, { spaces: 2 })
174
235
  }
175
236
  }
176
237
 
@@ -0,0 +1,5 @@
1
+ export declare const SIGNET_STRUCTURE_REGEXP: RegExp;
2
+ export declare const AKN_IDENTIFICATION_STRUCTURE_REGEXP: RegExp;
3
+ export declare const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP: RegExp;
4
+ export declare const UNDEFINED_SESSION = "0";
5
+ export declare function formatToFourDigitSession(session: string): string;