@tricoteuses/senat 2.20.21 → 2.20.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/lib/loaders.d.ts +2 -1
- package/lib/loaders.js +48 -3
- package/lib/model/dosleg.d.ts +1 -2
- package/lib/model/dosleg.js +183 -114
- package/lib/parsers/texte.d.ts +7 -0
- package/lib/parsers/texte.js +228 -0
- package/lib/scripts/convert_data.js +87 -62
- package/lib/scripts/data-download.js +4 -1
- package/lib/scripts/retrieve_documents.d.ts +2 -1
- package/lib/scripts/retrieve_documents.js +124 -192
- package/lib/scripts/shared/cli_helpers.d.ts +10 -0
- package/lib/scripts/shared/cli_helpers.js +12 -0
- package/lib/scripts/test_iter_load.js +11 -22
- package/package.json +5 -7
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import { JSDOM } from "jsdom";
|
|
2
|
+
import { AKN_IDENTIFICATION_STRUCTURE_REGEXP, AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
|
|
3
|
+
import { DivisionType, } from "../types/texte";
|
|
4
|
+
function buildWorklow(metaElement) {
|
|
5
|
+
const stepElements = metaElement.querySelectorAll("workflow step");
|
|
6
|
+
const steps = [];
|
|
7
|
+
for (const stepElement of stepElements) {
|
|
8
|
+
const identification = stepElement.getAttribute("href") ?? "";
|
|
9
|
+
const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
|
|
10
|
+
steps.push({
|
|
11
|
+
eId: stepElement.getAttribute("eId"),
|
|
12
|
+
date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
|
|
13
|
+
type: identificationParts?.["type"] || null,
|
|
14
|
+
session: identificationParts?.["session"] || null,
|
|
15
|
+
numero: identificationParts?.["numTexte"] || null,
|
|
16
|
+
version: identificationParts?.["version"] ? identificationParts["version"] : null,
|
|
17
|
+
outcome: stepElement.getAttribute("outcome"),
|
|
18
|
+
});
|
|
19
|
+
}
|
|
20
|
+
return steps;
|
|
21
|
+
}
|
|
22
|
+
function buildDivision(node, index) {
|
|
23
|
+
const eId = node.getAttribute("eId");
|
|
24
|
+
const tag = node.nodeName;
|
|
25
|
+
const level = DivisionType[tag];
|
|
26
|
+
const titleNode = node.querySelector("num");
|
|
27
|
+
const subtitleNode = node.querySelector("heading");
|
|
28
|
+
const headings = [
|
|
29
|
+
...(titleNode
|
|
30
|
+
? [
|
|
31
|
+
{
|
|
32
|
+
text: titleNode.textContent?.trim() ?? null,
|
|
33
|
+
html: titleNode.innerHTML?.trim() ?? null,
|
|
34
|
+
},
|
|
35
|
+
]
|
|
36
|
+
: []),
|
|
37
|
+
...(subtitleNode
|
|
38
|
+
? [
|
|
39
|
+
{
|
|
40
|
+
text: subtitleNode.textContent?.trim() ?? null,
|
|
41
|
+
html: subtitleNode.innerHTML?.trim() ?? null,
|
|
42
|
+
},
|
|
43
|
+
]
|
|
44
|
+
: []),
|
|
45
|
+
];
|
|
46
|
+
const division = {
|
|
47
|
+
index,
|
|
48
|
+
eId,
|
|
49
|
+
tag,
|
|
50
|
+
level,
|
|
51
|
+
headings,
|
|
52
|
+
};
|
|
53
|
+
if (tag === "article") {
|
|
54
|
+
;
|
|
55
|
+
division.alineas = [];
|
|
56
|
+
}
|
|
57
|
+
return division;
|
|
58
|
+
}
|
|
59
|
+
function buildAlinea(contentNode, alineaNode) {
|
|
60
|
+
const eId = alineaNode.getAttribute("eId");
|
|
61
|
+
const heading = {
|
|
62
|
+
text: alineaNode.querySelector("num")?.textContent ?? null,
|
|
63
|
+
};
|
|
64
|
+
const pastille = alineaNode.getAttribute("data:pastille") ?? null;
|
|
65
|
+
return {
|
|
66
|
+
eId,
|
|
67
|
+
heading,
|
|
68
|
+
text: contentNode.textContent?.trim() ?? null,
|
|
69
|
+
html: contentNode.innerHTML?.trim() ?? null,
|
|
70
|
+
pastille,
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
function buildEmptyArticle(index) {
|
|
74
|
+
return {
|
|
75
|
+
index: index,
|
|
76
|
+
eId: "",
|
|
77
|
+
tag: "article",
|
|
78
|
+
level: DivisionType["article"],
|
|
79
|
+
headings: [],
|
|
80
|
+
alineas: [],
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
function flattenTexte(texteContentRoot) {
|
|
84
|
+
const divisions = [];
|
|
85
|
+
let divisionIndex = 0;
|
|
86
|
+
const iter = (node) => {
|
|
87
|
+
if (node.nodeName === "content") {
|
|
88
|
+
return;
|
|
89
|
+
}
|
|
90
|
+
switch (node.nodeName) {
|
|
91
|
+
case "tome":
|
|
92
|
+
case "part":
|
|
93
|
+
case "book":
|
|
94
|
+
case "title":
|
|
95
|
+
case "subtitle":
|
|
96
|
+
case "chapter":
|
|
97
|
+
case "section":
|
|
98
|
+
case "subsection":
|
|
99
|
+
case "paragraph":
|
|
100
|
+
case "article":
|
|
101
|
+
divisions.push(buildDivision(node, divisionIndex++));
|
|
102
|
+
break;
|
|
103
|
+
}
|
|
104
|
+
if (node.nodeName === "alinea") {
|
|
105
|
+
Array.from(node.childNodes)
|
|
106
|
+
// Find direct content children programmatically
|
|
107
|
+
// because `:scope` selector does not work
|
|
108
|
+
// https://github.com/jsdom/jsdom/issues/2998
|
|
109
|
+
.filter((alineaChildNode) => alineaChildNode.nodeName === "content")
|
|
110
|
+
.forEach((alineaContentNode) => {
|
|
111
|
+
// Hypothesis: alineas should always be enclosed in articles
|
|
112
|
+
let lastArticle = divisions.findLast((division) => division.tag === "article");
|
|
113
|
+
if (!lastArticle) {
|
|
114
|
+
lastArticle = buildEmptyArticle(divisionIndex++);
|
|
115
|
+
divisions.push(lastArticle);
|
|
116
|
+
}
|
|
117
|
+
lastArticle.alineas.push(buildAlinea(alineaContentNode, node));
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
if (node.hasChildNodes()) {
|
|
121
|
+
node.childNodes.forEach((childNode) => iter(childNode));
|
|
122
|
+
}
|
|
123
|
+
};
|
|
124
|
+
iter(texteContentRoot);
|
|
125
|
+
return divisions;
|
|
126
|
+
}
|
|
127
|
+
export function transformTexte(document) {
|
|
128
|
+
const metaElement = document.querySelector("meta");
|
|
129
|
+
const preambleElement = document.querySelector("preamble");
|
|
130
|
+
const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? "";
|
|
131
|
+
const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
|
|
132
|
+
const bodyElement = document.querySelector("body");
|
|
133
|
+
const sessionYears = identificationParts?.["session"]?.split("-") || null;
|
|
134
|
+
const datePresentation = metaElement?.querySelector("FRBRdate[name='#presentation']")?.getAttribute("date");
|
|
135
|
+
const dateDepot = metaElement?.querySelector("FRBRdate[name='#depot']")?.getAttribute("date");
|
|
136
|
+
const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date");
|
|
137
|
+
return {
|
|
138
|
+
titre: preambleElement?.querySelector("docTitle")?.textContent || null,
|
|
139
|
+
titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")?.getAttribute("value") || null,
|
|
140
|
+
signetDossier: metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")?.getAttribute("value") || null,
|
|
141
|
+
urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
|
|
142
|
+
urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
|
|
143
|
+
type: identificationParts?.["type"] || null,
|
|
144
|
+
session: sessionYears && sessionYears.length > 0 ? sessionYears[0] : null,
|
|
145
|
+
numero: identificationParts?.["numTexte"] ? parseInt(identificationParts["numTexte"]) : null,
|
|
146
|
+
datePresentation: datePresentation ? new Date(datePresentation) : null,
|
|
147
|
+
dateDepot: dateDepot ? new Date(dateDepot) : null,
|
|
148
|
+
datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
|
|
149
|
+
version: identificationParts?.["version"] ? identificationParts["version"] : null,
|
|
150
|
+
workflow: metaElement ? buildWorklow(metaElement) : [],
|
|
151
|
+
divisions: bodyElement ? flattenTexte(bodyElement) : [],
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
export function transformExposeDesMotifs(document) {
|
|
155
|
+
const sectionElements = document.querySelectorAll("section");
|
|
156
|
+
const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS");
|
|
157
|
+
for (const sectionElement of sectionElements) {
|
|
158
|
+
const firstParagraph = sectionElement.querySelector("p:first-of-type");
|
|
159
|
+
const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)");
|
|
160
|
+
if (!firstParagraph) {
|
|
161
|
+
continue;
|
|
162
|
+
}
|
|
163
|
+
const firstParagraphContent = firstParagraph.textContent;
|
|
164
|
+
const secondParagraphContent = secondParagraph?.textContent;
|
|
165
|
+
if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
|
|
166
|
+
if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
|
|
167
|
+
continue;
|
|
168
|
+
}
|
|
169
|
+
else {
|
|
170
|
+
secondParagraph.remove();
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
firstParagraph.remove();
|
|
174
|
+
return {
|
|
175
|
+
text: sectionElement.textContent?.trim() ?? null,
|
|
176
|
+
html: sectionElement.innerHTML?.trim() ?? null,
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
return null;
|
|
180
|
+
}
|
|
181
|
+
export function parseTexte(texteXml) {
|
|
182
|
+
try {
|
|
183
|
+
const { document } = new JSDOM(texteXml, {
|
|
184
|
+
contentType: "text/xml",
|
|
185
|
+
}).window;
|
|
186
|
+
return transformTexte(document);
|
|
187
|
+
}
|
|
188
|
+
catch (error) {
|
|
189
|
+
console.error(`Could not parse texte with error ${error}`);
|
|
190
|
+
}
|
|
191
|
+
return null;
|
|
192
|
+
}
|
|
193
|
+
// Prevent from memory leak
|
|
194
|
+
// https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
|
|
195
|
+
export async function parseTexteFromFile(xmlFilePath) {
|
|
196
|
+
try {
|
|
197
|
+
const { document } = (await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" })).window;
|
|
198
|
+
return transformTexte(document);
|
|
199
|
+
}
|
|
200
|
+
catch (error) {
|
|
201
|
+
console.error(`Could not parse texte with error ${error}`);
|
|
202
|
+
}
|
|
203
|
+
return null;
|
|
204
|
+
}
|
|
205
|
+
export function parseExposeDesMotifs(exposeDesMotifsHtml) {
|
|
206
|
+
try {
|
|
207
|
+
const { document } = new JSDOM(exposeDesMotifsHtml, {
|
|
208
|
+
contentType: "text/html",
|
|
209
|
+
}).window;
|
|
210
|
+
return transformExposeDesMotifs(document);
|
|
211
|
+
}
|
|
212
|
+
catch (error) {
|
|
213
|
+
console.error(`Could not parse exposé des motifs with error ${error}`);
|
|
214
|
+
}
|
|
215
|
+
return null;
|
|
216
|
+
}
|
|
217
|
+
// Prevent from memory leak
|
|
218
|
+
// https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
|
|
219
|
+
export async function parseExposeDesMotifsFromFile(htmlFilePath) {
|
|
220
|
+
try {
|
|
221
|
+
const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window;
|
|
222
|
+
return transformExposeDesMotifs(document);
|
|
223
|
+
}
|
|
224
|
+
catch (error) {
|
|
225
|
+
console.error(`Could not parse exposé des motifs with error ${error}`);
|
|
226
|
+
}
|
|
227
|
+
return null;
|
|
228
|
+
}
|
|
@@ -5,9 +5,10 @@ import path from "path";
|
|
|
5
5
|
import pLimit from "p-limit";
|
|
6
6
|
import * as git from "../git";
|
|
7
7
|
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
|
|
8
|
-
import { DATA_ORIGINAL_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
|
|
8
|
+
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
|
|
9
9
|
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findSenatRapportUrls, findSenatTexteUrls, } from "../model";
|
|
10
|
-
import {
|
|
10
|
+
import { processRapport, processTexte } from "./retrieve_documents";
|
|
11
|
+
import { buildActesLegislatifs } from "../model/dosleg";
|
|
11
12
|
import { UNDEFINED_SESSION } from "../types/sessions";
|
|
12
13
|
import { getSessionFromDate, getSessionFromSignet } from "./datautil";
|
|
13
14
|
import { commonOptions } from "./shared/cli_helpers";
|
|
@@ -73,7 +74,7 @@ async function convertData() {
|
|
|
73
74
|
}
|
|
74
75
|
if (enabledDatasets & EnabledDatasets.Questions) {
|
|
75
76
|
try {
|
|
76
|
-
await convertDatasetQuestions(dataDir);
|
|
77
|
+
await convertDatasetQuestions(dataDir, options);
|
|
77
78
|
const questionsDir = path.join(dataDir, datasets.questions.database);
|
|
78
79
|
exitCode = commitGit(questionsDir, options, exitCode);
|
|
79
80
|
}
|
|
@@ -83,7 +84,7 @@ async function convertData() {
|
|
|
83
84
|
}
|
|
84
85
|
if (enabledDatasets & EnabledDatasets.Sens) {
|
|
85
86
|
try {
|
|
86
|
-
await convertDatasetSens(dataDir);
|
|
87
|
+
await convertDatasetSens(dataDir, options);
|
|
87
88
|
const sensDir = path.join(dataDir, datasets.sens.database);
|
|
88
89
|
exitCode = commitGit(sensDir, options, exitCode);
|
|
89
90
|
}
|
|
@@ -102,7 +103,9 @@ async function convertDatasetAmeli(dataDir, options) {
|
|
|
102
103
|
console.log(`Converting database ${dataset.database} data into files…`);
|
|
103
104
|
}
|
|
104
105
|
const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
|
|
105
|
-
|
|
106
|
+
if (!options.keepDir) {
|
|
107
|
+
ensureAndClearDir(ameliReorganizedRootDir);
|
|
108
|
+
}
|
|
106
109
|
for await (const amendement of findAllAmendements(options["fromSession"])) {
|
|
107
110
|
if (options["verbose"]) {
|
|
108
111
|
console.log(`Converting ${amendement["numero"]} file…`);
|
|
@@ -110,11 +113,9 @@ async function convertDatasetAmeli(dataDir, options) {
|
|
|
110
113
|
const session = String(amendement["session"]) || UNDEFINED_SESSION;
|
|
111
114
|
const signetDossierLegislatif = amendement["signet_dossier_legislatif"] ||
|
|
112
115
|
`${amendement["nature_texte"]}-${amendement["numero_texte"]}`.toLowerCase();
|
|
113
|
-
const ameliReorganizedDir = path.join(ameliReorganizedRootDir, String(session), signetDossierLegislatif);
|
|
114
|
-
await fs.ensureDir(ameliReorganizedDir);
|
|
115
116
|
const amendementFileName = `${amendement["numero"]}.json`;
|
|
116
|
-
const filePath = path.join(
|
|
117
|
-
await fs.
|
|
117
|
+
const filePath = path.join(ameliReorganizedRootDir, String(session), signetDossierLegislatif, amendementFileName);
|
|
118
|
+
await fs.outputJSON(filePath, amendement, { spaces: 2 });
|
|
118
119
|
}
|
|
119
120
|
}
|
|
120
121
|
async function convertDatasetDebats(dataDir, options) {
|
|
@@ -123,7 +124,9 @@ async function convertDatasetDebats(dataDir, options) {
|
|
|
123
124
|
console.log(`Converting database ${dataset.database} data into files…`);
|
|
124
125
|
}
|
|
125
126
|
const debatsReorganizedRootDir = path.join(dataDir, dataset.database);
|
|
126
|
-
|
|
127
|
+
if (!options.keepDir) {
|
|
128
|
+
ensureAndClearDir(debatsReorganizedRootDir);
|
|
129
|
+
}
|
|
127
130
|
for await (const debat of findAllDebats()) {
|
|
128
131
|
if (options["verbose"]) {
|
|
129
132
|
console.log(`Converting ${debat.id} file…`);
|
|
@@ -132,11 +135,9 @@ async function convertDatasetDebats(dataDir, options) {
|
|
|
132
135
|
if (options["fromSession"] && session < options["fromSession"]) {
|
|
133
136
|
continue;
|
|
134
137
|
}
|
|
135
|
-
const debatsReorganizedDir = path.join(debatsReorganizedRootDir, String(session));
|
|
136
|
-
await fs.ensureDir(debatsReorganizedDir);
|
|
137
138
|
const debatFileName = `${debat.id}.json`;
|
|
138
|
-
const filePath = path.join(
|
|
139
|
-
await fs.
|
|
139
|
+
const filePath = path.join(debatsReorganizedRootDir, String(session), debatFileName);
|
|
140
|
+
await fs.outputJSON(filePath, debat, { spaces: 2 });
|
|
140
141
|
}
|
|
141
142
|
}
|
|
142
143
|
async function convertDatasetDosLeg(dataDir, options) {
|
|
@@ -146,28 +147,30 @@ async function convertDatasetDosLeg(dataDir, options) {
|
|
|
146
147
|
}
|
|
147
148
|
const doslegReorganizedRootDir = path.join(dataDir, dataset.database);
|
|
148
149
|
const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER);
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
150
|
+
if (!options.keepDir) {
|
|
151
|
+
ensureAndClearDir(doslegReorganizedRootDir);
|
|
152
|
+
ensureAndClearDir(dossiersReorganizedDir);
|
|
153
|
+
}
|
|
154
|
+
for await (const dossier of findAllDossiers()) {
|
|
152
155
|
if (options["verbose"]) {
|
|
153
|
-
console.log(`Converting ${
|
|
156
|
+
console.log(`Converting ${dossier["signet"]} file…`);
|
|
154
157
|
}
|
|
155
|
-
let
|
|
156
|
-
const session = getSessionFromSignet(
|
|
158
|
+
let dossierReorganizedDir = path.join(dossiersReorganizedDir, String(UNDEFINED_SESSION));
|
|
159
|
+
const session = getSessionFromSignet(dossier["signet"]) || UNDEFINED_SESSION;
|
|
157
160
|
if (options["fromSession"] && session < options["fromSession"]) {
|
|
158
161
|
continue;
|
|
159
162
|
}
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
const
|
|
167
|
-
await fs.
|
|
163
|
+
dossierReorganizedDir = path.join(dossiersReorganizedDir, String(session));
|
|
164
|
+
const actesBrutsNormalises = buildActesLegislatifs(dossier);
|
|
165
|
+
const dossierWithActes = {
|
|
166
|
+
...dossier,
|
|
167
|
+
actes_legislatifs: actesBrutsNormalises
|
|
168
|
+
};
|
|
169
|
+
const dossierFile = `${dossier["signet"]}.json`;
|
|
170
|
+
await fs.outputJSON(path.join(dossierReorganizedDir, dossierFile), dossierWithActes, { spaces: 2 });
|
|
168
171
|
}
|
|
169
|
-
await convertTexteUrls(dataDir);
|
|
170
|
-
await convertRapportUrls(dataDir);
|
|
172
|
+
await convertTexteUrls(dataDir, options);
|
|
173
|
+
await convertRapportUrls(dataDir, options);
|
|
171
174
|
}
|
|
172
175
|
async function convertDatasetScrutins(dataDir, options) {
|
|
173
176
|
const dataset = datasets.dosleg;
|
|
@@ -175,7 +178,9 @@ async function convertDatasetScrutins(dataDir, options) {
|
|
|
175
178
|
console.log(`Converting database scrutins (${dataset.database}) data into files…`);
|
|
176
179
|
}
|
|
177
180
|
const scrutinsReorganizedDir = path.join(dataDir, SCRUTINS_FOLDER);
|
|
178
|
-
|
|
181
|
+
if (!options.keepDir) {
|
|
182
|
+
ensureAndClearDir(scrutinsReorganizedDir);
|
|
183
|
+
}
|
|
179
184
|
for await (const scrutin of findAllScrutins(options["fromSession"])) {
|
|
180
185
|
if (options["verbose"]) {
|
|
181
186
|
console.log(`Converting ${scrutin["numero"]} file…`);
|
|
@@ -183,20 +188,21 @@ async function convertDatasetScrutins(dataDir, options) {
|
|
|
183
188
|
let scrutinReorganizedDir = path.join(scrutinsReorganizedDir, String(UNDEFINED_SESSION));
|
|
184
189
|
const session = scrutin["session"] || UNDEFINED_SESSION;
|
|
185
190
|
scrutinReorganizedDir = path.join(scrutinsReorganizedDir, String(session));
|
|
186
|
-
await fs.ensureDir(scrutinReorganizedDir);
|
|
187
191
|
const scrutinFileName = `${scrutin["numero"]}.json`;
|
|
188
|
-
await fs.
|
|
192
|
+
await fs.outputJSON(path.join(scrutinReorganizedDir, scrutinFileName), scrutin, {
|
|
189
193
|
spaces: 2,
|
|
190
194
|
});
|
|
191
195
|
}
|
|
192
196
|
}
|
|
193
|
-
async function convertDatasetQuestions(dataDir) {
|
|
197
|
+
async function convertDatasetQuestions(dataDir, options) {
|
|
194
198
|
const dataset = datasets.questions;
|
|
195
199
|
if (!options["silent"]) {
|
|
196
200
|
console.log(`Converting database ${dataset.database} data into files…`);
|
|
197
201
|
}
|
|
198
202
|
const questionsReorganizedRootDir = path.join(dataDir, dataset.database);
|
|
199
|
-
|
|
203
|
+
if (!options.keepDir) {
|
|
204
|
+
ensureAndClearDir(questionsReorganizedRootDir);
|
|
205
|
+
}
|
|
200
206
|
const limit = pLimit(10);
|
|
201
207
|
const tasks = [];
|
|
202
208
|
for await (const question of findAllQuestions()) {
|
|
@@ -205,22 +211,27 @@ async function convertDatasetQuestions(dataDir) {
|
|
|
205
211
|
console.log(`Converting ${question["reference"]} file…`);
|
|
206
212
|
}
|
|
207
213
|
const legislature = question["legislature"] ? question["legislature"] : 0;
|
|
208
|
-
const questionReorganizedDir = path.join(questionsReorganizedRootDir, String(legislature));
|
|
209
|
-
await fs.ensureDir(questionReorganizedDir);
|
|
210
214
|
const questionFileName = `${question["reference"]}.json`;
|
|
211
|
-
await fs.
|
|
215
|
+
await fs.outputJSON(path.join(questionsReorganizedRootDir, String(legislature), questionFileName), question, {
|
|
216
|
+
spaces: 2,
|
|
217
|
+
});
|
|
212
218
|
}));
|
|
213
219
|
}
|
|
214
220
|
await Promise.all(tasks);
|
|
215
221
|
}
|
|
216
|
-
async function convertTexteUrls(dataDir) {
|
|
217
|
-
const
|
|
218
|
-
|
|
219
|
-
|
|
222
|
+
async function convertTexteUrls(dataDir, options) {
|
|
223
|
+
const originalTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
224
|
+
const transformedTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER);
|
|
225
|
+
if (!options["silent"]) {
|
|
226
|
+
console.log(`Converting database textes data into files…`);
|
|
227
|
+
}
|
|
220
228
|
for await (const texte of findSenatTexteUrls()) {
|
|
229
|
+
const session = texte.session ?? UNDEFINED_SESSION;
|
|
230
|
+
if (options["fromSession"] && session < options["fromSession"]) {
|
|
231
|
+
continue;
|
|
232
|
+
}
|
|
221
233
|
const texteName = path.parse(texte.url).name;
|
|
222
|
-
const texteDir = path.join(originalTextesDir, `${
|
|
223
|
-
fs.ensureDirSync(texteDir);
|
|
234
|
+
const texteDir = path.join(originalTextesDir, `${session}`, texteName);
|
|
224
235
|
const metadata = {
|
|
225
236
|
name: texteName,
|
|
226
237
|
session: texte.session,
|
|
@@ -232,20 +243,27 @@ async function convertTexteUrls(dataDir) {
|
|
|
232
243
|
url_html: new URL(`${texteName}.html`, SENAT_TEXTE_BASE_URL),
|
|
233
244
|
url_pdf: new URL(`${texteName}.pdf`, SENAT_TEXTE_BASE_URL),
|
|
234
245
|
};
|
|
235
|
-
fs.
|
|
246
|
+
fs.outputJSONSync(path.join(texteDir, DOCUMENT_METADATA_FILE), metadata, {
|
|
236
247
|
spaces: 2,
|
|
237
248
|
});
|
|
249
|
+
if (options.fetchDocuments) {
|
|
250
|
+
await processTexte(metadata, originalTextesDir, transformedTextesDir, options);
|
|
251
|
+
}
|
|
238
252
|
}
|
|
239
253
|
}
|
|
240
|
-
async function convertRapportUrls(dataDir) {
|
|
241
|
-
const
|
|
242
|
-
|
|
243
|
-
|
|
254
|
+
async function convertRapportUrls(dataDir, options) {
|
|
255
|
+
const originalRapportsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
256
|
+
if (!options["silent"]) {
|
|
257
|
+
console.log(`Converting database rapports data into files…`);
|
|
258
|
+
}
|
|
244
259
|
for await (const rapport of findSenatRapportUrls()) {
|
|
260
|
+
const session = rapport.session ?? UNDEFINED_SESSION;
|
|
261
|
+
if (options["fromSession"] && session < options["fromSession"]) {
|
|
262
|
+
continue;
|
|
263
|
+
}
|
|
245
264
|
const parsedRapportUrl = path.parse(rapport.url);
|
|
246
265
|
const rapportName = parsedRapportUrl.name;
|
|
247
|
-
const rapportDir = path.join(
|
|
248
|
-
fs.ensureDirSync(rapportDir);
|
|
266
|
+
const rapportDir = path.join(originalRapportsDir, `${session}`, rapportName);
|
|
249
267
|
const rapportHtmlUrlBase = `${rapportName}_mono.html`;
|
|
250
268
|
const rapportHtmlUrl = path.format({
|
|
251
269
|
dir: parsedRapportUrl.dir,
|
|
@@ -263,12 +281,15 @@ async function convertRapportUrls(dataDir) {
|
|
|
263
281
|
url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
|
|
264
282
|
url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
|
|
265
283
|
};
|
|
266
|
-
fs.
|
|
284
|
+
fs.outputJSONSync(path.join(rapportDir, DOCUMENT_METADATA_FILE), metadata, {
|
|
267
285
|
spaces: 2,
|
|
268
286
|
});
|
|
287
|
+
if (options.fetchDocuments) {
|
|
288
|
+
await processRapport(metadata, originalRapportsDir, options);
|
|
289
|
+
}
|
|
269
290
|
}
|
|
270
291
|
}
|
|
271
|
-
async function convertDatasetSens(dataDir) {
|
|
292
|
+
async function convertDatasetSens(dataDir, options) {
|
|
272
293
|
const dataset = datasets.sens;
|
|
273
294
|
if (!options["silent"]) {
|
|
274
295
|
console.log(`Converting database ${dataset.database} data into files…`);
|
|
@@ -277,16 +298,18 @@ async function convertDatasetSens(dataDir) {
|
|
|
277
298
|
const senateursReorganizedDir = path.join(sensReorganizedRootDir, SENS_SENATEURS_FOLDER);
|
|
278
299
|
const circonscriptionsReorganizedDir = path.join(sensReorganizedRootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
|
|
279
300
|
const organismesReorganizedDir = path.join(sensReorganizedRootDir, SENS_ORGANISMES_FOLDER);
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
301
|
+
if (!options.keepDir) {
|
|
302
|
+
ensureAndClearDir(sensReorganizedRootDir);
|
|
303
|
+
ensureAndClearDir(senateursReorganizedDir);
|
|
304
|
+
ensureAndClearDir(circonscriptionsReorganizedDir);
|
|
305
|
+
ensureAndClearDir(organismesReorganizedDir);
|
|
306
|
+
}
|
|
284
307
|
for await (const sen of findAllSens()) {
|
|
285
308
|
if (options["verbose"]) {
|
|
286
309
|
console.log(`Converting ${sen["matricule"]} file…`);
|
|
287
310
|
}
|
|
288
311
|
const senFileName = `${sen["matricule"]}.json`;
|
|
289
|
-
fs.
|
|
312
|
+
fs.outputJSONSync(path.join(senateursReorganizedDir, senFileName), sen, {
|
|
290
313
|
spaces: 2,
|
|
291
314
|
});
|
|
292
315
|
}
|
|
@@ -295,16 +318,18 @@ async function convertDatasetSens(dataDir) {
|
|
|
295
318
|
console.log(`Converting ${circonscription["identifiant"]} file…`);
|
|
296
319
|
}
|
|
297
320
|
const circonscriptionFileName = `${circonscription["identifiant"]}.json`;
|
|
298
|
-
fs.
|
|
321
|
+
fs.outputJSONSync(path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, {
|
|
322
|
+
spaces: 2,
|
|
323
|
+
});
|
|
299
324
|
}
|
|
300
325
|
for await (const organisme of findAllOrganismes()) {
|
|
301
326
|
if (options["verbose"]) {
|
|
302
327
|
console.log(`Converting ${organisme["code"]} file…`);
|
|
303
328
|
}
|
|
304
329
|
const organismeFileName = `${organisme["code"]}.json`;
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
330
|
+
fs.outputJSONSync(path.join(organismesReorganizedDir, organisme["type_code"], organismeFileName), organisme, {
|
|
331
|
+
spaces: 2,
|
|
332
|
+
});
|
|
308
333
|
}
|
|
309
334
|
}
|
|
310
335
|
convertData()
|
|
@@ -1 +1,2 @@
|
|
|
1
|
-
export
|
|
1
|
+
/**
 * Processes a single texte described by `texteMetadata`.
 *
 * NOTE(review): the implementation is not visible from this declaration;
 * presumably it retrieves the texte's documents into `originalTextesDir` and
 * writes derived output into `transformedTextesDir` — confirm against
 * `retrieve_documents.js`.
 *
 * @param texteMetadata Metadata object of the texte (untyped here).
 * @param originalTextesDir Directory path for the original textes.
 * @param transformedTextesDir Directory path for the transformed textes.
 * @param options Command-line options object (untyped here).
 * @returns A promise that resolves with no value.
 */
export declare function processTexte(texteMetadata: any, originalTextesDir: string, transformedTextesDir: string, options: any): Promise<void>;
|
|
2
|
+
/**
 * Processes a single rapport described by `rapportMetadata`.
 *
 * NOTE(review): the implementation is not visible from this declaration;
 * presumably it retrieves the rapport's documents into `originalRapportsDir`
 * — confirm against `retrieve_documents.js`.
 *
 * @param rapportMetadata Metadata object of the rapport (untyped here).
 * @param originalRapportsDir Directory path for the original rapports.
 * @param options Command-line options object (untyped here).
 * @returns A promise that resolves with no value.
 */
export declare function processRapport(rapportMetadata: any, originalRapportsDir: string, options: any): Promise<void>;
|