@tricoteuses/senat 2.20.21 → 2.20.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,228 @@
1
+ import { JSDOM } from "jsdom";
2
+ import { AKN_IDENTIFICATION_STRUCTURE_REGEXP, AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
3
+ import { DivisionType, } from "../types/texte";
4
/**
 * Turns every `workflow step` element found under the given element into a
 * plain step object.
 *
 * The step's `href` attribute (empty string when absent) is matched against
 * AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP; the named groups `type`,
 * `session`, `numTexte` and `version` are copied into the result, and a
 * missing or empty group becomes `null`.
 *
 * @param metaElement Element queried with `querySelectorAll("workflow step")`.
 * @returns One object per step element, in query order.
 */
function buildWorklow(metaElement) {
    return Array.from(metaElement.querySelectorAll("workflow step"), (stepElement) => {
        const href = stepElement.getAttribute("href") ?? "";
        const groups = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(href)?.groups;
        const rawDate = stepElement.getAttribute("date");
        return {
            eId: stepElement.getAttribute("eId"),
            // A missing or empty `date` attribute is reported as null rather than an invalid Date.
            date: rawDate ? new Date(rawDate) : null,
            type: groups?.["type"] || null,
            session: groups?.["session"] || null,
            numero: groups?.["numTexte"] || null,
            version: groups?.["version"] || null,
            outcome: stepElement.getAttribute("outcome"),
        };
    });
}
22
/**
 * Describes one structural node of the text as a flat division record.
 *
 * The record carries the caller-supplied index, the node's `eId` attribute,
 * its tag name, the matching `DivisionType` entry and up to two headings:
 * the first `num` element and the first `heading` element found with
 * `querySelector`, in that order. Articles additionally get an empty
 * `alineas` array.
 *
 * NOTE(review): `querySelector` searches all descendants, so a division with
 * no `num`/`heading` of its own may pick up one from a nested division —
 * confirm against real documents.
 *
 * @param node Element to describe.
 * @param index Position assigned to the division by the caller.
 * @returns The division record.
 */
function buildDivision(node, index) {
    const tag = node.nodeName;
    const toHeading = (element) => ({
        text: element.textContent?.trim() ?? null,
        html: element.innerHTML?.trim() ?? null,
    });
    const headings = [node.querySelector("num"), node.querySelector("heading")]
        .filter((element) => Boolean(element))
        .map((element) => toHeading(element));
    const division = {
        index,
        eId: node.getAttribute("eId"),
        tag,
        level: DivisionType[tag],
        headings,
    };
    // Only articles hold alineas; the property is appended last, as before.
    return tag === "article" ? { ...division, alineas: [] } : division;
}
59
+ function buildAlinea(contentNode, alineaNode) {
60
+ const eId = alineaNode.getAttribute("eId");
61
+ const heading = {
62
+ text: alineaNode.querySelector("num")?.textContent ?? null,
63
+ };
64
+ const pastille = alineaNode.getAttribute("data:pastille") ?? null;
65
+ return {
66
+ eId,
67
+ heading,
68
+ text: contentNode.textContent?.trim() ?? null,
69
+ html: contentNode.innerHTML?.trim() ?? null,
70
+ pastille,
71
+ };
72
+ }
73
/**
 * Creates a placeholder article division: empty `eId`, no headings and no
 * alineas, with the level looked up in `DivisionType` for "article".
 *
 * @param index Position assigned to the placeholder by the caller.
 * @returns The empty article record.
 */
function buildEmptyArticle(index) {
    const tag = "article";
    return {
        index,
        eId: "",
        tag,
        level: DivisionType[tag],
        headings: [],
        alineas: [],
    };
}
83
/**
 * Walks the tree under the given root depth-first and returns its structural
 * divisions as a flat, ordered list.
 *
 * - Nodes named `content` are not traversed.
 * - Nodes whose name is one of the division tags listed below are appended
 *   as divisions; a division's index is its position in the returned list.
 * - For an `alinea` node, each direct `content` child is converted with
 *   `buildAlinea` and attached to the most recently collected article. If no
 *   article has been collected yet, an empty one is created first.
 *
 * @param texteContentRoot Root node of the traversal.
 * @returns The collected divisions.
 */
function flattenTexte(texteContentRoot) {
    const divisionTags = new Set([
        "tome",
        "part",
        "book",
        "title",
        "subtitle",
        "chapter",
        "section",
        "subsection",
        "paragraph",
        "article",
    ]);
    const divisions = [];
    const visit = (node) => {
        const name = node.nodeName;
        if (name === "content") {
            return;
        }
        if (divisionTags.has(name)) {
            // The list only ever grows by one per division, so its length is the next index.
            divisions.push(buildDivision(node, divisions.length));
        }
        else if (name === "alinea") {
            // Direct `content` children are picked by hand because the `:scope`
            // selector does not work: https://github.com/jsdom/jsdom/issues/2998
            for (const child of Array.from(node.childNodes)) {
                if (child.nodeName !== "content") {
                    continue;
                }
                // Hypothesis: alineas should always be enclosed in articles
                let owningArticle = divisions.findLast((division) => division.tag === "article");
                if (!owningArticle) {
                    owningArticle = buildEmptyArticle(divisions.length);
                    divisions.push(owningArticle);
                }
                owningArticle.alineas.push(buildAlinea(child, node));
            }
        }
        if (node.hasChildNodes()) {
            for (const child of Array.from(node.childNodes)) {
                visit(child);
            }
        }
    };
    visit(texteContentRoot);
    return divisions;
}
127
/**
 * Extracts the metadata, workflow and divisions of a parsed texte document.
 *
 * Identification fields (`type`, `session`, `numero`, `version`) come from
 * matching the `FRBRExpression FRBRuri` value against
 * AKN_IDENTIFICATION_STRUCTURE_REGEXP. `session` is the part of the session
 * group before the first "-". Aliases and dates are read from `FRBRalias` /
 * `FRBRdate` elements under `<meta>`. Every value that is missing or empty is
 * reported as `null`; `workflow` and `divisions` fall back to empty arrays
 * when `<meta>` / `<body>` are absent.
 *
 * @param document Document queried with `querySelector`.
 * @returns The flattened texte record.
 */
export function transformTexte(document) {
    const metaElement = document.querySelector("meta");
    const preambleElement = document.querySelector("preamble");
    const bodyElement = document.querySelector("body");
    const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? "";
    const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
    // Value of the FRBRalias with the given name, or null when absent/empty.
    const readAlias = (name) => metaElement?.querySelector(`FRBRalias[name='${name}']`)?.getAttribute("value") || null;
    // Date of the FRBRdate with the given name, or null when absent/empty.
    const readDate = (name) => {
        const rawDate = metaElement?.querySelector(`FRBRdate[name='${name}']`)?.getAttribute("date");
        return rawDate ? new Date(rawDate) : null;
    };
    const numTexte = identificationParts?.["numTexte"];
    return {
        titre: preambleElement?.querySelector("docTitle")?.textContent || null,
        titreCourt: readAlias("intitule-court"),
        signetDossier: readAlias("signet-dossier-legislatif-senat"),
        urlDossierSenat: readAlias("url-senat"),
        urlDossierAssemblee: readAlias("url-AN"),
        type: identificationParts?.["type"] || null,
        // `split` always yields at least one element, so only a missing group gives null.
        session: identificationParts?.["session"]?.split("-")[0] ?? null,
        // Explicit radix: the number is always read as decimal.
        numero: numTexte ? parseInt(numTexte, 10) : null,
        datePresentation: readDate("#presentation"),
        dateDepot: readDate("#depot"),
        datePublicationXml: readDate("#publication-xml"),
        version: identificationParts?.["version"] || null,
        workflow: metaElement ? buildWorklow(metaElement) : [],
        divisions: bodyElement ? flattenTexte(bodyElement) : [],
    };
}
154
+ export function transformExposeDesMotifs(document) {
155
+ const sectionElements = document.querySelectorAll("section");
156
+ const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS");
157
+ for (const sectionElement of sectionElements) {
158
+ const firstParagraph = sectionElement.querySelector("p:first-of-type");
159
+ const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)");
160
+ if (!firstParagraph) {
161
+ continue;
162
+ }
163
+ const firstParagraphContent = firstParagraph.textContent;
164
+ const secondParagraphContent = secondParagraph?.textContent;
165
+ if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
166
+ if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
167
+ continue;
168
+ }
169
+ else {
170
+ secondParagraph.remove();
171
+ }
172
+ }
173
+ firstParagraph.remove();
174
+ return {
175
+ text: sectionElement.textContent?.trim() ?? null,
176
+ html: sectionElement.innerHTML?.trim() ?? null,
177
+ };
178
+ }
179
+ return null;
180
+ }
181
/**
 * Parses a texte XML string with jsdom and transforms it with
 * `transformTexte`.
 *
 * Failures are logged with `console.error` and reported as `null` instead of
 * being thrown. The jsdom window is closed once the data has been extracted,
 * whether or not the transformation succeeded.
 *
 * @param texteXml XML source of the texte.
 * @returns The transformed texte, or `null` when parsing/transforming failed.
 */
export function parseTexte(texteXml) {
    let dom = null;
    try {
        dom = new JSDOM(texteXml, {
            contentType: "text/xml",
        });
        return transformTexte(dom.window.document);
    }
    catch (error) {
        console.error(`Could not parse texte with error ${error}`);
        return null;
    }
    finally {
        // The result holds no DOM references, so the window can be shut down here.
        dom?.window.close();
    }
}
193
/**
 * Loads a texte XML file with `JSDOM.fromFile` and transforms it with
 * `transformTexte`.
 *
 * The original authors introduced this variant to prevent a memory leak, see
 * https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
 *
 * Failures are logged with `console.error` and reported as `null` instead of
 * being thrown. The jsdom window is closed once the data has been extracted,
 * whether or not the transformation succeeded.
 *
 * @param xmlFilePath Path of the XML file to read.
 * @returns A promise of the transformed texte, or of `null` on failure.
 */
export async function parseTexteFromFile(xmlFilePath) {
    let dom = null;
    try {
        dom = await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" });
        return transformTexte(dom.window.document);
    }
    catch (error) {
        console.error(`Could not parse texte with error ${error}`);
        return null;
    }
    finally {
        // The result holds no DOM references, so the window can be shut down here.
        dom?.window.close();
    }
}
205
/**
 * Parses an "exposé des motifs" HTML string with jsdom and extracts its
 * content with `transformExposeDesMotifs`.
 *
 * Failures are logged with `console.error` and reported as `null` instead of
 * being thrown. The jsdom window is closed once the strings have been
 * extracted, whether or not the transformation succeeded.
 *
 * @param exposeDesMotifsHtml HTML source of the document.
 * @returns The extracted `{ text, html }`, or `null` when nothing matched or
 *   parsing failed.
 */
export function parseExposeDesMotifs(exposeDesMotifsHtml) {
    let dom = null;
    try {
        dom = new JSDOM(exposeDesMotifsHtml, {
            contentType: "text/html",
        });
        return transformExposeDesMotifs(dom.window.document);
    }
    catch (error) {
        console.error(`Could not parse exposé des motifs with error ${error}`);
        return null;
    }
    finally {
        // Only plain strings are returned, so the window can be shut down here.
        dom?.window.close();
    }
}
217
/**
 * Loads an "exposé des motifs" HTML file with `JSDOM.fromFile` and extracts
 * its content with `transformExposeDesMotifs`.
 *
 * The original authors introduced this variant to prevent a memory leak, see
 * https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
 *
 * Failures are logged with `console.error` and reported as `null` instead of
 * being thrown. The jsdom window is closed once the strings have been
 * extracted, whether or not the transformation succeeded.
 *
 * @param htmlFilePath Path of the HTML file to read.
 * @returns A promise of the extracted `{ text, html }`, or of `null` when
 *   nothing matched or loading failed.
 */
export async function parseExposeDesMotifsFromFile(htmlFilePath) {
    let dom = null;
    try {
        dom = await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" });
        return transformExposeDesMotifs(dom.window.document);
    }
    catch (error) {
        console.error(`Could not parse exposé des motifs with error ${error}`);
        return null;
    }
    finally {
        // Only plain strings are returned, so the window can be shut down here.
        dom?.window.close();
    }
}
@@ -5,9 +5,10 @@ import path from "path";
5
5
  import pLimit from "p-limit";
6
6
  import * as git from "../git";
7
7
  import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
8
- import { DATA_ORIGINAL_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
8
+ import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
9
9
  import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findSenatRapportUrls, findSenatTexteUrls, } from "../model";
10
- import { createActesLegislatifs } from "../model/dosleg";
10
+ import { processRapport, processTexte } from "./retrieve_documents";
11
+ import { buildActesLegislatifs } from "../model/dosleg";
11
12
  import { UNDEFINED_SESSION } from "../types/sessions";
12
13
  import { getSessionFromDate, getSessionFromSignet } from "./datautil";
13
14
  import { commonOptions } from "./shared/cli_helpers";
@@ -73,7 +74,7 @@ async function convertData() {
73
74
  }
74
75
  if (enabledDatasets & EnabledDatasets.Questions) {
75
76
  try {
76
- await convertDatasetQuestions(dataDir);
77
+ await convertDatasetQuestions(dataDir, options);
77
78
  const questionsDir = path.join(dataDir, datasets.questions.database);
78
79
  exitCode = commitGit(questionsDir, options, exitCode);
79
80
  }
@@ -83,7 +84,7 @@ async function convertData() {
83
84
  }
84
85
  if (enabledDatasets & EnabledDatasets.Sens) {
85
86
  try {
86
- await convertDatasetSens(dataDir);
87
+ await convertDatasetSens(dataDir, options);
87
88
  const sensDir = path.join(dataDir, datasets.sens.database);
88
89
  exitCode = commitGit(sensDir, options, exitCode);
89
90
  }
@@ -102,7 +103,9 @@ async function convertDatasetAmeli(dataDir, options) {
102
103
  console.log(`Converting database ${dataset.database} data into files…`);
103
104
  }
104
105
  const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
105
- await fs.ensureDir(ameliReorganizedRootDir);
106
+ if (!options.keepDir) {
107
+ ensureAndClearDir(ameliReorganizedRootDir);
108
+ }
106
109
  for await (const amendement of findAllAmendements(options["fromSession"])) {
107
110
  if (options["verbose"]) {
108
111
  console.log(`Converting ${amendement["numero"]} file…`);
@@ -110,11 +113,9 @@ async function convertDatasetAmeli(dataDir, options) {
110
113
  const session = String(amendement["session"]) || UNDEFINED_SESSION;
111
114
  const signetDossierLegislatif = amendement["signet_dossier_legislatif"] ||
112
115
  `${amendement["nature_texte"]}-${amendement["numero_texte"]}`.toLowerCase();
113
- const ameliReorganizedDir = path.join(ameliReorganizedRootDir, String(session), signetDossierLegislatif);
114
- await fs.ensureDir(ameliReorganizedDir);
115
116
  const amendementFileName = `${amendement["numero"]}.json`;
116
- const filePath = path.join(ameliReorganizedDir, amendementFileName);
117
- await fs.writeJSON(filePath, amendement, { spaces: 2 });
117
+ const filePath = path.join(ameliReorganizedRootDir, String(session), signetDossierLegislatif, amendementFileName);
118
+ await fs.outputJSON(filePath, amendement, { spaces: 2 });
118
119
  }
119
120
  }
120
121
  async function convertDatasetDebats(dataDir, options) {
@@ -123,7 +124,9 @@ async function convertDatasetDebats(dataDir, options) {
123
124
  console.log(`Converting database ${dataset.database} data into files…`);
124
125
  }
125
126
  const debatsReorganizedRootDir = path.join(dataDir, dataset.database);
126
- ensureAndClearDir(debatsReorganizedRootDir);
127
+ if (!options.keepDir) {
128
+ ensureAndClearDir(debatsReorganizedRootDir);
129
+ }
127
130
  for await (const debat of findAllDebats()) {
128
131
  if (options["verbose"]) {
129
132
  console.log(`Converting ${debat.id} file…`);
@@ -132,11 +135,9 @@ async function convertDatasetDebats(dataDir, options) {
132
135
  if (options["fromSession"] && session < options["fromSession"]) {
133
136
  continue;
134
137
  }
135
- const debatsReorganizedDir = path.join(debatsReorganizedRootDir, String(session));
136
- await fs.ensureDir(debatsReorganizedDir);
137
138
  const debatFileName = `${debat.id}.json`;
138
- const filePath = path.join(debatsReorganizedDir, debatFileName);
139
- await fs.writeJSON(filePath, debat, { spaces: 2 });
139
+ const filePath = path.join(debatsReorganizedRootDir, String(session), debatFileName);
140
+ await fs.outputJSON(filePath, debat, { spaces: 2 });
140
141
  }
141
142
  }
142
143
  async function convertDatasetDosLeg(dataDir, options) {
@@ -146,28 +147,30 @@ async function convertDatasetDosLeg(dataDir, options) {
146
147
  }
147
148
  const doslegReorganizedRootDir = path.join(dataDir, dataset.database);
148
149
  const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER);
149
- ensureAndClearDir(doslegReorganizedRootDir);
150
- ensureAndClearDir(dossiersReorganizedDir);
151
- for await (const loi of findAllDossiers()) {
150
+ if (!options.keepDir) {
151
+ ensureAndClearDir(doslegReorganizedRootDir);
152
+ ensureAndClearDir(dossiersReorganizedDir);
153
+ }
154
+ for await (const dossier of findAllDossiers()) {
152
155
  if (options["verbose"]) {
153
- console.log(`Converting ${loi["signet"]} file…`);
156
+ console.log(`Converting ${dossier["signet"]} file…`);
154
157
  }
155
- let loiReorganizedDir = path.join(dossiersReorganizedDir, String(UNDEFINED_SESSION));
156
- const session = getSessionFromSignet(loi["signet"]) || UNDEFINED_SESSION;
158
+ let dossierReorganizedDir = path.join(dossiersReorganizedDir, String(UNDEFINED_SESSION));
159
+ const session = getSessionFromSignet(dossier["signet"]) || UNDEFINED_SESSION;
157
160
  if (options["fromSession"] && session < options["fromSession"]) {
158
161
  continue;
159
162
  }
160
- loiReorganizedDir = path.join(dossiersReorganizedDir, String(session));
161
- await fs.ensureDir(loiReorganizedDir);
162
- // Ajout des actes législatifs au dossier
163
- const actesLegislatifs = createActesLegislatifs(loi);
164
- const loiWithActes = { ...loi, actes_legislatifs: actesLegislatifs };
165
- const dossierFile = `${loi["signet"]}.json`;
166
- const filePath = path.join(loiReorganizedDir, dossierFile);
167
- await fs.writeJSON(filePath, loiWithActes, { spaces: 2 });
163
+ dossierReorganizedDir = path.join(dossiersReorganizedDir, String(session));
164
+ const actesBrutsNormalises = buildActesLegislatifs(dossier);
165
+ const dossierWithActes = {
166
+ ...dossier,
167
+ actes_legislatifs: actesBrutsNormalises
168
+ };
169
+ const dossierFile = `${dossier["signet"]}.json`;
170
+ await fs.outputJSON(path.join(dossierReorganizedDir, dossierFile), dossierWithActes, { spaces: 2 });
168
171
  }
169
- await convertTexteUrls(dataDir);
170
- await convertRapportUrls(dataDir);
172
+ await convertTexteUrls(dataDir, options);
173
+ await convertRapportUrls(dataDir, options);
171
174
  }
172
175
  async function convertDatasetScrutins(dataDir, options) {
173
176
  const dataset = datasets.dosleg;
@@ -175,7 +178,9 @@ async function convertDatasetScrutins(dataDir, options) {
175
178
  console.log(`Converting database scrutins (${dataset.database}) data into files…`);
176
179
  }
177
180
  const scrutinsReorganizedDir = path.join(dataDir, SCRUTINS_FOLDER);
178
- ensureAndClearDir(scrutinsReorganizedDir);
181
+ if (!options.keepDir) {
182
+ ensureAndClearDir(scrutinsReorganizedDir);
183
+ }
179
184
  for await (const scrutin of findAllScrutins(options["fromSession"])) {
180
185
  if (options["verbose"]) {
181
186
  console.log(`Converting ${scrutin["numero"]} file…`);
@@ -183,20 +188,21 @@ async function convertDatasetScrutins(dataDir, options) {
183
188
  let scrutinReorganizedDir = path.join(scrutinsReorganizedDir, String(UNDEFINED_SESSION));
184
189
  const session = scrutin["session"] || UNDEFINED_SESSION;
185
190
  scrutinReorganizedDir = path.join(scrutinsReorganizedDir, String(session));
186
- await fs.ensureDir(scrutinReorganizedDir);
187
191
  const scrutinFileName = `${scrutin["numero"]}.json`;
188
- await fs.writeJSON(path.join(scrutinReorganizedDir, scrutinFileName), scrutin, {
192
+ await fs.outputJSON(path.join(scrutinReorganizedDir, scrutinFileName), scrutin, {
189
193
  spaces: 2,
190
194
  });
191
195
  }
192
196
  }
193
- async function convertDatasetQuestions(dataDir) {
197
+ async function convertDatasetQuestions(dataDir, options) {
194
198
  const dataset = datasets.questions;
195
199
  if (!options["silent"]) {
196
200
  console.log(`Converting database ${dataset.database} data into files…`);
197
201
  }
198
202
  const questionsReorganizedRootDir = path.join(dataDir, dataset.database);
199
- ensureAndClearDir(questionsReorganizedRootDir);
203
+ if (!options.keepDir) {
204
+ ensureAndClearDir(questionsReorganizedRootDir);
205
+ }
200
206
  const limit = pLimit(10);
201
207
  const tasks = [];
202
208
  for await (const question of findAllQuestions()) {
@@ -205,22 +211,27 @@ async function convertDatasetQuestions(dataDir) {
205
211
  console.log(`Converting ${question["reference"]} file…`);
206
212
  }
207
213
  const legislature = question["legislature"] ? question["legislature"] : 0;
208
- const questionReorganizedDir = path.join(questionsReorganizedRootDir, String(legislature));
209
- await fs.ensureDir(questionReorganizedDir);
210
214
  const questionFileName = `${question["reference"]}.json`;
211
- await fs.writeJSON(path.join(questionReorganizedDir, questionFileName), question, { spaces: 2 });
215
+ await fs.outputJSON(path.join(questionsReorganizedRootDir, String(legislature), questionFileName), question, {
216
+ spaces: 2,
217
+ });
212
218
  }));
213
219
  }
214
220
  await Promise.all(tasks);
215
221
  }
216
- async function convertTexteUrls(dataDir) {
217
- const textesDir = path.join(dataDir, TEXTE_FOLDER);
218
- fs.ensureDirSync(textesDir);
219
- const originalTextesDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);
222
+ async function convertTexteUrls(dataDir, options) {
223
+ const originalTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
224
+ const transformedTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER);
225
+ if (!options["silent"]) {
226
+ console.log(`Converting database textes data into files…`);
227
+ }
220
228
  for await (const texte of findSenatTexteUrls()) {
229
+ const session = texte.session ?? UNDEFINED_SESSION;
230
+ if (options["fromSession"] && session < options["fromSession"]) {
231
+ continue;
232
+ }
221
233
  const texteName = path.parse(texte.url).name;
222
- const texteDir = path.join(originalTextesDir, `${texte.session ?? UNDEFINED_SESSION}`, texteName);
223
- fs.ensureDirSync(texteDir);
234
+ const texteDir = path.join(originalTextesDir, `${session}`, texteName);
224
235
  const metadata = {
225
236
  name: texteName,
226
237
  session: texte.session,
@@ -232,20 +243,27 @@ async function convertTexteUrls(dataDir) {
232
243
  url_html: new URL(`${texteName}.html`, SENAT_TEXTE_BASE_URL),
233
244
  url_pdf: new URL(`${texteName}.pdf`, SENAT_TEXTE_BASE_URL),
234
245
  };
235
- fs.writeJSONSync(path.join(texteDir, DOCUMENT_METADATA_FILE), metadata, {
246
+ fs.outputJSONSync(path.join(texteDir, DOCUMENT_METADATA_FILE), metadata, {
236
247
  spaces: 2,
237
248
  });
249
+ if (options.fetchDocuments) {
250
+ await processTexte(metadata, originalTextesDir, transformedTextesDir, options);
251
+ }
238
252
  }
239
253
  }
240
- async function convertRapportUrls(dataDir) {
241
- const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
242
- fs.ensureDirSync(rapportsDir);
243
- const originalTextesDir = path.join(rapportsDir, DATA_ORIGINAL_FOLDER);
254
+ async function convertRapportUrls(dataDir, options) {
255
+ const originalRapportsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
256
+ if (!options["silent"]) {
257
+ console.log(`Converting database rapports data into files…`);
258
+ }
244
259
  for await (const rapport of findSenatRapportUrls()) {
260
+ const session = rapport.session ?? UNDEFINED_SESSION;
261
+ if (options["fromSession"] && session < options["fromSession"]) {
262
+ continue;
263
+ }
245
264
  const parsedRapportUrl = path.parse(rapport.url);
246
265
  const rapportName = parsedRapportUrl.name;
247
- const rapportDir = path.join(originalTextesDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName);
248
- fs.ensureDirSync(rapportDir);
266
+ const rapportDir = path.join(originalRapportsDir, `${session}`, rapportName);
249
267
  const rapportHtmlUrlBase = `${rapportName}_mono.html`;
250
268
  const rapportHtmlUrl = path.format({
251
269
  dir: parsedRapportUrl.dir,
@@ -263,12 +281,15 @@ async function convertRapportUrls(dataDir) {
263
281
  url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
264
282
  url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
265
283
  };
266
- fs.writeJSONSync(path.join(rapportDir, DOCUMENT_METADATA_FILE), metadata, {
284
+ fs.outputJSONSync(path.join(rapportDir, DOCUMENT_METADATA_FILE), metadata, {
267
285
  spaces: 2,
268
286
  });
287
+ if (options.fetchDocuments) {
288
+ await processRapport(metadata, originalRapportsDir, options);
289
+ }
269
290
  }
270
291
  }
271
- async function convertDatasetSens(dataDir) {
292
+ async function convertDatasetSens(dataDir, options) {
272
293
  const dataset = datasets.sens;
273
294
  if (!options["silent"]) {
274
295
  console.log(`Converting database ${dataset.database} data into files…`);
@@ -277,16 +298,18 @@ async function convertDatasetSens(dataDir) {
277
298
  const senateursReorganizedDir = path.join(sensReorganizedRootDir, SENS_SENATEURS_FOLDER);
278
299
  const circonscriptionsReorganizedDir = path.join(sensReorganizedRootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
279
300
  const organismesReorganizedDir = path.join(sensReorganizedRootDir, SENS_ORGANISMES_FOLDER);
280
- ensureAndClearDir(sensReorganizedRootDir);
281
- ensureAndClearDir(senateursReorganizedDir);
282
- ensureAndClearDir(circonscriptionsReorganizedDir);
283
- ensureAndClearDir(organismesReorganizedDir);
301
+ if (!options.keepDir) {
302
+ ensureAndClearDir(sensReorganizedRootDir);
303
+ ensureAndClearDir(senateursReorganizedDir);
304
+ ensureAndClearDir(circonscriptionsReorganizedDir);
305
+ ensureAndClearDir(organismesReorganizedDir);
306
+ }
284
307
  for await (const sen of findAllSens()) {
285
308
  if (options["verbose"]) {
286
309
  console.log(`Converting ${sen["matricule"]} file…`);
287
310
  }
288
311
  const senFileName = `${sen["matricule"]}.json`;
289
- fs.writeJSONSync(path.join(senateursReorganizedDir, senFileName), sen, {
312
+ fs.outputJSONSync(path.join(senateursReorganizedDir, senFileName), sen, {
290
313
  spaces: 2,
291
314
  });
292
315
  }
@@ -295,16 +318,18 @@ async function convertDatasetSens(dataDir) {
295
318
  console.log(`Converting ${circonscription["identifiant"]} file…`);
296
319
  }
297
320
  const circonscriptionFileName = `${circonscription["identifiant"]}.json`;
298
- fs.writeJSONSync(path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, { spaces: 2 });
321
+ fs.outputJSONSync(path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, {
322
+ spaces: 2,
323
+ });
299
324
  }
300
325
  for await (const organisme of findAllOrganismes()) {
301
326
  if (options["verbose"]) {
302
327
  console.log(`Converting ${organisme["code"]} file…`);
303
328
  }
304
329
  const organismeFileName = `${organisme["code"]}.json`;
305
- const organismeDir = path.join(organismesReorganizedDir, organisme["type_code"]);
306
- fs.ensureDirSync(organismeDir);
307
- fs.writeJSONSync(path.join(organismeDir, organismeFileName), organisme, { spaces: 2 });
330
+ fs.outputJSONSync(path.join(organismesReorganizedDir, organisme["type_code"], organismeFileName), organisme, {
331
+ spaces: 2,
332
+ });
308
333
  }
309
334
  }
310
335
  convertData()
@@ -5,5 +5,8 @@ try {
5
5
  execSync(`tsx src/scripts/convert_data.ts ${args}`, { stdio: "inherit" });
6
6
  }
7
7
  catch (error) {
8
- process.exit(1);
8
+ if (error.status !== 10) {
9
+ console.error("Error during data retrieval:", error);
10
+ process.exit(error.status || 1);
11
+ }
9
12
  }
@@ -1 +1,2 @@
1
- export {};
1
+ export declare function processTexte(texteMetadata: any, originalTextesDir: string, transformedTextesDir: string, options: any): Promise<void>;
2
+ export declare function processRapport(rapportMetadata: any, originalRapportsDir: string, options: any): Promise<void>;