@tricoteuses/senat 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/config.d.ts +1 -0
- package/lib/config.js +14 -45
- package/lib/databases.js +86 -143
- package/lib/datasets.js +78 -83
- package/lib/index.d.ts +7 -4
- package/lib/index.js +42 -419
- package/lib/loaders.js +149 -654
- package/lib/model/ameli.js +83 -21
- package/lib/model/debats.js +0 -1
- package/lib/model/dosleg.d.ts +1 -1
- package/lib/model/dosleg.js +179 -73
- package/lib/model/index.d.ts +3 -3
- package/lib/model/index.js +12 -46
- package/lib/model/questions.js +68 -39
- package/lib/model/sens.js +383 -113
- package/lib/model/texte.js +220 -290
- package/lib/model/util.js +9 -26
- package/lib/raw_types/ameli.js +5 -6
- package/lib/raw_types/debats.js +5 -6
- package/lib/raw_types/dosleg.js +5 -6
- package/lib/raw_types/questions.js +5 -6
- package/lib/raw_types/sens.js +5 -6
- package/lib/raw_types_schemats/ameli.js +1 -43
- package/lib/raw_types_schemats/debats.js +1 -22
- package/lib/raw_types_schemats/dosleg.js +1 -96
- package/lib/raw_types_schemats/questions.js +1 -22
- package/lib/raw_types_schemats/sens.js +1 -112
- package/lib/scripts/convert_data.js +181 -631
- package/lib/scripts/datautil.js +17 -60
- package/lib/scripts/parse_textes.js +46 -129
- package/lib/scripts/retrieve_documents.js +247 -513
- package/lib/scripts/retrieve_open_data.js +211 -368
- package/lib/scripts/retrieve_senateurs_photos.js +144 -239
- package/lib/scripts/shared/cli_helpers.js +30 -30
- package/lib/scripts/shared/util.js +28 -94
- package/lib/strings.js +20 -45
- package/lib/types/ameli.d.ts +1 -1
- package/lib/types/ameli.js +14 -25
- package/lib/types/debats.d.ts +1 -1
- package/lib/types/debats.js +3 -21
- package/lib/types/dosleg.d.ts +1 -1
- package/lib/types/dosleg.js +152 -119
- package/lib/types/questions.d.ts +1 -1
- package/lib/types/questions.js +1 -13
- package/lib/types/sens.d.ts +1 -1
- package/lib/types/sens.js +1 -13
- package/lib/types/sessions.js +44 -49
- package/lib/types/texte.js +17 -22
- package/lib/validators/config.js +47 -111
- package/lib/validators/senat.js +1 -5
- package/package.json +16 -38
- package/lib/aggregates.d.ts +0 -52
- package/lib/aggregates.mjs +0 -930
- package/lib/aggregates.ts +0 -833
- package/lib/config.mjs +0 -16
- package/lib/config.ts +0 -26
- package/lib/data/legislatures.json +0 -38
- package/lib/databases.mjs +0 -57
- package/lib/databases.ts +0 -71
- package/lib/datasets.mjs +0 -78
- package/lib/datasets.ts +0 -118
- package/lib/fields.d.ts +0 -10
- package/lib/fields.mjs +0 -68
- package/lib/fields.ts +0 -29
- package/lib/index.mjs +0 -4
- package/lib/index.ts +0 -42
- package/lib/inserters.d.ts +0 -98
- package/lib/inserters.mjs +0 -500
- package/lib/inserters.ts +0 -521
- package/lib/loaders.mjs +0 -158
- package/lib/loaders.ts +0 -271
- package/lib/model/ameli.mjs +0 -84
- package/lib/model/ameli.ts +0 -100
- package/lib/model/debats.mjs +0 -1
- package/lib/model/debats.ts +0 -0
- package/lib/model/dosleg.mjs +0 -196
- package/lib/model/dosleg.ts +0 -240
- package/lib/model/index.mjs +0 -4
- package/lib/model/index.ts +0 -14
- package/lib/model/questions.mjs +0 -71
- package/lib/model/questions.ts +0 -93
- package/lib/model/sens.mjs +0 -415
- package/lib/model/sens.ts +0 -516
- package/lib/model/texte.mjs +0 -208
- package/lib/model/texte.ts +0 -229
- package/lib/model/util.mjs +0 -19
- package/lib/model/util.ts +0 -32
- package/lib/raw_types/ameli.mjs +0 -5
- package/lib/raw_types/ameli.ts +0 -951
- package/lib/raw_types/debats.mjs +0 -5
- package/lib/raw_types/debats.ts +0 -222
- package/lib/raw_types/dosleg.mjs +0 -5
- package/lib/raw_types/dosleg.ts +0 -3625
- package/lib/raw_types/questions.mjs +0 -5
- package/lib/raw_types/questions.ts +0 -427
- package/lib/raw_types/sens.mjs +0 -5
- package/lib/raw_types/sens.ts +0 -4499
- package/lib/raw_types_kysely/ameli.d.ts +0 -6
- package/lib/raw_types_kysely/ameli.mjs +0 -7
- package/lib/raw_types_kysely/ameli.ts +0 -6
- package/lib/raw_types_kysely/debats.d.ts +0 -6
- package/lib/raw_types_kysely/debats.mjs +0 -7
- package/lib/raw_types_kysely/debats.ts +0 -6
- package/lib/raw_types_kysely/dosleg.d.ts +0 -6
- package/lib/raw_types_kysely/dosleg.mjs +0 -7
- package/lib/raw_types_kysely/dosleg.ts +0 -6
- package/lib/raw_types_kysely/questions.d.ts +0 -6
- package/lib/raw_types_kysely/questions.mjs +0 -7
- package/lib/raw_types_kysely/questions.ts +0 -6
- package/lib/raw_types_kysely/sens.d.ts +0 -6
- package/lib/raw_types_kysely/sens.mjs +0 -7
- package/lib/raw_types_kysely/sens.ts +0 -6
- package/lib/raw_types_kysely/texte.d.ts +0 -45
- package/lib/raw_types_kysely/texte.mjs +0 -7
- package/lib/raw_types_kysely/texte.ts +0 -53
- package/lib/raw_types_schemats/ameli.mjs +0 -2
- package/lib/raw_types_schemats/ameli.ts +0 -601
- package/lib/raw_types_schemats/debats.mjs +0 -2
- package/lib/raw_types_schemats/debats.ts +0 -145
- package/lib/raw_types_schemats/dosleg.mjs +0 -2
- package/lib/raw_types_schemats/dosleg.ts +0 -2195
- package/lib/raw_types_schemats/questions.mjs +0 -2
- package/lib/raw_types_schemats/questions.ts +0 -251
- package/lib/raw_types_schemats/sens.mjs +0 -2
- package/lib/raw_types_schemats/sens.ts +0 -2907
- package/lib/scripts/convert_data.mjs +0 -181
- package/lib/scripts/convert_data.ts +0 -243
- package/lib/scripts/datautil.mjs +0 -16
- package/lib/scripts/datautil.ts +0 -19
- package/lib/scripts/images/transparent_150x192.jpg +0 -0
- package/lib/scripts/images/transparent_155x225.jpg +0 -0
- package/lib/scripts/parse_textes.mjs +0 -46
- package/lib/scripts/parse_textes.ts +0 -65
- package/lib/scripts/retrieve_documents.mjs +0 -249
- package/lib/scripts/retrieve_documents.ts +0 -298
- package/lib/scripts/retrieve_open_data.mjs +0 -217
- package/lib/scripts/retrieve_open_data.ts +0 -274
- package/lib/scripts/retrieve_senateurs_photos.mjs +0 -147
- package/lib/scripts/retrieve_senateurs_photos.ts +0 -177
- package/lib/scripts/retrieve_textes.d.ts +0 -1
- package/lib/scripts/retrieve_textes.mjs +0 -328
- package/lib/scripts/retrieve_textes.ts +0 -143
- package/lib/scripts/shared/cli_helpers.ts +0 -36
- package/lib/scripts/shared/util.ts +0 -33
- package/lib/src/aggregates.d.ts +0 -52
- package/lib/src/aggregates.mjs +0 -726
- package/lib/src/config.d.ts +0 -2
- package/lib/src/config.mjs +0 -16
- package/lib/src/databases.d.ts +0 -18
- package/lib/src/databases.mjs +0 -55
- package/lib/src/datasets.d.ts +0 -28
- package/lib/src/datasets.mjs +0 -78
- package/lib/src/fields.d.ts +0 -10
- package/lib/src/fields.mjs +0 -22
- package/lib/src/index.d.ts +0 -8
- package/lib/src/index.mjs +0 -7
- package/lib/src/inserters.d.ts +0 -98
- package/lib/src/inserters.mjs +0 -360
- package/lib/src/loaders.d.ts +0 -36
- package/lib/src/loaders.mjs +0 -107
- package/lib/src/model/ameli.d.ts +0 -4
- package/lib/src/model/ameli.js +0 -57
- package/lib/src/model/debats.d.ts +0 -4
- package/lib/src/model/debats.js +0 -43
- package/lib/src/model/dosleg.d.ts +0 -197
- package/lib/src/model/dosleg.js +0 -169
- package/lib/src/model/index.d.ts +0 -4
- package/lib/src/model/index.js +0 -4
- package/lib/src/model/questions.d.ts +0 -89
- package/lib/src/model/questions.js +0 -76
- package/lib/src/model/sens.d.ts +0 -390
- package/lib/src/model/sens.js +0 -339
- package/lib/src/model/texte.d.ts +0 -7
- package/lib/src/model/texte.js +0 -183
- package/lib/src/raw_types_kysely/ameli.d.ts +0 -915
- package/lib/src/raw_types_kysely/ameli.js +0 -5
- package/lib/src/raw_types_kysely/debats.d.ts +0 -207
- package/lib/src/raw_types_kysely/debats.js +0 -5
- package/lib/src/raw_types_kysely/dosleg.d.ts +0 -3532
- package/lib/src/raw_types_kysely/dosleg.js +0 -5
- package/lib/src/raw_types_kysely/questions.d.ts +0 -414
- package/lib/src/raw_types_kysely/questions.js +0 -5
- package/lib/src/raw_types_kysely/sens.d.ts +0 -4394
- package/lib/src/raw_types_kysely/sens.js +0 -5
- package/lib/src/raw_types_schemats/ameli.d.ts +0 -541
- package/lib/src/raw_types_schemats/ameli.js +0 -2
- package/lib/src/raw_types_schemats/debats.d.ts +0 -127
- package/lib/src/raw_types_schemats/debats.js +0 -2
- package/lib/src/raw_types_schemats/dosleg.d.ts +0 -2027
- package/lib/src/raw_types_schemats/dosleg.js +0 -2
- package/lib/src/raw_types_schemats/questions.d.ts +0 -231
- package/lib/src/raw_types_schemats/questions.js +0 -2
- package/lib/src/raw_types_schemats/sens.d.ts +0 -2709
- package/lib/src/raw_types_schemats/sens.js +0 -2
- package/lib/src/scripts/convert_data.d.ts +0 -1
- package/lib/src/scripts/convert_data.js +0 -95
- package/lib/src/scripts/datautil.d.ts +0 -5
- package/lib/src/scripts/datautil.js +0 -16
- package/lib/src/scripts/parse_textes.d.ts +0 -1
- package/lib/src/scripts/parse_textes.js +0 -47
- package/lib/src/scripts/retrieve_documents.d.ts +0 -1
- package/lib/src/scripts/retrieve_documents.js +0 -258
- package/lib/src/scripts/retrieve_open_data.d.ts +0 -1
- package/lib/src/scripts/retrieve_open_data.js +0 -214
- package/lib/src/scripts/retrieve_senateurs_photos.d.ts +0 -1
- package/lib/src/scripts/retrieve_senateurs_photos.js +0 -147
- package/lib/src/scripts/shared/cli_helpers.d.ts +0 -44
- package/lib/src/scripts/shared/cli_helpers.js +0 -32
- package/lib/src/scripts/shared/util.d.ts +0 -3
- package/lib/src/scripts/shared/util.js +0 -28
- package/lib/src/strings.d.ts +0 -1
- package/lib/src/strings.mjs +0 -18
- package/lib/src/types/ameli.d.ts +0 -10
- package/lib/src/types/ameli.js +0 -13
- package/lib/src/types/debats.d.ts +0 -4
- package/lib/src/types/debats.js +0 -2
- package/lib/src/types/dosleg.d.ts +0 -98
- package/lib/src/types/dosleg.js +0 -151
- package/lib/src/types/questions.d.ts +0 -2
- package/lib/src/types/questions.js +0 -1
- package/lib/src/types/sens.d.ts +0 -10
- package/lib/src/types/sens.js +0 -1
- package/lib/src/types/sessions.d.ts +0 -42
- package/lib/src/types/sessions.js +0 -43
- package/lib/src/types/texte.d.ts +0 -61
- package/lib/src/types/texte.js +0 -16
- package/lib/src/validators/config.d.ts +0 -1
- package/lib/src/validators/config.js +0 -54
- package/lib/src/validators/senat.d.ts +0 -0
- package/lib/src/validators/senat.js +0 -24
- package/lib/strings.mjs +0 -18
- package/lib/strings.ts +0 -26
- package/lib/types/ameli.mjs +0 -13
- package/lib/types/ameli.ts +0 -21
- package/lib/types/debats.mjs +0 -2
- package/lib/types/debats.ts +0 -6
- package/lib/types/dosleg.mjs +0 -151
- package/lib/types/dosleg.ts +0 -284
- package/lib/types/questions.mjs +0 -1
- package/lib/types/questions.ts +0 -3
- package/lib/types/sens.mjs +0 -1
- package/lib/types/sens.ts +0 -12
- package/lib/types/sessions.mjs +0 -43
- package/lib/types/sessions.ts +0 -42
- package/lib/types/texte.mjs +0 -16
- package/lib/types/texte.ts +0 -76
- package/lib/typings/windows-1252.d.js +0 -2
- package/lib/typings/windows-1252.d.mjs +0 -2
- package/lib/typings/windows-1252.d.ts +0 -11
- package/lib/validators/config.mjs +0 -54
- package/lib/validators/config.ts +0 -79
- package/lib/validators/senat.mjs +0 -24
- package/lib/validators/senat.ts +0 -26
|
@@ -1,181 +0,0 @@
|
|
|
1
|
-
import assert from "assert";
|
|
2
|
-
import commandLineArgs from "command-line-args";
|
|
3
|
-
import fs from "fs-extra";
|
|
4
|
-
import path from "path";
|
|
5
|
-
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
|
|
6
|
-
import { DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER, } from "../loaders";
|
|
7
|
-
import { findAllAmendements, findAllCirconscriptions, findAllLois, findAllOrganismes, findAllQuestions, findAllSens, } from "../model";
|
|
8
|
-
import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg";
|
|
9
|
-
import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION } from "./datautil";
|
|
10
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
11
|
-
import { ensureAndClearDir } from "./shared/util";
|
|
12
|
-
// Command-line handling: this script accepts only the options shared by all scripts.
const optionsDefinitions = [...commonOptions];
const options = commandLineArgs(optionsDefinitions);

// Base URLs on www.senat.fr against which document file names are resolved
// when building the links stored in the metadata files.
const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
|
|
20
|
-
/**
 * Export every enabled database to JSON files below `options.dataDir`.
 *
 * There is one section per dataset flag (Ameli, DosLeg, Questions, Sens). Each
 * section passes the dataset's output directory to `ensureAndClearDir`, then
 * writes every record yielded by the matching `findAll…` generator to its own
 * JSON file (2-space indentation). The DosLeg section additionally writes the
 * texte and rapport metadata files.
 *
 * @returns {Promise<void>}
 * @throws {AssertionError} when `options.dataDir` is not set.
 */
async function convertData() {
  const enabledDatasets = getEnabledDatasets(options.categories);
  const dataDir = options.dataDir;
  assert(dataDir, "Missing argument: data directory");

  // The timer is always started; its result is only printed when not silent.
  console.time("data transformation time");

  if (enabledDatasets & EnabledDatasets.Ameli) {
    const dataset = datasets.ameli;
    if (!options.silent) {
      console.log(`Converting database ${dataset.database} data into files…`);
    }
    const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
    ensureAndClearDir(ameliReorganizedRootDir);

    for await (const amendement of findAllAmendements()) {
      if (options.verbose) {
        console.log(`Converting ${amendement.numero} file…`);
      }
      // String(null) and String(undefined) yield the truthy strings "null" and
      // "undefined", so a missing session must be detected before converting;
      // otherwise the UNDEFINED_SESSION fallback can never apply.
      const session = amendement.session == null
        ? UNDEFINED_SESSION
        : String(amendement.session) || UNDEFINED_SESSION;
      // Without a dossier signet, group by "<nature>-<numero>" of the amended texte.
      const signetDossierLegislatif = amendement.signet_dossier_legislatif
        || `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase();
      const ameliReorganizedDir = path.join(ameliReorganizedRootDir, session, signetDossierLegislatif);
      fs.ensureDirSync(ameliReorganizedDir);
      const amendementFileName = `${amendement.numero}.json`;
      fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 });
    }
  }

  if (enabledDatasets & EnabledDatasets.DosLeg) {
    const dataset = datasets.dosleg;
    if (!options.silent) {
      console.log(`Converting database ${dataset.database} data into files…`);
    }
    const doslegReorganizedRootDir = path.join(dataDir, dataset.database);
    const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER);
    ensureAndClearDir(doslegReorganizedRootDir);
    ensureAndClearDir(dossiersReorganizedDir);

    for await (const loi of findAllLois()) {
      if (options.verbose) {
        console.log(`Converting ${loi.signet} file…`);
      }
      // Dossiers whose signet does not match the expected structure are stored
      // under the UNDEFINED_SESSION folder.
      let loiReorganizedDir = path.join(dossiersReorganizedDir, UNDEFINED_SESSION);
      const signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
      if (signetParts) {
        const { session } = signetParts;
        const formattedSession = formatToFourDigitSession(session);
        loiReorganizedDir = path.join(dossiersReorganizedDir, formattedSession);
      }
      fs.ensureDirSync(loiReorganizedDir);
      const loiFileName = `${loi.signet}.json`;
      fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, { spaces: 2 });
    }

    await convertTexteUrls(dataDir);
    await convertRapportUrls(dataDir);
  }

  if (enabledDatasets & EnabledDatasets.Questions) {
    const dataset = datasets.questions;
    if (!options.silent) {
      console.log(`Converting database ${dataset.database} data into files…`);
    }
    const questionsReorganizedRootDir = path.join(dataDir, dataset.database);
    ensureAndClearDir(questionsReorganizedRootDir);

    for await (const question of findAllQuestions()) {
      if (options.verbose) {
        console.log(`Converting ${question.reference} file…`);
      }
      // Questions without a legislature are grouped in folder "0".
      const legislature = question.legislature ? question.legislature : 0;
      const questionReorganizedDir = path.join(questionsReorganizedRootDir, String(legislature));
      fs.ensureDirSync(questionReorganizedDir);
      const questionFileName = `${question.reference}.json`;
      fs.writeJSONSync(path.join(questionReorganizedDir, questionFileName), question, { spaces: 2 });
    }
  }

  if (enabledDatasets & EnabledDatasets.Sens) {
    const dataset = datasets.sens;
    if (!options.silent) {
      console.log(`Converting database ${dataset.database} data into files…`);
    }
    const sensReorganizedRootDir = path.join(dataDir, dataset.database);
    const senateursReorganizedDir = path.join(sensReorganizedRootDir, SENS_SENATEURS_FOLDER);
    const circonscriptionsReorganizedDir = path.join(sensReorganizedRootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
    const organismesReorganizedDir = path.join(sensReorganizedRootDir, SENS_ORGANISMES_FOLDER);
    ensureAndClearDir(sensReorganizedRootDir);
    ensureAndClearDir(senateursReorganizedDir);
    ensureAndClearDir(circonscriptionsReorganizedDir);
    ensureAndClearDir(organismesReorganizedDir);

    for await (const sen of findAllSens()) {
      if (options.verbose) {
        console.log(`Converting ${sen.matricule} file…`);
      }
      const senFileName = `${sen.matricule}.json`;
      fs.writeJSONSync(path.join(senateursReorganizedDir, senFileName), sen, { spaces: 2 });
    }

    for await (const circonscription of findAllCirconscriptions()) {
      if (options.verbose) {
        console.log(`Converting ${circonscription.identifiant} file…`);
      }
      const circonscriptionFileName = `${circonscription.identifiant}.json`;
      fs.writeJSONSync(path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, { spaces: 2 });
    }

    for await (const organisme of findAllOrganismes()) {
      if (options.verbose) {
        console.log(`Converting ${organisme.code} file…`);
      }
      const organismeFileName = `${organisme.code}.json`;
      fs.writeJSONSync(path.join(organismesReorganizedDir, organismeFileName), organisme, { spaces: 2 });
    }
  }

  if (!options.silent) {
    console.timeEnd("data transformation time");
  }
}
|
|
129
|
-
/**
 * Write one metadata JSON file per texte yielded by `findSenatTexteUrls`.
 *
 * Each texte gets its own folder `<dataDir>/<TEXTE_FOLDER>/<TEXTE_ORIGINAL_FOLDER>/<session>/<name>`,
 * where `<name>` is the file name of the texte URL without its extension and
 * `<session>` falls back to UNDEFINED_SESSION when the texte has none.
 *
 * @param {string} dataDir - Root directory of the exported data.
 * @returns {Promise<void>}
 */
async function convertTexteUrls(dataDir) {
  const textesRootDir = path.join(dataDir, TEXTE_FOLDER);
  fs.ensureDirSync(textesRootDir);
  const originalsDir = path.join(textesRootDir, TEXTE_ORIGINAL_FOLDER);

  for await (const { url, session, hasExposeDesMotifs } of findSenatTexteUrls(options.sessions)) {
    const { name } = path.parse(url);
    const sessionFolder = `${session ?? UNDEFINED_SESSION}`;
    const targetDir = path.join(originalsDir, sessionFolder, name);
    fs.ensureDirSync(targetDir);

    // Stays undefined when the texte has no "exposé des motifs".
    let exposeDesMotifsUrl;
    if (hasExposeDesMotifs) {
      exposeDesMotifsUrl = new URL(`${name}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL);
    }

    const metadata = {
      name,
      session,
      url_expose_des_motifs: exposeDesMotifsUrl,
      url_xml: new URL(`${name}.akn.xml`, SENAT_TEXTE_XML_BASE_URL),
      url_html: new URL(`${name}.html`, SENAT_TEXTE_BASE_URL),
      url_pdf: new URL(`${name}.pdf`, SENAT_TEXTE_BASE_URL),
    };
    const metadataPath = path.join(targetDir, DOCUMENT_METADATA_FILE);
    fs.writeJSONSync(metadataPath, metadata, { spaces: 2 });
  }
}
|
|
149
|
-
/**
 * Write one metadata JSON file per rapport yielded by `findSenatRapportUrls`.
 *
 * Each rapport gets its own folder `<dataDir>/<RAPPORT_FOLDER>/<session>/<name>`,
 * where `<name>` is the file name of the rapport URL without its extension and
 * `<session>` falls back to UNDEFINED_SESSION when the rapport has none.
 *
 * @param {string} dataDir - Root directory of the exported data.
 * @returns {Promise<void>}
 */
async function convertRapportUrls(dataDir) {
  const rapportsRootDir = path.join(dataDir, RAPPORT_FOLDER);
  fs.ensureDirSync(rapportsRootDir);

  for await (const rapport of findSenatRapportUrls(options.sessions)) {
    const { dir: urlDir, name: rapportName } = path.parse(rapport.url);
    const sessionFolder = `${rapport.session ?? UNDEFINED_SESSION}`;
    const targetDir = path.join(rapportsRootDir, sessionFolder, rapportName);
    fs.ensureDirSync(targetDir);

    // Builds the URL of a document located in the same URL directory as the rapport.
    const siblingUrl = (base) => new URL(path.format({ dir: urlDir, base }), SENAT_RAPPORT_BASE_URL);

    const metadata = {
      name: rapportName,
      session: rapport.session,
      url_html: siblingUrl(`${rapportName}_mono.html`),
      url_pdf: siblingUrl(`${rapportName}1.pdf`),
    };
    const metadataPath = path.join(targetDir, DOCUMENT_METADATA_FILE);
    fs.writeJSONSync(metadataPath, metadata, { spaces: 2 });
  }
}
|
|
176
|
-
// Script entry point: run the conversion, then exit with status 0 on success;
// on failure print the error and exit with status 1.
(async () => {
  try {
    await convertData();
  } catch (error) {
    console.log(error);
    process.exit(1);
  }
  process.exit(0);
})();
|
|
@@ -1,243 +0,0 @@
|
|
|
1
|
-
import assert from "assert"
|
|
2
|
-
import commandLineArgs from "command-line-args"
|
|
3
|
-
import fs from "fs-extra"
|
|
4
|
-
import path from "path"
|
|
5
|
-
|
|
6
|
-
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets"
|
|
7
|
-
import {
|
|
8
|
-
DOCUMENT_METADATA_FILE,
|
|
9
|
-
DOSLEG_DOSSIERS_FOLDER,
|
|
10
|
-
RAPPORT_FOLDER,
|
|
11
|
-
RapportMetadata,
|
|
12
|
-
SENS_CIRCONSCRIPTIONS_FOLDER,
|
|
13
|
-
SENS_ORGANISMES_FOLDER,
|
|
14
|
-
SENS_SENATEURS_FOLDER,
|
|
15
|
-
TEXTE_FOLDER,
|
|
16
|
-
TEXTE_ORIGINAL_FOLDER,
|
|
17
|
-
TexteMetadata,
|
|
18
|
-
} from "../loaders"
|
|
19
|
-
import {
|
|
20
|
-
findAllAmendements,
|
|
21
|
-
findAllCirconscriptions,
|
|
22
|
-
findAllLois,
|
|
23
|
-
findAllOrganismes,
|
|
24
|
-
findAllQuestions,
|
|
25
|
-
findAllSens,
|
|
26
|
-
} from "../model"
|
|
27
|
-
import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg"
|
|
28
|
-
import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION } from "./datautil"
|
|
29
|
-
import { commonOptions } from "./shared/cli_helpers"
|
|
30
|
-
import { ensureAndClearDir } from "./shared/util"
|
|
31
|
-
|
|
32
|
-
// Command-line handling: this script accepts only the options shared by all scripts.
const optionsDefinitions = [...commonOptions]
const options = commandLineArgs(optionsDefinitions)

// Base URLs on www.senat.fr against which document file names are resolved
// when building the links stored in the metadata files.
const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/"
const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/"
const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/"
const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/"
|
|
41
|
-
|
|
42
|
-
/**
 * Export every enabled database to JSON files below `options.dataDir`.
 *
 * There is one section per dataset flag (Ameli, DosLeg, Questions, Sens). Each
 * section passes the dataset's output directory to `ensureAndClearDir`, then
 * writes every record yielded by the matching `findAll…` generator to its own
 * JSON file (2-space indentation). The DosLeg section additionally writes the
 * texte and rapport metadata files.
 *
 * @throws AssertionError when `options.dataDir` is not set.
 */
async function convertData () {
  const enabledDatasets = getEnabledDatasets(options.categories)
  const dataDir: string = options.dataDir
  assert(dataDir, "Missing argument: data directory")

  // The timer is always started; its result is only printed when not silent.
  console.time("data transformation time")

  if (enabledDatasets & EnabledDatasets.Ameli) {
    const dataset = datasets.ameli

    if (!options.silent) {
      console.log(`Converting database ${dataset.database} data into files…`)
    }

    const ameliReorganizedRootDir = path.join(dataDir, dataset.database)
    ensureAndClearDir(ameliReorganizedRootDir)

    for await (const amendement of findAllAmendements()) {
      if (options.verbose) {
        console.log(`Converting ${amendement.numero} file…`)
      }

      // String(null) and String(undefined) yield the truthy strings "null" and
      // "undefined", so a missing session must be detected before converting;
      // otherwise the UNDEFINED_SESSION fallback can never apply.
      const session = amendement.session == null
        ? UNDEFINED_SESSION
        : String(amendement.session) || UNDEFINED_SESSION
      // Without a dossier signet, group by "<nature>-<numero>" of the amended texte.
      const signetDossierLegislatif = amendement.signet_dossier_legislatif
        || `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase()
      const ameliReorganizedDir = path.join(ameliReorganizedRootDir, session, signetDossierLegislatif)
      fs.ensureDirSync(ameliReorganizedDir)
      const amendementFileName = `${amendement.numero}.json`
      fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 })
    }
  }

  if (enabledDatasets & EnabledDatasets.DosLeg) {
    const dataset = datasets.dosleg

    if (!options.silent) {
      console.log(`Converting database ${dataset.database} data into files…`)
    }

    const doslegReorganizedRootDir = path.join(dataDir, dataset.database)
    const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER)
    ensureAndClearDir(doslegReorganizedRootDir)
    ensureAndClearDir(dossiersReorganizedDir)

    for await (const loi of findAllLois()) {
      if (options.verbose) {
        console.log(`Converting ${loi.signet} file…`)
      }

      // Dossiers whose signet does not match the expected structure are stored
      // under the UNDEFINED_SESSION folder.
      let loiReorganizedDir = path.join(dossiersReorganizedDir, UNDEFINED_SESSION)
      const signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups
      if (signetParts) {
        const { session } = signetParts
        const formattedSession = formatToFourDigitSession(session)
        loiReorganizedDir = path.join(dossiersReorganizedDir, formattedSession)
      }
      fs.ensureDirSync(loiReorganizedDir)

      const loiFileName = `${loi.signet}.json`
      fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, { spaces: 2 })
    }

    await convertTexteUrls(dataDir)
    await convertRapportUrls(dataDir)
  }

  if (enabledDatasets & EnabledDatasets.Questions) {
    const dataset = datasets.questions

    if (!options.silent) {
      console.log(`Converting database ${dataset.database} data into files…`)
    }

    const questionsReorganizedRootDir = path.join(dataDir, dataset.database)
    ensureAndClearDir(questionsReorganizedRootDir)

    for await (const question of findAllQuestions()) {
      if (options.verbose) {
        console.log(`Converting ${question.reference} file…`)
      }

      // Questions without a legislature are grouped in folder "0".
      const legislature = question.legislature ? question.legislature : 0
      const questionReorganizedDir = path.join(questionsReorganizedRootDir, String(legislature))
      fs.ensureDirSync(questionReorganizedDir)
      const questionFileName = `${question.reference}.json`
      fs.writeJSONSync(path.join(questionReorganizedDir, questionFileName), question, { spaces: 2 })
    }
  }

  if (enabledDatasets & EnabledDatasets.Sens) {
    const dataset = datasets.sens

    if (!options.silent) {
      console.log(`Converting database ${dataset.database} data into files…`)
    }

    const sensReorganizedRootDir = path.join(dataDir, dataset.database)
    const senateursReorganizedDir = path.join(sensReorganizedRootDir, SENS_SENATEURS_FOLDER)
    const circonscriptionsReorganizedDir = path.join(sensReorganizedRootDir, SENS_CIRCONSCRIPTIONS_FOLDER)
    const organismesReorganizedDir = path.join(sensReorganizedRootDir, SENS_ORGANISMES_FOLDER)
    ensureAndClearDir(sensReorganizedRootDir)
    ensureAndClearDir(senateursReorganizedDir)
    ensureAndClearDir(circonscriptionsReorganizedDir)
    ensureAndClearDir(organismesReorganizedDir)

    for await (const sen of findAllSens()) {
      if (options.verbose) {
        console.log(`Converting ${sen.matricule} file…`)
      }

      const senFileName = `${sen.matricule}.json`
      fs.writeJSONSync(path.join(senateursReorganizedDir, senFileName), sen, { spaces: 2 })
    }

    for await (const circonscription of findAllCirconscriptions()) {
      if (options.verbose) {
        console.log(`Converting ${circonscription.identifiant} file…`)
      }

      const circonscriptionFileName = `${circonscription.identifiant}.json`
      fs.writeJSONSync(
        path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, { spaces: 2 }
      )
    }

    for await (const organisme of findAllOrganismes()) {
      if (options.verbose) {
        console.log(`Converting ${organisme.code} file…`)
      }

      const organismeFileName = `${organisme.code}.json`
      fs.writeJSONSync(path.join(organismesReorganizedDir, organismeFileName), organisme, { spaces: 2 })
    }
  }

  if (!options.silent) {
    console.timeEnd("data transformation time")
  }
}
|
|
181
|
-
|
|
182
|
-
/**
 * Write one metadata JSON file per texte yielded by `findSenatTexteUrls`.
 *
 * Each texte gets its own folder `<dataDir>/<TEXTE_FOLDER>/<TEXTE_ORIGINAL_FOLDER>/<session>/<name>`,
 * where `<name>` is the file name of the texte URL without its extension and
 * `<session>` falls back to UNDEFINED_SESSION when the texte has none.
 *
 * @param dataDir Root directory of the exported data.
 */
async function convertTexteUrls (dataDir: string) {
  const textesRootDir = path.join(dataDir, TEXTE_FOLDER)
  fs.ensureDirSync(textesRootDir)

  const originalsDir = path.join(textesRootDir, TEXTE_ORIGINAL_FOLDER)

  for await (const texte of findSenatTexteUrls(options.sessions)) {
    const { name } = path.parse(texte.url)
    const sessionFolder = `${texte.session ?? UNDEFINED_SESSION}`
    const targetDir = path.join(originalsDir, sessionFolder, name)
    fs.ensureDirSync(targetDir)

    // Stays undefined when the texte has no "exposé des motifs".
    let exposeDesMotifsUrl: URL | undefined
    if (texte.hasExposeDesMotifs) {
      exposeDesMotifsUrl = new URL(`${name}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL)
    }

    const metadata: TexteMetadata = {
      name,
      session: texte.session,
      url_expose_des_motifs: exposeDesMotifsUrl,
      url_xml: new URL(`${name}.akn.xml`, SENAT_TEXTE_XML_BASE_URL),
      url_html: new URL(`${name}.html`, SENAT_TEXTE_BASE_URL),
      url_pdf: new URL(`${name}.pdf`, SENAT_TEXTE_BASE_URL),
    }
    const metadataPath = path.join(targetDir, DOCUMENT_METADATA_FILE)
    fs.writeJSONSync(metadataPath, metadata, { spaces: 2 })
  }
}
|
|
205
|
-
|
|
206
|
-
/**
 * Write one metadata JSON file per rapport yielded by `findSenatRapportUrls`.
 *
 * Each rapport gets its own folder `<dataDir>/<RAPPORT_FOLDER>/<session>/<name>`,
 * where `<name>` is the file name of the rapport URL without its extension and
 * `<session>` falls back to UNDEFINED_SESSION when the rapport has none.
 *
 * @param dataDir Root directory of the exported data.
 */
async function convertRapportUrls (dataDir: string) {
  const rapportsRootDir = path.join(dataDir, RAPPORT_FOLDER)
  fs.ensureDirSync(rapportsRootDir)

  for await (const rapport of findSenatRapportUrls(options.sessions)) {
    const { dir: urlDir, name: rapportName } = path.parse(rapport.url)
    const sessionFolder = `${rapport.session ?? UNDEFINED_SESSION}`
    const targetDir = path.join(rapportsRootDir, sessionFolder, rapportName)
    fs.ensureDirSync(targetDir)

    // Builds the URL of a document located in the same URL directory as the rapport.
    const siblingUrl = (base: string): URL =>
      new URL(path.format({ dir: urlDir, base }), SENAT_RAPPORT_BASE_URL)

    const metadata: RapportMetadata = {
      name: rapportName,
      session: rapport.session,
      url_html: siblingUrl(`${rapportName}_mono.html`),
      url_pdf: siblingUrl(`${rapportName}1.pdf`),
    }
    const metadataPath = path.join(targetDir, DOCUMENT_METADATA_FILE)
    fs.writeJSONSync(metadataPath, metadata, { spaces: 2 })
  }
}
|
|
237
|
-
|
|
238
|
-
// Script entry point: run the conversion, then exit with status 0 on success;
// on failure print the error and exit with status 1.
async function runConvertDataScript (): Promise<void> {
  try {
    await convertData()
  } catch (error) {
    console.log(error)
    process.exit(1)
  }
  process.exit(0)
}

runConvertDataScript()
|
package/lib/scripts/datautil.mjs
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import { DateTime, Settings } from "luxon";
|
|
2
|
-
// Session value used when no session can be determined.
export const UNDEFINED_SESSION = "0";

// Signet: lowercase letters (type), 2 to 4 digits (session), an optional
// hyphen, then optional digits (numTexte).
export const SIGNET_STRUCTURE_REGEXP = /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/;

// AKN identification: /akn/fr/<type>/<session as dddd-dddd>/<numTexte digits>/fr@
// optionally followed by a RECT* version marker.
export const AKN_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{4}-\d{4})\/?(?<numTexte>\d+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;

// AKN workflow identification: same layout, but the session is 2 to 4 digits
// and numTexte is alphanumeric.
export const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{2,4})\/?(?<numTexte>[a-zA-Z0-9]+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;

// Global Luxon setting: cutoff used when expanding two-digit years ("yy").
Settings.twoDigitCutoffYear = 50;
|
|
7
|
-
/**
 * Converts a session identifier into the four-digit year of the session.
 *
 * The session may be a single two-digit year ("23") or a pair of consecutive
 * two-digit years ("2324", "9900"). For a consecutive pair the first year is
 * used; otherwise the last two digits are used. The two-digit year is expanded
 * to four digits by Luxon's "yy" parsing.
 *
 * @param {string} session - Session identifier.
 * @returns {string} Four-digit year, or UNDEFINED_SESSION when the session has
 *   fewer than two characters.
 */
export function formatToFourDigitSession(session) {
  if (session.length < 2) {
    return UNDEFINED_SESSION;
  }
  const sessionFirstTwoDigits = session.substring(0, 2);
  const sessionLastTwoDigits = session.substring(session.length - 2);
  // Compare modulo 100 so that a pair spanning a century ("9900") is still
  // recognised as two consecutive years.
  const isConsecutiveYearPair =
    (Number.parseInt(sessionFirstTwoDigits, 10) + 1) % 100 === Number.parseInt(sessionLastTwoDigits, 10);
  const twoDigitSession = isConsecutiveYearPair ? sessionFirstTwoDigits : sessionLastTwoDigits;
  return DateTime.fromFormat(twoDigitSession, "yy").toFormat("yyyy");
}
|
package/lib/scripts/datautil.ts
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import { DateTime, Settings } from "luxon"
|
|
2
|
-
|
|
3
|
-
// Session value used when no session can be determined.
export const UNDEFINED_SESSION = "0"

// Signet: lowercase letters (type), 2 to 4 digits (session), an optional
// hyphen, then optional digits (numTexte).
export const SIGNET_STRUCTURE_REGEXP = /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/

// AKN identification: /akn/fr/<type>/<session as dddd-dddd>/<numTexte digits>/fr@
// optionally followed by a RECT* version marker.
export const AKN_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{4}-\d{4})\/?(?<numTexte>\d+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/

// AKN workflow identification: same layout, but the session is 2 to 4 digits
// and numTexte is alphanumeric.
export const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{2,4})\/?(?<numTexte>[a-zA-Z0-9]+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/

// Global Luxon setting: cutoff used when expanding two-digit years ("yy").
Settings.twoDigitCutoffYear = 50
|
|
9
|
-
|
|
10
|
-
/**
 * Converts a session identifier into the four-digit year of the session.
 *
 * The session may be a single two-digit year ("23") or a pair of consecutive
 * two-digit years ("2324", "9900"). For a consecutive pair the first year is
 * used; otherwise the last two digits are used. The two-digit year is expanded
 * to four digits by Luxon's "yy" parsing.
 *
 * @param session Session identifier.
 * @returns Four-digit year, or UNDEFINED_SESSION when the session has fewer
 *   than two characters.
 */
export function formatToFourDigitSession (session: string) {
  if (session.length < 2) {
    return UNDEFINED_SESSION
  }
  const sessionFirstTwoDigits = session.substring(0, 2)
  const sessionLastTwoDigits = session.substring(session.length - 2)
  // Compare modulo 100 so that a pair spanning a century ("9900") is still
  // recognised as two consecutive years.
  const isConsecutiveYearPair =
    (Number.parseInt(sessionFirstTwoDigits, 10) + 1) % 100 === Number.parseInt(sessionLastTwoDigits, 10)
  const twoDigitSession = isConsecutiveYearPair ? sessionFirstTwoDigits : sessionLastTwoDigits
  return DateTime.fromFormat(twoDigitSession, "yy").toFormat("yyyy")
}
|
|
Binary file
|
|
Binary file
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
import assert from "assert";
|
|
2
|
-
import commandLineArgs from "command-line-args";
|
|
3
|
-
import fs from "fs-extra";
|
|
4
|
-
import path from "path";
|
|
5
|
-
import { iterFilePaths, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER, TEXTE_TRANSFORMED_FOLDER } from "../loaders";
|
|
6
|
-
import { parseExposeDesMotifsFromFile, parseTexteFromFile } from "../model/texte";
|
|
7
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
8
|
-
import { ensureAndClearDir } from "./shared/util";
|
|
9
|
-
// This script accepts only the command-line options shared by all scripts.
const optionsDefinitions = Array.from(commonOptions);
// Parsed once at module load; read by main().
const options = commandLineArgs(optionsDefinitions);
|
|
13
|
-
/**
 * Parses every original texte XML file and writes the result as JSON.
 *
 * The transformed-textes directory is cleared first. Each `.xml` file found
 * under the original-textes directory is parsed and written to the same
 * relative location under the transformed directory, as `<name>.json`. When a
 * sibling `<name>-expose.html` file exists, it is parsed too and attached to
 * the parsed texte as `exposeDesMotifs`.
 *
 * @returns {Promise<void>}
 * @throws {AssertionError} When the data directory option is missing.
 */
async function main() {
  const dataDir = options.dataDir;
  assert(dataDir, "Missing argument: data directory");

  const originalTextesDir = path.join(dataDir, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER);
  const transformedTextesDir = path.join(dataDir, TEXTE_FOLDER, TEXTE_TRANSFORMED_FOLDER);
  ensureAndClearDir(transformedTextesDir);

  for (const filePath of iterFilePaths(originalTextesDir)) {
    const parsedFilePath = path.parse(filePath);
    if (parsedFilePath.ext !== ".xml") {
      continue;
    }

    // Use path.relative rather than searching for the folder name in the path:
    // a name search matches the first occurrence, which is wrong whenever the
    // data directory itself contains that folder name.
    const texteDirFromOriginal = path.relative(originalTextesDir, parsedFilePath.dir);
    const transformedTexteDir = path.join(transformedTextesDir, texteDirFromOriginal);
    fs.ensureDirSync(transformedTexteDir);

    if (!options.silent) {
      console.log(`Parsing texte ${parsedFilePath.name}.xml…`);
    }
    const parsedTexte = await parseTexteFromFile(filePath);

    const exposeDesMotifsFileName = `${parsedFilePath.name}-expose`;
    const exposeDesMotifsFilePath = path.join(parsedFilePath.dir, `${exposeDesMotifsFileName}.html`);
    if (parsedTexte && fs.existsSync(exposeDesMotifsFilePath)) {
      if (!options.silent) {
        console.log(`Parsing exposé des motifs ${exposeDesMotifsFileName}.html…`);
      }
      parsedTexte.exposeDesMotifs = await parseExposeDesMotifsFromFile(exposeDesMotifsFilePath);
    }

    // The parse result is serialized as-is, even when it is falsy.
    fs.writeJSONSync(path.join(transformedTexteDir, `${parsedFilePath.name}.json`), parsedTexte, { spaces: 2 });
  }
}
|
|
41
|
-
// Script entry point: run main(), then exit explicitly with a status code
// reflecting success (0) or failure (1).
main()
  .then(() => process.exit(0))
  .catch((error) => {
    // Fatal errors belong on stderr so they are not mixed with progress output.
    console.error(error);
    process.exit(1);
  });
|
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
import assert from "assert"
|
|
2
|
-
import commandLineArgs from "command-line-args"
|
|
3
|
-
import fs from "fs-extra"
|
|
4
|
-
import path from "path"
|
|
5
|
-
|
|
6
|
-
import { iterFilePaths, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER, TEXTE_TRANSFORMED_FOLDER } from "../loaders"
|
|
7
|
-
import { parseExposeDesMotifsFromFile, parseTexteFromFile } from "../model/texte"
|
|
8
|
-
import { commonOptions } from "./shared/cli_helpers"
|
|
9
|
-
import { ensureAndClearDir } from "./shared/util"
|
|
10
|
-
|
|
11
|
-
// This script accepts only the command-line options shared by all scripts.
const optionsDefinitions = Array.from(commonOptions)
// Parsed once at module load; read by main().
const options = commandLineArgs(optionsDefinitions)
|
|
15
|
-
|
|
16
|
-
/**
 * Parses every original texte XML file and writes the result as JSON.
 *
 * The transformed-textes directory is cleared first. Each `.xml` file found
 * under the original-textes directory is parsed and written to the same
 * relative location under the transformed directory, as `<name>.json`. When a
 * sibling `<name>-expose.html` file exists, it is parsed too and attached to
 * the parsed texte as `exposeDesMotifs`.
 *
 * Throws an assertion error when the data directory option is missing.
 */
async function main() {
  const dataDir = options.dataDir
  assert(dataDir, "Missing argument: data directory")

  const originalTextesDir = path.join(dataDir, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER)
  const transformedTextesDir = path.join(dataDir, TEXTE_FOLDER, TEXTE_TRANSFORMED_FOLDER)
  ensureAndClearDir(transformedTextesDir)

  for (const filePath of iterFilePaths(originalTextesDir)) {
    const parsedFilePath = path.parse(filePath)

    if (parsedFilePath.ext !== ".xml") {
      continue
    }

    // Use path.relative rather than searching for the folder name in the path:
    // a name search matches the first occurrence, which is wrong whenever the
    // data directory itself contains that folder name.
    const texteDirFromOriginal = path.relative(originalTextesDir, parsedFilePath.dir)
    const transformedTexteDir = path.join(transformedTextesDir, texteDirFromOriginal)
    fs.ensureDirSync(transformedTexteDir)

    if (!options.silent) {
      console.log(`Parsing texte ${parsedFilePath.name}.xml…`)
    }

    const parsedTexte = await parseTexteFromFile(filePath)

    const exposeDesMotifsFileName = `${parsedFilePath.name}-expose`
    const exposeDesMotifsFilePath = path.join(parsedFilePath.dir, `${exposeDesMotifsFileName}.html`)
    if (parsedTexte && fs.existsSync(exposeDesMotifsFilePath)) {
      if (!options.silent) {
        console.log(`Parsing exposé des motifs ${exposeDesMotifsFileName}.html…`)
      }

      parsedTexte.exposeDesMotifs = await parseExposeDesMotifsFromFile(exposeDesMotifsFilePath)
    }

    // The parse result is serialized as-is, even when it is falsy.
    fs.writeJSONSync(
      path.join(transformedTexteDir, `${parsedFilePath.name}.json`),
      parsedTexte,
      { spaces: 2 }
    )
  }
}
|
|
59
|
-
|
|
60
|
-
// Script entry point: run main(), then exit explicitly with a status code
// reflecting success (0) or failure (1).
main()
  .then(() => process.exit(0))
  .catch((error) => {
    // Fatal errors belong on stderr so they are not mixed with progress output.
    console.error(error)
    process.exit(1)
  })
|