@tricoteuses/senat 2.8.1 → 2.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/aggregates.d.ts +52 -0
- package/lib/aggregates.js +949 -0
- package/lib/aggregates.mjs +726 -0
- package/lib/aggregates.ts +852 -0
- package/lib/config.mjs +16 -0
- package/lib/config.ts +26 -0
- package/lib/databases.mjs +55 -0
- package/lib/databases.ts +68 -0
- package/lib/datasets.mjs +78 -0
- package/lib/datasets.ts +118 -0
- package/lib/fields.d.ts +10 -0
- package/lib/fields.js +68 -0
- package/lib/fields.mjs +22 -0
- package/lib/fields.ts +29 -0
- package/lib/index.mjs +7 -0
- package/lib/index.ts +64 -0
- package/lib/inserters.d.ts +98 -0
- package/lib/inserters.js +500 -0
- package/lib/inserters.mjs +360 -0
- package/lib/inserters.ts +521 -0
- package/lib/loaders.mjs +97 -0
- package/lib/loaders.ts +173 -0
- package/lib/model/ameli.mjs +57 -0
- package/lib/model/ameli.ts +86 -0
- package/lib/model/debats.mjs +43 -0
- package/lib/model/debats.ts +68 -0
- package/lib/model/dosleg.mjs +163 -0
- package/lib/model/dosleg.ts +204 -0
- package/lib/model/index.mjs +4 -0
- package/lib/model/index.ts +13 -0
- package/lib/model/questions.d.ts +0 -20
- package/lib/model/questions.js +1 -32
- package/lib/model/questions.mjs +76 -0
- package/lib/model/questions.ts +102 -0
- package/lib/model/sens.mjs +339 -0
- package/lib/model/sens.ts +432 -0
- package/lib/model/texte.mjs +156 -0
- package/lib/model/texte.ts +174 -0
- package/lib/raw_types/ameli.d.ts +20 -0
- package/lib/raw_types/questions.d.ts +4 -70
- package/lib/raw_types_kysely/ameli.d.ts +915 -0
- package/lib/raw_types_kysely/ameli.js +7 -0
- package/lib/raw_types_kysely/ameli.mjs +5 -0
- package/lib/raw_types_kysely/ameli.ts +951 -0
- package/lib/raw_types_kysely/debats.d.ts +207 -0
- package/lib/raw_types_kysely/debats.js +7 -0
- package/lib/raw_types_kysely/debats.mjs +5 -0
- package/lib/raw_types_kysely/debats.ts +222 -0
- package/lib/raw_types_kysely/dosleg.d.ts +3532 -0
- package/lib/raw_types_kysely/dosleg.js +7 -0
- package/lib/raw_types_kysely/dosleg.mjs +5 -0
- package/lib/raw_types_kysely/dosleg.ts +3621 -0
- package/lib/raw_types_kysely/questions.d.ts +414 -0
- package/lib/raw_types_kysely/questions.js +7 -0
- package/lib/raw_types_kysely/questions.mjs +5 -0
- package/lib/raw_types_kysely/questions.ts +426 -0
- package/lib/raw_types_kysely/sens.d.ts +4394 -0
- package/lib/raw_types_kysely/sens.js +7 -0
- package/lib/raw_types_kysely/sens.mjs +5 -0
- package/lib/raw_types_kysely/sens.ts +4499 -0
- package/lib/raw_types_schemats/ameli.mjs +2 -0
- package/lib/raw_types_schemats/ameli.ts +601 -0
- package/lib/raw_types_schemats/debats.mjs +2 -0
- package/lib/raw_types_schemats/debats.ts +145 -0
- package/lib/raw_types_schemats/dosleg.mjs +2 -0
- package/lib/raw_types_schemats/dosleg.ts +2193 -0
- package/lib/raw_types_schemats/questions.mjs +2 -0
- package/lib/raw_types_schemats/questions.ts +249 -0
- package/lib/raw_types_schemats/sens.mjs +2 -0
- package/lib/raw_types_schemats/sens.ts +2907 -0
- package/lib/scripts/convert_data.mjs +95 -0
- package/lib/scripts/convert_data.ts +119 -0
- package/lib/scripts/data-download.d.ts +1 -0
- package/lib/scripts/data-download.js +9 -0
- package/lib/scripts/datautil.mjs +16 -0
- package/lib/scripts/datautil.ts +19 -0
- package/lib/scripts/images/transparent_150x192.jpg +0 -0
- package/lib/scripts/images/transparent_155x225.jpg +0 -0
- package/lib/scripts/parse_textes.mjs +38 -0
- package/lib/scripts/parse_textes.ts +52 -0
- package/lib/scripts/retrieve_documents.mjs +243 -0
- package/lib/scripts/retrieve_documents.ts +279 -0
- package/lib/scripts/retrieve_open_data.js +11 -9
- package/lib/scripts/retrieve_open_data.mjs +214 -0
- package/lib/scripts/retrieve_open_data.ts +261 -0
- package/lib/scripts/retrieve_senateurs_photos.mjs +147 -0
- package/lib/scripts/retrieve_senateurs_photos.ts +177 -0
- package/lib/scripts/retrieve_textes.d.ts +1 -0
- package/lib/scripts/retrieve_textes.mjs +165 -0
- package/lib/scripts/retrieve_textes.ts +79 -0
- package/lib/scripts/shared/cli_helpers.ts +36 -0
- package/lib/scripts/shared/util.ts +33 -0
- package/lib/strings.mjs +18 -0
- package/lib/strings.ts +26 -0
- package/lib/types/ameli.mjs +13 -0
- package/lib/types/ameli.ts +21 -0
- package/lib/types/debats.mjs +2 -0
- package/lib/types/debats.ts +6 -0
- package/lib/types/dosleg.mjs +151 -0
- package/lib/types/dosleg.ts +284 -0
- package/lib/types/questions.mjs +1 -0
- package/lib/types/questions.ts +3 -0
- package/lib/types/sens.mjs +1 -0
- package/lib/types/sens.ts +12 -0
- package/lib/types/sessions.mjs +43 -0
- package/lib/types/sessions.ts +42 -0
- package/lib/types/texte.mjs +16 -0
- package/lib/types/texte.ts +66 -0
- package/lib/typings/windows-1252.d.js +2 -0
- package/lib/typings/windows-1252.d.mjs +2 -0
- package/lib/typings/windows-1252.d.ts +11 -0
- package/lib/validators/config.mjs +54 -0
- package/lib/validators/config.ts +79 -0
- package/lib/validators/senat.mjs +24 -0
- package/lib/validators/senat.ts +26 -0
- package/package.json +6 -4
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import commandLineArgs from "command-line-args";
|
|
3
|
+
import fs from "fs-extra";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
|
|
6
|
+
import { findAllCirconscriptions, findAllLois, findAllQuestions, findAllSens } from "../model";
|
|
7
|
+
import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION } from "./datautil";
|
|
8
|
+
import { commonOptions } from "./shared/cli_helpers";
|
|
9
|
+
import { ensureAndClearDir } from "./shared/util";
|
|
10
|
+
// This script accepts only the command-line options shared by every script.
const optionsDefinitions = [...commonOptions];

// Parsed once at module load; convertData() reads its settings from here.
const options = commandLineArgs(optionsDefinitions);
|
|
14
|
+
/**
 * Writes every record of the enabled datasets as a pretty-printed JSON file
 * (2-space indent) under `options.dataDir`:
 *
 * - DosLeg: `<database>/<session>/<signet>.json`, where the session is derived
 *   from the signet (or UNDEFINED_SESSION when the signet has no recognisable
 *   structure);
 * - Questions: `<database>/<legislature>/<reference>.json` (legislature `0`
 *   when the question has none);
 * - Sens: `<database>/sens/<matricule>.json` and
 *   `<database>/circonscriptions/<identifiant>.json`.
 *
 * Each dataset's output directory is cleared before being refilled.
 */
async function convertData() {
    const enabledDatasets = getEnabledDatasets(options.categories);
    const dataDir = options.dataDir;
    assert(dataDir, "Missing argument: data directory");
    console.time("conversion time");

    // Small local helpers shared by the three dataset sections below.
    const isEnabled = (flag) => Boolean(enabledDatasets & flag);
    const announceDatabase = (database) => {
        if (options.silent) {
            return;
        }
        console.log(`Converting database ${database} data into files…`);
    };
    const announceRecord = (id) => {
        if (options.verbose) {
            console.log(`Converting ${id} file…`);
        }
    };
    const writeRecord = (dir, id, record) => {
        fs.writeJSONSync(path.join(dir, `${id}.json`), record, { spaces: 2 });
    };

    if (isEnabled(EnabledDatasets.DosLeg)) {
        const { database } = datasets.dosleg;
        announceDatabase(database);
        const rootDir = path.join(dataDir, database);
        ensureAndClearDir(rootDir);
        for await (const loi of findAllLois()) {
            announceRecord(loi.signet);
            // Group lois by the session encoded in their signet, when there is one.
            const signetGroups = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
            const sessionDirName = signetGroups
                ? formatToFourDigitSession(signetGroups.session)
                : UNDEFINED_SESSION;
            const targetDir = path.join(rootDir, sessionDirName);
            fs.ensureDirSync(targetDir);
            writeRecord(targetDir, loi.signet, loi);
        }
    }

    if (isEnabled(EnabledDatasets.Questions)) {
        const { database } = datasets.questions;
        announceDatabase(database);
        const rootDir = path.join(dataDir, database);
        ensureAndClearDir(rootDir);
        for await (const question of findAllQuestions()) {
            announceRecord(question.reference);
            const targetDir = path.join(rootDir, String(question.legislature || 0));
            fs.ensureDirSync(targetDir);
            writeRecord(targetDir, question.reference, question);
        }
    }

    if (isEnabled(EnabledDatasets.Sens)) {
        const { database } = datasets.sens;
        announceDatabase(database);
        const rootDir = path.join(dataDir, database);
        const sensDir = path.join(rootDir, "sens");
        const circonscriptionsDir = path.join(rootDir, "circonscriptions");
        ensureAndClearDir(rootDir);
        ensureAndClearDir(sensDir);
        ensureAndClearDir(circonscriptionsDir);
        for await (const sen of findAllSens()) {
            announceRecord(sen.matricule);
            writeRecord(sensDir, sen.matricule, sen);
        }
        for await (const circonscription of findAllCirconscriptions()) {
            announceRecord(circonscription.identifiant);
            writeRecord(circonscriptionsDir, circonscription.identifiant, circonscription);
        }
    }

    if (!options.silent) {
        console.timeEnd("conversion time");
    }
}
|
|
90
|
+
// Entry point: exit with status 0 on success; on failure report the error on
// stderr (not stdout, so it is not mixed with regular output) and exit with 1.
convertData()
    .then(() => process.exit(0))
    .catch((error) => {
        console.error(error);
        process.exit(1);
    });
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import assert from "assert"
|
|
2
|
+
import commandLineArgs from "command-line-args"
|
|
3
|
+
import fs from "fs-extra"
|
|
4
|
+
import path from "path"
|
|
5
|
+
|
|
6
|
+
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets"
|
|
7
|
+
import { findAllCirconscriptions, findAllLois, findAllQuestions, findAllSens } from "../model"
|
|
8
|
+
import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION } from "./datautil"
|
|
9
|
+
import { commonOptions } from "./shared/cli_helpers"
|
|
10
|
+
import { ensureAndClearDir } from "./shared/util"
|
|
11
|
+
|
|
12
|
+
// This script accepts only the command-line options shared by every script.
const optionsDefinitions = [...commonOptions]

// Parsed once at module load; convertData() reads its settings from here.
const options = commandLineArgs(optionsDefinitions)
|
|
16
|
+
|
|
17
|
+
/**
 * Writes every record of the enabled datasets as a pretty-printed JSON file
 * (2-space indent) under `options.dataDir`:
 *
 * - DosLeg: `<database>/<session>/<signet>.json`, where the session is derived
 *   from the signet (or UNDEFINED_SESSION when the signet has no recognisable
 *   structure);
 * - Questions: `<database>/<legislature>/<reference>.json` (legislature `0`
 *   when the question has none);
 * - Sens: `<database>/sens/<matricule>.json` and
 *   `<database>/circonscriptions/<identifiant>.json`.
 *
 * Each dataset's output directory is cleared before being refilled.
 */
async function convertData() {
  const enabledDatasets = getEnabledDatasets(options.categories)
  const dataDir: string = options.dataDir
  assert(dataDir, "Missing argument: data directory")

  console.time("conversion time")

  // Small local helpers shared by the three dataset sections below.
  const isEnabled = (flag: number) => Boolean(enabledDatasets & flag)
  const announceDatabase = (database: unknown) => {
    if (options.silent) {
      return
    }
    console.log(`Converting database ${database} data into files…`)
  }
  const announceRecord = (id: unknown) => {
    if (options.verbose) {
      console.log(`Converting ${id} file…`)
    }
  }
  const writeRecord = (dir: string, id: unknown, record: unknown) => {
    fs.writeJSONSync(path.join(dir, `${id}.json`), record, {spaces: 2})
  }

  if (isEnabled(EnabledDatasets.DosLeg)) {
    const { database } = datasets.dosleg
    announceDatabase(database)

    const rootDir = path.join(dataDir, database)
    ensureAndClearDir(rootDir)

    for await (const loi of findAllLois()) {
      announceRecord(loi.signet)

      // Group lois by the session encoded in their signet, when there is one.
      const signetGroups = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups
      const sessionDirName = signetGroups
        ? formatToFourDigitSession(signetGroups.session)
        : UNDEFINED_SESSION
      const targetDir = path.join(rootDir, sessionDirName)
      fs.ensureDirSync(targetDir)

      writeRecord(targetDir, loi.signet, loi)
    }
  }

  if (isEnabled(EnabledDatasets.Questions)) {
    const { database } = datasets.questions
    announceDatabase(database)

    const rootDir = path.join(dataDir, database)
    ensureAndClearDir(rootDir)

    for await (const question of findAllQuestions()) {
      announceRecord(question.reference)

      const targetDir = path.join(rootDir, String(question.legislature || 0))
      fs.ensureDirSync(targetDir)
      writeRecord(targetDir, question.reference, question)
    }
  }

  if (isEnabled(EnabledDatasets.Sens)) {
    const { database } = datasets.sens
    announceDatabase(database)

    const rootDir = path.join(dataDir, database)
    const sensDir = path.join(rootDir, "sens")
    const circonscriptionsDir = path.join(rootDir, "circonscriptions")
    ensureAndClearDir(rootDir)
    ensureAndClearDir(sensDir)
    ensureAndClearDir(circonscriptionsDir)

    for await (const sen of findAllSens()) {
      announceRecord(sen.matricule)
      writeRecord(sensDir, sen.matricule, sen)
    }

    for await (const circonscription of findAllCirconscriptions()) {
      announceRecord(circonscription.identifiant)
      writeRecord(circonscriptionsDir, circonscription.identifiant, circonscription)
    }
  }

  if (!options.silent) {
    console.timeEnd("conversion time")
  }
}
|
|
113
|
+
|
|
114
|
+
// Entry point: exit with status 0 on success; on failure report the error on
// stderr (not stdout, so it is not mixed with regular output) and exit with 1.
convertData()
  .then(() => process.exit(0))
  .catch((error) => {
    console.error(error)
    process.exit(1)
  })
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { execSync } from "child_process";
|
|
2
|
+
// Every argument given to this script is forwarded verbatim to both steps.
const forwardedArgs = process.argv.slice(2).join(" ");

// Steps run in order: download the open data, then convert it to files.
const steps = [
    `tsx src/scripts/retrieve_open_data.ts --all ${forwardedArgs}`,
    `tsx src/scripts/convert_data.ts ${forwardedArgs}`,
];

try {
    for (const step of steps) {
        // stdio is inherited, so each step's output goes straight to the terminal.
        execSync(step, { stdio: "inherit" });
    }
}
catch (error) {
    // A failing step stops the sequence; signal the failure through the exit status.
    process.exit(1);
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { DateTime, Settings } from "luxon";
|
|
2
|
+
// Global luxon setting used when parsing two-digit years ("yy"): values above
// the cutoff are read as 19xx, values up to and including it as 20xx.
Settings.twoDigitCutoffYear = 50;

// Signet layout: lowercase type, 2 to 4 session digits, optional hyphen,
// optional text number (e.g. "<type><session>-<numTexte>").
export const SIGNET_STRUCTURE_REGEXP = /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/;

// Akoma Ntoso identifier: /akn/fr/<type>/<yyyy-yyyy session>/<numTexte>/fr@<optional RECT… version>.
export const AKN_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{4}-\d{4})\/?(?<numTexte>\d+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;

// Workflow variant: the session is 2 to 4 digits and the text number may be alphanumeric.
export const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{2,4})\/?(?<numTexte>[a-zA-Z0-9]+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;

// Placeholder session used when none can be determined.
export const UNDEFINED_SESSION = "0";
|
|
7
|
+
/**
 * Converts a 2-to-4 digit session string into a four-digit year string.
 *
 * When the first two and the last two digits are consecutive years (e.g.
 * "2324", or "9900" across the century boundary), the session is read as a
 * "start year / end year" pair and the start year is used. Otherwise the last
 * two digits are used. The two-digit year is expanded by luxon according to
 * `Settings.twoDigitCutoffYear`.
 *
 * @param {string} session - Session digits, as captured from a signet.
 * @returns {string} The four-digit year, or UNDEFINED_SESSION when `session`
 *   is shorter than two characters.
 */
export function formatToFourDigitSession(session) {
    if (session.length < 2) {
        return UNDEFINED_SESSION;
    }
    const sessionFirstTwoDigits = session.substring(0, 2);
    const sessionLastTwoDigits = session.substring(session.length - 2);
    const firstYear = Number.parseInt(sessionFirstTwoDigits, 10);
    const lastYear = Number.parseInt(sessionLastTwoDigits, 10);
    // Modulo 100 so that "99" followed by "00" counts as consecutive years.
    const isConsecutivePair = (firstYear + 1) % 100 === lastYear;
    const twoDigitSession = isConsecutivePair ? sessionFirstTwoDigits : sessionLastTwoDigits;
    return DateTime.fromFormat(twoDigitSession, "yy").toFormat("yyyy");
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { DateTime, Settings } from "luxon"
|
|
2
|
+
|
|
3
|
+
// Global luxon setting used when parsing two-digit years ("yy"): values above
// the cutoff are read as 19xx, values up to and including it as 20xx.
Settings.twoDigitCutoffYear = 50

// Signet layout: lowercase type, 2 to 4 session digits, optional hyphen,
// optional text number (e.g. "<type><session>-<numTexte>").
export const SIGNET_STRUCTURE_REGEXP = /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/

// Akoma Ntoso identifier: /akn/fr/<type>/<yyyy-yyyy session>/<numTexte>/fr@<optional RECT… version>.
export const AKN_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{4}-\d{4})\/?(?<numTexte>\d+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/

// Workflow variant: the session is 2 to 4 digits and the text number may be alphanumeric.
export const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{2,4})\/?(?<numTexte>[a-zA-Z0-9]+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/

// Placeholder session used when none can be determined.
export const UNDEFINED_SESSION = "0"
|
|
9
|
+
|
|
10
|
+
/**
 * Converts a 2-to-4 digit session string into a four-digit year string.
 *
 * When the first two and the last two digits are consecutive years (e.g.
 * "2324", or "9900" across the century boundary), the session is read as a
 * "start year / end year" pair and the start year is used. Otherwise the last
 * two digits are used. The two-digit year is expanded by luxon according to
 * `Settings.twoDigitCutoffYear`.
 *
 * @param session - Session digits, as captured from a signet.
 * @returns The four-digit year, or UNDEFINED_SESSION when `session` is
 *   shorter than two characters.
 */
export function formatToFourDigitSession (session: string): string {
  if (session.length < 2) {
    return UNDEFINED_SESSION
  }
  const sessionFirstTwoDigits = session.substring(0, 2)
  const sessionLastTwoDigits = session.substring(session.length - 2)
  const firstYear = Number.parseInt(sessionFirstTwoDigits, 10)
  const lastYear = Number.parseInt(sessionLastTwoDigits, 10)
  // Modulo 100 so that "99" followed by "00" counts as consecutive years.
  const isConsecutivePair = (firstYear + 1) % 100 === lastYear
  const twoDigitSession = isConsecutivePair ? sessionFirstTwoDigits : sessionLastTwoDigits
  return DateTime.fromFormat(twoDigitSession, "yy").toFormat("yyyy")
}
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import commandLineArgs from "command-line-args";
|
|
3
|
+
import fs from "fs-extra";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import { iterFilePaths } from "../loaders";
|
|
6
|
+
import { parseTexteFromFile } from "../model/texte";
|
|
7
|
+
import { commonOptions } from "./shared/cli_helpers";
|
|
8
|
+
import { ensureAndClearDir } from "./shared/util";
|
|
9
|
+
// This script accepts only the command-line options shared by every script.
const optionsDefinitions = [...commonOptions];

// Parsed once at module load; main() reads its settings from here.
const options = commandLineArgs(optionsDefinitions);
|
|
13
|
+
/**
 * Parses every `.xml` file found under `<dataDir>/leg/original` and writes the
 * result as pretty-printed JSON under `<dataDir>/leg/transformed`, mirroring
 * the directory layout of the original tree. The transformed directory is
 * cleared first.
 */
async function main() {
    const dataDir = options.dataDir;
    assert(dataDir, "Missing argument: data directory");
    const originalTextesDir = path.join(dataDir, "leg", "original");
    const transformedTextesDir = path.join(dataDir, "leg", "transformed");
    ensureAndClearDir(transformedTextesDir);
    for (const filePath of iterFilePaths(originalTextesDir)) {
        const parsedFilePath = path.parse(filePath);
        if (parsedFilePath.ext !== ".xml") {
            continue;
        }
        // Location of the file relative to the original tree. Computed with
        // path.relative rather than by searching for the substring "original",
        // which picks the wrong position when dataDir itself contains that word.
        const texteDirFromOriginal = path.relative(originalTextesDir, parsedFilePath.dir);
        const transformedTexteDir = path.join(transformedTextesDir, texteDirFromOriginal);
        if (!options.silent) {
            console.log(`Parsing texte ${parsedFilePath.name}.xml…`);
        }
        const parsedTexte = await parseTexteFromFile(filePath);
        if (!parsedTexte) {
            // Presumably nothing could be parsed; skip instead of writing a
            // JSON file whose whole content is `null`.
            console.warn(`Could not parse texte ${filePath}`);
            continue;
        }
        fs.ensureDirSync(transformedTexteDir);
        fs.writeJSONSync(path.join(transformedTexteDir, `${parsedFilePath.name}.json`), parsedTexte, { spaces: 2 });
    }
}
|
|
33
|
+
// Entry point: exit with status 0 on success; on failure report the error on
// stderr (not stdout, so it is not mixed with regular output) and exit with 1.
main()
    .then(() => process.exit(0))
    .catch((error) => {
        console.error(error);
        process.exit(1);
    });
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import assert from "assert"
|
|
2
|
+
import commandLineArgs from "command-line-args"
|
|
3
|
+
import fs from "fs-extra"
|
|
4
|
+
import path from "path"
|
|
5
|
+
|
|
6
|
+
import { iterFilePaths } from "../loaders"
|
|
7
|
+
import { parseTexteFromFile } from "../model/texte"
|
|
8
|
+
import { commonOptions } from "./shared/cli_helpers"
|
|
9
|
+
import { ensureAndClearDir } from "./shared/util"
|
|
10
|
+
|
|
11
|
+
// This script accepts only the command-line options shared by every script.
const optionsDefinitions = [...commonOptions]

// Parsed once at module load; main() reads its settings from here.
const options = commandLineArgs(optionsDefinitions)
|
|
15
|
+
|
|
16
|
+
/**
 * Parses every `.xml` file found under `<dataDir>/leg/original` and writes the
 * result as pretty-printed JSON under `<dataDir>/leg/transformed`, mirroring
 * the directory layout of the original tree. The transformed directory is
 * cleared first.
 */
async function main() {
  const dataDir = options.dataDir
  assert(dataDir, "Missing argument: data directory")

  const originalTextesDir = path.join(dataDir, "leg", "original")
  const transformedTextesDir = path.join(dataDir, "leg", "transformed")
  ensureAndClearDir(transformedTextesDir)

  for (const filePath of iterFilePaths(originalTextesDir)) {
    const parsedFilePath = path.parse(filePath)

    if (parsedFilePath.ext !== ".xml") {
      continue
    }

    // Location of the file relative to the original tree. Computed with
    // path.relative rather than by searching for the substring "original",
    // which picks the wrong position when dataDir itself contains that word.
    const texteDirFromOriginal = path.relative(originalTextesDir, parsedFilePath.dir)
    const transformedTexteDir = path.join(transformedTextesDir, texteDirFromOriginal)

    if (!options.silent) {
      console.log(`Parsing texte ${parsedFilePath.name}.xml…`)
    }

    const parsedTexte = await parseTexteFromFile(filePath)
    if (!parsedTexte) {
      // Presumably nothing could be parsed; skip instead of writing a JSON
      // file whose whole content is `null`.
      console.warn(`Could not parse texte ${filePath}`)
      continue
    }

    fs.ensureDirSync(transformedTexteDir)
    fs.writeJSONSync(
      path.join(transformedTexteDir, `${parsedFilePath.name}.json`),
      parsedTexte,
      { spaces: 2 }
    )
  }
}
|
|
46
|
+
|
|
47
|
+
// Entry point: exit with status 0 on success; on failure report the error on
// stderr (not stdout, so it is not mixed with regular output) and exit with 1.
main()
  .then(() => process.exit(0))
  .catch((error) => {
    console.error(error)
    process.exit(1)
  })
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
import assert from "assert";
|
|
2
|
+
import commandLineArgs from "command-line-args";
|
|
3
|
+
import fs from "fs-extra";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg";
|
|
6
|
+
import { parseTexte, parseTexteFromFile } from "../model/texte";
|
|
7
|
+
import { UNDEFINED_SESSION } from "./datautil";
|
|
8
|
+
import { commonOptions } from "./shared/cli_helpers";
|
|
9
|
+
import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util";
|
|
10
|
+
// Command-line options: those shared by every script, followed by the ones
// specific to document retrieval.
const optionsDefinitions = [
    ...commonOptions,
    {
        name: "sessions",
        type: String,
        multiple: true,
        help: "sessions of textes to retrieve; leave empty for all",
    },
    {
        name: "parseDocuments",
        type: Boolean,
        help: "parse and convert documents into JSON (textes only for now, requires format xml)",
    },
    {
        name: "formats",
        alias: "F",
        type: String,
        multiple: true,
        help: "formats of documents to retrieve (xml/html/pdf for textes, html/pdf for rapports); leave empty for all",
    },
    {
        name: "types",
        type: String,
        multiple: true,
        help: "types of documents to retrieve (textes/rapports); leave empty for all",
    },
    {
        name: "force",
        type: Boolean,
        help: "force retrieve all documents, even already retrieved ones",
    },
];

// Parsed once at module load; the functions below read their settings from here.
const options = commandLineArgs(optionsDefinitions);

// Base URLs against which document file names are resolved.
const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";

// Shared decoder used to turn downloaded XML bytes into text before parsing.
const textDecoder = new TextDecoder("utf8");
|
|
47
|
+
/**
 * Downloads a document and resolves to its raw bytes.
 *
 * Failures never reject: a non-OK HTTP status, a network error or an error
 * while reading the response body is logged and reported as `null`.
 *
 * @param {string} documentUrl - Absolute URL of the document to download.
 * @returns {Promise<ArrayBuffer | null>} The document content, or `null` when
 *   it could not be retrieved.
 */
async function retrieveDocument(documentUrl) {
    if (!options.silent) {
        console.log(`Retrieving document ${documentUrl}…`);
    }
    try {
        const response = await fetchWithRetry(documentUrl);
        if (!response.ok) {
            if (response.status === 404) {
                console.warn(`Texte ${documentUrl} not found`);
            }
            else {
                console.error(`An error occurred while retrieving texte ${documentUrl}: ${response.status}`);
            }
            return null;
        }
        // `await` is required here: without it a rejection while reading the
        // body would bypass the catch below and escape to the caller.
        return await response.arrayBuffer();
    }
    catch (error) {
        console.error(error?.message ?? error);
        return null;
    }
}
|
|
69
|
+
/**
 * Downloads the textes listed by `findSenatTexteUrls(options.sessions)` into
 * `<dataDir>/leg/original/<session>/<texteName>/`, in every format enabled by
 * `options.formats` (xml, html, pdf). Files already on disk are kept unless
 * `options.force` is set. With `options.parseDocuments`, the XML rendition is
 * also parsed and written as JSON under `<dataDir>/leg/transformed/…`.
 *
 * Each format is handled independently: a missing, failed or already present
 * rendition never prevents the other formats of the same texte from being
 * retrieved.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function retrieveTextes(dataDir) {
    const textesDir = path.join(dataDir, "leg");
    fs.ensureDirSync(textesDir);
    const originalTextesDir = path.join(textesDir, "original");
    const transformedTextesDir = path.join(textesDir, "transformed");
    ensureAndClearDir(transformedTextesDir);
    let retrievedTextesCount = 0;
    const texteUrlsNotFoundOrError = [];
    const texteUrlsParseError = [];

    // True (after logging) when the file exists and no re-download was forced.
    const isAlreadyRetrieved = (textePath) => {
        if (options.force || !fs.existsSync(textePath)) {
            return false;
        }
        if (!options.silent) {
            console.info(`Already retrieved texte ${textePath}…`);
        }
        return true;
    };

    // Downloads one rendition to disk; resolves to its bytes, or to null after
    // recording the URL as failed.
    const download = async (absoluteUrl, textePath) => {
        const texteBuffer = await retrieveDocument(absoluteUrl);
        if (!texteBuffer) {
            texteUrlsNotFoundOrError.push(absoluteUrl);
            return null;
        }
        fs.writeFileSync(textePath, Buffer.from(texteBuffer));
        retrievedTextesCount++;
        return texteBuffer;
    };

    for await (const texte of findSenatTexteUrls(options.sessions)) {
        const texteName = path.parse(texte.url).name;
        const sessionDirName = `${texte.session ?? UNDEFINED_SESSION}`;
        const texteDir = path.join(originalTextesDir, sessionDirName, texteName);
        fs.ensureDirSync(texteDir);

        if (isOptionEmptyOrHasValue(options.formats, "xml")) {
            const texteXmlUrl = `${texteName}.akn.xml`;
            const texteXmlAbsoluteUrl = new URL(texteXmlUrl, SENAT_TEXTE_XML_BASE_URL).toString();
            const textePath = path.join(texteDir, texteXmlUrl);
            const alreadyRetrieved = isAlreadyRetrieved(textePath);
            const texteBuffer = alreadyRetrieved ? null : await download(texteXmlAbsoluteUrl, textePath);
            // Parse only when the XML is available, freshly downloaded or on disk.
            if (options.parseDocuments && (alreadyRetrieved || texteBuffer)) {
                if (!options.silent) {
                    console.log(`Parsing texte ${texteXmlUrl}…`);
                }
                const parsedTexte = texteBuffer
                    ? parseTexte(textDecoder.decode(texteBuffer))
                    : await parseTexteFromFile(textePath);
                if (parsedTexte) {
                    const transformedTexteDir = path.join(transformedTextesDir, sessionDirName, texteName);
                    fs.ensureDirSync(transformedTexteDir);
                    fs.writeJSONSync(path.join(transformedTexteDir, `${texteName}.akn.json`), parsedTexte, { spaces: 2 });
                }
                else {
                    texteUrlsParseError.push(texteXmlAbsoluteUrl);
                }
            }
        }

        // HTML and PDF renditions share the same base URL and need no parsing.
        for (const format of ["html", "pdf"]) {
            if (!isOptionEmptyOrHasValue(options.formats, format)) {
                continue;
            }
            const texteFileName = `${texteName}.${format}`;
            const texteAbsoluteUrl = new URL(texteFileName, SENAT_TEXTE_BASE_URL).toString();
            const textePath = path.join(texteDir, texteFileName);
            if (!isAlreadyRetrieved(textePath)) {
                await download(texteAbsoluteUrl, textePath);
            }
        }
    }

    if (options.verbose) {
        console.log(`${retrievedTextesCount} textes retrieved`);
        console.log(`${texteUrlsNotFoundOrError.length} textes failed to be retrieved with URLs ${texteUrlsNotFoundOrError.join(", ")}`);
        if (options.parseDocuments) {
            console.log(`${texteUrlsParseError.length} textes failed to be parsed with URLs ${texteUrlsParseError.join(", ")}`);
        }
    }
    // TODO retrieve exposé des motifs (/leg/exposes-des-motifs)
}
|
|
168
|
+
/**
 * Downloads the rapports listed by `findSenatRapportUrls(options.sessions)`
 * into `<dataDir>/rap/<session>/<rapportName>/`, in every format enabled by
 * `options.formats`: the single-page HTML (`<name>_mono.html`) and the PDF
 * (`<name>1.pdf`). Files already on disk are kept unless `options.force` is
 * set.
 *
 * Each format is handled independently: a failed or already present HTML
 * rendition never prevents the PDF of the same rapport from being retrieved.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function retrieveRapports(dataDir) {
    const rapportsDir = path.join(dataDir, "rap");
    fs.ensureDirSync(rapportsDir);
    let retrievedRapportsCount = 0;
    const rapportUrlsNotFoundOrError = [];
    // [format name as given in options.formats, suffix appended to the rapport name]
    const renditions = [
        ["html", "_mono.html"],
        ["pdf", "1.pdf"],
    ];
    for await (const rapport of findSenatRapportUrls(options.sessions)) {
        const parsedRapportUrl = path.parse(rapport.url);
        const rapportName = parsedRapportUrl.name;
        const rapportDir = path.join(rapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName);
        fs.ensureDirSync(rapportDir);
        for (const [format, suffix] of renditions) {
            if (!isOptionEmptyOrHasValue(options.formats, format)) {
                continue;
            }
            const rapportUrlBase = `${rapportName}${suffix}`;
            // The rendition lives next to the rapport's own URL.
            const rapportUrl = path.format({
                dir: parsedRapportUrl.dir,
                base: rapportUrlBase,
            });
            const rapportAbsoluteUrl = new URL(rapportUrl, SENAT_RAPPORT_BASE_URL).toString();
            const rapportPath = path.join(rapportDir, rapportUrlBase);
            if (!options.force && fs.existsSync(rapportPath)) {
                if (!options.silent) {
                    console.info(`Already retrieved rapport ${rapportPath}…`);
                }
                // Skips this rendition only; the next format is still processed.
                continue;
            }
            const rapportBuffer = await retrieveDocument(rapportAbsoluteUrl);
            if (!rapportBuffer) {
                rapportUrlsNotFoundOrError.push(rapportAbsoluteUrl);
                continue;
            }
            fs.writeFileSync(rapportPath, Buffer.from(rapportBuffer));
            retrievedRapportsCount++;
        }
    }
    if (options.verbose) {
        console.log(`${retrievedRapportsCount} rapports retrieved`);
        console.log(`${rapportUrlsNotFoundOrError.length} rapports failed with URLs ${rapportUrlsNotFoundOrError.join(", ")}`);
    }
}
|
|
228
|
+
/**
 * Retrieves each document type enabled by `options.types` (every type when
 * the option is empty), textes first and rapports second, into
 * `options.dataDir`.
 */
async function main() {
    const { dataDir } = options;
    assert(dataDir, "Missing argument: data directory");
    // Ordered list of [type name as given in options.types, retriever].
    const retrievers = [
        ["textes", retrieveTextes],
        ["rapports", retrieveRapports],
    ];
    for (const [type, retrieve] of retrievers) {
        if (!isOptionEmptyOrHasValue(options.types, type)) {
            continue;
        }
        await retrieve(dataDir);
    }
}
|
|
238
|
+
// Entry point: exit with status 0 on success; on failure report the error on
// stderr (not stdout, so it is not mixed with regular output) and exit with 1.
main()
    .then(() => process.exit(0))
    .catch((error) => {
        console.error(error);
        process.exit(1);
    });
|