@tricoteuses/senat 1.3.5 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -9
- package/lib/datasets.d.ts +4 -3
- package/lib/datasets.js +3 -3
- package/lib/index.d.ts +1 -4
- package/lib/index.js +1 -4
- package/lib/loaders.d.ts +4 -3
- package/lib/loaders.js +7 -6
- package/lib/model/agenda.d.ts +2 -0
- package/lib/model/agenda.js +85 -0
- package/lib/model/ameli.d.ts +2 -2
- package/lib/model/ameli.js +14 -6
- package/lib/model/dosleg.d.ts +2 -2
- package/lib/model/dosleg.js +4 -2
- package/lib/scripts/convert_data.js +116 -102
- package/lib/scripts/datautil.d.ts +1 -2
- package/lib/scripts/datautil.js +1 -1
- package/lib/scripts/parse_textes.js +4 -4
- package/lib/scripts/retrieve_agenda.d.ts +1 -0
- package/lib/scripts/retrieve_agenda.js +73 -0
- package/lib/scripts/retrieve_documents.js +11 -16
- package/lib/scripts/retrieve_open_data.js +4 -3
- package/lib/scripts/shared/cli_helpers.d.ts +11 -0
- package/lib/scripts/shared/cli_helpers.js +8 -1
- package/lib/types/agenda.d.ts +12 -0
- package/lib/types/agenda.js +1 -0
- package/lib/types/ameli.d.ts +0 -5
- package/lib/types/ameli.js +1 -13
- package/lib/types/debats.d.ts +0 -2
- package/lib/types/debats.js +1 -2
- package/lib/types/dosleg.d.ts +0 -28
- package/lib/types/dosleg.js +1 -151
- package/lib/types/sessions.d.ts +5 -42
- package/lib/types/sessions.js +84 -43
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -43,19 +43,22 @@ npm run data:download ../senat-data -- [--categories All]
|
|
|
43
43
|
|
|
44
44
|
Data from other sources is also available :
|
|
45
45
|
```bash
|
|
46
|
-
# Retrieval of sénateurs' pictures from Sénat's website
|
|
47
|
-
npm run data:retrieve_senateurs_photos ../senat-data
|
|
48
|
-
|
|
49
46
|
# Retrieval of textes and rapports from Sénat's website
|
|
50
47
|
# Available options for optional `formats` parameter : xml, html, pdf
|
|
51
48
|
# Available options for optional `types` parameter : textes, rapports
|
|
52
|
-
npm run data:retrieve_documents ../senat-data -- --
|
|
49
|
+
npm run data:retrieve_documents ../senat-data -- --fromSession 2023 [--formats xml pdf] [--types textes]
|
|
53
50
|
|
|
54
51
|
# Retrieval & parsing (textes in xml format only for now)
|
|
55
|
-
npm run data:retrieve_documents ../senat-data -- --
|
|
52
|
+
npm run data:retrieve_documents ../senat-data -- --fromSession 2023 --parseDocuments
|
|
56
53
|
|
|
57
54
|
# Parsing only
|
|
58
55
|
npm run data:parse_textes_lois ../senat-data
|
|
56
|
+
|
|
57
|
+
# Retrieval of agenda from Sénat's website
|
|
58
|
+
npm run data:retrieve_agenda ../senat-data -- --fromSession 2023
|
|
59
|
+
|
|
60
|
+
# Retrieval of sénateurs' pictures from Sénat's website
|
|
61
|
+
npm run data:retrieve_senateurs_photos ../senat-data
|
|
59
62
|
```
|
|
60
63
|
|
|
61
64
|
## Data download using Docker
|
|
@@ -67,21 +70,19 @@ docker volume create senat-data # Create a volume to download the data
|
|
|
67
70
|
docker run --name tricoteuses-senat -v senat-data:/app/senat-data -d registry.en-root.org/tricoteuses/tricoteuses-senat:latest
|
|
68
71
|
```
|
|
69
72
|
|
|
70
|
-
Use the environment variable `CATEGORIES` and `
|
|
73
|
+
Use the environment variable `CATEGORIES` and `FROM_SESSION` if needed.
|
|
71
74
|
|
|
72
75
|
## Using the data
|
|
73
76
|
|
|
74
77
|
Once the data is downloaded, you can use loaders to retrieve it.
|
|
75
78
|
To use loaders in your project, you can install the _@tricoteuses/senat_ package, and import the iterator functions that you need.
|
|
76
79
|
|
|
77
|
-
_Only iterator for Questions, Sénateurs and Circonscriptions available for now_
|
|
78
|
-
|
|
79
80
|
```bash
|
|
80
81
|
npm install @tricoteuses/senat
|
|
81
82
|
```
|
|
82
83
|
|
|
83
84
|
```js
|
|
84
|
-
import { iterLoadSenatQuestions } from "@tricoteuses/senat/
|
|
85
|
+
import { iterLoadSenatQuestions } from "@tricoteuses/senat/loaders"
|
|
85
86
|
|
|
86
87
|
// Pass data directory and legislature as arguments
|
|
87
88
|
for (const { item: question } of iterLoadSenatQuestions("../senat-data", 17)) {
|
package/lib/datasets.d.ts
CHANGED
|
@@ -21,8 +21,9 @@ export declare enum EnabledDatasets {
|
|
|
21
21
|
Questions = 8,
|
|
22
22
|
Sens = 16,
|
|
23
23
|
PhotosSenateurs = 32,
|
|
24
|
-
|
|
24
|
+
Agenda = 64,
|
|
25
|
+
All = 127
|
|
25
26
|
}
|
|
26
27
|
export declare const datasets: Datasets;
|
|
27
|
-
export declare function getEnabledDatasets(categories:
|
|
28
|
-
export declare function
|
|
28
|
+
export declare function getEnabledDatasets(categories: string[]): EnabledDatasets;
|
|
29
|
+
export declare function getChosenDatasets(enabledDatasets: EnabledDatasets): Dataset[];
|
package/lib/datasets.js
CHANGED
|
@@ -10,7 +10,8 @@ export var EnabledDatasets;
|
|
|
10
10
|
EnabledDatasets[EnabledDatasets["Questions"] = 8] = "Questions";
|
|
11
11
|
EnabledDatasets[EnabledDatasets["Sens"] = 16] = "Sens";
|
|
12
12
|
EnabledDatasets[EnabledDatasets["PhotosSenateurs"] = 32] = "PhotosSenateurs";
|
|
13
|
-
EnabledDatasets[EnabledDatasets["
|
|
13
|
+
EnabledDatasets[EnabledDatasets["Agenda"] = 64] = "Agenda";
|
|
14
|
+
EnabledDatasets[EnabledDatasets["All"] = 127] = "All";
|
|
14
15
|
})(EnabledDatasets || (EnabledDatasets = {}));
|
|
15
16
|
export const datasets = {
|
|
16
17
|
ameli: {
|
|
@@ -68,8 +69,7 @@ export function getEnabledDatasets(categories) {
|
|
|
68
69
|
return enabledDatasets | (enabledDataset || EnabledDatasets.None);
|
|
69
70
|
}, EnabledDatasets.None);
|
|
70
71
|
}
|
|
71
|
-
export function
|
|
72
|
-
const enabledDatasets = getEnabledDatasets(categories);
|
|
72
|
+
export function getChosenDatasets(enabledDatasets) {
|
|
73
73
|
return [
|
|
74
74
|
enabledDatasets & EnabledDatasets.Ameli ? datasets.ameli : null,
|
|
75
75
|
enabledDatasets & EnabledDatasets.Debats ? datasets.debats : null,
|
package/lib/index.d.ts
CHANGED
|
@@ -2,12 +2,9 @@ export type { AmendementResult, } from "./model/ameli";
|
|
|
2
2
|
export type { DossierLegislatifResult, } from "./model/dosleg";
|
|
3
3
|
export type { QuestionResult, } from "./model/questions";
|
|
4
4
|
export type { CirconscriptionResult, OrganismeResult, SenateurResult, } from "./model/sens";
|
|
5
|
-
export { sesFieldsToParseInt, sesFieldsToTrim, subFieldsToParseInt, subFieldsToTrim, txtAmeliFieldsToTrim, } from "./types/ameli";
|
|
6
5
|
export type { Ses, Sub, TxtAmeli } from "./types/ameli";
|
|
7
|
-
export { debatsFieldsToTrim, lecassdebFieldsToTrim } from "./types/debats";
|
|
8
6
|
export type { Debat, LecAssDeb } from "./types/debats";
|
|
9
|
-
export { assFieldsToTrim, audFieldsToTrim, auteurFieldsToTrim, dateSeanceFieldsToTrim, deccocFieldsToTrim, denrapFieldsToTrim, docattFieldsToParseInt, docattFieldsToTrim, ecrFieldsToTrim, etaloiFieldsToTrim, lecassFieldsToTrim, lecassrapFieldsToTrim, lectureFieldsToTrim, loiFieldsToTrim, orgFieldsToTrim, oritxtFieldsToTrim, quaFieldsToTrim, rapFieldsToParseInt, rapFieldsToTrim, raporgFieldsToTrim, scrFieldsToTrim, texteFieldsToParseInt, texteFieldsToTrim, typattFieldsToTrim, typlecFieldsToTrim, typloiFieldsToTrim, typtxtFieldsToTrim, typurlFieldsToTrim, } from "./types/dosleg";
|
|
10
7
|
export type { Ass, Aud, Auteur, DateSeance, DecCoc, DenRap, DocAtt, Ecr, EtaLoi, LecAss, LecAssRap, Lecture, Loi, Org, OriTxt, Qua, Rap, RapOrg, Scr, Texte, TypAtt, TypLec, TypLoi, TypTxt, TypUrl, } from "./types/dosleg";
|
|
11
8
|
export type { Photo, Sen } from "./types/sens";
|
|
12
|
-
export { Session } from "./types/sessions";
|
|
9
|
+
export type { Session } from "./types/sessions";
|
|
13
10
|
export type { Alinea, Article, Division, DivisionTag, FlatTexte } from "./types/texte";
|
package/lib/index.js
CHANGED
|
@@ -1,4 +1 @@
|
|
|
1
|
-
export {
|
|
2
|
-
export { debatsFieldsToTrim, lecassdebFieldsToTrim } from "./types/debats";
|
|
3
|
-
export { assFieldsToTrim, audFieldsToTrim, auteurFieldsToTrim, dateSeanceFieldsToTrim, deccocFieldsToTrim, denrapFieldsToTrim, docattFieldsToParseInt, docattFieldsToTrim, ecrFieldsToTrim, etaloiFieldsToTrim, lecassFieldsToTrim, lecassrapFieldsToTrim, lectureFieldsToTrim, loiFieldsToTrim, orgFieldsToTrim, oritxtFieldsToTrim, quaFieldsToTrim, rapFieldsToParseInt, rapFieldsToTrim, raporgFieldsToTrim, scrFieldsToTrim, texteFieldsToParseInt, texteFieldsToTrim, typattFieldsToTrim, typlecFieldsToTrim, typloiFieldsToTrim, typtxtFieldsToTrim, typurlFieldsToTrim, } from "./types/dosleg";
|
|
4
|
-
export { Session } from "./types/sessions";
|
|
1
|
+
export {};
|
package/lib/loaders.d.ts
CHANGED
|
@@ -4,15 +4,16 @@ import { QuestionResult } from "./model/questions";
|
|
|
4
4
|
import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
|
|
5
5
|
import { FlatTexte } from "./types/texte";
|
|
6
6
|
export { EnabledDatasets } from "./datasets";
|
|
7
|
+
export declare const AGENDA_FOLDER = "agenda";
|
|
7
8
|
export declare const DOSLEG_DOSSIERS_FOLDER = "dossiers";
|
|
9
|
+
export declare const RAPPORT_FOLDER = "rap";
|
|
8
10
|
export declare const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
|
|
9
11
|
export declare const SENS_ORGANISMES_FOLDER = "organismes";
|
|
10
12
|
export declare const SENS_SENATEURS_FOLDER = "senateurs";
|
|
11
13
|
export declare const TEXTE_FOLDER = "leg";
|
|
12
|
-
export declare const
|
|
13
|
-
export declare const
|
|
14
|
+
export declare const DATA_ORIGINAL_FOLDER = "original";
|
|
15
|
+
export declare const DATA_TRANSFORMED_FOLDER = "transformed";
|
|
14
16
|
export declare const DOCUMENT_METADATA_FILE = "metadata.json";
|
|
15
|
-
export declare const RAPPORT_FOLDER = "rap";
|
|
16
17
|
type IterItem<T> = {
|
|
17
18
|
item: T;
|
|
18
19
|
filePathFromDataset?: string;
|
package/lib/loaders.js
CHANGED
|
@@ -2,17 +2,18 @@ import fs from "fs";
|
|
|
2
2
|
import path from "path";
|
|
3
3
|
import legislatures from "./legislatures.json";
|
|
4
4
|
import { datasets } from "./datasets";
|
|
5
|
-
import { UNDEFINED_SESSION } from "./
|
|
5
|
+
import { UNDEFINED_SESSION } from "./types/sessions";
|
|
6
6
|
export { EnabledDatasets } from "./datasets";
|
|
7
|
+
export const AGENDA_FOLDER = "agenda";
|
|
7
8
|
export const DOSLEG_DOSSIERS_FOLDER = "dossiers";
|
|
9
|
+
export const RAPPORT_FOLDER = "rap";
|
|
8
10
|
export const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
|
|
9
11
|
export const SENS_ORGANISMES_FOLDER = "organismes";
|
|
10
12
|
export const SENS_SENATEURS_FOLDER = "senateurs";
|
|
11
13
|
export const TEXTE_FOLDER = "leg";
|
|
12
|
-
export const
|
|
13
|
-
export const
|
|
14
|
+
export const DATA_ORIGINAL_FOLDER = "original";
|
|
15
|
+
export const DATA_TRANSFORMED_FOLDER = "transformed";
|
|
14
16
|
export const DOCUMENT_METADATA_FILE = "metadata.json";
|
|
15
|
-
export const RAPPORT_FOLDER = "rap";
|
|
16
17
|
export function* iterFilePaths(dirPath) {
|
|
17
18
|
if (dirPath && fs.existsSync(dirPath)) {
|
|
18
19
|
const files = fs.readdirSync(dirPath, {
|
|
@@ -59,7 +60,7 @@ export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}
|
|
|
59
60
|
}
|
|
60
61
|
}
|
|
61
62
|
export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
|
|
62
|
-
let itemsDir = path.join(dataDir, TEXTE_FOLDER,
|
|
63
|
+
let itemsDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
63
64
|
if (session) {
|
|
64
65
|
itemsDir = path.join(itemsDir, session.toString());
|
|
65
66
|
}
|
|
@@ -121,7 +122,7 @@ export function* iterLoadSenatDossiersLegislatifsTextes(dataDir, session, option
|
|
|
121
122
|
export function loadSenatTexteContent(dataDir, textePathFromDataset) {
|
|
122
123
|
const parsedTextePath = path.parse(textePathFromDataset);
|
|
123
124
|
const jsonTexteName = `${parsedTextePath.name}.json`;
|
|
124
|
-
const fullTextePath = path.join(dataDir, TEXTE_FOLDER,
|
|
125
|
+
const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, parsedTextePath.dir, jsonTexteName);
|
|
125
126
|
if (!fs.existsSync(fullTextePath)) {
|
|
126
127
|
return { item: null };
|
|
127
128
|
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { JSDOM } from "jsdom";
|
|
2
|
+
import { DateTime } from "luxon";
|
|
3
|
+
/**
 * Translates the CSS classes of an agenda event element into an event type label.
 *
 * Only the first class whose name starts with "evt-" is looked at; the bare
 * "evt" class is ignored because it lacks the dash.
 *
 * @param eventClasses Iterable of class names (for instance an element's classList).
 * @returns The label matching that class, or null when there is no "evt-" class
 *          or when it is not one of the known ones.
 */
function getEventType(eventClasses) {
    const labelsByClass = new Map([
        ["evt-seance", "Séance publique"],
        ["evt-instanz", "Commissions"],
        ["evt-cemi", "Mission de contrôle"],
        ["evt-deleg", "Offices et délégations"],
        ["evt-bureau", "Instances décisionnelles"],
    ]);
    for (const className of eventClasses) {
        if (className.startsWith("evt-")) {
            // The first prefixed class decides the outcome, known or not.
            return labelsByClass.get(className) ?? null;
        }
    }
    return null;
}
|
|
21
|
+
/**
 * Picks the link to the Sénat legislative dossier among an event's links.
 *
 * @param lienElements Iterable of link elements (each exposing `textContent`
 *                     and `getAttribute`).
 * @returns The `href` attribute of the first link whose text contains
 *          "dossier législatif", or null when no link matches (or when the
 *          matching link has no `href`).
 */
function getUrlDossierSenat(lienElements) {
    for (const anchor of lienElements) {
        const label = anchor.textContent;
        if (label?.includes("dossier législatif")) {
            return anchor.getAttribute("href");
        }
    }
    return null;
}
|
|
26
|
+
/**
 * Derives an event start time from the free-text time label of the agenda page.
 *
 * The label is first normalized to the "<hour>h<minutes>" shape:
 *  - a leading "À l'issue de l'espace réservé … et au plus tard " is dropped;
 *  - a leading "matin" / "après-midi" / "soir" / "nuit" (with optional article)
 *    becomes 10h00 / 16h00 / 20h00 / 22h00;
 *  - a leading "à " is dropped and "heures" becomes "h00";
 *  - anything after " et", after a comma, and " (hors hémicycle)" is dropped;
 *  - whitespace around "h" is removed.
 * The normalized value is then parsed with the Luxon format "H'h'mm".
 *
 * Previously the raw label was parsed instead of the normalized one, so the
 * normalization had no effect; a leftover debug print was also removed.
 *
 * @param timeStr Raw time label, or null/empty when the event has none.
 * @returns `heureDebut` as the ISO time string produced by Luxon, or null when
 *          the label is missing or cannot be parsed; `heureFin` is always null
 *          (no end time is extracted).
 */
function getHeuresDebutFin(timeStr) {
    if (!timeStr) {
        return { heureDebut: null, heureFin: null };
    }
    const normalizedHeureDebut = timeStr
        .trim() // the patterns below are anchored at the start of the label
        .replace(/^À l'issue de l'espace réservé .* et au plus tard\s/i, "") // Must be first
        .replace(/^(?:le )?matin/i, "10h00")
        .replace(/^(?:l')?après-midi/i, "16h00")
        .replace(/^(?:le )?soir/i, "20h00")
        .replace(/^(?:la )?nuit/i, "22h00")
        .replace(/^à\s/ig, "")
        .replace(/heures/ig, "h00")
        .replace(/\set.*/i, "")
        .replace(/,.*/, "")
        .replace(/\s\(hors hémicycle\)/i, "")
        .replace(/\s*h\s*/ig, "h")
        .trim();
    // Parse the NORMALIZED label; an unparsable value yields an invalid
    // DateTime, whose toISOTime() is null.
    const heureDebut = DateTime.fromFormat(normalizedHeureDebut, "H'h'mm").toISOTime();
    return {
        heureDebut,
        heureFin: null
    };
}
|
|
48
|
+
/**
 * Extracts the agenda events found in a parsed agenda HTML document.
 *
 * Every element matching ".evt" yields one entry whose fields are read from
 * the element's classes (type) and from its ".time", ".titre", ".organe",
 * ".objet", ".lieu", ".lien a" and ".video a" descendants. Missing or empty
 * values are reported as null.
 *
 * @param document DOM document of the agenda page.
 * @returns Array of event objects, in document order.
 */
function transformAgenda(document) {
    return Array.from(document.querySelectorAll(".evt"), (eventElement) => {
        // Text of the first descendant matching the selector, or null.
        const textOf = (selector) => eventElement.querySelector(selector)?.textContent || null;
        const type = getEventType(eventElement.classList);
        const timeOriginal = textOf(".time");
        const heures = getHeuresDebutFin(timeOriginal);
        return {
            type,
            heureDebut: heures.heureDebut,
            heureFin: heures.heureFin,
            timeOriginal,
            titre: textOf(".titre"),
            organe: textOf(".organe"),
            objet: textOf(".objet"),
            lieu: textOf(".lieu"),
            url_dossier_senat: getUrlDossierSenat(eventElement.querySelectorAll(".lien a")),
            url_video: eventElement.querySelector(".video a")?.getAttribute("href") || null,
        };
    });
}
|
|
76
|
+
/**
 * Loads an agenda HTML file and extracts its events.
 *
 * Failures (unreadable file, parsing error) are logged to stderr and reported
 * as a null result rather than thrown.
 *
 * @param htmlFilePath Path of the agenda HTML file to parse.
 * @returns The extracted events, or null when the file could not be parsed.
 */
export async function parseAgendaFromFile(htmlFilePath) {
    try {
        const dom = await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" });
        return transformAgenda(dom.window.document);
    }
    catch (error) {
        // Name the right kind of document and the offending file in the log.
        console.error(`Could not parse agenda file ${htmlFilePath} with error ${error}`);
        return null;
    }
}
|
package/lib/model/ameli.d.ts
CHANGED
|
@@ -49,7 +49,7 @@ declare const findAllQuery: import("kysely").SelectQueryBuilder<{
|
|
|
49
49
|
date_depot: string;
|
|
50
50
|
dispositif: string | null;
|
|
51
51
|
objet: string | null;
|
|
52
|
-
|
|
52
|
+
etat: string;
|
|
53
53
|
avis_commission: string | null;
|
|
54
54
|
avis_gouvernement: string | null;
|
|
55
55
|
sort: string | null;
|
|
@@ -86,7 +86,7 @@ export declare function findAll(): AsyncIterableIterator<{
|
|
|
86
86
|
date_depot: string;
|
|
87
87
|
dispositif: string | null;
|
|
88
88
|
objet: string | null;
|
|
89
|
-
|
|
89
|
+
etat: string;
|
|
90
90
|
avis_commission: string | null;
|
|
91
91
|
avis_gouvernement: string | null;
|
|
92
92
|
sort: string | null;
|
package/lib/model/ameli.js
CHANGED
|
@@ -43,7 +43,7 @@ const findAllQuery = dbAmeli
|
|
|
43
43
|
.then(val("Motion"))
|
|
44
44
|
.when("amd.typ", "=", "S")
|
|
45
45
|
.then(val("Sous-amendement"))
|
|
46
|
-
.else("
|
|
46
|
+
.else("")
|
|
47
47
|
.end()
|
|
48
48
|
.as("nature"),
|
|
49
49
|
"amd.id as id",
|
|
@@ -56,15 +56,23 @@ const findAllQuery = dbAmeli
|
|
|
56
56
|
"typsub.lib as subdivision_type",
|
|
57
57
|
"amd.alinea as alinea",
|
|
58
58
|
toDateString(ref("amd.datdep")).as("date_depot"),
|
|
59
|
+
"amd.dis as dispositif",
|
|
60
|
+
"amd.obj as objet",
|
|
59
61
|
eb
|
|
60
62
|
.case()
|
|
63
|
+
.when("amd.etaid", "=", 7)
|
|
64
|
+
.then(val("Diffusé"))
|
|
61
65
|
.when("amd.etaid", "=", 8)
|
|
62
|
-
.then(val("
|
|
63
|
-
.
|
|
66
|
+
.then(val("Retiré avant réunion ou séance"))
|
|
67
|
+
.when("amd.etaid", "=", 9)
|
|
68
|
+
.then(val("Examiné en commission ou séance"))
|
|
69
|
+
.when("amd.etaid", "=", 10)
|
|
70
|
+
.then(val("Irrecevable"))
|
|
71
|
+
.when("amd.etaid", "=", 11)
|
|
72
|
+
.then(val("Irrecevable"))
|
|
73
|
+
.else("")
|
|
64
74
|
.end()
|
|
65
|
-
.as("
|
|
66
|
-
"amd.obj as objet",
|
|
67
|
-
"amd.etaid as etat_id",
|
|
75
|
+
.as("etat"),
|
|
68
76
|
"avicom.lib as avis_commission",
|
|
69
77
|
"avigvt.lib as avis_gouvernement",
|
|
70
78
|
eb.fn.coalesce("sor.lib", "irr.libirr").as("sort"),
|
package/lib/model/dosleg.d.ts
CHANGED
|
@@ -214,12 +214,12 @@ export declare function findAll(): AsyncIterableIterator<{
|
|
|
214
214
|
numero_JO: string | null;
|
|
215
215
|
url_JO: string | null;
|
|
216
216
|
}>;
|
|
217
|
-
export declare function findSenatTexteUrls(sessions?:
|
|
217
|
+
export declare function findSenatTexteUrls(sessions?: number[]): AsyncIterableIterator<{
|
|
218
218
|
session: string | null | undefined;
|
|
219
219
|
url: string;
|
|
220
220
|
hasExposeDesMotifs: boolean;
|
|
221
221
|
}>;
|
|
222
|
-
export declare function findSenatRapportUrls(sessions?:
|
|
222
|
+
export declare function findSenatRapportUrls(sessions?: number[]): AsyncIterableIterator<{
|
|
223
223
|
url: string;
|
|
224
224
|
session: string | null | undefined;
|
|
225
225
|
}>;
|
package/lib/model/dosleg.js
CHANGED
|
@@ -170,11 +170,12 @@ export function findAll() {
|
|
|
170
170
|
return findAllQuery.stream();
|
|
171
171
|
}
|
|
172
172
|
export function findSenatTexteUrls(sessions = []) {
|
|
173
|
+
const sessionsStr = sessions.map(session => String(session));
|
|
173
174
|
return dbDosleg
|
|
174
175
|
.selectFrom("texte")
|
|
175
176
|
.where("texurl", "is not", null)
|
|
176
177
|
.where("typurl", "=", "I")
|
|
177
|
-
.$if(sessions.length > 0, (qb) => qb.where("sesann", "in",
|
|
178
|
+
.$if(sessions.length > 0, (qb) => qb.where("sesann", "in", sessionsStr))
|
|
178
179
|
.select(({ eb, ref }) => [
|
|
179
180
|
"sesann as session",
|
|
180
181
|
rtrim(ref("texurl")).as("url"),
|
|
@@ -190,11 +191,12 @@ export function findSenatTexteUrls(sessions = []) {
|
|
|
190
191
|
.stream();
|
|
191
192
|
}
|
|
192
193
|
export function findSenatRapportUrls(sessions = []) {
|
|
194
|
+
const sessionsStr = sessions.map(session => String(session));
|
|
193
195
|
return dbDosleg
|
|
194
196
|
.selectFrom("rap")
|
|
195
197
|
.where("rapurl", "is not", null)
|
|
196
198
|
.where("typurl", "=", "I")
|
|
197
|
-
.$if(sessions.length > 0, (qb) => qb.where("sesann", "in",
|
|
199
|
+
.$if(sessions.length > 0, (qb) => qb.where("sesann", "in", sessionsStr))
|
|
198
200
|
.select(({ ref }) => [rtrim(ref("rapurl")).as("url"), "sesann as session"])
|
|
199
201
|
.$narrowType()
|
|
200
202
|
.stream();
|
|
@@ -3,10 +3,11 @@ import commandLineArgs from "command-line-args";
|
|
|
3
3
|
import fs from "fs-extra";
|
|
4
4
|
import path from "path";
|
|
5
5
|
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
|
|
6
|
-
import { DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER,
|
|
6
|
+
import { DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER, } from "../loaders";
|
|
7
7
|
import { findAllAmendements, findAllCirconscriptions, findAllLois, findAllOrganismes, findAllQuestions, findAllSens, } from "../model";
|
|
8
8
|
import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg";
|
|
9
|
-
import {
|
|
9
|
+
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
10
|
+
import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, } from "./datautil";
|
|
10
11
|
import { commonOptions } from "./shared/cli_helpers";
|
|
11
12
|
import { ensureAndClearDir } from "./shared/util";
|
|
12
13
|
const optionsDefinitions = [...commonOptions];
|
|
@@ -16,123 +17,99 @@ const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
|
|
|
16
17
|
const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
|
|
17
18
|
const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
|
|
18
19
|
/**
 * Entry point of the conversion script: converts every enabled dataset into
 * files under the data directory given on the command line.
 *
 * Reads the `dataDir`, `categories`, `fromSession` and `silent` command-line
 * options. Datasets are processed sequentially in the order Ameli, DosLeg,
 * Questions, Sens; the elapsed time is printed unless `silent` is set.
 */
async function convertData() {
    const dataDir = options["dataDir"];
    assert(dataDir, "Missing argument: data directory");
    const enabledDatasets = getEnabledDatasets(options["categories"]);
    const sessions = getSessionsFromStart(options["fromSession"]);
    console.time("data transformation time");
    // One entry per dataset: its flag in the bit mask and the converter to run.
    const conversions = [
        [EnabledDatasets.Ameli, () => convertDatasetAmeli(dataDir)],
        [EnabledDatasets.DosLeg, () => convertDatasetDosLeg(dataDir, sessions)],
        [EnabledDatasets.Questions, () => convertDatasetQuestions(dataDir)],
        [EnabledDatasets.Sens, () => convertDatasetSens(dataDir)],
    ];
    for (const [flag, convert] of conversions) {
        if (enabledDatasets & flag) {
            await convert();
        }
    }
    if (!options["silent"]) {
        console.timeEnd("data transformation time");
    }
}
|
|
41
|
+
/**
 * Writes every amendement of the Ameli database as a JSON file, laid out as
 * `<dataDir>/<database>/<session>/<signet dossier législatif>/<numero>.json`.
 *
 * The target root directory is emptied first. Amendements without a session
 * are stored under the UNDEFINED_SESSION folder; amendements without a
 * dossier signet use "<nature_texte>-<numero_texte>" in lower case instead.
 *
 * @param dataDir Root directory of the converted data.
 */
async function convertDatasetAmeli(dataDir) {
    const dataset = datasets.ameli;
    if (!options["silent"]) {
        console.log(`Converting database ${dataset.database} data into files…`);
    }
    const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
    ensureAndClearDir(ameliReorganizedRootDir);
    for await (const amendement of findAllAmendements()) {
        if (options["verbose"]) {
            console.log(`Converting ${amendement.numero} file…`);
        }
        // String(null) / String(undefined) are truthy ("null" / "undefined"),
        // so the missing-session case must be tested before stringifying.
        const session = amendement.session == null || amendement.session === ""
            ? String(UNDEFINED_SESSION)
            : String(amendement.session);
        const signetDossierLegislatif = amendement.signet_dossier_legislatif ||
            `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase();
        const ameliReorganizedDir = path.join(ameliReorganizedRootDir, session, signetDossierLegislatif);
        fs.ensureDirSync(ameliReorganizedDir);
        const amendementFileName = `${amendement.numero}.json`;
        fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 });
    }
}
|
|
61
|
+
/**
 * Writes every dossier législatif of the DosLeg database as a JSON file under
 * `<dataDir>/<database>/<dossiers folder>/<session>/<signet>.json`, then
 * converts the texte and rapport URLs for the requested sessions.
 *
 * The session folder comes from the "session" group of the signet when the
 * signet matches SIGNET_STRUCTURE_REGEXP; otherwise UNDEFINED_SESSION is used.
 * Both target directories are emptied first.
 *
 * @param dataDir Root directory of the converted data.
 * @param sessions Sessions forwarded to the texte/rapport URL conversions.
 */
async function convertDatasetDosLeg(dataDir, sessions) {
    const { database } = datasets.dosleg;
    if (!options["silent"]) {
        console.log(`Converting database ${database} data into files…`);
    }
    const rootDir = path.join(dataDir, database);
    const dossiersDir = path.join(rootDir, DOSLEG_DOSSIERS_FOLDER);
    ensureAndClearDir(rootDir);
    ensureAndClearDir(dossiersDir);
    for await (const loi of findAllLois()) {
        if (options["verbose"]) {
            console.log(`Converting ${loi.signet} file…`);
        }
        const signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
        const sessionFolder = signetParts && "session" in signetParts
            ? String(formatToFourDigitSession(signetParts.session))
            : String(UNDEFINED_SESSION);
        const loiDir = path.join(dossiersDir, sessionFolder);
        fs.ensureDirSync(loiDir);
        fs.writeJSONSync(path.join(loiDir, `${loi.signet}.json`), loi, {
            spaces: 2,
        });
    }
    await convertTexteUrls(dataDir, sessions);
    await convertRapportUrls(dataDir, sessions);
}
|
|
90
|
+
/**
 * Writes every question of the Questions database as a JSON file under
 * `<dataDir>/<database>/<legislature>/<reference>.json`.
 *
 * The target root directory is emptied first. Questions without a legislature
 * are stored under the "0" folder.
 *
 * @param dataDir Root directory of the converted data.
 */
async function convertDatasetQuestions(dataDir) {
    const { database } = datasets.questions;
    if (!options["silent"]) {
        console.log(`Converting database ${database} data into files…`);
    }
    const rootDir = path.join(dataDir, database);
    ensureAndClearDir(rootDir);
    for await (const question of findAllQuestions()) {
        if (options["verbose"]) {
            console.log(`Converting ${question.reference} file…`);
        }
        const legislatureDir = path.join(rootDir, String(question.legislature || 0));
        fs.ensureDirSync(legislatureDir);
        const targetFile = path.join(legislatureDir, `${question.reference}.json`);
        fs.writeJSONSync(targetFile, question, { spaces: 2 });
    }
}
|
|
131
|
-
async function convertTexteUrls(dataDir) {
|
|
108
|
+
async function convertTexteUrls(dataDir, sessions) {
|
|
132
109
|
const textesDir = path.join(dataDir, TEXTE_FOLDER);
|
|
133
110
|
fs.ensureDirSync(textesDir);
|
|
134
|
-
const originalTextesDir = path.join(textesDir,
|
|
135
|
-
for await (const texte of findSenatTexteUrls(
|
|
111
|
+
const originalTextesDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);
|
|
112
|
+
for await (const texte of findSenatTexteUrls(sessions)) {
|
|
136
113
|
const texteName = path.parse(texte.url).name;
|
|
137
114
|
const texteDir = path.join(originalTextesDir, `${texte.session ?? UNDEFINED_SESSION}`, texteName);
|
|
138
115
|
fs.ensureDirSync(texteDir);
|
|
@@ -151,10 +128,10 @@ async function convertTexteUrls(dataDir) {
|
|
|
151
128
|
});
|
|
152
129
|
}
|
|
153
130
|
}
|
|
154
|
-
async function convertRapportUrls(dataDir) {
|
|
131
|
+
async function convertRapportUrls(dataDir, sessions) {
|
|
155
132
|
const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
|
|
156
133
|
fs.ensureDirSync(rapportsDir);
|
|
157
|
-
for await (const rapport of findSenatRapportUrls(
|
|
134
|
+
for await (const rapport of findSenatRapportUrls(sessions)) {
|
|
158
135
|
const parsedRapportUrl = path.parse(rapport.url);
|
|
159
136
|
const rapportName = parsedRapportUrl.name;
|
|
160
137
|
const rapportDir = path.join(rapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName);
|
|
@@ -180,6 +157,43 @@ async function convertRapportUrls(dataDir) {
|
|
|
180
157
|
});
|
|
181
158
|
}
|
|
182
159
|
}
|
|
160
|
+
/**
 * Writes the sénateurs, circonscriptions and organismes of the Sens database
 * as JSON files, one file per record, each kind in its own sub-folder of
 * `<dataDir>/<database>/`.
 *
 * File names are the sénateur `matricule`, the circonscription `identifiant`
 * and the organisme `code`. The root and the three sub-folders are emptied
 * first.
 *
 * @param dataDir Root directory of the converted data.
 */
async function convertDatasetSens(dataDir) {
    const { database } = datasets.sens;
    if (!options["silent"]) {
        console.log(`Converting database ${database} data into files…`);
    }
    const rootDir = path.join(dataDir, database);
    const senateursDir = path.join(rootDir, SENS_SENATEURS_FOLDER);
    const circonscriptionsDir = path.join(rootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
    const organismesDir = path.join(rootDir, SENS_ORGANISMES_FOLDER);
    for (const dir of [rootDir, senateursDir, circonscriptionsDir, organismesDir]) {
        ensureAndClearDir(dir);
    }
    // Dumps each record of `records` into `targetDir`, named after `keyOf(record)`.
    const exportAll = async (records, targetDir, keyOf) => {
        for await (const record of records) {
            if (options["verbose"]) {
                console.log(`Converting ${keyOf(record)} file…`);
            }
            fs.writeJSONSync(path.join(targetDir, `${keyOf(record)}.json`), record, {
                spaces: 2,
            });
        }
    };
    await exportAll(findAllSens(), senateursDir, (sen) => sen.matricule);
    await exportAll(findAllCirconscriptions(), circonscriptionsDir, (circonscription) => circonscription.identifiant);
    await exportAll(findAllOrganismes(), organismesDir, (organisme) => organisme.code);
}
|
|
183
197
|
convertData()
|
|
184
198
|
.then(() => process.exit(0))
|
|
185
199
|
.catch((error) => {
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
export declare const SIGNET_STRUCTURE_REGEXP: RegExp;
|
|
2
2
|
export declare const AKN_IDENTIFICATION_STRUCTURE_REGEXP: RegExp;
|
|
3
3
|
export declare const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP: RegExp;
|
|
4
|
-
export declare
|
|
5
|
-
export declare function formatToFourDigitSession(session: string): string;
|
|
4
|
+
export declare function formatToFourDigitSession(session: string): string | 0;
|