@tricoteuses/senat 2.1.2 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/lib/index.d.ts +1 -0
- package/lib/loaders.d.ts +3 -1
- package/lib/loaders.js +20 -13
- package/lib/model/agenda.js +44 -23
- package/lib/scripts/retrieve_agenda.js +26 -8
- package/lib/types/agenda.d.ts +4 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -54,8 +54,8 @@ npm run data:retrieve_documents ../senat-data -- --fromSession 2023 --parseDocum
|
|
|
54
54
|
# Parsing only
|
|
55
55
|
npm run data:parse_textes_lois ../senat-data
|
|
56
56
|
|
|
57
|
-
# Retrieval of agenda from Sénat's website
|
|
58
|
-
npm run data:retrieve_agenda ../senat-data -- --fromSession 2023
|
|
57
|
+
# Retrieval (& parsing) of agenda from Sénat's website
|
|
58
|
+
npm run data:retrieve_agenda ../senat-data -- --fromSession 2023 [--parseAgenda]
|
|
59
59
|
|
|
60
60
|
# Retrieval of sénateurs' pictures from Sénat's website
|
|
61
61
|
npm run data:retrieve_senateurs_photos ../senat-data
|
package/lib/index.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ export type { AmendementResult, } from "./model/ameli";
|
|
|
2
2
|
export type { DossierLegislatifResult, } from "./model/dosleg";
|
|
3
3
|
export type { QuestionResult, } from "./model/questions";
|
|
4
4
|
export type { CirconscriptionResult, OrganismeResult, SenateurResult, } from "./model/sens";
|
|
5
|
+
export type { AgendaEvent } from "./types/agenda";
|
|
5
6
|
export type { Ses, Sub, TxtAmeli } from "./types/ameli";
|
|
6
7
|
export type { Debat, LecAssDeb } from "./types/debats";
|
|
7
8
|
export type { Ass, Aud, Auteur, DateSeance, DecCoc, DenRap, DocAtt, Ecr, EtaLoi, LecAss, LecAssRap, Lecture, Loi, Org, OriTxt, Qua, Rap, RapOrg, Scr, Texte, TypAtt, TypLec, TypLoi, TypTxt, TypUrl, } from "./types/dosleg";
|
package/lib/loaders.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { AmendementResult } from "./model/ameli";
|
|
|
2
2
|
import { DossierLegislatifResult } from "./model/dosleg";
|
|
3
3
|
import { QuestionResult } from "./model/questions";
|
|
4
4
|
import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
|
|
5
|
+
import { AgendaEvent } from "./types/agenda";
|
|
5
6
|
import { FlatTexte } from "./types/texte";
|
|
6
7
|
export { EnabledDatasets } from "./datasets";
|
|
7
8
|
export declare const AGENDA_FOLDER = "agenda";
|
|
@@ -59,7 +60,8 @@ export declare function iterLoadSenatDossiersLegislatifsTexteUrls(dataDir: strin
|
|
|
59
60
|
export declare function iterLoadSenatDossiersLegislatifsRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<RapportMetadata>>;
|
|
60
61
|
export declare function iterLoadSenatDossiersLegislatifsTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifTexteResult>>;
|
|
61
62
|
export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
|
|
63
|
+
export declare function iterLoadSenatEvenements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AgendaEvent>>;
|
|
64
|
+
export declare function iterLoadSenatCirconscriptions(dataDir: string, options?: {}): Generator<IterItem<CirconscriptionResult>>;
|
|
62
65
|
export declare function iterLoadSenatOrganismes(dataDir: string, options?: {}): Generator<IterItem<OrganismeResult>>;
|
|
63
66
|
export declare function iterLoadSenatSenateurs(dataDir: string, legislature: number, options?: {}): Generator<IterItem<SenateurResult>>;
|
|
64
|
-
export declare function iterLoadSenatCirconscriptions(dataDir: string, options?: {}): Generator<IterItem<CirconscriptionResult>>;
|
|
65
67
|
export declare function iterLoadSenatQuestions(dataDir: string, legislature: number, options?: {}): Generator<IterItem<QuestionResult>>;
|
package/lib/loaders.js
CHANGED
|
@@ -27,8 +27,8 @@ export function* iterFilePaths(dirPath) {
|
|
|
27
27
|
}
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
|
-
function* iterLoadSenatItems(dataDir,
|
|
31
|
-
let itemsDir = path.join(dataDir,
|
|
30
|
+
function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, { log = false } = {}) {
|
|
31
|
+
let itemsDir = path.join(dataDir, dataName);
|
|
32
32
|
if (subDir) {
|
|
33
33
|
itemsDir = path.join(itemsDir, subDir);
|
|
34
34
|
}
|
|
@@ -41,7 +41,7 @@ function* iterLoadSenatItems(dataDir, dataset, legislatureOrSession, subDir, { l
|
|
|
41
41
|
}
|
|
42
42
|
const itemJson = fs.readFileSync(filePath, { encoding: "utf8" });
|
|
43
43
|
const item = JSON.parse(itemJson);
|
|
44
|
-
const filePathFromDataset = filePath.substring(filePath.indexOf(
|
|
44
|
+
const filePathFromDataset = filePath.substring(filePath.indexOf(dataName) + dataName.length);
|
|
45
45
|
yield {
|
|
46
46
|
item,
|
|
47
47
|
filePathFromDataset,
|
|
@@ -50,12 +50,12 @@ function* iterLoadSenatItems(dataDir, dataset, legislatureOrSession, subDir, { l
|
|
|
50
50
|
}
|
|
51
51
|
}
|
|
52
52
|
export function* iterLoadSenatAmendements(dataDir, session, options = {}) {
|
|
53
|
-
for (const amendementItem of iterLoadSenatItems(dataDir, datasets.ameli, session, undefined, options)) {
|
|
53
|
+
for (const amendementItem of iterLoadSenatItems(dataDir, datasets.ameli.database, session, undefined, options)) {
|
|
54
54
|
yield amendementItem;
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
57
|
export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}) {
|
|
58
|
-
for (const dossierLegislatifItem of iterLoadSenatItems(dataDir, datasets.dosleg, session, DOSLEG_DOSSIERS_FOLDER, options)) {
|
|
58
|
+
for (const dossierLegislatifItem of iterLoadSenatItems(dataDir, datasets.dosleg.database, session, DOSLEG_DOSSIERS_FOLDER, options)) {
|
|
59
59
|
yield dossierLegislatifItem;
|
|
60
60
|
}
|
|
61
61
|
}
|
|
@@ -129,15 +129,27 @@ export function loadSenatTexteContent(dataDir, textePathFromDataset) {
|
|
|
129
129
|
const texteJson = fs.readFileSync(fullTextePath, { encoding: "utf8" });
|
|
130
130
|
return { item: JSON.parse(texteJson) };
|
|
131
131
|
}
|
|
132
|
+
export function* iterLoadSenatEvenements(dataDir, session, options = {}) {
|
|
133
|
+
for (const evenementsItem of iterLoadSenatItems(dataDir, AGENDA_FOLDER, session, DATA_TRANSFORMED_FOLDER, options)) {
|
|
134
|
+
for (const evenement of evenementsItem.item) {
|
|
135
|
+
yield { item: evenement };
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
export function* iterLoadSenatCirconscriptions(dataDir, options = {}) {
|
|
140
|
+
for (const circonscriptionItem of iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_CIRCONSCRIPTIONS_FOLDER, options)) {
|
|
141
|
+
yield circonscriptionItem;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
132
144
|
export function* iterLoadSenatOrganismes(dataDir, options = {}) {
|
|
133
|
-
for (const organismeItem of iterLoadSenatItems(dataDir, datasets.sens, undefined, SENS_ORGANISMES_FOLDER, options)) {
|
|
145
|
+
for (const organismeItem of iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_ORGANISMES_FOLDER, options)) {
|
|
134
146
|
yield organismeItem;
|
|
135
147
|
}
|
|
136
148
|
}
|
|
137
149
|
export function* iterLoadSenatSenateurs(dataDir, legislature, options = {}) {
|
|
138
150
|
const dateDebutLegislatureStr = legislatures.find((legislatureInfo) => legislatureInfo.numero === legislature)?.date_debut;
|
|
139
151
|
const dateDebutLegislature = new Date(dateDebutLegislatureStr);
|
|
140
|
-
for (const senateurItem of iterLoadSenatItems(dataDir, datasets.sens, undefined, SENS_SENATEURS_FOLDER, options)) {
|
|
152
|
+
for (const senateurItem of iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_SENATEURS_FOLDER, options)) {
|
|
141
153
|
const dateFinMandatSenateur = senateurItem.item.mandats_senateur[0]
|
|
142
154
|
?.date_fin
|
|
143
155
|
? new Date(senateurItem.item.mandats_senateur[0]?.date_fin)
|
|
@@ -148,13 +160,8 @@ export function* iterLoadSenatSenateurs(dataDir, legislature, options = {}) {
|
|
|
148
160
|
yield senateurItem;
|
|
149
161
|
}
|
|
150
162
|
}
|
|
151
|
-
export function* iterLoadSenatCirconscriptions(dataDir, options = {}) {
|
|
152
|
-
for (const circonscriptionItem of iterLoadSenatItems(dataDir, datasets.sens, undefined, SENS_CIRCONSCRIPTIONS_FOLDER, options)) {
|
|
153
|
-
yield circonscriptionItem;
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
163
|
export function* iterLoadSenatQuestions(dataDir, legislature, options = {}) {
|
|
157
|
-
for (const questionItem of iterLoadSenatItems(dataDir, datasets.questions, legislature, undefined, options)) {
|
|
164
|
+
for (const questionItem of iterLoadSenatItems(dataDir, datasets.questions.database, legislature, undefined, options)) {
|
|
158
165
|
yield questionItem;
|
|
159
166
|
}
|
|
160
167
|
}
|
package/lib/model/agenda.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { JSDOM } from "jsdom";
|
|
2
2
|
import { DateTime } from "luxon";
|
|
3
|
+
import path from "path";
|
|
3
4
|
function getEventType(eventClasses) {
|
|
4
5
|
const typeClass = [...eventClasses]
|
|
5
6
|
.find(className => className.startsWith("evt-"))
|
|
@@ -23,52 +24,71 @@ function getUrlDossierSenat(lienElements) {
|
|
|
23
24
|
.find(lienElement => lienElement.textContent?.includes("dossier législatif"));
|
|
24
25
|
return urlElement ? urlElement.getAttribute("href") : null;
|
|
25
26
|
}
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
?.replace(
|
|
32
|
-
?.replace(/^(?:
|
|
27
|
+
/**
|
|
28
|
+
* Normalize time string to become a simple start time ("H'h'mm") or a duration ("'de 'H'h'mm' à 'H'h'mm").
|
|
29
|
+
*/
|
|
30
|
+
function normalizeTime(timeStr) {
|
|
31
|
+
return timeStr
|
|
32
|
+
?.replace(/^À l'issue de l'espace réservé .* et au plus tard\s/i, "") // Must be processed first
|
|
33
|
+
?.replace(/^(?:le )?matin/i, "10h00") // We chose "matin" to mean 10h00
|
|
34
|
+
?.replace(/^(?:l')?après-midi/i, "16h00") // We chose "après-midi" to mean 16h00
|
|
35
|
+
?.replace(/^(?:le )?soir/i, "20h00") // We chose "soir" to mean 20h00
|
|
36
|
+
?.replace(/^(?:la )?nuit/i, "22h00") // We chose "nuit" to mean 22h00
|
|
33
37
|
?.replace(/^à\s/ig, "")
|
|
34
38
|
?.replace(/heures/ig, "h00")
|
|
35
39
|
?.replace(/\set.*/i, "")
|
|
36
40
|
?.replace(/,.*/, "")
|
|
37
41
|
?.replace(/\s\(hors hémicycle\)/i, "")
|
|
38
42
|
?.replace(/\s*h\s*/ig, "h");
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
+
}
|
|
44
|
+
function getStartAndEndTimes(timeStr) {
|
|
45
|
+
const normalizedTime = normalizeTime(timeStr);
|
|
46
|
+
const timeMatches = normalizedTime
|
|
47
|
+
?.match(/^de (?<startTime>\d{2}h\d{2}) à (?<endTime>\d{2}h\d{2})$/i);
|
|
48
|
+
if (timeMatches?.groups) {
|
|
49
|
+
const { startTime, endTime } = timeMatches.groups;
|
|
50
|
+
return {
|
|
51
|
+
startTime: startTime ? DateTime.fromFormat(startTime, "H'h'mm").toISOTime() : null,
|
|
52
|
+
endTime: endTime ? DateTime.fromFormat(endTime, "H'h'mm").toISOTime() : null,
|
|
53
|
+
};
|
|
54
|
+
}
|
|
43
55
|
return {
|
|
44
|
-
|
|
45
|
-
|
|
56
|
+
startTime: normalizedTime ? DateTime.fromFormat(normalizedTime, "H'h'mm").toISOTime() : null,
|
|
57
|
+
endTime: null,
|
|
46
58
|
};
|
|
47
59
|
}
|
|
48
|
-
function transformAgenda(document) {
|
|
60
|
+
function transformAgenda(document, fileName) {
|
|
49
61
|
const agendaEvents = [];
|
|
50
62
|
const eventElements = document.querySelectorAll(".evt");
|
|
51
63
|
for (const eventElement of eventElements) {
|
|
64
|
+
const id = eventElement.previousElementSibling?.getAttribute("name") || null;
|
|
65
|
+
if (!id) {
|
|
66
|
+
continue;
|
|
67
|
+
}
|
|
52
68
|
const type = getEventType(eventElement.classList);
|
|
69
|
+
const date = DateTime.fromFormat(fileName, "yyyyMMdd").toFormat("yyyy-MM-dd");
|
|
53
70
|
const timeOriginal = eventElement.querySelector(".time")?.textContent || null;
|
|
54
|
-
const {
|
|
55
|
-
const titre = eventElement.querySelector(".titre")?.textContent || null;
|
|
56
|
-
const organe = eventElement.querySelector(".organe")?.textContent || null;
|
|
57
|
-
const objet = eventElement.querySelector(".objet")?.textContent
|
|
71
|
+
const { startTime, endTime } = getStartAndEndTimes(timeOriginal);
|
|
72
|
+
const titre = eventElement.querySelector(".titre")?.textContent?.trim() || null;
|
|
73
|
+
const organe = eventElement.querySelector(".organe")?.textContent?.trim() || null;
|
|
74
|
+
const objet = eventElement.querySelector(".objet")?.textContent
|
|
75
|
+
?.trim()
|
|
76
|
+
?.replace(/^- /, "")
|
|
77
|
+
|| null;
|
|
58
78
|
const lieu = eventElement.querySelector(".lieu")?.textContent || null;
|
|
59
79
|
const url_dossier_senat = getUrlDossierSenat(eventElement.querySelectorAll(".lien a"));
|
|
60
|
-
const url_video = eventElement.querySelector(".video a")?.getAttribute("href") || null;
|
|
61
80
|
agendaEvents.push({
|
|
81
|
+
id,
|
|
62
82
|
type,
|
|
63
|
-
|
|
64
|
-
|
|
83
|
+
date,
|
|
84
|
+
startTime,
|
|
85
|
+
endTime,
|
|
65
86
|
timeOriginal,
|
|
66
87
|
titre,
|
|
67
88
|
organe,
|
|
68
89
|
objet,
|
|
69
90
|
lieu,
|
|
70
91
|
url_dossier_senat,
|
|
71
|
-
url_video,
|
|
72
92
|
});
|
|
73
93
|
}
|
|
74
94
|
return agendaEvents;
|
|
@@ -76,7 +96,8 @@ function transformAgenda(document) {
|
|
|
76
96
|
export async function parseAgendaFromFile(htmlFilePath) {
|
|
77
97
|
try {
|
|
78
98
|
const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window;
|
|
79
|
-
|
|
99
|
+
const fileName = path.parse(htmlFilePath).name;
|
|
100
|
+
return transformAgenda(document, fileName);
|
|
80
101
|
}
|
|
81
102
|
catch (error) {
|
|
82
103
|
console.error(`Could not parse texte with error ${error}`);
|
|
package/lib/scripts/retrieve_agenda.js
CHANGED
|
@@ -3,35 +3,46 @@ import commandLineArgs from "command-line-args";
|
|
|
3
3
|
import fs from "fs-extra";
|
|
4
4
|
import { DateTime } from "luxon";
|
|
5
5
|
import path from "path";
|
|
6
|
-
import { AGENDA_FOLDER, DATA_ORIGINAL_FOLDER } from "../loaders";
|
|
6
|
+
import { AGENDA_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
7
|
+
import { parseAgendaFromFile } from "../model/agenda";
|
|
7
8
|
import { getSessionsFromStart } from "../types/sessions";
|
|
8
9
|
import { commonOptions } from "./shared/cli_helpers";
|
|
9
10
|
import { ensureAndClearDir } from "./shared/util";
|
|
10
11
|
const optionsDefinitions = [
|
|
11
12
|
...commonOptions,
|
|
12
13
|
{
|
|
13
|
-
help: "parse and convert
|
|
14
|
-
name: "
|
|
14
|
+
help: "parse and convert agenda events into JSON",
|
|
15
|
+
name: "parseAgenda",
|
|
15
16
|
type: Boolean,
|
|
16
17
|
},
|
|
17
18
|
];
|
|
18
19
|
const options = commandLineArgs(optionsDefinitions);
|
|
19
20
|
const SENAT_GLOBAL_AGENDA_URL_ROOT = "https://www.senat.fr/aglae/Global";
|
|
20
|
-
async function
|
|
21
|
+
async function retrieveAgendas(dataDir, sessions) {
|
|
21
22
|
const agendaRootDir = path.join(dataDir, AGENDA_FOLDER);
|
|
22
23
|
ensureAndClearDir(agendaRootDir);
|
|
23
24
|
const originalAgendaDir = path.join(agendaRootDir, DATA_ORIGINAL_FOLDER);
|
|
24
25
|
fs.ensureDirSync(originalAgendaDir);
|
|
26
|
+
const transformedAgendaDir = path.join(agendaRootDir, DATA_TRANSFORMED_FOLDER);
|
|
27
|
+
if (options["parseAgenda"]) {
|
|
28
|
+
fs.ensureDirSync(transformedAgendaDir);
|
|
29
|
+
}
|
|
25
30
|
for (const session of sessions) {
|
|
26
31
|
if (!options["silent"]) {
|
|
27
32
|
console.log(`Retrieving Agenda for session ${session}…`);
|
|
28
33
|
}
|
|
29
|
-
const
|
|
30
|
-
fs.ensureDirSync(
|
|
34
|
+
const originalAgendaSessionDir = path.join(originalAgendaDir, `${session}`);
|
|
35
|
+
fs.ensureDirSync(originalAgendaSessionDir);
|
|
36
|
+
const transformedAgendaSessionDir = path.join(transformedAgendaDir, `${session}`);
|
|
37
|
+
if (options["parseAgenda"]) {
|
|
38
|
+
fs.ensureDirSync(transformedAgendaSessionDir);
|
|
39
|
+
}
|
|
31
40
|
const fifteenDaysFromNow = new Date();
|
|
32
41
|
fifteenDaysFromNow.setDate(fifteenDaysFromNow.getDate() + 15);
|
|
33
42
|
for (const date = new Date(session, 0, 1); date <= new Date(session, 11, 31) && date <= fifteenDaysFromNow; date.setDate(date.getDate() + 1)) {
|
|
34
43
|
const agendaName = DateTime.fromJSDate(date).toFormat("ddMMyyyy");
|
|
44
|
+
const agendaFileName = DateTime.fromJSDate(date).toFormat("yyyyMMdd");
|
|
45
|
+
const agendaPath = path.join(originalAgendaSessionDir, agendaFileName);
|
|
35
46
|
try {
|
|
36
47
|
const response = await fetch(`${SENAT_GLOBAL_AGENDA_URL_ROOT}/agl${agendaName}.html`);
|
|
37
48
|
if (!response.ok) {
|
|
@@ -47,11 +58,18 @@ async function retrieveAgenda(dataDir, sessions) {
|
|
|
47
58
|
if (!agendaContent) {
|
|
48
59
|
return;
|
|
49
60
|
}
|
|
50
|
-
fs.writeFileSync(
|
|
61
|
+
fs.writeFileSync(agendaPath, Buffer.from(agendaContent));
|
|
51
62
|
}
|
|
52
63
|
catch (error) {
|
|
53
64
|
console.error(error.message);
|
|
54
65
|
}
|
|
66
|
+
if (options["parseAgenda"]) {
|
|
67
|
+
const parsedAgendaEvents = await parseAgendaFromFile(agendaPath);
|
|
68
|
+
if (!parsedAgendaEvents || parsedAgendaEvents.length === 0) {
|
|
69
|
+
continue;
|
|
70
|
+
}
|
|
71
|
+
fs.writeJSONSync(path.join(transformedAgendaSessionDir, `${agendaFileName}.json`), parsedAgendaEvents, { spaces: 2 });
|
|
72
|
+
}
|
|
55
73
|
}
|
|
56
74
|
}
|
|
57
75
|
}
|
|
@@ -60,7 +78,7 @@ async function main() {
|
|
|
60
78
|
assert(dataDir, "Missing argument: data directory");
|
|
61
79
|
const sessions = getSessionsFromStart(options["fromSession"]);
|
|
62
80
|
console.time("agenda processing time");
|
|
63
|
-
await
|
|
81
|
+
await retrieveAgendas(dataDir, sessions);
|
|
64
82
|
if (!options["silent"]) {
|
|
65
83
|
console.timeEnd("agenda processing time");
|
|
66
84
|
}
|
package/lib/types/agenda.d.ts
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
export interface AgendaEvent {
|
|
2
|
+
id: string;
|
|
2
3
|
type: string | null;
|
|
3
|
-
|
|
4
|
-
|
|
4
|
+
date: string | null;
|
|
5
|
+
startTime: string | null;
|
|
6
|
+
endTime: string | null;
|
|
5
7
|
timeOriginal: string | null;
|
|
6
8
|
titre: string | null;
|
|
7
9
|
organe: string | null;
|
|
8
10
|
objet: string | null;
|
|
9
11
|
lieu: string | null;
|
|
10
12
|
url_dossier_senat: string | null;
|
|
11
|
-
url_video: string | null;
|
|
12
13
|
}
|