@tricoteuses/senat 2.10.5 → 2.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/databases.d.ts +1 -28
- package/lib/databases.js +0 -6
- package/lib/datasets.d.ts +6 -0
- package/lib/datasets.js +233 -0
- package/lib/loaders.d.ts +5 -0
- package/lib/loaders.js +14 -9
- package/lib/model/ameli.d.ts +31 -143
- package/lib/model/ameli.js +102 -95
- package/lib/model/commission.d.ts +5 -0
- package/lib/model/commission.js +263 -0
- package/lib/model/debats.d.ts +13 -51
- package/lib/model/documents.d.ts +2 -0
- package/lib/model/documents.js +37 -0
- package/lib/model/dosleg.d.ts +9 -104
- package/lib/model/dosleg.js +76 -108
- package/lib/model/index.d.ts +4 -2
- package/lib/model/index.js +4 -2
- package/lib/model/questions.d.ts +10 -458
- package/lib/model/scrutins.d.ts +3 -0
- package/lib/model/scrutins.js +74 -0
- package/lib/model/{compte_rendu.js → seance.js} +47 -28
- package/lib/model/sens.d.ts +28 -1002
- package/lib/model/sens.js +65 -33
- package/lib/model/util.d.ts +1 -0
- package/lib/model/util.js +19 -1
- package/lib/raw_types/ameli.d.ts +778 -1521
- package/lib/raw_types/ameli.js +5 -345
- package/lib/raw_types/debats.d.ts +163 -306
- package/lib/raw_types/debats.js +5 -84
- package/lib/raw_types/dosleg.d.ts +1349 -2293
- package/lib/raw_types/dosleg.js +5 -550
- package/lib/raw_types/questions.d.ts +374 -519
- package/lib/raw_types/questions.js +5 -84
- package/lib/raw_types/senat.d.ts +11389 -0
- package/lib/raw_types/senat.js +5 -0
- package/lib/raw_types/sens.d.ts +6729 -12571
- package/lib/raw_types/sens.js +5 -2944
- package/lib/raw_types_schemats/ameli.d.ts +2 -2
- package/lib/raw_types_schemats/debats.d.ts +2 -2
- package/lib/raw_types_schemats/dosleg.d.ts +2 -2
- package/lib/raw_types_schemats/questions.d.ts +2 -2
- package/lib/raw_types_schemats/sens.d.ts +2 -2
- package/lib/scripts/convert_data.js +37 -31
- package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
- package/lib/scripts/retrieve_cr_commission.js +291 -0
- package/lib/scripts/{retrieve_comptes_rendus.js → retrieve_cr_seance.js} +1 -1
- package/lib/scripts/retrieve_open_data.js +35 -1
- package/lib/utils/cr_spliting.d.ts +22 -1
- package/lib/utils/cr_spliting.js +273 -12
- package/lib/utils/reunion_grouping.d.ts +3 -0
- package/lib/utils/reunion_grouping.js +1 -1
- package/package.json +12 -11
- /package/lib/model/{compte_rendu.d.ts → seance.d.ts} +0 -0
- /package/lib/scripts/{retrieve_comptes_rendus.d.ts → retrieve_cr_seance.d.ts} +0 -0
|
@@ -2,9 +2,9 @@ import fs from "fs";
|
|
|
2
2
|
import * as cheerio from "cheerio";
|
|
3
3
|
import path from "path";
|
|
4
4
|
import { computeIntervalsBySlot } from "../utils/cr_spliting";
|
|
5
|
-
import { norm } from "./util";
|
|
6
|
-
const asArray = (x) => x == null ? [] : Array.isArray(x) ? x : [x];
|
|
7
|
-
const toInt = (s) => Number.isFinite(Number(s)) ? Number(s) : Number.POSITIVE_INFINITY;
|
|
5
|
+
import { norm, toCRDate } from "./util";
|
|
6
|
+
const asArray = (x) => (x == null ? [] : Array.isArray(x) ? x : [x]);
|
|
7
|
+
const toInt = (s) => (Number.isFinite(Number(s)) ? Number(s) : Number.POSITIVE_INFINITY);
|
|
8
8
|
export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firstSlotOfDay) {
|
|
9
9
|
try {
|
|
10
10
|
const raw = fs.readFileSync(xmlFilePath, "utf8");
|
|
@@ -13,7 +13,7 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
|
|
|
13
13
|
const order = $("body *").toArray();
|
|
14
14
|
const idx = new Map(order.map((el, i) => [el, i]));
|
|
15
15
|
const intervalsAll = computeIntervalsBySlot($, idx, firstSlotOfDay);
|
|
16
|
-
const intervals = intervalsAll.filter(iv => iv.slot === wantedSlot);
|
|
16
|
+
const intervals = intervalsAll.filter((iv) => iv.slot === wantedSlot);
|
|
17
17
|
if (intervals.length === 0) {
|
|
18
18
|
console.warn(`[CRI] no intervals for ${path.basename(xmlFilePath)} [${wantedSlot}]`);
|
|
19
19
|
return null;
|
|
@@ -35,7 +35,8 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
|
|
|
35
35
|
if (!elementInAnyInterval(block, idx, intervals))
|
|
36
36
|
return;
|
|
37
37
|
const $block = $(block);
|
|
38
|
-
$block
|
|
38
|
+
$block
|
|
39
|
+
.find([
|
|
39
40
|
"p[class^='titre_S']",
|
|
40
41
|
"p.mention_titre",
|
|
41
42
|
"p.intitule_titre",
|
|
@@ -45,7 +46,8 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
|
|
|
45
46
|
"p.intitule_article",
|
|
46
47
|
"p.mention_section",
|
|
47
48
|
"p.intitule_section",
|
|
48
|
-
].join(","))
|
|
49
|
+
].join(","))
|
|
50
|
+
.remove();
|
|
49
51
|
const firstP = $block.find("p").first();
|
|
50
52
|
const speakerLabelRaw = firstP.find(".orateur_nom").text() || firstP.find("a.lien_senfic").text() || "";
|
|
51
53
|
const speakerLabel = dedupeSpeaker(speakerLabelRaw);
|
|
@@ -98,7 +100,7 @@ export function parseYYYYMMDD(yyyymmdd) {
|
|
|
98
100
|
}
|
|
99
101
|
export function deriveTitreObjetFromSommaire(sommaire, slot) {
|
|
100
102
|
const items = extractLevel1Items(sommaire);
|
|
101
|
-
const meaningful = items.filter(it => !isBoilerplate(it.label));
|
|
103
|
+
const meaningful = items.filter((it) => !isBoilerplate(it.label));
|
|
102
104
|
if (meaningful.length === 0) {
|
|
103
105
|
return {
|
|
104
106
|
titre: `Séance publique ${slotLabel(slot)}`,
|
|
@@ -106,15 +108,22 @@ export function deriveTitreObjetFromSommaire(sommaire, slot) {
|
|
|
106
108
|
};
|
|
107
109
|
}
|
|
108
110
|
const titre = meaningful[0].label;
|
|
109
|
-
const objet = meaningful
|
|
111
|
+
const objet = meaningful
|
|
112
|
+
.slice(0, 3)
|
|
113
|
+
.map((it) => it.label)
|
|
114
|
+
.join(" ; ");
|
|
110
115
|
return { titre, objet };
|
|
111
116
|
}
|
|
112
117
|
function slotLabel(slot) {
|
|
113
118
|
switch (slot) {
|
|
114
|
-
case "MATIN":
|
|
115
|
-
|
|
116
|
-
case "
|
|
117
|
-
|
|
119
|
+
case "MATIN":
|
|
120
|
+
return "du matin";
|
|
121
|
+
case "APRES-MIDI":
|
|
122
|
+
return "de l’après-midi";
|
|
123
|
+
case "SOIR":
|
|
124
|
+
return "du soir";
|
|
125
|
+
default:
|
|
126
|
+
return "";
|
|
118
127
|
}
|
|
119
128
|
}
|
|
120
129
|
const BOILERPLATE_PATTERNS = [
|
|
@@ -132,18 +141,20 @@ const BOILERPLATE_PATTERNS = [
|
|
|
132
141
|
/ouverture de la séance/i,
|
|
133
142
|
/clo(?:t|̂)ure de la séance/i,
|
|
134
143
|
];
|
|
135
|
-
const isBoilerplate = (label) => !label?.trim() || BOILERPLATE_PATTERNS.some(rx => rx.test(label));
|
|
144
|
+
const isBoilerplate = (label) => !label?.trim() || BOILERPLATE_PATTERNS.some((rx) => rx.test(label));
|
|
136
145
|
function extractLevel1Items(sommaire) {
|
|
137
146
|
const level1 = asArray(sommaire?.sommaire1);
|
|
138
147
|
return level1
|
|
139
|
-
.map(el => ({
|
|
148
|
+
.map((el) => ({
|
|
140
149
|
numero: toInt(el?.valeur_pts_odj),
|
|
141
150
|
label: String(el?.titreStruct?.intitule ?? "").trim(),
|
|
142
151
|
}))
|
|
143
|
-
.filter(it => !!it.label)
|
|
152
|
+
.filter((it) => !!it.label)
|
|
144
153
|
.sort((a, b) => a.numero - b.numero);
|
|
145
154
|
}
|
|
146
|
-
function stripTrailingPunct(s) {
|
|
155
|
+
function stripTrailingPunct(s) {
|
|
156
|
+
return s.replace(/\s*([:,.;])\s*$/u, "").trim();
|
|
157
|
+
}
|
|
147
158
|
function dedupeSpeaker(raw) {
|
|
148
159
|
let s = norm(raw);
|
|
149
160
|
s = stripTrailingPunct(s);
|
|
@@ -158,7 +169,8 @@ function dedupeSpeaker(raw) {
|
|
|
158
169
|
return s.replace(/\.\s*$/, "");
|
|
159
170
|
}
|
|
160
171
|
function decodeHtmlEntities(s) {
|
|
161
|
-
return s
|
|
172
|
+
return s
|
|
173
|
+
.replace(/&#(\d+);/g, (_, d) => String.fromCharCode(parseInt(d, 10)))
|
|
162
174
|
.replace(/&#x([0-9a-fA-F]+);/g, (_, h) => String.fromCharCode(parseInt(h, 16)));
|
|
163
175
|
}
|
|
164
176
|
function fixApostrophes(s) {
|
|
@@ -169,7 +181,9 @@ function fixApostrophes(s) {
|
|
|
169
181
|
out = out.replace(/\s+([,;:.!?])/g, "$1");
|
|
170
182
|
return out;
|
|
171
183
|
}
|
|
172
|
-
function normalizeTitle(text) {
|
|
184
|
+
function normalizeTitle(text) {
|
|
185
|
+
return text.replace(/^PR[ÉE]SIDENCE DE\b/i, "Présidence de ");
|
|
186
|
+
}
|
|
173
187
|
function roleForSpeaker(labelOrQualite) {
|
|
174
188
|
const s = (labelOrQualite || "").toLowerCase();
|
|
175
189
|
if (/^(m\.|mme)?\s*(le|la)\s+pr[ée]sident(e)?\b/.test(s) || /\bpr[ée]sident[e]?\s+de\s+séance\b/.test(s))
|
|
@@ -177,7 +191,7 @@ function roleForSpeaker(labelOrQualite) {
|
|
|
177
191
|
return "";
|
|
178
192
|
}
|
|
179
193
|
function readIntervenantMeta($block) {
|
|
180
|
-
const int = $block.find(
|
|
194
|
+
const int = $block.find("cri\\:intervenant").first();
|
|
181
195
|
if (int.length)
|
|
182
196
|
return { mat: int.attr("mat") || undefined, nom: int.attr("nom") || undefined, qua: int.attr("qua") || undefined };
|
|
183
197
|
const html = $block.html() || "";
|
|
@@ -220,6 +234,7 @@ function extractAndRemoveLeadingQualite($, $block) {
|
|
|
220
234
|
else if (node.type === "text") {
|
|
221
235
|
const t = norm(node.data || "");
|
|
222
236
|
if (!t || /^[:.,;–—-]+$/.test(t)) {
|
|
237
|
+
;
|
|
223
238
|
node.data = "";
|
|
224
239
|
return;
|
|
225
240
|
}
|
|
@@ -230,11 +245,11 @@ function extractAndRemoveLeadingQualite($, $block) {
|
|
|
230
245
|
}
|
|
231
246
|
function sanitizeInterventionHtml($, $block) {
|
|
232
247
|
const $clone = $block.clone();
|
|
233
|
-
$clone.find(
|
|
248
|
+
$clone.find("a[name]").remove();
|
|
234
249
|
$clone.find('div[align="right"]').remove();
|
|
235
|
-
$clone.find(
|
|
236
|
-
$clone.find(
|
|
237
|
-
$clone.find(
|
|
250
|
+
$clone.find("a.link").remove();
|
|
251
|
+
$clone.find("img").remove();
|
|
252
|
+
$clone.find("a#ameli_amendement_cri_phrase, a#ameli_amendement_cra_contenu, a#ameli_amendement_cra_objet").remove();
|
|
238
253
|
$clone.find(".orateur_nom, .orateur_qualite").remove();
|
|
239
254
|
let html = $clone.html() || "";
|
|
240
255
|
html = html.replace(/<!--[\s\S]*?-->/g, "");
|
|
@@ -244,11 +259,14 @@ function extractSommaireForIntervals($, idx, intervals) {
|
|
|
244
259
|
const inIv = (el) => elementInAnyInterval(el, idx, intervals);
|
|
245
260
|
const root = $("body");
|
|
246
261
|
const sommaire = { presidentSeance: { _: "" }, sommaire1: [] };
|
|
247
|
-
// (1) Présidence (tm2) — première ligne dans l’intervalle
|
|
248
|
-
const pres = root
|
|
262
|
+
// (1) Présidence (tm2) — première ligne dans l’intervalle
|
|
263
|
+
const pres = root
|
|
264
|
+
.find("p.tm2")
|
|
265
|
+
.filter((_, el) => inIv(el))
|
|
266
|
+
.first();
|
|
249
267
|
if (pres.length)
|
|
250
268
|
sommaire.presidentSeance = { _: norm(pres.text()) };
|
|
251
|
-
// (2) Paras tm5 présents dans l’intervalle
|
|
269
|
+
// (2) Paras tm5 présents dans l’intervalle
|
|
252
270
|
const paras = [];
|
|
253
271
|
root.find("p.tm5").each((_, el) => {
|
|
254
272
|
if (!inIv(el))
|
|
@@ -259,7 +277,7 @@ function extractSommaireForIntervals($, idx, intervals) {
|
|
|
259
277
|
});
|
|
260
278
|
if (paras.length)
|
|
261
279
|
sommaire.para = paras.length === 1 ? paras[0] : paras;
|
|
262
|
-
// (3) Items de 1er niveau (tm3) présents dans l’intervalle
|
|
280
|
+
// (3) Items de 1er niveau (tm3) présents dans l’intervalle
|
|
263
281
|
const items = [];
|
|
264
282
|
root.find("p.tm3").each((_, el) => {
|
|
265
283
|
if (!inIv(el))
|
|
@@ -297,6 +315,7 @@ function extractMetadonnees($, filePath) {
|
|
|
297
315
|
if (m)
|
|
298
316
|
dateSeance = `${m[1]}-${m[2]}-${m[3]}`;
|
|
299
317
|
}
|
|
318
|
+
dateSeance = toCRDate(dateSeance, null);
|
|
300
319
|
return {
|
|
301
320
|
dateSeance,
|
|
302
321
|
dateSeanceJour: dateSeance,
|
|
@@ -311,7 +330,7 @@ function extractMetadonnees($, filePath) {
|
|
|
311
330
|
diffusion: "",
|
|
312
331
|
version: "1.0",
|
|
313
332
|
environnement: "",
|
|
314
|
-
heureGeneration: new Date()
|
|
333
|
+
heureGeneration: new Date(),
|
|
315
334
|
};
|
|
316
335
|
}
|
|
317
336
|
function elementInAnyInterval(el, idx, intervals) {
|