@tricoteuses/senat 2.10.5 → 2.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/databases.d.ts +1 -28
- package/lib/databases.js +0 -6
- package/lib/datasets.d.ts +6 -0
- package/lib/datasets.js +233 -0
- package/lib/loaders.d.ts +5 -0
- package/lib/loaders.js +14 -9
- package/lib/model/ameli.d.ts +31 -143
- package/lib/model/ameli.js +102 -95
- package/lib/model/commission.d.ts +5 -0
- package/lib/model/commission.js +263 -0
- package/lib/model/debats.d.ts +13 -51
- package/lib/model/documents.d.ts +2 -0
- package/lib/model/documents.js +37 -0
- package/lib/model/dosleg.d.ts +9 -104
- package/lib/model/dosleg.js +76 -108
- package/lib/model/index.d.ts +4 -2
- package/lib/model/index.js +4 -2
- package/lib/model/questions.d.ts +10 -458
- package/lib/model/scrutins.d.ts +3 -0
- package/lib/model/scrutins.js +74 -0
- package/lib/model/{compte_rendu.js → seance.js} +47 -28
- package/lib/model/sens.d.ts +28 -1002
- package/lib/model/sens.js +65 -33
- package/lib/model/util.d.ts +1 -0
- package/lib/model/util.js +19 -1
- package/lib/raw_types/ameli.d.ts +778 -1521
- package/lib/raw_types/ameli.js +5 -345
- package/lib/raw_types/debats.d.ts +163 -306
- package/lib/raw_types/debats.js +5 -84
- package/lib/raw_types/dosleg.d.ts +1349 -2293
- package/lib/raw_types/dosleg.js +5 -550
- package/lib/raw_types/questions.d.ts +374 -519
- package/lib/raw_types/questions.js +5 -84
- package/lib/raw_types/senat.d.ts +11389 -0
- package/lib/raw_types/senat.js +5 -0
- package/lib/raw_types/sens.d.ts +6729 -12571
- package/lib/raw_types/sens.js +5 -2944
- package/lib/raw_types_schemats/ameli.d.ts +2 -2
- package/lib/raw_types_schemats/debats.d.ts +2 -2
- package/lib/raw_types_schemats/dosleg.d.ts +2 -2
- package/lib/raw_types_schemats/questions.d.ts +2 -2
- package/lib/raw_types_schemats/sens.d.ts +2 -2
- package/lib/scripts/convert_data.js +37 -31
- package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
- package/lib/scripts/retrieve_cr_commission.js +291 -0
- package/lib/scripts/{retrieve_comptes_rendus.js → retrieve_cr_seance.js} +1 -1
- package/lib/scripts/retrieve_open_data.js +35 -1
- package/lib/utils/cr_spliting.d.ts +22 -1
- package/lib/utils/cr_spliting.js +273 -12
- package/lib/utils/reunion_grouping.d.ts +3 -0
- package/lib/utils/reunion_grouping.js +1 -1
- package/package.json +12 -11
- /package/lib/model/{compte_rendu.d.ts → seance.d.ts} +0 -0
- /package/lib/scripts/{retrieve_comptes_rendus.d.ts → retrieve_cr_seance.d.ts} +0 -0
package/lib/model/ameli.js
CHANGED
|
@@ -1,128 +1,135 @@
|
|
|
1
1
|
import { jsonArrayFrom } from "kysely/helpers/postgres";
|
|
2
|
-
import {
|
|
2
|
+
import { dbSenat } from "../databases";
|
|
3
3
|
import { concat, toDateString } from "./util";
|
|
4
4
|
function auteurs(amendementId) {
|
|
5
|
-
return jsonArrayFrom(
|
|
6
|
-
.selectFrom("amdsen")
|
|
7
|
-
.leftJoin("sen_ameli", "amdsen.senid", "sen_ameli.entid")
|
|
8
|
-
.leftJoin("grppol_ameli", "amdsen.grpid", "grppol_ameli.entid")
|
|
9
|
-
.where("amdsen.amdid", "=", amendementId)
|
|
5
|
+
return jsonArrayFrom(dbSenat
|
|
6
|
+
.selectFrom("ameli.amdsen")
|
|
7
|
+
.leftJoin("ameli.sen_ameli", "ameli.amdsen.senid", "ameli.sen_ameli.entid")
|
|
8
|
+
.leftJoin("ameli.grppol_ameli", "ameli.amdsen.grpid", "ameli.grppol_ameli.entid")
|
|
9
|
+
.where("ameli.amdsen.amdid", "=", amendementId)
|
|
10
10
|
.select([
|
|
11
|
-
"amdsen.prenomuse as prenom",
|
|
12
|
-
"amdsen.hom as homonyme",
|
|
13
|
-
"amdsen.nomuse as nom",
|
|
14
|
-
"amdsen.qua as qualite",
|
|
15
|
-
"amdsen.rng as rang",
|
|
16
|
-
"sen_ameli.mat as matricule",
|
|
17
|
-
"amdsen.grpid as groupe_politique_id",
|
|
18
|
-
"grppol_ameli.cod as group_politique_code",
|
|
19
|
-
"grppol_ameli.libcou as groupe_politique_libelle_court",
|
|
20
|
-
"grppol_ameli.lilcou as groupe_politique_libelle",
|
|
11
|
+
"ameli.amdsen.prenomuse as prenom",
|
|
12
|
+
"ameli.amdsen.hom as homonyme",
|
|
13
|
+
"ameli.amdsen.nomuse as nom",
|
|
14
|
+
"ameli.amdsen.qua as qualite",
|
|
15
|
+
"ameli.amdsen.rng as rang",
|
|
16
|
+
"ameli.sen_ameli.mat as matricule",
|
|
17
|
+
"ameli.amdsen.grpid as groupe_politique_id",
|
|
18
|
+
"ameli.grppol_ameli.cod as group_politique_code",
|
|
19
|
+
"ameli.grppol_ameli.libcou as groupe_politique_libelle_court",
|
|
20
|
+
"ameli.grppol_ameli.lilcou as groupe_politique_libelle",
|
|
21
21
|
])
|
|
22
|
-
.orderBy("amdsen.rng asc"));
|
|
22
|
+
.orderBy("ameli.amdsen.rng asc"));
|
|
23
23
|
}
|
|
24
|
-
const
|
|
25
|
-
.selectFrom("amd")
|
|
26
|
-
.leftJoin("sub", "amd.subid", "sub.id")
|
|
27
|
-
.leftJoin("typsub", "sub.typid", "typsub.id")
|
|
28
|
-
.leftJoin("typrect", "amd.typrectid", "typrect.id")
|
|
29
|
-
.leftJoin("txt_ameli", "amd.txtid", "txt_ameli.id")
|
|
30
|
-
.leftJoin("etatxt", "txt_ameli.txtetaid", "etatxt.id")
|
|
31
|
-
.leftJoin("ses", "txt_ameli.sesdepid", "ses.id")
|
|
32
|
-
.leftJoin("typses", "typses.id", "ses.typid")
|
|
33
|
-
.leftJoin("nat", "txt_ameli.natid", "nat.id")
|
|
34
|
-
.leftJoin("lec_ameli", "txt_ameli.lecid", "lec_ameli.id")
|
|
35
|
-
.leftJoin("mot", "amd.motid", "mot.id")
|
|
36
|
-
.leftJoin("avicom", "amd.avcid", "avicom.id")
|
|
37
|
-
.leftJoin("avigvt", "amd.avgid", "avigvt.id")
|
|
38
|
-
.leftJoin("sor", "amd.sorid", "sor.id")
|
|
39
|
-
.leftJoin("irr", "amd.irrid", "irr.id")
|
|
40
|
-
.leftJoin("grppol_ameli", "amd.nomentid", "grppol_ameli.entid")
|
|
41
|
-
.leftJoin("com_ameli", "amd.nomentid", "com_ameli.entid")
|
|
42
|
-
.leftJoin("cab", "amd.nomentid", "cab.entid")
|
|
24
|
+
const findAllAmendementsQuery = dbSenat
|
|
25
|
+
.selectFrom("ameli.amd")
|
|
26
|
+
.leftJoin("ameli.sub", "ameli.amd.subid", "ameli.sub.id")
|
|
27
|
+
.leftJoin("ameli.typsub", "ameli.sub.typid", "ameli.typsub.id")
|
|
28
|
+
.leftJoin("ameli.typrect", "ameli.amd.typrectid", "ameli.typrect.id")
|
|
29
|
+
.leftJoin("ameli.txt_ameli", "ameli.amd.txtid", "ameli.txt_ameli.id")
|
|
30
|
+
.leftJoin("ameli.etatxt", "ameli.txt_ameli.txtetaid", "ameli.etatxt.id")
|
|
31
|
+
.leftJoin("ameli.ses", "ameli.txt_ameli.sesdepid", "ameli.ses.id")
|
|
32
|
+
.leftJoin("ameli.typses", "ameli.typses.id", "ameli.ses.typid")
|
|
33
|
+
.leftJoin("ameli.nat", "ameli.txt_ameli.natid", "ameli.nat.id")
|
|
34
|
+
.leftJoin("ameli.lec_ameli", "ameli.txt_ameli.lecid", "ameli.lec_ameli.id")
|
|
35
|
+
.leftJoin("ameli.mot", "ameli.amd.motid", "ameli.mot.id")
|
|
36
|
+
.leftJoin("ameli.avicom", "ameli.amd.avcid", "ameli.avicom.id")
|
|
37
|
+
.leftJoin("ameli.avigvt", "ameli.amd.avgid", "ameli.avigvt.id")
|
|
38
|
+
.leftJoin("ameli.sor", "ameli.amd.sorid", "ameli.sor.id")
|
|
39
|
+
.leftJoin("ameli.irr", "ameli.amd.irrid", "ameli.irr.id")
|
|
40
|
+
.leftJoin("ameli.grppol_ameli", "ameli.amd.nomentid", "ameli.grppol_ameli.entid")
|
|
41
|
+
.leftJoin("ameli.com_ameli", "ameli.amd.nomentid", "ameli.com_ameli.entid")
|
|
42
|
+
.leftJoin("ameli.cab", "ameli.amd.nomentid", "ameli.cab.entid")
|
|
43
|
+
.leftJoin("dosleg.amescr", "ameli.amd.num", "dosleg.amescr.amescrnum")
|
|
43
44
|
.select(({ eb, ref, val }) => [
|
|
44
|
-
"ses.ann as session",
|
|
45
|
-
"ses.lil as session_libelle",
|
|
46
|
-
"typses.lib as type_session",
|
|
47
|
-
"txt_ameli.doslegsignet as signet_dossier_legislatif",
|
|
48
|
-
"nat.libcourt as nature_texte",
|
|
49
|
-
"nat.lib as nature_texte_libelle",
|
|
50
|
-
"txt_ameli.numabs as numero_texte",
|
|
51
|
-
"txt_ameli.numado as numero_adoption_texte",
|
|
52
|
-
"txt_ameli.int as intitule_texte",
|
|
53
|
-
"etatxt.lic as etat_texte",
|
|
54
|
-
"etatxt.lib as etat_texte_libelle",
|
|
55
|
-
"etatxt.txttyp as type_texte",
|
|
56
|
-
"lec_ameli.lib as lecture",
|
|
45
|
+
"ameli.ses.ann as session",
|
|
46
|
+
"ameli.ses.lil as session_libelle",
|
|
47
|
+
"ameli.typses.lib as type_session",
|
|
48
|
+
"ameli.txt_ameli.doslegsignet as signet_dossier_legislatif",
|
|
49
|
+
"ameli.nat.libcourt as nature_texte",
|
|
50
|
+
"ameli.nat.lib as nature_texte_libelle",
|
|
51
|
+
"ameli.txt_ameli.numabs as numero_texte",
|
|
52
|
+
"ameli.txt_ameli.numado as numero_adoption_texte",
|
|
53
|
+
"ameli.txt_ameli.int as intitule_texte",
|
|
54
|
+
"ameli.etatxt.lic as etat_texte",
|
|
55
|
+
"ameli.etatxt.lib as etat_texte_libelle",
|
|
56
|
+
"ameli.etatxt.txttyp as type_texte",
|
|
57
|
+
"ameli.lec_ameli.lib as lecture",
|
|
57
58
|
eb
|
|
58
59
|
.case()
|
|
59
|
-
.when("amd.typ", "=", "A")
|
|
60
|
+
.when("ameli.amd.typ", "=", "A")
|
|
60
61
|
.then(val("Amendement"))
|
|
61
|
-
.when("amd.typ", "=", "M")
|
|
62
|
+
.when("ameli.amd.typ", "=", "M")
|
|
62
63
|
.then(val("Motion"))
|
|
63
|
-
.when("amd.typ", "=", "S")
|
|
64
|
+
.when("ameli.amd.typ", "=", "S")
|
|
64
65
|
.then(val("Sous-amendement"))
|
|
65
66
|
.else("")
|
|
66
67
|
.end()
|
|
67
68
|
.as("nature"),
|
|
68
|
-
"amd.id as id",
|
|
69
|
-
"amd.amdperid as parent_id",
|
|
70
|
-
"amd.amdrendusim as rendu_similaire_id",
|
|
71
|
-
"amd.ideid as identique_id",
|
|
72
|
-
"amd.discomid as discussion_commune_id",
|
|
73
|
-
"amd.num as numero",
|
|
74
|
-
"amd.numabs as numero_absolu",
|
|
75
|
-
"amd.ord as ordre",
|
|
76
|
-
"amd.accgou as accepte_gouvernement",
|
|
77
|
-
"
|
|
78
|
-
"sub.
|
|
79
|
-
"sub.
|
|
80
|
-
"sub.
|
|
81
|
-
"sub.
|
|
82
|
-
"sub.
|
|
83
|
-
"sub.
|
|
84
|
-
"sub.
|
|
85
|
-
"
|
|
86
|
-
"
|
|
87
|
-
"amd.
|
|
88
|
-
"amd.
|
|
89
|
-
"amd.
|
|
90
|
-
|
|
91
|
-
"amd.
|
|
92
|
-
"amd.
|
|
93
|
-
"
|
|
94
|
-
"
|
|
69
|
+
"ameli.amd.id as id",
|
|
70
|
+
"ameli.amd.amdperid as parent_id",
|
|
71
|
+
"ameli.amd.amdrendusim as rendu_similaire_id",
|
|
72
|
+
"ameli.amd.ideid as identique_id",
|
|
73
|
+
"ameli.amd.discomid as discussion_commune_id",
|
|
74
|
+
"ameli.amd.num as numero",
|
|
75
|
+
"ameli.amd.numabs as numero_absolu",
|
|
76
|
+
"ameli.amd.ord as ordre",
|
|
77
|
+
"ameli.amd.accgou as accepte_gouvernement",
|
|
78
|
+
"dosleg.amescr.scrnum as scrutin_id",
|
|
79
|
+
"ameli.sub.lib as subdivision_libelle",
|
|
80
|
+
"ameli.sub.lic as subdivision_libelle_court",
|
|
81
|
+
"ameli.sub.pos as subdivision_position_texte",
|
|
82
|
+
"ameli.sub.posder as subdivision_position_discussion",
|
|
83
|
+
"ameli.sub.merid as subdivision_mere_id",
|
|
84
|
+
"ameli.sub.sig as subdivision_signet",
|
|
85
|
+
"ameli.sub.comdelid as subdivision_commission_id",
|
|
86
|
+
"ameli.sub.dupl as subdivision_dupliquee",
|
|
87
|
+
"ameli.typsub.lib as subdivision_type",
|
|
88
|
+
"ameli.amd.alinea as alinea",
|
|
89
|
+
"ameli.amd.commentprobleme as commentaire_probleme",
|
|
90
|
+
"ameli.amd.obs as observations",
|
|
91
|
+
"ameli.amd.mot as observations_additionnelles",
|
|
92
|
+
toDateString(ref("ameli.amd.datdep")).as("date_depot"),
|
|
93
|
+
"ameli.amd.dis as dispositif",
|
|
94
|
+
"ameli.amd.obj as objet",
|
|
95
|
+
"ameli.typrect.lib as type_rectification",
|
|
96
|
+
"ameli.mot.lib as motion_libelle",
|
|
95
97
|
eb
|
|
96
98
|
.case()
|
|
97
|
-
.when("amd.etaid", "=", 7)
|
|
99
|
+
.when("ameli.amd.etaid", "=", 7)
|
|
98
100
|
.then(val("Diffusé"))
|
|
99
|
-
.when("amd.etaid", "=", 8)
|
|
101
|
+
.when("ameli.amd.etaid", "=", 8)
|
|
100
102
|
.then(val("Retiré avant réunion ou séance"))
|
|
101
|
-
.when("amd.etaid", "=", 9)
|
|
103
|
+
.when("ameli.amd.etaid", "=", 9)
|
|
102
104
|
.then(val("Examiné en commission ou séance"))
|
|
103
|
-
.when("amd.etaid", "=", 10)
|
|
105
|
+
.when("ameli.amd.etaid", "=", 10)
|
|
104
106
|
.then(val("Irrecevable"))
|
|
105
|
-
.when("amd.etaid", "=", 11)
|
|
107
|
+
.when("ameli.amd.etaid", "=", 11)
|
|
106
108
|
.then(val("Irrecevable"))
|
|
107
109
|
.else("")
|
|
108
110
|
.end()
|
|
109
111
|
.as("etat"),
|
|
110
|
-
"avicom.lib as avis_commission",
|
|
111
|
-
"avigvt.lib as avis_gouvernement",
|
|
112
|
-
eb.fn.coalesce("sor.lib", "irr.libirr").as("sort"),
|
|
113
|
-
"amd.rev as revision",
|
|
114
|
-
concat(val("https://www.senat.fr/amendements/"), ref("ses.lil"), val("/"), ref("txt_ameli.numabs"), val("/Amdt_"), ref("amd.numabs"), val(".html")).as("url"),
|
|
115
|
-
"grppol_ameli.lilcou as au_nom_de_groupe_politique",
|
|
116
|
-
"com_ameli.lil as au_nom_de_commission",
|
|
112
|
+
"ameli.avicom.lib as avis_commission",
|
|
113
|
+
"ameli.avigvt.lib as avis_gouvernement",
|
|
114
|
+
eb.fn.coalesce("ameli.sor.lib", "ameli.irr.libirr").as("sort"),
|
|
115
|
+
"ameli.amd.rev as revision",
|
|
116
|
+
concat(val("https://www.senat.fr/amendements/"), ref("ameli.ses.lil"), val("/"), ref("ameli.txt_ameli.numabs"), val("/Amdt_"), ref("ameli.amd.numabs"), val(".html")).as("url"),
|
|
117
|
+
"ameli.grppol_ameli.lilcou as au_nom_de_groupe_politique",
|
|
118
|
+
"ameli.com_ameli.lil as au_nom_de_commission",
|
|
117
119
|
eb
|
|
118
120
|
.case()
|
|
119
|
-
.when("cab.entid", "is not", null)
|
|
121
|
+
.when("ameli.cab.entid", "is not", null)
|
|
120
122
|
.then(true)
|
|
121
123
|
.else(false)
|
|
122
124
|
.end()
|
|
123
125
|
.as("auteur_est_gouvernement"),
|
|
124
|
-
auteurs(ref("amd.id")).as("auteurs")
|
|
126
|
+
auteurs(ref("ameli.amd.id")).as("auteurs")
|
|
125
127
|
]);
|
|
126
|
-
export function
|
|
127
|
-
|
|
128
|
+
/**
 * Streams the rows produced by `findAllAmendementsQuery`.
 *
 * @param fromSession Optional lower bound. When it is anything other than `undefined`,
 *   only rows whose `ameli.ses.ann` is greater than or equal to it are streamed.
 * @returns The async row stream returned by the query builder's `stream()`.
 */
export function findAllAmendements(fromSession) {
    const query = fromSession === undefined
        ? findAllAmendementsQuery
        : findAllAmendementsQuery.where("ameli.ses.ann", ">=", fromSession);
    return query.stream();
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import * as cheerio from "cheerio";
|
|
2
|
+
import { CompteRendu } from "../types/compte_rendu";
|
|
3
|
+
import { GroupedReunion } from "../types/agenda";
|
|
4
|
+
/**
 * Returns the text of paragraph `$p` after its leading bold speaker header has been removed.
 *
 * The implementation works on a clone of `$p`, strips the leading `<strong>`/`<b>` elements,
 * then normalizes spaces and removes the introductory punctuation/dash that follows the header.
 *
 * @param $ Cheerio API bound to the document that contains `$p`.
 * @param $p Paragraph selection to read.
 * @returns The cleaned remaining text (may be empty).
 */
export declare function getRemainingTextAfterSpeakerHeader($: cheerio.CheerioAPI, $p: cheerio.Cheerio<any>): string;
|
|
5
|
+
/**
 * Parses a commission report HTML file and builds the report of the day given by `best.date`.
 *
 * @param htmlFilePath Path of the HTML file, read synchronously as UTF-8.
 * @param best Meeting whose `date`, `startTime` and `uid` drive the extraction.
 * @returns The parsed report, or `null` when no `<h2>` heading matches the day or when
 *   reading/parsing throws (the error is logged, not rethrown).
 */
export declare function parseCommissionCRFromFile(htmlFilePath: string, best: GroupedReunion): CompteRendu | null;
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import * as cheerio from "cheerio";
|
|
2
|
+
import path from "path";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
import { norm, toCRDate } from "./util";
|
|
5
|
+
// CSS selector for the elements harvested as report content: paragraphs carrying the
// `sh_justify`, `sh_center` or `sh_marge` class or an `align` attribute, list items,
// and `h3` headings (which are turned into title points).
const PARA_h3_SEL = "p.sh_justify, p.sh_center, p.sh_marge, p[align], li, h3";
|
|
6
|
+
/**
 * Locates the `<h2>` heading that opens the requested day.
 *
 * Headings are inspected in document order. The text following a French weekday name is
 * handed to `frDateToISO`; a heading without a weekday yields `undefined` for the comparison.
 *
 * @param $ Cheerio API of the loaded document.
 * @param targetISO Day to look for, compared with strict equality against the converted date.
 * @returns A selection holding the first matching `<h2>`, or an empty selection.
 */
function findDayRoot($, targetISO) {
    const weekdayThenDate = /(?:Lundi|Mardi|Mercredi|Jeudi|Vendredi|Samedi|Dimanche)\s+(.+)$/i;
    for (const heading of $("h2").toArray()) {
        const label = norm($(heading).text());
        const found = label.match(weekdayThenDate);
        const headingISO = found ? frDateToISO(found[1]) : undefined;
        if (headingISO === targetISO) {
            // First match wins; later headings are irrelevant.
            return $(heading);
        }
    }
    return $();
}
|
|
17
|
+
/**
 * Replaces every no-break space (U+00A0), narrow no-break space (U+202F) and thin space
 * (U+2009) with a regular space, one for one.
 *
 * @param s Text to clean.
 * @returns The text with those characters replaced; its length is unchanged.
 */
function normalizeSpaces(s) {
    const typographicSpace = /[\u00A0\u202F\u2009]/g;
    return s.replace(typographicSpace, " ");
}
|
|
20
|
+
/**
 * Removes the separator that introduces a speech at the start of `s`: optional whitespace,
 * at most one of `.`, `:` or `;`, then one or more dashes (en dash, em dash or hyphen),
 * each optionally followed by whitespace.
 *
 * The string is returned unchanged when no dash is present at that position.
 *
 * @param s Text to clean.
 * @returns The text without its leading separator.
 */
function stripIntroPunct(s) {
    const leadingSeparator = /^[\s]*[.:;]?\s*(?:[–—-]\s*)+/u;
    return s.replace(leadingSeparator, "");
}
|
|
23
|
+
/**
 * Collects the bold elements that form the leading header of a paragraph.
 *
 * Child nodes of `$clone` are scanned in order:
 * - whitespace-only text is skipped, any other text ends the scan;
 * - a `<strong>`/`<b>` element is collected;
 * - an `<a>` with direct `<strong>`/`<b>` children contributes those children;
 * - any other element ends the scan; nodes that are neither text nor tag are skipped.
 *
 * @param $ Cheerio API used to wrap nodes.
 * @param $clone Paragraph selection to scan (callers pass a clone they intend to mutate).
 * @returns Raw DOM nodes, in document order. Every entry is a plain node, never a Cheerio
 *   wrapper, so callers can treat the array uniformly.
 */
function collectLeadingHeaderStrongEls($, $clone) {
    const els = [];
    for (const node of $clone.contents().toArray()) {
        if (node.type === "text") {
            if (norm(node.data || ""))
                break;
            continue;
        }
        if (node.type !== "tag")
            continue;
        const $n = $(node);
        if ($n.is("strong, b")) {
            els.push(node);
            continue;
        }
        const $nestedBold = $n.is("a") ? $n.children("strong, b") : null;
        if ($nestedBold && $nestedBold.length) {
            // Push the raw nodes (not `$(el)`) to stay consistent with the branch above.
            els.push(...$nestedBold.toArray());
            continue;
        }
        break;
    }
    return els;
}
|
|
49
|
+
/**
 * Returns the text of paragraph `$p` once its leading speaker header has been dropped.
 *
 * `$p` itself is not modified: the work is done on a clone. The leading bold elements
 * found by `collectLeadingHeaderStrongEls` are removed, the remaining markup is reduced
 * to plain text, typographic spaces are normalized, the introductory punctuation is
 * stripped and the result goes through `norm`.
 *
 * @param $ Cheerio API bound to the document that contains `$p`.
 * @param $p Paragraph selection to read.
 * @returns The cleaned remaining text.
 */
export function getRemainingTextAfterSpeakerHeader($, $p) {
    const $copy = $p.clone();
    collectLeadingHeaderStrongEls($, $copy).forEach((headerEl) => {
        $(headerEl).remove();
    });
    // Re-parse what is left so that only its text content is kept.
    const plainText = cheerio.load($copy.html() || "").text();
    const withoutIntro = stripIntroPunct(normalizeSpaces(plainText));
    return norm(withoutIntro || "");
}
|
|
63
|
+
/**
 * Converts an ordered list of paragraph selections into report "points".
 *
 * Three kinds of points are produced, each numbered by a shared 1-based counter stored as
 * a string in `ordre_absolu_seance`:
 * - a title point for every `<h3>`;
 * - an "Info Italiques" point for a paragraph made only of italics, or for a paragraph
 *   without bold text met while no speaker is active;
 * - a speech point per speaker turn, where consecutive paragraphs of the same speaker are
 *   joined with `<br/><br/>`.
 *
 * Paragraphs inside a table, and paragraphs whose normalized text is 3 characters or
 * shorter, are ignored.
 *
 * @param $ Cheerio API bound to the document the paragraphs belong to.
 * @param paras Iterable of Cheerio selections, one per paragraph/heading.
 * @returns The points in reading order.
 */
function buildPointsFromParagraphs($, paras) {
    const points = [];
    let sequence = 0;
    // Speaker turn being buffered; `currentOrateur` is null while nobody is speaking.
    let currentOrateur = null;
    let currentQualite = "";
    let currentTexte = "";
    const collapseWhitespace = (s) => s.normalize("NFKC").replace(/\s+/g, " ");
    const normSpeaker = (s) => collapseWhitespace(s).replace(/[:\.]\s*$/, "").trim();
    const normQual = (s) => collapseWhitespace(s)
        .replace(/^\s*,\s*|\s+$/g, "")
        .replace(/[\s\u00A0]*[.,;:–—-]+$/u, "")
        .trim();
    const isPresidentQual = (qual) => /\bprésident(e)?\b/i.test(qual);
    const addPoint = (payload) => {
        sequence += 1;
        points.push({ ...payload, ordre_absolu_seance: String(sequence) });
    };
    // Emits the buffered turn, if it has both a speaker and text, then clears the buffer.
    // When nothing is emitted the buffer is deliberately left as it is.
    const flush = () => {
        if (!currentOrateur || !currentTexte.trim())
            return;
        sequence += 1;
        points.push({
            code_grammaire: "PAROLE_GENERIQUE",
            roledebat: isPresidentQual(currentQualite) ? "président" : "",
            ordre_absolu_seance: String(sequence),
            orateurs: { orateur: { nom: currentOrateur, id: "", qualite: currentQualite || "" } },
            texte: { _: currentTexte.trim() },
        });
        currentOrateur = null;
        currentQualite = "";
        currentTexte = "";
    };
    // True when every span/i/em of the paragraph is italic and nothing precedes the first one.
    const isPureItalicParagraph = ($para) => {
        const $italics = $para.find("i, em, span[style*='italic']");
        if ($italics.length === 0)
            return false;
        if ($italics.length !== $para.find("span,i,em").length)
            return false;
        const firstOuter = $($italics[0]).prop("outerHTML") || "";
        const leadingHtml = firstOuter ? ($para.html() || "").trim().split(firstOuter)[0].trim() : "";
        return leadingHtml === "";
    };
    for (const $p of paras) {
        if ($p.closest("table").length > 0)
            continue;
        const tagName = ($p.prop("tagName") || "").toString().toLowerCase();
        const text = norm(($p.text() || "").replace(/\u00a0/g, " ").trim());
        if (!text || text.length <= 3)
            continue;
        const pureItalic = isPureItalicParagraph($p);
        if (tagName === "h3") {
            flush();
            addPoint({
                code_style: "Titre",
                code_grammaire: "TITRE_TEXTE_DISCUSSION",
                texte: { _: text },
            });
            continue;
        }
        // The bold text is expected to read "<name>, <quality>".
        const boldText = norm($p
            .find("strong, b")
            .map((_, el) => $(el).text() || "")
            .get()
            .join(""));
        const [rawName, rawQual] = boldText.split(/\s*,\s+/, 2);
        const namePart = rawName ? normSpeaker(rawName) : "";
        const opensNewTurn = namePart.length > 3
            && /^(M\.|Mme)[\s\u00A0\u202F]+/i.test(namePart)
            && text.startsWith(namePart)
            && namePart !== currentOrateur;
        if (opensNewTurn) {
            flush();
            currentOrateur = namePart;
            currentQualite = rawQual ? normQual(rawQual) : "";
            currentTexte = getRemainingTextAfterSpeakerHeader($, $p);
            continue;
        }
        if (pureItalic || (!boldText && !currentOrateur)) {
            flush();
            addPoint({
                code_style: "Info Italiques",
                code_grammaire: "PAROLE_GENERIQUE",
                texte: { _: "<i>" + text + "</i>" },
            });
            continue;
        }
        if (currentOrateur) {
            // Same speaker goes on: append, separated by a blank line.
            const continuation = getRemainingTextAfterSpeakerHeader($, $p);
            currentTexte += (currentTexte ? "<br/><br/>" : "") + continuation;
        }
    }
    flush();
    return points;
}
|
|
163
|
+
function frDateToISO(s) {
|
|
164
|
+
if (!s)
|
|
165
|
+
return;
|
|
166
|
+
const months = {
|
|
167
|
+
janvier: 1,
|
|
168
|
+
fevrier: 2,
|
|
169
|
+
février: 2,
|
|
170
|
+
mars: 3,
|
|
171
|
+
avril: 4,
|
|
172
|
+
mai: 5,
|
|
173
|
+
juin: 6,
|
|
174
|
+
juillet: 7,
|
|
175
|
+
aout: 8,
|
|
176
|
+
août: 8,
|
|
177
|
+
septembre: 9,
|
|
178
|
+
octobre: 10,
|
|
179
|
+
novembre: 11,
|
|
180
|
+
decembre: 12,
|
|
181
|
+
décembre: 12,
|
|
182
|
+
};
|
|
183
|
+
const m = norm(s).match(/^(\d{1,2})\s+([A-Za-zéûôîà]+)\s+(\d{4})$/i);
|
|
184
|
+
if (!m)
|
|
185
|
+
return;
|
|
186
|
+
const d = Number(m[1]);
|
|
187
|
+
const mon = months[m[2].toLowerCase()];
|
|
188
|
+
const y = Number(m[3]);
|
|
189
|
+
if (!mon)
|
|
190
|
+
return;
|
|
191
|
+
return `${y}-${String(mon).padStart(2, "0")}-${String(d).padStart(2, "0")}`;
|
|
192
|
+
}
|
|
193
|
+
/**
 * Parses a commission report HTML file and builds the report for the day `best.date`.
 *
 * The `<h2>` heading of the day is located with `findDayRoot`; every following sibling up
 * to the next `<h2>` is scanned for content elements (`PARA_h3_SEL`), which are then
 * converted to points by `buildPointsFromParagraphs`.
 *
 * @param htmlFilePath Path of the HTML file, read synchronously as UTF-8.
 * @param best Meeting providing `date` (ISO day), `startTime` and `uid`.
 * @returns The report object, or `null` when the day heading is missing or when anything
 *   throws (the error is logged to the console, not rethrown).
 */
export function parseCommissionCRFromFile(htmlFilePath, best) {
    try {
        const raw = fs.readFileSync(htmlFilePath, "utf8");
        const $ = cheerio.load(raw, { xmlMode: false });
        const dateISO = best.date;
        const dateSeance = toCRDate(dateISO, best.startTime);
        const $dayRoot = findDayRoot($, dateISO);
        if ($dayRoot.length === 0) {
            console.warn(`[COM-CR][parse] day root not found for ${dateISO} in ${path.basename(htmlFilePath)}`);
            return null;
        }
        // Take all paragraphs/h3 until next h2
        const dayParas = [];
        for (let $cursor = $dayRoot.next(); $cursor.length && !$cursor.is("h2"); $cursor = $cursor.next()) {
            if ($cursor.is(PARA_h3_SEL)) {
                // PARA_h3_SEL already matches <h3>; a separate h3 check here would enqueue
                // each sibling heading twice and duplicate its title point.
                dayParas.push($cursor);
            }
            else {
                // Wrapper element: harvest the content elements nested inside it.
                $cursor.find(PARA_h3_SEL).each((_, p) => {
                    dayParas.push($(p));
                });
            }
        }
        const points = buildPointsFromParagraphs($, dayParas);
        // A session is labelled "<year>-<year+1>"; months 10 to 12 belong to the one
        // starting that year, earlier months to the one started the year before.
        const year = dateISO.slice(0, 4);
        const session = dateISO.slice(5, 7) >= "10"
            ? `${year}-${Number(year) + 1}`
            : `${Number(year) - 1}-${year}`;
        const contenu = {
            quantiemes: { journee: dateISO, session },
            point: points,
        };
        const metadonnees = {
            dateSeance: dateSeance,
            dateSeanceJour: dateISO,
            numSeanceJour: "",
            numSeance: "",
            typeAssemblee: "SN",
            legislature: "",
            session,
            nomFichierJo: path.basename(htmlFilePath),
            validite: "non-certifie",
            etat: "definitif",
            diffusion: "publique",
            version: "1",
            environnement: "prod",
            heureGeneration: new Date(),
        };
        return {
            uid: best.uid.replace(/^RUSN/, "CRC"),
            seanceRef: best.uid,
            sessionRef: session,
            metadonnees,
            contenu,
        };
    }
    catch (e) {
        console.error(`[COM-CR][parse] error file=${path.basename(htmlFilePath)}:`, e);
        return null;
    }
}
|
package/lib/model/debats.d.ts
CHANGED
|
@@ -1,81 +1,43 @@
|
|
|
1
1
|
import { InferResult } from "kysely";
|
|
2
2
|
export type DebatResult = InferResult<typeof findAllQuery>[0];
|
|
3
|
-
declare const findAllQuery: import("kysely").SelectQueryBuilder<
|
|
3
|
+
declare const findAllQuery: import("kysely").SelectQueryBuilder<any, "debats", {
|
|
4
|
+
[x: string]: any;
|
|
4
5
|
id: string;
|
|
5
6
|
date_seance: string;
|
|
6
|
-
numero: number | null;
|
|
7
|
-
url: string | null;
|
|
8
|
-
etat_synchronisation: string | null;
|
|
9
7
|
sections: {
|
|
10
|
-
|
|
11
|
-
numero: string | null;
|
|
12
|
-
objet: string | null;
|
|
13
|
-
url: string | null;
|
|
14
|
-
type: string | null;
|
|
15
|
-
categorie: string | null;
|
|
8
|
+
[x: string]: any;
|
|
16
9
|
interventions: {
|
|
17
|
-
|
|
18
|
-
auteur_code: string;
|
|
19
|
-
fonction_intervenant: string | null;
|
|
20
|
-
url: string | null;
|
|
21
|
-
analyse: string | null;
|
|
10
|
+
[x: string]: any;
|
|
22
11
|
}[];
|
|
23
|
-
lecture_id: string;
|
|
24
12
|
}[];
|
|
25
13
|
sections_divers: {
|
|
26
|
-
|
|
27
|
-
categorie: string | null;
|
|
28
|
-
libelle: string | null;
|
|
29
|
-
objet: string | null;
|
|
14
|
+
[x: string]: any;
|
|
30
15
|
interventions: {
|
|
31
|
-
|
|
32
|
-
auteur_code: string;
|
|
33
|
-
fonction_intervenant: string | null;
|
|
34
|
-
url: string | null;
|
|
35
|
-
analyse: string | null;
|
|
16
|
+
[x: string]: any;
|
|
36
17
|
}[];
|
|
37
18
|
}[];
|
|
38
19
|
lectures: {
|
|
39
|
-
id:
|
|
20
|
+
id: any;
|
|
40
21
|
}[];
|
|
41
22
|
}>;
|
|
42
23
|
export declare function findAll(): AsyncIterableIterator<{
|
|
24
|
+
[x: string]: any;
|
|
43
25
|
id: string;
|
|
44
26
|
date_seance: string;
|
|
45
|
-
numero: number | null;
|
|
46
|
-
url: string | null;
|
|
47
|
-
etat_synchronisation: string | null;
|
|
48
27
|
sections: {
|
|
49
|
-
|
|
50
|
-
numero: string | null;
|
|
51
|
-
objet: string | null;
|
|
52
|
-
url: string | null;
|
|
53
|
-
type: string | null;
|
|
54
|
-
categorie: string | null;
|
|
28
|
+
[x: string]: any;
|
|
55
29
|
interventions: {
|
|
56
|
-
|
|
57
|
-
auteur_code: string;
|
|
58
|
-
fonction_intervenant: string | null;
|
|
59
|
-
url: string | null;
|
|
60
|
-
analyse: string | null;
|
|
30
|
+
[x: string]: any;
|
|
61
31
|
}[];
|
|
62
|
-
lecture_id: string;
|
|
63
32
|
}[];
|
|
64
33
|
sections_divers: {
|
|
65
|
-
|
|
66
|
-
categorie: string | null;
|
|
67
|
-
libelle: string | null;
|
|
68
|
-
objet: string | null;
|
|
34
|
+
[x: string]: any;
|
|
69
35
|
interventions: {
|
|
70
|
-
|
|
71
|
-
auteur_code: string;
|
|
72
|
-
fonction_intervenant: string | null;
|
|
73
|
-
url: string | null;
|
|
74
|
-
analyse: string | null;
|
|
36
|
+
[x: string]: any;
|
|
75
37
|
}[];
|
|
76
38
|
}[];
|
|
77
39
|
lectures: {
|
|
78
|
-
id:
|
|
40
|
+
id: any;
|
|
79
41
|
}[];
|
|
80
42
|
}>;
|
|
81
43
|
export {};
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { dbSenat } from "../databases";
|
|
2
|
+
import { rtrim } from "./util";
|
|
3
|
+
/**
 * Streams the URLs recorded in the `texte` table of the `dosleg` schema.
 *
 * Only rows with a non-null `texurl` and `typurl = "I"` are returned. Each row exposes
 * `session` (`sesann`), `url` (`texurl` passed through `rtrim`) and `hasExposeDesMotifs`,
 * which is true when `oritxtcod` equals "1".
 *
 * @param sessions Session values to keep; an empty array (the default) disables the filter.
 * @returns The async row stream returned by the query builder's `stream()`.
 */
export function findSenatTexteUrls(sessions = []) {
    const restrictToSessions = sessions.length > 0;
    const textes = dbSenat
        .withSchema("dosleg")
        .selectFrom("texte")
        .where("texurl", "is not", null)
        .where("typurl", "=", "I")
        .$if(restrictToSessions, (qb) => qb.where("sesann", "in", sessions));
    const projected = textes.select(({ eb, ref }) => {
        const hasExposeDesMotifs = eb
            .case()
            .when("oritxtcod", "=", "1")
            .then(true)
            .else(false)
            .end()
            .as("hasExposeDesMotifs");
        return ["sesann as session", rtrim(ref("texurl")).as("url"), hasExposeDesMotifs];
    });
    return projected.$narrowType().stream();
}
|
|
24
|
+
/**
 * Streams the URLs recorded in the `rap` table of the `dosleg` schema.
 *
 * Only rows with a non-null `rapurl` and `typurl = "I"` are returned. Each row exposes
 * `session` (`sesann`) and `url` (`rapurl` passed through `rtrim`).
 *
 * @param sessions Session values to keep; an empty array (the default) disables the filter.
 * @returns The async row stream returned by the query builder's `stream()`.
 */
export function findSenatRapportUrls(sessions = []) {
    const restrictToSessions = sessions.length > 0;
    const rapports = dbSenat
        .withSchema("dosleg")
        .selectFrom("rap")
        .where("rapurl", "is not", null)
        .where("typurl", "=", "I")
        .$if(restrictToSessions, (qb) => qb.where("sesann", "in", sessions));
    const projected = rapports.select(({ ref }) => {
        const url = rtrim(ref("rapurl")).as("url");
        return ["sesann as session", url];
    });
    return projected.$narrowType().stream();
}
|