@tricoteuses/senat 2.21.6 → 2.22.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/lib/src/types/texte.d.ts +2 -5
  2. package/lib/src/types/texte.js +0 -1
  3. package/package.json +1 -1
  4. package/lib/config.d.ts +0 -3
  5. package/lib/config.js +0 -16
  6. package/lib/databases.d.ts +0 -2
  7. package/lib/databases.js +0 -26
  8. package/lib/datasets.d.ts +0 -34
  9. package/lib/datasets.js +0 -233
  10. package/lib/git.d.ts +0 -26
  11. package/lib/git.js +0 -167
  12. package/lib/index.d.ts +0 -13
  13. package/lib/index.js +0 -1
  14. package/lib/loaders.d.ts +0 -58
  15. package/lib/loaders.js +0 -286
  16. package/lib/model/agenda.d.ts +0 -6
  17. package/lib/model/agenda.js +0 -148
  18. package/lib/model/ameli.d.ts +0 -51
  19. package/lib/model/ameli.js +0 -149
  20. package/lib/model/commission.d.ts +0 -18
  21. package/lib/model/commission.js +0 -269
  22. package/lib/model/debats.d.ts +0 -67
  23. package/lib/model/debats.js +0 -95
  24. package/lib/model/documents.d.ts +0 -12
  25. package/lib/model/documents.js +0 -151
  26. package/lib/model/dosleg.d.ts +0 -7
  27. package/lib/model/dosleg.js +0 -326
  28. package/lib/model/index.d.ts +0 -7
  29. package/lib/model/index.js +0 -7
  30. package/lib/model/questions.d.ts +0 -45
  31. package/lib/model/questions.js +0 -89
  32. package/lib/model/scrutins.d.ts +0 -13
  33. package/lib/model/scrutins.js +0 -114
  34. package/lib/model/seance.d.ts +0 -3
  35. package/lib/model/seance.js +0 -267
  36. package/lib/model/sens.d.ts +0 -146
  37. package/lib/model/sens.js +0 -454
  38. package/lib/model/texte.d.ts +0 -7
  39. package/lib/model/texte.js +0 -228
  40. package/lib/model/util.d.ts +0 -9
  41. package/lib/model/util.js +0 -38
  42. package/lib/parsers/texte.d.ts +0 -7
  43. package/lib/parsers/texte.js +0 -228
  44. package/lib/raw_types/ameli.d.ts +0 -914
  45. package/lib/raw_types/ameli.js +0 -5
  46. package/lib/raw_types/debats.d.ts +0 -207
  47. package/lib/raw_types/debats.js +0 -5
  48. package/lib/raw_types/dosleg.d.ts +0 -1619
  49. package/lib/raw_types/dosleg.js +0 -5
  50. package/lib/raw_types/questions.d.ts +0 -419
  51. package/lib/raw_types/questions.js +0 -5
  52. package/lib/raw_types/senat.d.ts +0 -11368
  53. package/lib/raw_types/senat.js +0 -5
  54. package/lib/raw_types/sens.d.ts +0 -8248
  55. package/lib/raw_types/sens.js +0 -5
  56. package/lib/raw_types_schemats/ameli.d.ts +0 -539
  57. package/lib/raw_types_schemats/ameli.js +0 -2
  58. package/lib/raw_types_schemats/debats.d.ts +0 -127
  59. package/lib/raw_types_schemats/debats.js +0 -2
  60. package/lib/raw_types_schemats/dosleg.d.ts +0 -977
  61. package/lib/raw_types_schemats/dosleg.js +0 -2
  62. package/lib/raw_types_schemats/questions.d.ts +0 -235
  63. package/lib/raw_types_schemats/questions.js +0 -2
  64. package/lib/raw_types_schemats/sens.d.ts +0 -6915
  65. package/lib/raw_types_schemats/sens.js +0 -2
  66. package/lib/scripts/convert_data.d.ts +0 -1
  67. package/lib/scripts/convert_data.js +0 -354
  68. package/lib/scripts/data-download.d.ts +0 -1
  69. package/lib/scripts/data-download.js +0 -12
  70. package/lib/scripts/datautil.d.ts +0 -8
  71. package/lib/scripts/datautil.js +0 -34
  72. package/lib/scripts/parse_textes.d.ts +0 -1
  73. package/lib/scripts/parse_textes.js +0 -44
  74. package/lib/scripts/retrieve_agenda.d.ts +0 -1
  75. package/lib/scripts/retrieve_agenda.js +0 -132
  76. package/lib/scripts/retrieve_cr_commission.d.ts +0 -1
  77. package/lib/scripts/retrieve_cr_commission.js +0 -364
  78. package/lib/scripts/retrieve_cr_seance.d.ts +0 -6
  79. package/lib/scripts/retrieve_cr_seance.js +0 -347
  80. package/lib/scripts/retrieve_documents.d.ts +0 -3
  81. package/lib/scripts/retrieve_documents.js +0 -219
  82. package/lib/scripts/retrieve_open_data.d.ts +0 -1
  83. package/lib/scripts/retrieve_open_data.js +0 -315
  84. package/lib/scripts/retrieve_senateurs_photos.d.ts +0 -1
  85. package/lib/scripts/retrieve_senateurs_photos.js +0 -147
  86. package/lib/scripts/retrieve_videos.d.ts +0 -1
  87. package/lib/scripts/retrieve_videos.js +0 -461
  88. package/lib/scripts/shared/cli_helpers.d.ts +0 -95
  89. package/lib/scripts/shared/cli_helpers.js +0 -91
  90. package/lib/scripts/shared/util.d.ts +0 -4
  91. package/lib/scripts/shared/util.js +0 -35
  92. package/lib/scripts/test_iter_load.d.ts +0 -1
  93. package/lib/scripts/test_iter_load.js +0 -12
  94. package/lib/strings.d.ts +0 -1
  95. package/lib/strings.js +0 -18
  96. package/lib/types/agenda.d.ts +0 -44
  97. package/lib/types/agenda.js +0 -1
  98. package/lib/types/ameli.d.ts +0 -5
  99. package/lib/types/ameli.js +0 -1
  100. package/lib/types/compte_rendu.d.ts +0 -83
  101. package/lib/types/compte_rendu.js +0 -1
  102. package/lib/types/debats.d.ts +0 -2
  103. package/lib/types/debats.js +0 -1
  104. package/lib/types/dosleg.d.ts +0 -70
  105. package/lib/types/dosleg.js +0 -1
  106. package/lib/types/questions.d.ts +0 -2
  107. package/lib/types/questions.js +0 -1
  108. package/lib/types/sens.d.ts +0 -10
  109. package/lib/types/sens.js +0 -1
  110. package/lib/types/sessions.d.ts +0 -5
  111. package/lib/types/sessions.js +0 -84
  112. package/lib/types/texte.d.ts +0 -74
  113. package/lib/types/texte.js +0 -16
  114. package/lib/utils/cr_spliting.d.ts +0 -28
  115. package/lib/utils/cr_spliting.js +0 -265
  116. package/lib/utils/date.d.ts +0 -10
  117. package/lib/utils/date.js +0 -100
  118. package/lib/utils/nvs-timecode.d.ts +0 -7
  119. package/lib/utils/nvs-timecode.js +0 -79
  120. package/lib/utils/reunion_grouping.d.ts +0 -11
  121. package/lib/utils/reunion_grouping.js +0 -337
  122. package/lib/utils/reunion_odj_building.d.ts +0 -5
  123. package/lib/utils/reunion_odj_building.js +0 -154
  124. package/lib/utils/reunion_parsing.d.ts +0 -23
  125. package/lib/utils/reunion_parsing.js +0 -209
  126. package/lib/utils/scoring.d.ts +0 -14
  127. package/lib/utils/scoring.js +0 -147
  128. package/lib/utils/string_cleaning.d.ts +0 -7
  129. package/lib/utils/string_cleaning.js +0 -57
  130. package/lib/validators/config.d.ts +0 -1
  131. package/lib/validators/config.js +0 -54
@@ -1,267 +0,0 @@
1
- import fs from "fs";
2
- import * as cheerio from "cheerio";
3
- import { toCRDate } from "./util";
4
- import { makeReunionUid } from "../utils/reunion_parsing";
5
- import { yyyymmddFromPath } from "../utils/date";
6
- import { decodeHtmlEntities, dedupeSpeaker, fixApostrophes, norm } from "../utils/string_cleaning";
7
- export async function parseCompteRenduIntervalFromFile(xmlFilePath, startIndex, endIndex, agendaEventId) {
8
- try {
9
- const raw = fs.readFileSync(xmlFilePath, "utf8");
10
- const $ = cheerio.load(raw, { xml: false });
11
- const metadonnees = extractMetadonnees($, xmlFilePath);
12
- const order = $("body *").toArray();
13
- const idx = new Map(order.map((el, i) => [el, i]));
14
- const totalNodes = order.length;
15
- const clampedStart = Math.max(0, Math.min(startIndex, totalNodes - 1));
16
- const clampedEnd = Math.max(0, Math.min(endIndex, totalNodes - 1));
17
- const intervals = [
18
- {
19
- start: clampedStart,
20
- end: clampedEnd,
21
- },
22
- ];
23
- metadonnees.sommaire = extractSommaireForIntervals($, idx, intervals);
24
- const points = [];
25
- let ordre = 0;
26
- const addPoint = (p) => points.push({ ...p, ordre_absolu_seance: String(++ordre) });
27
- // Interventions
28
- $("div.intervenant").each((_, block) => {
29
- if (!elementInAnyInterval(block, idx, intervals))
30
- return;
31
- const $block = $(block);
32
- $block
33
- .find([
34
- "p[class^='titre_S']",
35
- "p.mention_titre",
36
- "p.intitule_titre",
37
- "p.mention_chapitre",
38
- "p.intitule_chapitre",
39
- "p.mention_article",
40
- "p.intitule_article",
41
- "p.mention_section",
42
- "p.intitule_section",
43
- ].join(","))
44
- .remove();
45
- const firstP = $block.find("p").first();
46
- if (!firstP || firstP.length === 0)
47
- return;
48
- const speakerLabelRaw = firstP.find(".orateur_nom").text() || firstP.find("a.lien_senfic").text() || "";
49
- const speakerLabel = dedupeSpeaker(speakerLabelRaw);
50
- const { mat, nom: nomCRI, qua: quaCRI } = readIntervenantMeta($block);
51
- const qualFromSpans = extractAndRemoveLeadingQualite($, $block);
52
- const qualite = norm(decodeHtmlEntities(quaCRI || "")) || qualFromSpans;
53
- const canonicalName = dedupeSpeaker(nomCRI || speakerLabel);
54
- const role = roleForSpeaker(speakerLabel) || roleForSpeaker(qualite) || roleForSpeaker(quaCRI || "");
55
- const speechHtml = sanitizeInterventionHtml($, $block);
56
- const speechText = norm(cheerio.load(speechHtml).text() || "");
57
- if (!speechText)
58
- return;
59
- addPoint({
60
- code_grammaire: "PAROLE_GENERIQUE",
61
- roledebat: role,
62
- orateurs: { orateur: { nom: canonicalName, id: mat || "", qualite } },
63
- texte: { _: speechHtml },
64
- });
65
- });
66
- const contenu = {
67
- quantiemes: {
68
- journee: metadonnees.dateSeance,
69
- session: metadonnees.session,
70
- },
71
- point: points,
72
- };
73
- const yyyymmdd = yyyymmddFromPath(xmlFilePath);
74
- const dateISO = `${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`;
75
- const seanceRef = makeReunionUid(dateISO, "SP", agendaEventId, null);
76
- return {
77
- uid: `CRSSN${yyyymmdd}E${agendaEventId}`,
78
- seanceRef,
79
- sessionRef: metadonnees.session,
80
- metadonnees,
81
- contenu,
82
- };
83
- }
84
- catch (e) {
85
- console.error(`[CRI] parseInterval error file=${xmlFilePath} interval=[${startIndex}..${endIndex}] event=${agendaEventId}:`, e);
86
- return null;
87
- }
88
- }
89
- export function sessionStartYearFromDate(d) {
90
- // Session (1th oct N → 30 sept N+1)
91
- const m = d.getMonth();
92
- const y = d.getFullYear();
93
- return m >= 9 ? y : y - 1;
94
- }
95
- function roleForSpeaker(labelOrQualite) {
96
- const s = (labelOrQualite || "").toLowerCase();
97
- if (/^(m\.|mme)?\s*(le|la)\s+pr[ée]sident(e)?\b/.test(s) || /\bpr[ée]sident[e]?\s+de\s+séance\b/.test(s))
98
- return "président";
99
- return "";
100
- }
101
- function readIntervenantMeta($block) {
102
- const int = $block.find("cri\\:intervenant").first();
103
- if (int.length)
104
- return { mat: int.attr("mat") || undefined, nom: int.attr("nom") || undefined, qua: int.attr("qua") || undefined };
105
- const html = $block.html() || "";
106
- const m = html.match(/<!--\s*cri:intervenant\b([^>]+)-->/i);
107
- if (!m)
108
- return {};
109
- const out = {};
110
- const re = /(\w+)="([^"]*)"/g;
111
- let a;
112
- while ((a = re.exec(m[1])))
113
- out[a[1]] = decodeHtmlEntities(a[2]);
114
- return { mat: out["mat"], nom: out["nom"], qua: out["qua"] };
115
- }
116
- function extractAndRemoveLeadingQualite($, $block) {
117
- const firstP = $block.find("p").first();
118
- if (firstP.length === 0)
119
- return "";
120
- const parts = [];
121
- let stop = false;
122
- firstP.contents().each((_, node) => {
123
- if (stop)
124
- return;
125
- if (node.type === "tag") {
126
- const $node = $(node);
127
- if ($node.hasClass("orateur_nom")) {
128
- $node.remove();
129
- return;
130
- }
131
- if ($node.hasClass("orateur_qualite")) {
132
- parts.push($node.text() || "");
133
- $node.remove();
134
- return;
135
- }
136
- const t = norm($node.text() || "");
137
- if (t)
138
- stop = true;
139
- else
140
- $node.remove();
141
- }
142
- else if (node.type === "text") {
143
- const t = norm(node.data || "");
144
- if (!t || /^[:.,;–—-]+$/.test(t)) {
145
- ;
146
- node.data = "";
147
- return;
148
- }
149
- stop = true;
150
- }
151
- });
152
- return fixApostrophes(norm(parts.join(" ")));
153
- }
154
- function sanitizeInterventionHtml($, $block) {
155
- const ps = $block.find("p").toArray();
156
- const cleaned = ps
157
- .map((p) => {
158
- const $p = $(p).clone();
159
- $p.find(".orateur_nom, .orateur_qualite").remove();
160
- $p.find("a").each((_, a) => {
161
- const $a = $(a);
162
- $a.replaceWith($a.text());
163
- });
164
- $p.find(".info_entre_parentheses").each((_, el) => {
165
- const txt = $(el).text();
166
- $(el).replaceWith($("<em/>").text(txt));
167
- });
168
- $p.find("span").each((_, span) => {
169
- const $s = $(span);
170
- if (!$s.text().trim())
171
- $s.remove();
172
- });
173
- const inner = ($p.html() || "").trim();
174
- if (!inner)
175
- return null;
176
- return `<p>${inner}</p>`;
177
- })
178
- .filter(Boolean);
179
- return cleaned.join("<br/>");
180
- }
181
- function extractSommaireForIntervals($, idx, intervals) {
182
- const inIv = (el) => elementInAnyInterval(el, idx, intervals);
183
- const root = $("body");
184
- const sommaire = { presidentSeance: { _: "" }, sommaire1: [] };
185
- // (1) Présidence (tm2) — première ligne dans l’intervalle
186
- const pres = root
187
- .find("p.tm2")
188
- .filter((_, el) => inIv(el))
189
- .first();
190
- if (pres.length)
191
- sommaire.presidentSeance = { _: norm(pres.text()) };
192
- // (2) Paras tm5 présents dans l’intervalle
193
- const paras = [];
194
- root.find("p.tm5").each((_, el) => {
195
- if (!inIv(el))
196
- return;
197
- const t = norm($(el).text());
198
- if (t)
199
- paras.push({ _: t });
200
- });
201
- if (paras.length)
202
- sommaire.para = paras.length === 1 ? paras[0] : paras;
203
- // (3) Items de 1er niveau (tm3) présents dans l’intervalle
204
- const items = [];
205
- root.find("p.tm3").each((_, el) => {
206
- if (!inIv(el))
207
- return;
208
- const $p = $(el);
209
- const full = norm($p.text() || "");
210
- if (!full)
211
- return;
212
- const numMatch = full.match(/^(\d+)\s*[.\-–—]\s*/);
213
- const valeur = numMatch ? numMatch[1] : undefined;
214
- // prefere intitule in ancre <a> if present
215
- const a = $p.find("a").first();
216
- const intituleRaw = a.length ? a.text() : full.replace(/^(\d+)\s*[.\-–—]\s*/, "");
217
- const intitule = norm(intituleRaw);
218
- // id_syceron from href="#Niv1_SOMx"
219
- const href = (a.attr("href") || "").trim();
220
- const idSyceron = href.startsWith("#") ? href.slice(1) : href;
221
- const titreStruct = { id_syceron: idSyceron || "", intitule };
222
- items.push({ valeur_pts_odj: valeur, titreStruct });
223
- });
224
- if (items.length)
225
- sommaire.sommaire1 = items;
226
- return sommaire;
227
- }
228
- function extractMetadonnees($, filePath) {
229
- let dateText = norm($("h1, h2, .page-title").first().text() || "");
230
- if (!dateText)
231
- dateText = norm($("p").first().text() || "");
232
- const dateMatch = dateText.match(/\b(\d{1,2}\s+\w+\s+\d{4})\b/i);
233
- const allText = norm($("body").text() || "");
234
- const sessionMatch = allText.match(/\bsession\s+(\d{4}-\d{4})\b/i);
235
- let dateSeance = dateMatch?.[1] || "";
236
- if (!dateSeance) {
237
- const m = filePath.match(/d(\d{4})(\d{2})(\d{2})\.xml$/i);
238
- if (m)
239
- dateSeance = `${m[1]}-${m[2]}-${m[3]}`;
240
- }
241
- dateSeance = toCRDate(dateSeance, null);
242
- return {
243
- dateSeance,
244
- dateSeanceJour: dateSeance,
245
- numSeanceJour: "",
246
- numSeance: "",
247
- typeAssemblee: "SN",
248
- legislature: "",
249
- session: sessionMatch?.[1] || "",
250
- nomFichierJo: "",
251
- validite: "",
252
- etat: "",
253
- diffusion: "",
254
- version: "1.0",
255
- environnement: "",
256
- heureGeneration: new Date(),
257
- };
258
- }
259
- function elementInAnyInterval(el, idx, intervals) {
260
- const p = idx.get(el);
261
- if (p == null)
262
- return false;
263
- for (const iv of intervals)
264
- if (p >= iv.start && p < iv.end)
265
- return true;
266
- return false;
267
- }
@@ -1,146 +0,0 @@
1
- import { InferResult } from "kysely";
2
- export type SenateurResult = InferResult<typeof findAllQuery>[0];
3
- export type CirconscriptionResult = InferResult<typeof findAllCirconscriptionsQuery>[0];
4
- export type OrganismeResult = InferResult<typeof findAllOrganismesQuery>[0];
5
- declare const findAllQuery: import("kysely").SelectQueryBuilder<{
6
- [x: string]: any;
7
- [x: number]: any;
8
- [x: symbol]: any;
9
- }, "sen" | "etasen" | "pcs" | "pcs42" | "pcs24" | "pcs8" | "grppol" | "com", {
10
- [x: string]: any;
11
- date_naissance: string;
12
- date_deces: string;
13
- urls: {
14
- code_url: any;
15
- url: any;
16
- }[];
17
- mandats_senateur: {
18
- [x: string]: any;
19
- date_debut: string;
20
- date_fin: string;
21
- }[];
22
- commissions: {
23
- [x: string]: any;
24
- date_debut: string;
25
- date_fin: string;
26
- fonctions: {
27
- date_debut: string;
28
- date_fin: string;
29
- libelle: unknown;
30
- }[];
31
- }[];
32
- delegations: {
33
- [x: string]: any;
34
- date_debut: string;
35
- date_fin: string;
36
- fonctions: {
37
- date_debut: string;
38
- date_fin: string;
39
- libelle: unknown;
40
- }[];
41
- }[];
42
- groupes: {
43
- [x: string]: any;
44
- date_debut: string;
45
- date_fin: string;
46
- fonctions: {
47
- date_debut: string;
48
- date_fin: string;
49
- libelle: unknown;
50
- }[];
51
- }[];
52
- fonctions_bureau: {
53
- date_debut: string;
54
- date_fin: string;
55
- libelle: unknown;
56
- }[];
57
- }>;
58
- declare const findAllCirconscriptionsQuery: import("kysely").SelectQueryBuilder<{
59
- [x: string]: any;
60
- [x: number]: any;
61
- [x: symbol]: any;
62
- }, "dpt" | "reg", {
63
- [x: string]: any;
64
- date_debut: string;
65
- date_fin: string;
66
- }>;
67
- declare const findAllOrganismesQuery: import("kysely").SelectQueryBuilder<{
68
- [x: string]: any;
69
- [x: number]: any;
70
- [x: symbol]: any;
71
- }, "typorg" | "all_organismes", {
72
- [x: string]: any;
73
- url: string;
74
- date_debut: string;
75
- date_fin: string;
76
- libelle_long: string;
77
- type_libelle: string;
78
- }>;
79
- export declare function findAll(): AsyncIterableIterator<{
80
- [x: string]: any;
81
- date_naissance: string;
82
- date_deces: string;
83
- urls: {
84
- code_url: any;
85
- url: any;
86
- }[];
87
- mandats_senateur: {
88
- [x: string]: any;
89
- date_debut: string;
90
- date_fin: string;
91
- }[];
92
- commissions: {
93
- [x: string]: any;
94
- date_debut: string;
95
- date_fin: string;
96
- fonctions: {
97
- date_debut: string;
98
- date_fin: string;
99
- libelle: unknown;
100
- }[];
101
- }[];
102
- delegations: {
103
- [x: string]: any;
104
- date_debut: string;
105
- date_fin: string;
106
- fonctions: {
107
- date_debut: string;
108
- date_fin: string;
109
- libelle: unknown;
110
- }[];
111
- }[];
112
- groupes: {
113
- [x: string]: any;
114
- date_debut: string;
115
- date_fin: string;
116
- fonctions: {
117
- date_debut: string;
118
- date_fin: string;
119
- libelle: unknown;
120
- }[];
121
- }[];
122
- fonctions_bureau: {
123
- date_debut: string;
124
- date_fin: string;
125
- libelle: unknown;
126
- }[];
127
- }>;
128
- export declare function findAllCirconscriptions(): AsyncIterableIterator<{
129
- [x: string]: any;
130
- date_debut: string;
131
- date_fin: string;
132
- }>;
133
- export declare function findAllOrganismes(): AsyncIterableIterator<{
134
- [x: string]: any;
135
- url: string;
136
- date_debut: string;
137
- date_fin: string;
138
- libelle_long: string;
139
- type_libelle: string;
140
- }>;
141
- export declare function findActif(): AsyncIterableIterator<{
142
- senmat: string;
143
- sennomuse: string;
144
- senprenomuse: string;
145
- }>;
146
- export {};