@tricoteuses/senat 2.22.0 → 2.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/package.json +2 -2
  2. package/lib/config.d.ts +0 -3
  3. package/lib/config.js +0 -16
  4. package/lib/databases.d.ts +0 -2
  5. package/lib/databases.js +0 -26
  6. package/lib/datasets.d.ts +0 -34
  7. package/lib/datasets.js +0 -233
  8. package/lib/git.d.ts +0 -26
  9. package/lib/git.js +0 -167
  10. package/lib/index.d.ts +0 -13
  11. package/lib/index.js +0 -1
  12. package/lib/loaders.d.ts +0 -58
  13. package/lib/loaders.js +0 -286
  14. package/lib/model/agenda.d.ts +0 -6
  15. package/lib/model/agenda.js +0 -148
  16. package/lib/model/ameli.d.ts +0 -51
  17. package/lib/model/ameli.js +0 -149
  18. package/lib/model/commission.d.ts +0 -18
  19. package/lib/model/commission.js +0 -269
  20. package/lib/model/debats.d.ts +0 -67
  21. package/lib/model/debats.js +0 -95
  22. package/lib/model/documents.d.ts +0 -12
  23. package/lib/model/documents.js +0 -151
  24. package/lib/model/dosleg.d.ts +0 -7
  25. package/lib/model/dosleg.js +0 -326
  26. package/lib/model/index.d.ts +0 -7
  27. package/lib/model/index.js +0 -7
  28. package/lib/model/questions.d.ts +0 -45
  29. package/lib/model/questions.js +0 -89
  30. package/lib/model/scrutins.d.ts +0 -13
  31. package/lib/model/scrutins.js +0 -114
  32. package/lib/model/seance.d.ts +0 -3
  33. package/lib/model/seance.js +0 -267
  34. package/lib/model/sens.d.ts +0 -146
  35. package/lib/model/sens.js +0 -454
  36. package/lib/model/texte.d.ts +0 -7
  37. package/lib/model/texte.js +0 -228
  38. package/lib/model/util.d.ts +0 -9
  39. package/lib/model/util.js +0 -38
  40. package/lib/parsers/texte.d.ts +0 -7
  41. package/lib/parsers/texte.js +0 -228
  42. package/lib/raw_types/ameli.d.ts +0 -914
  43. package/lib/raw_types/ameli.js +0 -5
  44. package/lib/raw_types/debats.d.ts +0 -207
  45. package/lib/raw_types/debats.js +0 -5
  46. package/lib/raw_types/dosleg.d.ts +0 -1619
  47. package/lib/raw_types/dosleg.js +0 -5
  48. package/lib/raw_types/questions.d.ts +0 -419
  49. package/lib/raw_types/questions.js +0 -5
  50. package/lib/raw_types/senat.d.ts +0 -11368
  51. package/lib/raw_types/senat.js +0 -5
  52. package/lib/raw_types/sens.d.ts +0 -8248
  53. package/lib/raw_types/sens.js +0 -5
  54. package/lib/raw_types_schemats/ameli.d.ts +0 -539
  55. package/lib/raw_types_schemats/ameli.js +0 -2
  56. package/lib/raw_types_schemats/debats.d.ts +0 -127
  57. package/lib/raw_types_schemats/debats.js +0 -2
  58. package/lib/raw_types_schemats/dosleg.d.ts +0 -977
  59. package/lib/raw_types_schemats/dosleg.js +0 -2
  60. package/lib/raw_types_schemats/questions.d.ts +0 -235
  61. package/lib/raw_types_schemats/questions.js +0 -2
  62. package/lib/raw_types_schemats/sens.d.ts +0 -6915
  63. package/lib/raw_types_schemats/sens.js +0 -2
  64. package/lib/scripts/convert_data.d.ts +0 -1
  65. package/lib/scripts/convert_data.js +0 -354
  66. package/lib/scripts/data-download.d.ts +0 -1
  67. package/lib/scripts/data-download.js +0 -12
  68. package/lib/scripts/datautil.d.ts +0 -8
  69. package/lib/scripts/datautil.js +0 -34
  70. package/lib/scripts/parse_textes.d.ts +0 -1
  71. package/lib/scripts/parse_textes.js +0 -44
  72. package/lib/scripts/retrieve_agenda.d.ts +0 -1
  73. package/lib/scripts/retrieve_agenda.js +0 -132
  74. package/lib/scripts/retrieve_cr_commission.d.ts +0 -1
  75. package/lib/scripts/retrieve_cr_commission.js +0 -364
  76. package/lib/scripts/retrieve_cr_seance.d.ts +0 -6
  77. package/lib/scripts/retrieve_cr_seance.js +0 -347
  78. package/lib/scripts/retrieve_documents.d.ts +0 -3
  79. package/lib/scripts/retrieve_documents.js +0 -219
  80. package/lib/scripts/retrieve_open_data.d.ts +0 -1
  81. package/lib/scripts/retrieve_open_data.js +0 -315
  82. package/lib/scripts/retrieve_senateurs_photos.d.ts +0 -1
  83. package/lib/scripts/retrieve_senateurs_photos.js +0 -147
  84. package/lib/scripts/retrieve_videos.d.ts +0 -1
  85. package/lib/scripts/retrieve_videos.js +0 -461
  86. package/lib/scripts/shared/cli_helpers.d.ts +0 -95
  87. package/lib/scripts/shared/cli_helpers.js +0 -91
  88. package/lib/scripts/shared/util.d.ts +0 -4
  89. package/lib/scripts/shared/util.js +0 -35
  90. package/lib/scripts/test_iter_load.d.ts +0 -1
  91. package/lib/scripts/test_iter_load.js +0 -12
  92. package/lib/strings.d.ts +0 -1
  93. package/lib/strings.js +0 -18
  94. package/lib/types/agenda.d.ts +0 -44
  95. package/lib/types/agenda.js +0 -1
  96. package/lib/types/ameli.d.ts +0 -5
  97. package/lib/types/ameli.js +0 -1
  98. package/lib/types/compte_rendu.d.ts +0 -83
  99. package/lib/types/compte_rendu.js +0 -1
  100. package/lib/types/debats.d.ts +0 -2
  101. package/lib/types/debats.js +0 -1
  102. package/lib/types/dosleg.d.ts +0 -70
  103. package/lib/types/dosleg.js +0 -1
  104. package/lib/types/questions.d.ts +0 -2
  105. package/lib/types/questions.js +0 -1
  106. package/lib/types/sens.d.ts +0 -10
  107. package/lib/types/sens.js +0 -1
  108. package/lib/types/sessions.d.ts +0 -5
  109. package/lib/types/sessions.js +0 -84
  110. package/lib/types/texte.d.ts +0 -74
  111. package/lib/types/texte.js +0 -16
  112. package/lib/utils/cr_spliting.d.ts +0 -28
  113. package/lib/utils/cr_spliting.js +0 -265
  114. package/lib/utils/date.d.ts +0 -10
  115. package/lib/utils/date.js +0 -100
  116. package/lib/utils/nvs-timecode.d.ts +0 -7
  117. package/lib/utils/nvs-timecode.js +0 -79
  118. package/lib/utils/reunion_grouping.d.ts +0 -11
  119. package/lib/utils/reunion_grouping.js +0 -337
  120. package/lib/utils/reunion_odj_building.d.ts +0 -5
  121. package/lib/utils/reunion_odj_building.js +0 -154
  122. package/lib/utils/reunion_parsing.d.ts +0 -23
  123. package/lib/utils/reunion_parsing.js +0 -209
  124. package/lib/utils/scoring.d.ts +0 -14
  125. package/lib/utils/scoring.js +0 -147
  126. package/lib/utils/string_cleaning.d.ts +0 -7
  127. package/lib/utils/string_cleaning.js +0 -57
  128. package/lib/validators/config.d.ts +0 -1
  129. package/lib/validators/config.js +0 -54
@@ -1,267 +0,0 @@
1
- import fs from "fs";
2
- import * as cheerio from "cheerio";
3
- import { toCRDate } from "./util";
4
- import { makeReunionUid } from "../utils/reunion_parsing";
5
- import { yyyymmddFromPath } from "../utils/date";
6
- import { decodeHtmlEntities, dedupeSpeaker, fixApostrophes, norm } from "../utils/string_cleaning";
7
- export async function parseCompteRenduIntervalFromFile(xmlFilePath, startIndex, endIndex, agendaEventId) {
8
- try {
9
- const raw = fs.readFileSync(xmlFilePath, "utf8");
10
- const $ = cheerio.load(raw, { xml: false });
11
- const metadonnees = extractMetadonnees($, xmlFilePath);
12
- const order = $("body *").toArray();
13
- const idx = new Map(order.map((el, i) => [el, i]));
14
- const totalNodes = order.length;
15
- const clampedStart = Math.max(0, Math.min(startIndex, totalNodes - 1));
16
- const clampedEnd = Math.max(0, Math.min(endIndex, totalNodes - 1));
17
- const intervals = [
18
- {
19
- start: clampedStart,
20
- end: clampedEnd,
21
- },
22
- ];
23
- metadonnees.sommaire = extractSommaireForIntervals($, idx, intervals);
24
- const points = [];
25
- let ordre = 0;
26
- const addPoint = (p) => points.push({ ...p, ordre_absolu_seance: String(++ordre) });
27
- // Interventions
28
- $("div.intervenant").each((_, block) => {
29
- if (!elementInAnyInterval(block, idx, intervals))
30
- return;
31
- const $block = $(block);
32
- $block
33
- .find([
34
- "p[class^='titre_S']",
35
- "p.mention_titre",
36
- "p.intitule_titre",
37
- "p.mention_chapitre",
38
- "p.intitule_chapitre",
39
- "p.mention_article",
40
- "p.intitule_article",
41
- "p.mention_section",
42
- "p.intitule_section",
43
- ].join(","))
44
- .remove();
45
- const firstP = $block.find("p").first();
46
- if (!firstP || firstP.length === 0)
47
- return;
48
- const speakerLabelRaw = firstP.find(".orateur_nom").text() || firstP.find("a.lien_senfic").text() || "";
49
- const speakerLabel = dedupeSpeaker(speakerLabelRaw);
50
- const { mat, nom: nomCRI, qua: quaCRI } = readIntervenantMeta($block);
51
- const qualFromSpans = extractAndRemoveLeadingQualite($, $block);
52
- const qualite = norm(decodeHtmlEntities(quaCRI || "")) || qualFromSpans;
53
- const canonicalName = dedupeSpeaker(nomCRI || speakerLabel);
54
- const role = roleForSpeaker(speakerLabel) || roleForSpeaker(qualite) || roleForSpeaker(quaCRI || "");
55
- const speechHtml = sanitizeInterventionHtml($, $block);
56
- const speechText = norm(cheerio.load(speechHtml).text() || "");
57
- if (!speechText)
58
- return;
59
- addPoint({
60
- code_grammaire: "PAROLE_GENERIQUE",
61
- roledebat: role,
62
- orateurs: { orateur: { nom: canonicalName, id: mat || "", qualite } },
63
- texte: { _: speechHtml },
64
- });
65
- });
66
- const contenu = {
67
- quantiemes: {
68
- journee: metadonnees.dateSeance,
69
- session: metadonnees.session,
70
- },
71
- point: points,
72
- };
73
- const yyyymmdd = yyyymmddFromPath(xmlFilePath);
74
- const dateISO = `${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`;
75
- const seanceRef = makeReunionUid(dateISO, "SP", agendaEventId, null);
76
- return {
77
- uid: `CRSSN${yyyymmdd}E${agendaEventId}`,
78
- seanceRef,
79
- sessionRef: metadonnees.session,
80
- metadonnees,
81
- contenu,
82
- };
83
- }
84
- catch (e) {
85
- console.error(`[CRI] parseInterval error file=${xmlFilePath} interval=[${startIndex}..${endIndex}] event=${agendaEventId}:`, e);
86
- return null;
87
- }
88
- }
89
- export function sessionStartYearFromDate(d) {
90
- // Session (1th oct N → 30 sept N+1)
91
- const m = d.getMonth();
92
- const y = d.getFullYear();
93
- return m >= 9 ? y : y - 1;
94
- }
95
- function roleForSpeaker(labelOrQualite) {
96
- const s = (labelOrQualite || "").toLowerCase();
97
- if (/^(m\.|mme)?\s*(le|la)\s+pr[ée]sident(e)?\b/.test(s) || /\bpr[ée]sident[e]?\s+de\s+séance\b/.test(s))
98
- return "président";
99
- return "";
100
- }
101
- function readIntervenantMeta($block) {
102
- const int = $block.find("cri\\:intervenant").first();
103
- if (int.length)
104
- return { mat: int.attr("mat") || undefined, nom: int.attr("nom") || undefined, qua: int.attr("qua") || undefined };
105
- const html = $block.html() || "";
106
- const m = html.match(/<!--\s*cri:intervenant\b([^>]+)-->/i);
107
- if (!m)
108
- return {};
109
- const out = {};
110
- const re = /(\w+)="([^"]*)"/g;
111
- let a;
112
- while ((a = re.exec(m[1])))
113
- out[a[1]] = decodeHtmlEntities(a[2]);
114
- return { mat: out["mat"], nom: out["nom"], qua: out["qua"] };
115
- }
116
- function extractAndRemoveLeadingQualite($, $block) {
117
- const firstP = $block.find("p").first();
118
- if (firstP.length === 0)
119
- return "";
120
- const parts = [];
121
- let stop = false;
122
- firstP.contents().each((_, node) => {
123
- if (stop)
124
- return;
125
- if (node.type === "tag") {
126
- const $node = $(node);
127
- if ($node.hasClass("orateur_nom")) {
128
- $node.remove();
129
- return;
130
- }
131
- if ($node.hasClass("orateur_qualite")) {
132
- parts.push($node.text() || "");
133
- $node.remove();
134
- return;
135
- }
136
- const t = norm($node.text() || "");
137
- if (t)
138
- stop = true;
139
- else
140
- $node.remove();
141
- }
142
- else if (node.type === "text") {
143
- const t = norm(node.data || "");
144
- if (!t || /^[:.,;–—-]+$/.test(t)) {
145
- ;
146
- node.data = "";
147
- return;
148
- }
149
- stop = true;
150
- }
151
- });
152
- return fixApostrophes(norm(parts.join(" ")));
153
- }
154
- function sanitizeInterventionHtml($, $block) {
155
- const ps = $block.find("p").toArray();
156
- const cleaned = ps
157
- .map((p) => {
158
- const $p = $(p).clone();
159
- $p.find(".orateur_nom, .orateur_qualite").remove();
160
- $p.find("a").each((_, a) => {
161
- const $a = $(a);
162
- $a.replaceWith($a.text());
163
- });
164
- $p.find(".info_entre_parentheses").each((_, el) => {
165
- const txt = $(el).text();
166
- $(el).replaceWith($("<em/>").text(txt));
167
- });
168
- $p.find("span").each((_, span) => {
169
- const $s = $(span);
170
- if (!$s.text().trim())
171
- $s.remove();
172
- });
173
- const inner = ($p.html() || "").trim();
174
- if (!inner)
175
- return null;
176
- return `<p>${inner}</p>`;
177
- })
178
- .filter(Boolean);
179
- return cleaned.join("<br/>");
180
- }
181
- function extractSommaireForIntervals($, idx, intervals) {
182
- const inIv = (el) => elementInAnyInterval(el, idx, intervals);
183
- const root = $("body");
184
- const sommaire = { presidentSeance: { _: "" }, sommaire1: [] };
185
- // (1) Présidence (tm2) — première ligne dans l’intervalle
186
- const pres = root
187
- .find("p.tm2")
188
- .filter((_, el) => inIv(el))
189
- .first();
190
- if (pres.length)
191
- sommaire.presidentSeance = { _: norm(pres.text()) };
192
- // (2) Paras tm5 présents dans l’intervalle
193
- const paras = [];
194
- root.find("p.tm5").each((_, el) => {
195
- if (!inIv(el))
196
- return;
197
- const t = norm($(el).text());
198
- if (t)
199
- paras.push({ _: t });
200
- });
201
- if (paras.length)
202
- sommaire.para = paras.length === 1 ? paras[0] : paras;
203
- // (3) Items de 1er niveau (tm3) présents dans l’intervalle
204
- const items = [];
205
- root.find("p.tm3").each((_, el) => {
206
- if (!inIv(el))
207
- return;
208
- const $p = $(el);
209
- const full = norm($p.text() || "");
210
- if (!full)
211
- return;
212
- const numMatch = full.match(/^(\d+)\s*[.\-–—]\s*/);
213
- const valeur = numMatch ? numMatch[1] : undefined;
214
- // prefere intitule in ancre <a> if present
215
- const a = $p.find("a").first();
216
- const intituleRaw = a.length ? a.text() : full.replace(/^(\d+)\s*[.\-–—]\s*/, "");
217
- const intitule = norm(intituleRaw);
218
- // id_syceron from href="#Niv1_SOMx"
219
- const href = (a.attr("href") || "").trim();
220
- const idSyceron = href.startsWith("#") ? href.slice(1) : href;
221
- const titreStruct = { id_syceron: idSyceron || "", intitule };
222
- items.push({ valeur_pts_odj: valeur, titreStruct });
223
- });
224
- if (items.length)
225
- sommaire.sommaire1 = items;
226
- return sommaire;
227
- }
228
- function extractMetadonnees($, filePath) {
229
- let dateText = norm($("h1, h2, .page-title").first().text() || "");
230
- if (!dateText)
231
- dateText = norm($("p").first().text() || "");
232
- const dateMatch = dateText.match(/\b(\d{1,2}\s+\w+\s+\d{4})\b/i);
233
- const allText = norm($("body").text() || "");
234
- const sessionMatch = allText.match(/\bsession\s+(\d{4}-\d{4})\b/i);
235
- let dateSeance = dateMatch?.[1] || "";
236
- if (!dateSeance) {
237
- const m = filePath.match(/d(\d{4})(\d{2})(\d{2})\.xml$/i);
238
- if (m)
239
- dateSeance = `${m[1]}-${m[2]}-${m[3]}`;
240
- }
241
- dateSeance = toCRDate(dateSeance, null);
242
- return {
243
- dateSeance,
244
- dateSeanceJour: dateSeance,
245
- numSeanceJour: "",
246
- numSeance: "",
247
- typeAssemblee: "SN",
248
- legislature: "",
249
- session: sessionMatch?.[1] || "",
250
- nomFichierJo: "",
251
- validite: "",
252
- etat: "",
253
- diffusion: "",
254
- version: "1.0",
255
- environnement: "",
256
- heureGeneration: new Date(),
257
- };
258
- }
259
- function elementInAnyInterval(el, idx, intervals) {
260
- const p = idx.get(el);
261
- if (p == null)
262
- return false;
263
- for (const iv of intervals)
264
- if (p >= iv.start && p < iv.end)
265
- return true;
266
- return false;
267
- }
@@ -1,146 +0,0 @@
1
- import { InferResult } from "kysely";
2
- export type SenateurResult = InferResult<typeof findAllQuery>[0];
3
- export type CirconscriptionResult = InferResult<typeof findAllCirconscriptionsQuery>[0];
4
- export type OrganismeResult = InferResult<typeof findAllOrganismesQuery>[0];
5
- declare const findAllQuery: import("kysely").SelectQueryBuilder<{
6
- [x: string]: any;
7
- [x: number]: any;
8
- [x: symbol]: any;
9
- }, "sen" | "etasen" | "pcs" | "pcs42" | "pcs24" | "pcs8" | "grppol" | "com", {
10
- [x: string]: any;
11
- date_naissance: string;
12
- date_deces: string;
13
- urls: {
14
- code_url: any;
15
- url: any;
16
- }[];
17
- mandats_senateur: {
18
- [x: string]: any;
19
- date_debut: string;
20
- date_fin: string;
21
- }[];
22
- commissions: {
23
- [x: string]: any;
24
- date_debut: string;
25
- date_fin: string;
26
- fonctions: {
27
- date_debut: string;
28
- date_fin: string;
29
- libelle: unknown;
30
- }[];
31
- }[];
32
- delegations: {
33
- [x: string]: any;
34
- date_debut: string;
35
- date_fin: string;
36
- fonctions: {
37
- date_debut: string;
38
- date_fin: string;
39
- libelle: unknown;
40
- }[];
41
- }[];
42
- groupes: {
43
- [x: string]: any;
44
- date_debut: string;
45
- date_fin: string;
46
- fonctions: {
47
- date_debut: string;
48
- date_fin: string;
49
- libelle: unknown;
50
- }[];
51
- }[];
52
- fonctions_bureau: {
53
- date_debut: string;
54
- date_fin: string;
55
- libelle: unknown;
56
- }[];
57
- }>;
58
- declare const findAllCirconscriptionsQuery: import("kysely").SelectQueryBuilder<{
59
- [x: string]: any;
60
- [x: number]: any;
61
- [x: symbol]: any;
62
- }, "dpt" | "reg", {
63
- [x: string]: any;
64
- date_debut: string;
65
- date_fin: string;
66
- }>;
67
- declare const findAllOrganismesQuery: import("kysely").SelectQueryBuilder<{
68
- [x: string]: any;
69
- [x: number]: any;
70
- [x: symbol]: any;
71
- }, "typorg" | "all_organismes", {
72
- [x: string]: any;
73
- url: string;
74
- date_debut: string;
75
- date_fin: string;
76
- libelle_long: string;
77
- type_libelle: string;
78
- }>;
79
- export declare function findAll(): AsyncIterableIterator<{
80
- [x: string]: any;
81
- date_naissance: string;
82
- date_deces: string;
83
- urls: {
84
- code_url: any;
85
- url: any;
86
- }[];
87
- mandats_senateur: {
88
- [x: string]: any;
89
- date_debut: string;
90
- date_fin: string;
91
- }[];
92
- commissions: {
93
- [x: string]: any;
94
- date_debut: string;
95
- date_fin: string;
96
- fonctions: {
97
- date_debut: string;
98
- date_fin: string;
99
- libelle: unknown;
100
- }[];
101
- }[];
102
- delegations: {
103
- [x: string]: any;
104
- date_debut: string;
105
- date_fin: string;
106
- fonctions: {
107
- date_debut: string;
108
- date_fin: string;
109
- libelle: unknown;
110
- }[];
111
- }[];
112
- groupes: {
113
- [x: string]: any;
114
- date_debut: string;
115
- date_fin: string;
116
- fonctions: {
117
- date_debut: string;
118
- date_fin: string;
119
- libelle: unknown;
120
- }[];
121
- }[];
122
- fonctions_bureau: {
123
- date_debut: string;
124
- date_fin: string;
125
- libelle: unknown;
126
- }[];
127
- }>;
128
- export declare function findAllCirconscriptions(): AsyncIterableIterator<{
129
- [x: string]: any;
130
- date_debut: string;
131
- date_fin: string;
132
- }>;
133
- export declare function findAllOrganismes(): AsyncIterableIterator<{
134
- [x: string]: any;
135
- url: string;
136
- date_debut: string;
137
- date_fin: string;
138
- libelle_long: string;
139
- type_libelle: string;
140
- }>;
141
- export declare function findActif(): AsyncIterableIterator<{
142
- senmat: string;
143
- sennomuse: string;
144
- senprenomuse: string;
145
- }>;
146
- export {};