@tricoteuses/senat 2.10.5 → 2.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/lib/databases.d.ts +1 -28
  2. package/lib/databases.js +0 -6
  3. package/lib/datasets.d.ts +6 -0
  4. package/lib/datasets.js +233 -0
  5. package/lib/loaders.d.ts +5 -0
  6. package/lib/loaders.js +14 -9
  7. package/lib/model/ameli.d.ts +31 -143
  8. package/lib/model/ameli.js +102 -95
  9. package/lib/model/commission.d.ts +5 -0
  10. package/lib/model/commission.js +263 -0
  11. package/lib/model/debats.d.ts +13 -51
  12. package/lib/model/documents.d.ts +2 -0
  13. package/lib/model/documents.js +37 -0
  14. package/lib/model/dosleg.d.ts +9 -104
  15. package/lib/model/dosleg.js +76 -108
  16. package/lib/model/index.d.ts +4 -2
  17. package/lib/model/index.js +4 -2
  18. package/lib/model/questions.d.ts +10 -458
  19. package/lib/model/scrutins.d.ts +3 -0
  20. package/lib/model/scrutins.js +74 -0
  21. package/lib/model/{compte_rendu.js → seance.js} +47 -28
  22. package/lib/model/sens.d.ts +28 -1002
  23. package/lib/model/sens.js +65 -33
  24. package/lib/model/util.d.ts +1 -0
  25. package/lib/model/util.js +19 -1
  26. package/lib/raw_types/ameli.d.ts +778 -1521
  27. package/lib/raw_types/ameli.js +5 -345
  28. package/lib/raw_types/debats.d.ts +163 -306
  29. package/lib/raw_types/debats.js +5 -84
  30. package/lib/raw_types/dosleg.d.ts +1349 -2293
  31. package/lib/raw_types/dosleg.js +5 -550
  32. package/lib/raw_types/questions.d.ts +374 -519
  33. package/lib/raw_types/questions.js +5 -84
  34. package/lib/raw_types/senat.d.ts +11389 -0
  35. package/lib/raw_types/senat.js +5 -0
  36. package/lib/raw_types/sens.d.ts +6729 -12571
  37. package/lib/raw_types/sens.js +5 -2944
  38. package/lib/raw_types_schemats/ameli.d.ts +2 -2
  39. package/lib/raw_types_schemats/debats.d.ts +2 -2
  40. package/lib/raw_types_schemats/dosleg.d.ts +2 -2
  41. package/lib/raw_types_schemats/questions.d.ts +2 -2
  42. package/lib/raw_types_schemats/sens.d.ts +2 -2
  43. package/lib/scripts/convert_data.js +37 -31
  44. package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
  45. package/lib/scripts/retrieve_cr_commission.js +291 -0
  46. package/lib/scripts/{retrieve_comptes_rendus.js → retrieve_cr_seance.js} +1 -1
  47. package/lib/scripts/retrieve_open_data.js +35 -1
  48. package/lib/utils/cr_spliting.d.ts +22 -1
  49. package/lib/utils/cr_spliting.js +273 -12
  50. package/lib/utils/reunion_grouping.d.ts +3 -0
  51. package/lib/utils/reunion_grouping.js +1 -1
  52. package/package.json +12 -11
  53. /package/lib/model/{compte_rendu.d.ts → seance.d.ts} +0 -0
  54. /package/lib/scripts/{retrieve_comptes_rendus.d.ts → retrieve_cr_seance.d.ts} +0 -0
@@ -2,9 +2,9 @@ import fs from "fs";
2
2
  import * as cheerio from "cheerio";
3
3
  import path from "path";
4
4
  import { computeIntervalsBySlot } from "../utils/cr_spliting";
5
- import { norm } from "./util";
6
- const asArray = (x) => x == null ? [] : Array.isArray(x) ? x : [x];
7
- const toInt = (s) => Number.isFinite(Number(s)) ? Number(s) : Number.POSITIVE_INFINITY;
5
+ import { norm, toCRDate } from "./util";
6
+ const asArray = (x) => (x == null ? [] : Array.isArray(x) ? x : [x]);
7
+ const toInt = (s) => (Number.isFinite(Number(s)) ? Number(s) : Number.POSITIVE_INFINITY);
8
8
  export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firstSlotOfDay) {
9
9
  try {
10
10
  const raw = fs.readFileSync(xmlFilePath, "utf8");
@@ -13,7 +13,7 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
13
13
  const order = $("body *").toArray();
14
14
  const idx = new Map(order.map((el, i) => [el, i]));
15
15
  const intervalsAll = computeIntervalsBySlot($, idx, firstSlotOfDay);
16
- const intervals = intervalsAll.filter(iv => iv.slot === wantedSlot);
16
+ const intervals = intervalsAll.filter((iv) => iv.slot === wantedSlot);
17
17
  if (intervals.length === 0) {
18
18
  console.warn(`[CRI] no intervals for ${path.basename(xmlFilePath)} [${wantedSlot}]`);
19
19
  return null;
@@ -35,7 +35,8 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
35
35
  if (!elementInAnyInterval(block, idx, intervals))
36
36
  return;
37
37
  const $block = $(block);
38
- $block.find([
38
+ $block
39
+ .find([
39
40
  "p[class^='titre_S']",
40
41
  "p.mention_titre",
41
42
  "p.intitule_titre",
@@ -45,7 +46,8 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
45
46
  "p.intitule_article",
46
47
  "p.mention_section",
47
48
  "p.intitule_section",
48
- ].join(",")).remove();
49
+ ].join(","))
50
+ .remove();
49
51
  const firstP = $block.find("p").first();
50
52
  const speakerLabelRaw = firstP.find(".orateur_nom").text() || firstP.find("a.lien_senfic").text() || "";
51
53
  const speakerLabel = dedupeSpeaker(speakerLabelRaw);
@@ -98,7 +100,7 @@ export function parseYYYYMMDD(yyyymmdd) {
98
100
  }
99
101
  export function deriveTitreObjetFromSommaire(sommaire, slot) {
100
102
  const items = extractLevel1Items(sommaire);
101
- const meaningful = items.filter(it => !isBoilerplate(it.label));
103
+ const meaningful = items.filter((it) => !isBoilerplate(it.label));
102
104
  if (meaningful.length === 0) {
103
105
  return {
104
106
  titre: `Séance publique ${slotLabel(slot)}`,
@@ -106,15 +108,22 @@ export function deriveTitreObjetFromSommaire(sommaire, slot) {
106
108
  };
107
109
  }
108
110
  const titre = meaningful[0].label;
109
- const objet = meaningful.slice(0, 3).map(it => it.label).join(" ; ");
111
+ const objet = meaningful
112
+ .slice(0, 3)
113
+ .map((it) => it.label)
114
+ .join(" ; ");
110
115
  return { titre, objet };
111
116
  }
112
117
  function slotLabel(slot) {
113
118
  switch (slot) {
114
- case "MATIN": return "du matin";
115
- case "APRES-MIDI": return "de l’après-midi";
116
- case "SOIR": return "du soir";
117
- default: return "";
119
+ case "MATIN":
120
+ return "du matin";
121
+ case "APRES-MIDI":
122
+ return "de l’après-midi";
123
+ case "SOIR":
124
+ return "du soir";
125
+ default:
126
+ return "";
118
127
  }
119
128
  }
120
129
  const BOILERPLATE_PATTERNS = [
@@ -132,18 +141,20 @@ const BOILERPLATE_PATTERNS = [
132
141
  /ouverture de la séance/i,
133
142
  /clo(?:t|̂)ure de la séance/i,
134
143
  ];
135
- const isBoilerplate = (label) => !label?.trim() || BOILERPLATE_PATTERNS.some(rx => rx.test(label));
144
+ const isBoilerplate = (label) => !label?.trim() || BOILERPLATE_PATTERNS.some((rx) => rx.test(label));
136
145
  function extractLevel1Items(sommaire) {
137
146
  const level1 = asArray(sommaire?.sommaire1);
138
147
  return level1
139
- .map(el => ({
148
+ .map((el) => ({
140
149
  numero: toInt(el?.valeur_pts_odj),
141
150
  label: String(el?.titreStruct?.intitule ?? "").trim(),
142
151
  }))
143
- .filter(it => !!it.label)
152
+ .filter((it) => !!it.label)
144
153
  .sort((a, b) => a.numero - b.numero);
145
154
  }
146
- function stripTrailingPunct(s) { return s.replace(/\s*([:,.;])\s*$/u, "").trim(); }
155
+ function stripTrailingPunct(s) {
156
+ return s.replace(/\s*([:,.;])\s*$/u, "").trim();
157
+ }
147
158
  function dedupeSpeaker(raw) {
148
159
  let s = norm(raw);
149
160
  s = stripTrailingPunct(s);
@@ -158,7 +169,8 @@ function dedupeSpeaker(raw) {
158
169
  return s.replace(/\.\s*$/, "");
159
170
  }
160
171
  function decodeHtmlEntities(s) {
161
- return s.replace(/&#(\d+);/g, (_, d) => String.fromCharCode(parseInt(d, 10)))
172
+ return s
173
+ .replace(/&#(\d+);/g, (_, d) => String.fromCharCode(parseInt(d, 10)))
162
174
  .replace(/&#x([0-9a-fA-F]+);/g, (_, h) => String.fromCharCode(parseInt(h, 16)));
163
175
  }
164
176
  function fixApostrophes(s) {
@@ -169,7 +181,9 @@ function fixApostrophes(s) {
169
181
  out = out.replace(/\s+([,;:.!?])/g, "$1");
170
182
  return out;
171
183
  }
172
- function normalizeTitle(text) { return text.replace(/^PR[ÉE]SIDENCE DE\b/i, "Présidence de "); }
184
+ function normalizeTitle(text) {
185
+ return text.replace(/^PR[ÉE]SIDENCE DE\b/i, "Présidence de ");
186
+ }
173
187
  function roleForSpeaker(labelOrQualite) {
174
188
  const s = (labelOrQualite || "").toLowerCase();
175
189
  if (/^(m\.|mme)?\s*(le|la)\s+pr[ée]sident(e)?\b/.test(s) || /\bpr[ée]sident[e]?\s+de\s+séance\b/.test(s))
@@ -177,7 +191,7 @@ function roleForSpeaker(labelOrQualite) {
177
191
  return "";
178
192
  }
179
193
  function readIntervenantMeta($block) {
180
- const int = $block.find('cri\\:intervenant').first();
194
+ const int = $block.find("cri\\:intervenant").first();
181
195
  if (int.length)
182
196
  return { mat: int.attr("mat") || undefined, nom: int.attr("nom") || undefined, qua: int.attr("qua") || undefined };
183
197
  const html = $block.html() || "";
@@ -220,6 +234,7 @@ function extractAndRemoveLeadingQualite($, $block) {
220
234
  else if (node.type === "text") {
221
235
  const t = norm(node.data || "");
222
236
  if (!t || /^[:.,;–—-]+$/.test(t)) {
237
+ ;
223
238
  node.data = "";
224
239
  return;
225
240
  }
@@ -230,11 +245,11 @@ function extractAndRemoveLeadingQualite($, $block) {
230
245
  }
231
246
  function sanitizeInterventionHtml($, $block) {
232
247
  const $clone = $block.clone();
233
- $clone.find('a[name]').remove();
248
+ $clone.find("a[name]").remove();
234
249
  $clone.find('div[align="right"]').remove();
235
- $clone.find('a.link').remove();
236
- $clone.find('img').remove();
237
- $clone.find('a#ameli_amendement_cri_phrase, a#ameli_amendement_cra_contenu, a#ameli_amendement_cra_objet').remove();
250
+ $clone.find("a.link").remove();
251
+ $clone.find("img").remove();
252
+ $clone.find("a#ameli_amendement_cri_phrase, a#ameli_amendement_cra_contenu, a#ameli_amendement_cra_objet").remove();
238
253
  $clone.find(".orateur_nom, .orateur_qualite").remove();
239
254
  let html = $clone.html() || "";
240
255
  html = html.replace(/<!--[\s\S]*?-->/g, "");
@@ -244,11 +259,14 @@ function extractSommaireForIntervals($, idx, intervals) {
244
259
  const inIv = (el) => elementInAnyInterval(el, idx, intervals);
245
260
  const root = $("body");
246
261
  const sommaire = { presidentSeance: { _: "" }, sommaire1: [] };
247
- // (1) Présidence (tm2) — première ligne dans l’intervalle
248
- const pres = root.find("p.tm2").filter((_, el) => inIv(el)).first();
262
+ // (1) Présidence (tm2) — première ligne dans l’intervalle
263
+ const pres = root
264
+ .find("p.tm2")
265
+ .filter((_, el) => inIv(el))
266
+ .first();
249
267
  if (pres.length)
250
268
  sommaire.presidentSeance = { _: norm(pres.text()) };
251
- // (2) Paras tm5 présents dans l’intervalle
269
+ // (2) Paras tm5 présents dans l’intervalle
252
270
  const paras = [];
253
271
  root.find("p.tm5").each((_, el) => {
254
272
  if (!inIv(el))
@@ -259,7 +277,7 @@ function extractSommaireForIntervals($, idx, intervals) {
259
277
  });
260
278
  if (paras.length)
261
279
  sommaire.para = paras.length === 1 ? paras[0] : paras;
262
- // (3) Items de 1er niveau (tm3) présents dans l’intervalle
280
+ // (3) Items de 1er niveau (tm3) présents dans l’intervalle
263
281
  const items = [];
264
282
  root.find("p.tm3").each((_, el) => {
265
283
  if (!inIv(el))
@@ -297,6 +315,7 @@ function extractMetadonnees($, filePath) {
297
315
  if (m)
298
316
  dateSeance = `${m[1]}-${m[2]}-${m[3]}`;
299
317
  }
318
+ dateSeance = toCRDate(dateSeance, null);
300
319
  return {
301
320
  dateSeance,
302
321
  dateSeanceJour: dateSeance,
@@ -311,7 +330,7 @@ function extractMetadonnees($, filePath) {
311
330
  diffusion: "",
312
331
  version: "1.0",
313
332
  environnement: "",
314
- heureGeneration: new Date()
333
+ heureGeneration: new Date(),
315
334
  };
316
335
  }
317
336
  function elementInAnyInterval(el, idx, intervals) {