@tricoteuses/senat 3.1.4 → 3.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/lib/src/config.d.ts +43 -0
  2. package/lib/src/config.js +37 -0
  3. package/lib/src/conversion_textes.d.ts +11 -0
  4. package/lib/src/conversion_textes.js +320 -0
  5. package/lib/src/databases_postgres.d.ts +4 -0
  6. package/lib/src/databases_postgres.js +23 -0
  7. package/lib/src/datasets.d.ts +38 -0
  8. package/lib/src/datasets.js +247 -0
  9. package/lib/src/git.d.ts +27 -0
  10. package/lib/src/git.js +251 -0
  11. package/lib/src/loaders.d.ts +52 -0
  12. package/lib/src/loaders.js +260 -0
  13. package/lib/src/model/agenda.d.ts +6 -0
  14. package/lib/src/model/agenda.js +148 -0
  15. package/lib/src/model/ameli.d.ts +67 -0
  16. package/lib/src/model/ameli.js +150 -0
  17. package/lib/src/model/commission.d.ts +19 -0
  18. package/lib/src/model/commission.js +269 -0
  19. package/lib/src/model/debats.d.ts +39 -0
  20. package/lib/src/model/debats.js +112 -0
  21. package/lib/src/model/documents.d.ts +32 -0
  22. package/lib/src/model/documents.js +182 -0
  23. package/lib/src/model/dosleg.d.ts +144 -0
  24. package/lib/src/model/dosleg.js +468 -0
  25. package/lib/src/model/index.d.ts +7 -0
  26. package/lib/src/model/index.js +7 -0
  27. package/lib/src/model/questions.d.ts +54 -0
  28. package/lib/src/model/questions.js +91 -0
  29. package/lib/src/model/scrutins.d.ts +48 -0
  30. package/lib/src/model/scrutins.js +121 -0
  31. package/lib/src/model/seance.d.ts +3 -0
  32. package/lib/src/model/seance.js +267 -0
  33. package/lib/src/model/sens.d.ts +112 -0
  34. package/lib/src/model/sens.js +385 -0
  35. package/lib/src/model/util.d.ts +1 -0
  36. package/lib/src/model/util.js +15 -0
  37. package/lib/src/raw_types/ameli.d.ts +1762 -0
  38. package/lib/src/raw_types/ameli.js +1074 -0
  39. package/lib/src/raw_types/debats.d.ts +380 -0
  40. package/lib/src/raw_types/debats.js +266 -0
  41. package/lib/src/raw_types/dosleg.d.ts +2954 -0
  42. package/lib/src/raw_types/dosleg.js +2005 -0
  43. package/lib/src/raw_types/questions.d.ts +699 -0
  44. package/lib/src/raw_types/questions.js +493 -0
  45. package/lib/src/raw_types/sens.d.ts +7843 -0
  46. package/lib/src/raw_types/sens.js +4691 -0
  47. package/lib/src/raw_types_schemats/ameli.d.ts +541 -0
  48. package/lib/src/raw_types_schemats/ameli.js +2 -0
  49. package/lib/src/raw_types_schemats/debats.d.ts +127 -0
  50. package/lib/src/raw_types_schemats/debats.js +2 -0
  51. package/lib/src/raw_types_schemats/dosleg.d.ts +977 -0
  52. package/lib/src/raw_types_schemats/dosleg.js +2 -0
  53. package/lib/src/raw_types_schemats/questions.d.ts +237 -0
  54. package/lib/src/raw_types_schemats/questions.js +2 -0
  55. package/lib/src/raw_types_schemats/sens.d.ts +2709 -0
  56. package/lib/src/raw_types_schemats/sens.js +2 -0
  57. package/lib/src/scripts/debug_dosleg_query.d.ts +6 -0
  58. package/lib/src/scripts/debug_dosleg_query.js +50 -0
  59. package/lib/src/scripts/retrieve_agenda.js +2 -4
  60. package/lib/src/types/agenda.d.ts +45 -0
  61. package/lib/src/types/agenda.js +1 -0
  62. package/lib/src/types/ameli.d.ts +5 -0
  63. package/lib/src/types/ameli.js +1 -0
  64. package/lib/src/types/compte_rendu.d.ts +83 -0
  65. package/lib/src/types/compte_rendu.js +1 -0
  66. package/lib/src/types/debats.d.ts +2 -0
  67. package/lib/src/types/debats.js +1 -0
  68. package/lib/src/types/dosleg.d.ts +70 -0
  69. package/lib/src/types/dosleg.js +1 -0
  70. package/lib/src/types/questions.d.ts +2 -0
  71. package/lib/src/types/questions.js +1 -0
  72. package/lib/src/types/sens.d.ts +8 -0
  73. package/lib/src/types/sens.js +1 -0
  74. package/lib/src/types/sessions.d.ts +6 -0
  75. package/lib/src/types/sessions.js +19 -0
  76. package/lib/src/types/texte.d.ts +72 -0
  77. package/lib/src/types/texte.js +15 -0
  78. package/lib/src/validators/config.d.ts +9 -0
  79. package/lib/src/validators/config.js +10 -0
  80. package/package.json +1 -1
@@ -0,0 +1,91 @@
1
+ import { streamUnsafeQuery } from "../databases_postgres.js";
2
/**
 * Builds the SQL statement that lists every senator question together with
 * its themes and ministerial answers.
 *
 * The statement takes no bind parameters, so `params` is always empty.
 * Numeric columns are cast to text and dates are formatted as `YYYY-MM-DD`.
 * `themes` and `reponses` are JSON arrays built by correlated sub-queries and
 * default to `[]` when nothing matches.
 *
 * Aliases containing upper-case letters (`..._JO`) are double-quoted:
 * PostgreSQL folds unquoted identifiers to lower case, which would otherwise
 * turn `date_publication_JO` into `date_publication_jo` in the returned rows
 * and in the JSON objects produced by `json_agg`.
 *
 * @returns {{ params: never[], query: string }} the SQL text and its bind parameters
 */
function buildFindAllQuestionsQuery() {
    return {
        params: [],
        query: `
      select
        tam_questions.repub::text as republique,
        tam_questions.legislature::text as legislature,
        tam_questions.natquecod as nature,
        naturequestion.natquelib as nature_libelle,
        tam_questions.numero as numero,
        tam_questions.reference as reference,
        tam_questions.titre as titre,
        tam_questions.nom as nom,
        tam_questions.prenom as prenom,
        tam_questions.nomtechnique as nom_technique,
        tam_questions.codequalite as civilite,
        tam_questions.matricule as matricule,
        tam_questions.cirnum::text as circonscription_numero,
        tam_questions.circonscription as circonscription,
        tam_questions.groupe as groupe,
        tam_questions.ratgrp as type_appartenance,
        to_char(tam_questions.datejodepot, 'YYYY-MM-DD') as "date_publication_JO",
        tam_questions.pagejodepot::text as "page_JO",
        rtrim(tam_questions.mindepotlib) as ministere_depot,
        tam_questions.mindepotid::text as ministere_depot_id,
        to_char(tam_ministeres.datedebut, 'YYYY-MM-DD') as ministere_depot_date_debut,
        to_char(tam_questions.datejotran, 'YYYY-MM-DD') as date_transmission,
        tam_questions.mintranlib as ministere_transmission,
        to_char(tam_questions.datejorep1, 'YYYY-MM-DD') as "date_reponse_JO",
        to_char(tam_questions.dateerr, 'YYYY-MM-DD') as "date_erratum_JO",
        tam_questions.pageerr::text as "page_erratum_JO",
        tam_questions.minreplib1 as ministere_reponse,
        to_char(tam_questions.datecloture, 'YYYY-MM-DD') as date_cloture,
        to_char(tam_questions.datesignal, 'YYYY-MM-DD') as date_signalement,
        tam_questions.refquerappelee as reference_question_rappelee,
        tam_questions.txtque as texte,
        tam_questions.txterrque as texte_erratum,
        tam_questions.rubrique as rubrique,
        sortquestion.sorquelib as sort,
        sortquestion.sorquecod as sort_code,
        sortquestion.sorquenumtri::text as sort_tri,
        etatquestion.etaquelib as etat_libelle,
        etatquestion.etaquecod as etat_code,
        etatquestion.etaquenumtri::text as etat_tri,
        (
          select coalesce(json_agg(theme_rows.libelle order by theme_rows.theme_order nulls last), '[]'::json)
          from (
            select
              the.thelib as libelle,
              the.thenouidt as theme_order
            from regexp_matches(coalesce(tam_questions.themes, ''), '#(\\d+)', 'g') as matches(match)
            join senat.questions_the as the on the.thenouidt::text = matches.match[1]
            order by the.thenouidt asc
          ) as theme_rows
        ) as themes,
        (
          select coalesce(json_agg(response_rows order by response_rows."date_reponse_JO" nulls last), '[]'::json)
          from (
            select
              to_char(tam_reponses.datejorep, 'YYYY-MM-DD') as "date_reponse_JO",
              tam_reponses.pagejorep::text as "page_JO",
              to_char(tam_reponses.errdate, 'YYYY-MM-DD') as "date_erratum_JO",
              tam_reponses.minreplib as ministere_reponse,
              tam_reponses.minrepid::text as ministere_reponse_id,
              tam_reponses.urlrep as url,
              tam_reponses.txtrep as texte,
              tam_reponses.txterrrep as texte_erratum
            from senat.questions_tam_reponses as tam_reponses
            where tam_reponses.idque = tam_questions.id
            order by tam_reponses.datejorep asc
          ) as response_rows
        ) as reponses
      from senat.questions_tam_questions as tam_questions
      left join senat.questions_tam_ministeres as tam_ministeres on tam_questions.mindepotid = tam_ministeres.minid
      left join senat.questions_sortquestion as sortquestion on tam_questions.sorquecod::text = sortquestion.sorquecod
      left join senat.questions_naturequestion as naturequestion on tam_questions.natquecod = naturequestion.natquecod
      left join senat.questions_etatquestion as etatquestion on tam_questions.etaquecod::text = etatquestion.etaquecod
    `,
    };
}
82
/**
 * Streams every question row produced by the "find all questions" query.
 *
 * Each yielded object is a shallow copy of the database row in which
 * `reponses` and `themes` are guaranteed to be arrays (a null/undefined
 * value is replaced by an empty array).
 */
export async function* findAll() {
    const statement = buildFindAllQuestionsQuery();
    const rows = streamUnsafeQuery(statement.query, statement.params);
    for await (const question of rows) {
        const reponses = question.reponses ?? [];
        const themes = question.themes ?? [];
        yield { ...question, reponses, themes };
    }
}
@@ -0,0 +1,48 @@
1
/**
 * One senator's vote inside a scrutin (an element of `ScrutinResult.votes`).
 *
 * All values are strings; only `matricule_votant` is declared non-nullable.
 */
export interface VoteScrutinRow {
    /** Code of the political group matched for the voter, if any. */
    groupe_politique_code: string | null;
    /** Identifier of the matched political-group membership, if any. */
    groupe_politique_id: string | null;
    /** Matricule of the delegating senator; presumably set for proxy votes only — TODO confirm. */
    matricule_delegant: string | null;
    /** Matricule of the senator who voted. */
    matricule_votant: string;
    /** Label of the vote position. */
    position: string | null;
    /** Label of the voter's status. */
    statut_votant: string | null;
    /** Label of the voter's title. */
    titre_votant: string | null;
}
10
/**
 * Vote tally for one political group inside a scrutin
 * (an element of `ScrutinResult.groupes_votants`).
 *
 * Counters are declared as strings, not numbers.
 */
export interface GroupeVotantRow {
    /** Political group code; null when no group was matched for the voters. */
    groupe_politique_code: string | null;
    nombre_abstentions: string;
    nombre_contre: string;
    nombre_non_votants: string;
    nombre_pour: string;
    /** Total number of voters counted for the group. */
    nombre_votants: string;
}
18
/**
 * A "mise au point" attached to a scrutin
 * (an element of `ScrutinResult.mises_au_point`).
 */
export interface MiseAuPointRow {
    /** Ordering key, as a string; may be null. */
    ordre: string | null;
    /** Text of the entry. */
    texte: string;
    /** Related URL, if any. */
    url: string | null;
}
23
/**
 * One scrutin (recorded vote) as yielded by `findAllScrutins`.
 *
 * Every scalar is a string (or null), including counters and dates.
 * The three array members are always present.
 */
export interface ScrutinResult {
    /** Date of the scrutin; presumably formatted `YYYY-MM-DD` — confirm against the query. */
    date_scrutin: string | null;
    /** Effective date of the scrutin; same format assumption as `date_scrutin`. */
    date_scrutin_effective: string | null;
    /** Per-political-group tallies. */
    groupes_votants: GroupeVotantRow[];
    /** Title of the scrutin. */
    intitule: string | null;
    lecture_id: string | null;
    lecture_libelle: string | null;
    /** "Mises au point" attached to the scrutin. */
    mises_au_point: MiseAuPointRow[];
    // Counters below come in pairs: the plain value and its `_seance` variant.
    nombre_contre: string | null;
    nombre_contre_seance: string | null;
    nombre_majorite: string | null;
    nombre_majorite_seance: string | null;
    nombre_pour: string | null;
    nombre_pour_seance: string | null;
    nombre_suffrages: string | null;
    nombre_suffrages_seance: string | null;
    nombre_votants: string | null;
    nombre_votants_seance: string | null;
    note: string | null;
    /** Scrutin number, as a string. */
    numero: string;
    /** Session identifier, as a string. */
    session: string;
    signet_dossier: string | null;
    type_lecture: string | null;
    /** Individual votes. */
    votes: VoteScrutinRow[];
}
48
/**
 * Asynchronously iterates over scrutins.
 *
 * @param fromSession Optional session number; presumably a lower bound on the
 *   scrutin's session — confirm against the implementation. Omit it to iterate
 *   over everything.
 * @returns An async generator yielding one `ScrutinResult` per scrutin.
 */
export declare function findAllScrutins(fromSession?: number): AsyncGenerator<ScrutinResult, void, unknown>;
@@ -0,0 +1,121 @@
1
+ import { streamUnsafeQuery } from "../databases_postgres.js";
2
/**
 * Builds the SQL statement (and its bind parameters) that lists scrutins with
 * their individual votes, per-group tallies and "mises au point".
 *
 * When `fromSession` is provided, only scrutins whose `sesann` is greater than
 * or equal to it are returned, and the value is bound as `$1` (stringified).
 * Both `undefined` and `null` mean "no filter"; previously `null` was bound as
 * the string "null" and compared against `scr.sesann`.
 *
 * NOTE(review): the `mises_au_point` sub-query correlates on `scrnum` only,
 * whereas the vote sub-queries match on both `scrnum` and `sesann`. Confirm
 * whether `dosleg_corscr` also needs a session condition.
 * NOTE(review): each JSON object in `votes` also carries a `membership_start`
 * key (selected for ordering). The `votes` sub-query is not de-duplicated the
 * way `groupes_votants` is (`vote_rank = 1`).
 *
 * @param {number | null | undefined} fromSession lowest session year to include
 * @returns {{ params: string[], query: string }} SQL text and bind parameters
 */
function buildFindAllScrutinsQuery(fromSession) {
    // `!= null` is false for both undefined and null.
    const hasSessionFilter = fromSession != null;
    const params = hasSessionFilter ? [String(fromSession)] : [];
    const whereSession = hasSessionFilter ? "where scr.sesann >= $1" : "";
    return {
        params,
        query: `
      select
        scr.sesann::text as session,
        scr.scrnum::text as numero,
        rtrim(date_seance.lecidt) as lecture_id,
        rtrim(lecture.leccom) as lecture_libelle,
        loi.signet as signet_dossier,
        rtrim(typlec.typleclib) as type_lecture,
        to_char(scr.scrdat, 'YYYY-MM-DD') as date_scrutin,
        to_char(scr.scrdateff, 'YYYY-MM-DD') as date_scrutin_effective,
        replace(scr.scrint, E'\\u0092', '''') as intitule,
        scr.scrbaspag as note,
        scr.scrmaj::text as nombre_majorite,
        scr.scrmajsea::text as nombre_majorite_seance,
        scr.scrvot::text as nombre_votants,
        scr.scrvotsea::text as nombre_votants_seance,
        scr.scrsuf::text as nombre_suffrages,
        scr.scrsufsea::text as nombre_suffrages_seance,
        scr.scrcon::text as nombre_contre,
        scr.scrconsea::text as nombre_contre_seance,
        scr.scrpou::text as nombre_pour,
        scr.scrpousea::text as nombre_pour_seance,
        (
          select coalesce(json_agg(vote_rows order by vote_rows.membership_start desc nulls last), '[]'::json)
          from (
            select
              votsen.senmat as matricule_votant,
              votsen.senmatdel as matricule_delegant,
              posvot.posvotlib as position,
              stavot.stavotlib as statut_votant,
              titsen.titsenlib as titre_votant,
              memgrppol.memgrppolid::text as groupe_politique_id,
              memgrppol.grppolcod as groupe_politique_code,
              memgrppol.memgrppoldatdeb as membership_start
            from senat.dosleg_votsen as votsen
            left join senat.dosleg_titsen as titsen on titsen.titsencod = votsen.titsencod
            left join senat.dosleg_stavot as stavot on stavot.stavotidt = votsen.stavotidt
            left join senat.dosleg_posvot as posvot on posvot.posvotcod = votsen.posvotcod
            left join senat.sens_memgrppol as memgrppol
              on memgrppol.senmat = votsen.senmat
              and memgrppol.memgrppoldatdeb <= scr.scrdat
              and (memgrppol.memgrppoldatfin >= scr.scrdat or memgrppol.memgrppoldatfin is null)
            where votsen.scrnum = scr.scrnum
              and votsen.sesann = scr.sesann
          ) as vote_rows
        ) as votes,
        (
          select coalesce(json_agg(group_rows order by group_rows.groupe_politique_code nulls last), '[]'::json)
          from (
            select
              unique_votes.grppolcod as groupe_politique_code,
              sum(case when unique_votes.posvotlib = 'pour' then 1 else 0 end)::text as nombre_pour,
              sum(case when unique_votes.posvotlib = 'contre' then 1 else 0 end)::text as nombre_contre,
              sum(case when unique_votes.posvotlib = 'abstention' then 1 else 0 end)::text as nombre_abstentions,
              sum(case when unique_votes.posvotlib = 'non-votant' then 1 else 0 end)::text as nombre_non_votants,
              count(unique_votes.senmat)::text as nombre_votants
            from (
              select
                votsen.senmat,
                posvot.posvotlib,
                stavot.stavotlib,
                memgrppol.grppolcod,
                row_number() over (
                  partition by votsen.senmat, votsen.scrnum, votsen.sesann
                  order by memgrppol.memgrppoldatdeb desc nulls last
                ) as vote_rank
              from senat.dosleg_votsen as votsen
              left join senat.dosleg_posvot as posvot on posvot.posvotcod = votsen.posvotcod
              left join senat.dosleg_stavot as stavot on stavot.stavotidt = votsen.stavotidt
              left join senat.sens_memgrppol as memgrppol
                on memgrppol.senmat = votsen.senmat
                and memgrppol.memgrppoldatdeb <= scr.scrdat
                and (memgrppol.memgrppoldatfin >= scr.scrdat or memgrppol.memgrppoldatfin is null)
              where votsen.scrnum = scr.scrnum
                and votsen.sesann = scr.sesann
            ) as unique_votes
            where unique_votes.vote_rank = 1
            group by unique_votes.grppolcod
          ) as group_rows
        ) as groupes_votants,
        (
          select coalesce(json_agg(correction_rows order by correction_rows.ordre nulls last), '[]'::json)
          from (
            select
              corscr.corscrtxt as texte,
              corscr.corscrord::text as ordre,
              corscr.corscrurl as url
            from senat.dosleg_corscr as corscr
            where corscr.scrnum = scr.scrnum
          ) as correction_rows
        ) as mises_au_point
      from senat.dosleg_scr as scr
      left join senat.dosleg_date_seance as date_seance on scr.code = date_seance.code
      left join senat.dosleg_lecass as lecass on date_seance.lecidt = lecass.lecassidt
      left join senat.dosleg_lecture as lecture on lecture.lecidt = lecass.lecidt
      left join senat.dosleg_loi as loi on loi.loicod = lecture.loicod
      left join senat.dosleg_typlec as typlec on typlec.typleccod = lecture.typleccod
      ${whereSession}
    `,
    };
}
111
/**
 * Streams scrutins, optionally restricted to sessions >= `fromSession`.
 *
 * Each yielded object is a shallow copy of the database row in which
 * `groupes_votants`, `mises_au_point` and `votes` are guaranteed to be arrays
 * (a null/undefined value is replaced by an empty array).
 */
export async function* findAllScrutins(fromSession) {
    const statement = buildFindAllScrutinsQuery(fromSession);
    const rows = streamUnsafeQuery(statement.query, statement.params);
    for await (const scrutin of rows) {
        const groupes_votants = scrutin.groupes_votants ?? [];
        const mises_au_point = scrutin.mises_au_point ?? [];
        const votes = scrutin.votes ?? [];
        yield { ...scrutin, groupes_votants, mises_au_point, votes };
    }
}
@@ -0,0 +1,3 @@
1
+ import { CompteRendu } from "../types/compte_rendu.js";
2
/**
 * Parses part of a compte rendu file into a `CompteRendu`.
 *
 * @param xmlFilePath Path of the file to parse.
 * @param startIndex Start of the interval to extract; presumably an index into the document's elements — confirm against the implementation.
 * @param endIndex End of the interval to extract; same assumption as `startIndex`.
 * @param agendaEventId Identifier of the agenda event the interval belongs to.
 * @returns A promise of the parsed `CompteRendu`, or `null` (presumably when parsing fails — TODO confirm).
 */
export declare function parseCompteRenduIntervalFromFile(xmlFilePath: string, startIndex: number, endIndex: number, agendaEventId: string): Promise<CompteRendu | null>;
3
/**
 * Returns the starting year of the parliamentary session that contains `d`
 * (per the function name; the session boundaries are defined by the implementation).
 */
export declare function sessionStartYearFromDate(d: Date): number;
@@ -0,0 +1,267 @@
1
+ import fs from "fs";
2
+ import * as cheerio from "cheerio";
3
+ import { toCRDate } from "./util.js";
4
+ import { makeReunionUid } from "../utils/reunion_parsing.js";
5
+ import { yyyymmddFromPath } from "../utils/date.js";
6
+ import { decodeHtmlEntities, dedupeSpeaker, fixApostrophes, norm } from "../utils/string_cleaning.js";
7
/**
 * Parses the slice of a compte rendu file that falls inside an index interval
 * and returns it as a compte rendu object.
 *
 * Every element under `<body>` is numbered in document order. An element
 * belongs to the slice when `startIndex <= position < endIndex`: the end is
 * exclusive, as implemented by `elementInAnyInterval`.
 *
 * Only `div.intervenant` blocks inside the slice become speech points.
 * Structural title paragraphs are stripped from a block before its speaker
 * and text are read, and blocks with no remaining text are skipped.
 *
 * @param xmlFilePath path of the file to read (UTF-8); the date used in the
 *   uid is derived from this path by `yyyymmddFromPath`
 * @param startIndex first element position to include (clamped to the document)
 * @param endIndex element position at which to stop, exclusive (clamped to the document)
 * @param agendaEventId agenda event identifier, embedded in the uid and seance reference
 * @returns the parsed compte rendu, or `null` when anything throws (the error is logged)
 */
export async function parseCompteRenduIntervalFromFile(xmlFilePath, startIndex, endIndex, agendaEventId) {
    try {
        const raw = fs.readFileSync(xmlFilePath, "utf8");
        const $ = cheerio.load(raw, { xml: false });
        const metadonnees = extractMetadonnees($, xmlFilePath);
        const order = $("body *").toArray();
        const idx = new Map(order.map((el, i) => [el, i]));
        const totalNodes = order.length;
        const clampedStart = Math.max(0, Math.min(startIndex, totalNodes - 1));
        // The end bound is exclusive, so its largest useful value is totalNodes.
        // Clamping it to totalNodes - 1 made the last element unreachable.
        const clampedEnd = Math.max(0, Math.min(endIndex, totalNodes));
        const intervals = [
            {
                start: clampedStart,
                end: clampedEnd,
            },
        ];
        metadonnees.sommaire = extractSommaireForIntervals($, idx, intervals);
        const points = [];
        let ordre = 0;
        // Points are numbered from 1 in the order they are added.
        const addPoint = (p) => points.push({ ...p, ordre_absolu_seance: String(++ordre) });
        // Speeches
        $("div.intervenant").each((_, block) => {
            if (!elementInAnyInterval(block, idx, intervals))
                return;
            const $block = $(block);
            // Drop structural headings so they are not mistaken for the speaker paragraph.
            $block
                .find([
                "p[class^='titre_S']",
                "p.mention_titre",
                "p.intitule_titre",
                "p.mention_chapitre",
                "p.intitule_chapitre",
                "p.mention_article",
                "p.intitule_article",
                "p.mention_section",
                "p.intitule_section",
            ].join(","))
                .remove();
            const firstP = $block.find("p").first();
            if (!firstP || firstP.length === 0)
                return;
            const speakerLabelRaw = firstP.find(".orateur_nom").text() || firstP.find("a.lien_senfic").text() || "";
            const speakerLabel = dedupeSpeaker(speakerLabelRaw);
            const { mat, nom: nomCRI, qua: quaCRI } = readIntervenantMeta($block);
            // Must run after the speaker label is read: it removes the name/quality spans.
            const qualFromSpans = extractAndRemoveLeadingQualite($, $block);
            const qualite = norm(decodeHtmlEntities(quaCRI || "")) || qualFromSpans;
            const canonicalName = dedupeSpeaker(nomCRI || speakerLabel);
            const role = roleForSpeaker(speakerLabel) || roleForSpeaker(qualite) || roleForSpeaker(quaCRI || "");
            const speechHtml = sanitizeInterventionHtml($, $block);
            const speechText = norm(cheerio.load(speechHtml).text() || "");
            if (!speechText)
                return;
            addPoint({
                code_grammaire: "PAROLE_GENERIQUE",
                roledebat: role,
                orateurs: { orateur: { nom: canonicalName, id: mat || "", qualite } },
                texte: { _: speechHtml },
            });
        });
        const contenu = {
            quantiemes: {
                journee: metadonnees.dateSeance,
                session: metadonnees.session,
            },
            point: points,
        };
        const yyyymmdd = yyyymmddFromPath(xmlFilePath);
        const dateISO = `${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`;
        const seanceRef = makeReunionUid(dateISO, "SP", agendaEventId, null);
        return {
            uid: `CRSSN${yyyymmdd}E${agendaEventId}`,
            seanceRef,
            sessionRef: metadonnees.session,
            metadonnees,
            contenu,
        };
    }
    catch (e) {
        // Best effort: a file that cannot be parsed is reported and skipped.
        console.error(`[CRI] parseInterval error file=${xmlFilePath} interval=[${startIndex}..${endIndex}] event=${agendaEventId}:`, e);
        return null;
    }
}
89
/**
 * Returns the starting calendar year of the parliamentary session containing `d`.
 *
 * A session runs from 1 October of year N to 30 September of year N+1, so a
 * date in October–December belongs to the session starting that same year
 * and any earlier date to the session that started the previous year.
 * The month and year are read in local time.
 */
export function sessionStartYearFromDate(d) {
    const OCTOBER = 9; // Date#getMonth() is zero-based
    const calendarYear = d.getFullYear();
    if (d.getMonth() < OCTOBER) {
        return calendarYear - 1;
    }
    return calendarYear;
}
95
/**
 * Maps a speaker label or quality to a debate role.
 *
 * Returns "président" when the lower-cased text starts with an optional
 * "M."/"Mme" followed by "le/la président(e)", or when it contains
 * "président(e) de séance". Returns an empty string otherwise, including for
 * empty or missing input.
 */
function roleForSpeaker(labelOrQualite) {
    const lowered = (labelOrQualite || "").toLowerCase();
    const presidentPatterns = [
        /^(m\.|mme)?\s*(le|la)\s+pr[ée]sident(e)?\b/,
        /\bpr[ée]sident[e]?\s+de\s+séance\b/,
    ];
    const isPresident = presidentPatterns.some((pattern) => pattern.test(lowered));
    return isPresident ? "président" : "";
}
101
/**
 * Reads the speaker metadata (`mat`, `nom`, `qua`) attached to a speech block.
 *
 * Looks first for a `cri:intervenant` element and uses its attributes (empty
 * values become `undefined`). Otherwise it falls back to an HTML comment of
 * the form `<!-- cri:intervenant key="value" ... -->`, whose attribute values
 * are HTML-entity decoded. Returns an empty object when neither is present.
 */
function readIntervenantMeta($block) {
    const tag = $block.find("cri\\:intervenant").first();
    if (tag.length) {
        const attrOrUndefined = (name) => tag.attr(name) || undefined;
        return { mat: attrOrUndefined("mat"), nom: attrOrUndefined("nom"), qua: attrOrUndefined("qua") };
    }
    const markup = $block.html() || "";
    const comment = markup.match(/<!--\s*cri:intervenant\b([^>]+)-->/i);
    if (!comment) {
        return {};
    }
    // Collect every key="value" pair found inside the comment.
    const attributes = {};
    for (const [, key, value] of comment[1].matchAll(/(\w+)="([^"]*)"/g)) {
        attributes[key] = decodeHtmlEntities(value);
    }
    const { mat, nom, qua } = attributes;
    return { mat, nom, qua };
}
116
/**
 * Extracts the speaker's quality from the start of a block's first paragraph
 * and strips the leading speaker markup in place.
 *
 * Walks the first `<p>`'s child nodes in order:
 *  - `.orateur_nom` elements are removed;
 *  - `.orateur_qualite` elements are removed and their text collected;
 *  - other elements are removed while empty, and end the scan at the first
 *    one with text;
 *  - text nodes that are blank or punctuation-only are emptied, and the first
 *    one with real text ends the scan.
 *
 * @returns the collected quality text, normalised, or "" when there is no paragraph
 */
function extractAndRemoveLeadingQualite($, $block) {
    const firstParagraph = $block.find("p").first();
    if (!firstParagraph.length) {
        return "";
    }
    const qualiteParts = [];
    for (const node of firstParagraph.contents().toArray()) {
        if (node.type === "tag") {
            const $node = $(node);
            if ($node.hasClass("orateur_nom")) {
                $node.remove();
                continue;
            }
            if ($node.hasClass("orateur_qualite")) {
                qualiteParts.push($node.text() || "");
                $node.remove();
                continue;
            }
            // Any other element: real content marks the start of the speech.
            if (norm($node.text() || "")) {
                break;
            }
            $node.remove();
        }
        else if (node.type === "text") {
            const content = norm(node.data || "");
            const isSeparatorOnly = !content || /^[:.,;–—-]+$/.test(content);
            if (!isSeparatorOnly) {
                break;
            }
            node.data = "";
        }
    }
    return fixApostrophes(norm(qualiteParts.join(" ")));
}
154
/**
 * Produces the cleaned HTML of a speech block.
 *
 * Each `<p>` of the block is processed on a clone, so the document is left
 * untouched: speaker name/quality spans are dropped, links are replaced by
 * their text, `.info_entre_parentheses` elements become `<em>`, and spans
 * with no text are removed. Paragraphs that end up empty are skipped; the
 * rest are re-wrapped in `<p>` and joined with `<br/>`.
 */
function sanitizeInterventionHtml($, $block) {
    const paragraphs = [];
    for (const element of $block.find("p").toArray()) {
        const $copy = $(element).clone();
        $copy.find(".orateur_nom, .orateur_qualite").remove();
        $copy.find("a").each((_, anchor) => {
            const $anchor = $(anchor);
            $anchor.replaceWith($anchor.text());
        });
        $copy.find(".info_entre_parentheses").each((_, aside) => {
            const $aside = $(aside);
            $aside.replaceWith($("<em/>").text($aside.text()));
        });
        $copy.find("span").each((_, span) => {
            const $span = $(span);
            if ($span.text().trim() === "") {
                $span.remove();
            }
        });
        const markup = ($copy.html() || "").trim();
        if (markup) {
            paragraphs.push(`<p>${markup}</p>`);
        }
    }
    return paragraphs.join("<br/>");
}
181
/**
 * Builds the table of contents ("sommaire") restricted to the given intervals.
 *
 * @param $ cheerio root of the document
 * @param idx map from element to its position in document order
 * @param intervals list of `{ start, end }` position ranges
 * @returns an object with `presidentSeance`, `sommaire1` and, when at least
 *   one `p.tm5` paragraph is found, `para` (a single entry or an array)
 */
function extractSommaireForIntervals($, idx, intervals) {
    const withinIntervals = (el) => elementInAnyInterval(el, idx, intervals);
    const $body = $("body");
    const sommaire = { presidentSeance: { _: "" }, sommaire1: [] };
    // (1) Presidency line (p.tm2): the first one located inside the interval
    const $presidence = $body
        .find("p.tm2")
        .filter((_, el) => withinIntervals(el))
        .first();
    if ($presidence.length) {
        sommaire.presidentSeance = { _: norm($presidence.text()) };
    }
    // (2) p.tm5 paragraphs located inside the interval
    const paras = [];
    for (const el of $body.find("p.tm5").toArray()) {
        if (!withinIntervals(el)) {
            continue;
        }
        const text = norm($(el).text());
        if (text) {
            paras.push({ _: text });
        }
    }
    if (paras.length === 1) {
        sommaire.para = paras[0];
    }
    else if (paras.length > 1) {
        sommaire.para = paras;
    }
    // (3) First-level items (p.tm3) located inside the interval
    const numberingPrefix = /^(\d+)\s*[.\-–—]\s*/;
    const items = [];
    for (const el of $body.find("p.tm3").toArray()) {
        if (!withinIntervals(el)) {
            continue;
        }
        const $entry = $(el);
        const fullText = norm($entry.text() || "");
        if (!fullText) {
            continue;
        }
        const numbering = fullText.match(numberingPrefix);
        // Prefer the title carried by the <a> anchor when there is one
        const $anchor = $entry.find("a").first();
        const intitule = norm($anchor.length ? $anchor.text() : fullText.replace(numberingPrefix, ""));
        // id_syceron comes from href="#Niv1_SOMx", without the leading '#'
        const href = ($anchor.attr("href") || "").trim();
        const idSyceron = href.startsWith("#") ? href.slice(1) : href;
        items.push({
            valeur_pts_odj: numbering ? numbering[1] : undefined,
            titreStruct: { id_syceron: idSyceron || "", intitule },
        });
    }
    if (items.length) {
        sommaire.sommaire1 = items;
    }
    return sommaire;
}
228
/**
 * Builds the metadata record of a compte rendu.
 *
 * The sitting date is searched as "<day> <month> <year>" in the first
 * `h1`/`h2`/`.page-title` (or, when that is empty, the first `<p>`). When not
 * found it falls back to the `dYYYYMMDD.xml` suffix of the file path. The
 * result is then passed through `toCRDate`. The session is the first
 * "session YYYY-YYYY" found in the body text.
 *
 * NOTE(review): `\w` only matches ASCII word characters (no `u` flag), so
 * accented month names such as "février", "août" or "décembre" do not match
 * the date pattern. Those dates rely on the file-name fallback.
 *
 * `heureGeneration` is the current time, so the result is not deterministic.
 */
function extractMetadonnees($, filePath) {
    const headingText = norm($("h1, h2, .page-title").first().text() || "");
    const dateText = headingText || norm($("p").first().text() || "");
    const dateMatch = dateText.match(/\b(\d{1,2}\s+\w+\s+\d{4})\b/i);
    const bodyText = norm($("body").text() || "");
    const sessionMatch = bodyText.match(/\bsession\s+(\d{4}-\d{4})\b/i);
    let rawDate = dateMatch?.[1] || "";
    if (!rawDate) {
        const fromFileName = filePath.match(/d(\d{4})(\d{2})(\d{2})\.xml$/i);
        if (fromFileName) {
            const [, year, month, day] = fromFileName;
            rawDate = `${year}-${month}-${day}`;
        }
    }
    const dateSeance = toCRDate(rawDate, null);
    return {
        dateSeance,
        dateSeanceJour: dateSeance,
        numSeanceJour: "",
        numSeance: "",
        typeAssemblee: "SN",
        legislature: "",
        session: sessionMatch?.[1] || "",
        nomFichierJo: "",
        validite: "",
        etat: "",
        diffusion: "",
        version: "1.0",
        environnement: "",
        heureGeneration: new Date(),
    };
}
259
/**
 * Tells whether an element's position falls inside at least one interval.
 *
 * Intervals are half-open: `start` is included, `end` is excluded. Elements
 * that have no entry in `idx` are never inside.
 *
 * @param el element to look up
 * @param idx map from element to its position
 * @param intervals iterable of `{ start, end }` ranges
 */
function elementInAnyInterval(el, idx, intervals) {
    const position = idx.get(el);
    if (position === undefined || position === null) {
        return false;
    }
    for (const { start, end } of intervals) {
        if (start <= position && position < end) {
            return true;
        }
    }
    return false;
}
@@ -0,0 +1,112 @@
1
/**
 * A function (role) entry with its validity period.
 * Used by `MandatOrganismeRow.fonctions` and `SenateurResult.fonctions_bureau`.
 * Dates are strings; their format is not visible in this declaration file.
 */
export interface FonctionRow {
    date_debut: string | null;
    date_fin: string | null;
    /** Label of the function. */
    libelle: string | null;
}
6
/**
 * One senatorial mandate of a senator (element of `SenateurResult.mandats_senateur`).
 * Every field is nullable.
 */
export interface MandatSenateurRow {
    code_circonscription: string | null;
    date_debut: string | null;
    date_fin: string | null;
    etat: string | null;
    /** Presumably the state of the mandate at its start — TODO confirm against the query. */
    etat_debut: string | null;
    /** Presumably the state of the mandate at its end — TODO confirm against the query. */
    etat_fin: string | null;
}
14
/**
 * A senator's membership in an organisme.
 * Shared shape of `SenateurResult.commissions`, `.delegations` and `.groupes`.
 */
export interface MandatOrganismeRow {
    /** Code of the organisme (always present). */
    code_organisme: string;
    date_debut: string | null;
    date_fin: string | null;
    etat: string | null;
    /** Functions held within the organisme. */
    fonctions: FonctionRow[];
    libelle: string | null;
    type_code_organisme: string | null;
    type_organisme: string | null;
}
24
/**
 * Postal address attached to a contact point (element of `PointContactRow.adresses`).
 * Every field is nullable.
 */
export interface AdressePointContactRow {
    bureau_distributeur: string | null;
    code_cedex: string | null;
    code_postal: string | null;
    commune: string | null;
    complement: string | null;
    complement2: string | null;
    libelle_cedex: string | null;
    nom_voie: string | null;
    numero_voie: string | null;
}
35
/** Phone entry attached to a contact point (element of `PointContactRow.telephones`). */
export interface TelephonePointContactRow {
    numero: string | null;
    /** Kind of phone entry; the possible values are not visible in this declaration file. */
    type: string;
}
39
/** A contact point of a senator (element of `SenateurResult.points_contact`). */
export interface PointContactRow {
    adresses: AdressePointContactRow[];
    id: string;
    libelle: string | null;
    telephones: TelephonePointContactRow[];
    /** Kind of contact point; the possible values are not visible in this declaration file. */
    type: string;
}
46
/** A URL associated with a senator, tagged with a code (element of `SenateurResult.urls`). */
export interface UrlRow {
    code_url: string;
    url: string;
}
50
/**
 * A senator record as yielded by `findAll`.
 *
 * Scalars are strings (or null). Nested collections are always arrays.
 * Field semantics beyond names and types are defined by the query, which is
 * not visible in this declaration file.
 */
export interface SenateurResult {
    // Presumably INSEE socio-professional category (PCS) labels/codes at several levels — TODO confirm.
    PCS_INSEE: string | null;
    PCS_INSEE_24: string | null;
    PCS_INSEE_42: string | null;
    PCS_INSEE_8: string | null;
    circonscription: string | null;
    code_circonscription: string | null;
    code_commission_permanente: string | null;
    code_groupe_politique: string | null;
    commission_permanente: string | null;
    /** Commission memberships. */
    commissions: MandatOrganismeRow[];
    courrier_electronique: string | null;
    date_deces: string | null;
    date_naissance: string | null;
    /** Delegation memberships. */
    delegations: MandatOrganismeRow[];
    description_profession: string | null;
    etat: string;
    fonction_bureau_senat: string | null;
    fonctions_bureau: FonctionRow[];
    groupe_politique: string | null;
    /** Group memberships. */
    groupes: MandatOrganismeRow[];
    mandats_senateur: MandatSenateurRow[];
    /** Senator identifier (always present). */
    matricule: string;
    nom_usuel: string | null;
    points_contact: PointContactRow[];
    prenom_usuel: string;
    qualite: string;
    siege: string | null;
    url_hatvp: string | null;
    urls: UrlRow[];
}
81
/** A circonscription (constituency) record as yielded by `findAllCirconscriptions`. */
export interface CirconscriptionResult {
    /** Presumably the grammatical article used with the name — TODO confirm. */
    article: string | null;
    code: string;
    date_debut: string | null;
    date_fin: string | null;
    etat: string | null;
    identifiant: string;
    libelle_departement: string;
    libelle_region: string;
    /** Number of senators, declared as a string. */
    nombre_senateurs: string | null;
    url: string | null;
}
93
/** An organisme record as yielded by `findAllOrganismes`. */
export interface OrganismeResult {
    code: string;
    date_debut: string | null;
    date_fin: string | null;
    etat: string | null;
    libelle: string | null;
    libelle_court: string | null;
    libelle_long: string | null;
    /** Code of the organisme type (always present). */
    type_code: string;
    type_libelle: string | null;
    url: string | null;
}
105
/** Asynchronously iterates over senators, yielding one `SenateurResult` each. */
export declare function findAll(): AsyncGenerator<SenateurResult, void, unknown>;
106
/** Asynchronously iterates over circonscriptions, yielding one `CirconscriptionResult` each. */
export declare function findAllCirconscriptions(): AsyncGenerator<CirconscriptionResult, void, unknown>;
107
/** Asynchronously iterates over organismes, yielding one `OrganismeResult` each. */
export declare function findAllOrganismes(): AsyncGenerator<OrganismeResult, void, unknown>;
108
/**
 * Asynchronously iterates over senators, yielding a minimal record per senator.
 * Presumably restricted to currently active senators, per the name — the
 * filter is not visible in this declaration file.
 *
 * The yielded keys keep raw column-style names: `senmat`, `sennomuse`, `senprenomuse`.
 */
export declare function findActif(): AsyncGenerator<{
    senmat: string;
    sennomuse: string | null;
    senprenomuse: string;
}, void, unknown>;