@tricoteuses/senat 2.22.13 → 2.22.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/lib/src/loaders.d.ts +2 -8
  2. package/lib/src/loaders.js +7 -25
  3. package/lib/tests/test_iter_load.test.js +17 -0
  4. package/package.json +2 -2
  5. package/lib/config.d.ts +0 -21
  6. package/lib/config.js +0 -27
  7. package/lib/databases.d.ts +0 -2
  8. package/lib/databases.js +0 -26
  9. package/lib/datasets.d.ts +0 -34
  10. package/lib/datasets.js +0 -233
  11. package/lib/git.d.ts +0 -26
  12. package/lib/git.js +0 -167
  13. package/lib/index.d.ts +0 -13
  14. package/lib/index.js +0 -1
  15. package/lib/loaders.d.ts +0 -58
  16. package/lib/loaders.js +0 -286
  17. package/lib/model/agenda.d.ts +0 -6
  18. package/lib/model/agenda.js +0 -148
  19. package/lib/model/ameli.d.ts +0 -51
  20. package/lib/model/ameli.js +0 -147
  21. package/lib/model/commission.d.ts +0 -18
  22. package/lib/model/commission.js +0 -269
  23. package/lib/model/debats.d.ts +0 -67
  24. package/lib/model/debats.js +0 -95
  25. package/lib/model/documents.d.ts +0 -12
  26. package/lib/model/documents.js +0 -138
  27. package/lib/model/dosleg.d.ts +0 -7
  28. package/lib/model/dosleg.js +0 -326
  29. package/lib/model/index.d.ts +0 -7
  30. package/lib/model/index.js +0 -7
  31. package/lib/model/questions.d.ts +0 -45
  32. package/lib/model/questions.js +0 -89
  33. package/lib/model/scrutins.d.ts +0 -13
  34. package/lib/model/scrutins.js +0 -114
  35. package/lib/model/seance.d.ts +0 -3
  36. package/lib/model/seance.js +0 -267
  37. package/lib/model/sens.d.ts +0 -146
  38. package/lib/model/sens.js +0 -454
  39. package/lib/model/texte.d.ts +0 -7
  40. package/lib/model/texte.js +0 -228
  41. package/lib/model/util.d.ts +0 -9
  42. package/lib/model/util.js +0 -38
  43. package/lib/parsers/texte.d.ts +0 -7
  44. package/lib/parsers/texte.js +0 -228
  45. package/lib/raw_types/ameli.d.ts +0 -914
  46. package/lib/raw_types/ameli.js +0 -5
  47. package/lib/raw_types/debats.d.ts +0 -207
  48. package/lib/raw_types/debats.js +0 -5
  49. package/lib/raw_types/dosleg.d.ts +0 -1619
  50. package/lib/raw_types/dosleg.js +0 -5
  51. package/lib/raw_types/questions.d.ts +0 -423
  52. package/lib/raw_types/questions.js +0 -5
  53. package/lib/raw_types/senat.d.ts +0 -11372
  54. package/lib/raw_types/senat.js +0 -5
  55. package/lib/raw_types/sens.d.ts +0 -8248
  56. package/lib/raw_types/sens.js +0 -5
  57. package/lib/raw_types_schemats/ameli.d.ts +0 -539
  58. package/lib/raw_types_schemats/ameli.js +0 -2
  59. package/lib/raw_types_schemats/debats.d.ts +0 -127
  60. package/lib/raw_types_schemats/debats.js +0 -2
  61. package/lib/raw_types_schemats/dosleg.d.ts +0 -977
  62. package/lib/raw_types_schemats/dosleg.js +0 -2
  63. package/lib/raw_types_schemats/questions.d.ts +0 -237
  64. package/lib/raw_types_schemats/questions.js +0 -2
  65. package/lib/raw_types_schemats/sens.d.ts +0 -6915
  66. package/lib/raw_types_schemats/sens.js +0 -2
  67. package/lib/scripts/convert_data.js +0 -354
  68. package/lib/scripts/data-download.d.ts +0 -1
  69. package/lib/scripts/data-download.js +0 -12
  70. package/lib/scripts/datautil.d.ts +0 -8
  71. package/lib/scripts/datautil.js +0 -34
  72. package/lib/scripts/parse_textes.d.ts +0 -1
  73. package/lib/scripts/parse_textes.js +0 -44
  74. package/lib/scripts/retrieve_agenda.d.ts +0 -1
  75. package/lib/scripts/retrieve_agenda.js +0 -132
  76. package/lib/scripts/retrieve_cr_commission.d.ts +0 -1
  77. package/lib/scripts/retrieve_cr_commission.js +0 -364
  78. package/lib/scripts/retrieve_cr_seance.d.ts +0 -6
  79. package/lib/scripts/retrieve_cr_seance.js +0 -347
  80. package/lib/scripts/retrieve_documents.d.ts +0 -3
  81. package/lib/scripts/retrieve_documents.js +0 -219
  82. package/lib/scripts/retrieve_open_data.d.ts +0 -1
  83. package/lib/scripts/retrieve_open_data.js +0 -316
  84. package/lib/scripts/retrieve_senateurs_photos.d.ts +0 -1
  85. package/lib/scripts/retrieve_senateurs_photos.js +0 -147
  86. package/lib/scripts/retrieve_videos.d.ts +0 -1
  87. package/lib/scripts/retrieve_videos.js +0 -461
  88. package/lib/scripts/shared/cli_helpers.d.ts +0 -95
  89. package/lib/scripts/shared/cli_helpers.js +0 -91
  90. package/lib/scripts/shared/util.d.ts +0 -4
  91. package/lib/scripts/shared/util.js +0 -35
  92. package/lib/scripts/test_iter_load.d.ts +0 -1
  93. package/lib/scripts/test_iter_load.js +0 -12
  94. package/lib/src/utils/nvs-timecode.d.ts +0 -17
  95. package/lib/src/utils/nvs-timecode.js +0 -79
  96. package/lib/src/utils/weights_scoring_config.d.ts +0 -2
  97. package/lib/src/utils/weights_scoring_config.js +0 -15
  98. package/lib/strings.d.ts +0 -1
  99. package/lib/strings.js +0 -18
  100. package/lib/types/agenda.d.ts +0 -44
  101. package/lib/types/agenda.js +0 -1
  102. package/lib/types/ameli.d.ts +0 -5
  103. package/lib/types/ameli.js +0 -1
  104. package/lib/types/compte_rendu.d.ts +0 -83
  105. package/lib/types/compte_rendu.js +0 -1
  106. package/lib/types/debats.d.ts +0 -2
  107. package/lib/types/debats.js +0 -1
  108. package/lib/types/dosleg.d.ts +0 -70
  109. package/lib/types/dosleg.js +0 -1
  110. package/lib/types/questions.d.ts +0 -2
  111. package/lib/types/questions.js +0 -1
  112. package/lib/types/sens.d.ts +0 -10
  113. package/lib/types/sens.js +0 -1
  114. package/lib/types/sessions.d.ts +0 -5
  115. package/lib/types/sessions.js +0 -84
  116. package/lib/types/texte.d.ts +0 -74
  117. package/lib/types/texte.js +0 -16
  118. package/lib/utils/cr_spliting.d.ts +0 -28
  119. package/lib/utils/cr_spliting.js +0 -265
  120. package/lib/utils/date.d.ts +0 -10
  121. package/lib/utils/date.js +0 -100
  122. package/lib/utils/nvs-timecode.d.ts +0 -7
  123. package/lib/utils/nvs-timecode.js +0 -79
  124. package/lib/utils/reunion_grouping.d.ts +0 -11
  125. package/lib/utils/reunion_grouping.js +0 -337
  126. package/lib/utils/reunion_odj_building.d.ts +0 -5
  127. package/lib/utils/reunion_odj_building.js +0 -154
  128. package/lib/utils/reunion_parsing.d.ts +0 -23
  129. package/lib/utils/reunion_parsing.js +0 -209
  130. package/lib/utils/scoring.d.ts +0 -14
  131. package/lib/utils/scoring.js +0 -147
  132. package/lib/utils/string_cleaning.d.ts +0 -7
  133. package/lib/utils/string_cleaning.js +0 -57
  134. package/lib/validators/config.d.ts +0 -9
  135. package/lib/validators/config.js +0 -10
  136. /package/lib/{scripts/convert_data.d.ts → tests/test_iter_load.test.d.ts} +0 -0
@@ -1,147 +0,0 @@
1
- import { sql } from "kysely";
2
- import { jsonArrayFrom } from "kysely/helpers/postgres";
3
- import { dbSenat } from "../databases";
4
- import { concat, toDateString } from "./util";
5
- function auteurs(amendementId) {
6
- return jsonArrayFrom(dbSenat
7
- .selectFrom("ameli.amdsen")
8
- .leftJoin("ameli.sen_ameli", "ameli.amdsen.senid", "ameli.sen_ameli.entid")
9
- .leftJoin("ameli.grppol_ameli", "ameli.amdsen.grpid", "ameli.grppol_ameli.entid")
10
- .where("ameli.amdsen.amdid", "=", amendementId)
11
- .select([
12
- "ameli.amdsen.prenomuse as prenom",
13
- "ameli.amdsen.hom as homonyme",
14
- "ameli.amdsen.nomuse as nom",
15
- "ameli.amdsen.qua as qualite",
16
- "ameli.amdsen.rng as rang",
17
- "ameli.sen_ameli.mat as matricule",
18
- "ameli.amdsen.grpid as groupe_politique_id",
19
- "ameli.grppol_ameli.cod as group_politique_code",
20
- "ameli.grppol_ameli.libcou as groupe_politique_libelle_court",
21
- "ameli.grppol_ameli.lilcou as groupe_politique_libelle",
22
- ])
23
- .orderBy("ameli.amdsen.rng", "asc"));
24
- }
25
- function scrutin(amendementNum, sesann, lecassidt) {
26
- return dbSenat
27
- .selectFrom("dosleg.amescr")
28
- .leftJoin("dosleg.scr", (join) => join
29
- .onRef("dosleg.amescr.scrnum", "=", "dosleg.scr.scrnum")
30
- .onRef("dosleg.amescr.sesann", "=", "dosleg.scr.sesann"))
31
- .leftJoin("dosleg.date_seance", "dosleg.scr.code", "dosleg.date_seance.code")
32
- .where("dosleg.amescr.amescrnum", "=", amendementNum)
33
- .where("dosleg.amescr.sesann", "=", sesann)
34
- .where("dosleg.date_seance.lecidt", "=", lecassidt)
35
- .select(["dosleg.amescr.scrnum as scrutin_num"])
36
- .limit(1)
37
- .as("scrutin_num");
38
- }
39
- const findAllAmendementsQuery = dbSenat
40
- .selectFrom("ameli.amd")
41
- .leftJoin("ameli.sub", "ameli.amd.subid", "ameli.sub.id")
42
- .leftJoin("ameli.typsub", "ameli.sub.typid", "ameli.typsub.id")
43
- .leftJoin("ameli.typrect", "ameli.amd.typrectid", "ameli.typrect.id")
44
- .leftJoin("ameli.txt_ameli", "ameli.amd.txtid", "ameli.txt_ameli.id")
45
- .leftJoin("ameli.etatxt", "ameli.txt_ameli.txtetaid", "ameli.etatxt.id")
46
- .leftJoin("ameli.ses", "ameli.txt_ameli.sesdepid", "ameli.ses.id")
47
- .leftJoin("ameli.typses", "ameli.typses.id", "ameli.ses.typid")
48
- .leftJoin("ameli.nat", "ameli.txt_ameli.natid", "ameli.nat.id")
49
- .leftJoin("ameli.lec_ameli", "ameli.txt_ameli.lecid", "ameli.lec_ameli.id")
50
- .leftJoin("dosleg.texte", (join) => join.onRef("ameli.ses.ann", "=", "dosleg.texte.sesann").onRef("ameli.txt_ameli.numabs", "=", "dosleg.texte.texnum"))
51
- .leftJoin("dosleg.lecass", "dosleg.texte.lecassidt", "dosleg.lecass.lecassidt")
52
- .leftJoin("ameli.mot", "ameli.amd.motid", "ameli.mot.id")
53
- .leftJoin("ameli.avicom", "ameli.amd.avcid", "ameli.avicom.id")
54
- .leftJoin("ameli.avigvt", "ameli.amd.avgid", "ameli.avigvt.id")
55
- .leftJoin("ameli.sor", "ameli.amd.sorid", "ameli.sor.id")
56
- .leftJoin("ameli.irr", "ameli.amd.irrid", "ameli.irr.id")
57
- .leftJoin("ameli.grppol_ameli", "ameli.amd.nomentid", "ameli.grppol_ameli.entid")
58
- .leftJoin("ameli.com_ameli", "ameli.amd.nomentid", "ameli.com_ameli.entid")
59
- .leftJoin("ameli.cab", "ameli.amd.nomentid", "ameli.cab.entid")
60
- .select(({ eb, ref, val }) => [
61
- "ameli.ses.ann as session",
62
- "ameli.ses.lil as session_libelle",
63
- "ameli.typses.lib as type_session",
64
- "ameli.txt_ameli.doslegsignet as signet_dossier_legislatif",
65
- "ameli.nat.libcourt as nature_texte",
66
- "ameli.nat.lib as nature_texte_libelle",
67
- "ameli.txt_ameli.numabs as numero_texte",
68
- "ameli.txt_ameli.numado as numero_adoption_texte",
69
- "ameli.txt_ameli.int as intitule_texte",
70
- "ameli.etatxt.lic as etat_texte",
71
- "ameli.etatxt.lib as etat_texte_libelle",
72
- "ameli.etatxt.txttyp as type_texte",
73
- "ameli.lec_ameli.lib as lecture",
74
- eb
75
- .case()
76
- .when("ameli.amd.typ", "=", "A")
77
- .then(val("Amendement"))
78
- .when("ameli.amd.typ", "=", "M")
79
- .then(val("Motion"))
80
- .when("ameli.amd.typ", "=", "S")
81
- .then(val("Sous-amendement"))
82
- .else("")
83
- .end()
84
- .as("nature"),
85
- "ameli.amd.id as id",
86
- "ameli.amd.amdperid as parent_id",
87
- "ameli.amd.ideid as identique_id",
88
- "ameli.amd.discomid as discussion_commune_id",
89
- "ameli.amd.num as numero",
90
- "ameli.amd.numabs as numero_absolu",
91
- "ameli.amd.ord as ordre",
92
- "ameli.amd.accgou as accepte_gouvernement",
93
- "ameli.amd.txtid as texte_id",
94
- "ameli.sub.lib as subdivision_libelle",
95
- "ameli.sub.lic as subdivision_libelle_court",
96
- "ameli.sub.pos as subdivision_position_texte",
97
- "ameli.sub.posder as subdivision_position_discussion",
98
- "ameli.sub.merid as subdivision_mere_id",
99
- "ameli.sub.sig as subdivision_signet",
100
- "ameli.sub.comdelid as subdivision_commission_id",
101
- "ameli.sub.dupl as subdivision_dupliquee",
102
- "ameli.typsub.lib as subdivision_type",
103
- "ameli.amd.alinea as alinea",
104
- "ameli.amd.obs as observations",
105
- "ameli.amd.mot as observations_additionnelles",
106
- toDateString(ref("ameli.amd.datdep")).as("date_depot"),
107
- "ameli.amd.dis as dispositif",
108
- "ameli.amd.obj as objet",
109
- "ameli.typrect.lib as type_rectification",
110
- "ameli.mot.lib as motion_libelle",
111
- eb
112
- .case()
113
- .when("ameli.amd.etaid", "=", 7)
114
- .then(val("Diffusé"))
115
- .when("ameli.amd.etaid", "=", 8)
116
- .then(val("Retiré avant réunion ou séance"))
117
- .when("ameli.amd.etaid", "=", 9)
118
- .then(val("Examiné en commission ou séance"))
119
- .when("ameli.amd.etaid", "=", 10)
120
- .then(val("Irrecevable"))
121
- .when("ameli.amd.etaid", "=", 11)
122
- .then(val("Irrecevable"))
123
- .else("")
124
- .end()
125
- .as("etat"),
126
- "ameli.avicom.lib as avis_commission",
127
- "ameli.avigvt.lib as avis_gouvernement",
128
- eb.fn.coalesce("ameli.sor.lib", "ameli.irr.libirr").as("sort"),
129
- "ameli.amd.rev as revision",
130
- concat(eb
131
- .case()
132
- .when("ameli.amd.num", "like", "%COM%")
133
- .then(val("https://www.senat.fr/amendements/commissions/"))
134
- .else(val("https://www.senat.fr/amendements/"))
135
- .end(), ref("ameli.ses.ann"), val("-"), sql `(ameli.ses.ann + 1)`, val("/"), ref("ameli.txt_ameli.numabs"), val("/Amdt_"), ref("ameli.amd.num"), val(".html")).as("url"),
136
- "ameli.grppol_ameli.lilcou as au_nom_de_groupe_politique",
137
- "ameli.com_ameli.lil as au_nom_de_commission",
138
- eb.case().when("ameli.cab.entid", "is not", null).then(true).else(false).end().as("auteur_est_gouvernement"),
139
- scrutin(ref("ameli.amd.num"), ref("ameli.ses.ann"), ref("dosleg.texte.lecassidt")),
140
- auteurs(ref("ameli.amd.id")).as("auteurs"),
141
- ]);
142
- export function findAllAmendements(fromSession) {
143
- if (fromSession !== undefined) {
144
- return findAllAmendementsQuery.where("ameli.ses.ann", ">=", fromSession).stream();
145
- }
146
- return findAllAmendementsQuery.stream();
147
- }
@@ -1,18 +0,0 @@
1
- import * as cheerio from "cheerio";
2
- import { CompteRendu } from "../types/compte_rendu";
3
- import { Reunion } from "../types/agenda";
4
- export declare function getRemainingTextAfterSpeakerHeader($: cheerio.CheerioAPI, $p: cheerio.Cheerio<any>): string;
5
- export type DaySection = {
6
- title: string;
7
- $start: cheerio.Cheerio<any>;
8
- time?: string;
9
- };
10
- export declare function cleanTitle(t: string): string;
11
- export declare function extractDayH3Sections($: cheerio.CheerioAPI, dateISO: string): DaySection[];
12
- export declare function parseCommissionCRSectionFromDom($: cheerio.CheerioAPI, htmlFilePath: string, opts: {
13
- dateISO: string;
14
- hourShort: string | null;
15
- organe?: string | null;
16
- section: DaySection;
17
- matched?: Reunion;
18
- }): CompteRendu | null;
@@ -1,269 +0,0 @@
1
- import * as cheerio from "cheerio";
2
- import path from "path";
3
- import { makeReunionUid } from "../utils/reunion_parsing";
4
- import { norm } from "../utils/string_cleaning";
5
- import { frDateToISO, hourShortToStartTime } from "../utils/date";
6
- import { toCRDate } from "./util";
7
- const PARA_h3_SEL = "p.sh_justify, p.sh_center, p.sh_marge, p[align], li, h3";
8
- function findDayRoot($, targetISO) {
9
- let $root = $();
10
- $("h2").each((_, el) => {
11
- const txt = norm($(el).text());
12
- const m = txt.match(/(?:Lundi|Mardi|Mercredi|Jeudi|Vendredi|Samedi|Dimanche)\s+(.+)$/i);
13
- const iso = m ? frDateToISO(m[1]) : undefined;
14
- if (iso === targetISO && $root.length === 0)
15
- $root = $(el);
16
- });
17
- return $root;
18
- }
19
- function normalizeSpaces(s) {
20
- return s.replace(/[\u00A0\u202F\u2009]/g, " ");
21
- }
22
- function stripIntroPunct(s) {
23
- return s.replace(/^[\s]*[.:;]?\s*(?:[–—-]\s*)+/u, "");
24
- }
25
- function collectLeadingHeaderStrongEls($, $clone) {
26
- const els = [];
27
- const nodes = $clone.contents().toArray();
28
- for (const node of nodes) {
29
- if (node.type === "text") {
30
- if (norm(node.data || ""))
31
- break;
32
- continue;
33
- }
34
- if (node.type === "tag") {
35
- const $n = $(node);
36
- if ($n.is("strong, b")) {
37
- els.push(node);
38
- continue;
39
- }
40
- if ($n.is("a") && $n.children("strong, b").length) {
41
- $n.children("strong, b").each((_, el) => {
42
- els.push($(el));
43
- });
44
- continue;
45
- }
46
- break;
47
- }
48
- }
49
- return els;
50
- }
51
- // Remove orateur's name from text and clean intro punct
52
- export function getRemainingTextAfterSpeakerHeader($, $p) {
53
- const $clone = $p.clone();
54
- // 1) Remove <strong> at start
55
- const headerStrongEls = collectLeadingHeaderStrongEls($, $clone);
56
- for (const el of headerStrongEls)
57
- $(el).remove();
58
- // 2) normalize + clean intro punct
59
- let remainingHtml = $clone.html() || "";
60
- remainingHtml = normalizeSpaces(cheerio.load(remainingHtml).text());
61
- remainingHtml = stripIntroPunct(remainingHtml);
62
- const remainingText = norm(remainingHtml || "");
63
- return remainingText;
64
- }
65
- function buildPointsFromParagraphs($, paras) {
66
- const points = [];
67
- let ordreAbsoluSeance = 0;
68
- const normSpeaker = (s) => s
69
- .normalize("NFKC")
70
- .replace(/\s+/g, " ")
71
- .replace(/[:\.]\s*$/, "")
72
- .trim();
73
- const normQual = (s) => s
74
- .normalize("NFKC")
75
- .replace(/\s+/g, " ")
76
- .replace(/^\s*,\s*|\s+$/g, "")
77
- .replace(/[\s\u00A0]*[.,;:–—-]+$/u, "")
78
- .trim();
79
- let currentOrateur = null;
80
- let currentQualite = "";
81
- let currentTexte = "";
82
- function isPresidentQual(qual) {
83
- return /\bprésident(e)?\b/i.test(qual);
84
- }
85
- // Flush the buffered speaker’s text into points[] if any.
86
- function flush() {
87
- if (!currentOrateur || !currentTexte.trim())
88
- return;
89
- ordreAbsoluSeance++;
90
- points.push({
91
- code_grammaire: "PAROLE_GENERIQUE",
92
- roledebat: isPresidentQual(currentQualite) ? "président" : "",
93
- ordre_absolu_seance: String(ordreAbsoluSeance),
94
- orateurs: { orateur: { nom: currentOrateur, id: "", qualite: currentQualite || "" } },
95
- texte: { _: currentTexte.trim() },
96
- });
97
- currentOrateur = null;
98
- currentQualite = "";
99
- currentTexte = "";
100
- }
101
- function addPoint(payload) {
102
- ordreAbsoluSeance++;
103
- points.push({ ...payload, ordre_absolu_seance: String(ordreAbsoluSeance) });
104
- }
105
- for (const $p of paras) {
106
- if ($p.closest("table").length)
107
- continue;
108
- const tagName = ($p.prop("tagName") || "").toString().toLowerCase();
109
- const rawText = ($p.text() || "").replace(/\u00a0/g, " ").trim();
110
- const text = norm(rawText);
111
- if (!text || text.length <= 3)
112
- continue;
113
- const html = ($p.html() || "").trim();
114
- const italicSpans = $p.find("i, em, span[style*='italic']");
115
- const firstItalicOuter = italicSpans.length ? $(italicSpans[0]).prop("outerHTML") || "" : "";
116
- const htmlBeforeFirstItalic = firstItalicOuter ? html.split(firstItalicOuter)[0].trim() : "";
117
- const isPureItalic = italicSpans.length > 0 && italicSpans.length === $p.find("span,i,em").length && htmlBeforeFirstItalic === "";
118
- if (tagName === "h3") {
119
- flush();
120
- addPoint({
121
- code_style: "Titre",
122
- code_grammaire: "TITRE_TEXTE_DISCUSSION",
123
- texte: { _: text },
124
- });
125
- continue;
126
- }
127
- const boldSpans = $p.find("strong, b");
128
- const joinedBold = norm(boldSpans
129
- .map((_, el) => $(el).text() || "")
130
- .get()
131
- .join(""));
132
- const [namePartRaw, qualPartRaw] = joinedBold.split(/\s*,\s+/, 2);
133
- const namePart = namePartRaw ? normSpeaker(namePartRaw) : "";
134
- const qualPart = qualPartRaw ? normQual(qualPartRaw) : "";
135
- const looksLikeName = namePart.length > 3 && /^(M\.|Mme)[\s\u00A0\u202F]+/i.test(namePart);
136
- const startsWithName = namePart && text.startsWith(namePart);
137
- const isNewSpeaker = looksLikeName && startsWithName && namePart !== currentOrateur;
138
- if (isNewSpeaker) {
139
- flush();
140
- currentOrateur = namePart;
141
- currentQualite = qualPart;
142
- const remainingText = getRemainingTextAfterSpeakerHeader($, $p);
143
- currentTexte = remainingText;
144
- continue;
145
- }
146
- if (isPureItalic || (!joinedBold && !currentOrateur && text)) {
147
- flush();
148
- addPoint({
149
- code_style: "Info Italiques",
150
- code_grammaire: "PAROLE_GENERIQUE",
151
- texte: { _: "<i>" + text + "</i>" },
152
- });
153
- continue;
154
- }
155
- // concat text because same orateur
156
- if (currentOrateur) {
157
- const removeOrateurFromText = getRemainingTextAfterSpeakerHeader($, $p);
158
- currentTexte += (currentTexte ? "<br/><br/>" : "") + removeOrateurFromText;
159
- continue;
160
- }
161
- }
162
- flush();
163
- return points;
164
- }
165
- const TIME_RE = /(?:\b[àa]\s*)?(\d{1,2})\s*(?:h|heures?)\s*(?:([0-5]\d))?/i;
166
- export function cleanTitle(t) {
167
- return (t || "").replace(/\s+/g, " ").trim();
168
- }
169
- function parseTimeToHHmm(text) {
170
- const m = normalizeSpaces(text).match(TIME_RE);
171
- if (!m)
172
- return undefined;
173
- const hh = m[1]?.padStart(2, "0");
174
- const mm = (m[2] ?? "00").padStart(2, "0");
175
- const h = Number(hh);
176
- if (h >= 0 && h <= 23)
177
- return `${hh}:${mm}`;
178
- return undefined;
179
- }
180
- function findNearbyTime($, $h3) {
181
- let cur = $h3.prev();
182
- for (let i = 0; i < 3 && cur.length; i++, cur = cur.prev()) {
183
- const direct = parseTimeToHHmm(cur.text());
184
- if (direct)
185
- return direct;
186
- const italic = parseTimeToHHmm(cur.find("i, em").first().text());
187
- if (italic)
188
- return italic;
189
- }
190
- return undefined;
191
- }
192
- export function extractDayH3Sections($, dateISO) {
193
- const sections = [];
194
- const $dayRoot = findDayRoot($, dateISO);
195
- if ($dayRoot.length === 0)
196
- return sections;
197
- const $range = $dayRoot.nextUntil("h2");
198
- const $h3s = $range.filter("h3").add($range.find("h3"));
199
- $h3s.each((_, el) => {
200
- const $h3 = $(el);
201
- const title = cleanTitle($h3.text());
202
- if (!title)
203
- return;
204
- const time = findNearbyTime($, $h3);
205
- sections.push({ title, $start: $h3, time });
206
- });
207
- return sections;
208
- }
209
- export function parseCommissionCRSectionFromDom($, htmlFilePath, opts) {
210
- try {
211
- const { dateISO, hourShort, organe, section, matched } = opts;
212
- const seanceRef = matched?.uid ?? makeReunionUid(dateISO, "COM", matched?.events[0].id ?? hourShort ?? "", organe ?? undefined);
213
- const uid = seanceRef.replace(/^RU/, "CRC");
214
- const dateSeance = toCRDate(dateISO, matched?.startTime ?? hourShortToStartTime(hourShort));
215
- const $dayRoot = findDayRoot($, dateISO);
216
- if ($dayRoot.length === 0) {
217
- console.warn(`[COM-CR][parse] day root not found for ${dateISO} in ${path.basename(htmlFilePath)}`);
218
- return null;
219
- }
220
- const paras = [];
221
- let $cursor = section.$start;
222
- // Jump title if we do not want to add it to paragraphes
223
- $cursor = $cursor.next();
224
- while ($cursor.length && !$cursor.is("h2") && !$cursor.is("h3")) {
225
- if ($cursor.is(PARA_h3_SEL)) {
226
- paras.push($cursor);
227
- }
228
- else {
229
- const $ps = $cursor.find(PARA_h3_SEL);
230
- if ($ps.length)
231
- $ps.each((_, p) => {
232
- paras.push($(p));
233
- });
234
- }
235
- $cursor = $cursor.next();
236
- }
237
- const points = buildPointsFromParagraphs($, paras);
238
- if (points.length < 4 || !points.some((pt) => pt.code_grammaire === "PAROLE_GENERIQUE" && pt.orateurs)) {
239
- console.warn(`[COM-CR][parse] Insufficient points or no interventions found for a section in ${path.basename(htmlFilePath)}`);
240
- return null;
241
- }
242
- const session = dateISO.slice(5, 7) >= "10" ? `${dateISO.slice(0, 4)}` : `${Number(dateISO.slice(0, 4)) - 1}`;
243
- const contenu = {
244
- quantiemes: { journee: dateISO, session },
245
- point: points,
246
- };
247
- const metadonnees = {
248
- dateSeance,
249
- dateSeanceJour: dateISO,
250
- numSeanceJour: "",
251
- numSeance: "",
252
- typeAssemblee: "SN",
253
- legislature: "",
254
- session,
255
- nomFichierJo: path.basename(htmlFilePath),
256
- validite: "non-certifie",
257
- etat: "definitif",
258
- diffusion: "publique",
259
- version: "1",
260
- environnement: "prod",
261
- heureGeneration: new Date(),
262
- };
263
- return { uid, seanceRef, sessionRef: session, metadonnees, contenu };
264
- }
265
- catch (e) {
266
- console.error(`[COM-CR][parse] error section file=${path.basename(htmlFilePath)}:`, e);
267
- return null;
268
- }
269
- }
@@ -1,67 +0,0 @@
1
- import { InferResult } from "kysely";
2
- export type DebatResult = InferResult<typeof findAllQuery>[0];
3
- declare const findAllQuery: import("kysely").SelectQueryBuilder<any, "debats.debats", {
4
- [x: string]: any;
5
- id: string;
6
- date_seance: string;
7
- sections: {
8
- [x: string]: any;
9
- interventions: {
10
- [x: string]: any;
11
- auteur: {
12
- code: any;
13
- nom: any;
14
- prenom: any;
15
- matricule: any;
16
- };
17
- }[];
18
- }[];
19
- sections_divers: {
20
- [x: string]: any;
21
- interventions: {
22
- [x: string]: any;
23
- auteur: {
24
- code: any;
25
- nom: any;
26
- prenom: any;
27
- matricule: any;
28
- };
29
- }[];
30
- }[];
31
- lectures: {
32
- id: any;
33
- }[];
34
- }>;
35
- export declare function findAll(fromSession?: number): AsyncIterableIterator<{
36
- [x: string]: any;
37
- id: string;
38
- date_seance: string;
39
- sections: {
40
- [x: string]: any;
41
- interventions: {
42
- [x: string]: any;
43
- auteur: {
44
- code: any;
45
- nom: any;
46
- prenom: any;
47
- matricule: any;
48
- };
49
- }[];
50
- }[];
51
- sections_divers: {
52
- [x: string]: any;
53
- interventions: {
54
- [x: string]: any;
55
- auteur: {
56
- code: any;
57
- nom: any;
58
- prenom: any;
59
- matricule: any;
60
- };
61
- }[];
62
- }[];
63
- lectures: {
64
- id: any;
65
- }[];
66
- }>;
67
- export {};
@@ -1,95 +0,0 @@
1
- import { jsonArrayFrom, jsonBuildObject } from "kysely/helpers/postgres";
2
- import { dbSenat } from "../databases";
3
- import { ID_DATE_FORMAT } from "../scripts/datautil";
4
- import { toDateString } from "./util";
5
- function sectionsLegislatives(dateSeance) {
6
- return jsonArrayFrom(dbSenat
7
- .selectFrom("debats.secdis")
8
- .leftJoin("debats.typsec", "debats.secdis.typseccod", "debats.typsec.typseccod")
9
- .where("debats.secdis.datsea", "=", dateSeance)
10
- .select(({ ref }) => [
11
- "debats.secdis.secdisordid as id",
12
- "debats.secdis.secdisnum as numero",
13
- "debats.secdis.secdisobj as objet",
14
- "debats.secdis.secdisurl as url",
15
- "debats.typsec.typseclib as type",
16
- "debats.typsec.typseccat as categorie",
17
- interventionsLegislatives(ref("debats.secdis.secdiscle")).as("interventions"),
18
- "debats.secdis.lecassidt as lecture_id",
19
- ])
20
- .orderBy("debats.secdis.secdisordid", "asc"));
21
- }
22
- function interventionsLegislatives(sectionId) {
23
- return jsonArrayFrom(dbSenat
24
- .selectFrom("debats.intpjl")
25
- .leftJoin("dosleg.auteur", "debats.intpjl.autcod", "dosleg.auteur.autcod")
26
- .where("debats.intpjl.secdiscle", "=", sectionId)
27
- .select(({ ref, val, fn }) => [
28
- "debats.intpjl.intordid as id",
29
- "debats.intpjl.autcod as auteur_code",
30
- "debats.intpjl.intfon as fonction_intervenant",
31
- "debats.intpjl.inturl as url",
32
- "debats.intpjl.intana as analyse",
33
- jsonBuildObject({
34
- code: ref("dosleg.auteur.autcod"),
35
- nom: ref("dosleg.auteur.nomuse"),
36
- prenom: ref("dosleg.auteur.prenom"),
37
- matricule: ref("dosleg.auteur.autmat"),
38
- }).as("auteur"),
39
- ])
40
- .orderBy("debats.intpjl.intordid", "asc"));
41
- }
42
- function sectionsNonLegislatives(dateSeance) {
43
- return jsonArrayFrom(dbSenat
44
- .selectFrom("debats.secdivers")
45
- .leftJoin("debats.typsec", "debats.secdivers.typseccod", "debats.typsec.typseccod")
46
- .where("debats.secdivers.datsea", "=", dateSeance)
47
- .select(({ ref }) => [
48
- "debats.secdivers.secdiverslibelle as libelle",
49
- "debats.secdivers.secdiversobj as objet",
50
- "debats.typsec.typseclib as type",
51
- "debats.typsec.typseccat as categorie",
52
- interventionsNonLegislatives(ref("debats.secdivers.secdiverscle")).as("interventions"),
53
- ]));
54
- }
55
- function interventionsNonLegislatives(sectionId) {
56
- return jsonArrayFrom(dbSenat
57
- .selectFrom("debats.intdivers")
58
- .leftJoin("dosleg.auteur", "debats.intdivers.autcod", "dosleg.auteur.autcod")
59
- .where("debats.intdivers.intdiverscle", "=", sectionId)
60
- .select(({ ref, val }) => [
61
- "debats.intdivers.intdiversordid as id",
62
- "debats.intdivers.autcod as auteur_code",
63
- "debats.intdivers.intfon as fonction_intervenant",
64
- "debats.intdivers.inturl as url",
65
- "debats.intdivers.intana as analyse",
66
- jsonBuildObject({
67
- code: ref("dosleg.auteur.autcod"),
68
- nom: ref("dosleg.auteur.nomuse"),
69
- prenom: ref("dosleg.auteur.prenom"),
70
- matricule: ref("dosleg.auteur.autmat"),
71
- }).as("auteur"),
72
- ])
73
- .orderBy("debats.intdivers.intdiversordid", "asc"));
74
- }
75
- function lecturesAssemblee(dateSeance) {
76
- return jsonArrayFrom(dbSenat
77
- .selectFrom("debats.lecassdeb")
78
- .where("debats.lecassdeb.datsea", "=", dateSeance)
79
- .select("debats.lecassdeb.lecassidt as id"));
80
- }
81
- const findAllQuery = dbSenat
82
- .selectFrom("debats.debats")
83
- .select(({ ref, val }) => [
84
- toDateString(ref("debats.debats.datsea"), val(ID_DATE_FORMAT)).as("id"),
85
- toDateString(ref("debats.debats.datsea")).as("date_seance"),
86
- "debats.debats.numero as numero",
87
- "debats.debats.deburl as url",
88
- "debats.debats.debsyn as etat_synchronisation",
89
- sectionsLegislatives(ref("debats.debats.datsea")).as("sections"),
90
- sectionsNonLegislatives(ref("debats.debats.datsea")).as("sections_divers"),
91
- lecturesAssemblee(ref("debats.debats.datsea")).as("lectures"),
92
- ]);
93
- export function findAll(fromSession) {
94
- return findAllQuery.stream();
95
- }
@@ -1,12 +0,0 @@
1
- import { Expression, InferResult, SelectQueryBuilder } from "kysely";
2
- export declare function rapports(lectureAssembleeId: Expression<string>): import("kysely").RawBuilder<{
3
- [x: string]: any;
4
- }[]>;
5
- declare const queryTextes: SelectQueryBuilder<any, any, any>;
6
- export declare function textes(lectureAssembleeId: Expression<string>): import("kysely").RawBuilder<{
7
- [x: string]: any;
8
- }[]>;
9
- export declare function findAllTextes(): AsyncIterableIterator<DocumentResult>;
10
- export declare function findAllRapports(): AsyncIterableIterator<DocumentResult>;
11
- export type DocumentResult = InferResult<typeof queryTextes>[0];
12
- export {};