@tricoteuses/senat 2.11.0 → 2.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/loaders.d.ts CHANGED
@@ -9,6 +9,7 @@ import { CompteRendu } from "./types/compte_rendu";
9
9
  export { EnabledDatasets } from "./datasets";
10
10
  export declare const AGENDA_FOLDER = "agenda";
11
11
  export declare const COMPTES_RENDUS_FOLDER = "seances";
12
+ export declare const COMMISSION_FOLDER = "commissions";
12
13
  export declare const DOSLEG_DOSSIERS_FOLDER = "dossiers";
13
14
  export declare const SCRUTINS_FOLDER = "scrutins";
14
15
  export declare const RAPPORT_FOLDER = "rap";
@@ -68,6 +69,10 @@ export declare function iterLoadSenatComptesRendusSeances(dataDir: string, sessi
68
69
  compteRendu: CompteRendu;
69
70
  session: number;
70
71
  }>;
72
/**
 * Generator over the commission comptes rendus stored for one session.
 *
 * The implementation in loaders.js delegates to the shared compte-rendu
 * loader with the "commissions" folder (COMMISSION_FOLDER).
 *
 * @param dataDir Root data directory.
 * @param session Session used as the last path segment of the folder that is read.
 * @returns A generator of `{ compteRendu, session }` items.
 */
+ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, session: number): Generator<{
73
+ compteRendu: CompteRendu;
74
+ session: number;
75
+ }>;
71
76
  export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
72
77
  export declare function iterLoadSenatDossiersLegislatifsRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<RapportMetadata>>;
73
78
  export declare function iterLoadSenatDossiersLegislatifsTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<TexteMetadata>>;
package/lib/loaders.js CHANGED
@@ -6,6 +6,7 @@ import { UNDEFINED_SESSION } from "./types/sessions";
6
6
  export { EnabledDatasets } from "./datasets";
7
7
  export const AGENDA_FOLDER = "agenda";
8
8
  export const COMPTES_RENDUS_FOLDER = "seances";
9
+ export const COMMISSION_FOLDER = "commissions";
9
10
  export const DOSLEG_DOSSIERS_FOLDER = "dossiers";
10
11
  export const SCRUTINS_FOLDER = "scrutins";
11
12
  export const RAPPORT_FOLDER = "rap";
@@ -61,13 +62,12 @@ export function* iterLoadSenatDebats(dataDir, session, options = {}) {
61
62
  yield debatItem;
62
63
  }
63
64
  }
64
- export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
65
- const basePath = path.join(dataDir, COMPTES_RENDUS_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
66
- if (!fs.existsSync(basePath))
65
+ function* iterLoadSenatComptesRendusGeneric(dataDir, session, subFolder) {
66
+ const basePath = path.join(dataDir, subFolder, DATA_TRANSFORMED_FOLDER, String(session));
67
+ if (!fs.existsSync(basePath)) {
67
68
  return;
68
- const files = (fs.readdirSync(basePath) || [])
69
- .filter(f => f.endsWith(".json"))
70
- .sort();
69
+ }
70
+ const files = (fs.readdirSync(basePath) || []).filter((f) => f.endsWith(".json")).sort();
71
71
  for (const fileName of files) {
72
72
  const filePath = path.join(basePath, fileName);
73
73
  try {
@@ -84,6 +84,12 @@ export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
84
84
  }
85
85
  }
86
86
  }
87
/**
 * Yields the comptes rendus of plenary sittings for one session.
 *
 * Thin wrapper: every item comes from iterLoadSenatComptesRendusGeneric,
 * called with COMPTES_RENDUS_FOLDER as the sub-folder.
 *
 * @param dataDir Root data directory.
 * @param session Session whose folder is read.
 */
+ export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
88
+ yield* iterLoadSenatComptesRendusGeneric(dataDir, session, COMPTES_RENDUS_FOLDER);
89
+ }
90
/**
 * Yields the comptes rendus of commission meetings for one session.
 *
 * Thin wrapper: every item comes from iterLoadSenatComptesRendusGeneric,
 * called with COMMISSION_FOLDER as the sub-folder.
 *
 * @param dataDir Root data directory.
 * @param session Session whose folder is read.
 */
+ export function* iterLoadSenatComptesRendusCommissions(dataDir, session) {
91
+ yield* iterLoadSenatComptesRendusGeneric(dataDir, session, COMMISSION_FOLDER);
92
+ }
87
93
  export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}) {
88
94
  for (const dossierLegislatifItem of iterLoadSenatItems(dataDir, datasets.dosleg.database, session, DOSLEG_DOSSIERS_FOLDER, options)) {
89
95
  yield dossierLegislatifItem;
@@ -193,9 +199,7 @@ export function* iterLoadSenatAgendasGrouped(dataDir, session) {
193
199
  const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? ""));
194
200
  if (!fs.existsSync(baseDir))
195
201
  return;
196
- const files = (fs.readdirSync(baseDir) || [])
197
- .filter((f) => f.startsWith("RUSN") && f.endsWith(".json"))
198
- .sort();
202
+ const files = (fs.readdirSync(baseDir) || []).filter((f) => f.startsWith("RUSN") && f.endsWith(".json")).sort();
199
203
  for (const fileName of files) {
200
204
  const filePath = path.join(baseDir, fileName);
201
205
  let raw;
@@ -0,0 +1,5 @@
1
+ import * as cheerio from "cheerio";
2
+ import { CompteRendu } from "../types/compte_rendu";
3
+ import { GroupedReunion } from "../types/agenda";
4
/**
 * Returns the text of a paragraph once its leading bold speaker header has
 * been removed and the introductory punctuation/dashes have been stripped.
 * The paragraph is cloned by the implementation before anything is removed.
 *
 * @param $ Cheerio API bound to the document that owns `$p`.
 * @param $p Paragraph selection to read.
 * @returns The normalised remaining text.
 */
+ export declare function getRemainingTextAfterSpeakerHeader($: cheerio.CheerioAPI, $p: cheerio.Cheerio<any>): string;
5
/**
 * Parses a commission compte-rendu HTML file and builds the CompteRendu of
 * the day designated by the agenda entry `best` (its `date`, `startTime` and
 * `uid` are read by the implementation).
 *
 * @param htmlFilePath Path of the HTML file to read.
 * @param best Agenda entry the compte rendu is attached to.
 * @returns The compte rendu, or `null` when the day heading is not found in
 *          the file or when reading/parsing throws.
 */
+ export declare function parseCommissionCRFromFile(htmlFilePath: string, best: GroupedReunion): CompteRendu | null;
@@ -0,0 +1,263 @@
1
+ import * as cheerio from "cheerio";
2
+ import path from "path";
3
+ import fs from "fs";
4
+ import { norm, toCRDate } from "./util";
5
+ const PARA_h3_SEL = "p.sh_justify, p.sh_center, p.sh_marge, p[align], li, h3";
6
/**
 * Locate the `<h2>` heading that opens the section of a given day.
 *
 * Every `<h2>` text is normalised with `norm`; the part that follows a French
 * weekday name is converted with `frDateToISO`, and the first heading whose
 * converted date equals `targetISO` is returned.
 *
 * @param $ Cheerio API bound to the parsed document.
 * @param targetISO Date to look for, in the `YYYY-MM-DD` form produced by `frDateToISO`.
 * @returns A Cheerio selection holding the matching heading, or an empty selection.
 */
function findDayRoot($, targetISO) {
    const weekdayPattern = /(?:Lundi|Mardi|Mercredi|Jeudi|Vendredi|Samedi|Dimanche)\s+(.+)$/i;
    for (const heading of $("h2").toArray()) {
        const label = norm($(heading).text());
        const found = label.match(weekdayPattern);
        // A heading without a weekday name yields `undefined`, just like an unparsable date.
        const headingISO = found ? frDateToISO(found[1]) : undefined;
        if (headingISO === targetISO) {
            return $(heading);
        }
    }
    return $();
}
17
/**
 * Replace every no-break space (U+00A0), narrow no-break space (U+202F) and
 * thin space (U+2009) with a plain ASCII space. Nothing is collapsed or trimmed.
 *
 * @param s Text to clean.
 * @returns The text with the special spaces replaced one for one.
 */
function normalizeSpaces(s) {
    const pieces = s.split(/[\u00A0\u202F\u2009]/);
    return pieces.join(" ");
}
20
/**
 * Remove the introductory separator found at the start of a speech: optional
 * whitespace, an optional `.`, `:` or `;`, then one or more dashes
 * (en dash, em dash or hyphen), each possibly followed by whitespace.
 * Text that does not start with such a separator is returned untouched.
 *
 * @param s Text to clean.
 * @returns The text without its leading separator.
 */
function stripIntroPunct(s) {
    const lead = /^[\s]*[.:;]?\s*(?:[–—-]\s*)+/u.exec(s);
    if (lead === null) {
        return s;
    }
    return s.slice(lead[0].length);
}
23
/**
 * Collect the bold elements that form the speaker header at the very start of
 * a paragraph.
 *
 * Child nodes are scanned in order:
 *  - whitespace-only text nodes are skipped, any other text node ends the scan;
 *  - `<strong>` / `<b>` elements are collected;
 *  - an `<a>` with direct `<strong>` / `<b>` children contributes those children;
 *  - any other element ends the scan;
 *  - nodes that are neither text nor tag are ignored.
 *
 * The result is always an array of raw DOM nodes (previously the `<a>` branch
 * pushed Cheerio wrappers, producing a mixed array).
 *
 * @param $ Cheerio API bound to the document.
 * @param $clone Paragraph selection to inspect; it is not modified here.
 * @returns The leading bold DOM nodes, in document order.
 */
function collectLeadingHeaderStrongEls($, $clone) {
    const els = [];
    for (const node of $clone.contents().toArray()) {
        if (node.type === "text") {
            // Real text means the header is over.
            if (norm(node.data || ""))
                break;
            continue;
        }
        if (node.type !== "tag")
            continue;
        const $n = $(node);
        if ($n.is("strong, b")) {
            els.push(node);
            continue;
        }
        if ($n.is("a")) {
            // Query the children once and keep them as raw nodes.
            const inner = $n.children("strong, b").toArray();
            if (inner.length) {
                els.push(...inner);
                continue;
            }
        }
        break;
    }
    return els;
}
49
/**
 * Return the spoken text of a paragraph without the speaker header.
 *
 * Works on a clone of `$p`: the leading bold elements are removed, the
 * remaining markup is reduced to text, special spaces are replaced, the
 * introductory punctuation is stripped and the result is normalised.
 *
 * @param $ Cheerio API bound to the document.
 * @param $p Paragraph selection (left untouched).
 * @returns The cleaned text that follows the speaker header.
 */
export function getRemainingTextAfterSpeakerHeader($, $p) {
    const $work = $p.clone();
    // Drop the bold header elements found at the start of the paragraph.
    collectLeadingHeaderStrongEls($, $work).forEach((el) => {
        $(el).remove();
    });
    // Reduce what is left to plain text, then tidy spaces and leading punctuation.
    const innerHtml = $work.html() || "";
    const plain = normalizeSpaces(cheerio.load(innerHtml).text());
    const withoutIntro = stripIntroPunct(plain);
    return norm(withoutIntro || "");
}
63
/**
 * Convert the paragraphs of one meeting day into an ordered list of points.
 *
 * Three kinds of points are produced:
 *  - `<h3>` elements become title points (`TITRE_TEXTE_DISCUSSION`);
 *  - paragraphs opening with a bold "M." / "Mme" name start a new speech, and
 *    following paragraphs are appended to it until another speaker, a title
 *    or an italic note appears;
 *  - purely italic paragraphs, and bold-free paragraphs met while no speaker
 *    is active, become italic information points.
 * Paragraphs inside a table, and paragraphs of three characters or fewer,
 * are ignored.
 *
 * @param $ Cheerio API bound to the document.
 * @param paras Cheerio selections (one element each) in document order.
 * @returns The points, each carrying its 1-based `ordre_absolu_seance` as a string.
 */
function buildPointsFromParagraphs($, paras) {
    const points = [];
    let ordreAbsoluSeance = 0;
    // Speech currently being accumulated.
    let currentOrateur = null;
    let currentQualite = "";
    let currentTexte = "";
    const squeeze = (s) => s.normalize("NFKC").replace(/\s+/g, " ");
    const normSpeaker = (s) => squeeze(s)
        .replace(/[:\.]\s*$/, "")
        .trim();
    const normQual = (s) => squeeze(s)
        .replace(/^\s*,\s*|\s+$/g, "")
        .replace(/[\s\u00A0]*[.,;:–—-]+$/u, "")
        .trim();
    const isPresidentQual = (qual) => /\bprésident(e)?\b/i.test(qual);
    const nextRank = () => {
        ordreAbsoluSeance += 1;
        return String(ordreAbsoluSeance);
    };
    const addPoint = (payload) => {
        const rank = nextRank();
        points.push({ ...payload, ordre_absolu_seance: rank });
    };
    // Emit the buffered speech, if there is one with actual text, then reset the buffer.
    const flush = () => {
        if (currentOrateur && currentTexte.trim()) {
            const rank = nextRank();
            points.push({
                code_grammaire: "PAROLE_GENERIQUE",
                roledebat: isPresidentQual(currentQualite) ? "président" : "",
                ordre_absolu_seance: rank,
                orateurs: { orateur: { nom: currentOrateur, id: "", qualite: currentQualite || "" } },
                texte: { _: currentTexte.trim() },
            });
            currentOrateur = null;
            currentQualite = "";
            currentTexte = "";
        }
    };
    for (const $p of paras) {
        if ($p.closest("table").length) {
            continue;
        }
        const tagName = ($p.prop("tagName") || "").toString().toLowerCase();
        const text = norm(($p.text() || "").replace(/\u00a0/g, " ").trim());
        if (!text || text.length <= 3) {
            continue;
        }
        if (tagName === "h3") {
            flush();
            addPoint({
                code_style: "Titre",
                code_grammaire: "TITRE_TEXTE_DISCUSSION",
                texte: { _: text },
            });
            continue;
        }
        // A paragraph is "purely italic" when all its span/i/em descendants are
        // italic ones and no markup precedes the first of them.
        const html = ($p.html() || "").trim();
        const italics = $p.find("i, em, span[style*='italic']");
        let leadingHtml = "";
        if (italics.length) {
            const firstOuter = $(italics[0]).prop("outerHTML") || "";
            if (firstOuter) {
                leadingHtml = html.split(firstOuter)[0].trim();
            }
        }
        const isPureItalic = italics.length > 0 && italics.length === $p.find("span,i,em").length && leadingHtml === "";
        // The bold text is read as "name, quality".
        const boldTexts = $p
            .find("strong, b")
            .map((_, el) => $(el).text() || "")
            .get();
        const joinedBold = norm(boldTexts.join(""));
        const [namePartRaw, qualPartRaw] = joinedBold.split(/\s*,\s+/, 2);
        const namePart = namePartRaw ? normSpeaker(namePartRaw) : "";
        const qualPart = qualPartRaw ? normQual(qualPartRaw) : "";
        const looksLikeName = namePart.length > 3 && /^(M\.|Mme)[\s\u00A0\u202F]+/i.test(namePart);
        const startsWithName = namePart && text.startsWith(namePart);
        if (looksLikeName && startsWithName && namePart !== currentOrateur) {
            // New speaker: close the previous speech and start buffering this one.
            flush();
            currentOrateur = namePart;
            currentQualite = qualPart;
            currentTexte = getRemainingTextAfterSpeakerHeader($, $p);
            continue;
        }
        if (isPureItalic || (!joinedBold && !currentOrateur && text)) {
            flush();
            addPoint({
                code_style: "Info Italiques",
                code_grammaire: "PAROLE_GENERIQUE",
                texte: { _: "<i>" + text + "</i>" },
            });
            continue;
        }
        if (currentOrateur) {
            // Same speaker goes on: append this paragraph to the buffered speech.
            const continuation = getRemainingTextAfterSpeakerHeader($, $p);
            currentTexte += (currentTexte ? "<br/><br/>" : "") + continuation;
        }
    }
    flush();
    return points;
}
163
/**
 * Convert a French long date ("16 juillet 2025", "1er octobre 2025") to an
 * ISO `YYYY-MM-DD` string.
 *
 * Month names are accepted with or without accents and in any letter case.
 * The ordinal suffix "er" used for the first day of a month is accepted,
 * otherwise headings such as "Mercredi 1er octobre 2025" could never be
 * matched. Months are looked up in a Map so that only real month names are
 * recognised (a plain-object lookup would also hit inherited keys such as
 * "constructor").
 *
 * @param s Date text without the weekday; it is normalised with `norm` first.
 * @returns The ISO date, or `undefined` when `s` is empty, is not of the form
 *          "day month year", names an unknown month, or has a day outside 1-31.
 */
function frDateToISO(s) {
    if (!s)
        return;
    const months = new Map([
        ["janvier", 1],
        ["fevrier", 2],
        ["février", 2],
        ["mars", 3],
        ["avril", 4],
        ["mai", 5],
        ["juin", 6],
        ["juillet", 7],
        ["aout", 8],
        ["août", 8],
        ["septembre", 9],
        ["octobre", 10],
        ["novembre", 11],
        ["decembre", 12],
        ["décembre", 12],
    ]);
    // "(?:er)?" accepts the French ordinal form "1er".
    const m = norm(s).match(/^(\d{1,2})(?:er)?\s+([A-Za-zéûôîà]+)\s+(\d{4})$/i);
    if (!m)
        return;
    const d = Number(m[1]);
    const mon = months.get(m[2].toLowerCase());
    const y = Number(m[3]);
    if (!mon || d < 1 || d > 31)
        return;
    return `${y}-${String(mon).padStart(2, "0")}-${String(d).padStart(2, "0")}`;
}
193
/**
 * Parse a commission compte-rendu HTML file and build the compte rendu of the
 * day designated by the agenda entry `best`.
 *
 * The `<h2>` heading of `best.date` is located, then every following sibling
 * up to the next `<h2>` is scanned: a sibling matching `PARA_h3_SEL` is taken
 * as is, otherwise its matching descendants are taken. The paragraphs are
 * then converted to points by `buildPointsFromParagraphs`.
 *
 * Each sibling is collected exactly once. Previously a sibling `<h3>` was
 * pushed twice, because it matched both a dedicated `h3` test and
 * `PARA_h3_SEL` (which already contains `h3`), yielding duplicate titles.
 *
 * @param htmlFilePath Path of the HTML file to read (UTF-8).
 * @param best Agenda entry; its `date`, `startTime` and `uid` are used.
 * @returns The compte rendu, or `null` when the day heading is missing or when
 *          reading/parsing throws (the problem is logged in both cases).
 */
export function parseCommissionCRFromFile(htmlFilePath, best) {
    try {
        const raw = fs.readFileSync(htmlFilePath, "utf8");
        const $ = cheerio.load(raw, { xmlMode: false });
        const dateISO = best.date;
        const dateSeance = toCRDate(dateISO, best.startTime);
        const $dayRoot = findDayRoot($, dateISO);
        if ($dayRoot.length === 0) {
            console.warn(`[COM-CR][parse] day root not found for ${dateISO} in ${path.basename(htmlFilePath)}`);
            return null;
        }
        // Take all paragraphs/h3 until the next h2.
        const dayParas = [];
        let $cursor = $dayRoot.next();
        while ($cursor.length && !$cursor.is("h2")) {
            if ($cursor.is(PARA_h3_SEL)) {
                // PARA_h3_SEL covers h3 too, so titles are handled here as well.
                dayParas.push($cursor);
            }
            else {
                // Container element: pick the paragraphs/titles it holds.
                $cursor.find(PARA_h3_SEL).each((_, p) => {
                    dayParas.push($(p));
                });
            }
            $cursor = $cursor.next();
        }
        const points = buildPointsFromParagraphs($, dayParas);
        // The session label spans two years and changes in October.
        const year = dateISO.slice(0, 4);
        const session = dateISO.slice(5, 7) >= "10" ? `${year}-${Number(year) + 1}` : `${Number(year) - 1}-${year}`;
        const contenu = {
            quantiemes: { journee: dateISO, session },
            point: points,
        };
        const metadonnees = {
            dateSeance: dateSeance,
            dateSeanceJour: dateISO,
            numSeanceJour: "",
            numSeance: "",
            typeAssemblee: "SN",
            legislature: "",
            session,
            nomFichierJo: path.basename(htmlFilePath),
            validite: "non-certifie",
            etat: "definitif",
            diffusion: "publique",
            version: "1",
            environnement: "prod",
            heureGeneration: new Date(),
        };
        return {
            // The compte-rendu uid is the agenda uid with its "RUSN" prefix replaced by "CRC".
            uid: best.uid.replace(/^RUSN/, "CRC"),
            seanceRef: best.uid,
            sessionRef: session,
            metadonnees,
            contenu,
        };
    }
    catch (e) {
        console.error(`[COM-CR][parse] error file=${path.basename(htmlFilePath)}:`, e);
        return null;
    }
}
@@ -2,9 +2,9 @@ import fs from "fs";
2
2
  import * as cheerio from "cheerio";
3
3
  import path from "path";
4
4
  import { computeIntervalsBySlot } from "../utils/cr_spliting";
5
- import { norm } from "./util";
6
- const asArray = (x) => x == null ? [] : Array.isArray(x) ? x : [x];
7
- const toInt = (s) => Number.isFinite(Number(s)) ? Number(s) : Number.POSITIVE_INFINITY;
5
+ import { norm, toCRDate } from "./util";
6
+ const asArray = (x) => (x == null ? [] : Array.isArray(x) ? x : [x]);
7
+ const toInt = (s) => (Number.isFinite(Number(s)) ? Number(s) : Number.POSITIVE_INFINITY);
8
8
  export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firstSlotOfDay) {
9
9
  try {
10
10
  const raw = fs.readFileSync(xmlFilePath, "utf8");
@@ -13,7 +13,7 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
13
13
  const order = $("body *").toArray();
14
14
  const idx = new Map(order.map((el, i) => [el, i]));
15
15
  const intervalsAll = computeIntervalsBySlot($, idx, firstSlotOfDay);
16
- const intervals = intervalsAll.filter(iv => iv.slot === wantedSlot);
16
+ const intervals = intervalsAll.filter((iv) => iv.slot === wantedSlot);
17
17
  if (intervals.length === 0) {
18
18
  console.warn(`[CRI] no intervals for ${path.basename(xmlFilePath)} [${wantedSlot}]`);
19
19
  return null;
@@ -35,7 +35,8 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
35
35
  if (!elementInAnyInterval(block, idx, intervals))
36
36
  return;
37
37
  const $block = $(block);
38
- $block.find([
38
+ $block
39
+ .find([
39
40
  "p[class^='titre_S']",
40
41
  "p.mention_titre",
41
42
  "p.intitule_titre",
@@ -45,7 +46,8 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
45
46
  "p.intitule_article",
46
47
  "p.mention_section",
47
48
  "p.intitule_section",
48
- ].join(",")).remove();
49
+ ].join(","))
50
+ .remove();
49
51
  const firstP = $block.find("p").first();
50
52
  const speakerLabelRaw = firstP.find(".orateur_nom").text() || firstP.find("a.lien_senfic").text() || "";
51
53
  const speakerLabel = dedupeSpeaker(speakerLabelRaw);
@@ -98,7 +100,7 @@ export function parseYYYYMMDD(yyyymmdd) {
98
100
  }
99
101
  export function deriveTitreObjetFromSommaire(sommaire, slot) {
100
102
  const items = extractLevel1Items(sommaire);
101
- const meaningful = items.filter(it => !isBoilerplate(it.label));
103
+ const meaningful = items.filter((it) => !isBoilerplate(it.label));
102
104
  if (meaningful.length === 0) {
103
105
  return {
104
106
  titre: `Séance publique ${slotLabel(slot)}`,
@@ -106,15 +108,22 @@ export function deriveTitreObjetFromSommaire(sommaire, slot) {
106
108
  };
107
109
  }
108
110
  const titre = meaningful[0].label;
109
- const objet = meaningful.slice(0, 3).map(it => it.label).join(" ; ");
111
+ const objet = meaningful
112
+ .slice(0, 3)
113
+ .map((it) => it.label)
114
+ .join(" ; ");
110
115
  return { titre, objet };
111
116
  }
112
117
  function slotLabel(slot) {
113
118
  switch (slot) {
114
- case "MATIN": return "du matin";
115
- case "APRES-MIDI": return "de l’après-midi";
116
- case "SOIR": return "du soir";
117
- default: return "";
119
+ case "MATIN":
120
+ return "du matin";
121
+ case "APRES-MIDI":
122
+ return "de l’après-midi";
123
+ case "SOIR":
124
+ return "du soir";
125
+ default:
126
+ return "";
118
127
  }
119
128
  }
120
129
  const BOILERPLATE_PATTERNS = [
@@ -132,18 +141,20 @@ const BOILERPLATE_PATTERNS = [
132
141
  /ouverture de la séance/i,
133
142
  /clo(?:t|̂)ure de la séance/i,
134
143
  ];
135
/**
 * Tell whether a summary label carries no useful title: it is missing, blank,
 * or matched by one of the BOILERPLATE_PATTERNS.
 */
const isBoilerplate = (label) => {
    if (!label?.trim()) {
        return true;
    }
    return BOILERPLATE_PATTERNS.some((rx) => rx.test(label));
};
136
145
  function extractLevel1Items(sommaire) {
137
146
  const level1 = asArray(sommaire?.sommaire1);
138
147
  return level1
139
- .map(el => ({
148
+ .map((el) => ({
140
149
  numero: toInt(el?.valeur_pts_odj),
141
150
  label: String(el?.titreStruct?.intitule ?? "").trim(),
142
151
  }))
143
- .filter(it => !!it.label)
152
+ .filter((it) => !!it.label)
144
153
  .sort((a, b) => a.numero - b.numero);
145
154
  }
146
/**
 * Remove one trailing `:`, `,`, `.` or `;` (with the whitespace around it)
 * from the end of a string, then trim the result.
 *
 * @param s Text to clean.
 * @returns The text without its final punctuation mark.
 */
function stripTrailingPunct(s) {
    const tail = /\s*([:,.;])\s*$/u.exec(s);
    // The pattern is anchored at the end, so a match always runs to the end of the string.
    const kept = tail === null ? s : s.slice(0, tail.index);
    return kept.trim();
}
147
158
  function dedupeSpeaker(raw) {
148
159
  let s = norm(raw);
149
160
  s = stripTrailingPunct(s);
@@ -158,7 +169,8 @@ function dedupeSpeaker(raw) {
158
169
  return s.replace(/\.\s*$/, "");
159
170
  }
160
171
  function decodeHtmlEntities(s) {
161
- return s.replace(/&#(\d+);/g, (_, d) => String.fromCharCode(parseInt(d, 10)))
172
+ return s
173
+ .replace(/&#(\d+);/g, (_, d) => String.fromCharCode(parseInt(d, 10)))
162
174
  .replace(/&#x([0-9a-fA-F]+);/g, (_, h) => String.fromCharCode(parseInt(h, 16)));
163
175
  }
164
176
  function fixApostrophes(s) {
@@ -169,7 +181,9 @@ function fixApostrophes(s) {
169
181
  out = out.replace(/\s+([,;:.!?])/g, "$1");
170
182
  return out;
171
183
  }
172
/**
 * Rewrite a leading "PRÉSIDENCE DE" / "PRESIDENCE DE" (any letter case) as
 * "Présidence de " and keep the rest of the title unchanged. Titles that do
 * not start with that wording are returned as is.
 *
 * @param text Title to rewrite.
 * @returns The rewritten title.
 */
function normalizeTitle(text) {
    const head = /^PR[ÉE]SIDENCE DE\b/i.exec(text);
    if (head === null) {
        return text;
    }
    return "Présidence de " + text.slice(head[0].length);
}
173
187
  function roleForSpeaker(labelOrQualite) {
174
188
  const s = (labelOrQualite || "").toLowerCase();
175
189
  if (/^(m\.|mme)?\s*(le|la)\s+pr[ée]sident(e)?\b/.test(s) || /\bpr[ée]sident[e]?\s+de\s+séance\b/.test(s))
@@ -177,7 +191,7 @@ function roleForSpeaker(labelOrQualite) {
177
191
  return "";
178
192
  }
179
193
  function readIntervenantMeta($block) {
180
- const int = $block.find('cri\\:intervenant').first();
194
+ const int = $block.find("cri\\:intervenant").first();
181
195
  if (int.length)
182
196
  return { mat: int.attr("mat") || undefined, nom: int.attr("nom") || undefined, qua: int.attr("qua") || undefined };
183
197
  const html = $block.html() || "";
@@ -220,6 +234,7 @@ function extractAndRemoveLeadingQualite($, $block) {
220
234
  else if (node.type === "text") {
221
235
  const t = norm(node.data || "");
222
236
  if (!t || /^[:.,;–—-]+$/.test(t)) {
237
+ ;
223
238
  node.data = "";
224
239
  return;
225
240
  }
@@ -230,11 +245,11 @@ function extractAndRemoveLeadingQualite($, $block) {
230
245
  }
231
246
  function sanitizeInterventionHtml($, $block) {
232
247
  const $clone = $block.clone();
233
- $clone.find('a[name]').remove();
248
+ $clone.find("a[name]").remove();
234
249
  $clone.find('div[align="right"]').remove();
235
- $clone.find('a.link').remove();
236
- $clone.find('img').remove();
237
- $clone.find('a#ameli_amendement_cri_phrase, a#ameli_amendement_cra_contenu, a#ameli_amendement_cra_objet').remove();
250
+ $clone.find("a.link").remove();
251
+ $clone.find("img").remove();
252
+ $clone.find("a#ameli_amendement_cri_phrase, a#ameli_amendement_cra_contenu, a#ameli_amendement_cra_objet").remove();
238
253
  $clone.find(".orateur_nom, .orateur_qualite").remove();
239
254
  let html = $clone.html() || "";
240
255
  html = html.replace(/<!--[\s\S]*?-->/g, "");
@@ -244,11 +259,14 @@ function extractSommaireForIntervals($, idx, intervals) {
244
259
  const inIv = (el) => elementInAnyInterval(el, idx, intervals);
245
260
  const root = $("body");
246
261
  const sommaire = { presidentSeance: { _: "" }, sommaire1: [] };
247
- // (1) Présidence (tm2) — première ligne dans l’intervalle
248
- const pres = root.find("p.tm2").filter((_, el) => inIv(el)).first();
262
+ // (1) Présidence (tm2) — première ligne dans l’intervalle
263
+ const pres = root
264
+ .find("p.tm2")
265
+ .filter((_, el) => inIv(el))
266
+ .first();
249
267
  if (pres.length)
250
268
  sommaire.presidentSeance = { _: norm(pres.text()) };
251
- // (2) Paras tm5 présents dans l’intervalle
269
+ // (2) Paras tm5 présents dans l’intervalle
252
270
  const paras = [];
253
271
  root.find("p.tm5").each((_, el) => {
254
272
  if (!inIv(el))
@@ -259,7 +277,7 @@ function extractSommaireForIntervals($, idx, intervals) {
259
277
  });
260
278
  if (paras.length)
261
279
  sommaire.para = paras.length === 1 ? paras[0] : paras;
262
- // (3) Items de 1er niveau (tm3) présents dans l’intervalle
280
+ // (3) Items de 1er niveau (tm3) présents dans l’intervalle
263
281
  const items = [];
264
282
  root.find("p.tm3").each((_, el) => {
265
283
  if (!inIv(el))
@@ -297,6 +315,7 @@ function extractMetadonnees($, filePath) {
297
315
  if (m)
298
316
  dateSeance = `${m[1]}-${m[2]}-${m[3]}`;
299
317
  }
318
+ dateSeance = toCRDate(dateSeance, null);
300
319
  return {
301
320
  dateSeance,
302
321
  dateSeanceJour: dateSeance,
@@ -311,7 +330,7 @@ function extractMetadonnees($, filePath) {
311
330
  diffusion: "",
312
331
  version: "1.0",
313
332
  environnement: "",
314
- heureGeneration: new Date()
333
+ heureGeneration: new Date(),
315
334
  };
316
335
  }
317
336
  function elementInAnyInterval(el, idx, intervals) {
@@ -7,3 +7,4 @@ export declare function replace(expr: Expression<string | null | undefined>, pat
7
7
  export declare function rtrim(expr: Expression<string | null | undefined>): import("kysely").RawBuilder<string>;
8
8
  export declare function toDateString(expr: Expression<Date | null | undefined>, format?: Expression<string>): import("kysely").RawBuilder<string>;
9
9
  export declare function norm(s?: string | null): string;
10
/**
 * Builds the compact compte-rendu timestamp: the date with its hyphens
 * removed, followed by hours, minutes, seconds and milliseconds
 * (`YYYYMMDDHHMMSSmmm` for a `YYYY-MM-DD` date).
 *
 * @param dateISO Date, expected as `YYYY-MM-DD`.
 * @param startTime Optional time containing `HH:MM:SS` (optionally `.mmm`);
 *                  when absent or not recognised the time part is all zeros.
 */
+ export declare function toCRDate(dateISO: string, startTime?: string | null): string;
package/lib/model/util.js CHANGED
@@ -22,5 +22,23 @@ export function toDateString(expr, format = sql.val(STANDARD_DATE_FORMAT)) {
22
22
  return sql `to_char(${expr}, ${format})`;
23
23
  }
24
24
  export function norm(s) {
25
- return (s || "").replace(/\u00A0/g, " ").replace(/\s+/g, " ").trim();
25
+ return (s || "")
26
+ .replace(/\u00A0/g, " ")
27
+ .replace(/\s+/g, " ")
28
+ .trim();
29
+ }
30
/**
 * Build the compact compte-rendu timestamp `YYYYMMDDHHMMSSmmm`.
 *
 * The date part is `dateISO` with its hyphens removed ("2025-07-16" gives
 * "20250716"). The time part comes from `startTime`:
 *  - "HH:MM:SS", "HH:MM:SS.mmm" and "HH:MM:SS.mmm+02:00" are read as before;
 *  - a time without seconds ("HH:MM") is now read too, instead of being
 *    silently turned into midnight;
 *  - a missing or unrecognised time gives "000000000".
 *
 * @param dateISO Date, expected as `YYYY-MM-DD`.
 * @param startTime Optional start time (may be null or undefined).
 * @returns The 17-character timestamp for a `YYYY-MM-DD` date.
 */
export function toCRDate(dateISO, startTime) {
    const yyyymmdd = dateISO.replace(/-/g, ""); // "20250716"
    let hh = "00", mm = "00", ss = "00", SSS = "000";
    if (startTime) {
        const full = startTime.match(/(\d{2}):(\d{2}):(\d{2})(?:\.(\d{3}))?/);
        if (full) {
            hh = full[1];
            mm = full[2];
            ss = full[3];
            SSS = full[4] || "000";
        }
        else {
            // Fallback for "HH:MM". The look-arounds prevent a time-zone offset
            // such as "+02:00" from being mistaken for the time of day.
            const short = startTime.match(/(?<![+\-\d:])(\d{2}):(\d{2})(?![:\d])/);
            if (short) {
                hh = short[1];
                mm = short[2];
            }
        }
    }
    return `${yyyymmdd}${hh}${mm}${ss}${SSS}`;
}
@@ -0,0 +1 @@
1
+ export {};