npm - @tricoteuses/senat - Versions diffs - 2.11.0 → 2.11.1 - Mend

@tricoteuses/senat 2.11.0 → 2.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/lib/loaders.d.ts +5 -0
package/lib/loaders.js +14 -9
package/lib/model/commission.d.ts +5 -0
package/lib/model/commission.js +263 -0
package/lib/model/{compte_rendu.js → seance.js} +47 -28
package/lib/model/util.d.ts +1 -0
package/lib/model/util.js +19 -1
package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
package/lib/scripts/retrieve_cr_commission.js +291 -0
package/lib/scripts/{retrieve_comptes_rendus.js → retrieve_cr_seance.js} +1 -1
package/lib/utils/cr_spliting.d.ts +22 -1
package/lib/utils/cr_spliting.js +273 -12
package/lib/utils/reunion_grouping.d.ts +3 -0
package/lib/utils/reunion_grouping.js +1 -1
package/package.json +3 -2
package/lib/raw_types/db.d.ts +0 -11389
package/lib/raw_types/db.js +0 -5
package/lib/model/{compte_rendu.d.ts → seance.d.ts} +0 -0
package/lib/scripts/{retrieve_comptes_rendus.d.ts → retrieve_cr_seance.d.ts} +0 -0

package/lib/loaders.d.ts CHANGED

@@ -9,6 +9,7 @@ import { CompteRendu } from "./types/compte_rendu";
 export { EnabledDatasets } from "./datasets";
 export declare const AGENDA_FOLDER = "agenda";
 export declare const COMPTES_RENDUS_FOLDER = "seances";
+export declare const COMMISSION_FOLDER = "commissions";
 export declare const DOSLEG_DOSSIERS_FOLDER = "dossiers";
 export declare const SCRUTINS_FOLDER = "scrutins";
 export declare const RAPPORT_FOLDER = "rap";
@@ -68,6 +69,10 @@ export declare function iterLoadSenatComptesRendusSeances(dataDir: string, sessi
     compteRendu: CompteRendu;
     session: number;
 }>;
+export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, session: number): Generator<{
+    compteRendu: CompteRendu;
+    session: number;
+}>;
 export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
 export declare function iterLoadSenatDossiersLegislatifsRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<RapportMetadata>>;
 export declare function iterLoadSenatDossiersLegislatifsTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<TexteMetadata>>;

package/lib/loaders.js CHANGED

@@ -6,6 +6,7 @@ import { UNDEFINED_SESSION } from "./types/sessions";
 export { EnabledDatasets } from "./datasets";
 export const AGENDA_FOLDER = "agenda";
 export const COMPTES_RENDUS_FOLDER = "seances";
+export const COMMISSION_FOLDER = "commissions";
 export const DOSLEG_DOSSIERS_FOLDER = "dossiers";
 export const SCRUTINS_FOLDER = "scrutins";
 export const RAPPORT_FOLDER = "rap";
@@ -61,13 +62,13 @@ export function* iterLoadSenatDebats(dataDir, session, options = {}) {
         yield debatItem;
     }
 }
-export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
-    const basePath = path.join(dataDir, COMPTES_RENDUS_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
-    if (!fs.existsSync(basePath))
+function* iterLoadSenatComptesRendusGeneric(dataDir, session, subFolder) {
+    const basePath = path.join(dataDir, subFolder, DATA_TRANSFORMED_FOLDER, String(session));
+    if (!fs.existsSync(basePath)) {
+        console.warn(`[SN] Missing basePath → ${basePath}`);
         return;
-    const files = (fs.readdirSync(basePath) || [])
-        .filter(f => f.endsWith(".json"))
-        .sort();
+    }
+    const files = (fs.readdirSync(basePath) || []).filter((f) => f.endsWith(".json")).sort();
     for (const fileName of files) {
         const filePath = path.join(basePath, fileName);
         try {
@@ -84,6 +85,12 @@ export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
         }
     }
 }
+export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
+    yield* iterLoadSenatComptesRendusGeneric(dataDir, session, COMPTES_RENDUS_FOLDER);
+}
+export function* iterLoadSenatComptesRendusCommissions(dataDir, session) {
+    yield* iterLoadSenatComptesRendusGeneric(dataDir, session, COMMISSION_FOLDER);
+}
 export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}) {
     for (const dossierLegislatifItem of iterLoadSenatItems(dataDir, datasets.dosleg.database, session, DOSLEG_DOSSIERS_FOLDER, options)) {
         yield dossierLegislatifItem;
@@ -193,9 +200,7 @@ export function* iterLoadSenatAgendasGrouped(dataDir, session) {
     const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? ""));
     if (!fs.existsSync(baseDir))
         return;
-    const files = (fs.readdirSync(baseDir) || [])
-        .filter((f) => f.startsWith("RUSN") && f.endsWith(".json"))
-        .sort();
+    const files = (fs.readdirSync(baseDir) || []).filter((f) => f.startsWith("RUSN") && f.endsWith(".json")).sort();
     for (const fileName of files) {
         const filePath = path.join(baseDir, fileName);
         let raw;

package/lib/model/commission.d.ts ADDED

@@ -0,0 +1,5 @@
+import * as cheerio from "cheerio";
+import { CompteRendu } from "../types/compte_rendu";
+import { GroupedReunion } from "../types/agenda";
+export declare function getRemainingTextAfterSpeakerHeader($: cheerio.CheerioAPI, $p: cheerio.Cheerio<any>): string;
+export declare function parseCommissionCRFromFile(htmlFilePath: string, best: GroupedReunion): CompteRendu | null;

package/lib/model/commission.js ADDED

@@ -0,0 +1,263 @@
+import * as cheerio from "cheerio";
+import path from "path";
+import fs from "fs";
+import { norm, toCRDate } from "./util";
+const PARA_h3_SEL = "p.sh_justify, p.sh_center, p.sh_marge, p[align], li, h3";
+function findDayRoot($, targetISO) {
+    let $root = $();
+    $("h2").each((_, el) => {
+        const txt = norm($(el).text());
+        const m = txt.match(/(?:Lundi|Mardi|Mercredi|Jeudi|Vendredi|Samedi|Dimanche)\s+(.+)$/i);
+        const iso = m ? frDateToISO(m[1]) : undefined;
+        if (iso === targetISO && $root.length === 0)
+            $root = $(el);
+    });
+    return $root;
+}
+function normalizeSpaces(s) {
+    return s.replace(/[\u00A0\u202F\u2009]/g, " ");
+}
+function stripIntroPunct(s) {
+    return s.replace(/^[\s]*[.:;]?\s*(?:[–—-]\s*)+/u, "");
+}
+function collectLeadingHeaderStrongEls($, $clone) {
+    const els = [];
+    const nodes = $clone.contents().toArray();
+    for (const node of nodes) {
+        if (node.type === "text") {
+            if (norm(node.data || ""))
+                break;
+            continue;
+        }
+        if (node.type === "tag") {
+            const $n = $(node);
+            if ($n.is("strong, b")) {
+                els.push(node);
+                continue;
+            }
+            if ($n.is("a") && $n.children("strong, b").length) {
+                $n.children("strong, b").each((_, el) => {
+                    els.push($(el));
+                });
+                continue;
+            }
+            break;
+        }
+    }
+    return els;
+}
+// Remove orateur's name from text and clean intro punct
+export function getRemainingTextAfterSpeakerHeader($, $p) {
+    const $clone = $p.clone();
+    // 1) Remove <strong> at start
+    const headerStrongEls = collectLeadingHeaderStrongEls($, $clone);
+    for (const el of headerStrongEls)
+        $(el).remove();
+    // 2) normalize + clean intro punct
+    let remainingHtml = $clone.html() || "";
+    remainingHtml = normalizeSpaces(cheerio.load(remainingHtml).text());
+    remainingHtml = stripIntroPunct(remainingHtml);
+    const remainingText = norm(remainingHtml || "");
+    return remainingText;
+}
+function buildPointsFromParagraphs($, paras) {
+    const points = [];
+    let ordreAbsoluSeance = 0;
+    const normSpeaker = (s) => s
+        .normalize("NFKC")
+        .replace(/\s+/g, " ")
+        .replace(/[:\.]\s*$/, "")
+        .trim();
+    const normQual = (s) => s
+        .normalize("NFKC")
+        .replace(/\s+/g, " ")
+        .replace(/^\s*,\s*|\s+$/g, "")
+        .replace(/[\s\u00A0]*[.,;:–—-]+$/u, "")
+        .trim();
+    let currentOrateur = null;
+    let currentQualite = "";
+    let currentTexte = "";
+    function isPresidentQual(qual) {
+        return /\bprésident(e)?\b/i.test(qual);
+    }
+    // Flush the buffered speaker’s text into points[] if any.
+    function flush() {
+        if (!currentOrateur || !currentTexte.trim())
+            return;
+        ordreAbsoluSeance++;
+        points.push({
+            code_grammaire: "PAROLE_GENERIQUE",
+            roledebat: isPresidentQual(currentQualite) ? "président" : "",
+            ordre_absolu_seance: String(ordreAbsoluSeance),
+            orateurs: { orateur: { nom: currentOrateur, id: "", qualite: currentQualite || "" } },
+            texte: { _: currentTexte.trim() },
+        });
+        currentOrateur = null;
+        currentQualite = "";
+        currentTexte = "";
+    }
+    function addPoint(payload) {
+        ordreAbsoluSeance++;
+        points.push({ ...payload, ordre_absolu_seance: String(ordreAbsoluSeance) });
+    }
+    for (const $p of paras) {
+        if ($p.closest("table").length)
+            continue;
+        const tagName = ($p.prop("tagName") || "").toString().toLowerCase();
+        const rawText = ($p.text() || "").replace(/\u00a0/g, " ").trim();
+        const text = norm(rawText);
+        if (!text || text.length <= 3)
+            continue;
+        const html = ($p.html() || "").trim();
+        const italicSpans = $p.find("i, em, span[style*='italic']");
+        const firstItalicOuter = italicSpans.length ? $(italicSpans[0]).prop("outerHTML") || "" : "";
+        const htmlBeforeFirstItalic = firstItalicOuter ? html.split(firstItalicOuter)[0].trim() : "";
+        const isPureItalic = italicSpans.length > 0 && italicSpans.length === $p.find("span,i,em").length && htmlBeforeFirstItalic === "";
+        if (tagName === "h3") {
+            flush();
+            addPoint({
+                code_style: "Titre",
+                code_grammaire: "TITRE_TEXTE_DISCUSSION",
+                texte: { _: text },
+            });
+            continue;
+        }
+        const boldSpans = $p.find("strong, b");
+        const joinedBold = norm(boldSpans
+            .map((_, el) => $(el).text() || "")
+            .get()
+            .join(""));
+        const [namePartRaw, qualPartRaw] = joinedBold.split(/\s*,\s+/, 2);
+        const namePart = namePartRaw ? normSpeaker(namePartRaw) : "";
+        const qualPart = qualPartRaw ? normQual(qualPartRaw) : "";
+        const looksLikeName = namePart.length > 3 && /^(M\.|Mme)[\s\u00A0\u202F]+/i.test(namePart);
+        const startsWithName = namePart && text.startsWith(namePart);
+        const isNewSpeaker = looksLikeName && startsWithName && namePart !== currentOrateur;
+        if (isNewSpeaker) {
+            flush();
+            currentOrateur = namePart;
+            currentQualite = qualPart;
+            const remainingText = getRemainingTextAfterSpeakerHeader($, $p);
+            currentTexte = remainingText;
+            continue;
+        }
+        if (isPureItalic || (!joinedBold && !currentOrateur && text)) {
+            flush();
+            addPoint({
+                code_style: "Info Italiques",
+                code_grammaire: "PAROLE_GENERIQUE",
+                texte: { _: "<i>" + text + "</i>" },
+            });
+            continue;
+        }
+        // concat text because same orateur
+        if (currentOrateur) {
+            const removeOrateurFromText = getRemainingTextAfterSpeakerHeader($, $p);
+            currentTexte += (currentTexte ? "<br/><br/>" : "") + removeOrateurFromText;
+            continue;
+        }
+    }
+    flush();
+    return points;
+}
+function frDateToISO(s) {
+    if (!s)
+        return;
+    const months = {
+        janvier: 1,
+        fevrier: 2,
+        février: 2,
+        mars: 3,
+        avril: 4,
+        mai: 5,
+        juin: 6,
+        juillet: 7,
+        aout: 8,
+        août: 8,
+        septembre: 9,
+        octobre: 10,
+        novembre: 11,
+        decembre: 12,
+        décembre: 12,
+    };
+    const m = norm(s).match(/^(\d{1,2})\s+([A-Za-zéûôîà]+)\s+(\d{4})$/i);
+    if (!m)
+        return;
+    const d = Number(m[1]);
+    const mon = months[m[2].toLowerCase()];
+    const y = Number(m[3]);
+    if (!mon)
+        return;
+    return `${y}-${String(mon).padStart(2, "0")}-${String(d).padStart(2, "0")}`;
+}
+export function parseCommissionCRFromFile(htmlFilePath, best) {
+    try {
+        const raw = fs.readFileSync(htmlFilePath, "utf8");
+        const $ = cheerio.load(raw, { xmlMode: false });
+        const dateISO = best.date;
+        const dateSeance = toCRDate(dateISO, best.startTime);
+        const $dayRoot = findDayRoot($, dateISO);
+        if ($dayRoot.length === 0) {
+            console.warn(`[COM-CR][parse] day root not found for ${dateISO} in ${path.basename(htmlFilePath)}`);
+            return null;
+        }
+        let points = [];
+        // Take all paragraphs/h3 until next h2
+        const dayParas = [];
+        let $cursor = $dayRoot.next();
+        while ($cursor.length && !$cursor.is("h2")) {
+            if ($cursor.is("h3")) {
+                dayParas.push($cursor);
+            }
+            if ($cursor.is(PARA_h3_SEL)) {
+                dayParas.push($cursor);
+            }
+            else {
+                const $ps = $cursor.find(PARA_h3_SEL);
+                if ($ps.length) {
+                    $ps.each((_, p) => {
+                        dayParas.push($(p));
+                    });
+                }
+            }
+            $cursor = $cursor.next();
+        }
+        const allDayPoints = buildPointsFromParagraphs($, dayParas);
+        if (allDayPoints.length > 0)
+            points = allDayPoints;
+        const session = dateISO.slice(5, 7) >= "10"
+            ? `${dateISO.slice(0, 4)}-${Number(dateISO.slice(0, 4)) + 1}`
+            : `${Number(dateISO.slice(0, 4)) - 1}-${dateISO.slice(0, 4)}`;
+        const contenu = {
+            quantiemes: { journee: dateISO, session },
+            point: points,
+        };
+        const metadonnees = {
+            dateSeance: dateSeance,
+            dateSeanceJour: dateISO,
+            numSeanceJour: "",
+            numSeance: "",
+            typeAssemblee: "SN",
+            legislature: "",
+            session,
+            nomFichierJo: path.basename(htmlFilePath),
+            validite: "non-certifie",
+            etat: "definitif",
+            diffusion: "publique",
+            version: "1",
+            environnement: "prod",
+            heureGeneration: new Date(),
+        };
+        return {
+            uid: best.uid.replace(/^RUSN/, "CRC"),
+            seanceRef: best.uid,
+            sessionRef: session,
+            metadonnees,
+            contenu,
+        };
+    }
+    catch (e) {
+        console.error(`[COM-CR][parse] error file=${path.basename(htmlFilePath)}:`, e);
+        return null;
+    }
+}

package/lib/model/{compte_rendu.js → seance.js} RENAMED

@@ -2,9 +2,9 @@ import fs from "fs";
 import * as cheerio from "cheerio";
 import path from "path";
 import { computeIntervalsBySlot } from "../utils/cr_spliting";
-import { norm } from "./util";
-const asArray = (x) => x == null ? [] : Array.isArray(x) ? x : [x];
-const toInt = (s) => Number.isFinite(Number(s)) ? Number(s) : Number.POSITIVE_INFINITY;
+import { norm, toCRDate } from "./util";
+const asArray = (x) => (x == null ? [] : Array.isArray(x) ? x : [x]);
+const toInt = (s) => (Number.isFinite(Number(s)) ? Number(s) : Number.POSITIVE_INFINITY);
 export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firstSlotOfDay) {
     try {
         const raw = fs.readFileSync(xmlFilePath, "utf8");
@@ -13,7 +13,7 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
         const order = $("body *").toArray();
         const idx = new Map(order.map((el, i) => [el, i]));
         const intervalsAll = computeIntervalsBySlot($, idx, firstSlotOfDay);
-        const intervals = intervalsAll.filter(iv => iv.slot === wantedSlot);
+        const intervals = intervalsAll.filter((iv) => iv.slot === wantedSlot);
         if (intervals.length === 0) {
             console.warn(`[CRI] no intervals for ${path.basename(xmlFilePath)} [${wantedSlot}]`);
             return null;
@@ -35,7 +35,8 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
             if (!elementInAnyInterval(block, idx, intervals))
                 return;
             const $block = $(block);
-            $block.find([
+            $block
+                .find([
                 "p[class^='titre_S']",
                 "p.mention_titre",
                 "p.intitule_titre",
@@ -45,7 +46,8 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
                 "p.intitule_article",
                 "p.mention_section",
                 "p.intitule_section",
-            ].join(",")).remove();
+            ].join(","))
+                .remove();
             const firstP = $block.find("p").first();
             const speakerLabelRaw = firstP.find(".orateur_nom").text() || firstP.find("a.lien_senfic").text() || "";
             const speakerLabel = dedupeSpeaker(speakerLabelRaw);
@@ -98,7 +100,7 @@ export function parseYYYYMMDD(yyyymmdd) {
 }
 export function deriveTitreObjetFromSommaire(sommaire, slot) {
     const items = extractLevel1Items(sommaire);
-    const meaningful = items.filter(it => !isBoilerplate(it.label));
+    const meaningful = items.filter((it) => !isBoilerplate(it.label));
     if (meaningful.length === 0) {
         return {
             titre: `Séance publique ${slotLabel(slot)}`,
@@ -106,15 +108,22 @@ export function deriveTitreObjetFromSommaire(sommaire, slot) {
         };
     }
     const titre = meaningful[0].label;
-    const objet = meaningful.slice(0, 3).map(it => it.label).join(" ; ");
+    const objet = meaningful
+        .slice(0, 3)
+        .map((it) => it.label)
+        .join(" ; ");
     return { titre, objet };
 }
 function slotLabel(slot) {
     switch (slot) {
-        case "MATIN": return "du matin";
-        case "APRES-MIDI": return "de l’après-midi";
-        case "SOIR": return "du soir";
-        default: return "";
+        case "MATIN":
+            return "du matin";
+        case "APRES-MIDI":
+            return "de l’après-midi";
+        case "SOIR":
+            return "du soir";
+        default:
+            return "";
     }
 }
 const BOILERPLATE_PATTERNS = [
@@ -132,18 +141,20 @@ const BOILERPLATE_PATTERNS = [
     /ouverture de la séance/i,
     /clo(?:t|̂)ure de la séance/i,
 ];
-const isBoilerplate = (label) => !label?.trim() || BOILERPLATE_PATTERNS.some(rx => rx.test(label));
+const isBoilerplate = (label) => !label?.trim() || BOILERPLATE_PATTERNS.some((rx) => rx.test(label));
 function extractLevel1Items(sommaire) {
     const level1 = asArray(sommaire?.sommaire1);
     return level1
-        .map(el => ({
+        .map((el) => ({
         numero: toInt(el?.valeur_pts_odj),
         label: String(el?.titreStruct?.intitule ?? "").trim(),
     }))
-        .filter(it => !!it.label)
+        .filter((it) => !!it.label)
         .sort((a, b) => a.numero - b.numero);
 }
-function stripTrailingPunct(s) { return s.replace(/\s*([:,.;])\s*$/u, "").trim(); }
+function stripTrailingPunct(s) {
+    return s.replace(/\s*([:,.;])\s*$/u, "").trim();
+}
 function dedupeSpeaker(raw) {
     let s = norm(raw);
     s = stripTrailingPunct(s);
@@ -158,7 +169,8 @@ function dedupeSpeaker(raw) {
     return s.replace(/\.\s*$/, "");
 }
 function decodeHtmlEntities(s) {
-    return s.replace(/&#(\d+);/g, (_, d) => String.fromCharCode(parseInt(d, 10)))
+    return s
+        .replace(/&#(\d+);/g, (_, d) => String.fromCharCode(parseInt(d, 10)))
         .replace(/&#x([0-9a-fA-F]+);/g, (_, h) => String.fromCharCode(parseInt(h, 16)));
 }
 function fixApostrophes(s) {
@@ -169,7 +181,9 @@ function fixApostrophes(s) {
     out = out.replace(/\s+([,;:.!?])/g, "$1");
     return out;
 }
-function normalizeTitle(text) { return text.replace(/^PR[ÉE]SIDENCE DE\b/i, "Présidence de "); }
+function normalizeTitle(text) {
+    return text.replace(/^PR[ÉE]SIDENCE DE\b/i, "Présidence de ");
+}
 function roleForSpeaker(labelOrQualite) {
     const s = (labelOrQualite || "").toLowerCase();
     if (/^(m\.|mme)?\s*(le|la)\s+pr[ée]sident(e)?\b/.test(s) || /\bpr[ée]sident[e]?\s+de\s+séance\b/.test(s))
@@ -177,7 +191,7 @@ function roleForSpeaker(labelOrQualite) {
     return "";
 }
 function readIntervenantMeta($block) {
-    const int = $block.find('cri\\:intervenant').first();
+    const int = $block.find("cri\\:intervenant").first();
     if (int.length)
         return { mat: int.attr("mat") || undefined, nom: int.attr("nom") || undefined, qua: int.attr("qua") || undefined };
     const html = $block.html() || "";
@@ -220,6 +234,7 @@ function extractAndRemoveLeadingQualite($, $block) {
         else if (node.type === "text") {
             const t = norm(node.data || "");
             if (!t || /^[:.,;–—-]+$/.test(t)) {
+                ;
                 node.data = "";
                 return;
             }
@@ -230,11 +245,11 @@ function extractAndRemoveLeadingQualite($, $block) {
 }
 function sanitizeInterventionHtml($, $block) {
     const $clone = $block.clone();
-    $clone.find('a[name]').remove();
+    $clone.find("a[name]").remove();
     $clone.find('div[align="right"]').remove();
-    $clone.find('a.link').remove();
-    $clone.find('img').remove();
-    $clone.find('a#ameli_amendement_cri_phrase, a#ameli_amendement_cra_contenu, a#ameli_amendement_cra_objet').remove();
+    $clone.find("a.link").remove();
+    $clone.find("img").remove();
+    $clone.find("a#ameli_amendement_cri_phrase, a#ameli_amendement_cra_contenu, a#ameli_amendement_cra_objet").remove();
     $clone.find(".orateur_nom, .orateur_qualite").remove();
     let html = $clone.html() || "";
     html = html.replace(/<!--[\s\S]*?-->/g, "");
@@ -244,11 +259,14 @@ function extractSommaireForIntervals($, idx, intervals) {
     const inIv = (el) => elementInAnyInterval(el, idx, intervals);
     const root = $("body");
     const sommaire = { presidentSeance: { _: "" }, sommaire1: [] };
-    // (1) Présidence (tm2) — première ligne dans l’intervalle
-    const pres = root.find("p.tm2").filter((_, el) => inIv(el)).first();
+    // (1) Présidence (tm2) — première ligne dans l’intervalle
+    const pres = root
+        .find("p.tm2")
+        .filter((_, el) => inIv(el))
+        .first();
     if (pres.length)
         sommaire.presidentSeance = { _: norm(pres.text()) };
-    // (2) Paras tm5 présents dans l’intervalle
+    // (2) Paras tm5 présents dans l’intervalle
     const paras = [];
     root.find("p.tm5").each((_, el) => {
         if (!inIv(el))
@@ -259,7 +277,7 @@ function extractSommaireForIntervals($, idx, intervals) {
     });
     if (paras.length)
         sommaire.para = paras.length === 1 ? paras[0] : paras;
-    // (3) Items de 1er niveau (tm3) présents dans l’intervalle
+    // (3) Items de 1er niveau (tm3) présents dans l’intervalle
     const items = [];
     root.find("p.tm3").each((_, el) => {
         if (!inIv(el))
@@ -297,6 +315,7 @@ function extractMetadonnees($, filePath) {
         if (m)
             dateSeance = `${m[1]}-${m[2]}-${m[3]}`;
     }
+    dateSeance = toCRDate(dateSeance, null);
     return {
         dateSeance,
         dateSeanceJour: dateSeance,
@@ -311,7 +330,7 @@ function extractMetadonnees($, filePath) {
         diffusion: "",
         version: "1.0",
         environnement: "",
-        heureGeneration: new Date()
+        heureGeneration: new Date(),
     };
 }
 function elementInAnyInterval(el, idx, intervals) {

package/lib/model/util.d.ts CHANGED

@@ -7,3 +7,4 @@ export declare function replace(expr: Expression<string | null | undefined>, pat
 export declare function rtrim(expr: Expression<string | null | undefined>): import("kysely").RawBuilder<string>;
 export declare function toDateString(expr: Expression<Date | null | undefined>, format?: Expression<string>): import("kysely").RawBuilder<string>;
 export declare function norm(s?: string | null): string;
+export declare function toCRDate(dateISO: string, startTime?: string | null): string;

package/lib/model/util.js CHANGED

@@ -22,5 +22,23 @@ export function toDateString(expr, format = sql.val(STANDARD_DATE_FORMAT)) {
     return sql `to_char(${expr}, ${format})`;
 }
 export function norm(s) {
-    return (s || "").replace(/\u00A0/g, " ").replace(/\s+/g, " ").trim();
+    return (s || "")
+        .replace(/\u00A0/g, " ")
+        .replace(/\s+/g, " ")
+        .trim();
+}
+export function toCRDate(dateISO, startTime) {
+    const yyyymmdd = dateISO.replace(/-/g, ""); // "20250716"
+    let hh = "00", mm = "00", ss = "00", SSS = "000";
+    if (startTime) {
+        // accepte "HH:MM:SS", "HH:MM:SS.mmm", "HH:MM:SS.mmm+02:00"
+        const m = startTime.match(/(\d{2}):(\d{2}):(\d{2})(?:\.(\d{3}))?/);
+        if (m) {
+            hh = m[1];
+            mm = m[2];
+            ss = m[3];
+            SSS = m[4] || "000";
+        }
+    }
+    return `${yyyymmdd}${hh}${mm}${ss}${SSS}`;
 }

package/lib/scripts/retrieve_cr_commission.d.ts ADDED

@@ -0,0 +1 @@
+export {};