npm - @tricoteuses/senat - Versions diffs - 2.11.3 → 2.11.5 - Mend

@tricoteuses/senat 2.11.3 → 2.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/README.md +7 -3
package/lib/model/commission.d.ts +5 -1
package/lib/model/commission.js +25 -14
package/lib/model/dosleg.d.ts +2 -0
package/lib/model/dosleg.js +60 -0
package/lib/scripts/convert_data.js +22 -1
package/lib/scripts/retrieve_agenda.js +14 -4
package/lib/scripts/retrieve_cr_commission.js +30 -19
package/lib/scripts/retrieve_cr_seance.js +21 -27
package/lib/utils/cr_spliting.d.ts +12 -4
package/lib/utils/cr_spliting.js +44 -60
package/lib/utils/reunion_grouping.d.ts +1 -1
package/lib/utils/reunion_grouping.js +73 -39
package/package.json +1 -1

package/README.md CHANGED

@@ -1,6 +1,6 @@
 # Tricoteuses-Senat
-## _Retrieve, clean up & handle  French Sénat's open data_
+## _Retrieve, clean up & handle French Sénat's open data_
 ## Requirements
@@ -42,6 +42,7 @@ npm run data:download ../senat-data -- [--categories All]
 ```
 Data from other sources is also available :
 ```bash
 # Retrieval of textes and rapports from Sénat's website
 # Available options for optional `formats` parameter : xml, html, pdf
@@ -57,8 +58,11 @@ npm run data:parse_textes_lois ../senat-data
 # Retrieval (& parsing) of agenda from Sénat's website
 npm run data:retrieve_agenda ../senat-data -- --fromSession 2022 [--parseAgenda]
-# Retrieval (& parsing) of comptes-rendus des débats from Sénat's website
-npm run data:retrieve_comptes_rendus ../senat-data -- [--parseDebats]
+# Retrieval (& parsing) of comptes-rendus de séance from Sénat's data
+npm run data:retrieve_cr_seance ../senat-data -- [--parseDebats]
+# Retrieval (& parsing) of comptes-rendus de commissions from Sénat's website
+npm run data:retrieve_cr_commission ../senat-data -- [--parseDebats]
 # Retrieval of sénateurs' pictures from Sénat's website
 npm run data:retrieve_senateurs_photos ../senat-data

package/lib/model/commission.d.ts CHANGED

@@ -2,4 +2,8 @@ import * as cheerio from "cheerio";
 import { CompteRendu } from "../types/compte_rendu";
 import { GroupedReunion } from "../types/agenda";
 export declare function getRemainingTextAfterSpeakerHeader($: cheerio.CheerioAPI, $p: cheerio.Cheerio<any>): string;
-export declare function parseCommissionCRFromFile(htmlFilePath: string, best: GroupedReunion): CompteRendu | null;
+export declare function parseCommissionCRFromFile(htmlFilePath: string, best?: GroupedReunion, fallback?: {
+    dateISO: string;
+    hourShort: string | null;
+    organe?: string | null;
+}): CompteRendu | null;

package/lib/model/commission.js CHANGED

@@ -2,6 +2,8 @@ import * as cheerio from "cheerio";
 import path from "path";
 import fs from "fs";
 import { norm, toCRDate } from "./util";
+import { makeTypeGroupUid } from "../utils/reunion_grouping";
+import { hourShortToStartTime } from "../utils/cr_spliting";
 const PARA_h3_SEL = "p.sh_justify, p.sh_center, p.sh_marge, p[align], li, h3";
 function findDayRoot($, targetISO) {
     let $root = $();
@@ -190,41 +192,50 @@ function frDateToISO(s) {
         return;
     return `${y}-${String(mon).padStart(2, "0")}-${String(d).padStart(2, "0")}`;
 }
-export function parseCommissionCRFromFile(htmlFilePath, best) {
+export function parseCommissionCRFromFile(htmlFilePath, best, fallback) {
     try {
+        if (!best && !fallback) {
+            console.warn(`[COM-CR][parse] missing both 'best' and 'fallback' for ${path.basename(htmlFilePath)}`);
+            return null;
+        }
         const raw = fs.readFileSync(htmlFilePath, "utf8");
         const $ = cheerio.load(raw, { xmlMode: false });
-        const dateISO = best.date;
-        const dateSeance = toCRDate(dateISO, best.startTime);
+        // --- champs déterminés depuis best OU fallback (aucun fallback via filename) ---
+        const dateISO = best?.date ?? fallback.dateISO;
+        const startTime = best?.startTime ?? hourShortToStartTime(fallback.hourShort);
+        const organe = best?.organe ?? fallback?.organe ?? undefined;
+        // UIDs alignés sur makeTypeGroupUid (RUSN…) mais CR = RUSN → CRC
+        const seanceRef = best?.uid ?? makeTypeGroupUid(dateISO, "COM", fallback.hourShort ?? "NA", organe);
+        const uid = seanceRef.replace(/^RU/, "CRC");
+        const dateSeance = toCRDate(dateISO, startTime);
+        // --- scope du jour ---
         const $dayRoot = findDayRoot($, dateISO);
         if ($dayRoot.length === 0) {
             console.warn(`[COM-CR][parse] day root not found for ${dateISO} in ${path.basename(htmlFilePath)}`);
             return null;
         }
-        let points = [];
-        // Take all paragraphs/h3 until next h2
+        // --- collecte des paragraphes/h3 jusqu’au prochain h2 ---
         const dayParas = [];
         let $cursor = $dayRoot.next();
         while ($cursor.length && !$cursor.is("h2")) {
-            if ($cursor.is("h3")) {
+            if ($cursor.is("h3"))
                 dayParas.push($cursor);
-            }
             if ($cursor.is(PARA_h3_SEL)) {
                 dayParas.push($cursor);
             }
             else {
                 const $ps = $cursor.find(PARA_h3_SEL);
-                if ($ps.length) {
+                if ($ps.length)
                     $ps.each((_, p) => {
                         dayParas.push($(p));
                     });
-                }
             }
             $cursor = $cursor.next();
         }
+        // --- points ---
         const allDayPoints = buildPointsFromParagraphs($, dayParas);
-        if (allDayPoints.length > 0)
-            points = allDayPoints;
+        const points = allDayPoints.length > 0 ? allDayPoints : [];
+        // --- session ---
         const session = dateISO.slice(5, 7) >= "10"
             ? `${dateISO.slice(0, 4)}-${Number(dateISO.slice(0, 4)) + 1}`
             : `${Number(dateISO.slice(0, 4)) - 1}-${dateISO.slice(0, 4)}`;
@@ -233,7 +244,7 @@ export function parseCommissionCRFromFile(htmlFilePath, best) {
             point: points,
         };
         const metadonnees = {
-            dateSeance: dateSeance,
+            dateSeance,
             dateSeanceJour: dateISO,
             numSeanceJour: "",
             numSeance: "",
@@ -249,8 +260,8 @@ export function parseCommissionCRFromFile(htmlFilePath, best) {
             heureGeneration: new Date(),
         };
         return {
-            uid: best.uid.replace(/^RUSN/, "CRC"),
-            seanceRef: best.uid,
+            uid, // ex: CRC20240117IDC…-HHMM
+            seanceRef, // ex: RUSN20240117IDC…-HHMM
             sessionRef: session,
             metadonnees,
             contenu,

package/lib/model/dosleg.d.ts CHANGED

@@ -13,6 +13,8 @@ export declare function findAuteurs(): Promise<{
     prenom: any;
     matricule: any;
 }[]>;
+export declare function getCodeActeLecture(codeNatureDossier: string, typeLecture: string, assemblee: string): string | null;
+export declare function getCodeActeTexte(codeParent: string | null, texteOrigine: string): string | null;
 export type DossierLegislatifResult = InferResult<typeof findAllDossiersQuery>[0];
 export type AuteurResult = InferResult<typeof findAuteursQuery>[0];
 export {};

package/lib/model/dosleg.js CHANGED

@@ -235,3 +235,63 @@ export async function findAuteurs() {
     return findAuteursQuery
         .execute();
 }
+export function getCodeActeLecture(codeNatureDossier, typeLecture, assemblee) {
+    const codeAssemblee = assemblee === "Sénat" ? "SN" : assemblee === "Assemblée nationale" ? "AN" : null;
+    if (typeLecture === "Commission mixte paritaire") {
+        return "CMP";
+    }
+    if (!codeAssemblee) {
+        return null;
+    }
+    if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Première lecture") {
+        return `${codeAssemblee}1`;
+    }
+    if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Deuxième lecture") {
+        return `${codeAssemblee}2`;
+    }
+    if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Troisième lecture") {
+        return `${codeAssemblee}3`;
+    }
+    if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Quatrième lecture") {
+        return `${codeAssemblee}4`;
+    }
+    if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Nouvelle lecture") {
+        return `${codeAssemblee}NLEC`;
+    }
+    if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Lecture définitive") {
+        return `${codeAssemblee}LDEF`;
+    }
+    if (["ppr"].includes(codeNatureDossier) && typeLecture === "Première lecture") {
+        return `${codeAssemblee}LUNI`;
+    }
+    return null;
+}
+export function getCodeActeTexte(codeParent, texteOrigine) {
+    if (codeParent === "CMP") {
+        if (texteOrigine === "adopté par l'Assemblée Nationale") {
+            return "CMP-DEBATS-SN";
+        }
+        else if (texteOrigine === "adopté définitivement par le Sénat") {
+            return "PROM";
+        }
+    }
+    if (texteOrigine === "transmis au Sénat" || texteOrigine === "déposé au Sénat") {
+        return `${codeParent}-DEPOT`;
+    }
+    // Rajouter une étape similaire -COM-FOND
+    if (texteOrigine === "de la commission" || texteOrigine === "de la commission (AN)" || texteOrigine === "résultat des travaux de la commission") {
+        return `${codeParent}-DEBATS-SEANCE`;
+    }
+    if (texteOrigine === "déposé à l'Assemblée Nationale") {
+        return `${codeParent}-DEPOT`;
+    }
+    if (texteOrigine === "retiré par l'auteur") {
+        return `${codeParent}-RTRINI`;
+    }
+    if (texteOrigine === "adopté par le Sénat" || texteOrigine === "adopté par l'Assemblée Nationale" || texteOrigine === "rejeté par le Sénat") {
+        return `${codeParent}-DEC`;
+    }
+    // Rajouter une étape CC-SAISIE
+    // Rajouter une étape PROM-
+    return null;
+}

package/lib/scripts/convert_data.js CHANGED

@@ -5,6 +5,7 @@ import path from "path";
 import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
 import { DATA_ORIGINAL_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
 import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAuteurs, findSenatRapportUrls, findSenatTexteUrls, } from "../model";
+import { getCodeActeLecture, getCodeActeTexte } from "../model/dosleg";
 import { UNDEFINED_SESSION } from "../types/sessions";
 import { getSessionFromDate, getSessionFromSignet } from "./datautil";
 import { commonOptions } from "./shared/cli_helpers";
@@ -120,8 +121,28 @@ async function convertDatasetDosLeg(dataDir, options) {
         }
         loiReorganizedDir = path.join(dossiersReorganizedDir, String(session));
         fs.ensureDirSync(loiReorganizedDir);
+        // Ajout de code_acte à chaque lecture du dossier
+        const lecturesWithCodeActe = (loi["lectures"] || []).map((lecture) => {
+            const lecturesAssemblee = (lecture["lectures_assemblee"] || []).map((lectureAss) => {
+                const codeParent = getCodeActeLecture(loi["code_nature_dossier"], lecture["type_lecture"], lectureAss["assemblee"]);
+                const textesWithCodeActe = (lectureAss["textes"] || []).map((texte) => ({
+                    ...texte,
+                    code_acte: getCodeActeTexte(codeParent, texte["origine"])
+                }));
+                return {
+                    ...lectureAss,
+                    code_acte: codeParent,
+                    textes: textesWithCodeActe
+                };
+            });
+            return {
+                ...lecture,
+                lectures_assemblee: lecturesAssemblee
+            };
+        });
+        const loiWithCodeActe = { ...loi, lectures: lecturesWithCodeActe };
         const scrutinFileName = `${loi["signet"]}.json`;
-        fs.writeJSONSync(path.join(loiReorganizedDir, scrutinFileName), loi, {
+        fs.writeJSONSync(path.join(loiReorganizedDir, scrutinFileName), loiWithCodeActe, {
             spaces: 2,
         });
     }

package/lib/scripts/retrieve_agenda.js CHANGED

@@ -96,12 +96,22 @@ async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPa
         return;
     const flatPath = path.join(transformedAgendaSessionDir, `${agendaFileName}.json`);
     fs.writeJSONSync(flatPath, parsedAgendaEvents, { spaces: 2 });
-    // 1) SP → groubed by (date, slot)
+    // 1) SP → grouped by (date, slot)
     const spGrouped = groupSeancePubliqueBySlot(parsedAgendaEvents);
-    if (spGrouped.length > 0) {
-        writeGroupsAsFiles(transformedAgendaSessionDir, spGrouped);
+    // a) on a un Record<TimeSlot, GroupedReunion[]>, on le transforme en array
+    const spGroups = Object.values(spGrouped).flat();
+    // b) (reco) trier pour stabilité, comme pour les NON-SP
+    const PARIS = "Europe/Paris";
+    spGroups.sort((a, b) => {
+        const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
+        const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
+        // en cas d’égalité, ordre par slot pour stabilité
+        return da - db || (a.slot || "UNKNOWN").localeCompare(b.slot || "UNKNOWN");
+    });
+    if (spGroups.length > 0) {
+        writeGroupsAsFiles(transformedAgendaSessionDir, spGroups);
     }
-    // 2) NON-SP → groubed by (date, organe, hour)
+    // 2) NON-SP → grouped by (date, organe, hour)
     const groupedBySuffix = groupNonSPByTypeOrganeHour(parsedAgendaEvents);
     for (const suffix of ["IDC", "IDM", "IDO", "IDI"]) {
         const groups = groupedBySuffix[suffix] || [];

package/lib/scripts/retrieve_cr_commission.js CHANGED

@@ -3,7 +3,7 @@ import assert from "assert";
 import path from "path";
 import * as cheerio from "cheerio";
 import { COMMISSION_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
-import { createCommissionGroupIfMissing, loadCommissionAgendaForDate, parseCommissionMetadataFromHtml, } from "../utils/cr_spliting";
+import { loadCommissionAgendaForDate, parseCommissionMetadataFromHtml, linkCRtoCommissionGroup, } from "../utils/cr_spliting";
 import { parseCommissionCRFromFile } from "../model/commission";
 import commandLineArgs from "command-line-args";
 import { commonOptions } from "./shared/cli_helpers";
@@ -245,29 +245,40 @@ async function retrieveCommissionCRs(options = {}) {
                         deltaMin = candidates[0].d;
                     }
                 }
-                if (best) {
-                    const cr = parseCommissionCRFromFile(htmlPath, best);
-                    if (!cr) {
-                        console.warn(`[COM-CR][TRANSFORM] parse failed for ${f} → ${best.uid}`);
-                    }
-                    else {
-                        const fileUid = cr.uid;
-                        const outPath = path.join(transformedSessionDir, `${fileUid}.json`);
-                        await fs.writeJSON(outPath, cr, { spaces: 2 });
-                        const npts = Array.isArray(cr.contenu.point) ? cr.contenu.point.length : cr.contenu.point ? 1 : 0;
-                        if (!options["silent"]) {
-                            console.log(`[COM-CR][TRANSFORM] saved ${path.basename(outPath)} (points=${npts})`);
-                        }
-                    }
+                // Parse CR (avec ou sans best)
+                const hourShort = toHourShort(day.openTime) ?? "NA";
+                const cr = parseCommissionCRFromFile(htmlPath, best ?? undefined, {
+                    dateISO: day.date,
+                    hourShort,
+                    organe: meta.organeDetected ?? null,
+                });
+                if (!cr) {
+                    console.warn(`[COM-CR][TRANSFORM] parse failed for ${f} → ${best ? best.uid : "NO-GROUP"}`);
                 }
                 else {
+                    const fileUid = cr.uid;
+                    const outPath = path.join(transformedSessionDir, `${fileUid}.json`);
+                    await fs.writeJSON(outPath, cr, { spaces: 2 });
+                    const npts = Array.isArray(cr.contenu.point) ? cr.contenu.point.length : cr.contenu.point ? 1 : 0;
+                    if (!options["silent"]) {
+                        console.log(`[COM-CR][TRANSFORM] saved ${path.basename(outPath)} (points=${npts})`);
+                    }
                     const hourShort = toHourShort(day.openTime) ?? "NA";
                     const titreGuess = meta.organeDetected || meta.organeTitleRaw || "Commission";
-                    const { uid, filePath } = await createCommissionGroupIfMissing(dataDir, day.date, meta.organeDetected ?? null, hourShort, titreGuess);
+                    // Si on a un match agenda, on force le groupUid existant (best.uid)
+                    const up = await linkCRtoCommissionGroup({
+                        dataDir,
+                        session: session,
+                        dateISO: day.date,
+                        organeDetected: meta.organeDetected ?? null,
+                        hourShort,
+                        crUid: fileUid,
+                        titreGuess,
+                        groupUid: best ? best.uid : undefined,
+                    });
                     if (!options["silent"]) {
-                        console.log(`[COM-CR][PRE-SPLIT][${session}] ${f} | ${day.date}` +
-                            (day.openTime ? ` ${day.openTime}` : ``) +
-                            ` → NO-MATCH → CREATED uid=${uid} file=${path.basename(filePath)}`);
+                        console.log(`[AGENDA][COM] Linked CR ${fileUid} → ${path.basename(up.filePath)} ` +
+                            `${up.created ? "[created]" : "[updated]"}`);
                     }
                 }
             }

package/lib/scripts/retrieve_cr_seance.js CHANGED

@@ -9,9 +9,9 @@ import fs from "fs-extra";
 import path from "path";
 import StreamZip from "node-stream-zip";
 import * as cheerio from "cheerio";
-import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, } from "../loaders";
+import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
 import { commonOptions } from "./shared/cli_helpers";
-import { deriveTitreObjetFromSommaire, parseCompteRenduSlotFromFile, parseYYYYMMDD, sessionStartYearFromDate } from "../model/seance";
+import { deriveTitreObjetFromSommaire, parseCompteRenduSlotFromFile, parseYYYYMMDD, sessionStartYearFromDate, } from "../model/seance";
 import { makeGroupUid } from "../utils/reunion_grouping";
 import { getSessionsFromStart } from "../types/sessions";
 import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
@@ -22,7 +22,7 @@ const optionsDefinitions = [
         help: "parse and convert comptes-rendus des débats into JSON",
         name: "parseDebats",
         type: Boolean,
-    }
+    },
 ];
 const options = commandLineArgs(optionsDefinitions);
 const CRI_ZIP_URL = "https://data.senat.fr/data/debats/cri.zip";
@@ -151,9 +151,7 @@ export async function retrieveCriXmlDump(dataDir, options = {}) {
         if (!(await fs.pathExists(originalSessionDir))) {
             continue;
         }
-        const xmlFiles = (await fs.readdir(originalSessionDir))
-            .filter((f) => /^d\d{8}\.xml$/i.test(f))
-            .sort();
+        const xmlFiles = (await fs.readdir(originalSessionDir)).filter((f) => /^d\d{8}\.xml$/i.test(f)).sort();
         const transformedSessionDir = path.join(transformedRoot, String(session));
         if (options["parseDebats"])
             await fs.ensureDir(transformedSessionDir);
@@ -224,31 +222,30 @@ main()
 async function linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, crUid, cr, session) {
     const groupedDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
     fs.ensureDirSync(groupedDir);
-    const groupedPath = path.join(groupedDir, 'RUSN' + yyyymmdd + 'IDS-' + slot + '.json');
-    let groups = [];
+    const groupedPath = path.join(groupedDir, `RUSN${yyyymmdd}IDS-${slot}.json`);
+    let group = null;
     if (fs.existsSync(groupedPath)) {
         try {
-            groups = JSON.parse(fs.readFileSync(groupedPath, "utf8"));
-            if (!Array.isArray(groups))
-                groups = [];
+            const parsed = JSON.parse(fs.readFileSync(groupedPath, "utf8"));
+            if (Array.isArray(parsed)) {
+                // Take correct slot if multiple or first one if no direct match ?
+                group = parsed.find((g) => g?.slot === slot) ?? parsed[0] ?? null;
+            }
+            else {
+                group = parsed;
+            }
         }
         catch (e) {
             console.warn(`[AGENDA] unreadable grouped JSON → ${groupedPath} (${e}) → recreating`);
-            groups = [];
+            group = null;
         }
     }
-    // find existing group with same slot
-    const sameSlot = groups.filter(g => g?.slot === slot);
-    let target = null;
-    if (sameSlot.length > 1) {
-        console.warn(`[AGENDA] multiple groups for ${yyyymmdd} ${slot} in ${groupedPath} → linking the first`);
-    }
-    target = sameSlot[0] ?? null;
     const dateISO = `${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`;
     const sommaire = cr?.metadonnees?.sommaire;
     const { titre: dTitre, objet: dObjet } = deriveTitreObjetFromSommaire(sommaire, slot);
-    if (!target) {
-        const newGroup = {
+    // Création si manquant
+    if (!group) {
+        group = {
             uid: makeGroupUid(dateISO, slot),
             chambre: "SN",
             date: dateISO,
@@ -262,13 +259,10 @@ async function linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, crUid, cr,
             events: [],
             compteRenduRefUid: crUid,
         };
-        groups.push(newGroup);
     }
     else {
-        target.compteRenduRefUid = crUid;
-    }
-    await fs.writeJSON(groupedPath, groups, { spaces: 2 });
-    if (!options["silent"]) {
-        console.log(`[AGENDA] Linked CR ${crUid} → ${path.basename(groupedPath)} [${slot}]`);
+        group.compteRenduRefUid = crUid;
     }
+    await fs.writeJSON(groupedPath, group, { spaces: 2 });
+    console.log(`[AGENDA] Linked CR ${crUid} → ${path.basename(groupedPath)} [${slot}]`);
 }

package/lib/utils/cr_spliting.d.ts CHANGED

@@ -18,11 +18,19 @@ export declare function parseCommissionMetadataFromHtml(html: string, sourceFile
     }[];
 };
 export declare function loadCommissionAgendaForDate(dataDir: string, yyyymmdd: string, session: number): Promise<GroupedReunion[]>;
-export declare function createCommissionGroupIfMissing(dataDir: string, dateISO: string, // "YYYY-MM-DD"
-organeDetected: string | null, // ex. "Commission des finances"
-hourShort: string | null, // "HHMM" | "NA"
-titreGuess?: string | null): Promise<{
+export declare function hourShortToStartTime(hourShort: string | null): string | null;
+export declare function linkCRtoCommissionGroup(opts: {
+    dataDir: string;
+    session: number;
+    dateISO: string;
+    organeDetected: string | null;
+    hourShort: string | null;
+    crUid: string;
+    titreGuess?: string | null;
+    groupUid?: string;
+}): Promise<{
     uid: string;
     filePath: string;
     created: boolean;
+    updated: boolean;
 }>;

package/lib/utils/cr_spliting.js CHANGED

@@ -2,8 +2,6 @@ import path from "path";
 import * as cheerio from "cheerio";
 import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
 import fs from "fs-extra";
-import { makeTypeGroupUid } from "./reunion_grouping";
-import { sessionStartYearFromDate } from "../model/seance";
 export function computeIntervalsBySlot($, idx, firstSlotOfDay) {
     const all = $("body *").toArray();
     const cuts = [{ pos: 0, hhmm: undefined }];
@@ -312,7 +310,7 @@ export async function loadCommissionAgendaForDate(dataDir, yyyymmdd, session) {
     }
     return out;
 }
-function hourShortToStartTime(hourShort) {
+export function hourShortToStartTime(hourShort) {
     if (!hourShort || hourShort === "NA")
         return null;
     if (!/^\d{4}$/.test(hourShort))
@@ -321,66 +319,52 @@ function hourShortToStartTime(hourShort) {
     const mm = hourShort.slice(2, 4);
     return `${hh}:${mm}`;
 }
-export async function createCommissionGroupIfMissing(dataDir, dateISO, // "YYYY-MM-DD"
-organeDetected, // ex. "Commission des finances"
-hourShort, // "HHMM" | "NA"
-titreGuess) {
-    const uid = makeTypeGroupUid(dateISO, "COM", hourShort ?? "NA", organeDetected ?? undefined);
-    const session = sessionStartYearFromDate(new Date(dateISO));
-    const dir = path.join(dataDir, "agenda", "transformed", String(session));
-    await fs.ensureDir(dir);
-    const filePath = path.join(dir, `${uid}.json`);
-    let groups = [];
+export async function linkCRtoCommissionGroup(opts) {
+    const { dataDir, session, dateISO, organeDetected, hourShort, crUid, titreGuess, groupUid } = opts;
+    const computedUid = crUid.replace(/^CRC/, "RU");
+    const uid = groupUid ?? computedUid; // <-- on respecte l’uid existant si fourni
+    const groupedDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
+    await fs.ensureDir(groupedDir);
+    const filePath = path.join(groupedDir, `${uid}.json`);
+    let group = null;
     let created = false;
-    if (await fs.pathExists(filePath)) {
-        try {
-            const raw = await fs.readFile(filePath, "utf8");
-            groups = JSON.parse(raw);
-            if (!Array.isArray(groups))
-                groups = [];
-        }
-        catch {
-            groups = [];
-        }
-        const exists = groups.some((g) => g?.uid === uid);
-        if (!exists) {
-            groups.push({
-                uid,
-                chambre: "SN",
-                date: dateISO,
-                slot: null,
-                type: organeDetected ?? "Commission",
-                startTime: hourShortToStartTime(hourShort),
-                endTime: null,
-                captationVideo: false,
-                titre: titreGuess ?? null,
-                objet: null,
-                reunions: [],
-                compteRenduRefUid: null,
-            });
-            await fs.writeJSON(filePath, groups, { spaces: 2 });
-            created = true;
+    try {
+        if (await fs.pathExists(filePath)) {
+            group = await fs.readJSON(filePath);
         }
     }
+    catch (e) {
+        console.warn(`[AGENDA][COM] Unreadable JSON → ${filePath} (${e?.message}) → will recreate`);
+    }
+    if (!group) {
+        // group = {
+        //   uid,
+        //   chambre: "SN",
+        //   date: dateISO,
+        //   type: organeDetected ?? "Commissions",
+        //   startTime: hourShortToStartTime(hourShort),
+        //   endTime: null,
+        //   captationVideo: false,
+        //   titre: titreGuess ?? "",
+        //   objet: "",
+        //   events: [],
+        //   compteRenduRefUid: crUid,
+        // }
+        // created = true
+    }
     else {
-        groups = [
-            {
-                uid,
-                chambre: "SN",
-                date: dateISO,
-                slot: null,
-                type: organeDetected ?? "Commission",
-                startTime: hourShortToStartTime(hourShort),
-                endTime: null,
-                captationVideo: false,
-                titre: titreGuess ?? null,
-                objet: null,
-                reunions: [],
-                compteRenduRefUid: null,
-            },
-        ];
-        await fs.writeJSON(filePath, groups, { spaces: 2 });
-        created = true;
+        group.compteRenduRefUid = crUid;
     }
-    return { uid, filePath, created };
+    // Lien CR
+    // Enrichir depuis CR si vide
+    // const sommaire = cr?.metadonnees?.sommaire as Sommaire | undefined;
+    // if (sommaire) {
+    //   const { titre: dTitre, objet: dObjet } = deriveTitreObjetFromSommaire(sommaire, undefined);
+    //   if (!group.titre && dTitre) group.titre = dTitre;
+    //   if ((!group.objet || !group.objet.trim()) && dObjet) group.objet = dObjet;
+    // } else if (!group.titre && titreGuess) {
+    //   group.titre = titreGuess;
+    // }
+    await fs.writeJSON(filePath, group, { spaces: 2 });
+    return { uid, filePath, created, updated: !created };
 }

package/lib/utils/reunion_grouping.d.ts CHANGED

@@ -1,7 +1,7 @@
 import { AgendaEvent, GroupedReunion, TimeSlot } from "../types/agenda";
 type KnownType = "SP" | "COM" | "MC" | "OD" | "ID";
 export declare function groupNonSPByTypeOrganeHour(events: AgendaEvent[]): Record<"IDC" | "IDM" | "IDO" | "IDI", GroupedReunion[]>;
-export declare function groupSeancePubliqueBySlot(events: AgendaEvent[]): GroupedReunion[];
+export declare function groupSeancePubliqueBySlot(events: AgendaEvent[]): Record<TimeSlot, GroupedReunion[]>;
 export declare function makeTypeGroupUid(dateISO: string, kind: KnownType, hourShort: string | null, organe?: string | null): string;
 export declare function makeGroupUid(date: string, slot: TimeSlot): string;
 export declare function formatYYYYMMDD(dateYYYYMMDD: string): string;

package/lib/utils/reunion_grouping.js CHANGED

@@ -2,20 +2,30 @@ import { DateTime } from "luxon";
 import { norm } from "../model/util";
 const PARIS = "Europe/Paris";
 const STOPWORDS = new Set([
-    "de", "du", "des",
-    "la", "le", "les", "l",
+    "de",
+    "du",
+    "des",
+    "la",
+    "le",
+    "les",
+    "l",
     "d",
     "et",
     "en",
-    "au", "aux",
+    "au",
+    "aux",
     "pour",
-    "sur", "sous", "à", "a", "aux",
+    "sur",
+    "sous",
+    "à",
+    "a",
+    "aux",
 ]);
 export function groupNonSPByTypeOrganeHour(events) {
     const out = { IDC: [], IDM: [], IDO: [], IDI: [] };
     if (!events?.length)
         return out;
-    const nonSP = events.filter(e => !isSeancePublique(e?.type));
+    const nonSP = events.filter((e) => !isSeancePublique(e?.type));
     if (nonSP.length === 0)
         return out;
     const buckets = new Map();
@@ -33,15 +43,17 @@ export function groupNonSPByTypeOrganeHour(events) {
     for (const [key, list] of buckets) {
         const [date, kindStr, hourShort] = key.split("|");
         const kind = kindStr;
-        const enriched = list.map(ev => {
+        const enriched = list
+            .map((ev) => {
             const { startISO, endISO } = deriveTimesForEvent(ev);
             return { ev, startISO: startISO ?? ev.startTime, endISO: endISO ?? ev.endTime };
-        }).sort((a, b) => {
-            const ta = a.startISO ? parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
-            const tb = b.startISO ? parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
+        })
+            .sort((a, b) => {
+            const ta = a.startISO ? (parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
+            const tb = b.startISO ? (parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
             return ta - tb;
         });
-        const startTime = enriched.find(x => !!x.startISO)?.startISO ?? null;
+        const startTime = enriched.find((x) => !!x.startISO)?.startISO ?? null;
         const endTime = enriched.reduce((acc, x) => {
             const de = x.endISO ? parseISO(x.endISO)?.toMillis() : null;
             const accMs = acc ? parseISO(acc)?.toMillis() : null;
@@ -52,7 +64,7 @@ export function groupNonSPByTypeOrganeHour(events) {
         const any = enriched[0]?.ev;
         const hour = hourShort !== "NA" ? hourShort : (hourShortFromISO(startTime) ?? hourShortFromOriginal(any?.timeOriginal));
         const uid = makeTypeGroupUid(date, kind, hour ?? null, any?.organe || undefined);
-        const suffix = (kind === "COM" ? "IDC" : kind === "MC" ? "IDM" : kind === 'OD' ? 'IDO' : "IDI");
+        const suffix = (kind === "COM" ? "IDC" : kind === "MC" ? "IDM" : kind === "OD" ? "IDO" : "IDI");
         const group = {
             uid,
             chambre: "SN",
@@ -61,10 +73,10 @@ export function groupNonSPByTypeOrganeHour(events) {
             organe: any?.organe || undefined,
             startTime,
             endTime,
-            captationVideo: enriched.some(x => x.ev.captationVideo === true),
-            titre: compactTitleList(enriched.map(x => x.ev.titre || "").filter(Boolean), 8),
-            objet: joinObjets(enriched.map(x => x.ev)),
-            events: enriched.map(x => x.ev),
+            captationVideo: enriched.some((x) => x.ev.captationVideo === true),
+            titre: compactTitleList(enriched.map((x) => x.ev.titre || "").filter(Boolean), 8),
+            objet: joinObjets(enriched.map((x) => x.ev)),
+            events: enriched.map((x) => x.ev),
         };
         out[suffix].push(group);
     }
@@ -78,11 +90,15 @@ export function groupNonSPByTypeOrganeHour(events) {
     return out;
 }
 export function groupSeancePubliqueBySlot(events) {
+    // Résultat à la manière de groupNonSPByTypeOrganeHour : objet de listes, ici indexé par créneau
+    const out = {};
+    const ensureBucket = (slot) => (out[slot] ??= []);
     if (!events?.length)
-        return [];
-    const sp = events.filter(e => isSeancePublique(e?.type));
+        return out;
+    const sp = events.filter((e) => isSeancePublique(e?.type));
     if (sp.length === 0)
-        return [];
+        return out;
+    // Regroupement par date
     const byDate = new Map();
     for (const e of sp) {
         const d = norm(e.date);
@@ -92,17 +108,19 @@ export function groupSeancePubliqueBySlot(events) {
             byDate.set(d, []);
         byDate.get(d).push(e);
     }
-    const out = [];
+    // Pour chaque date : enrichir, bucketiser par slot, puis pousser dans out[slot]
     for (const [date, dayEvents] of byDate) {
         const enriched = dayEvents.map((e) => {
             const { startISO, endISO, slot } = deriveTimesForEvent(e);
             return { ev: e, startISO, endISO, slot };
         });
+        // tri par heure de début connue
         enriched.sort((a, b) => {
-            const da = a.startISO ? parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
-            const db = b.startISO ? parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
+            const da = a.startISO ? (parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
+            const db = b.startISO ? (parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
             return da - db;
         });
+        // Bucket par slot (en déduisant le slot UNKNOWN via l'heure si possible)
         const bySlot = new Map();
         for (const it of enriched) {
             let s = it.slot;
@@ -111,14 +129,17 @@ export function groupSeancePubliqueBySlot(events) {
                 if (dt)
                     s = slotOf(dt);
             }
+            if (s === "UNKNOWN")
+                continue; // on écarte les inconnus résiduels (option : créer un bucket "UNKNOWN")
             if (!bySlot.has(s))
                 bySlot.set(s, []);
             bySlot.get(s).push(it);
         }
+        // Construire les GroupedReunion et les pousser dans out[slot]
         for (const [slot, list] of bySlot) {
             const sorted = list.slice().sort((a, b) => {
-                const da = a.startISO ? parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
-                const db = b.startISO ? parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
+                const da = a.startISO ? (parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
+                const db = b.startISO ? (parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
                 return da - db;
             });
             const startTime = sorted.find((x) => !!x.startISO)?.startISO ?? null;
@@ -131,7 +152,7 @@ export function groupSeancePubliqueBySlot(events) {
             }, null);
             const titres = sorted.map((x) => x.ev.titre || "").filter(Boolean);
             const captationVideo = sorted.some((x) => x.ev.captationVideo === true);
-            out.push({
+            ensureBucket(slot).push({
                 uid: makeGroupUid(date, slot),
                 chambre: "SN",
                 date,
@@ -146,11 +167,15 @@ export function groupSeancePubliqueBySlot(events) {
             });
         }
     }
-    out.sort((a, b) => {
-        const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
-        const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
-        return da - db || a.slot.localeCompare(b.slot);
-    });
+    // Tri interne de chaque créneau (cohérent avec groupNonSPByTypeOrganeHour)
+    for (const s of Object.keys(out)) {
+        out[s].sort((a, b) => {
+            const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
+            const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
+            // puis par nom de slot pour stabilité (facultatif)
+            return da - db || (a.slot || "UNKNOWN").localeCompare(b.slot || "UNKNOWN");
+        });
+    }
     return out;
 }
 function normalizeNoAccents(s) {
@@ -179,11 +204,16 @@ function classifyAgendaType(typeLabel) {
 }
 function typeToSuffixStrict(kind) {
     switch (kind) {
-        case "SP": return "IDS";
-        case "COM": return "IDC";
-        case "MC": return "IDM";
-        case "OD": return "IDO";
-        case "ID": return "IDI";
+        case "SP":
+            return "IDS";
+        case "COM":
+            return "IDC";
+        case "MC":
+            return "IDM";
+        case "OD":
+            return "IDO";
+        case "ID":
+            return "IDI";
     }
 }
 function hourShortFromISO(iso) {
@@ -271,7 +301,7 @@ function trimWords(s, max = 40) {
     return words.length <= max ? words.join(" ") : words.slice(0, max).join(" ");
 }
 function compactTitleList(titres, maxTitles = 5) {
-    const uniq = Array.from(new Set(titres.map(t => norm(t)).filter(Boolean)));
+    const uniq = Array.from(new Set(titres.map((t) => norm(t)).filter(Boolean)));
     return uniq.slice(0, maxTitles).join(" · ") || "(sans titre)";
 }
 export function makeGroupUid(date, slot) {
@@ -288,9 +318,9 @@ export function makeReunionUid(agenda) {
 }
 function joinObjets(events) {
     const objets = events
-        .map(e => (e.objet || "").trim())
+        .map((e) => (e.objet || "").trim())
         .filter(Boolean)
-        .map(s => trimWords(s, 40));
+        .map((s) => trimWords(s, 40));
     if (objets.length === 0)
         return "";
     return objets.join(" · ");
@@ -328,8 +358,12 @@ function parseTimeOriginalFR(timeOriginal) {
     }
     return { start: null, end: null };
 }
-function clampHour(h) { return Math.max(0, Math.min(23, h)); }
-function clampMinute(m) { return Math.max(0, Math.min(59, m)); }
+function clampHour(h) {
+    return Math.max(0, Math.min(23, h));
+}
+function clampMinute(m) {
+    return Math.max(0, Math.min(59, m));
+}
 function toIsoTime(h, m) {
     return `${String(h).padStart(2, "0")}:${String(m).padStart(2, "0")}:00.000+02:00`;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tricoteuses/senat",
-  "version": "2.11.3",
+  "version": "2.11.5",
   "description": "Handle French Sénat's open data",
   "keywords": [
     "France",