npm - @tricoteuses/senat - Versions diffs - 2.20.17 → 2.20.19 - Mend

@tricoteuses/senat 2.20.17 → 2.20.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/README.md +58 -19
package/lib/git.d.ts +26 -0
package/lib/git.js +167 -0
package/lib/index.d.ts +1 -1
package/lib/loaders.d.ts +3 -2
package/lib/model/commission.d.ts +2 -2
package/lib/model/commission.js +5 -4
package/lib/model/seance.d.ts +2 -8
package/lib/model/seance.js +28 -113
package/lib/model/util.d.ts +0 -4
package/lib/model/util.js +0 -38
package/lib/scripts/convert_data.js +25 -1
package/lib/scripts/retrieve_agenda.js +7 -18
package/lib/scripts/retrieve_cr_commission.js +1 -10
package/lib/scripts/retrieve_cr_seance.d.ts +1 -1
package/lib/scripts/retrieve_cr_seance.js +183 -127
package/lib/scripts/retrieve_videos.d.ts +1 -1
package/lib/scripts/retrieve_videos.js +46 -92
package/lib/scripts/shared/cli_helpers.d.ts +25 -3
package/lib/scripts/shared/cli_helpers.js +28 -0
package/lib/types/agenda.d.ts +5 -6
package/lib/utils/cr_spliting.d.ts +2 -10
package/lib/utils/cr_spliting.js +2 -119
package/lib/utils/date.d.ts +10 -0
package/lib/utils/date.js +100 -0
package/lib/utils/reunion_odj_building.d.ts +2 -2
package/lib/utils/reunion_odj_building.js +8 -12
package/lib/utils/reunion_parsing.d.ts +23 -0
package/lib/utils/reunion_parsing.js +209 -0
package/lib/utils/scoring.d.ts +14 -0
package/lib/utils/scoring.js +147 -0
package/lib/utils/string_cleaning.d.ts +7 -0
package/lib/utils/string_cleaning.js +57 -0
package/package.json +1 -1

package/lib/scripts/convert_data.js CHANGED Viewed

@@ -3,6 +3,7 @@ import commandLineArgs from "command-line-args";
 import fs from "fs-extra";
 import path from "path";
 import pLimit from "p-limit";
+import * as git from "../git";
 import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
 import { DATA_ORIGINAL_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
 import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findSenatRapportUrls, findSenatTexteUrls, } from "../model";
@@ -17,14 +18,26 @@ const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
 const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
 const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
 const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
+function commitGit(datasetDir, options, exitCode) {
+    if (options.commit) {
+        const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options.remote);
+        if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
+            exitCode = errorCode;
+        }
+    }
+    return exitCode;
+}
 async function convertData() {
     const dataDir = options["dataDir"];
     assert(dataDir, "Missing argument: data directory");
     const enabledDatasets = getEnabledDatasets(options["categories"]);
     console.time("data transformation time");
+    let exitCode = 0;
     if (enabledDatasets & EnabledDatasets.Ameli) {
         try {
             await convertDatasetAmeli(dataDir, options);
+            const ameliDir = path.join(dataDir, datasets.ameli.database);
+            exitCode = commitGit(ameliDir, options, exitCode);
         }
         catch (error) {
             console.error(`Error converting Ameli dataset:`, error);
@@ -33,6 +46,8 @@ async function convertData() {
     if (enabledDatasets & EnabledDatasets.Debats) {
         try {
             await convertDatasetDebats(dataDir, options);
+            const debatsDir = path.join(dataDir, datasets.debats.database);
+            exitCode = commitGit(debatsDir, options, exitCode);
         }
         catch (error) {
             console.error(`Error converting Debats dataset:`, error);
@@ -41,12 +56,16 @@ async function convertData() {
     if (enabledDatasets & EnabledDatasets.DosLeg) {
         try {
             await convertDatasetDosLeg(dataDir, options);
+            const doslegDir = path.join(dataDir, datasets.dosleg.database);
+            exitCode = commitGit(doslegDir, options, exitCode);
         }
         catch (error) {
             console.error(`Error converting DosLeg dataset:`, error);
         }
         try {
             await convertDatasetScrutins(dataDir, options);
+            const scrutinsDir = path.join(dataDir, SCRUTINS_FOLDER);
+            exitCode = commitGit(scrutinsDir, options, exitCode);
         }
         catch (error) {
             console.error(`Error converting Scrutins dataset:`, error);
@@ -55,6 +74,8 @@ async function convertData() {
     if (enabledDatasets & EnabledDatasets.Questions) {
         try {
             await convertDatasetQuestions(dataDir);
+            const questionsDir = path.join(dataDir, datasets.questions.database);
+            exitCode = commitGit(questionsDir, options, exitCode);
         }
         catch (error) {
             console.error(`Error converting Questions dataset:`, error);
@@ -63,6 +84,8 @@ async function convertData() {
     if (enabledDatasets & EnabledDatasets.Sens) {
         try {
             await convertDatasetSens(dataDir);
+            const sensDir = path.join(dataDir, datasets.sens.database);
+            exitCode = commitGit(sensDir, options, exitCode);
         }
         catch (error) {
             console.error(`Error converting Sens dataset:`, error);
@@ -71,6 +94,7 @@ async function convertData() {
     if (!options["silent"]) {
         console.timeEnd("data transformation time");
     }
+    return exitCode;
 }
 async function convertDatasetAmeli(dataDir, options) {
     const dataset = datasets.ameli;
@@ -284,7 +308,7 @@ async function convertDatasetSens(dataDir) {
     }
 }
 convertData()
-    .then(() => process.exit(0))
+    .then((exitCode) => process.exit(exitCode || 0))
     .catch((error) => {
     console.log(error);
     process.exit(1);

package/lib/scripts/retrieve_agenda.js CHANGED Viewed

@@ -9,7 +9,7 @@ import { getSessionsFromStart } from "../types/sessions";
 import { ID_DATE_FORMAT } from "./datautil";
 import { commonOptions } from "./shared/cli_helpers";
 import { fetchWithRetry } from "./shared/util";
-import { groupNonSPByTypeOrganeHour, groupSeancePubliqueBySlot } from "../utils/reunion_grouping";
+import { buildReunionsByBucket } from "../utils/reunion_parsing";
 import { buildSenatDossierIndex } from "../utils/reunion_odj_building";
 const optionsDefinitions = [
     ...commonOptions,
@@ -103,25 +103,14 @@ async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPa
         return;
     const flatPath = path.join(transformedAgendaSessionDir, `${agendaFileName}.json`);
     fs.writeJSONSync(flatPath, parsedAgendaEvents, { spaces: 2 });
-    // 1) SP → grouped by (date, slot)
-    const spGrouped = groupSeancePubliqueBySlot(parsedAgendaEvents, dossierBySenatUrl);
-    // a) on a un Record<TimeSlot, GroupedReunion[]>, on le transforme en array
-    const spGroups = Object.values(spGrouped).flat();
-    // b) (reco) trier pour stabilité, comme pour les NON-SP
-    const PARIS = "Europe/Paris";
-    spGroups.sort((a, b) => {
-        const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
-        const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
-        // en cas d’égalité, ordre par slot pour stabilité
-        return da - db || (a.slot || "UNKNOWN").localeCompare(b.slot || "UNKNOWN");
-    });
-    if (spGroups.length > 0) {
-        writeGroupsAsFiles(transformedAgendaSessionDir, spGroups);
+    const byBucket = buildReunionsByBucket(parsedAgendaEvents, dossierBySenatUrl);
+    // SP
+    if (byBucket.IDS.length > 0) {
+        writeGroupsAsFiles(transformedAgendaSessionDir, byBucket.IDS);
     }
-    // 2) NON-SP → grouped by (date, organe, hour)
-    const groupedBySuffix = groupNonSPByTypeOrganeHour(parsedAgendaEvents, dossierBySenatUrl);
+    // NON-SP
     for (const suffix of ["IDC", "IDM", "IDO", "IDI"]) {
-        const groups = groupedBySuffix[suffix] || [];
+        const groups = byBucket[suffix];
         if (groups.length > 0) {
             writeGroupsAsFiles(transformedAgendaSessionDir, groups);
         }

package/lib/scripts/retrieve_cr_commission.js CHANGED Viewed

@@ -10,7 +10,7 @@ import { commonOptions } from "./shared/cli_helpers";
 import { sessionStartYearFromDate } from "../model/seance";
 import { getSessionsFromStart } from "../types/sessions";
 import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
-import { jaccardTokenSim } from "../model/util";
+import { jaccard, jaccardTokenSim } from "../utils/scoring";
 class CommissionCRDownloadError extends Error {
     constructor(message, url) {
         super(`An error occurred while retrieving Commission CR ${url}: ${message}`);
@@ -138,15 +138,6 @@ function toTokens(s) {
         .split(/\s+/)
         .filter((t) => t.length >= 3 && !["commission", "des", "de", "du", "d", "la", "le", "les", "et"].includes(t)));
 }
-function jaccard(a, b) {
-    if (!a.size || !b.size)
-        return 0;
-    let inter = 0;
-    for (const t of a)
-        if (b.has(t))
-            inter++;
-    return inter / (a.size + b.size - inter);
-}
 function reunionOrganeCandidates(h) {
     const any = h;
     const out = [any.organeSlug, any.organeKey, any.organe, h.titre].filter(Boolean);

package/lib/scripts/retrieve_cr_seance.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Needs to be run after retrieve_agenda.ts !
+ * Needs to be ran after retrieve_agenda.ts script !
  * - downloads the ZIP of comptes-rendus des débats (CRI) from data.senat.fr
  * - extracts XML files, distributes them by session/year
  */

package/lib/scripts/retrieve_cr_seance.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Needs to be run after retrieve_agenda.ts !
+ * Needs to be ran after retrieve_agenda.ts script !
  * - downloads the ZIP of comptes-rendus des débats (CRI) from data.senat.fr
  * - extracts XML files, distributes them by session/year
  */
@@ -11,11 +11,12 @@ import StreamZip from "node-stream-zip";
 import * as cheerio from "cheerio";
 import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
 import { commonOptions } from "./shared/cli_helpers";
-import { deriveTitreObjetFromSommaire, parseCompteRenduSlotFromFile, parseYYYYMMDD, sessionStartYearFromDate, } from "../model/seance";
-import { makeGroupUid } from "../utils/reunion_grouping";
+import { parseCompteRenduIntervalFromFile, sessionStartYearFromDate } from "../model/seance";
+import { extractSommaireBlocks, makeReunionUid } from "../utils/reunion_parsing";
 import { getSessionsFromStart } from "../types/sessions";
-import { fetchWithRetry } from "./shared/util";
-import { computeIntervalsBySlot } from "../utils/cr_spliting";
+import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
+import { isNoiseBlock, scoreSommaireBlockForEvent } from "../utils/scoring";
+import { parseYYYYMMDD } from "../utils/date";
 const optionsDefinitions = [
     ...commonOptions,
     {
@@ -26,49 +27,11 @@ const optionsDefinitions = [
 ];
 const options = commandLineArgs(optionsDefinitions);
 const CRI_ZIP_URL = "https://data.senat.fr/data/debats/cri.zip";
-const SLOT_ORDER = ["MATIN", "APRES-MIDI", "SOIR"];
 class CompteRenduError extends Error {
     constructor(message, url) {
         super(`An error occurred while retrieving ${url}: ${message}`);
     }
 }
-function pickFirstSlotOfDay(slots) {
-    for (const s of SLOT_ORDER)
-        if (slots.includes(s))
-            return s;
-    return null;
-}
-function loadAgendaSPSlotsForDate(dataDir, yyyymmdd, session) {
-    const dirPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
-    if (!fs.existsSync(dirPath)) {
-        console.warn(`[AGENDA] Directory not found for session ${session} → ${dirPath}`);
-        return null;
-    }
-    const pattern = new RegExp(`^RUSN${yyyymmdd}IDS-(MATIN|APRES-MIDI|SOIR)\\.json$`);
-    const ALLOWED_SLOTS = new Set(["MATIN", "APRES-MIDI", "SOIR"]);
-    try {
-        const files = fs.readdirSync(dirPath);
-        const matched = files.filter((f) => pattern.test(f));
-        if (matched.length === 0) {
-            return null;
-        }
-        const found = new Set();
-        for (const name of matched) {
-            const m = name.match(pattern);
-            const raw = (m?.[1] ?? "");
-            if (ALLOWED_SLOTS.has(raw))
-                found.add(raw);
-        }
-        const slots = Array.from(found);
-        if (slots.length === 0) {
-            return null;
-        }
-        return { filePath: dirPath, slots };
-    }
-    catch {
-        return null;
-    }
-}
 async function downloadCriZip(zipPath) {
     if (!options["silent"])
         console.log(`Downloading CRI zip ${CRI_ZIP_URL}…`);
@@ -117,10 +80,19 @@ export async function retrieveCriXmlDump(dataDir, options = {}) {
     const root = path.join(dataDir, COMPTES_RENDUS_FOLDER);
     ensureDirSync(root);
     const originalRoot = path.join(root, DATA_ORIGINAL_FOLDER);
-    fs.ensureDirSync(originalRoot);
+    if (!options["keepDir"]) {
+        ensureAndClearDir(originalRoot);
+    }
+    else {
+        fs.ensureDirSync(originalRoot);
+    }
     const transformedRoot = path.join(root, DATA_TRANSFORMED_FOLDER);
-    if (options["parseDebats"])
+    if (!options["keepDir"]) {
+        ensureAndClearDir(transformedRoot);
+    }
+    else {
         fs.ensureDirSync(transformedRoot);
+    }
     const sessions = getSessionsFromStart(options["fromSession"]);
     // 1) Download ZIP global + distribut by session
     const zipPath = path.join(dataDir, "cri.zip");
@@ -158,77 +130,208 @@ export async function retrieveCriXmlDump(dataDir, options = {}) {
         for (const f of xmlFiles) {
             const yyyymmdd = f.slice(1, 9);
             const xmlPath = path.join(originalSessionDir, f);
+            // === ONLY-RECENT
             if (options["only-recent"]) {
                 const cutoff = now - options["only-recent"] * 24 * 3600 * 1000;
-                const seanceTs = Date.parse(yyyymmdd.slice(0, 4) + "-" + yyyymmdd.slice(4, 6) + "-" + yyyymmdd.slice(6, 8));
+                const seanceTs = Date.parse(`${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`);
                 if (seanceTs < cutoff) {
-                    // Check if some file exists sarting with CRSSN{yyyymmdd} in transformed dir
                     const files = await fs.readdir(transformedSessionDir);
-                    const dayFiles = files.filter((fn) => fn.startsWith(`CRSSN${yyyymmdd}-`) && fn.endsWith(".json"));
+                    const dayFiles = files.filter((fn) => fn.startsWith(`CRSSN${yyyymmdd}E`) && fn.endsWith(".json"));
                     if (dayFiles.length > 0) {
-                        // Link existing files to agendas
                         for (const fn of dayFiles) {
-                            const match = fn.match(/^CRSSN(\d{8})-(.+)\.json$/);
-                            const slot = match?.[2];
+                            const match = fn.match(/^CRSSN(\d{8})E(.+)\.json$/);
+                            const eventId = match?.[2];
+                            if (!eventId)
+                                continue;
                             const crPath = path.join(transformedSessionDir, fn);
                             try {
                                 const cr = await fs.readJSON(crPath);
-                                await linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, cr.uid, cr, session);
+                                await linkCriEventIntoAgenda(dataDir, yyyymmdd, eventId, cr.uid, cr, session);
                             }
                             catch (e) {
-                                console.warn(`[AGENDA] [${session}] Could not link existing CR into grouped for ${yyyymmdd} ${slot}:`, e);
+                                console.warn(`[CR] [${session}] Could not relink existing CR into a reunion for ${yyyymmdd} event=${eventId}:`, e);
                             }
                         }
                         continue;
                     }
                 }
             }
-            // 1) Deduce slot(s) from agenda if it exsits
-            const agendaInfo = loadAgendaSPSlotsForDate(dataDir, yyyymmdd, session);
-            const firstSlotOfDay = pickFirstSlotOfDay(agendaInfo?.slots ?? []);
-            // 2) Detect slots from CRI content
-            let slotsInCri = [];
+            // === Charger les events SP du jour depuis les agendas groupés ===
+            const dayEvents = await loadAgendaSpEventsForDate(dataDir, yyyymmdd, session);
+            if (dayEvents.length === 0) {
+                console.warn(`[CRI] [${session}] No agenda SP events found for ${yyyymmdd} → skip split/link`);
+                continue;
+            }
+            // === Lire XML + construire index DOM ===
+            let raw;
+            let $;
+            let order;
+            let idx;
             try {
-                const raw = await fs.readFile(xmlPath, "utf8");
-                const $ = cheerio.load(raw, { xml: false });
-                const order = $("body *").toArray();
-                const idx = new Map(order.map((el, i) => [el, i]));
-                const intervals = computeIntervalsBySlot($, idx, firstSlotOfDay ?? undefined);
-                const uniq = new Set();
-                for (const iv of intervals)
-                    if (iv.slot && iv.slot !== "UNKNOWN")
-                        uniq.add(iv.slot);
-                slotsInCri = Array.from(uniq);
+                raw = await fs.readFile(xmlPath, "utf8");
+                $ = cheerio.load(raw, { xml: false });
+                order = $("body *").toArray();
+                idx = new Map(order.map((el, i) => [el, i]));
             }
             catch (e) {
                 console.warn(`[CRI] [${session}] Cannot read/parse ${f}:`, e);
                 continue;
             }
-            if (slotsInCri.length === 0) {
-                slotsInCri = [firstSlotOfDay ?? "MATIN"];
+            // === Extraire sommaire + matcher vers events agenda ===
+            const blocks = extractSommaireBlocks($, idx);
+            const intervals = buildIntervalsByAgendaEvents($, idx, order, blocks, dayEvents);
+            if (!intervals.length) {
+                console.warn(`[CRI] [${session}] No confident split intervals for ${yyyymmdd} → skip`);
+                continue;
             }
-            // 3) Parse & write each slot
-            for (const slot of slotsInCri) {
-                const outName = `CRSSN${yyyymmdd}-${slot}.json`;
-                const cr = await parseCompteRenduSlotFromFile(xmlPath, slot, firstSlotOfDay ?? slot);
+            // === Parser / écrire / linker chaque segment par event ===
+            for (const iv of intervals) {
+                const outName = `CRSSN${yyyymmdd}E${iv.agendaEventId}.json`;
+                const outPath = path.join(transformedSessionDir, outName);
+                const cr = await parseCompteRenduIntervalFromFile(xmlPath, iv.startIndex, iv.endIndex, iv.agendaEventId);
                 if (!cr) {
-                    console.warn(`[CRI] [${session}] Empty or no points for ${yyyymmdd} (${slot}) → skip`);
+                    console.warn(`[CRI] [${session}] Empty or no points for ${yyyymmdd} event=${iv.agendaEventId} → skip`);
                     continue;
                 }
-                const outDir = transformedSessionDir;
-                await fs.ensureDir(outDir);
-                const outPath = path.join(outDir, outName);
+                await fs.ensureDir(transformedSessionDir);
                 await fs.writeJSON(outPath, cr, { spaces: 2 });
                 try {
-                    await linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, cr.uid, cr, session);
+                    await linkCriEventIntoAgenda(dataDir, yyyymmdd, iv.agendaEventId, cr.uid, cr, session);
                 }
                 catch (e) {
-                    console.warn(`[AGENDA] [${session}] Could not link CR into grouped for ${yyyymmdd} ${slot}:`, e);
+                    console.warn(`[CR] [${session}] Could not link CR into agenda for ${yyyymmdd} event=${iv.agendaEventId}:`, e);
                 }
             }
         }
     }
 }
+async function linkCriEventIntoAgenda(dataDir, yyyymmdd, agendaEventId, crUid, cr, session) {
+    const agendadDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
+    fs.ensureDirSync(agendadDir);
+    const dateISO = `${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`;
+    const agendaUid = makeReunionUid(dateISO, "SP", agendaEventId, null);
+    const agendaPath = path.join(agendadDir, `${agendaUid}.json`);
+    let agenda = null;
+    if (await fs.pathExists(agendaPath)) {
+        try {
+            agenda = await fs.readJSON(agendaPath);
+        }
+        catch (e) {
+            console.warn(`[CR] unreadable reunion JSON → ${agendaPath} (${e})`);
+            agenda = null;
+        }
+    }
+    if (!agenda) {
+        console.warn(`[CR] Missing reunion file for SP event=${agendaEventId}: ${agendaPath}`);
+        return;
+    }
+    ;
+    agenda.compteRenduRefUid = crUid;
+    await fs.writeJSON(agendaPath, agenda, { spaces: 2 });
+    console.log(`[CR] Linked CR ${crUid} → ${path.basename(agendaPath)} (event=${agendaEventId})`);
+}
+function buildIntervalsByAgendaEvents($, idx, order, blocks, dayEvents) {
+    const MIN_SCORE = 0.65;
+    const MIN_GAP = 0.08;
+    const firstIntervenant = $("div.intervenant").first()[0];
+    const firstIntervenantIdx = firstIntervenant ? (idx.get(firstIntervenant) ?? null) : null;
+    const pivots = [];
+    for (const b of blocks) {
+        if (isNoiseBlock(b.text))
+            continue;
+        let best = null;
+        let second = 0;
+        for (const ev of dayEvents) {
+            const s = scoreSommaireBlockForEvent(b.text, ev);
+            if (!best || s > best.score) {
+                second = best?.score ?? second;
+                best = { ev, score: s };
+            }
+            else if (s > second) {
+                second = s;
+            }
+        }
+        if (!best)
+            continue;
+        const resolved = resolveTargetIndex($, idx, b.targetId);
+        const contentStartIndex = resolved ?? b.startIndex;
+        if (firstIntervenantIdx != null && contentStartIndex < firstIntervenantIdx && resolved == null) {
+            continue;
+        }
+        if (best.score < MIN_SCORE)
+            continue;
+        if (best.score - second < MIN_GAP)
+            continue;
+        pivots.push({
+            agendaEventId: best.ev.id,
+            startIndex: contentStartIndex,
+            score: best.score,
+        });
+    }
+    if (pivots.length === 0)
+        return [];
+    // Dédupe par event (on garde le premier startIndex)
+    const byEvent = new Map();
+    for (const p of pivots.sort((a, b) => a.startIndex - b.startIndex)) {
+        if (!byEvent.has(p.agendaEventId)) {
+            byEvent.set(p.agendaEventId, {
+                startIndex: p.startIndex,
+                score: p.score,
+            });
+        }
+    }
+    const sorted = Array.from(byEvent.entries())
+        .map(([agendaEventId, v]) => ({
+        agendaEventId,
+        startIndex: v.startIndex,
+        score: v.score,
+    }))
+        .sort((a, b) => a.startIndex - b.startIndex);
+    // Construction des intervalles
+    const intervals = [];
+    for (let i = 0; i < sorted.length; i++) {
+        const cur = sorted[i];
+        const next = sorted[i + 1];
+        const endIndex = next ? next.startIndex - 1 : order.length - 1;
+        intervals.push({
+            agendaEventId: cur.agendaEventId,
+            startIndex: cur.startIndex,
+            endIndex,
+            score: cur.score,
+        });
+    }
+    return intervals;
+}
+async function loadAgendaSpEventsForDate(dataDir, yyyymmdd, session) {
+    const agendasDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
+    if (!(await fs.pathExists(agendasDir)))
+        return [];
+    const files = (await fs.readdir(agendasDir)).filter((fn) => fn.startsWith(`RUSN${yyyymmdd}IDS`) && fn.endsWith(".json"));
+    const events = [];
+    for (const fn of files) {
+        try {
+            const g = (await fs.readJSON(path.join(agendasDir, fn)));
+            const e = g?.events?.[0];
+            if (e && e.type === "Séance publique")
+                events.push(e);
+        }
+        catch { }
+    }
+    return events;
+}
+function cssEscapeIdent(s) {
+    return s.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
+}
+function resolveTargetIndex($, idx, targetId) {
+    if (!targetId)
+        return null;
+    const safe = cssEscapeIdent(targetId);
+    const el = $(`[id="${safe}"]`)[0] || $(`[name="${safe}"]`)[0];
+    if (!el)
+        return null;
+    const i = idx.get(el);
+    return i == null ? null : i;
+}
 async function main() {
     const dataDir = options["dataDir"];
     assert(dataDir, "Missing argument: data directory");
@@ -242,50 +345,3 @@ main()
     console.error(error);
     process.exit(1);
 });
-async function linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, crUid, cr, session) {
-    const groupedDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
-    fs.ensureDirSync(groupedDir);
-    const groupedPath = path.join(groupedDir, `RUSN${yyyymmdd}IDS-${slot}.json`);
-    let group = null;
-    if (fs.existsSync(groupedPath)) {
-        try {
-            const parsed = JSON.parse(fs.readFileSync(groupedPath, "utf8"));
-            if (Array.isArray(parsed)) {
-                // Take correct slot if multiple or first one if no direct match ?
-                group = parsed.find((g) => g?.slot === slot) ?? parsed[0] ?? null;
-            }
-            else {
-                group = parsed;
-            }
-        }
-        catch (e) {
-            console.warn(`[AGENDA] unreadable grouped JSON → ${groupedPath} (${e}) → recreating`);
-            group = null;
-        }
-    }
-    const dateISO = `${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`;
-    const sommaire = cr?.metadonnees?.sommaire;
-    const { titre: dTitre, objet: dObjet } = deriveTitreObjetFromSommaire(sommaire, slot);
-    // Création si manquant
-    if (!group) {
-        group = {
-            uid: makeGroupUid(dateISO, slot),
-            chambre: "SN",
-            date: dateISO,
-            slot,
-            type: "Séance publique",
-            startTime: null,
-            endTime: null,
-            captationVideo: false,
-            titre: dTitre,
-            objet: dObjet || "",
-            events: [],
-            compteRenduRefUid: crUid,
-        };
-    }
-    else {
-        group.compteRenduRefUid = crUid;
-    }
-    await fs.writeJSON(groupedPath, group, { spaces: 2 });
-    console.log(`[AGENDA] Linked CR ${crUid} → ${path.basename(groupedPath)} [${slot}]`);
-}

package/lib/scripts/retrieve_videos.d.ts CHANGED Viewed

@@ -1 +1 @@
-export declare function buildSenatVodMasterM3u8FromNvs(nvsText: string): string | null;
+export {};