npm - @tricoteuses/senat - Versions diffs - 2.20.8 → 2.20.10 - Mend

@tricoteuses/senat 2.20.8 → 2.20.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +2 -2
package/lib/model/ameli.js +1 -1
package/lib/model/dosleg.js +3 -0
package/lib/scripts/retrieve_cr_commission.js +10 -3
package/lib/scripts/retrieve_videos.js +19 -5
package/lib/scripts/shared/cli_helpers.d.ts +7 -3
package/lib/scripts/shared/cli_helpers.js +6 -0
package/lib/utils/nvs-timecode.d.ts +1 -0
package/lib/utils/nvs-timecode.js +62 -0
package/package.json +2 -1

package/README.md CHANGED Viewed

@@ -56,10 +56,10 @@ npm run data:parse_textes_lois ../senat-data
 npm run data:retrieve_agenda ../senat-data -- --fromSession 2022 [--parseAgenda]
 # Retrieval (& parsing) of comptes-rendus de séance from Sénat's data
-npm run data:retrieve_cr_seance ../senat-data -- [--parseDebats]
+npm run data:retrieve_cr_seance ../senat-data -- [--parseDebats] [--keepDir]
 # Retrieval (& parsing) of comptes-rendus de commissions from Sénat's website
-npm run data:retrieve_cr_commission ../senat-data -- [--parseDebats]
+npm run data:retrieve_cr_commission ../senat-data -- [--parseDebats] [--keepDir]
 # Retrieval of sénateurs' pictures from Sénat's website
 npm run data:retrieve_senateurs_photos ../senat-data

package/lib/model/ameli.js CHANGED Viewed

@@ -123,7 +123,7 @@ const findAllAmendementsQuery = dbSenat
     "ameli.avigvt.lib as avis_gouvernement",
     eb.fn.coalesce("ameli.sor.lib", "ameli.irr.libirr").as("sort"),
     "ameli.amd.rev as revision",
-    concat(val("https://www.senat.fr/amendements/"), ref("ameli.ses.ann"), val("-"), sql `(ameli.ses.ann + 1)`, val("/"), ref("ameli.txt_ameli.numabs"), val("/Amdt_"), ref("ameli.amd.numabs"), val(".html")).as("url"),
+    concat(val("https://www.senat.fr/amendements/"), ref("ameli.ses.ann"), val("-"), sql `(ameli.ses.ann + 1)`, val("/"), ref("ameli.txt_ameli.numabs"), val("/Amdt_"), ref("ameli.amd.num"), val(".html")).as("url"),
     "ameli.grppol_ameli.lilcou as au_nom_de_groupe_politique",
     "ameli.com_ameli.lil as au_nom_de_commission",
     eb.case().when("ameli.cab.entid", "is not", null).then(true).else(false).end().as("auteur_est_gouvernement"),

package/lib/model/dosleg.js CHANGED Viewed

@@ -34,10 +34,12 @@ function rapports(lectureAssembleeId) {
         .withSchema("dosleg")
         .selectFrom("rap")
         .leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod")
+        .leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
         .leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
         .where("lecassrap.lecassidt", "=", lectureAssembleeId)
         .select(({ eb, ref, val }) => [
         "rap.rapnum as numero",
+        "raporg.orgcod as code_organisme",
         eb
             .case()
             .when("rap.typurl", "=", "I")
@@ -84,6 +86,7 @@ function textes(lectureAssembleeId) {
         .where("texte.lecassidt", "=", lectureAssembleeId)
         .select(({ eb, ref, val }) => [
         "texte.texnum as numero",
+        "texte.orgcod as code_organisme",
         eb
             .case()
             .when("texte.typurl", "=", "I")

package/lib/scripts/retrieve_cr_commission.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import fs from "fs-extra";
+import fs, { ensureDir } from "fs-extra";
 import assert from "assert";
 import path from "path";
 import * as cheerio from "cheerio";
@@ -187,7 +187,12 @@ async function retrieveCommissionCRs(options = {}) {
     const politenessMs = Number(options["politenessMs"] ?? 150);
     const commissionsRootDir = path.join(dataDir, COMMISSION_FOLDER);
     const originalRoot = path.join(commissionsRootDir, DATA_ORIGINAL_FOLDER);
-    ensureAndClearDir(originalRoot);
+    if (!options["keepDir"]) {
+        ensureAndClearDir(originalRoot);
+    }
+    else {
+        ensureDir(originalRoot);
+    }
     const discovered = await discoverCommissionWeeklyPages(fromSession);
     console.log(`[COM-CR][discover] ${discovered.length} links (>= session ${fromSession})`);
     const jobs = discovered.map(({ url, yyyymmdd, commissionKey }) => {
@@ -238,7 +243,9 @@ async function retrieveCommissionCRs(options = {}) {
     const sessions = getSessionsFromStart(options["fromSession"]);
     const comRoot = path.join(dataDir, COMMISSION_FOLDER);
     const transformedRoot = path.join(comRoot, DATA_TRANSFORMED_FOLDER);
-    if (options["parseDebats"])
+    if (options["keepDir"])
+        ensureDir(transformedRoot);
+    else
         ensureAndClearDir(transformedRoot);
     for (const session of sessions) {
         const originalSessionDir = path.join(originalRoot, String(session));

package/lib/scripts/retrieve_videos.js CHANGED Viewed

@@ -9,6 +9,7 @@ import { getSessionsFromStart } from "../types/sessions";
 import { commonOptions } from "./shared/cli_helpers";
 import { decodeHtmlEntities } from "../model/util";
 import { DateTime } from "luxon";
+import { getFirstInterventionStartTimecode } from "../utils/nvs-timecode";
 // ===================== Constants =====================
 const MATCH_THRESHOLD = 0.5;
 const MAX_CANDIDATES = 15;
@@ -274,6 +275,8 @@ async function processGroupedReunion(agenda, session, dataDir) {
         }
     }
     let master = null;
+    let dataTxt = null;
+    let finalTxt = null;
     let accepted = false;
     if (!skipDownload) {
         STATS.total++;
@@ -398,8 +401,8 @@ async function processGroupedReunion(agenda, session, dataDir) {
         await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
         const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
         const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
-        const dataTxt = await fetchText(dataUrl);
-        const finalTxt = await fetchText(finalUrl);
+        dataTxt = await fetchText(dataUrl);
+        finalTxt = await fetchText(finalUrl);
         if (dataTxt)
             await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
         if (finalTxt)
@@ -414,14 +417,21 @@ async function processGroupedReunion(agenda, session, dataDir) {
     else {
         // Skipped download, but need to read data.nvs for urlVideo
         try {
-            const dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
-            const finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
+            dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
+            finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
             master = buildSenatVodMasterM3u8FromNvs(dataTxt);
         }
         catch (e) {
             console.warn(e);
         }
     }
+    let timecodeDebutVideo = null;
+    if (dataTxt && finalTxt) {
+        timecodeDebutVideo = getFirstInterventionStartTimecode(dataTxt, finalTxt);
+        if (timecodeDebutVideo === null) {
+            console.warn(`[warn] Cannot retrieve start video timecode from reunion` + reunionUid);
+        }
+    }
     // ==== 4) Update agenda file (only if accepted + m3u8) ====
     if ((accepted || skipDownload) && master) {
         const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
@@ -437,9 +447,13 @@ async function processGroupedReunion(agenda, session, dataDir) {
             }
             if (obj && typeof obj === "object" && !Array.isArray(obj)) {
                 const next = { ...obj, urlVideo: master };
+                if (timecodeDebutVideo != null) {
+                    next.timecodeDebutVideo = timecodeDebutVideo;
+                }
                 await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
                 if (!options["silent"]) {
-                    console.log(`[write] ${agenda.uid} urlVideo ← ${master}`);
+                    console.log(`[write] ${agenda.uid} urlVideo ← ${master}` +
+                        (timecodeDebutVideo != null ? ` (timecodeDebutVideo ← ${timecodeDebutVideo}s)` : ""));
                 }
             }
             else {

package/lib/scripts/shared/cli_helpers.d.ts CHANGED Viewed

@@ -35,6 +35,11 @@ export declare const onlyRecentOption: {
     name: string;
     type: NumberConstructor;
 };
+export declare const keepDirOption: {
+    help: string;
+    name: string;
+    type: BooleanConstructor;
+};
 export declare const commonOptions: ({
     alias: string;
     defaultValue: string[];
@@ -48,12 +53,11 @@ export declare const commonOptions: ({
     name: string;
     type: StringConstructor;
 } | {
-    alias: string;
     help: string;
     name: string;
-    type: BooleanConstructor;
+    type: NumberConstructor;
 } | {
     help: string;
     name: string;
-    type: NumberConstructor;
+    type: BooleanConstructor;
 })[];

package/lib/scripts/shared/cli_helpers.js CHANGED Viewed

@@ -35,6 +35,11 @@ export const onlyRecentOption = {
     name: "only-recent",
     type: Number,
 };
+export const keepDirOption = {
+    help: "keep directories when cleaning data",
+    name: "keepDir",
+    type: Boolean,
+};
 export const commonOptions = [
     categoriesOption,
     dataDirDefaultOption,
@@ -42,4 +47,5 @@ export const commonOptions = [
     silentOption,
     verboseOption,
     onlyRecentOption,
+    keepDirOption,
 ];

package/lib/utils/nvs-timecode.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare function getFirstInterventionStartTimecode(dataNvs: string, finalPlayerNvs: string): number | null;

package/lib/utils/nvs-timecode.js ADDED Viewed

@@ -0,0 +1,62 @@
+import { XMLParser } from "fast-xml-parser";
+const xmlParser = new XMLParser({
+    ignoreAttributes: false,
+    attributeNamePrefix: "@_",
+});
+function getFirstInterventionChapterId(dataNvs) {
+    const xml = xmlParser.parse(dataNvs);
+    const rootChapters = xml?.data?.chapters?.chapter;
+    if (!rootChapters)
+        return null;
+    const chaptersArray = Array.isArray(rootChapters) ? rootChapters : [rootChapters];
+    let foundId = null;
+    function dfsChapter(chapter) {
+        if (foundId)
+            return;
+        const metas = chapter.metadata ? (Array.isArray(chapter.metadata) ? chapter.metadata : [chapter.metadata]) : [];
+        const isIntervention = metas.some((m) => m?.["@_name"] === "type" && (m?.["@_value"] === "IN" || m?.["@_label"] === "Intervention"));
+        const hasSpeaker = !!chapter.speaker;
+        if (isIntervention && hasSpeaker && chapter["@_id"]) {
+            foundId = String(chapter["@_id"]);
+            return;
+        }
+        const children = chapter.chapter;
+        if (!children)
+            return;
+        const childArray = Array.isArray(children) ? children : [children];
+        for (const child of childArray) {
+            dfsChapter(child);
+            if (foundId)
+                return;
+        }
+    }
+    for (const ch of chaptersArray) {
+        dfsChapter(ch);
+        if (foundId)
+            break;
+    }
+    return foundId;
+}
+function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
+    const xml = xmlParser.parse(finalPlayerNvs);
+    const synchros = xml?.player?.synchro;
+    if (!synchros)
+        return null;
+    const synchsArray = Array.isArray(synchros) ? synchros : [synchros];
+    const match = synchsArray.find((s) => String(s["@_id"]) === String(chapterId));
+    if (!match)
+        return null;
+    const rawTimecode = match["@_timecode"];
+    if (rawTimecode == null)
+        return null;
+    const ms = Number(rawTimecode);
+    if (Number.isNaN(ms))
+        return null;
+    return Math.floor(ms / 1000);
+}
+export function getFirstInterventionStartTimecode(dataNvs, finalPlayerNvs) {
+    const firstChapterId = getFirstInterventionChapterId(dataNvs);
+    if (!firstChapterId)
+        return null;
+    return getTimecodeForChapterId(finalPlayerNvs, firstChapterId);
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tricoteuses/senat",
-  "version": "2.20.8",
+  "version": "2.20.10",
   "description": "Handle French Sénat's open data",
   "keywords": [
     "France",
@@ -65,6 +65,7 @@
     "cheerio": "^1.1.2",
     "command-line-args": "^6.0.1",
     "dotenv": "^17.2.3",
+    "fast-xml-parser": "^5.3.2",
     "fs-extra": "^11.3.2",
     "jsdom": "^27.2.0",
     "kysely": "^0.28.8",