npm - @tricoteuses/senat - Versions diffs - 2.21.3 → 2.21.5 - Mend

@tricoteuses/senat 2.21.3 → 2.21.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/lib/config.d.ts +2 -20
package/lib/config.js +5 -16
package/lib/loaders.js +1 -1
package/lib/model/ameli.js +3 -1
package/lib/model/documents.js +18 -5
package/lib/model/dosleg.js +11 -11
package/lib/raw_types/questions.d.ts +0 -4
package/lib/raw_types/senat.d.ts +0 -4
package/lib/raw_types_schemats/ameli.d.ts +1 -1
package/lib/raw_types_schemats/debats.d.ts +1 -1
package/lib/raw_types_schemats/dosleg.d.ts +1 -1
package/lib/raw_types_schemats/questions.d.ts +1 -3
package/lib/raw_types_schemats/sens.d.ts +1 -1
package/lib/scripts/convert_data.js +3 -3
package/lib/scripts/retrieve_open_data.js +1 -2
package/lib/scripts/test_iter_load.js +1 -1
package/lib/src/git.js +52 -2
package/lib/src/loaders.d.ts +2 -3
package/lib/src/loaders.js +14 -29
package/lib/src/model/documents.js +9 -6
package/lib/src/parsers/texte.js +1 -20
package/lib/src/scripts/convert_data.js +9 -11
package/lib/src/scripts/retrieve_documents.js +34 -8
package/lib/src/scripts/retrieve_videos.js +16 -4
package/lib/src/types/texte.d.ts +0 -1
package/lib/src/videos/pipeline.d.ts +27 -1
package/lib/src/videos/pipeline.js +49 -8
package/lib/src/videos/types.d.ts +5 -0
package/lib/validators/config.d.ts +1 -9
package/lib/validators/config.js +54 -10
package/package.json +1 -1
package/lib/src/utils/nvs-timecode.d.ts +0 -17
package/lib/src/utils/nvs-timecode.js +0 -79
package/lib/src/utils/weights_scoring_config.d.ts +0 -2
package/lib/src/utils/weights_scoring_config.js +0 -15

package/lib/config.d.ts CHANGED Viewed

@@ -1,21 +1,3 @@
 import "dotenv/config";
-import { z } from "zod";
-export declare const configSchema: z.ZodObject<{
-    db: z.ZodObject<{
-        host: z.ZodString;
-        name: z.ZodString;
-        password: z.ZodString;
-        port: z.ZodCoercedNumber<unknown>;
-        user: z.ZodString;
-    }, z.core.$strip>;
-}, z.core.$strip>;
-declare const _default: {
-    db: {
-        host: string;
-        name: string;
-        password: string;
-        port: number;
-        user: string;
-    };
-};
-export default _default;
+declare const validConfig: any;
+export default validConfig;

package/lib/config.js CHANGED Viewed

@@ -1,27 +1,16 @@
 import "dotenv/config";
-import { z } from "zod";
-const dbSchema = z.object({
-    host: z.string().trim().min(1, "Must not be empty"),
-    name: z.string().trim().min(1, "Must not be empty"),
-    password: z.string().trim().min(1, "Must not be empty"),
-    port: z.coerce.number().int().min(0).max(65535),
-    user: z.string().trim().min(1, "Must not be empty"),
-});
-export const configSchema = z.object({
-    db: dbSchema,
-});
+import { validateConfig } from "./validators/config";
 const config = {
     db: {
         host: process.env["TRICOTEUSES_SENAT_DB_HOST"] || "localhost",
-        name: process.env["TRICOTEUSES_SENAT_DB_NAME"] || "postgres",
         password: process.env["TRICOTEUSES_SENAT_DB_PASSWORD"] || "opendata",
         port: process.env["TRICOTEUSES_SENAT_DB_PORT"] || 5432,
         user: process.env["TRICOTEUSES_SENAT_DB_USER"] || "opendata",
     },
 };
-const result = configSchema.safeParse(config);
-if (!result.success) {
-    console.error(`Error in configuration:\n${JSON.stringify(config, null, 2)}\nError:\n${JSON.stringify(result.error.issues, null, 2)}`);
+const [validConfig, error] = validateConfig(config);
+if (error !== null) {
+    console.error(`Error in configuration:\n${JSON.stringify(validConfig, null, 2)}\nError:\n${JSON.stringify(error, null, 2)}`);
     process.exit(-1);
 }
-export default result.data;
+export default validConfig;

package/lib/loaders.js CHANGED Viewed

@@ -193,7 +193,7 @@ export function* iterLoadSenatDocuments(dataDir, session, documentType, options
                         ...document,
                     };
                     const documentItem = {
-                        item: enrichedDocument,
+                        item: enrichedDocument
                     };
                     if (document.url) {
                         const documentName = path.parse(document.url).name;

package/lib/model/ameli.js CHANGED Viewed

@@ -47,7 +47,9 @@ const findAllAmendementsQuery = dbSenat
     .leftJoin("ameli.typses", "ameli.typses.id", "ameli.ses.typid")
     .leftJoin("ameli.nat", "ameli.txt_ameli.natid", "ameli.nat.id")
     .leftJoin("ameli.lec_ameli", "ameli.txt_ameli.lecid", "ameli.lec_ameli.id")
-    .leftJoin("dosleg.texte", (join) => join.onRef("ameli.ses.ann", "=", "dosleg.texte.sesann").onRef("ameli.txt_ameli.numabs", "=", "dosleg.texte.texnum"))
+    .leftJoin("dosleg.texte", (join) => join
+    .onRef("ameli.ses.ann", "=", "dosleg.texte.sesann")
+    .onRef("ameli.txt_ameli.numabs", "=", "dosleg.texte.texnum"))
     .leftJoin("dosleg.lecass", "dosleg.texte.lecassidt", "dosleg.lecass.lecassidt")
     .leftJoin("ameli.mot", "ameli.amd.motid", "ameli.mot.id")
     .leftJoin("ameli.avicom", "ameli.amd.avcid", "ameli.avicom.id")

package/lib/model/documents.js CHANGED Viewed

@@ -28,7 +28,10 @@ function documentsAttaches(rapportId) {
         .selectFrom("docatt")
         .leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
         .where("docatt.rapcod", "=", rapportId)
-        .select(["docatt.docatturl as url", "typatt.typattlib as type_document"]));
+        .select([
+        "docatt.docatturl as url",
+        "typatt.typattlib as type_document"
+    ]));
 }
 function selectRapportAttributes({ eb, ref, val }) {
     return [
@@ -67,9 +70,14 @@ const queryRapports = baseQueryRapports
     .leftJoin("lecass", "lecass.lecassidt", "lecassrap.lecassidt")
     .leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
     .leftJoin("loi", "loi.loicod", "lecture.loicod")
-    .select((args) => ["loi.signet as signet_dossier", ...selectRapportAttributes(args)]);
+    .select((args) => [
+    "loi.signet as signet_dossier",
+    ...selectRapportAttributes(args),
+]);
 export function rapports(lectureAssembleeId) {
-    return jsonArrayFrom(baseQueryRapports.select(selectRapportAttributes).where("lecassrap.lecassidt", "=", lectureAssembleeId));
+    return jsonArrayFrom(baseQueryRapports
+        .select(selectRapportAttributes)
+        .where("lecassrap.lecassidt", "=", lectureAssembleeId));
 }
 function auteursTexte(texteId) {
     return jsonArrayFrom(dbSenat
@@ -126,9 +134,14 @@ const queryTextes = baseQueryTextes
     .leftJoin("lecass", "lecass.lecassidt", "texte.lecassidt")
     .leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
     .leftJoin("loi", "loi.loicod", "lecture.loicod")
-    .select((args) => ["loi.signet as signet_dossier", ...selectTexteAttributes(args)]);
+    .select((args) => [
+    "loi.signet as signet_dossier",
+    ...selectTexteAttributes(args),
+]);
 export function textes(lectureAssembleeId) {
-    return jsonArrayFrom(baseQueryTextes.select(selectTexteAttributes).where("texte.lecassidt", "=", lectureAssembleeId));
+    return jsonArrayFrom(baseQueryTextes
+        .select(selectTexteAttributes)
+        .where("texte.lecassidt", "=", lectureAssembleeId));
 }
 export function findAllTextes() {
     return queryTextes.stream();

package/lib/model/dosleg.js CHANGED Viewed

@@ -203,10 +203,10 @@ export function buildActesLegislatifs(dossier) {
                     numero: depotTexte.numero,
                     uid: `${loiSignet}-${phasePrefix}-DEPOT`,
                     session: lecAss.session,
-                    chambre: "SN",
+                    chambre: 'SN',
                     signet_dossier: loiSignet,
                     texte_url: depotTexte.url,
-                    code_organisme: null,
+                    code_organisme: null
                 });
             }
             // =================================================================
@@ -225,7 +225,7 @@ export function buildActesLegislatifs(dossier) {
                         adoption: rap.adoption,
                         uid: `${loiSignet}-${phasePrefix}-COM`,
                         session: lecAss.session,
-                        chambre: "SN",
+                        chambre: 'SN',
                         signet_dossier: loiSignet,
                         texte_url: rap.url,
                     });
@@ -247,9 +247,9 @@ export function buildActesLegislatifs(dossier) {
                         libelle: `Discussion en séance publique`,
                         uid: `${loiSignet}-${phasePrefix}-DEBATS-SEANCE`,
                         session: lecAss.session,
-                        chambre: "SN",
+                        chambre: 'SN',
                         signet_dossier: loiSignet,
-                        code_organisme: null,
+                        code_organisme: null
                     });
                 }
             }
@@ -281,10 +281,10 @@ export function buildActesLegislatifs(dossier) {
                     adoption: libelleStatut,
                     uid: `${loiSignet}-DEC-${texteFinal.numero}`,
                     session: lecAss.session,
-                    chambre: "SN",
+                    chambre: 'SN',
                     signet_dossier: loiSignet,
                     texte_url: texteFinal.url,
-                    code_organisme: null,
+                    code_organisme: null
                 });
             }
         }
@@ -294,19 +294,19 @@ export function buildActesLegislatifs(dossier) {
     // =================================================================
     if (dossier.date_decision_CoC) {
         actes.push({
-            code_acte: "CC",
+            code_acte: 'CC',
             date: dossier.date_decision_CoC,
             libelle: `Décision du Conseil constitutionnel`,
             id: dossier.url_decision_CoC,
             uid: `${loiSignet}-CC`,
-            chambre: "AN",
+            chambre: 'AN',
             signet_dossier: loiSignet,
             texte_url: dossier.url_decision_CoC || dossier.url_dossier_CoC,
         });
     }
     if (dossier.date_promulgation) {
         actes.push({
-            code_acte: "PROM",
+            code_acte: 'PROM',
             date: dossier.date_promulgation,
             libelle: `Promulgation de la loi`,
             date_publication_JO: dossier.date_publication_JO,
@@ -314,7 +314,7 @@ export function buildActesLegislatifs(dossier) {
             url_legifrance: dossier.url_JO,
             id: dossier.url_JO,
             uid: `${loiSignet}-PROM`,
-            chambre: "AN",
+            chambre: 'AN',
             signet_dossier: loiSignet,
         });
     }

package/lib/raw_types/questions.d.ts CHANGED Viewed

@@ -113,10 +113,6 @@ export interface TamMinisteres {
     titreministre: string | null;
 }
 export interface TamQuestions {
-    /**
-     * Question caduque redéposée
-     */
-    caduque_redeposee: string | null;
     /**
      * Libellé de la circonscription
      */

package/lib/raw_types/senat.d.ts CHANGED Viewed

@@ -2735,10 +2735,6 @@ export interface QuestionsTamMinisteres {
     titreministre: string | null;
 }
 export interface QuestionsTamQuestions {
-    /**
-     * Question caduque redéposée
-     */
-    caduque_redeposee: string | null;
     /**
      * Libellé de la circonscription
      */

package/lib/raw_types_schemats/ameli.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * AUTO-GENERATED FILE - DO NOT EDIT!
  *
- * This file was automatically generated by schemats v.2.20.33
+ * This file was automatically generated by schemats v.2.19.6
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t amd -t amdsen -t avicom -t avigvt -t cab -t com_ameli -t ent -t etatxt -t fbu -t grppol_ameli -t gvt -t intora -t irr -t lec_ameli -t mot -t nat -t orarol -t sai -t saisen -t sea -t sen_ameli -t ses -t sor -t sub -t txt_ameli -t typrect -t typses -t typsub -t w_nivrec -s ameli
  *
  */

package/lib/raw_types_schemats/debats.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * AUTO-GENERATED FILE - DO NOT EDIT!
  *
- * This file was automatically generated by schemats v.2.20.33
+ * This file was automatically generated by schemats v.2.19.6
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t debats -t intdivers -t intpjl -t lecassdeb -t secdis -t secdivers -t syndeb -t typsec -s debats
  *
  */

package/lib/raw_types_schemats/dosleg.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * AUTO-GENERATED FILE - DO NOT EDIT!
  *
- * This file was automatically generated by schemats v.2.20.33
+ * This file was automatically generated by schemats v.2.19.6
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t amescr -t ass -t aud -t auteur -t ble -t catrap -t corscr -t date_seance -t deccoc -t denrap -t doc -t docatt -t docsea -t ecr -t etaloi -t evtsea -t forpub -t gen -t lecass -t lecassrap -t lecture -t lnkrap -t loi -t loithe -t natloi -t org -t orgnomhis -t orippr -t oritxt -t posvot -t qua -t rap -t raporg -t rapthe -t rolsig -t scr -t ses -t stavot -t texte -t texte_ancien -t the -t titsen -t typatt -t typaut -t typdoc -t typevtsea -t typlec -t typloi -t typorg -t typrap -t typtxt -t typurl -t votsen -s dosleg
  *
  */

package/lib/raw_types_schemats/questions.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * AUTO-GENERATED FILE - DO NOT EDIT!
  *
- * This file was automatically generated by schemats v.2.20.33
+ * This file was automatically generated by schemats v.2.19.6
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t etatquestion -t legquestion -t naturequestion -t sortquestion -t tam_ministeres -t tam_questions -t tam_reponses -t the -s questions
  *
  */
@@ -76,7 +76,6 @@ export interface tam_ministeres {
     titreministre: tam_ministeresFields.titreministre;
 }
 export declare namespace tam_questionsFields {
-    type caduque_redeposee = string | null;
     type circonscription = string | null;
     type cirnum = number | null;
     type codequalite = string | null;
@@ -136,7 +135,6 @@ export declare namespace tam_questionsFields {
     type version = number | null;
 }
 export interface tam_questions {
-    caduque_redeposee: tam_questionsFields.caduque_redeposee;
     circonscription: tam_questionsFields.circonscription;
     cirnum: tam_questionsFields.cirnum;
     codequalite: tam_questionsFields.codequalite;

package/lib/raw_types_schemats/sens.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * AUTO-GENERATED FILE - DO NOT EDIT!
  *
- * This file was automatically generated by schemats v.2.20.33
+ * This file was automatically generated by schemats v.2.19.6
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t acr -t activite -t activite_audit -t activite_delegation -t activite_delegation_audit -t activite_loi -t activite_loi_audit -t activite_obligatoire -t activite_participant -t activite_participant_audit -t activite_senateur -t activite_senateur_audit -t activite_senateur_params -t activite_senateur_params_audit -t activites_liees -t activites_liees_audit -t actpro -t adhgrpsen -t adr -t adresse -t adrsen -t app -t assparint -t asster -t autgrpsen -t autorisation_profil -t autorisations -t avis_nomination_art13 -t basdes -t bur -t bur3r -t bur4r -t cad -t candid -t candidat -t candtodelete -t categorie_activite -t catpro -t catpro2e -t catterrit -t cible_categorie_periode -t cirdep -t com -t con -t cotgip -t csp -t cspfam -t databasechangelog -t databasechangeloglock -t delega -t derogation -t derogation_audit -t derogation_senateur -t derogation_senateur_audit -t design -t designoep -t designorg -t discou -t div -t dpt -t dpt_seuil_presence -t dptele -t dptele_files -t dptele_processing -t dptele_processing_type -t dpttypman -t droits_acces -t droits_acces_audit -t droits_type_derogation -t ele -t eleloc -t elucan -t eludep -t eludiv -t elueur -t elueur_apf -t elumet -t elureg -t elusen -t elusen2e -t elusen3r -t elusen4r -t elusencommu -t elusenpair -t eluter -t elutit -t eluvil -t etadebman -t etadebman3r -t etadebman4r -t etafinman -t etafinman3r -t etafinman4r -t etaprr -t etarpm -t etasen -t ext2e_bio -t ext2e_csp -t ext2e_mandats -t ext2e_minist -t extsencom_identite -t extsencom_mandat -t fonact_participant -t foncandid -t foncom -t fondelega -t fongrppol -t fongrpsen -t fonmemcom -t fonmemdelega -t fonmemextpar -t fonmemgrppol -t fonmemgrpsen -t fonmemorg -t fonorg -t grppol -t grppol4r -t grpsenami -t grpsenamiadh -t grpsenamiadhreq -t grpsenamiadhreqeta -t grpsenamiunadh -t grpsim -t gvt -t insee_pays2008 -t jhi_authority -t jhi_user -t jhi_user_authority -t lanetr -t libcom -t 
libdelega -t libgrppol -t libgrpsen -t liborg -t lisdptele -t mel -t memcom -t memcomsea -t memdelega -t memextpar -t memgrppol -t memgrpsen -t memorg -t met -t minind -t minist -t mis -t misetafin -t mismin -t misrapeta -t missen -t moddes -t mode_acces_elusenpair -t nation -t nationgrpsen -t nivlan -t org -t orgext -t orgextpres -t orgthe -t pairie_elusenpair -t parpol -t parpolglo -t participa -t pcs -t pcs24 -t pcs42 -t pcs8 -t pcscatpro -t per -t per_sen -t perapp -t periode_presence -t perpolglo -t perrol -t pj_justificatif -t pj_justificatif_audit -t plaind -t plan_table -t plsql_profiler_runs -t plsql_profiler_units -t poicon -t posvot -t presences_scrutin_surcharge -t presencesrevisionentity -t profil_applicatif -t qua -t rap_the -t reg -t reladr -t requetes_profil -t reslis -t resultat -t reu -t revchanges -t rne_mandat -t rne_mandat_diff -t rne_sen -t rne_sen_diff -t rne_type_mandat -t rol -t sal -t scr -t scrusoldelega -t sea -t sec -t sec2e -t secexe -t sen -t senbur -t senbur3r -t senbur4r -t sennom -t senpj -t sensim -t sentablenom -t senurl -t seuil_presence -t sirpas_elusen -t sirpas_fonmemcom -t sirpas_fonmemdelega -t sirpas_fonmemgrppol -t sirpas_memcom -t sirpas_memdelega -t sirpas_memgrppol -t sirpas_mvt -t sirpas_mvtcm -t sirpas_mvttri -t sirpas_sen -t sirpas_senbur -t sirpas_trf -t srv -t stajur -t stavot -t suspensiontravaux -t suspensiontravaux_audit -t sysage -t syscognos -t sysevt -t sysvar -t sysvar_sendev -t sysvar_senprod -t tapsenrevchanges -t tapsenrevisionentity -t telephone -t temval -t tenpol -t territ -t testoracle -t titele -t titelerne -t titmin -t titnob -t tmpsd -t toutes -t turelu -t typadr -t typapppol -t typbister -t typcandid -t type_activite -t type_activite_participant -t type_activite_rol -t type_activite_senateur -t type_categorie -t type_derogation -t type_droit_acces -t type_pj_justificatif -t type_rne_diff -t type_type_derogation -t typele -t typgrpsen -t typman -t typmin -t typmoddes -t typorg -t typorgext -t 
typparpol -t typpoicon -t typprs -t typprssta -t typscr -t typtel -t typurl -t typvoi -t uploaded_file -t uploaded_file_type -t validation -t validation_defview_profil -t validation_profil -t vercand -t verres -t votes -t zongeo -s sens
  *
  */

package/lib/scripts/convert_data.js CHANGED Viewed

@@ -5,7 +5,7 @@ import path from "path";
 import pLimit from "p-limit";
 import * as git from "../git";
 import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
-import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
+import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER } from "../loaders";
 import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAllTextes, findAllRapports, } from "../model";
 import { processRapport, processTexte } from "./retrieve_documents";
 import { buildActesLegislatifs } from "../model/dosleg";
@@ -164,7 +164,7 @@ async function convertDatasetDosLeg(dataDir, options) {
         const actesBrutsNormalises = buildActesLegislatifs(dossier);
         const dossierWithActes = {
             ...dossier,
-            actes_legislatifs: actesBrutsNormalises,
+            actes_legislatifs: actesBrutsNormalises
         };
         const dossierFile = `${dossier["signet"]}.json`;
         await fs.outputJSON(path.join(dossierReorganizedDir, dossierFile), dossierWithActes, { spaces: 2 });
@@ -236,7 +236,7 @@ async function convertTextes(dataDir, options) {
         const texteName = path.parse(texte["url"]).name;
         const texteDir = path.join(originalTextesDir, `${session}`, texteName);
         // oritxtcod = 1 corresponds to "Texte de loi déposé au Sénat"
-        const hasExposeDesMotifs = texte["origine"] === "Sénat" && texte["ordre_origine"] === "1";
+        const hasExposeDesMotifs = texte["origine"] === 'Sénat' && texte["ordre_origine"] === '1';
         const metadata = {
             name: texteName,
             session: texte["session"],

package/lib/scripts/retrieve_open_data.js CHANGED Viewed

@@ -271,8 +271,7 @@ async function retrieveOpenData() {
     process.env = {
         ...process.env,
         PGHOST: process.env["PGHOST"] || config.db.host,
-        PGPORT: process.env["PGPORT"] || String(config.db.port),
-        PGDATABASE: process.env["PGDATABASE"] || config.db.name,
+        PGPORT: process.env["PGPORT"] || config.db.port,
         PGUSER: process.env["PGUSER"] || config.db.user,
         PGPASSWORD: process.env["PGPASSWORD"] || config.db.password,
     };

package/lib/scripts/test_iter_load.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { iterLoadSenatRapports, } from "../loaders";
+import { iterLoadSenatRapports } from "../loaders";
 import commandLineArgs from "command-line-args";
 import { dataDirDefaultOption } from "./shared/cli_helpers";
 const optionsDefinitions = [dataDirDefaultOption];

package/lib/src/git.js CHANGED Viewed

@@ -2,6 +2,57 @@ import { execSync } from "node:child_process";
 import fs from "fs-extra";
 import path from "node:path";
 const MAXBUFFER = 50 * 1024 * 1024;
+const GIT_LOCK_RETRY_DELAY_MS = 1000;
+const GIT_LOCK_RETRY_COUNT = 5;
+const GIT_LOCK_STALE_AFTER_MS = 2 * 60 * 1000;
+function sleep(ms) {
+    Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
+}
+function getIndexLockPath(repositoryDir) {
+    return path.join(repositoryDir, ".git", "index.lock");
+}
+function isIndexLockError(error) {
+    const stderr = String(error?.stderr || "");
+    return /index\.lock': File exists\./.test(stderr);
+}
+function removeStaleIndexLock(repositoryDir) {
+    const lockPath = getIndexLockPath(repositoryDir);
+    if (!fs.existsSync(lockPath)) {
+        return false;
+    }
+    const stats = fs.statSync(lockPath);
+    const ageMs = Date.now() - stats.mtimeMs;
+    if (ageMs < GIT_LOCK_STALE_AFTER_MS) {
+        return false;
+    }
+    fs.removeSync(lockPath);
+    return true;
+}
+function execGitWithIndexLockRecovery(command, repositoryDir, options) {
+    let lockRemoved = false;
+    for (let attempt = 1; attempt <= GIT_LOCK_RETRY_COUNT; attempt++) {
+        try {
+            execSync(command, {
+                cwd: repositoryDir,
+                ...options,
+            });
+            return;
+        }
+        catch (error) {
+            if (!isIndexLockError(error)) {
+                throw error;
+            }
+            if (!lockRemoved && removeStaleIndexLock(repositoryDir)) {
+                lockRemoved = true;
+                continue;
+            }
+            if (attempt === GIT_LOCK_RETRY_COUNT) {
+                throw error;
+            }
+            sleep(GIT_LOCK_RETRY_DELAY_MS);
+        }
+    }
+}
 export function initRepo(repositoryDir) {
     if (!fs.existsSync(path.join(repositoryDir, ".git"))) {
         fs.ensureDirSync(repositoryDir);
@@ -15,8 +66,7 @@ export function initRepo(repositoryDir) {
 }
 export function commit(repositoryDir, message) {
     initRepo(repositoryDir);
-    execSync("git add .", {
-        cwd: repositoryDir,
+    execGitWithIndexLockRecovery("git add .", repositoryDir, {
         env: process.env,
         encoding: "utf-8",
         stdio: ["ignore", "ignore", "pipe"],

package/lib/src/loaders.d.ts CHANGED Viewed

@@ -43,10 +43,9 @@ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, s
 export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
 export declare function iterLoadSenatRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
 export declare function iterLoadSenatTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
-export declare function iterLoadSenatDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
 export declare function iterLoadSenatRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
-export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
-export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
+export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult & Partial<FlatTexte>>>;
+export declare function loadSenatTexteContent(dataDir: string, session: number | string | null | undefined, texteId: string): IterItem<FlatTexte | null>;
 export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
     item: CompteRendu | null;
 };

package/lib/src/loaders.js CHANGED Viewed

@@ -182,29 +182,6 @@ export function* iterLoadSenatTexteUrls(dataDir, session) {
         }
     }
 }
-export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
-    for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
-        for (const lecture of dossierLegislatif["lectures"]) {
-            const lecturesSenat = lecture.lectures_assemblee.filter((lectureAssemblee) => lectureAssemblee.assemblee === "Sénat");
-            for (const lectureSenat of lecturesSenat) {
-                for (const document of lectureSenat[documentType]) {
-                    const enrichedDocument = {
-                        signet_dossier: dossierLegislatif["signet"],
-                        ...document,
-                    };
-                    const documentItem = {
-                        item: enrichedDocument,
-                    };
-                    if (document.url) {
-                        const documentName = path.parse(document.url).name;
-                        documentItem.filePathFromDataset = path.join(`${document.session ?? UNDEFINED_SESSION}`, documentName, `${documentName}.pdf`);
-                    }
-                    yield documentItem;
-                }
-            }
-        }
-    }
-}
 export function* iterLoadSenatRapports(dataDir, session, options = {}) {
     for (const iterItem of iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options)) {
         if (iterItem.item?.["id"]) {
@@ -213,14 +190,22 @@ export function* iterLoadSenatRapports(dataDir, session, options = {}) {
     }
 }
 export function* iterLoadSenatTextes(dataDir, session, options = {}) {
-    for (const iterItem of iterLoadSenatDocuments(dataDir, session, "textes", options)) {
-        yield iterItem;
+    for (const iterItem of iterLoadSenatItems(dataDir, TEXTE_FOLDER, session, DATA_ORIGINAL_FOLDER, options)) {
+        if (!iterItem.item?.["id"]) {
+            continue;
+        }
+        const texteItem = iterItem;
+        const texte = texteItem.item;
+        const texteId = texte["id"];
+        const { item: texteContent } = loadSenatTexteContent(dataDir, texte["session"], texteId);
+        if (texteContent) {
+            texteItem.item.divisions = texteContent.divisions;
+        }
+        yield texteItem;
     }
 }
-export function loadSenatTexteContent(dataDir, textePathFromDataset) {
-    const parsedTextePath = path.parse(textePathFromDataset);
-    const jsonTexteName = `${parsedTextePath.name}.json`;
-    const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, parsedTextePath.dir, jsonTexteName);
+export function loadSenatTexteContent(dataDir, session, texteId) {
+    const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? UNDEFINED_SESSION), texteId, `${texteId}.json`);
     if (!fs.existsSync(fullTextePath)) {
         return { item: null };
     }

package/lib/src/model/documents.js CHANGED Viewed

@@ -2,6 +2,9 @@ import { sql } from "kysely";
 import { dbSenat } from "../databases";
 import { concat, rtrim, toDateString } from "./util";
 import { jsonArrayFrom } from "kysely/helpers/postgres";
+function stripTrailingHashes(expr) {
+    return sql `regexp_replace(${expr}, '#+$', '')`;
+}
 function orderOrdreOrigineTexte(expr) {
     return sql `array_position(array['0','2','1'], ${expr})`;
 }
@@ -37,15 +40,15 @@ function selectRapportAttributes({ eb, ref, val }) {
         eb
             .case()
             .when("rap.rapurl", "is not", null)
-            .then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
+            .then(stripTrailingHashes(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`))
             .else(null)
             .end()
             .as("id"),
         eb
             .case()
             .when("rap.typurl", "=", "I")
-            .then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
-            .else(rtrim(ref("rap.rapurl")))
+            .then(stripTrailingHashes(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl")))))
+            .else(stripTrailingHashes(rtrim(ref("rap.rapurl"))))
             .end()
             .as("url"),
         rtrim(ref("denrap.libdenrap")).as("type"),
@@ -95,15 +98,15 @@ function selectTexteAttributes({ eb, ref, val }) {
         eb
             .case()
             .when("texte.texurl", "is not", null)
-            .then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
+            .then(stripTrailingHashes(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`))
             .else(null)
             .end()
             .as("id"),
         eb
             .case()
             .when("texte.typurl", "=", "I")
-            .then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
-            .else(rtrim(ref("texte.texurl")))
+            .then(stripTrailingHashes(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl")))))
+            .else(stripTrailingHashes(rtrim(ref("texte.texurl"))))
             .end()
             .as("url"),
         rtrim(ref("oritxt.oritxtlib")).as("origine"),

package/lib/src/parsers/texte.js CHANGED Viewed

@@ -1,24 +1,6 @@
 import { JSDOM } from "jsdom";
-import { AKN_IDENTIFICATION_STRUCTURE_REGEXP, AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
+import { AKN_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
 import { DivisionType, } from "../types/texte";
-function buildWorklow(metaElement) {
-    const stepElements = metaElement.querySelectorAll("workflow step");
-    const steps = [];
-    for (const stepElement of stepElements) {
-        const identification = stepElement.getAttribute("href") ?? "";
-        const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
-        steps.push({
-            eId: stepElement.getAttribute("eId"),
-            date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
-            type: identificationParts?.["type"] || null,
-            session: identificationParts?.["session"] || null,
-            numero: identificationParts?.["numTexte"] || null,
-            version: identificationParts?.["version"] ? identificationParts["version"] : null,
-            outcome: stepElement.getAttribute("outcome"),
-        });
-    }
-    return steps;
-}
 function buildDivision(node, index) {
     const eId = node.getAttribute("eId");
     const tag = node.nodeName;
@@ -147,7 +129,6 @@ export function transformTexte(document) {
         dateDepot: dateDepot ? new Date(dateDepot) : null,
         datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
         version: identificationParts?.["version"] ? identificationParts["version"] : null,
-        workflow: metaElement ? buildWorklow(metaElement) : [],
         divisions: bodyElement ? flattenTexte(bodyElement) : [],
     };
 }

package/lib/src/scripts/convert_data.js CHANGED Viewed

@@ -13,32 +13,31 @@ import { UNDEFINED_SESSION } from "../types/sessions";
 import { getSessionFromDate, getSessionFromSignet } from "./datautil";
 import { commonOptions } from "./shared/cli_helpers";
 import { ensureAndClearDir } from "./shared/util";
+let exitCode = 10; // 0: some data changed, 10: no modification
 const optionsDefinitions = [...commonOptions];
 const options = commandLineArgs(optionsDefinitions);
 const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
 const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
 const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
 const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
-function commitGit(datasetDir, options, exitCode) {
+function commitAndPushGit(datasetDir, options) {
     if (options.commit) {
         const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options.remote);
         if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
             exitCode = errorCode;
         }
     }
-    return exitCode;
 }
 async function convertData() {
     const dataDir = options["dataDir"];
     assert(dataDir, "Missing argument: data directory");
     const enabledDatasets = getEnabledDatasets(options["categories"]);
     console.time("data transformation time");
-    let exitCode = 0;
     if (enabledDatasets & EnabledDatasets.Ameli) {
         try {
             await convertDatasetAmeli(dataDir, options);
             const ameliDir = path.join(dataDir, datasets.ameli.database);
-            exitCode = commitGit(ameliDir, options, exitCode);
+            commitAndPushGit(ameliDir, options);
         }
         catch (error) {
             console.error(`Error converting Ameli dataset:`, error);
@@ -48,7 +47,7 @@ async function convertData() {
         try {
             await convertDatasetDebats(dataDir, options);
             const debatsDir = path.join(dataDir, datasets.debats.database);
-            exitCode = commitGit(debatsDir, options, exitCode);
+            commitAndPushGit(debatsDir, options);
         }
         catch (error) {
             console.error(`Error converting Debats dataset:`, error);
@@ -58,7 +57,7 @@ async function convertData() {
         try {
             await convertDatasetDosLeg(dataDir, options);
             const doslegDir = path.join(dataDir, datasets.dosleg.database);
-            exitCode = commitGit(doslegDir, options, exitCode);
+            commitAndPushGit(doslegDir, options);
         }
         catch (error) {
             console.error(`Error converting DosLeg dataset:`, error);
@@ -66,7 +65,7 @@ async function convertData() {
         try {
             await convertDatasetScrutins(dataDir, options);
             const scrutinsDir = path.join(dataDir, SCRUTINS_FOLDER);
-            exitCode = commitGit(scrutinsDir, options, exitCode);
+            commitAndPushGit(scrutinsDir, options);
         }
         catch (error) {
             console.error(`Error converting Scrutins dataset:`, error);
@@ -76,7 +75,7 @@ async function convertData() {
         try {
             await convertDatasetQuestions(dataDir, options);
             const questionsDir = path.join(dataDir, datasets.questions.database);
-            exitCode = commitGit(questionsDir, options, exitCode);
+            commitAndPushGit(questionsDir, options);
         }
         catch (error) {
             console.error(`Error converting Questions dataset:`, error);
@@ -86,7 +85,7 @@ async function convertData() {
         try {
             await convertDatasetSens(dataDir, options);
             const sensDir = path.join(dataDir, datasets.sens.database);
-            exitCode = commitGit(sensDir, options, exitCode);
+            commitAndPushGit(sensDir, options);
         }
         catch (error) {
             console.error(`Error converting Sens dataset:`, error);
@@ -95,7 +94,6 @@ async function convertData() {
     if (!options["silent"]) {
         console.timeEnd("data transformation time");
     }
-    return exitCode;
 }
 async function convertDatasetAmeli(dataDir, options) {
     const dataset = datasets.ameli;
@@ -347,7 +345,7 @@ async function convertDatasetSens(dataDir, options) {
     }
 }
 convertData()
-    .then((exitCode) => process.exit(exitCode || 0))
+    .then(() => process.exit(exitCode))
     .catch((error) => {
     console.log(error);
     process.exit(1);

package/lib/src/scripts/retrieve_documents.js CHANGED Viewed

@@ -3,11 +3,13 @@ import commandLineArgs from "command-line-args";
 import fs from "fs-extra";
 import { DateTime } from "luxon";
 import path from "path";
+import * as git from "../git";
 import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
 import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte";
 import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
 import { commonOptions } from "./shared/cli_helpers";
 import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util";
+let exitCode = 10; // 0: some data changed, 10: no modification
 const optionsDefinitions = [
     ...commonOptions,
     {
@@ -32,6 +34,14 @@ const optionsDefinitions = [
 const options = commandLineArgs(optionsDefinitions);
 const textDecoder = new TextDecoder("utf8");
 const today = DateTime.now();
+function commitAndPushGit(datasetDir) {
+    if (options["commit"]) {
+        const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options["remote"]);
+        if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
+            exitCode = errorCode;
+        }
+    }
+}
 function isDocumentRecent(documentDate, daysThreshold) {
     if (!documentDate)
         return false;
@@ -117,6 +127,12 @@ export async function processTexte(texteMetadata, originalTextesDir, transformed
         const result = await processDocument(format.url.toString(), destPath, texteMetadata.date, options);
         // Specific logic: Parsing (Only applies to XML)
         if (format.isParseTarget && options.parseDocuments) {
+            if (!result.buffer && !(await fs.pathExists(destPath))) {
+                if (options.verbose) {
+                    console.warn(`Skipping parse for missing XML file: ${destPath}`);
+                }
+                continue;
+            }
             await parseDocument(texteMetadata.session, transformedTextesDir, destPath, texteMetadata.name, result.buffer, exposeDesMotifsContent, options);
         }
     }
@@ -134,9 +150,10 @@ export async function processRapport(rapportMetadata, originalRapportsDir, optio
         await processDocument(format.url.toString(), destPath, rapportMetadata.date, options);
     }
 }
-async function retrieveTextes(dataDir, sessions) {
-    const originalTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
-    const transformedTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER);
+async function processTextes(dataDir, sessions) {
+    const textesDir = path.join(dataDir, TEXTE_FOLDER);
+    const originalTextesDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);
+    const transformedTextesDir = path.join(textesDir, DATA_TRANSFORMED_FOLDER);
     if (options["parseDocuments"]) {
         ensureAndClearDir(transformedTextesDir);
     }
@@ -153,9 +170,11 @@ async function retrieveTextes(dataDir, sessions) {
             await processTexte(texteMetadata, originalTextesDir, transformedTextesDir, dlOptions);
         }
     }
+    commitAndPushGit(textesDir);
 }
-async function retrieveRapports(dataDir, sessions) {
-    const originalRapportsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
+async function processRapports(dataDir, sessions) {
+    const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
+    const originalRapportsDir = path.join(rapportsDir, DATA_ORIGINAL_FOLDER);
     const dlOptions = {
         force: options["force"],
         silent: options["silent"],
@@ -168,6 +187,7 @@ async function retrieveRapports(dataDir, sessions) {
             await processRapport(rapportMetadata, originalRapportsDir, dlOptions);
         }
     }
+    commitAndPushGit(rapportsDir);
 }
 async function parseDocument(session, transformedTextesDir, textePath, texteName, texteBuffer, exposeDesMotifs = null, options = {}) {
     if (options.verbose) {
@@ -179,6 +199,12 @@ async function parseDocument(session, transformedTextesDir, textePath, texteName
         parsedTexte = parseTexte(texteXml);
     }
     else {
+        if (!(await fs.pathExists(textePath))) {
+            if (options.verbose) {
+                console.warn(`Skipping parse for missing XML file: ${textePath}`);
+            }
+            return null;
+        }
         parsedTexte = await parseTexteFromFile(textePath);
     }
     if (!parsedTexte)
@@ -200,10 +226,10 @@ async function main() {
     const sessions = getSessionsFromStart(options["fromSession"]);
     console.time("documents processing time");
     if (isOptionEmptyOrHasValue(options["types"], "textes")) {
-        await retrieveTextes(dataDir, sessions);
+        await processTextes(dataDir, sessions);
     }
     if (isOptionEmptyOrHasValue(options["types"], "rapports")) {
-        await retrieveRapports(dataDir, sessions);
+        await processRapports(dataDir, sessions);
     }
     if (!options["silent"]) {
         console.timeEnd("documents processing time");
@@ -211,7 +237,7 @@ async function main() {
 }
 if (process.argv[1].endsWith("retrieve_documents.ts")) {
     main()
-        .then(() => process.exit(0))
+        .then(() => process.exit(exitCode))
         .catch((error) => {
         console.log(error);
         process.exit(1);

package/lib/src/scripts/retrieve_videos.js CHANGED Viewed

@@ -97,7 +97,7 @@ async function writeMatchArtifacts(args) {
     if (finalTxt)
         await fsp.writeFile(path.join(ctx.baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
 }
-async function processGroupedReunion(agenda, session, dataDir) {
+async function processGroupedReunion(agenda, session, dataDir, lastByVideo) {
     // 1) GuardRails
     if (shouldSkipAgenda(agenda))
         return;
@@ -137,18 +137,30 @@ async function processGroupedReunion(agenda, session, dataDir) {
         session: ctx.session,
         options,
         writeIfChanged,
+        lastByVideo, // NEW
+        getAgendaSegmentTimecodes,
+        buildSenatVodMasterM3u8FromNvs,
+    });
+    await processBisIfNeeded({
+        agenda,
+        secondBest,
+        ctx,
+        skipDownload,
+        options,
+        lastByVideo,
+        writeIfChanged,
+        processOneReunionMatch,
         getAgendaSegmentTimecodes,
         buildSenatVodMasterM3u8FromNvs,
     });
-    // 4) Optional BIS
-    await processBisIfNeeded({ agenda, secondBest, ctx, skipDownload, options });
 }
 async function processAll(dataDir, sessions) {
     console.log("Process all Agendas and fetch video's url");
     for (const session of sessions) {
+        const lastByVideo = new Map();
         for (const { item: agenda } of iterLoadSenatAgendas(dataDir, session)) {
             try {
-                await processGroupedReunion(agenda, session, dataDir);
+                await processGroupedReunion(agenda, session, dataDir, lastByVideo);
             }
             catch (e) {
                 console.error(`[error] ${agenda?.uid ?? "unknown-uid"}:`, e?.message || e);

package/lib/src/types/texte.d.ts CHANGED Viewed

@@ -35,7 +35,6 @@ export interface FlatTexte {
     dateDepot: Date | null;
     datePublicationXml: Date | null;
     version: Version | null;
-    workflow: Step[];
     divisions: Division[];
     exposeDesMotifs?: ExposeDesMotifs | null;
 }

package/lib/src/videos/pipeline.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { Reunion } from "../types/agenda";
-import { BestMatch, MatchContext } from "./types";
+import { BestMatch, LastForVideo, MatchContext } from "./types";
 import { CommandLineOptions } from "command-line-args";
 export declare function processOneReunionMatch(args: {
     agenda: Reunion;
@@ -8,6 +8,11 @@ export declare function processOneReunionMatch(args: {
     session: number;
     options: Record<string, any>;
     writeIfChanged: (p: string, content: string) => Promise<void>;
+    lastByVideo: Map<string, {
+        agendaUid: string;
+        agendaJsonPath: string;
+        start: number;
+    }>;
     getAgendaSegmentTimecodes: (dataNvs: string, finalNvs: string, agendaKey: string) => {
         start: number;
         end: number | null;
@@ -20,5 +25,26 @@ export declare function processBisIfNeeded(args: {
     ctx: MatchContext;
     skipDownload: boolean;
     options: CommandLineOptions;
+    lastByVideo: Map<string, LastForVideo>;
+    writeIfChanged: (p: string, content: string) => Promise<void>;
+    processOneReunionMatch: (args: {
+        agenda: Reunion;
+        baseDir: string;
+        dataDir: string;
+        session: number;
+        options: Record<string, any>;
+        writeIfChanged: (p: string, content: string) => Promise<void>;
+        lastByVideo: Map<string, LastForVideo>;
+        getAgendaSegmentTimecodes: (dataNvs: string, finalNvs: string, agendaKey: string) => {
+            start: number;
+            end: number | null;
+        } | null;
+        buildSenatVodMasterM3u8FromNvs: (dataNvs: string) => string | null;
+    }) => Promise<void>;
+    getAgendaSegmentTimecodes: (dataNvs: string, finalNvs: string, agendaKey: string) => {
+        start: number;
+        end: number | null;
+    } | null;
+    buildSenatVodMasterM3u8FromNvs: (dataNvs: string) => string | null;
 }): Promise<void>;
 export declare function writeIfChanged(p: string, content: string): Promise<void>;

package/lib/src/videos/pipeline.js CHANGED Viewed

@@ -5,9 +5,8 @@ import { fetchText } from "./search";
 import fs from "fs-extra";
 import fsp from "fs/promises";
 import path from "path";
-import { getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs } from "../utils/nvs-parsing";
 export async function processOneReunionMatch(args) {
-    const { agenda, baseDir, dataDir, session, options, writeIfChanged, getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs, } = args;
+    const { agenda, baseDir, dataDir, session, options, writeIfChanged, lastByVideo, getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs, } = args;
     const reunionUid = agenda.uid;
     let dataTxt;
     let finalTxt;
@@ -25,18 +24,40 @@ export async function processOneReunionMatch(args) {
         return;
     }
     const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
+    // Ensure it exists first.
+    if (!(await fs.pathExists(agendaJsonPath))) {
+        console.warn(`[warn] agenda file not found: ${agendaJsonPath}`);
+        return;
+    }
     let timecodeDebutVideo = null;
     let timecodeFinVideo = null;
     const agendaKey = agenda.titre || agenda.objet || "";
     const seg = getAgendaSegmentTimecodes(dataTxt, finalTxt, agendaKey);
     if (seg) {
         timecodeDebutVideo = seg.start;
-        timecodeFinVideo = seg.end;
+        timecodeFinVideo = null; // keep open by default
     }
-    if (!(await fs.pathExists(agendaJsonPath))) {
-        console.warn(`[warn] agenda file not found: ${agendaJsonPath}`);
-        return;
+    // 1) If we have a start timecode, close the previous agenda for this SAME master
+    if (timecodeDebutVideo != null) {
+        const prev = lastByVideo.get(master);
+        if (prev && prev.agendaJsonPath !== agendaJsonPath) {
+            // micro-safety: do not close with an earlier timecode
+            if (timecodeDebutVideo <= prev.start) {
+                console.warn(`[warn] timecode order inversion on same video: ` +
+                    `prev=${prev.agendaUid}(${prev.start}s) -> cur=${agenda.uid}(${timecodeDebutVideo}s). ` +
+                    `Skip closing prev to avoid negative segment.`);
+            }
+            else {
+                await patchAgendaTimecodeFin({
+                    agendaJsonPath: prev.agendaJsonPath,
+                    timecodeFinVideo: timecodeDebutVideo,
+                    writeIfChanged,
+                });
+            }
+        }
+        lastByVideo.set(master, { agendaUid: agenda.uid, agendaJsonPath, start: timecodeDebutVideo });
     }
+    // 2) Update current agenda JSON with urlVideo (+ start/end if any)
     const raw = await fsp.readFile(agendaJsonPath, "utf-8");
     let obj;
     try {
@@ -49,7 +70,10 @@ export async function processOneReunionMatch(args) {
     const next = { ...obj, urlVideo: master, startTime: agenda.startTime };
     if (timecodeDebutVideo != null) {
         next.timecodeDebutVideo = timecodeDebutVideo;
-        next.timecodeFinVideo = timecodeFinVideo ?? undefined;
+        if (timecodeFinVideo != null)
+            next.timecodeFinVideo = timecodeFinVideo;
+        else
+            delete next.timecodeFinVideo;
     }
     await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
     if (!options["silent"]) {
@@ -58,7 +82,7 @@ export async function processOneReunionMatch(args) {
     }
 }
 export async function processBisIfNeeded(args) {
-    const { agenda, secondBest, ctx, skipDownload, options } = args;
+    const { agenda, secondBest, ctx, skipDownload, options, lastByVideo, writeIfChanged, processOneReunionMatch, getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs, } = args;
     if (skipDownload)
         return;
     if (!secondBest)
@@ -83,6 +107,7 @@ export async function processBisIfNeeded(args) {
         session: ctx.session,
         options,
         writeIfChanged,
+        lastByVideo,
         getAgendaSegmentTimecodes,
         buildSenatVodMasterM3u8FromNvs,
     });
@@ -128,3 +153,19 @@ export async function writeIfChanged(p, content) {
     }
     await fsp.writeFile(p, content, "utf-8");
 }
+async function patchAgendaTimecodeFin(args) {
+    const { agendaJsonPath, timecodeFinVideo, writeIfChanged } = args;
+    if (!(await fs.pathExists(agendaJsonPath)))
+        return;
+    const raw = await fsp.readFile(agendaJsonPath, "utf-8");
+    let obj;
+    try {
+        obj = JSON.parse(raw);
+    }
+    catch {
+        console.warn(`[warn] invalid JSON in ${agendaJsonPath}`);
+        return;
+    }
+    const next = { ...obj, timecodeFinVideo };
+    await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
+}

package/lib/src/videos/types.d.ts CHANGED Viewed

@@ -68,3 +68,8 @@ export type MatchContext = {
     reunionUid: string;
     agendaTs: number | null;
 };
+export type LastForVideo = {
+    agendaUid: string;
+    agendaJsonPath: string;
+    start: number;
+};

package/lib/validators/config.d.ts CHANGED Viewed

@@ -1,9 +1 @@
-import { z } from "zod";
-export declare const configSchema: z.ZodObject<{
-    db: z.ZodObject<{
-        host: z.ZodString;
-        password: z.ZodString;
-        user: z.ZodString;
-        port: z.ZodCoercedNumber<unknown>;
-    }, z.core.$strip>;
-}, z.core.$strip>;
+export declare function validateConfig(data: any): [any, any];

package/lib/validators/config.js CHANGED Viewed

@@ -1,10 +1,54 @@
-import { z } from "zod";
-const dbSchema = z.object({
-    host: z.string().trim().min(1, "Must not be empty"),
-    password: z.string().trim().min(1, "Must not be empty"),
-    user: z.string().trim().min(1, "Must not be empty"),
-    port: z.coerce.number().int().min(0).max(65535),
-});
-export const configSchema = z.object({
-    db: dbSchema,
-});
+import { validateChain, validateInteger, validateNonEmptyTrimmedString, validateNumber, validateOption, validateString, validateStringToNumber, validateTest, } from "@biryani/core";
+function validateDb(data) {
+    if (data === null || data === undefined) {
+        return [data, "Missing value"];
+    }
+    if (typeof data !== "object") {
+        return [data, `Expected an object got "${typeof data}"`];
+    }
+    data = { ...data };
+    const errors = {};
+    const remainingKeys = new Set(Object.keys(data));
+    for (const key of ["host", "password", "user"]) {
+        remainingKeys.delete(key);
+        const [value, error] = validateNonEmptyTrimmedString(data[key]);
+        data[key] = value;
+        if (error !== null) {
+            errors[key] = error;
+        }
+    }
+    {
+        const key = "port";
+        remainingKeys.delete(key);
+        const [value, error] = validateChain(validateOption([validateString, validateStringToNumber], validateNumber), validateInteger, validateTest((value) => 0 <= value && value <= 65535, "Must be an integer between 0 and 65535"))(data[key]);
+        data[key] = value;
+        if (error !== null) {
+            errors[key] = error;
+        }
+    }
+    for (const key of remainingKeys) {
+        errors[key] = "Unexpected item";
+    }
+    return [data, Object.keys(errors).length === 0 ? null : errors];
+}
+export function validateConfig(data) {
+    if (data === null || data === undefined) {
+        return [data, "Missing value"];
+    }
+    if (typeof data !== "object") {
+        return [data, `Expected an object got "${typeof data}"`];
+    }
+    data = { ...data };
+    const errors = {};
+    const remainingKeys = new Set(Object.keys(data));
+    {
+        const key = "db";
+        remainingKeys.delete(key);
+        const [value, error] = validateDb(data[key]);
+        data[key] = value;
+        if (error !== null) {
+            errors[key] = error;
+        }
+    }
+    return [data, Object.keys(errors).length === 0 ? null : errors];
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tricoteuses/senat",
-  "version": "2.21.3",
+  "version": "2.21.5",
   "description": "Handle French Sénat's open data",
   "keywords": [
     "France",

package/lib/src/utils/nvs-timecode.d.ts DELETED Viewed

@@ -1,17 +0,0 @@
-export type L1Chapter = {
-    id: string;
-    label: string;
-    index: number;
-};
-export declare function getLevel1Chapters(dataNvs: string): L1Chapter[];
-export declare function pickBestLevel1ChapterForAgenda(chapters: L1Chapter[], agendaTitle: string): {
-    chapter: L1Chapter;
-    score: number;
-} | null;
-export declare function getAgendaSegmentTimecodes(dataNvs: string, finalPlayerNvs: string, agendaTitleOrObjet: string): {
-    start: number;
-    end: number | null;
-    chapterId: string;
-    nextChapterId: string | null;
-    score: number;
-} | null;

package/lib/src/utils/nvs-timecode.js DELETED Viewed

@@ -1,79 +0,0 @@
-import { XMLParser } from "fast-xml-parser";
-import { dice, normalize } from "./scoring";
-import { decodeHtmlEntities } from "./string_cleaning";
-const CHAPTER_MATCH_THRESHOLD = 0.5;
-const xmlParser = new XMLParser({
-    ignoreAttributes: false,
-    attributeNamePrefix: "@_",
-});
-function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
-    const xml = xmlParser.parse(finalPlayerNvs);
-    const synchros = xml?.player?.synchro;
-    if (!synchros)
-        return null;
-    const synchsArray = Array.isArray(synchros) ? synchros : [synchros];
-    const match = synchsArray.find((s) => String(s["@_id"]) === String(chapterId));
-    if (!match)
-        return null;
-    const rawTimecode = match["@_timecode"];
-    if (rawTimecode == null)
-        return null;
-    const ms = Number(rawTimecode);
-    if (Number.isNaN(ms))
-        return null;
-    return Math.floor(ms / 1000);
-}
-function toArray(v) {
-    if (!v)
-        return [];
-    return Array.isArray(v) ? v : [v];
-}
-export function getLevel1Chapters(dataNvs) {
-    const xml = xmlParser.parse(dataNvs);
-    const root = xml?.data?.chapters?.chapter ?? xml?.chapters?.chapter;
-    const roots = toArray(root);
-    return roots
-        .map((ch, i) => {
-        const id = ch?.id ?? ch?.["@_id"];
-        const labelRaw = ch?.label ?? ch?.["@_label"] ?? "";
-        return {
-            id: String(id),
-            label: decodeHtmlEntities(String(labelRaw)).trim(),
-            index: i,
-        };
-    })
-        .filter((c) => c.id && c.label);
-}
-export function pickBestLevel1ChapterForAgenda(chapters, agendaTitle) {
-    const q = normalize(agendaTitle);
-    let best = null;
-    for (const ch of chapters) {
-        const s = dice(q, ch.label);
-        if (!best || s > best.score)
-            best = { chapter: ch, score: s };
-    }
-    if (!best || best.score < CHAPTER_MATCH_THRESHOLD)
-        return { chapter: chapters[0], score: 0 };
-    return best;
-}
-export function getAgendaSegmentTimecodes(dataNvs, finalPlayerNvs, agendaTitleOrObjet) {
-    const l1 = getLevel1Chapters(dataNvs);
-    if (!l1.length)
-        return null;
-    const best = pickBestLevel1ChapterForAgenda(l1, agendaTitleOrObjet);
-    if (!best)
-        return null;
-    const chapter = best.chapter;
-    const next = l1[chapter.index + 1] ?? null;
-    const start = getTimecodeForChapterId(finalPlayerNvs, chapter.id);
-    if (start == null)
-        return null;
-    const end = next ? getTimecodeForChapterId(finalPlayerNvs, next.id) : null;
-    return {
-        start,
-        end,
-        chapterId: chapter.id,
-        nextChapterId: next?.id ?? null,
-        score: best.score,
-    };
-}

package/lib/src/utils/weights_scoring_config.d.ts DELETED Viewed

@@ -1,2 +0,0 @@
-import { MatchWeights } from "./scoring";
-export declare const weights: MatchWeights;

package/lib/src/utils/weights_scoring_config.js DELETED Viewed

@@ -1,15 +0,0 @@
-// BASED ON TESTS RESULTS
-// these weights yield good results in the benchmark test suite
-// aiming at 0 WRONG matches while maximizing HITs
-export const weights = {
-    wTitle: 0.7,
-    wOrg: 0.1,
-    wSalle: 0,
-    wTime: 0.2,
-    sameOrgBonus: 0.2,
-    minAccept: 0.5,
-    margin: 0.1,
-    titleMin: 0.2,
-    titleDominance: 0,
-    orgUncertainPenalty: 0.8,
-};