@tricoteuses/senat 2.21.3 → 2.21.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/config.d.ts CHANGED
@@ -1,21 +1,3 @@
1
1
  import "dotenv/config";
2
- import { z } from "zod";
3
- export declare const configSchema: z.ZodObject<{
4
- db: z.ZodObject<{
5
- host: z.ZodString;
6
- name: z.ZodString;
7
- password: z.ZodString;
8
- port: z.ZodCoercedNumber<unknown>;
9
- user: z.ZodString;
10
- }, z.core.$strip>;
11
- }, z.core.$strip>;
12
- declare const _default: {
13
- db: {
14
- host: string;
15
- name: string;
16
- password: string;
17
- port: number;
18
- user: string;
19
- };
20
- };
21
- export default _default;
2
+ declare const validConfig: any;
3
+ export default validConfig;
package/lib/config.js CHANGED
@@ -1,27 +1,16 @@
1
1
  import "dotenv/config";
2
- import { z } from "zod";
3
- const dbSchema = z.object({
4
- host: z.string().trim().min(1, "Must not be empty"),
5
- name: z.string().trim().min(1, "Must not be empty"),
6
- password: z.string().trim().min(1, "Must not be empty"),
7
- port: z.coerce.number().int().min(0).max(65535),
8
- user: z.string().trim().min(1, "Must not be empty"),
9
- });
10
- export const configSchema = z.object({
11
- db: dbSchema,
12
- });
2
+ import { validateConfig } from "./validators/config";
13
3
  const config = {
14
4
  db: {
15
5
  host: process.env["TRICOTEUSES_SENAT_DB_HOST"] || "localhost",
16
- name: process.env["TRICOTEUSES_SENAT_DB_NAME"] || "postgres",
17
6
  password: process.env["TRICOTEUSES_SENAT_DB_PASSWORD"] || "opendata",
18
7
  port: process.env["TRICOTEUSES_SENAT_DB_PORT"] || 5432,
19
8
  user: process.env["TRICOTEUSES_SENAT_DB_USER"] || "opendata",
20
9
  },
21
10
  };
22
- const result = configSchema.safeParse(config);
23
- if (!result.success) {
24
- console.error(`Error in configuration:\n${JSON.stringify(config, null, 2)}\nError:\n${JSON.stringify(result.error.issues, null, 2)}`);
11
+ const [validConfig, error] = validateConfig(config);
12
+ if (error !== null) {
13
+ console.error(`Error in configuration:\n${JSON.stringify(validConfig, null, 2)}\nError:\n${JSON.stringify(error, null, 2)}`);
25
14
  process.exit(-1);
26
15
  }
27
- export default result.data;
16
+ export default validConfig;
package/lib/loaders.js CHANGED
@@ -193,7 +193,7 @@ export function* iterLoadSenatDocuments(dataDir, session, documentType, options
193
193
  ...document,
194
194
  };
195
195
  const documentItem = {
196
- item: enrichedDocument,
196
+ item: enrichedDocument
197
197
  };
198
198
  if (document.url) {
199
199
  const documentName = path.parse(document.url).name;
@@ -47,7 +47,9 @@ const findAllAmendementsQuery = dbSenat
47
47
  .leftJoin("ameli.typses", "ameli.typses.id", "ameli.ses.typid")
48
48
  .leftJoin("ameli.nat", "ameli.txt_ameli.natid", "ameli.nat.id")
49
49
  .leftJoin("ameli.lec_ameli", "ameli.txt_ameli.lecid", "ameli.lec_ameli.id")
50
- .leftJoin("dosleg.texte", (join) => join.onRef("ameli.ses.ann", "=", "dosleg.texte.sesann").onRef("ameli.txt_ameli.numabs", "=", "dosleg.texte.texnum"))
50
+ .leftJoin("dosleg.texte", (join) => join
51
+ .onRef("ameli.ses.ann", "=", "dosleg.texte.sesann")
52
+ .onRef("ameli.txt_ameli.numabs", "=", "dosleg.texte.texnum"))
51
53
  .leftJoin("dosleg.lecass", "dosleg.texte.lecassidt", "dosleg.lecass.lecassidt")
52
54
  .leftJoin("ameli.mot", "ameli.amd.motid", "ameli.mot.id")
53
55
  .leftJoin("ameli.avicom", "ameli.amd.avcid", "ameli.avicom.id")
@@ -28,7 +28,10 @@ function documentsAttaches(rapportId) {
28
28
  .selectFrom("docatt")
29
29
  .leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
30
30
  .where("docatt.rapcod", "=", rapportId)
31
- .select(["docatt.docatturl as url", "typatt.typattlib as type_document"]));
31
+ .select([
32
+ "docatt.docatturl as url",
33
+ "typatt.typattlib as type_document"
34
+ ]));
32
35
  }
33
36
  function selectRapportAttributes({ eb, ref, val }) {
34
37
  return [
@@ -67,9 +70,14 @@ const queryRapports = baseQueryRapports
67
70
  .leftJoin("lecass", "lecass.lecassidt", "lecassrap.lecassidt")
68
71
  .leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
69
72
  .leftJoin("loi", "loi.loicod", "lecture.loicod")
70
- .select((args) => ["loi.signet as signet_dossier", ...selectRapportAttributes(args)]);
73
+ .select((args) => [
74
+ "loi.signet as signet_dossier",
75
+ ...selectRapportAttributes(args),
76
+ ]);
71
77
  export function rapports(lectureAssembleeId) {
72
- return jsonArrayFrom(baseQueryRapports.select(selectRapportAttributes).where("lecassrap.lecassidt", "=", lectureAssembleeId));
78
+ return jsonArrayFrom(baseQueryRapports
79
+ .select(selectRapportAttributes)
80
+ .where("lecassrap.lecassidt", "=", lectureAssembleeId));
73
81
  }
74
82
  function auteursTexte(texteId) {
75
83
  return jsonArrayFrom(dbSenat
@@ -126,9 +134,14 @@ const queryTextes = baseQueryTextes
126
134
  .leftJoin("lecass", "lecass.lecassidt", "texte.lecassidt")
127
135
  .leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
128
136
  .leftJoin("loi", "loi.loicod", "lecture.loicod")
129
- .select((args) => ["loi.signet as signet_dossier", ...selectTexteAttributes(args)]);
137
+ .select((args) => [
138
+ "loi.signet as signet_dossier",
139
+ ...selectTexteAttributes(args),
140
+ ]);
130
141
  export function textes(lectureAssembleeId) {
131
- return jsonArrayFrom(baseQueryTextes.select(selectTexteAttributes).where("texte.lecassidt", "=", lectureAssembleeId));
142
+ return jsonArrayFrom(baseQueryTextes
143
+ .select(selectTexteAttributes)
144
+ .where("texte.lecassidt", "=", lectureAssembleeId));
132
145
  }
133
146
  export function findAllTextes() {
134
147
  return queryTextes.stream();
@@ -203,10 +203,10 @@ export function buildActesLegislatifs(dossier) {
203
203
  numero: depotTexte.numero,
204
204
  uid: `${loiSignet}-${phasePrefix}-DEPOT`,
205
205
  session: lecAss.session,
206
- chambre: "SN",
206
+ chambre: 'SN',
207
207
  signet_dossier: loiSignet,
208
208
  texte_url: depotTexte.url,
209
- code_organisme: null,
209
+ code_organisme: null
210
210
  });
211
211
  }
212
212
  // =================================================================
@@ -225,7 +225,7 @@ export function buildActesLegislatifs(dossier) {
225
225
  adoption: rap.adoption,
226
226
  uid: `${loiSignet}-${phasePrefix}-COM`,
227
227
  session: lecAss.session,
228
- chambre: "SN",
228
+ chambre: 'SN',
229
229
  signet_dossier: loiSignet,
230
230
  texte_url: rap.url,
231
231
  });
@@ -247,9 +247,9 @@ export function buildActesLegislatifs(dossier) {
247
247
  libelle: `Discussion en séance publique`,
248
248
  uid: `${loiSignet}-${phasePrefix}-DEBATS-SEANCE`,
249
249
  session: lecAss.session,
250
- chambre: "SN",
250
+ chambre: 'SN',
251
251
  signet_dossier: loiSignet,
252
- code_organisme: null,
252
+ code_organisme: null
253
253
  });
254
254
  }
255
255
  }
@@ -281,10 +281,10 @@ export function buildActesLegislatifs(dossier) {
281
281
  adoption: libelleStatut,
282
282
  uid: `${loiSignet}-DEC-${texteFinal.numero}`,
283
283
  session: lecAss.session,
284
- chambre: "SN",
284
+ chambre: 'SN',
285
285
  signet_dossier: loiSignet,
286
286
  texte_url: texteFinal.url,
287
- code_organisme: null,
287
+ code_organisme: null
288
288
  });
289
289
  }
290
290
  }
@@ -294,19 +294,19 @@ export function buildActesLegislatifs(dossier) {
294
294
  // =================================================================
295
295
  if (dossier.date_decision_CoC) {
296
296
  actes.push({
297
- code_acte: "CC",
297
+ code_acte: 'CC',
298
298
  date: dossier.date_decision_CoC,
299
299
  libelle: `Décision du Conseil constitutionnel`,
300
300
  id: dossier.url_decision_CoC,
301
301
  uid: `${loiSignet}-CC`,
302
- chambre: "AN",
302
+ chambre: 'AN',
303
303
  signet_dossier: loiSignet,
304
304
  texte_url: dossier.url_decision_CoC || dossier.url_dossier_CoC,
305
305
  });
306
306
  }
307
307
  if (dossier.date_promulgation) {
308
308
  actes.push({
309
- code_acte: "PROM",
309
+ code_acte: 'PROM',
310
310
  date: dossier.date_promulgation,
311
311
  libelle: `Promulgation de la loi`,
312
312
  date_publication_JO: dossier.date_publication_JO,
@@ -314,7 +314,7 @@ export function buildActesLegislatifs(dossier) {
314
314
  url_legifrance: dossier.url_JO,
315
315
  id: dossier.url_JO,
316
316
  uid: `${loiSignet}-PROM`,
317
- chambre: "AN",
317
+ chambre: 'AN',
318
318
  signet_dossier: loiSignet,
319
319
  });
320
320
  }
@@ -113,10 +113,6 @@ export interface TamMinisteres {
113
113
  titreministre: string | null;
114
114
  }
115
115
  export interface TamQuestions {
116
- /**
117
- * Question caduque redéposée
118
- */
119
- caduque_redeposee: string | null;
120
116
  /**
121
117
  * Libellé de la circonscription
122
118
  */
@@ -2735,10 +2735,6 @@ export interface QuestionsTamMinisteres {
2735
2735
  titreministre: string | null;
2736
2736
  }
2737
2737
  export interface QuestionsTamQuestions {
2738
- /**
2739
- * Question caduque redéposée
2740
- */
2741
- caduque_redeposee: string | null;
2742
2738
  /**
2743
2739
  * Libellé de la circonscription
2744
2740
  */
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * AUTO-GENERATED FILE - DO NOT EDIT!
3
3
  *
4
- * This file was automatically generated by schemats v.2.20.33
4
+ * This file was automatically generated by schemats v.2.19.6
5
5
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t amd -t amdsen -t avicom -t avigvt -t cab -t com_ameli -t ent -t etatxt -t fbu -t grppol_ameli -t gvt -t intora -t irr -t lec_ameli -t mot -t nat -t orarol -t sai -t saisen -t sea -t sen_ameli -t ses -t sor -t sub -t txt_ameli -t typrect -t typses -t typsub -t w_nivrec -s ameli
6
6
  *
7
7
  */
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * AUTO-GENERATED FILE - DO NOT EDIT!
3
3
  *
4
- * This file was automatically generated by schemats v.2.20.33
4
+ * This file was automatically generated by schemats v.2.19.6
5
5
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t debats -t intdivers -t intpjl -t lecassdeb -t secdis -t secdivers -t syndeb -t typsec -s debats
6
6
  *
7
7
  */
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * AUTO-GENERATED FILE - DO NOT EDIT!
3
3
  *
4
- * This file was automatically generated by schemats v.2.20.33
4
+ * This file was automatically generated by schemats v.2.19.6
5
5
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t amescr -t ass -t aud -t auteur -t ble -t catrap -t corscr -t date_seance -t deccoc -t denrap -t doc -t docatt -t docsea -t ecr -t etaloi -t evtsea -t forpub -t gen -t lecass -t lecassrap -t lecture -t lnkrap -t loi -t loithe -t natloi -t org -t orgnomhis -t orippr -t oritxt -t posvot -t qua -t rap -t raporg -t rapthe -t rolsig -t scr -t ses -t stavot -t texte -t texte_ancien -t the -t titsen -t typatt -t typaut -t typdoc -t typevtsea -t typlec -t typloi -t typorg -t typrap -t typtxt -t typurl -t votsen -s dosleg
6
6
  *
7
7
  */
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * AUTO-GENERATED FILE - DO NOT EDIT!
3
3
  *
4
- * This file was automatically generated by schemats v.2.20.33
4
+ * This file was automatically generated by schemats v.2.19.6
5
5
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t etatquestion -t legquestion -t naturequestion -t sortquestion -t tam_ministeres -t tam_questions -t tam_reponses -t the -s questions
6
6
  *
7
7
  */
@@ -76,7 +76,6 @@ export interface tam_ministeres {
76
76
  titreministre: tam_ministeresFields.titreministre;
77
77
  }
78
78
  export declare namespace tam_questionsFields {
79
- type caduque_redeposee = string | null;
80
79
  type circonscription = string | null;
81
80
  type cirnum = number | null;
82
81
  type codequalite = string | null;
@@ -136,7 +135,6 @@ export declare namespace tam_questionsFields {
136
135
  type version = number | null;
137
136
  }
138
137
  export interface tam_questions {
139
- caduque_redeposee: tam_questionsFields.caduque_redeposee;
140
138
  circonscription: tam_questionsFields.circonscription;
141
139
  cirnum: tam_questionsFields.cirnum;
142
140
  codequalite: tam_questionsFields.codequalite;
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * AUTO-GENERATED FILE - DO NOT EDIT!
3
3
  *
4
- * This file was automatically generated by schemats v.2.20.33
4
+ * This file was automatically generated by schemats v.2.19.6
5
5
  * $ schemats generate -c postgres://username:password@localhost:5432/senat -t acr -t activite -t activite_audit -t activite_delegation -t activite_delegation_audit -t activite_loi -t activite_loi_audit -t activite_obligatoire -t activite_participant -t activite_participant_audit -t activite_senateur -t activite_senateur_audit -t activite_senateur_params -t activite_senateur_params_audit -t activites_liees -t activites_liees_audit -t actpro -t adhgrpsen -t adr -t adresse -t adrsen -t app -t assparint -t asster -t autgrpsen -t autorisation_profil -t autorisations -t avis_nomination_art13 -t basdes -t bur -t bur3r -t bur4r -t cad -t candid -t candidat -t candtodelete -t categorie_activite -t catpro -t catpro2e -t catterrit -t cible_categorie_periode -t cirdep -t com -t con -t cotgip -t csp -t cspfam -t databasechangelog -t databasechangeloglock -t delega -t derogation -t derogation_audit -t derogation_senateur -t derogation_senateur_audit -t design -t designoep -t designorg -t discou -t div -t dpt -t dpt_seuil_presence -t dptele -t dptele_files -t dptele_processing -t dptele_processing_type -t dpttypman -t droits_acces -t droits_acces_audit -t droits_type_derogation -t ele -t eleloc -t elucan -t eludep -t eludiv -t elueur -t elueur_apf -t elumet -t elureg -t elusen -t elusen2e -t elusen3r -t elusen4r -t elusencommu -t elusenpair -t eluter -t elutit -t eluvil -t etadebman -t etadebman3r -t etadebman4r -t etafinman -t etafinman3r -t etafinman4r -t etaprr -t etarpm -t etasen -t ext2e_bio -t ext2e_csp -t ext2e_mandats -t ext2e_minist -t extsencom_identite -t extsencom_mandat -t fonact_participant -t foncandid -t foncom -t fondelega -t fongrppol -t fongrpsen -t fonmemcom -t fonmemdelega -t fonmemextpar -t fonmemgrppol -t fonmemgrpsen -t fonmemorg -t fonorg -t grppol -t grppol4r -t grpsenami -t grpsenamiadh -t grpsenamiadhreq -t grpsenamiadhreqeta -t grpsenamiunadh -t grpsim -t gvt -t insee_pays2008 -t jhi_authority -t jhi_user -t jhi_user_authority -t lanetr -t libcom -t libdelega -t libgrppol -t libgrpsen -t liborg -t lisdptele -t mel -t memcom -t memcomsea -t memdelega -t memextpar -t memgrppol -t memgrpsen -t memorg -t met -t minind -t minist -t mis -t misetafin -t mismin -t misrapeta -t missen -t moddes -t mode_acces_elusenpair -t nation -t nationgrpsen -t nivlan -t org -t orgext -t orgextpres -t orgthe -t pairie_elusenpair -t parpol -t parpolglo -t participa -t pcs -t pcs24 -t pcs42 -t pcs8 -t pcscatpro -t per -t per_sen -t perapp -t periode_presence -t perpolglo -t perrol -t pj_justificatif -t pj_justificatif_audit -t plaind -t plan_table -t plsql_profiler_runs -t plsql_profiler_units -t poicon -t posvot -t presences_scrutin_surcharge -t presencesrevisionentity -t profil_applicatif -t qua -t rap_the -t reg -t reladr -t requetes_profil -t reslis -t resultat -t reu -t revchanges -t rne_mandat -t rne_mandat_diff -t rne_sen -t rne_sen_diff -t rne_type_mandat -t rol -t sal -t scr -t scrusoldelega -t sea -t sec -t sec2e -t secexe -t sen -t senbur -t senbur3r -t senbur4r -t sennom -t senpj -t sensim -t sentablenom -t senurl -t seuil_presence -t sirpas_elusen -t sirpas_fonmemcom -t sirpas_fonmemdelega -t sirpas_fonmemgrppol -t sirpas_memcom -t sirpas_memdelega -t sirpas_memgrppol -t sirpas_mvt -t sirpas_mvtcm -t sirpas_mvttri -t sirpas_sen -t sirpas_senbur -t sirpas_trf -t srv -t stajur -t stavot -t suspensiontravaux -t suspensiontravaux_audit -t sysage -t syscognos -t sysevt -t sysvar -t sysvar_sendev -t sysvar_senprod -t tapsenrevchanges -t tapsenrevisionentity -t telephone -t temval -t tenpol -t territ -t testoracle -t titele -t titelerne -t titmin -t titnob -t tmpsd -t toutes -t turelu -t typadr -t typapppol -t typbister -t typcandid -t type_activite -t type_activite_participant -t type_activite_rol -t type_activite_senateur -t type_categorie -t type_derogation -t type_droit_acces -t type_pj_justificatif -t type_rne_diff -t type_type_derogation -t typele -t typgrpsen -t typman -t typmin -t typmoddes -t typorg -t typorgext -t typparpol -t typpoicon -t typprs -t typprssta -t typscr -t typtel -t typurl -t typvoi -t uploaded_file -t uploaded_file_type -t validation -t validation_defview_profil -t validation_profil -t vercand -t verres -t votes -t zongeo -s sens
6
6
  *
7
7
  */
@@ -5,7 +5,7 @@ import path from "path";
5
5
  import pLimit from "p-limit";
6
6
  import * as git from "../git";
7
7
  import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
8
- import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
8
+ import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER } from "../loaders";
9
9
  import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAllTextes, findAllRapports, } from "../model";
10
10
  import { processRapport, processTexte } from "./retrieve_documents";
11
11
  import { buildActesLegislatifs } from "../model/dosleg";
@@ -164,7 +164,7 @@ async function convertDatasetDosLeg(dataDir, options) {
164
164
  const actesBrutsNormalises = buildActesLegislatifs(dossier);
165
165
  const dossierWithActes = {
166
166
  ...dossier,
167
- actes_legislatifs: actesBrutsNormalises,
167
+ actes_legislatifs: actesBrutsNormalises
168
168
  };
169
169
  const dossierFile = `${dossier["signet"]}.json`;
170
170
  await fs.outputJSON(path.join(dossierReorganizedDir, dossierFile), dossierWithActes, { spaces: 2 });
@@ -236,7 +236,7 @@ async function convertTextes(dataDir, options) {
236
236
  const texteName = path.parse(texte["url"]).name;
237
237
  const texteDir = path.join(originalTextesDir, `${session}`, texteName);
238
238
  // oritxtcod = 1 corresponds to "Texte de loi déposé au Sénat"
239
- const hasExposeDesMotifs = texte["origine"] === "Sénat" && texte["ordre_origine"] === "1";
239
+ const hasExposeDesMotifs = texte["origine"] === 'Sénat' && texte["ordre_origine"] === '1';
240
240
  const metadata = {
241
241
  name: texteName,
242
242
  session: texte["session"],
@@ -271,8 +271,7 @@ async function retrieveOpenData() {
271
271
  process.env = {
272
272
  ...process.env,
273
273
  PGHOST: process.env["PGHOST"] || config.db.host,
274
- PGPORT: process.env["PGPORT"] || String(config.db.port),
275
- PGDATABASE: process.env["PGDATABASE"] || config.db.name,
274
+ PGPORT: process.env["PGPORT"] || config.db.port,
276
275
  PGUSER: process.env["PGUSER"] || config.db.user,
277
276
  PGPASSWORD: process.env["PGPASSWORD"] || config.db.password,
278
277
  };
@@ -1,4 +1,4 @@
1
- import { iterLoadSenatRapports, } from "../loaders";
1
+ import { iterLoadSenatRapports } from "../loaders";
2
2
  import commandLineArgs from "command-line-args";
3
3
  import { dataDirDefaultOption } from "./shared/cli_helpers";
4
4
  const optionsDefinitions = [dataDirDefaultOption];
package/lib/src/git.js CHANGED
@@ -2,6 +2,57 @@ import { execSync } from "node:child_process";
2
2
  import fs from "fs-extra";
3
3
  import path from "node:path";
4
4
  const MAXBUFFER = 50 * 1024 * 1024;
5
+ const GIT_LOCK_RETRY_DELAY_MS = 1000;
6
+ const GIT_LOCK_RETRY_COUNT = 5;
7
+ const GIT_LOCK_STALE_AFTER_MS = 2 * 60 * 1000;
8
+ function sleep(ms) {
9
+ Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
10
+ }
11
+ function getIndexLockPath(repositoryDir) {
12
+ return path.join(repositoryDir, ".git", "index.lock");
13
+ }
14
+ function isIndexLockError(error) {
15
+ const stderr = String(error?.stderr || "");
16
+ return /index\.lock': File exists\./.test(stderr);
17
+ }
18
+ function removeStaleIndexLock(repositoryDir) {
19
+ const lockPath = getIndexLockPath(repositoryDir);
20
+ if (!fs.existsSync(lockPath)) {
21
+ return false;
22
+ }
23
+ const stats = fs.statSync(lockPath);
24
+ const ageMs = Date.now() - stats.mtimeMs;
25
+ if (ageMs < GIT_LOCK_STALE_AFTER_MS) {
26
+ return false;
27
+ }
28
+ fs.removeSync(lockPath);
29
+ return true;
30
+ }
31
+ function execGitWithIndexLockRecovery(command, repositoryDir, options) {
32
+ let lockRemoved = false;
33
+ for (let attempt = 1; attempt <= GIT_LOCK_RETRY_COUNT; attempt++) {
34
+ try {
35
+ execSync(command, {
36
+ cwd: repositoryDir,
37
+ ...options,
38
+ });
39
+ return;
40
+ }
41
+ catch (error) {
42
+ if (!isIndexLockError(error)) {
43
+ throw error;
44
+ }
45
+ if (!lockRemoved && removeStaleIndexLock(repositoryDir)) {
46
+ lockRemoved = true;
47
+ continue;
48
+ }
49
+ if (attempt === GIT_LOCK_RETRY_COUNT) {
50
+ throw error;
51
+ }
52
+ sleep(GIT_LOCK_RETRY_DELAY_MS);
53
+ }
54
+ }
55
+ }
5
56
  export function initRepo(repositoryDir) {
6
57
  if (!fs.existsSync(path.join(repositoryDir, ".git"))) {
7
58
  fs.ensureDirSync(repositoryDir);
@@ -15,8 +66,7 @@ export function initRepo(repositoryDir) {
15
66
  }
16
67
  export function commit(repositoryDir, message) {
17
68
  initRepo(repositoryDir);
18
- execSync("git add .", {
19
- cwd: repositoryDir,
69
+ execGitWithIndexLockRecovery("git add .", repositoryDir, {
20
70
  env: process.env,
21
71
  encoding: "utf-8",
22
72
  stdio: ["ignore", "ignore", "pipe"],
@@ -43,10 +43,9 @@ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, s
43
43
  export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
44
44
  export declare function iterLoadSenatRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
45
45
  export declare function iterLoadSenatTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
46
- export declare function iterLoadSenatDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
47
46
  export declare function iterLoadSenatRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
48
- export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
49
- export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
47
+ export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult & Partial<FlatTexte>>>;
48
+ export declare function loadSenatTexteContent(dataDir: string, session: number | string | null | undefined, texteId: string): IterItem<FlatTexte | null>;
50
49
  export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
51
50
  item: CompteRendu | null;
52
51
  };
@@ -182,29 +182,6 @@ export function* iterLoadSenatTexteUrls(dataDir, session) {
182
182
  }
183
183
  }
184
184
  }
185
- export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
186
- for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
187
- for (const lecture of dossierLegislatif["lectures"]) {
188
- const lecturesSenat = lecture.lectures_assemblee.filter((lectureAssemblee) => lectureAssemblee.assemblee === "Sénat");
189
- for (const lectureSenat of lecturesSenat) {
190
- for (const document of lectureSenat[documentType]) {
191
- const enrichedDocument = {
192
- signet_dossier: dossierLegislatif["signet"],
193
- ...document,
194
- };
195
- const documentItem = {
196
- item: enrichedDocument,
197
- };
198
- if (document.url) {
199
- const documentName = path.parse(document.url).name;
200
- documentItem.filePathFromDataset = path.join(`${document.session ?? UNDEFINED_SESSION}`, documentName, `${documentName}.pdf`);
201
- }
202
- yield documentItem;
203
- }
204
- }
205
- }
206
- }
207
- }
208
185
  export function* iterLoadSenatRapports(dataDir, session, options = {}) {
209
186
  for (const iterItem of iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options)) {
210
187
  if (iterItem.item?.["id"]) {
@@ -213,14 +190,22 @@ export function* iterLoadSenatRapports(dataDir, session, options = {}) {
213
190
  }
214
191
  }
215
192
  export function* iterLoadSenatTextes(dataDir, session, options = {}) {
216
- for (const iterItem of iterLoadSenatDocuments(dataDir, session, "textes", options)) {
217
- yield iterItem;
193
+ for (const iterItem of iterLoadSenatItems(dataDir, TEXTE_FOLDER, session, DATA_ORIGINAL_FOLDER, options)) {
194
+ if (!iterItem.item?.["id"]) {
195
+ continue;
196
+ }
197
+ const texteItem = iterItem;
198
+ const texte = texteItem.item;
199
+ const texteId = texte["id"];
200
+ const { item: texteContent } = loadSenatTexteContent(dataDir, texte["session"], texteId);
201
+ if (texteContent) {
202
+ texteItem.item.divisions = texteContent.divisions;
203
+ }
204
+ yield texteItem;
218
205
  }
219
206
  }
220
- export function loadSenatTexteContent(dataDir, textePathFromDataset) {
221
- const parsedTextePath = path.parse(textePathFromDataset);
222
- const jsonTexteName = `${parsedTextePath.name}.json`;
223
- const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, parsedTextePath.dir, jsonTexteName);
207
+ export function loadSenatTexteContent(dataDir, session, texteId) {
208
+ const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? UNDEFINED_SESSION), texteId, `${texteId}.json`);
224
209
  if (!fs.existsSync(fullTextePath)) {
225
210
  return { item: null };
226
211
  }
@@ -2,6 +2,9 @@ import { sql } from "kysely";
2
2
  import { dbSenat } from "../databases";
3
3
  import { concat, rtrim, toDateString } from "./util";
4
4
  import { jsonArrayFrom } from "kysely/helpers/postgres";
5
+ function stripTrailingHashes(expr) {
6
+ return sql `regexp_replace(${expr}, '#+$', '')`;
7
+ }
5
8
  function orderOrdreOrigineTexte(expr) {
6
9
  return sql `array_position(array['0','2','1'], ${expr})`;
7
10
  }
@@ -37,15 +40,15 @@ function selectRapportAttributes({ eb, ref, val }) {
37
40
  eb
38
41
  .case()
39
42
  .when("rap.rapurl", "is not", null)
40
- .then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
43
+ .then(stripTrailingHashes(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`))
41
44
  .else(null)
42
45
  .end()
43
46
  .as("id"),
44
47
  eb
45
48
  .case()
46
49
  .when("rap.typurl", "=", "I")
47
- .then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
48
- .else(rtrim(ref("rap.rapurl")))
50
+ .then(stripTrailingHashes(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl")))))
51
+ .else(stripTrailingHashes(rtrim(ref("rap.rapurl"))))
49
52
  .end()
50
53
  .as("url"),
51
54
  rtrim(ref("denrap.libdenrap")).as("type"),
@@ -95,15 +98,15 @@ function selectTexteAttributes({ eb, ref, val }) {
95
98
  eb
96
99
  .case()
97
100
  .when("texte.texurl", "is not", null)
98
- .then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
101
+ .then(stripTrailingHashes(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`))
99
102
  .else(null)
100
103
  .end()
101
104
  .as("id"),
102
105
  eb
103
106
  .case()
104
107
  .when("texte.typurl", "=", "I")
105
- .then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
106
- .else(rtrim(ref("texte.texurl")))
108
+ .then(stripTrailingHashes(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl")))))
109
+ .else(stripTrailingHashes(rtrim(ref("texte.texurl"))))
107
110
  .end()
108
111
  .as("url"),
109
112
  rtrim(ref("oritxt.oritxtlib")).as("origine"),
@@ -1,24 +1,6 @@
1
1
  import { JSDOM } from "jsdom";
2
- import { AKN_IDENTIFICATION_STRUCTURE_REGEXP, AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
2
+ import { AKN_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
3
3
  import { DivisionType, } from "../types/texte";
4
- function buildWorklow(metaElement) {
5
- const stepElements = metaElement.querySelectorAll("workflow step");
6
- const steps = [];
7
- for (const stepElement of stepElements) {
8
- const identification = stepElement.getAttribute("href") ?? "";
9
- const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
10
- steps.push({
11
- eId: stepElement.getAttribute("eId"),
12
- date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
13
- type: identificationParts?.["type"] || null,
14
- session: identificationParts?.["session"] || null,
15
- numero: identificationParts?.["numTexte"] || null,
16
- version: identificationParts?.["version"] ? identificationParts["version"] : null,
17
- outcome: stepElement.getAttribute("outcome"),
18
- });
19
- }
20
- return steps;
21
- }
22
4
  function buildDivision(node, index) {
23
5
  const eId = node.getAttribute("eId");
24
6
  const tag = node.nodeName;
@@ -147,7 +129,6 @@ export function transformTexte(document) {
147
129
  dateDepot: dateDepot ? new Date(dateDepot) : null,
148
130
  datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
149
131
  version: identificationParts?.["version"] ? identificationParts["version"] : null,
150
- workflow: metaElement ? buildWorklow(metaElement) : [],
151
132
  divisions: bodyElement ? flattenTexte(bodyElement) : [],
152
133
  };
153
134
  }
@@ -13,32 +13,31 @@ import { UNDEFINED_SESSION } from "../types/sessions";
13
13
  import { getSessionFromDate, getSessionFromSignet } from "./datautil";
14
14
  import { commonOptions } from "./shared/cli_helpers";
15
15
  import { ensureAndClearDir } from "./shared/util";
16
+ let exitCode = 10; // 0: some data changed, 10: no modification
16
17
  const optionsDefinitions = [...commonOptions];
17
18
  const options = commandLineArgs(optionsDefinitions);
18
19
  const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
19
20
  const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
20
21
  const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
21
22
  const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
22
- function commitGit(datasetDir, options, exitCode) {
23
+ function commitAndPushGit(datasetDir, options) {
23
24
  if (options.commit) {
24
25
  const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options.remote);
25
26
  if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
26
27
  exitCode = errorCode;
27
28
  }
28
29
  }
29
- return exitCode;
30
30
  }
31
31
  async function convertData() {
32
32
  const dataDir = options["dataDir"];
33
33
  assert(dataDir, "Missing argument: data directory");
34
34
  const enabledDatasets = getEnabledDatasets(options["categories"]);
35
35
  console.time("data transformation time");
36
- let exitCode = 0;
37
36
  if (enabledDatasets & EnabledDatasets.Ameli) {
38
37
  try {
39
38
  await convertDatasetAmeli(dataDir, options);
40
39
  const ameliDir = path.join(dataDir, datasets.ameli.database);
41
- exitCode = commitGit(ameliDir, options, exitCode);
40
+ commitAndPushGit(ameliDir, options);
42
41
  }
43
42
  catch (error) {
44
43
  console.error(`Error converting Ameli dataset:`, error);
@@ -48,7 +47,7 @@ async function convertData() {
48
47
  try {
49
48
  await convertDatasetDebats(dataDir, options);
50
49
  const debatsDir = path.join(dataDir, datasets.debats.database);
51
- exitCode = commitGit(debatsDir, options, exitCode);
50
+ commitAndPushGit(debatsDir, options);
52
51
  }
53
52
  catch (error) {
54
53
  console.error(`Error converting Debats dataset:`, error);
@@ -58,7 +57,7 @@ async function convertData() {
58
57
  try {
59
58
  await convertDatasetDosLeg(dataDir, options);
60
59
  const doslegDir = path.join(dataDir, datasets.dosleg.database);
61
- exitCode = commitGit(doslegDir, options, exitCode);
60
+ commitAndPushGit(doslegDir, options);
62
61
  }
63
62
  catch (error) {
64
63
  console.error(`Error converting DosLeg dataset:`, error);
@@ -66,7 +65,7 @@ async function convertData() {
66
65
  try {
67
66
  await convertDatasetScrutins(dataDir, options);
68
67
  const scrutinsDir = path.join(dataDir, SCRUTINS_FOLDER);
69
- exitCode = commitGit(scrutinsDir, options, exitCode);
68
+ commitAndPushGit(scrutinsDir, options);
70
69
  }
71
70
  catch (error) {
72
71
  console.error(`Error converting Scrutins dataset:`, error);
@@ -76,7 +75,7 @@ async function convertData() {
76
75
  try {
77
76
  await convertDatasetQuestions(dataDir, options);
78
77
  const questionsDir = path.join(dataDir, datasets.questions.database);
79
- exitCode = commitGit(questionsDir, options, exitCode);
78
+ commitAndPushGit(questionsDir, options);
80
79
  }
81
80
  catch (error) {
82
81
  console.error(`Error converting Questions dataset:`, error);
@@ -86,7 +85,7 @@ async function convertData() {
86
85
  try {
87
86
  await convertDatasetSens(dataDir, options);
88
87
  const sensDir = path.join(dataDir, datasets.sens.database);
89
- exitCode = commitGit(sensDir, options, exitCode);
88
+ commitAndPushGit(sensDir, options);
90
89
  }
91
90
  catch (error) {
92
91
  console.error(`Error converting Sens dataset:`, error);
@@ -95,7 +94,6 @@ async function convertData() {
95
94
  if (!options["silent"]) {
96
95
  console.timeEnd("data transformation time");
97
96
  }
98
- return exitCode;
99
97
  }
100
98
  async function convertDatasetAmeli(dataDir, options) {
101
99
  const dataset = datasets.ameli;
@@ -347,7 +345,7 @@ async function convertDatasetSens(dataDir, options) {
347
345
  }
348
346
  }
349
347
  convertData()
350
- .then((exitCode) => process.exit(exitCode || 0))
348
+ .then(() => process.exit(exitCode))
351
349
  .catch((error) => {
352
350
  console.log(error);
353
351
  process.exit(1);
@@ -3,11 +3,13 @@ import commandLineArgs from "command-line-args";
3
3
  import fs from "fs-extra";
4
4
  import { DateTime } from "luxon";
5
5
  import path from "path";
6
+ import * as git from "../git";
6
7
  import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
7
8
  import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte";
8
9
  import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
9
10
  import { commonOptions } from "./shared/cli_helpers";
10
11
  import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util";
12
+ let exitCode = 10; // 0: some data changed, 10: no modification
11
13
  const optionsDefinitions = [
12
14
  ...commonOptions,
13
15
  {
@@ -32,6 +34,14 @@ const optionsDefinitions = [
32
34
  const options = commandLineArgs(optionsDefinitions);
33
35
  const textDecoder = new TextDecoder("utf8");
34
36
  const today = DateTime.now();
37
+ function commitAndPushGit(datasetDir) {
38
+ if (options["commit"]) {
39
+ const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options["remote"]);
40
+ if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
41
+ exitCode = errorCode;
42
+ }
43
+ }
44
+ }
35
45
  function isDocumentRecent(documentDate, daysThreshold) {
36
46
  if (!documentDate)
37
47
  return false;
@@ -117,6 +127,12 @@ export async function processTexte(texteMetadata, originalTextesDir, transformed
117
127
  const result = await processDocument(format.url.toString(), destPath, texteMetadata.date, options);
118
128
  // Specific logic: Parsing (Only applies to XML)
119
129
  if (format.isParseTarget && options.parseDocuments) {
130
+ if (!result.buffer && !(await fs.pathExists(destPath))) {
131
+ if (options.verbose) {
132
+ console.warn(`Skipping parse for missing XML file: ${destPath}`);
133
+ }
134
+ continue;
135
+ }
120
136
  await parseDocument(texteMetadata.session, transformedTextesDir, destPath, texteMetadata.name, result.buffer, exposeDesMotifsContent, options);
121
137
  }
122
138
  }
@@ -134,9 +150,10 @@ export async function processRapport(rapportMetadata, originalRapportsDir, optio
134
150
  await processDocument(format.url.toString(), destPath, rapportMetadata.date, options);
135
151
  }
136
152
  }
137
- async function retrieveTextes(dataDir, sessions) {
138
- const originalTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
139
- const transformedTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER);
153
+ async function processTextes(dataDir, sessions) {
154
+ const textesDir = path.join(dataDir, TEXTE_FOLDER);
155
+ const originalTextesDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);
156
+ const transformedTextesDir = path.join(textesDir, DATA_TRANSFORMED_FOLDER);
140
157
  if (options["parseDocuments"]) {
141
158
  ensureAndClearDir(transformedTextesDir);
142
159
  }
@@ -153,9 +170,11 @@ async function retrieveTextes(dataDir, sessions) {
153
170
  await processTexte(texteMetadata, originalTextesDir, transformedTextesDir, dlOptions);
154
171
  }
155
172
  }
173
+ commitAndPushGit(textesDir);
156
174
  }
157
- async function retrieveRapports(dataDir, sessions) {
158
- const originalRapportsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
175
+ async function processRapports(dataDir, sessions) {
176
+ const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
177
+ const originalRapportsDir = path.join(rapportsDir, DATA_ORIGINAL_FOLDER);
159
178
  const dlOptions = {
160
179
  force: options["force"],
161
180
  silent: options["silent"],
@@ -168,6 +187,7 @@ async function retrieveRapports(dataDir, sessions) {
168
187
  await processRapport(rapportMetadata, originalRapportsDir, dlOptions);
169
188
  }
170
189
  }
190
+ commitAndPushGit(rapportsDir);
171
191
  }
172
192
  async function parseDocument(session, transformedTextesDir, textePath, texteName, texteBuffer, exposeDesMotifs = null, options = {}) {
173
193
  if (options.verbose) {
@@ -179,6 +199,12 @@ async function parseDocument(session, transformedTextesDir, textePath, texteName
179
199
  parsedTexte = parseTexte(texteXml);
180
200
  }
181
201
  else {
202
+ if (!(await fs.pathExists(textePath))) {
203
+ if (options.verbose) {
204
+ console.warn(`Skipping parse for missing XML file: ${textePath}`);
205
+ }
206
+ return null;
207
+ }
182
208
  parsedTexte = await parseTexteFromFile(textePath);
183
209
  }
184
210
  if (!parsedTexte)
@@ -200,10 +226,10 @@ async function main() {
200
226
  const sessions = getSessionsFromStart(options["fromSession"]);
201
227
  console.time("documents processing time");
202
228
  if (isOptionEmptyOrHasValue(options["types"], "textes")) {
203
- await retrieveTextes(dataDir, sessions);
229
+ await processTextes(dataDir, sessions);
204
230
  }
205
231
  if (isOptionEmptyOrHasValue(options["types"], "rapports")) {
206
- await retrieveRapports(dataDir, sessions);
232
+ await processRapports(dataDir, sessions);
207
233
  }
208
234
  if (!options["silent"]) {
209
235
  console.timeEnd("documents processing time");
@@ -211,7 +237,7 @@ async function main() {
211
237
  }
212
238
  if (process.argv[1].endsWith("retrieve_documents.ts")) {
213
239
  main()
214
- .then(() => process.exit(0))
240
+ .then(() => process.exit(exitCode))
215
241
  .catch((error) => {
216
242
  console.log(error);
217
243
  process.exit(1);
@@ -97,7 +97,7 @@ async function writeMatchArtifacts(args) {
97
97
  if (finalTxt)
98
98
  await fsp.writeFile(path.join(ctx.baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
99
99
  }
100
- async function processGroupedReunion(agenda, session, dataDir) {
100
+ async function processGroupedReunion(agenda, session, dataDir, lastByVideo) {
101
101
  // 1) GuardRails
102
102
  if (shouldSkipAgenda(agenda))
103
103
  return;
@@ -137,18 +137,30 @@ async function processGroupedReunion(agenda, session, dataDir) {
137
137
  session: ctx.session,
138
138
  options,
139
139
  writeIfChanged,
140
+ lastByVideo, // NEW
141
+ getAgendaSegmentTimecodes,
142
+ buildSenatVodMasterM3u8FromNvs,
143
+ });
144
+ await processBisIfNeeded({
145
+ agenda,
146
+ secondBest,
147
+ ctx,
148
+ skipDownload,
149
+ options,
150
+ lastByVideo,
151
+ writeIfChanged,
152
+ processOneReunionMatch,
140
153
  getAgendaSegmentTimecodes,
141
154
  buildSenatVodMasterM3u8FromNvs,
142
155
  });
143
- // 4) Optional BIS
144
- await processBisIfNeeded({ agenda, secondBest, ctx, skipDownload, options });
145
156
  }
146
157
  async function processAll(dataDir, sessions) {
147
158
  console.log("Process all Agendas and fetch video's url");
148
159
  for (const session of sessions) {
160
+ const lastByVideo = new Map();
149
161
  for (const { item: agenda } of iterLoadSenatAgendas(dataDir, session)) {
150
162
  try {
151
- await processGroupedReunion(agenda, session, dataDir);
163
+ await processGroupedReunion(agenda, session, dataDir, lastByVideo);
152
164
  }
153
165
  catch (e) {
154
166
  console.error(`[error] ${agenda?.uid ?? "unknown-uid"}:`, e?.message || e);
@@ -35,7 +35,6 @@ export interface FlatTexte {
35
35
  dateDepot: Date | null;
36
36
  datePublicationXml: Date | null;
37
37
  version: Version | null;
38
- workflow: Step[];
39
38
  divisions: Division[];
40
39
  exposeDesMotifs?: ExposeDesMotifs | null;
41
40
  }
@@ -1,5 +1,5 @@
1
1
  import { Reunion } from "../types/agenda";
2
- import { BestMatch, MatchContext } from "./types";
2
+ import { BestMatch, LastForVideo, MatchContext } from "./types";
3
3
  import { CommandLineOptions } from "command-line-args";
4
4
  export declare function processOneReunionMatch(args: {
5
5
  agenda: Reunion;
@@ -8,6 +8,11 @@ export declare function processOneReunionMatch(args: {
8
8
  session: number;
9
9
  options: Record<string, any>;
10
10
  writeIfChanged: (p: string, content: string) => Promise<void>;
11
+ lastByVideo: Map<string, {
12
+ agendaUid: string;
13
+ agendaJsonPath: string;
14
+ start: number;
15
+ }>;
11
16
  getAgendaSegmentTimecodes: (dataNvs: string, finalNvs: string, agendaKey: string) => {
12
17
  start: number;
13
18
  end: number | null;
@@ -20,5 +25,26 @@ export declare function processBisIfNeeded(args: {
20
25
  ctx: MatchContext;
21
26
  skipDownload: boolean;
22
27
  options: CommandLineOptions;
28
+ lastByVideo: Map<string, LastForVideo>;
29
+ writeIfChanged: (p: string, content: string) => Promise<void>;
30
+ processOneReunionMatch: (args: {
31
+ agenda: Reunion;
32
+ baseDir: string;
33
+ dataDir: string;
34
+ session: number;
35
+ options: Record<string, any>;
36
+ writeIfChanged: (p: string, content: string) => Promise<void>;
37
+ lastByVideo: Map<string, LastForVideo>;
38
+ getAgendaSegmentTimecodes: (dataNvs: string, finalNvs: string, agendaKey: string) => {
39
+ start: number;
40
+ end: number | null;
41
+ } | null;
42
+ buildSenatVodMasterM3u8FromNvs: (dataNvs: string) => string | null;
43
+ }) => Promise<void>;
44
+ getAgendaSegmentTimecodes: (dataNvs: string, finalNvs: string, agendaKey: string) => {
45
+ start: number;
46
+ end: number | null;
47
+ } | null;
48
+ buildSenatVodMasterM3u8FromNvs: (dataNvs: string) => string | null;
23
49
  }): Promise<void>;
24
50
  export declare function writeIfChanged(p: string, content: string): Promise<void>;
@@ -5,9 +5,8 @@ import { fetchText } from "./search";
5
5
  import fs from "fs-extra";
6
6
  import fsp from "fs/promises";
7
7
  import path from "path";
8
- import { getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs } from "../utils/nvs-parsing";
9
8
  export async function processOneReunionMatch(args) {
10
- const { agenda, baseDir, dataDir, session, options, writeIfChanged, getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs, } = args;
9
+ const { agenda, baseDir, dataDir, session, options, writeIfChanged, lastByVideo, getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs, } = args;
11
10
  const reunionUid = agenda.uid;
12
11
  let dataTxt;
13
12
  let finalTxt;
@@ -25,18 +24,40 @@ export async function processOneReunionMatch(args) {
25
24
  return;
26
25
  }
27
26
  const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
27
+ // Ensure it exists first.
28
+ if (!(await fs.pathExists(agendaJsonPath))) {
29
+ console.warn(`[warn] agenda file not found: ${agendaJsonPath}`);
30
+ return;
31
+ }
28
32
  let timecodeDebutVideo = null;
29
33
  let timecodeFinVideo = null;
30
34
  const agendaKey = agenda.titre || agenda.objet || "";
31
35
  const seg = getAgendaSegmentTimecodes(dataTxt, finalTxt, agendaKey);
32
36
  if (seg) {
33
37
  timecodeDebutVideo = seg.start;
34
- timecodeFinVideo = seg.end;
38
+ timecodeFinVideo = null; // keep open by default
35
39
  }
36
- if (!(await fs.pathExists(agendaJsonPath))) {
37
- console.warn(`[warn] agenda file not found: ${agendaJsonPath}`);
38
- return;
40
+ // 1) If we have a start timecode, close the previous agenda for this SAME master
41
+ if (timecodeDebutVideo != null) {
42
+ const prev = lastByVideo.get(master);
43
+ if (prev && prev.agendaJsonPath !== agendaJsonPath) {
44
+ // micro-safety: do not close with an earlier timecode
45
+ if (timecodeDebutVideo <= prev.start) {
46
+ console.warn(`[warn] timecode order inversion on same video: ` +
47
+ `prev=${prev.agendaUid}(${prev.start}s) -> cur=${agenda.uid}(${timecodeDebutVideo}s). ` +
48
+ `Skip closing prev to avoid negative segment.`);
49
+ }
50
+ else {
51
+ await patchAgendaTimecodeFin({
52
+ agendaJsonPath: prev.agendaJsonPath,
53
+ timecodeFinVideo: timecodeDebutVideo,
54
+ writeIfChanged,
55
+ });
56
+ }
57
+ }
58
+ lastByVideo.set(master, { agendaUid: agenda.uid, agendaJsonPath, start: timecodeDebutVideo });
39
59
  }
60
+ // 2) Update current agenda JSON with urlVideo (+ start/end if any)
40
61
  const raw = await fsp.readFile(agendaJsonPath, "utf-8");
41
62
  let obj;
42
63
  try {
@@ -49,7 +70,10 @@ export async function processOneReunionMatch(args) {
49
70
  const next = { ...obj, urlVideo: master, startTime: agenda.startTime };
50
71
  if (timecodeDebutVideo != null) {
51
72
  next.timecodeDebutVideo = timecodeDebutVideo;
52
- next.timecodeFinVideo = timecodeFinVideo ?? undefined;
73
+ if (timecodeFinVideo != null)
74
+ next.timecodeFinVideo = timecodeFinVideo;
75
+ else
76
+ delete next.timecodeFinVideo;
53
77
  }
54
78
  await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
55
79
  if (!options["silent"]) {
@@ -58,7 +82,7 @@ export async function processOneReunionMatch(args) {
58
82
  }
59
83
  }
60
84
  export async function processBisIfNeeded(args) {
61
- const { agenda, secondBest, ctx, skipDownload, options } = args;
85
+ const { agenda, secondBest, ctx, skipDownload, options, lastByVideo, writeIfChanged, processOneReunionMatch, getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs, } = args;
62
86
  if (skipDownload)
63
87
  return;
64
88
  if (!secondBest)
@@ -83,6 +107,7 @@ export async function processBisIfNeeded(args) {
83
107
  session: ctx.session,
84
108
  options,
85
109
  writeIfChanged,
110
+ lastByVideo,
86
111
  getAgendaSegmentTimecodes,
87
112
  buildSenatVodMasterM3u8FromNvs,
88
113
  });
@@ -128,3 +153,19 @@ export async function writeIfChanged(p, content) {
128
153
  }
129
154
  await fsp.writeFile(p, content, "utf-8");
130
155
  }
156
+ async function patchAgendaTimecodeFin(args) {
157
+ const { agendaJsonPath, timecodeFinVideo, writeIfChanged } = args;
158
+ if (!(await fs.pathExists(agendaJsonPath)))
159
+ return;
160
+ const raw = await fsp.readFile(agendaJsonPath, "utf-8");
161
+ let obj;
162
+ try {
163
+ obj = JSON.parse(raw);
164
+ }
165
+ catch {
166
+ console.warn(`[warn] invalid JSON in ${agendaJsonPath}`);
167
+ return;
168
+ }
169
+ const next = { ...obj, timecodeFinVideo };
170
+ await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
171
+ }
@@ -68,3 +68,8 @@ export type MatchContext = {
68
68
  reunionUid: string;
69
69
  agendaTs: number | null;
70
70
  };
71
+ export type LastForVideo = {
72
+ agendaUid: string;
73
+ agendaJsonPath: string;
74
+ start: number;
75
+ };
@@ -1,9 +1 @@
1
- import { z } from "zod";
2
- export declare const configSchema: z.ZodObject<{
3
- db: z.ZodObject<{
4
- host: z.ZodString;
5
- password: z.ZodString;
6
- user: z.ZodString;
7
- port: z.ZodCoercedNumber<unknown>;
8
- }, z.core.$strip>;
9
- }, z.core.$strip>;
1
+ export declare function validateConfig(data: any): [any, any];
@@ -1,10 +1,54 @@
1
- import { z } from "zod";
2
- const dbSchema = z.object({
3
- host: z.string().trim().min(1, "Must not be empty"),
4
- password: z.string().trim().min(1, "Must not be empty"),
5
- user: z.string().trim().min(1, "Must not be empty"),
6
- port: z.coerce.number().int().min(0).max(65535),
7
- });
8
- export const configSchema = z.object({
9
- db: dbSchema,
10
- });
1
+ import { validateChain, validateInteger, validateNonEmptyTrimmedString, validateNumber, validateOption, validateString, validateStringToNumber, validateTest, } from "@biryani/core";
2
+ function validateDb(data) {
3
+ if (data === null || data === undefined) {
4
+ return [data, "Missing value"];
5
+ }
6
+ if (typeof data !== "object") {
7
+ return [data, `Expected an object got "${typeof data}"`];
8
+ }
9
+ data = { ...data };
10
+ const errors = {};
11
+ const remainingKeys = new Set(Object.keys(data));
12
+ for (const key of ["host", "password", "user"]) {
13
+ remainingKeys.delete(key);
14
+ const [value, error] = validateNonEmptyTrimmedString(data[key]);
15
+ data[key] = value;
16
+ if (error !== null) {
17
+ errors[key] = error;
18
+ }
19
+ }
20
+ {
21
+ const key = "port";
22
+ remainingKeys.delete(key);
23
+ const [value, error] = validateChain(validateOption([validateString, validateStringToNumber], validateNumber), validateInteger, validateTest((value) => 0 <= value && value <= 65535, "Must be an integer between 0 and 65535"))(data[key]);
24
+ data[key] = value;
25
+ if (error !== null) {
26
+ errors[key] = error;
27
+ }
28
+ }
29
+ for (const key of remainingKeys) {
30
+ errors[key] = "Unexpected item";
31
+ }
32
+ return [data, Object.keys(errors).length === 0 ? null : errors];
33
+ }
34
+ export function validateConfig(data) {
35
+ if (data === null || data === undefined) {
36
+ return [data, "Missing value"];
37
+ }
38
+ if (typeof data !== "object") {
39
+ return [data, `Expected an object got "${typeof data}"`];
40
+ }
41
+ data = { ...data };
42
+ const errors = {};
43
+ const remainingKeys = new Set(Object.keys(data));
44
+ {
45
+ const key = "db";
46
+ remainingKeys.delete(key);
47
+ const [value, error] = validateDb(data[key]);
48
+ data[key] = value;
49
+ if (error !== null) {
50
+ errors[key] = error;
51
+ }
52
+ }
53
+ return [data, Object.keys(errors).length === 0 ? null : errors];
54
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.21.3",
3
+ "version": "2.21.5",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",
@@ -1,17 +0,0 @@
1
- export type L1Chapter = {
2
- id: string;
3
- label: string;
4
- index: number;
5
- };
6
- export declare function getLevel1Chapters(dataNvs: string): L1Chapter[];
7
- export declare function pickBestLevel1ChapterForAgenda(chapters: L1Chapter[], agendaTitle: string): {
8
- chapter: L1Chapter;
9
- score: number;
10
- } | null;
11
- export declare function getAgendaSegmentTimecodes(dataNvs: string, finalPlayerNvs: string, agendaTitleOrObjet: string): {
12
- start: number;
13
- end: number | null;
14
- chapterId: string;
15
- nextChapterId: string | null;
16
- score: number;
17
- } | null;
@@ -1,79 +0,0 @@
1
- import { XMLParser } from "fast-xml-parser";
2
- import { dice, normalize } from "./scoring";
3
- import { decodeHtmlEntities } from "./string_cleaning";
4
- const CHAPTER_MATCH_THRESHOLD = 0.5;
5
- const xmlParser = new XMLParser({
6
- ignoreAttributes: false,
7
- attributeNamePrefix: "@_",
8
- });
9
- function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
10
- const xml = xmlParser.parse(finalPlayerNvs);
11
- const synchros = xml?.player?.synchro;
12
- if (!synchros)
13
- return null;
14
- const synchsArray = Array.isArray(synchros) ? synchros : [synchros];
15
- const match = synchsArray.find((s) => String(s["@_id"]) === String(chapterId));
16
- if (!match)
17
- return null;
18
- const rawTimecode = match["@_timecode"];
19
- if (rawTimecode == null)
20
- return null;
21
- const ms = Number(rawTimecode);
22
- if (Number.isNaN(ms))
23
- return null;
24
- return Math.floor(ms / 1000);
25
- }
26
- function toArray(v) {
27
- if (!v)
28
- return [];
29
- return Array.isArray(v) ? v : [v];
30
- }
31
- export function getLevel1Chapters(dataNvs) {
32
- const xml = xmlParser.parse(dataNvs);
33
- const root = xml?.data?.chapters?.chapter ?? xml?.chapters?.chapter;
34
- const roots = toArray(root);
35
- return roots
36
- .map((ch, i) => {
37
- const id = ch?.id ?? ch?.["@_id"];
38
- const labelRaw = ch?.label ?? ch?.["@_label"] ?? "";
39
- return {
40
- id: String(id),
41
- label: decodeHtmlEntities(String(labelRaw)).trim(),
42
- index: i,
43
- };
44
- })
45
- .filter((c) => c.id && c.label);
46
- }
47
- export function pickBestLevel1ChapterForAgenda(chapters, agendaTitle) {
48
- const q = normalize(agendaTitle);
49
- let best = null;
50
- for (const ch of chapters) {
51
- const s = dice(q, ch.label);
52
- if (!best || s > best.score)
53
- best = { chapter: ch, score: s };
54
- }
55
- if (!best || best.score < CHAPTER_MATCH_THRESHOLD)
56
- return { chapter: chapters[0], score: 0 };
57
- return best;
58
- }
59
- export function getAgendaSegmentTimecodes(dataNvs, finalPlayerNvs, agendaTitleOrObjet) {
60
- const l1 = getLevel1Chapters(dataNvs);
61
- if (!l1.length)
62
- return null;
63
- const best = pickBestLevel1ChapterForAgenda(l1, agendaTitleOrObjet);
64
- if (!best)
65
- return null;
66
- const chapter = best.chapter;
67
- const next = l1[chapter.index + 1] ?? null;
68
- const start = getTimecodeForChapterId(finalPlayerNvs, chapter.id);
69
- if (start == null)
70
- return null;
71
- const end = next ? getTimecodeForChapterId(finalPlayerNvs, next.id) : null;
72
- return {
73
- start,
74
- end,
75
- chapterId: chapter.id,
76
- nextChapterId: next?.id ?? null,
77
- score: best.score,
78
- };
79
- }
@@ -1,2 +0,0 @@
1
- import { MatchWeights } from "./scoring";
2
- export declare const weights: MatchWeights;
@@ -1,15 +0,0 @@
1
- // BASED ON TESTS RESULTS
2
- // these weights yield good results in the benchmark test suite
3
- // aiming at 0 WRONG matches while maximizing HITs
4
- export const weights = {
5
- wTitle: 0.7,
6
- wOrg: 0.1,
7
- wSalle: 0,
8
- wTime: 0.2,
9
- sameOrgBonus: 0.2,
10
- minAccept: 0.5,
11
- margin: 0.1,
12
- titleMin: 0.2,
13
- titleDominance: 0,
14
- orgUncertainPenalty: 0.8,
15
- };