@tricoteuses/senat 2.21.3 → 2.21.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/config.d.ts +2 -20
- package/lib/config.js +5 -16
- package/lib/loaders.js +1 -1
- package/lib/model/ameli.js +3 -1
- package/lib/model/documents.js +18 -5
- package/lib/model/dosleg.js +11 -11
- package/lib/raw_types/questions.d.ts +0 -4
- package/lib/raw_types/senat.d.ts +0 -4
- package/lib/raw_types_schemats/ameli.d.ts +1 -1
- package/lib/raw_types_schemats/debats.d.ts +1 -1
- package/lib/raw_types_schemats/dosleg.d.ts +1 -1
- package/lib/raw_types_schemats/questions.d.ts +1 -3
- package/lib/raw_types_schemats/sens.d.ts +1 -1
- package/lib/scripts/convert_data.js +3 -3
- package/lib/scripts/retrieve_open_data.js +1 -2
- package/lib/scripts/test_iter_load.js +1 -1
- package/lib/src/git.js +52 -2
- package/lib/src/loaders.d.ts +2 -3
- package/lib/src/loaders.js +14 -29
- package/lib/src/model/documents.js +9 -6
- package/lib/src/parsers/texte.js +1 -20
- package/lib/src/scripts/convert_data.js +9 -11
- package/lib/src/scripts/retrieve_documents.js +34 -8
- package/lib/src/scripts/retrieve_videos.js +16 -4
- package/lib/src/types/texte.d.ts +0 -1
- package/lib/src/videos/pipeline.d.ts +27 -1
- package/lib/src/videos/pipeline.js +49 -8
- package/lib/src/videos/types.d.ts +5 -0
- package/lib/validators/config.d.ts +1 -9
- package/lib/validators/config.js +54 -10
- package/package.json +1 -1
- package/lib/src/utils/nvs-timecode.d.ts +0 -17
- package/lib/src/utils/nvs-timecode.js +0 -79
- package/lib/src/utils/weights_scoring_config.d.ts +0 -2
- package/lib/src/utils/weights_scoring_config.js +0 -15
package/lib/config.d.ts
CHANGED
|
@@ -1,21 +1,3 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
|
|
3
|
-
export
|
|
4
|
-
db: z.ZodObject<{
|
|
5
|
-
host: z.ZodString;
|
|
6
|
-
name: z.ZodString;
|
|
7
|
-
password: z.ZodString;
|
|
8
|
-
port: z.ZodCoercedNumber<unknown>;
|
|
9
|
-
user: z.ZodString;
|
|
10
|
-
}, z.core.$strip>;
|
|
11
|
-
}, z.core.$strip>;
|
|
12
|
-
declare const _default: {
|
|
13
|
-
db: {
|
|
14
|
-
host: string;
|
|
15
|
-
name: string;
|
|
16
|
-
password: string;
|
|
17
|
-
port: number;
|
|
18
|
-
user: string;
|
|
19
|
-
};
|
|
20
|
-
};
|
|
21
|
-
export default _default;
|
|
2
|
+
declare const validConfig: any;
|
|
3
|
+
export default validConfig;
|
package/lib/config.js
CHANGED
|
@@ -1,27 +1,16 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import {
|
|
3
|
-
const dbSchema = z.object({
|
|
4
|
-
host: z.string().trim().min(1, "Must not be empty"),
|
|
5
|
-
name: z.string().trim().min(1, "Must not be empty"),
|
|
6
|
-
password: z.string().trim().min(1, "Must not be empty"),
|
|
7
|
-
port: z.coerce.number().int().min(0).max(65535),
|
|
8
|
-
user: z.string().trim().min(1, "Must not be empty"),
|
|
9
|
-
});
|
|
10
|
-
export const configSchema = z.object({
|
|
11
|
-
db: dbSchema,
|
|
12
|
-
});
|
|
2
|
+
import { validateConfig } from "./validators/config";
|
|
13
3
|
const config = {
|
|
14
4
|
db: {
|
|
15
5
|
host: process.env["TRICOTEUSES_SENAT_DB_HOST"] || "localhost",
|
|
16
|
-
name: process.env["TRICOTEUSES_SENAT_DB_NAME"] || "postgres",
|
|
17
6
|
password: process.env["TRICOTEUSES_SENAT_DB_PASSWORD"] || "opendata",
|
|
18
7
|
port: process.env["TRICOTEUSES_SENAT_DB_PORT"] || 5432,
|
|
19
8
|
user: process.env["TRICOTEUSES_SENAT_DB_USER"] || "opendata",
|
|
20
9
|
},
|
|
21
10
|
};
|
|
22
|
-
const
|
|
23
|
-
if (
|
|
24
|
-
console.error(`Error in configuration:\n${JSON.stringify(
|
|
11
|
+
const [validConfig, error] = validateConfig(config);
|
|
12
|
+
if (error !== null) {
|
|
13
|
+
console.error(`Error in configuration:\n${JSON.stringify(validConfig, null, 2)}\nError:\n${JSON.stringify(error, null, 2)}`);
|
|
25
14
|
process.exit(-1);
|
|
26
15
|
}
|
|
27
|
-
export default
|
|
16
|
+
export default validConfig;
|
package/lib/loaders.js
CHANGED
|
@@ -193,7 +193,7 @@ export function* iterLoadSenatDocuments(dataDir, session, documentType, options
|
|
|
193
193
|
...document,
|
|
194
194
|
};
|
|
195
195
|
const documentItem = {
|
|
196
|
-
item: enrichedDocument
|
|
196
|
+
item: enrichedDocument
|
|
197
197
|
};
|
|
198
198
|
if (document.url) {
|
|
199
199
|
const documentName = path.parse(document.url).name;
|
package/lib/model/ameli.js
CHANGED
|
@@ -47,7 +47,9 @@ const findAllAmendementsQuery = dbSenat
|
|
|
47
47
|
.leftJoin("ameli.typses", "ameli.typses.id", "ameli.ses.typid")
|
|
48
48
|
.leftJoin("ameli.nat", "ameli.txt_ameli.natid", "ameli.nat.id")
|
|
49
49
|
.leftJoin("ameli.lec_ameli", "ameli.txt_ameli.lecid", "ameli.lec_ameli.id")
|
|
50
|
-
.leftJoin("dosleg.texte", (join) => join
|
|
50
|
+
.leftJoin("dosleg.texte", (join) => join
|
|
51
|
+
.onRef("ameli.ses.ann", "=", "dosleg.texte.sesann")
|
|
52
|
+
.onRef("ameli.txt_ameli.numabs", "=", "dosleg.texte.texnum"))
|
|
51
53
|
.leftJoin("dosleg.lecass", "dosleg.texte.lecassidt", "dosleg.lecass.lecassidt")
|
|
52
54
|
.leftJoin("ameli.mot", "ameli.amd.motid", "ameli.mot.id")
|
|
53
55
|
.leftJoin("ameli.avicom", "ameli.amd.avcid", "ameli.avicom.id")
|
package/lib/model/documents.js
CHANGED
|
@@ -28,7 +28,10 @@ function documentsAttaches(rapportId) {
|
|
|
28
28
|
.selectFrom("docatt")
|
|
29
29
|
.leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
|
|
30
30
|
.where("docatt.rapcod", "=", rapportId)
|
|
31
|
-
.select([
|
|
31
|
+
.select([
|
|
32
|
+
"docatt.docatturl as url",
|
|
33
|
+
"typatt.typattlib as type_document"
|
|
34
|
+
]));
|
|
32
35
|
}
|
|
33
36
|
function selectRapportAttributes({ eb, ref, val }) {
|
|
34
37
|
return [
|
|
@@ -67,9 +70,14 @@ const queryRapports = baseQueryRapports
|
|
|
67
70
|
.leftJoin("lecass", "lecass.lecassidt", "lecassrap.lecassidt")
|
|
68
71
|
.leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
|
|
69
72
|
.leftJoin("loi", "loi.loicod", "lecture.loicod")
|
|
70
|
-
.select((args) => [
|
|
73
|
+
.select((args) => [
|
|
74
|
+
"loi.signet as signet_dossier",
|
|
75
|
+
...selectRapportAttributes(args),
|
|
76
|
+
]);
|
|
71
77
|
export function rapports(lectureAssembleeId) {
|
|
72
|
-
return jsonArrayFrom(baseQueryRapports
|
|
78
|
+
return jsonArrayFrom(baseQueryRapports
|
|
79
|
+
.select(selectRapportAttributes)
|
|
80
|
+
.where("lecassrap.lecassidt", "=", lectureAssembleeId));
|
|
73
81
|
}
|
|
74
82
|
function auteursTexte(texteId) {
|
|
75
83
|
return jsonArrayFrom(dbSenat
|
|
@@ -126,9 +134,14 @@ const queryTextes = baseQueryTextes
|
|
|
126
134
|
.leftJoin("lecass", "lecass.lecassidt", "texte.lecassidt")
|
|
127
135
|
.leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
|
|
128
136
|
.leftJoin("loi", "loi.loicod", "lecture.loicod")
|
|
129
|
-
.select((args) => [
|
|
137
|
+
.select((args) => [
|
|
138
|
+
"loi.signet as signet_dossier",
|
|
139
|
+
...selectTexteAttributes(args),
|
|
140
|
+
]);
|
|
130
141
|
export function textes(lectureAssembleeId) {
|
|
131
|
-
return jsonArrayFrom(baseQueryTextes
|
|
142
|
+
return jsonArrayFrom(baseQueryTextes
|
|
143
|
+
.select(selectTexteAttributes)
|
|
144
|
+
.where("texte.lecassidt", "=", lectureAssembleeId));
|
|
132
145
|
}
|
|
133
146
|
export function findAllTextes() {
|
|
134
147
|
return queryTextes.stream();
|
package/lib/model/dosleg.js
CHANGED
|
@@ -203,10 +203,10 @@ export function buildActesLegislatifs(dossier) {
|
|
|
203
203
|
numero: depotTexte.numero,
|
|
204
204
|
uid: `${loiSignet}-${phasePrefix}-DEPOT`,
|
|
205
205
|
session: lecAss.session,
|
|
206
|
-
chambre:
|
|
206
|
+
chambre: 'SN',
|
|
207
207
|
signet_dossier: loiSignet,
|
|
208
208
|
texte_url: depotTexte.url,
|
|
209
|
-
code_organisme: null
|
|
209
|
+
code_organisme: null
|
|
210
210
|
});
|
|
211
211
|
}
|
|
212
212
|
// =================================================================
|
|
@@ -225,7 +225,7 @@ export function buildActesLegislatifs(dossier) {
|
|
|
225
225
|
adoption: rap.adoption,
|
|
226
226
|
uid: `${loiSignet}-${phasePrefix}-COM`,
|
|
227
227
|
session: lecAss.session,
|
|
228
|
-
chambre:
|
|
228
|
+
chambre: 'SN',
|
|
229
229
|
signet_dossier: loiSignet,
|
|
230
230
|
texte_url: rap.url,
|
|
231
231
|
});
|
|
@@ -247,9 +247,9 @@ export function buildActesLegislatifs(dossier) {
|
|
|
247
247
|
libelle: `Discussion en séance publique`,
|
|
248
248
|
uid: `${loiSignet}-${phasePrefix}-DEBATS-SEANCE`,
|
|
249
249
|
session: lecAss.session,
|
|
250
|
-
chambre:
|
|
250
|
+
chambre: 'SN',
|
|
251
251
|
signet_dossier: loiSignet,
|
|
252
|
-
code_organisme: null
|
|
252
|
+
code_organisme: null
|
|
253
253
|
});
|
|
254
254
|
}
|
|
255
255
|
}
|
|
@@ -281,10 +281,10 @@ export function buildActesLegislatifs(dossier) {
|
|
|
281
281
|
adoption: libelleStatut,
|
|
282
282
|
uid: `${loiSignet}-DEC-${texteFinal.numero}`,
|
|
283
283
|
session: lecAss.session,
|
|
284
|
-
chambre:
|
|
284
|
+
chambre: 'SN',
|
|
285
285
|
signet_dossier: loiSignet,
|
|
286
286
|
texte_url: texteFinal.url,
|
|
287
|
-
code_organisme: null
|
|
287
|
+
code_organisme: null
|
|
288
288
|
});
|
|
289
289
|
}
|
|
290
290
|
}
|
|
@@ -294,19 +294,19 @@ export function buildActesLegislatifs(dossier) {
|
|
|
294
294
|
// =================================================================
|
|
295
295
|
if (dossier.date_decision_CoC) {
|
|
296
296
|
actes.push({
|
|
297
|
-
code_acte:
|
|
297
|
+
code_acte: 'CC',
|
|
298
298
|
date: dossier.date_decision_CoC,
|
|
299
299
|
libelle: `Décision du Conseil constitutionnel`,
|
|
300
300
|
id: dossier.url_decision_CoC,
|
|
301
301
|
uid: `${loiSignet}-CC`,
|
|
302
|
-
chambre:
|
|
302
|
+
chambre: 'AN',
|
|
303
303
|
signet_dossier: loiSignet,
|
|
304
304
|
texte_url: dossier.url_decision_CoC || dossier.url_dossier_CoC,
|
|
305
305
|
});
|
|
306
306
|
}
|
|
307
307
|
if (dossier.date_promulgation) {
|
|
308
308
|
actes.push({
|
|
309
|
-
code_acte:
|
|
309
|
+
code_acte: 'PROM',
|
|
310
310
|
date: dossier.date_promulgation,
|
|
311
311
|
libelle: `Promulgation de la loi`,
|
|
312
312
|
date_publication_JO: dossier.date_publication_JO,
|
|
@@ -314,7 +314,7 @@ export function buildActesLegislatifs(dossier) {
|
|
|
314
314
|
url_legifrance: dossier.url_JO,
|
|
315
315
|
id: dossier.url_JO,
|
|
316
316
|
uid: `${loiSignet}-PROM`,
|
|
317
|
-
chambre:
|
|
317
|
+
chambre: 'AN',
|
|
318
318
|
signet_dossier: loiSignet,
|
|
319
319
|
});
|
|
320
320
|
}
|
package/lib/raw_types/senat.d.ts
CHANGED
|
@@ -2735,10 +2735,6 @@ export interface QuestionsTamMinisteres {
|
|
|
2735
2735
|
titreministre: string | null;
|
|
2736
2736
|
}
|
|
2737
2737
|
export interface QuestionsTamQuestions {
|
|
2738
|
-
/**
|
|
2739
|
-
* Question caduque redéposée
|
|
2740
|
-
*/
|
|
2741
|
-
caduque_redeposee: string | null;
|
|
2742
2738
|
/**
|
|
2743
2739
|
* Libellé de la circonscription
|
|
2744
2740
|
*/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.19.6
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5432/senat -t amd -t amdsen -t avicom -t avigvt -t cab -t com_ameli -t ent -t etatxt -t fbu -t grppol_ameli -t gvt -t intora -t irr -t lec_ameli -t mot -t nat -t orarol -t sai -t saisen -t sea -t sen_ameli -t ses -t sor -t sub -t txt_ameli -t typrect -t typses -t typsub -t w_nivrec -s ameli
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.19.6
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5432/senat -t debats -t intdivers -t intpjl -t lecassdeb -t secdis -t secdivers -t syndeb -t typsec -s debats
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.19.6
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5432/senat -t amescr -t ass -t aud -t auteur -t ble -t catrap -t corscr -t date_seance -t deccoc -t denrap -t doc -t docatt -t docsea -t ecr -t etaloi -t evtsea -t forpub -t gen -t lecass -t lecassrap -t lecture -t lnkrap -t loi -t loithe -t natloi -t org -t orgnomhis -t orippr -t oritxt -t posvot -t qua -t rap -t raporg -t rapthe -t rolsig -t scr -t ses -t stavot -t texte -t texte_ancien -t the -t titsen -t typatt -t typaut -t typdoc -t typevtsea -t typlec -t typloi -t typorg -t typrap -t typtxt -t typurl -t votsen -s dosleg
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.19.6
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5432/senat -t etatquestion -t legquestion -t naturequestion -t sortquestion -t tam_ministeres -t tam_questions -t tam_reponses -t the -s questions
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -76,7 +76,6 @@ export interface tam_ministeres {
|
|
|
76
76
|
titreministre: tam_ministeresFields.titreministre;
|
|
77
77
|
}
|
|
78
78
|
export declare namespace tam_questionsFields {
|
|
79
|
-
type caduque_redeposee = string | null;
|
|
80
79
|
type circonscription = string | null;
|
|
81
80
|
type cirnum = number | null;
|
|
82
81
|
type codequalite = string | null;
|
|
@@ -136,7 +135,6 @@ export declare namespace tam_questionsFields {
|
|
|
136
135
|
type version = number | null;
|
|
137
136
|
}
|
|
138
137
|
export interface tam_questions {
|
|
139
|
-
caduque_redeposee: tam_questionsFields.caduque_redeposee;
|
|
140
138
|
circonscription: tam_questionsFields.circonscription;
|
|
141
139
|
cirnum: tam_questionsFields.cirnum;
|
|
142
140
|
codequalite: tam_questionsFields.codequalite;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.19.6
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5432/senat -t acr -t activite -t activite_audit -t activite_delegation -t activite_delegation_audit -t activite_loi -t activite_loi_audit -t activite_obligatoire -t activite_participant -t activite_participant_audit -t activite_senateur -t activite_senateur_audit -t activite_senateur_params -t activite_senateur_params_audit -t activites_liees -t activites_liees_audit -t actpro -t adhgrpsen -t adr -t adresse -t adrsen -t app -t assparint -t asster -t autgrpsen -t autorisation_profil -t autorisations -t avis_nomination_art13 -t basdes -t bur -t bur3r -t bur4r -t cad -t candid -t candidat -t candtodelete -t categorie_activite -t catpro -t catpro2e -t catterrit -t cible_categorie_periode -t cirdep -t com -t con -t cotgip -t csp -t cspfam -t databasechangelog -t databasechangeloglock -t delega -t derogation -t derogation_audit -t derogation_senateur -t derogation_senateur_audit -t design -t designoep -t designorg -t discou -t div -t dpt -t dpt_seuil_presence -t dptele -t dptele_files -t dptele_processing -t dptele_processing_type -t dpttypman -t droits_acces -t droits_acces_audit -t droits_type_derogation -t ele -t eleloc -t elucan -t eludep -t eludiv -t elueur -t elueur_apf -t elumet -t elureg -t elusen -t elusen2e -t elusen3r -t elusen4r -t elusencommu -t elusenpair -t eluter -t elutit -t eluvil -t etadebman -t etadebman3r -t etadebman4r -t etafinman -t etafinman3r -t etafinman4r -t etaprr -t etarpm -t etasen -t ext2e_bio -t ext2e_csp -t ext2e_mandats -t ext2e_minist -t extsencom_identite -t extsencom_mandat -t fonact_participant -t foncandid -t foncom -t fondelega -t fongrppol -t fongrpsen -t fonmemcom -t fonmemdelega -t fonmemextpar -t fonmemgrppol -t fonmemgrpsen -t fonmemorg -t fonorg -t grppol -t grppol4r -t grpsenami -t grpsenamiadh -t grpsenamiadhreq -t grpsenamiadhreqeta -t grpsenamiunadh -t grpsim -t gvt -t insee_pays2008 -t jhi_authority -t jhi_user -t jhi_user_authority -t lanetr -t libcom -t libdelega -t libgrppol -t libgrpsen -t liborg -t lisdptele -t mel -t memcom -t memcomsea -t memdelega -t memextpar -t memgrppol -t memgrpsen -t memorg -t met -t minind -t minist -t mis -t misetafin -t mismin -t misrapeta -t missen -t moddes -t mode_acces_elusenpair -t nation -t nationgrpsen -t nivlan -t org -t orgext -t orgextpres -t orgthe -t pairie_elusenpair -t parpol -t parpolglo -t participa -t pcs -t pcs24 -t pcs42 -t pcs8 -t pcscatpro -t per -t per_sen -t perapp -t periode_presence -t perpolglo -t perrol -t pj_justificatif -t pj_justificatif_audit -t plaind -t plan_table -t plsql_profiler_runs -t plsql_profiler_units -t poicon -t posvot -t presences_scrutin_surcharge -t presencesrevisionentity -t profil_applicatif -t qua -t rap_the -t reg -t reladr -t requetes_profil -t reslis -t resultat -t reu -t revchanges -t rne_mandat -t rne_mandat_diff -t rne_sen -t rne_sen_diff -t rne_type_mandat -t rol -t sal -t scr -t scrusoldelega -t sea -t sec -t sec2e -t secexe -t sen -t senbur -t senbur3r -t senbur4r -t sennom -t senpj -t sensim -t sentablenom -t senurl -t seuil_presence -t sirpas_elusen -t sirpas_fonmemcom -t sirpas_fonmemdelega -t sirpas_fonmemgrppol -t sirpas_memcom -t sirpas_memdelega -t sirpas_memgrppol -t sirpas_mvt -t sirpas_mvtcm -t sirpas_mvttri -t sirpas_sen -t sirpas_senbur -t sirpas_trf -t srv -t stajur -t stavot -t suspensiontravaux -t suspensiontravaux_audit -t sysage -t syscognos -t sysevt -t sysvar -t sysvar_sendev -t sysvar_senprod -t tapsenrevchanges -t tapsenrevisionentity -t telephone -t temval -t tenpol -t territ -t testoracle -t titele -t titelerne -t titmin -t titnob -t tmpsd -t toutes -t turelu -t typadr -t typapppol -t typbister -t typcandid -t type_activite -t type_activite_participant -t type_activite_rol -t type_activite_senateur -t type_categorie -t type_derogation -t type_droit_acces -t type_pj_justificatif -t type_rne_diff -t type_type_derogation -t typele -t typgrpsen -t typman -t typmin -t typmoddes -t typorg -t typorgext -t typparpol -t typpoicon -t typprs -t typprssta -t typscr -t typtel -t typurl -t typvoi -t uploaded_file -t uploaded_file_type -t validation -t validation_defview_profil -t validation_profil -t vercand -t verres -t votes -t zongeo -s sens
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -5,7 +5,7 @@ import path from "path";
|
|
|
5
5
|
import pLimit from "p-limit";
|
|
6
6
|
import * as git from "../git";
|
|
7
7
|
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
|
|
8
|
-
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER
|
|
8
|
+
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER } from "../loaders";
|
|
9
9
|
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAllTextes, findAllRapports, } from "../model";
|
|
10
10
|
import { processRapport, processTexte } from "./retrieve_documents";
|
|
11
11
|
import { buildActesLegislatifs } from "../model/dosleg";
|
|
@@ -164,7 +164,7 @@ async function convertDatasetDosLeg(dataDir, options) {
|
|
|
164
164
|
const actesBrutsNormalises = buildActesLegislatifs(dossier);
|
|
165
165
|
const dossierWithActes = {
|
|
166
166
|
...dossier,
|
|
167
|
-
actes_legislatifs: actesBrutsNormalises
|
|
167
|
+
actes_legislatifs: actesBrutsNormalises
|
|
168
168
|
};
|
|
169
169
|
const dossierFile = `${dossier["signet"]}.json`;
|
|
170
170
|
await fs.outputJSON(path.join(dossierReorganizedDir, dossierFile), dossierWithActes, { spaces: 2 });
|
|
@@ -236,7 +236,7 @@ async function convertTextes(dataDir, options) {
|
|
|
236
236
|
const texteName = path.parse(texte["url"]).name;
|
|
237
237
|
const texteDir = path.join(originalTextesDir, `${session}`, texteName);
|
|
238
238
|
// oritxtcod = 1 corresponds to "Texte de loi déposé au Sénat"
|
|
239
|
-
const hasExposeDesMotifs = texte["origine"] ===
|
|
239
|
+
const hasExposeDesMotifs = texte["origine"] === 'Sénat' && texte["ordre_origine"] === '1';
|
|
240
240
|
const metadata = {
|
|
241
241
|
name: texteName,
|
|
242
242
|
session: texte["session"],
|
|
@@ -271,8 +271,7 @@ async function retrieveOpenData() {
|
|
|
271
271
|
process.env = {
|
|
272
272
|
...process.env,
|
|
273
273
|
PGHOST: process.env["PGHOST"] || config.db.host,
|
|
274
|
-
PGPORT: process.env["PGPORT"] ||
|
|
275
|
-
PGDATABASE: process.env["PGDATABASE"] || config.db.name,
|
|
274
|
+
PGPORT: process.env["PGPORT"] || config.db.port,
|
|
276
275
|
PGUSER: process.env["PGUSER"] || config.db.user,
|
|
277
276
|
PGPASSWORD: process.env["PGPASSWORD"] || config.db.password,
|
|
278
277
|
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { iterLoadSenatRapports
|
|
1
|
+
import { iterLoadSenatRapports } from "../loaders";
|
|
2
2
|
import commandLineArgs from "command-line-args";
|
|
3
3
|
import { dataDirDefaultOption } from "./shared/cli_helpers";
|
|
4
4
|
const optionsDefinitions = [dataDirDefaultOption];
|
package/lib/src/git.js
CHANGED
|
@@ -2,6 +2,57 @@ import { execSync } from "node:child_process";
|
|
|
2
2
|
import fs from "fs-extra";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
const MAXBUFFER = 50 * 1024 * 1024;
|
|
5
|
+
const GIT_LOCK_RETRY_DELAY_MS = 1000;
|
|
6
|
+
const GIT_LOCK_RETRY_COUNT = 5;
|
|
7
|
+
const GIT_LOCK_STALE_AFTER_MS = 2 * 60 * 1000;
|
|
8
|
+
function sleep(ms) {
|
|
9
|
+
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
|
|
10
|
+
}
|
|
11
|
+
function getIndexLockPath(repositoryDir) {
|
|
12
|
+
return path.join(repositoryDir, ".git", "index.lock");
|
|
13
|
+
}
|
|
14
|
+
function isIndexLockError(error) {
|
|
15
|
+
const stderr = String(error?.stderr || "");
|
|
16
|
+
return /index\.lock': File exists\./.test(stderr);
|
|
17
|
+
}
|
|
18
|
+
function removeStaleIndexLock(repositoryDir) {
|
|
19
|
+
const lockPath = getIndexLockPath(repositoryDir);
|
|
20
|
+
if (!fs.existsSync(lockPath)) {
|
|
21
|
+
return false;
|
|
22
|
+
}
|
|
23
|
+
const stats = fs.statSync(lockPath);
|
|
24
|
+
const ageMs = Date.now() - stats.mtimeMs;
|
|
25
|
+
if (ageMs < GIT_LOCK_STALE_AFTER_MS) {
|
|
26
|
+
return false;
|
|
27
|
+
}
|
|
28
|
+
fs.removeSync(lockPath);
|
|
29
|
+
return true;
|
|
30
|
+
}
|
|
31
|
+
function execGitWithIndexLockRecovery(command, repositoryDir, options) {
|
|
32
|
+
let lockRemoved = false;
|
|
33
|
+
for (let attempt = 1; attempt <= GIT_LOCK_RETRY_COUNT; attempt++) {
|
|
34
|
+
try {
|
|
35
|
+
execSync(command, {
|
|
36
|
+
cwd: repositoryDir,
|
|
37
|
+
...options,
|
|
38
|
+
});
|
|
39
|
+
return;
|
|
40
|
+
}
|
|
41
|
+
catch (error) {
|
|
42
|
+
if (!isIndexLockError(error)) {
|
|
43
|
+
throw error;
|
|
44
|
+
}
|
|
45
|
+
if (!lockRemoved && removeStaleIndexLock(repositoryDir)) {
|
|
46
|
+
lockRemoved = true;
|
|
47
|
+
continue;
|
|
48
|
+
}
|
|
49
|
+
if (attempt === GIT_LOCK_RETRY_COUNT) {
|
|
50
|
+
throw error;
|
|
51
|
+
}
|
|
52
|
+
sleep(GIT_LOCK_RETRY_DELAY_MS);
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
5
56
|
export function initRepo(repositoryDir) {
|
|
6
57
|
if (!fs.existsSync(path.join(repositoryDir, ".git"))) {
|
|
7
58
|
fs.ensureDirSync(repositoryDir);
|
|
@@ -15,8 +66,7 @@ export function initRepo(repositoryDir) {
|
|
|
15
66
|
}
|
|
16
67
|
export function commit(repositoryDir, message) {
|
|
17
68
|
initRepo(repositoryDir);
|
|
18
|
-
|
|
19
|
-
cwd: repositoryDir,
|
|
69
|
+
execGitWithIndexLockRecovery("git add .", repositoryDir, {
|
|
20
70
|
env: process.env,
|
|
21
71
|
encoding: "utf-8",
|
|
22
72
|
stdio: ["ignore", "ignore", "pipe"],
|
package/lib/src/loaders.d.ts
CHANGED
|
@@ -43,10 +43,9 @@ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, s
|
|
|
43
43
|
export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
|
|
44
44
|
export declare function iterLoadSenatRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
45
45
|
export declare function iterLoadSenatTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
46
|
-
export declare function iterLoadSenatDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
|
|
47
46
|
export declare function iterLoadSenatRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
48
|
-
export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult
|
|
49
|
-
export declare function loadSenatTexteContent(dataDir: string,
|
|
47
|
+
export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult & Partial<FlatTexte>>>;
|
|
48
|
+
export declare function loadSenatTexteContent(dataDir: string, session: number | string | null | undefined, texteId: string): IterItem<FlatTexte | null>;
|
|
50
49
|
export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
|
|
51
50
|
item: CompteRendu | null;
|
|
52
51
|
};
|
package/lib/src/loaders.js
CHANGED
|
@@ -182,29 +182,6 @@ export function* iterLoadSenatTexteUrls(dataDir, session) {
|
|
|
182
182
|
}
|
|
183
183
|
}
|
|
184
184
|
}
|
|
185
|
-
export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
|
|
186
|
-
for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
|
|
187
|
-
for (const lecture of dossierLegislatif["lectures"]) {
|
|
188
|
-
const lecturesSenat = lecture.lectures_assemblee.filter((lectureAssemblee) => lectureAssemblee.assemblee === "Sénat");
|
|
189
|
-
for (const lectureSenat of lecturesSenat) {
|
|
190
|
-
for (const document of lectureSenat[documentType]) {
|
|
191
|
-
const enrichedDocument = {
|
|
192
|
-
signet_dossier: dossierLegislatif["signet"],
|
|
193
|
-
...document,
|
|
194
|
-
};
|
|
195
|
-
const documentItem = {
|
|
196
|
-
item: enrichedDocument,
|
|
197
|
-
};
|
|
198
|
-
if (document.url) {
|
|
199
|
-
const documentName = path.parse(document.url).name;
|
|
200
|
-
documentItem.filePathFromDataset = path.join(`${document.session ?? UNDEFINED_SESSION}`, documentName, `${documentName}.pdf`);
|
|
201
|
-
}
|
|
202
|
-
yield documentItem;
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
}
|
|
208
185
|
export function* iterLoadSenatRapports(dataDir, session, options = {}) {
|
|
209
186
|
for (const iterItem of iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options)) {
|
|
210
187
|
if (iterItem.item?.["id"]) {
|
|
@@ -213,14 +190,22 @@ export function* iterLoadSenatRapports(dataDir, session, options = {}) {
|
|
|
213
190
|
}
|
|
214
191
|
}
|
|
215
192
|
export function* iterLoadSenatTextes(dataDir, session, options = {}) {
|
|
216
|
-
for (const iterItem of
|
|
217
|
-
|
|
193
|
+
for (const iterItem of iterLoadSenatItems(dataDir, TEXTE_FOLDER, session, DATA_ORIGINAL_FOLDER, options)) {
|
|
194
|
+
if (!iterItem.item?.["id"]) {
|
|
195
|
+
continue;
|
|
196
|
+
}
|
|
197
|
+
const texteItem = iterItem;
|
|
198
|
+
const texte = texteItem.item;
|
|
199
|
+
const texteId = texte["id"];
|
|
200
|
+
const { item: texteContent } = loadSenatTexteContent(dataDir, texte["session"], texteId);
|
|
201
|
+
if (texteContent) {
|
|
202
|
+
texteItem.item.divisions = texteContent.divisions;
|
|
203
|
+
}
|
|
204
|
+
yield texteItem;
|
|
218
205
|
}
|
|
219
206
|
}
|
|
220
|
-
export function loadSenatTexteContent(dataDir,
|
|
221
|
-
const
|
|
222
|
-
const jsonTexteName = `${parsedTextePath.name}.json`;
|
|
223
|
-
const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, parsedTextePath.dir, jsonTexteName);
|
|
207
|
+
export function loadSenatTexteContent(dataDir, session, texteId) {
|
|
208
|
+
const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? UNDEFINED_SESSION), texteId, `${texteId}.json`);
|
|
224
209
|
if (!fs.existsSync(fullTextePath)) {
|
|
225
210
|
return { item: null };
|
|
226
211
|
}
|
|
@@ -2,6 +2,9 @@ import { sql } from "kysely";
|
|
|
2
2
|
import { dbSenat } from "../databases";
|
|
3
3
|
import { concat, rtrim, toDateString } from "./util";
|
|
4
4
|
import { jsonArrayFrom } from "kysely/helpers/postgres";
|
|
5
|
+
function stripTrailingHashes(expr) {
|
|
6
|
+
return sql `regexp_replace(${expr}, '#+$', '')`;
|
|
7
|
+
}
|
|
5
8
|
function orderOrdreOrigineTexte(expr) {
|
|
6
9
|
return sql `array_position(array['0','2','1'], ${expr})`;
|
|
7
10
|
}
|
|
@@ -37,15 +40,15 @@ function selectRapportAttributes({ eb, ref, val }) {
|
|
|
37
40
|
eb
|
|
38
41
|
.case()
|
|
39
42
|
.when("rap.rapurl", "is not", null)
|
|
40
|
-
.then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
43
|
+
.then(stripTrailingHashes(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`))
|
|
41
44
|
.else(null)
|
|
42
45
|
.end()
|
|
43
46
|
.as("id"),
|
|
44
47
|
eb
|
|
45
48
|
.case()
|
|
46
49
|
.when("rap.typurl", "=", "I")
|
|
47
|
-
.then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
|
|
48
|
-
.else(rtrim(ref("rap.rapurl")))
|
|
50
|
+
.then(stripTrailingHashes(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl")))))
|
|
51
|
+
.else(stripTrailingHashes(rtrim(ref("rap.rapurl"))))
|
|
49
52
|
.end()
|
|
50
53
|
.as("url"),
|
|
51
54
|
rtrim(ref("denrap.libdenrap")).as("type"),
|
|
@@ -95,15 +98,15 @@ function selectTexteAttributes({ eb, ref, val }) {
|
|
|
95
98
|
eb
|
|
96
99
|
.case()
|
|
97
100
|
.when("texte.texurl", "is not", null)
|
|
98
|
-
.then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
101
|
+
.then(stripTrailingHashes(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`))
|
|
99
102
|
.else(null)
|
|
100
103
|
.end()
|
|
101
104
|
.as("id"),
|
|
102
105
|
eb
|
|
103
106
|
.case()
|
|
104
107
|
.when("texte.typurl", "=", "I")
|
|
105
|
-
.then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
|
|
106
|
-
.else(rtrim(ref("texte.texurl")))
|
|
108
|
+
.then(stripTrailingHashes(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl")))))
|
|
109
|
+
.else(stripTrailingHashes(rtrim(ref("texte.texurl"))))
|
|
107
110
|
.end()
|
|
108
111
|
.as("url"),
|
|
109
112
|
rtrim(ref("oritxt.oritxtlib")).as("origine"),
|
package/lib/src/parsers/texte.js
CHANGED
|
@@ -1,24 +1,6 @@
|
|
|
1
1
|
import { JSDOM } from "jsdom";
|
|
2
|
-
import { AKN_IDENTIFICATION_STRUCTURE_REGEXP
|
|
2
|
+
import { AKN_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
|
|
3
3
|
import { DivisionType, } from "../types/texte";
|
|
4
|
-
function buildWorklow(metaElement) {
|
|
5
|
-
const stepElements = metaElement.querySelectorAll("workflow step");
|
|
6
|
-
const steps = [];
|
|
7
|
-
for (const stepElement of stepElements) {
|
|
8
|
-
const identification = stepElement.getAttribute("href") ?? "";
|
|
9
|
-
const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
|
|
10
|
-
steps.push({
|
|
11
|
-
eId: stepElement.getAttribute("eId"),
|
|
12
|
-
date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
|
|
13
|
-
type: identificationParts?.["type"] || null,
|
|
14
|
-
session: identificationParts?.["session"] || null,
|
|
15
|
-
numero: identificationParts?.["numTexte"] || null,
|
|
16
|
-
version: identificationParts?.["version"] ? identificationParts["version"] : null,
|
|
17
|
-
outcome: stepElement.getAttribute("outcome"),
|
|
18
|
-
});
|
|
19
|
-
}
|
|
20
|
-
return steps;
|
|
21
|
-
}
|
|
22
4
|
function buildDivision(node, index) {
|
|
23
5
|
const eId = node.getAttribute("eId");
|
|
24
6
|
const tag = node.nodeName;
|
|
@@ -147,7 +129,6 @@ export function transformTexte(document) {
|
|
|
147
129
|
dateDepot: dateDepot ? new Date(dateDepot) : null,
|
|
148
130
|
datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
|
|
149
131
|
version: identificationParts?.["version"] ? identificationParts["version"] : null,
|
|
150
|
-
workflow: metaElement ? buildWorklow(metaElement) : [],
|
|
151
132
|
divisions: bodyElement ? flattenTexte(bodyElement) : [],
|
|
152
133
|
};
|
|
153
134
|
}
|
|
@@ -13,32 +13,31 @@ import { UNDEFINED_SESSION } from "../types/sessions";
|
|
|
13
13
|
import { getSessionFromDate, getSessionFromSignet } from "./datautil";
|
|
14
14
|
import { commonOptions } from "./shared/cli_helpers";
|
|
15
15
|
import { ensureAndClearDir } from "./shared/util";
|
|
16
|
+
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
16
17
|
const optionsDefinitions = [...commonOptions];
|
|
17
18
|
const options = commandLineArgs(optionsDefinitions);
|
|
18
19
|
const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
|
|
19
20
|
const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
|
|
20
21
|
const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
|
|
21
22
|
const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
|
|
22
|
-
function
|
|
23
|
+
function commitAndPushGit(datasetDir, options) {
|
|
23
24
|
if (options.commit) {
|
|
24
25
|
const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options.remote);
|
|
25
26
|
if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
|
|
26
27
|
exitCode = errorCode;
|
|
27
28
|
}
|
|
28
29
|
}
|
|
29
|
-
return exitCode;
|
|
30
30
|
}
|
|
31
31
|
async function convertData() {
|
|
32
32
|
const dataDir = options["dataDir"];
|
|
33
33
|
assert(dataDir, "Missing argument: data directory");
|
|
34
34
|
const enabledDatasets = getEnabledDatasets(options["categories"]);
|
|
35
35
|
console.time("data transformation time");
|
|
36
|
-
let exitCode = 0;
|
|
37
36
|
if (enabledDatasets & EnabledDatasets.Ameli) {
|
|
38
37
|
try {
|
|
39
38
|
await convertDatasetAmeli(dataDir, options);
|
|
40
39
|
const ameliDir = path.join(dataDir, datasets.ameli.database);
|
|
41
|
-
|
|
40
|
+
commitAndPushGit(ameliDir, options);
|
|
42
41
|
}
|
|
43
42
|
catch (error) {
|
|
44
43
|
console.error(`Error converting Ameli dataset:`, error);
|
|
@@ -48,7 +47,7 @@ async function convertData() {
|
|
|
48
47
|
try {
|
|
49
48
|
await convertDatasetDebats(dataDir, options);
|
|
50
49
|
const debatsDir = path.join(dataDir, datasets.debats.database);
|
|
51
|
-
|
|
50
|
+
commitAndPushGit(debatsDir, options);
|
|
52
51
|
}
|
|
53
52
|
catch (error) {
|
|
54
53
|
console.error(`Error converting Debats dataset:`, error);
|
|
@@ -58,7 +57,7 @@ async function convertData() {
|
|
|
58
57
|
try {
|
|
59
58
|
await convertDatasetDosLeg(dataDir, options);
|
|
60
59
|
const doslegDir = path.join(dataDir, datasets.dosleg.database);
|
|
61
|
-
|
|
60
|
+
commitAndPushGit(doslegDir, options);
|
|
62
61
|
}
|
|
63
62
|
catch (error) {
|
|
64
63
|
console.error(`Error converting DosLeg dataset:`, error);
|
|
@@ -66,7 +65,7 @@ async function convertData() {
|
|
|
66
65
|
try {
|
|
67
66
|
await convertDatasetScrutins(dataDir, options);
|
|
68
67
|
const scrutinsDir = path.join(dataDir, SCRUTINS_FOLDER);
|
|
69
|
-
|
|
68
|
+
commitAndPushGit(scrutinsDir, options);
|
|
70
69
|
}
|
|
71
70
|
catch (error) {
|
|
72
71
|
console.error(`Error converting Scrutins dataset:`, error);
|
|
@@ -76,7 +75,7 @@ async function convertData() {
|
|
|
76
75
|
try {
|
|
77
76
|
await convertDatasetQuestions(dataDir, options);
|
|
78
77
|
const questionsDir = path.join(dataDir, datasets.questions.database);
|
|
79
|
-
|
|
78
|
+
commitAndPushGit(questionsDir, options);
|
|
80
79
|
}
|
|
81
80
|
catch (error) {
|
|
82
81
|
console.error(`Error converting Questions dataset:`, error);
|
|
@@ -86,7 +85,7 @@ async function convertData() {
|
|
|
86
85
|
try {
|
|
87
86
|
await convertDatasetSens(dataDir, options);
|
|
88
87
|
const sensDir = path.join(dataDir, datasets.sens.database);
|
|
89
|
-
|
|
88
|
+
commitAndPushGit(sensDir, options);
|
|
90
89
|
}
|
|
91
90
|
catch (error) {
|
|
92
91
|
console.error(`Error converting Sens dataset:`, error);
|
|
@@ -95,7 +94,6 @@ async function convertData() {
|
|
|
95
94
|
if (!options["silent"]) {
|
|
96
95
|
console.timeEnd("data transformation time");
|
|
97
96
|
}
|
|
98
|
-
return exitCode;
|
|
99
97
|
}
|
|
100
98
|
async function convertDatasetAmeli(dataDir, options) {
|
|
101
99
|
const dataset = datasets.ameli;
|
|
@@ -347,7 +345,7 @@ async function convertDatasetSens(dataDir, options) {
|
|
|
347
345
|
}
|
|
348
346
|
}
|
|
349
347
|
convertData()
|
|
350
|
-
.then((
|
|
348
|
+
.then(() => process.exit(exitCode))
|
|
351
349
|
.catch((error) => {
|
|
352
350
|
console.log(error);
|
|
353
351
|
process.exit(1);
|
|
@@ -3,11 +3,13 @@ import commandLineArgs from "command-line-args";
|
|
|
3
3
|
import fs from "fs-extra";
|
|
4
4
|
import { DateTime } from "luxon";
|
|
5
5
|
import path from "path";
|
|
6
|
+
import * as git from "../git";
|
|
6
7
|
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
|
|
7
8
|
import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte";
|
|
8
9
|
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
9
10
|
import { commonOptions } from "./shared/cli_helpers";
|
|
10
11
|
import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util";
|
|
12
|
+
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
11
13
|
const optionsDefinitions = [
|
|
12
14
|
...commonOptions,
|
|
13
15
|
{
|
|
@@ -32,6 +34,14 @@ const optionsDefinitions = [
|
|
|
32
34
|
const options = commandLineArgs(optionsDefinitions);
|
|
33
35
|
const textDecoder = new TextDecoder("utf8");
|
|
34
36
|
const today = DateTime.now();
|
|
37
|
+
function commitAndPushGit(datasetDir) {
|
|
38
|
+
if (options["commit"]) {
|
|
39
|
+
const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options["remote"]);
|
|
40
|
+
if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
|
|
41
|
+
exitCode = errorCode;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
35
45
|
function isDocumentRecent(documentDate, daysThreshold) {
|
|
36
46
|
if (!documentDate)
|
|
37
47
|
return false;
|
|
@@ -117,6 +127,12 @@ export async function processTexte(texteMetadata, originalTextesDir, transformed
|
|
|
117
127
|
const result = await processDocument(format.url.toString(), destPath, texteMetadata.date, options);
|
|
118
128
|
// Specific logic: Parsing (Only applies to XML)
|
|
119
129
|
if (format.isParseTarget && options.parseDocuments) {
|
|
130
|
+
if (!result.buffer && !(await fs.pathExists(destPath))) {
|
|
131
|
+
if (options.verbose) {
|
|
132
|
+
console.warn(`Skipping parse for missing XML file: ${destPath}`);
|
|
133
|
+
}
|
|
134
|
+
continue;
|
|
135
|
+
}
|
|
120
136
|
await parseDocument(texteMetadata.session, transformedTextesDir, destPath, texteMetadata.name, result.buffer, exposeDesMotifsContent, options);
|
|
121
137
|
}
|
|
122
138
|
}
|
|
@@ -134,9 +150,10 @@ export async function processRapport(rapportMetadata, originalRapportsDir, optio
|
|
|
134
150
|
await processDocument(format.url.toString(), destPath, rapportMetadata.date, options);
|
|
135
151
|
}
|
|
136
152
|
}
|
|
137
|
-
async function
|
|
138
|
-
const
|
|
139
|
-
const
|
|
153
|
+
async function processTextes(dataDir, sessions) {
|
|
154
|
+
const textesDir = path.join(dataDir, TEXTE_FOLDER);
|
|
155
|
+
const originalTextesDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);
|
|
156
|
+
const transformedTextesDir = path.join(textesDir, DATA_TRANSFORMED_FOLDER);
|
|
140
157
|
if (options["parseDocuments"]) {
|
|
141
158
|
ensureAndClearDir(transformedTextesDir);
|
|
142
159
|
}
|
|
@@ -153,9 +170,11 @@ async function retrieveTextes(dataDir, sessions) {
|
|
|
153
170
|
await processTexte(texteMetadata, originalTextesDir, transformedTextesDir, dlOptions);
|
|
154
171
|
}
|
|
155
172
|
}
|
|
173
|
+
commitAndPushGit(textesDir);
|
|
156
174
|
}
|
|
157
|
-
async function
|
|
158
|
-
const
|
|
175
|
+
async function processRapports(dataDir, sessions) {
|
|
176
|
+
const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
|
|
177
|
+
const originalRapportsDir = path.join(rapportsDir, DATA_ORIGINAL_FOLDER);
|
|
159
178
|
const dlOptions = {
|
|
160
179
|
force: options["force"],
|
|
161
180
|
silent: options["silent"],
|
|
@@ -168,6 +187,7 @@ async function retrieveRapports(dataDir, sessions) {
|
|
|
168
187
|
await processRapport(rapportMetadata, originalRapportsDir, dlOptions);
|
|
169
188
|
}
|
|
170
189
|
}
|
|
190
|
+
commitAndPushGit(rapportsDir);
|
|
171
191
|
}
|
|
172
192
|
async function parseDocument(session, transformedTextesDir, textePath, texteName, texteBuffer, exposeDesMotifs = null, options = {}) {
|
|
173
193
|
if (options.verbose) {
|
|
@@ -179,6 +199,12 @@ async function parseDocument(session, transformedTextesDir, textePath, texteName
|
|
|
179
199
|
parsedTexte = parseTexte(texteXml);
|
|
180
200
|
}
|
|
181
201
|
else {
|
|
202
|
+
if (!(await fs.pathExists(textePath))) {
|
|
203
|
+
if (options.verbose) {
|
|
204
|
+
console.warn(`Skipping parse for missing XML file: ${textePath}`);
|
|
205
|
+
}
|
|
206
|
+
return null;
|
|
207
|
+
}
|
|
182
208
|
parsedTexte = await parseTexteFromFile(textePath);
|
|
183
209
|
}
|
|
184
210
|
if (!parsedTexte)
|
|
@@ -200,10 +226,10 @@ async function main() {
|
|
|
200
226
|
const sessions = getSessionsFromStart(options["fromSession"]);
|
|
201
227
|
console.time("documents processing time");
|
|
202
228
|
if (isOptionEmptyOrHasValue(options["types"], "textes")) {
|
|
203
|
-
await
|
|
229
|
+
await processTextes(dataDir, sessions);
|
|
204
230
|
}
|
|
205
231
|
if (isOptionEmptyOrHasValue(options["types"], "rapports")) {
|
|
206
|
-
await
|
|
232
|
+
await processRapports(dataDir, sessions);
|
|
207
233
|
}
|
|
208
234
|
if (!options["silent"]) {
|
|
209
235
|
console.timeEnd("documents processing time");
|
|
@@ -211,7 +237,7 @@ async function main() {
|
|
|
211
237
|
}
|
|
212
238
|
if (process.argv[1].endsWith("retrieve_documents.ts")) {
|
|
213
239
|
main()
|
|
214
|
-
.then(() => process.exit(
|
|
240
|
+
.then(() => process.exit(exitCode))
|
|
215
241
|
.catch((error) => {
|
|
216
242
|
console.log(error);
|
|
217
243
|
process.exit(1);
|
|
@@ -97,7 +97,7 @@ async function writeMatchArtifacts(args) {
|
|
|
97
97
|
if (finalTxt)
|
|
98
98
|
await fsp.writeFile(path.join(ctx.baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
|
|
99
99
|
}
|
|
100
|
-
async function processGroupedReunion(agenda, session, dataDir) {
|
|
100
|
+
async function processGroupedReunion(agenda, session, dataDir, lastByVideo) {
|
|
101
101
|
// 1) GuardRails
|
|
102
102
|
if (shouldSkipAgenda(agenda))
|
|
103
103
|
return;
|
|
@@ -137,18 +137,30 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
137
137
|
session: ctx.session,
|
|
138
138
|
options,
|
|
139
139
|
writeIfChanged,
|
|
140
|
+
lastByVideo, // NEW
|
|
141
|
+
getAgendaSegmentTimecodes,
|
|
142
|
+
buildSenatVodMasterM3u8FromNvs,
|
|
143
|
+
});
|
|
144
|
+
await processBisIfNeeded({
|
|
145
|
+
agenda,
|
|
146
|
+
secondBest,
|
|
147
|
+
ctx,
|
|
148
|
+
skipDownload,
|
|
149
|
+
options,
|
|
150
|
+
lastByVideo,
|
|
151
|
+
writeIfChanged,
|
|
152
|
+
processOneReunionMatch,
|
|
140
153
|
getAgendaSegmentTimecodes,
|
|
141
154
|
buildSenatVodMasterM3u8FromNvs,
|
|
142
155
|
});
|
|
143
|
-
// 4) Optional BIS
|
|
144
|
-
await processBisIfNeeded({ agenda, secondBest, ctx, skipDownload, options });
|
|
145
156
|
}
|
|
146
157
|
async function processAll(dataDir, sessions) {
|
|
147
158
|
console.log("Process all Agendas and fetch video's url");
|
|
148
159
|
for (const session of sessions) {
|
|
160
|
+
const lastByVideo = new Map();
|
|
149
161
|
for (const { item: agenda } of iterLoadSenatAgendas(dataDir, session)) {
|
|
150
162
|
try {
|
|
151
|
-
await processGroupedReunion(agenda, session, dataDir);
|
|
163
|
+
await processGroupedReunion(agenda, session, dataDir, lastByVideo);
|
|
152
164
|
}
|
|
153
165
|
catch (e) {
|
|
154
166
|
console.error(`[error] ${agenda?.uid ?? "unknown-uid"}:`, e?.message || e);
|
package/lib/src/types/texte.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Reunion } from "../types/agenda";
|
|
2
|
-
import { BestMatch, MatchContext } from "./types";
|
|
2
|
+
import { BestMatch, LastForVideo, MatchContext } from "./types";
|
|
3
3
|
import { CommandLineOptions } from "command-line-args";
|
|
4
4
|
export declare function processOneReunionMatch(args: {
|
|
5
5
|
agenda: Reunion;
|
|
@@ -8,6 +8,11 @@ export declare function processOneReunionMatch(args: {
|
|
|
8
8
|
session: number;
|
|
9
9
|
options: Record<string, any>;
|
|
10
10
|
writeIfChanged: (p: string, content: string) => Promise<void>;
|
|
11
|
+
lastByVideo: Map<string, {
|
|
12
|
+
agendaUid: string;
|
|
13
|
+
agendaJsonPath: string;
|
|
14
|
+
start: number;
|
|
15
|
+
}>;
|
|
11
16
|
getAgendaSegmentTimecodes: (dataNvs: string, finalNvs: string, agendaKey: string) => {
|
|
12
17
|
start: number;
|
|
13
18
|
end: number | null;
|
|
@@ -20,5 +25,26 @@ export declare function processBisIfNeeded(args: {
|
|
|
20
25
|
ctx: MatchContext;
|
|
21
26
|
skipDownload: boolean;
|
|
22
27
|
options: CommandLineOptions;
|
|
28
|
+
lastByVideo: Map<string, LastForVideo>;
|
|
29
|
+
writeIfChanged: (p: string, content: string) => Promise<void>;
|
|
30
|
+
processOneReunionMatch: (args: {
|
|
31
|
+
agenda: Reunion;
|
|
32
|
+
baseDir: string;
|
|
33
|
+
dataDir: string;
|
|
34
|
+
session: number;
|
|
35
|
+
options: Record<string, any>;
|
|
36
|
+
writeIfChanged: (p: string, content: string) => Promise<void>;
|
|
37
|
+
lastByVideo: Map<string, LastForVideo>;
|
|
38
|
+
getAgendaSegmentTimecodes: (dataNvs: string, finalNvs: string, agendaKey: string) => {
|
|
39
|
+
start: number;
|
|
40
|
+
end: number | null;
|
|
41
|
+
} | null;
|
|
42
|
+
buildSenatVodMasterM3u8FromNvs: (dataNvs: string) => string | null;
|
|
43
|
+
}) => Promise<void>;
|
|
44
|
+
getAgendaSegmentTimecodes: (dataNvs: string, finalNvs: string, agendaKey: string) => {
|
|
45
|
+
start: number;
|
|
46
|
+
end: number | null;
|
|
47
|
+
} | null;
|
|
48
|
+
buildSenatVodMasterM3u8FromNvs: (dataNvs: string) => string | null;
|
|
23
49
|
}): Promise<void>;
|
|
24
50
|
export declare function writeIfChanged(p: string, content: string): Promise<void>;
|
|
@@ -5,9 +5,8 @@ import { fetchText } from "./search";
|
|
|
5
5
|
import fs from "fs-extra";
|
|
6
6
|
import fsp from "fs/promises";
|
|
7
7
|
import path from "path";
|
|
8
|
-
import { getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs } from "../utils/nvs-parsing";
|
|
9
8
|
export async function processOneReunionMatch(args) {
|
|
10
|
-
const { agenda, baseDir, dataDir, session, options, writeIfChanged, getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs, } = args;
|
|
9
|
+
const { agenda, baseDir, dataDir, session, options, writeIfChanged, lastByVideo, getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs, } = args;
|
|
11
10
|
const reunionUid = agenda.uid;
|
|
12
11
|
let dataTxt;
|
|
13
12
|
let finalTxt;
|
|
@@ -25,18 +24,40 @@ export async function processOneReunionMatch(args) {
|
|
|
25
24
|
return;
|
|
26
25
|
}
|
|
27
26
|
const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
|
|
27
|
+
// Ensure it exists first.
|
|
28
|
+
if (!(await fs.pathExists(agendaJsonPath))) {
|
|
29
|
+
console.warn(`[warn] agenda file not found: ${agendaJsonPath}`);
|
|
30
|
+
return;
|
|
31
|
+
}
|
|
28
32
|
let timecodeDebutVideo = null;
|
|
29
33
|
let timecodeFinVideo = null;
|
|
30
34
|
const agendaKey = agenda.titre || agenda.objet || "";
|
|
31
35
|
const seg = getAgendaSegmentTimecodes(dataTxt, finalTxt, agendaKey);
|
|
32
36
|
if (seg) {
|
|
33
37
|
timecodeDebutVideo = seg.start;
|
|
34
|
-
timecodeFinVideo =
|
|
38
|
+
timecodeFinVideo = null; // keep open by default
|
|
35
39
|
}
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
40
|
+
// 1) If we have a start timecode, close the previous agenda for this SAME master
|
|
41
|
+
if (timecodeDebutVideo != null) {
|
|
42
|
+
const prev = lastByVideo.get(master);
|
|
43
|
+
if (prev && prev.agendaJsonPath !== agendaJsonPath) {
|
|
44
|
+
// micro-safety: do not close with an earlier timecode
|
|
45
|
+
if (timecodeDebutVideo <= prev.start) {
|
|
46
|
+
console.warn(`[warn] timecode order inversion on same video: ` +
|
|
47
|
+
`prev=${prev.agendaUid}(${prev.start}s) -> cur=${agenda.uid}(${timecodeDebutVideo}s). ` +
|
|
48
|
+
`Skip closing prev to avoid negative segment.`);
|
|
49
|
+
}
|
|
50
|
+
else {
|
|
51
|
+
await patchAgendaTimecodeFin({
|
|
52
|
+
agendaJsonPath: prev.agendaJsonPath,
|
|
53
|
+
timecodeFinVideo: timecodeDebutVideo,
|
|
54
|
+
writeIfChanged,
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
lastByVideo.set(master, { agendaUid: agenda.uid, agendaJsonPath, start: timecodeDebutVideo });
|
|
39
59
|
}
|
|
60
|
+
// 2) Update current agenda JSON with urlVideo (+ start/end if any)
|
|
40
61
|
const raw = await fsp.readFile(agendaJsonPath, "utf-8");
|
|
41
62
|
let obj;
|
|
42
63
|
try {
|
|
@@ -49,7 +70,10 @@ export async function processOneReunionMatch(args) {
|
|
|
49
70
|
const next = { ...obj, urlVideo: master, startTime: agenda.startTime };
|
|
50
71
|
if (timecodeDebutVideo != null) {
|
|
51
72
|
next.timecodeDebutVideo = timecodeDebutVideo;
|
|
52
|
-
|
|
73
|
+
if (timecodeFinVideo != null)
|
|
74
|
+
next.timecodeFinVideo = timecodeFinVideo;
|
|
75
|
+
else
|
|
76
|
+
delete next.timecodeFinVideo;
|
|
53
77
|
}
|
|
54
78
|
await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
|
|
55
79
|
if (!options["silent"]) {
|
|
@@ -58,7 +82,7 @@ export async function processOneReunionMatch(args) {
|
|
|
58
82
|
}
|
|
59
83
|
}
|
|
60
84
|
export async function processBisIfNeeded(args) {
|
|
61
|
-
const { agenda, secondBest, ctx, skipDownload, options } = args;
|
|
85
|
+
const { agenda, secondBest, ctx, skipDownload, options, lastByVideo, writeIfChanged, processOneReunionMatch, getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs, } = args;
|
|
62
86
|
if (skipDownload)
|
|
63
87
|
return;
|
|
64
88
|
if (!secondBest)
|
|
@@ -83,6 +107,7 @@ export async function processBisIfNeeded(args) {
|
|
|
83
107
|
session: ctx.session,
|
|
84
108
|
options,
|
|
85
109
|
writeIfChanged,
|
|
110
|
+
lastByVideo,
|
|
86
111
|
getAgendaSegmentTimecodes,
|
|
87
112
|
buildSenatVodMasterM3u8FromNvs,
|
|
88
113
|
});
|
|
@@ -128,3 +153,19 @@ export async function writeIfChanged(p, content) {
|
|
|
128
153
|
}
|
|
129
154
|
await fsp.writeFile(p, content, "utf-8");
|
|
130
155
|
}
|
|
156
|
+
async function patchAgendaTimecodeFin(args) {
|
|
157
|
+
const { agendaJsonPath, timecodeFinVideo, writeIfChanged } = args;
|
|
158
|
+
if (!(await fs.pathExists(agendaJsonPath)))
|
|
159
|
+
return;
|
|
160
|
+
const raw = await fsp.readFile(agendaJsonPath, "utf-8");
|
|
161
|
+
let obj;
|
|
162
|
+
try {
|
|
163
|
+
obj = JSON.parse(raw);
|
|
164
|
+
}
|
|
165
|
+
catch {
|
|
166
|
+
console.warn(`[warn] invalid JSON in ${agendaJsonPath}`);
|
|
167
|
+
return;
|
|
168
|
+
}
|
|
169
|
+
const next = { ...obj, timecodeFinVideo };
|
|
170
|
+
await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
|
|
171
|
+
}
|
|
@@ -1,9 +1 @@
|
|
|
1
|
-
|
|
2
|
-
export declare const configSchema: z.ZodObject<{
|
|
3
|
-
db: z.ZodObject<{
|
|
4
|
-
host: z.ZodString;
|
|
5
|
-
password: z.ZodString;
|
|
6
|
-
user: z.ZodString;
|
|
7
|
-
port: z.ZodCoercedNumber<unknown>;
|
|
8
|
-
}, z.core.$strip>;
|
|
9
|
-
}, z.core.$strip>;
|
|
1
|
+
export declare function validateConfig(data: any): [any, any];
|
package/lib/validators/config.js
CHANGED
|
@@ -1,10 +1,54 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
}
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
}
|
|
1
|
+
import { validateChain, validateInteger, validateNonEmptyTrimmedString, validateNumber, validateOption, validateString, validateStringToNumber, validateTest, } from "@biryani/core";
|
|
2
|
+
function validateDb(data) {
|
|
3
|
+
if (data === null || data === undefined) {
|
|
4
|
+
return [data, "Missing value"];
|
|
5
|
+
}
|
|
6
|
+
if (typeof data !== "object") {
|
|
7
|
+
return [data, `Expected an object got "${typeof data}"`];
|
|
8
|
+
}
|
|
9
|
+
data = { ...data };
|
|
10
|
+
const errors = {};
|
|
11
|
+
const remainingKeys = new Set(Object.keys(data));
|
|
12
|
+
for (const key of ["host", "password", "user"]) {
|
|
13
|
+
remainingKeys.delete(key);
|
|
14
|
+
const [value, error] = validateNonEmptyTrimmedString(data[key]);
|
|
15
|
+
data[key] = value;
|
|
16
|
+
if (error !== null) {
|
|
17
|
+
errors[key] = error;
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
{
|
|
21
|
+
const key = "port";
|
|
22
|
+
remainingKeys.delete(key);
|
|
23
|
+
const [value, error] = validateChain(validateOption([validateString, validateStringToNumber], validateNumber), validateInteger, validateTest((value) => 0 <= value && value <= 65535, "Must be an integer between 0 and 65535"))(data[key]);
|
|
24
|
+
data[key] = value;
|
|
25
|
+
if (error !== null) {
|
|
26
|
+
errors[key] = error;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
for (const key of remainingKeys) {
|
|
30
|
+
errors[key] = "Unexpected item";
|
|
31
|
+
}
|
|
32
|
+
return [data, Object.keys(errors).length === 0 ? null : errors];
|
|
33
|
+
}
|
|
34
|
+
export function validateConfig(data) {
|
|
35
|
+
if (data === null || data === undefined) {
|
|
36
|
+
return [data, "Missing value"];
|
|
37
|
+
}
|
|
38
|
+
if (typeof data !== "object") {
|
|
39
|
+
return [data, `Expected an object got "${typeof data}"`];
|
|
40
|
+
}
|
|
41
|
+
data = { ...data };
|
|
42
|
+
const errors = {};
|
|
43
|
+
const remainingKeys = new Set(Object.keys(data));
|
|
44
|
+
{
|
|
45
|
+
const key = "db";
|
|
46
|
+
remainingKeys.delete(key);
|
|
47
|
+
const [value, error] = validateDb(data[key]);
|
|
48
|
+
data[key] = value;
|
|
49
|
+
if (error !== null) {
|
|
50
|
+
errors[key] = error;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
return [data, Object.keys(errors).length === 0 ? null : errors];
|
|
54
|
+
}
|
package/package.json
CHANGED
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
export type L1Chapter = {
|
|
2
|
-
id: string;
|
|
3
|
-
label: string;
|
|
4
|
-
index: number;
|
|
5
|
-
};
|
|
6
|
-
export declare function getLevel1Chapters(dataNvs: string): L1Chapter[];
|
|
7
|
-
export declare function pickBestLevel1ChapterForAgenda(chapters: L1Chapter[], agendaTitle: string): {
|
|
8
|
-
chapter: L1Chapter;
|
|
9
|
-
score: number;
|
|
10
|
-
} | null;
|
|
11
|
-
export declare function getAgendaSegmentTimecodes(dataNvs: string, finalPlayerNvs: string, agendaTitleOrObjet: string): {
|
|
12
|
-
start: number;
|
|
13
|
-
end: number | null;
|
|
14
|
-
chapterId: string;
|
|
15
|
-
nextChapterId: string | null;
|
|
16
|
-
score: number;
|
|
17
|
-
} | null;
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
import { XMLParser } from "fast-xml-parser";
|
|
2
|
-
import { dice, normalize } from "./scoring";
|
|
3
|
-
import { decodeHtmlEntities } from "./string_cleaning";
|
|
4
|
-
const CHAPTER_MATCH_THRESHOLD = 0.5;
|
|
5
|
-
const xmlParser = new XMLParser({
|
|
6
|
-
ignoreAttributes: false,
|
|
7
|
-
attributeNamePrefix: "@_",
|
|
8
|
-
});
|
|
9
|
-
function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
|
|
10
|
-
const xml = xmlParser.parse(finalPlayerNvs);
|
|
11
|
-
const synchros = xml?.player?.synchro;
|
|
12
|
-
if (!synchros)
|
|
13
|
-
return null;
|
|
14
|
-
const synchsArray = Array.isArray(synchros) ? synchros : [synchros];
|
|
15
|
-
const match = synchsArray.find((s) => String(s["@_id"]) === String(chapterId));
|
|
16
|
-
if (!match)
|
|
17
|
-
return null;
|
|
18
|
-
const rawTimecode = match["@_timecode"];
|
|
19
|
-
if (rawTimecode == null)
|
|
20
|
-
return null;
|
|
21
|
-
const ms = Number(rawTimecode);
|
|
22
|
-
if (Number.isNaN(ms))
|
|
23
|
-
return null;
|
|
24
|
-
return Math.floor(ms / 1000);
|
|
25
|
-
}
|
|
26
|
-
function toArray(v) {
|
|
27
|
-
if (!v)
|
|
28
|
-
return [];
|
|
29
|
-
return Array.isArray(v) ? v : [v];
|
|
30
|
-
}
|
|
31
|
-
export function getLevel1Chapters(dataNvs) {
|
|
32
|
-
const xml = xmlParser.parse(dataNvs);
|
|
33
|
-
const root = xml?.data?.chapters?.chapter ?? xml?.chapters?.chapter;
|
|
34
|
-
const roots = toArray(root);
|
|
35
|
-
return roots
|
|
36
|
-
.map((ch, i) => {
|
|
37
|
-
const id = ch?.id ?? ch?.["@_id"];
|
|
38
|
-
const labelRaw = ch?.label ?? ch?.["@_label"] ?? "";
|
|
39
|
-
return {
|
|
40
|
-
id: String(id),
|
|
41
|
-
label: decodeHtmlEntities(String(labelRaw)).trim(),
|
|
42
|
-
index: i,
|
|
43
|
-
};
|
|
44
|
-
})
|
|
45
|
-
.filter((c) => c.id && c.label);
|
|
46
|
-
}
|
|
47
|
-
export function pickBestLevel1ChapterForAgenda(chapters, agendaTitle) {
|
|
48
|
-
const q = normalize(agendaTitle);
|
|
49
|
-
let best = null;
|
|
50
|
-
for (const ch of chapters) {
|
|
51
|
-
const s = dice(q, ch.label);
|
|
52
|
-
if (!best || s > best.score)
|
|
53
|
-
best = { chapter: ch, score: s };
|
|
54
|
-
}
|
|
55
|
-
if (!best || best.score < CHAPTER_MATCH_THRESHOLD)
|
|
56
|
-
return { chapter: chapters[0], score: 0 };
|
|
57
|
-
return best;
|
|
58
|
-
}
|
|
59
|
-
export function getAgendaSegmentTimecodes(dataNvs, finalPlayerNvs, agendaTitleOrObjet) {
|
|
60
|
-
const l1 = getLevel1Chapters(dataNvs);
|
|
61
|
-
if (!l1.length)
|
|
62
|
-
return null;
|
|
63
|
-
const best = pickBestLevel1ChapterForAgenda(l1, agendaTitleOrObjet);
|
|
64
|
-
if (!best)
|
|
65
|
-
return null;
|
|
66
|
-
const chapter = best.chapter;
|
|
67
|
-
const next = l1[chapter.index + 1] ?? null;
|
|
68
|
-
const start = getTimecodeForChapterId(finalPlayerNvs, chapter.id);
|
|
69
|
-
if (start == null)
|
|
70
|
-
return null;
|
|
71
|
-
const end = next ? getTimecodeForChapterId(finalPlayerNvs, next.id) : null;
|
|
72
|
-
return {
|
|
73
|
-
start,
|
|
74
|
-
end,
|
|
75
|
-
chapterId: chapter.id,
|
|
76
|
-
nextChapterId: next?.id ?? null,
|
|
77
|
-
score: best.score,
|
|
78
|
-
};
|
|
79
|
-
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
// BASED ON TESTS RESULTS
|
|
2
|
-
// these weights yield good results in the benchmark test suite
|
|
3
|
-
// aiming at 0 WRONG matches while maximizing HITs
|
|
4
|
-
export const weights = {
|
|
5
|
-
wTitle: 0.7,
|
|
6
|
-
wOrg: 0.1,
|
|
7
|
-
wSalle: 0,
|
|
8
|
-
wTime: 0.2,
|
|
9
|
-
sameOrgBonus: 0.2,
|
|
10
|
-
minAccept: 0.5,
|
|
11
|
-
margin: 0.1,
|
|
12
|
-
titleMin: 0.2,
|
|
13
|
-
titleDominance: 0,
|
|
14
|
-
orgUncertainPenalty: 0.8,
|
|
15
|
-
};
|