@tricoteuses/senat 2.11.5 → 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/model/ameli.js +0 -2
- package/lib/model/compte_rendu.d.ts +9 -0
- package/lib/model/compte_rendu.js +325 -0
- package/lib/raw_types/ameli.d.ts +0 -21
- package/lib/raw_types/db.d.ts +11389 -0
- package/lib/raw_types/db.js +5 -0
- package/lib/raw_types/senat.d.ts +0 -21
- package/lib/raw_types_schemats/ameli.d.ts +1 -13
- package/lib/raw_types_schemats/debats.d.ts +1 -1
- package/lib/raw_types_schemats/dosleg.d.ts +1 -1
- package/lib/raw_types_schemats/questions.d.ts +1 -1
- package/lib/raw_types_schemats/sens.d.ts +1 -1
- package/lib/scripts/retrieve_comptes_rendus.d.ts +6 -0
- package/lib/scripts/retrieve_comptes_rendus.js +274 -0
- package/package.json +1 -1
package/lib/raw_types/senat.d.ts
CHANGED
|
@@ -20,10 +20,6 @@ export interface AmeliAmd {
|
|
|
20
20
|
* Identifiant de l'amendement pere pour les sous-amendements
|
|
21
21
|
*/
|
|
22
22
|
amdperid: number | null;
|
|
23
|
-
/**
|
|
24
|
-
* Identifiant de l'amendement auquel celui-ci a ?t? rendu similaire
|
|
25
|
-
*/
|
|
26
|
-
amdrendusim: number | null;
|
|
27
23
|
/**
|
|
28
24
|
* Indication de la mention -Et plusieurs de ses collegues-
|
|
29
25
|
*/
|
|
@@ -40,10 +36,6 @@ export interface AmeliAmd {
|
|
|
40
36
|
* Indication de la mention -Et plusieurs de ses collegues- (uniquement pour les amendements de commission)
|
|
41
37
|
*/
|
|
42
38
|
colleg: Generated<string>;
|
|
43
|
-
/**
|
|
44
|
-
* Commentaire sur les probl?mes rencontr?s lors du traitement de l'amendement
|
|
45
|
-
*/
|
|
46
|
-
commentprobleme: string | null;
|
|
47
39
|
/**
|
|
48
40
|
* Date de depot de l'amendement
|
|
49
41
|
*/
|
|
@@ -60,10 +52,6 @@ export interface AmeliAmd {
|
|
|
60
52
|
* Identifiant de l'etat de l'amendement
|
|
61
53
|
*/
|
|
62
54
|
etaid: number;
|
|
63
|
-
/**
|
|
64
|
-
* Identifiant de l'?tat de traitement d'un amendement (lien fait ? partir d'un enum dans le back)
|
|
65
|
-
*/
|
|
66
|
-
etatraitid: Generated<number>;
|
|
67
55
|
/**
|
|
68
56
|
* Identifiant
|
|
69
57
|
*/
|
|
@@ -84,7 +72,6 @@ export interface AmeliAmd {
|
|
|
84
72
|
* Identité de l'entité qui a saisi l'irrecevabilité
|
|
85
73
|
*/
|
|
86
74
|
irrsaisiepar: number | null;
|
|
87
|
-
islu: Generated<string | null>;
|
|
88
75
|
/**
|
|
89
76
|
* Libelle complementaire (type d'appartenance au groupe)
|
|
90
77
|
*/
|
|
@@ -153,14 +140,6 @@ export interface AmeliAmd {
|
|
|
153
140
|
* Identification des amendements portant sur article additionnel (si different de 0)
|
|
154
141
|
*/
|
|
155
142
|
subpos: Generated<Int8 | null>;
|
|
156
|
-
/**
|
|
157
|
-
* Date de la derni?re modification de l'amendement
|
|
158
|
-
*/
|
|
159
|
-
traitementdate: Timestamp | null;
|
|
160
|
-
/**
|
|
161
|
-
* Identifiant de la derni?re entit? ? avoir modifi? l'amendement
|
|
162
|
-
*/
|
|
163
|
-
traitemententid: number | null;
|
|
164
143
|
/**
|
|
165
144
|
* Identifiant du texte amende
|
|
166
145
|
*/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.12.0
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5433/senat -t amd -t amdsen -t avicom -t avigvt -t cab -t com_ameli -t ent -t etatxt -t fbu -t grppol_ameli -t gvt -t intora -t irr -t lec_ameli -t mot -t nat -t orarol -t sai -t saisen -t sea -t sen_ameli -t ses -t sor -t sub -t txt_ameli -t typrect -t typses -t typsub -t w_nivrec -s ameli
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -9,23 +9,19 @@ export declare namespace amdFields {
|
|
|
9
9
|
type accgou = string | null;
|
|
10
10
|
type alinea = number | null;
|
|
11
11
|
type amdperid = number | null;
|
|
12
|
-
type amdrendusim = number | null;
|
|
13
12
|
type autext = string;
|
|
14
13
|
type avcid = string | null;
|
|
15
14
|
type avgid = string | null;
|
|
16
15
|
type colleg = string;
|
|
17
|
-
type commentprobleme = string | null;
|
|
18
16
|
type datdep = Date | null;
|
|
19
17
|
type dis = string | null;
|
|
20
18
|
type discomid = number | null;
|
|
21
19
|
type etaid = number;
|
|
22
|
-
type etatraitid = number;
|
|
23
20
|
type id = number;
|
|
24
21
|
type ideid = number | null;
|
|
25
22
|
type irrid = number | null;
|
|
26
23
|
type irrlo1113valid = string | null;
|
|
27
24
|
type irrsaisiepar = number | null;
|
|
28
|
-
type islu = string | null;
|
|
29
25
|
type libgrp = string | null;
|
|
30
26
|
type mot = string | null;
|
|
31
27
|
type motid = number | null;
|
|
@@ -43,8 +39,6 @@ export declare namespace amdFields {
|
|
|
43
39
|
type subid = number | null;
|
|
44
40
|
type subidder = number | null;
|
|
45
41
|
type subpos = number | null;
|
|
46
|
-
type traitementdate = Date | null;
|
|
47
|
-
type traitemententid = number | null;
|
|
48
42
|
type txtid = number;
|
|
49
43
|
type typ = string;
|
|
50
44
|
type typrectid = number | null;
|
|
@@ -53,23 +47,19 @@ export interface amd {
|
|
|
53
47
|
accgou: amdFields.accgou;
|
|
54
48
|
alinea: amdFields.alinea;
|
|
55
49
|
amdperid: amdFields.amdperid;
|
|
56
|
-
amdrendusim: amdFields.amdrendusim;
|
|
57
50
|
autext: amdFields.autext;
|
|
58
51
|
avcid: amdFields.avcid;
|
|
59
52
|
avgid: amdFields.avgid;
|
|
60
53
|
colleg: amdFields.colleg;
|
|
61
|
-
commentprobleme: amdFields.commentprobleme;
|
|
62
54
|
datdep: amdFields.datdep;
|
|
63
55
|
dis: amdFields.dis;
|
|
64
56
|
discomid: amdFields.discomid;
|
|
65
57
|
etaid: amdFields.etaid;
|
|
66
|
-
etatraitid: amdFields.etatraitid;
|
|
67
58
|
id: amdFields.id;
|
|
68
59
|
ideid: amdFields.ideid;
|
|
69
60
|
irrid: amdFields.irrid;
|
|
70
61
|
irrlo1113valid: amdFields.irrlo1113valid;
|
|
71
62
|
irrsaisiepar: amdFields.irrsaisiepar;
|
|
72
|
-
islu: amdFields.islu;
|
|
73
63
|
libgrp: amdFields.libgrp;
|
|
74
64
|
mot: amdFields.mot;
|
|
75
65
|
motid: amdFields.motid;
|
|
@@ -87,8 +77,6 @@ export interface amd {
|
|
|
87
77
|
subid: amdFields.subid;
|
|
88
78
|
subidder: amdFields.subidder;
|
|
89
79
|
subpos: amdFields.subpos;
|
|
90
|
-
traitementdate: amdFields.traitementdate;
|
|
91
|
-
traitemententid: amdFields.traitemententid;
|
|
92
80
|
txtid: amdFields.txtid;
|
|
93
81
|
typ: amdFields.typ;
|
|
94
82
|
typrectid: amdFields.typrectid;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.12.0
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5433/senat -t debats -t intdivers -t intpjl -t lecassdeb -t secdis -t secdivers -t syndeb -t typsec -s debats
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.12.0
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5433/senat -t amescr -t ass -t aud -t auteur -t ble -t catrap -t corscr -t date_seance -t deccoc -t denrap -t doc -t docatt -t docsea -t ecr -t etaloi -t evtsea -t forpub -t gen -t lecass -t lecassrap -t lecture -t lnkrap -t loi -t loithe -t natloi -t org -t orgnomhis -t orippr -t oritxt -t posvot -t qua -t rap -t raporg -t rapthe -t rolsig -t scr -t ses -t stavot -t texte -t texte_ancien -t the -t titsen -t typatt -t typaut -t typdoc -t typevtsea -t typlec -t typloi -t typorg -t typrap -t typtxt -t typurl -t votsen -s dosleg
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.12.0
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5433/senat -t etatquestion -t legquestion -t naturequestion -t sortquestion -t tam_ministeres -t tam_questions -t tam_reponses -t the -s questions
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* AUTO-GENERATED FILE - DO NOT EDIT!
|
|
3
3
|
*
|
|
4
|
-
* This file was automatically generated by schemats v.2.
|
|
4
|
+
* This file was automatically generated by schemats v.2.12.0
|
|
5
5
|
* $ schemats generate -c postgres://username:password@localhost:5433/senat -t acr -t activite -t activite_audit -t activite_delegation -t activite_delegation_audit -t activite_loi -t activite_loi_audit -t activite_obligatoire -t activite_participant -t activite_participant_audit -t activite_senateur -t activite_senateur_audit -t activite_senateur_params -t activite_senateur_params_audit -t activites_liees -t activites_liees_audit -t actpro -t adhgrpsen -t adr -t adresse -t adrsen -t app -t assparint -t asster -t autgrpsen -t autorisation_profil -t autorisations -t avis_nomination_art13 -t basdes -t bur -t bur3r -t bur4r -t cad -t candid -t candidat -t candtodelete -t categorie_activite -t catpro -t catpro2e -t catterrit -t cible_categorie_periode -t cirdep -t com -t con -t cotgip -t csp -t cspfam -t databasechangelog -t databasechangeloglock -t delega -t derogation -t derogation_audit -t derogation_senateur -t derogation_senateur_audit -t design -t designoep -t designorg -t discou -t div -t dpt -t dpt_seuil_presence -t dptele -t dptele_files -t dptele_processing -t dptele_processing_type -t dpttypman -t droits_acces -t droits_acces_audit -t droits_type_derogation -t ele -t eleloc -t elucan -t eludep -t eludiv -t elueur -t elueur_apf -t elumet -t elureg -t elusen -t elusen2e -t elusen3r -t elusen4r -t elusencommu -t elusenpair -t eluter -t elutit -t eluvil -t etadebman -t etadebman3r -t etadebman4r -t etafinman -t etafinman3r -t etafinman4r -t etaprr -t etarpm -t etasen -t ext2e_bio -t ext2e_csp -t ext2e_mandats -t ext2e_minist -t extsencom_identite -t extsencom_mandat -t fonact_participant -t foncandid -t foncom -t fondelega -t fongrppol -t fongrpsen -t fonmemcom -t fonmemdelega -t fonmemextpar -t fonmemgrppol -t fonmemgrpsen -t fonmemorg -t fonorg -t grppol -t grppol4r -t grpsenami -t grpsenamiadh -t grpsenamiadhreq -t grpsenamiadhreqeta -t grpsenamiunadh -t grpsim -t gvt -t insee_pays2008 -t jhi_authority -t jhi_user -t jhi_user_authority -t lanetr -t libcom -t 
libdelega -t libgrppol -t libgrpsen -t liborg -t lisdptele -t mel -t memcom -t memcomsea -t memdelega -t memextpar -t memgrppol -t memgrpsen -t memorg -t met -t minind -t minist -t mis -t misetafin -t mismin -t misrapeta -t missen -t moddes -t mode_acces_elusenpair -t nation -t nationgrpsen -t nivlan -t org -t orgext -t orgextpres -t orgthe -t pairie_elusenpair -t parpol -t parpolglo -t participa -t pcs -t pcs24 -t pcs42 -t pcs8 -t pcscatpro -t per -t per_sen -t perapp -t periode_presence -t perpolglo -t perrol -t pj_justificatif -t pj_justificatif_audit -t plaind -t plan_table -t plsql_profiler_runs -t plsql_profiler_units -t poicon -t posvot -t presences_scrutin_surcharge -t presencesrevisionentity -t profil_applicatif -t qua -t rap_the -t reg -t reladr -t requetes_profil -t reslis -t resultat -t reu -t revchanges -t rne_mandat -t rne_mandat_diff -t rne_sen -t rne_sen_diff -t rne_type_mandat -t rol -t sal -t scr -t scrusoldelega -t sea -t sec -t sec2e -t secexe -t sen -t senbur -t senbur3r -t senbur4r -t sennom -t senpj -t sensim -t sentablenom -t senurl -t seuil_presence -t sirpas_elusen -t sirpas_fonmemcom -t sirpas_fonmemdelega -t sirpas_fonmemgrppol -t sirpas_memcom -t sirpas_memdelega -t sirpas_memgrppol -t sirpas_mvt -t sirpas_mvtcm -t sirpas_mvttri -t sirpas_sen -t sirpas_senbur -t sirpas_trf -t srv -t stajur -t stavot -t suspensiontravaux -t suspensiontravaux_audit -t sysage -t syscognos -t sysevt -t sysvar -t sysvar_sendev -t sysvar_senprod -t tapsenrevchanges -t tapsenrevisionentity -t telephone -t temval -t tenpol -t territ -t testoracle -t titele -t titelerne -t titmin -t titnob -t tmpsd -t toutes -t turelu -t typadr -t typapppol -t typbister -t typcandid -t type_activite -t type_activite_participant -t type_activite_rol -t type_activite_senateur -t type_categorie -t type_derogation -t type_droit_acces -t type_pj_justificatif -t type_rne_diff -t type_type_derogation -t typele -t typgrpsen -t typman -t typmin -t typmoddes -t typorg -t typorgext -t 
typparpol -t typpoicon -t typprs -t typprssta -t typscr -t typtel -t typurl -t typvoi -t uploaded_file -t uploaded_file_type -t validation -t validation_defview_profil -t validation_profil -t vercand -t verres -t votes -t zongeo -s sens
|
|
6
6
|
*
|
|
7
7
|
*/
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Needs to be run after retrieve_agenda.ts !
|
|
3
|
+
* - downloads the ZIP of comptes-rendus des débats (CRI) from data.senat.fr
|
|
4
|
+
* - extracts XML files, distributes them by session/year
|
|
5
|
+
*/
|
|
6
|
+
export declare function retrieveCriXmlDump(dataDir: string, options?: Record<string, any>): Promise<void>;
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Needs to be run after retrieve_agenda.ts !
|
|
3
|
+
* - downloads the ZIP of comptes-rendus des débats (CRI) from data.senat.fr
|
|
4
|
+
* - extracts XML files, distributes them by session/year
|
|
5
|
+
*/
|
|
6
|
+
import assert from "assert";
|
|
7
|
+
import commandLineArgs from "command-line-args";
|
|
8
|
+
import fs from "fs-extra";
|
|
9
|
+
import path from "path";
|
|
10
|
+
import StreamZip from "node-stream-zip";
|
|
11
|
+
import * as cheerio from "cheerio";
|
|
12
|
+
import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, } from "../loaders";
|
|
13
|
+
import { commonOptions } from "./shared/cli_helpers";
|
|
14
|
+
import { deriveTitreObjetFromSommaire, parseCompteRenduSlotFromFile, parseYYYYMMDD, sessionStartYearFromDate } from "../model/compte_rendu";
|
|
15
|
+
import { makeGroupUid } from "../utils/reunion_grouping";
|
|
16
|
+
import { getSessionsFromStart } from "../types/sessions";
|
|
17
|
+
import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
|
|
18
|
+
import { computeIntervalsBySlot } from "../utils/cr_spliting";
|
|
19
|
+
// Remote archive fetched by downloadCriZip.
const CRI_ZIP_URL = "https://data.senat.fr/data/debats/cri.zip";
// Slot names in the order pickFirstSlotOfDay scans them.
const SLOT_ORDER = ["MATIN", "APRES-MIDI", "SOIR"];
// Script-specific CLI flag, appended to the options shared by all scripts.
const parseDebatsOption = {
    help: "parse and convert comptes-rendus des débats into JSON",
    name: "parseDebats",
    type: Boolean,
};
const optionsDefinitions = [...commonOptions, parseDebatsOption];
// Parsed once at module load; read by the helpers below (e.g. the "silent" flag).
const options = commandLineArgs(optionsDefinitions);
|
|
30
|
+
/**
 * Error raised when a compte-rendu resource cannot be retrieved.
 *
 * The message embeds both the URL and the failure detail so that a log line
 * is self-explanatory.
 */
class CompteRenduError extends Error {
    /**
     * @param message Failure detail (the caller in this file passes the HTTP status as a string).
     * @param url URL of the resource that could not be retrieved.
     */
    constructor(message, url) {
        super(`An error occurred while retrieving ${url}: ${message}`);
        // Without this, `error.name` (and the prefix printed with stack traces)
        // stays the generic "Error".
        this.name = "CompteRenduError";
    }
}
|
|
35
|
+
/**
 * Returns the first entry of SLOT_ORDER that is present in `slots`,
 * or null when none of them is.
 *
 * @param slots Array of slot names to look into.
 */
function pickFirstSlotOfDay(slots) {
    const earliest = SLOT_ORDER.find((candidate) => slots.includes(candidate));
    return earliest === undefined ? null : earliest;
}
|
|
41
|
+
/**
 * Lists which slots have a grouped agenda file for a given date.
 *
 * Looks in `<dataDir>/<AGENDA_FOLDER>/<DATA_TRANSFORMED_FOLDER>/<session>` for
 * files named `RUSN<yyyymmdd>IDS-<SLOT>.json`, where SLOT is MATIN, APRES-MIDI
 * or SOIR.
 *
 * @param dataDir Root data directory.
 * @param yyyymmdd Date as an 8-character string, interpolated verbatim into the file-name pattern.
 * @param session Session identifier; only its `toString()` is used, as a folder name.
 * @returns `{ filePath, slots }` where `filePath` is the session directory and
 *   `slots` the distinct slot names in directory-listing order, or null when
 *   the directory is missing, unreadable, or has no matching file.
 */
function loadAgendaSPSlotsForDate(dataDir, yyyymmdd, session) {
    const dirPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
    if (!fs.existsSync(dirPath)) {
        console.warn(`[AGENDA] Directory not found for session ${session} → ${dirPath}`);
        return null;
    }
    // The capture group only admits the three known slots, so no further
    // validation of the captured value is needed.
    const pattern = new RegExp(`^RUSN${yyyymmdd}IDS-(MATIN|APRES-MIDI|SOIR)\\.json$`);
    try {
        const found = new Set();
        for (const name of fs.readdirSync(dirPath)) {
            const m = pattern.exec(name);
            if (m)
                found.add(m[1]);
        }
        if (found.size === 0) {
            return null;
        }
        return { filePath: dirPath, slots: Array.from(found) };
    }
    catch (e) {
        // Still best-effort (the caller copes with null), but a listing failure
        // is now reported instead of being swallowed.
        console.warn(`[AGENDA] Cannot list ${dirPath}:`, e);
        return null;
    }
}
|
|
72
|
+
/**
 * Downloads the CRI archive from CRI_ZIP_URL and writes it to `zipPath`.
 *
 * A 404 is tolerated: a warning is logged and nothing is written, so any file
 * already present at `zipPath` is left untouched.
 *
 * @param zipPath Destination path of the archive on disk.
 * @returns true when the archive was downloaded and written, false on a 404.
 *   (Earlier versions returned undefined in both cases; callers that ignore
 *   the result are unaffected.)
 * @throws CompteRenduError for any other non-OK HTTP status.
 */
async function downloadCriZip(zipPath) {
    const silent = Boolean(options["silent"]);
    if (!silent)
        console.log(`Downloading CRI zip ${CRI_ZIP_URL}…`);
    const response = await fetchWithRetry(CRI_ZIP_URL);
    if (response.status === 404) {
        console.warn(`CRI zip ${CRI_ZIP_URL} not found`);
        return false;
    }
    if (!response.ok) {
        throw new CompteRenduError(String(response.status), CRI_ZIP_URL);
    }
    // The whole archive is buffered in memory before being written out.
    const buf = Buffer.from(await response.arrayBuffer());
    await fs.writeFile(zipPath, buf);
    if (!silent) {
        const mb = (buf.length / (1024 * 1024)).toFixed(1);
        console.log(`[CRI] Downloaded ${mb} MB → ${zipPath}`);
    }
    return true;
}
|
|
90
|
+
/**
 * Extracts every `dYYYYMMDD.xml` entry of the archive into
 * `<originalRoot>/<session>/`, where the session is derived from the date in
 * the file name.
 *
 * Entries are written under their base name only; any directory part of the
 * entry name inside the archive is ignored.
 *
 * @param zipPath Path of the ZIP archive to read.
 * @param originalRoot Directory under which per-session folders are created.
 * @returns Number of XML files extracted.
 */
async function extractAndDistributeXmlBySession(zipPath, originalRoot) {
    const zip = new StreamZip.async({ file: zipPath });
    // Session folders already created during this run, so ensureDir runs once
    // per folder and not once per entry.
    const ensuredDirs = new Set();
    let count = 0;
    try {
        const entries = await zip.entries();
        for (const entryName of Object.keys(entries)) {
            if (!entryName.toLowerCase().endsWith(".xml"))
                continue;
            // ex: d20231005.xml
            const base = path.basename(entryName);
            const m = base.match(/^d(\d{8})\.xml$/i);
            if (!m)
                continue;
            const dt = parseYYYYMMDD(m[1]);
            if (!dt)
                continue;
            const session = sessionStartYearFromDate(dt);
            const destDir = path.join(originalRoot, String(session));
            if (!ensuredDirs.has(destDir)) {
                await fs.ensureDir(destDir);
                ensuredDirs.add(destDir);
            }
            await zip.extract(entryName, path.join(destDir, base));
            count++;
        }
    }
    finally {
        // Release the archive handle even when listing or extraction fails.
        await zip.close();
    }
    return count;
}
|
|
116
|
+
/**
 * Downloads the global CRI archive, distributes its XML files into per-session
 * folders and, when `options.parseDebats` is set, converts each file into one
 * JSON document per slot and links it into the grouped agenda files.
 *
 * @param dataDir Root data directory; `cri.zip` is stored directly inside it.
 * @param options Options bag. Keys read here: `parseDebats` (enable the JSON
 *   conversion step) and `fromSession` (passed to getSessionsFromStart).
 * @returns A promise that resolves once all work is done. It also resolves,
 *   after a warning, when no archive is available on disk.
 */
export async function retrieveCriXmlDump(dataDir, options = {}) {
    const root = path.join(dataDir, COMPTES_RENDUS_FOLDER);
    // NOTE(review): the result is not awaited — this assumes ensureAndClearDir is synchronous; confirm.
    ensureAndClearDir(root);
    const originalRoot = path.join(root, DATA_ORIGINAL_FOLDER);
    fs.ensureDirSync(originalRoot);
    const transformedRoot = path.join(root, DATA_TRANSFORMED_FOLDER);
    if (options["parseDebats"])
        fs.ensureDirSync(transformedRoot);
    const sessions = getSessionsFromStart(options["fromSession"]);
    // 1) Download the global ZIP, then distribute its XML files by session
    const zipPath = path.join(dataDir, "cri.zip");
    console.log("[CRI] Downloading global CRI zip…");
    await downloadCriZip(zipPath);
    // downloadCriZip tolerates a 404 without writing anything. Stop here with a
    // clear message in that case; opening a missing archive would only fail
    // with an opaque error.
    if (!(await fs.pathExists(zipPath))) {
        console.warn(`[CRI] No archive available at ${zipPath} → nothing to extract.`);
        return;
    }
    console.log("[CRI] Extracting + distributing XMLs by session…");
    // Remove XML files left in the requested session folders before extracting.
    for (const session of sessions) {
        const dir = path.join(originalRoot, String(session));
        if (await fs.pathExists(dir)) {
            for (const f of await fs.readdir(dir))
                if (/\.xml$/i.test(f))
                    await fs.remove(path.join(dir, f));
        }
    }
    const n = await extractAndDistributeXmlBySession(zipPath, originalRoot);
    if (n === 0) {
        console.warn("[CRI] No XML extracted. Archive empty or layout changed?");
    }
    else {
        console.log(`[CRI] Distributed ${n} XML file(s) into session folders.`);
    }
    if (!options["parseDebats"]) {
        console.log("[CRI] parseDebats not requested → done.");
        return;
    }
    // 2) Convert each XML file of the requested sessions into per-slot JSON
    for (const session of sessions) {
        const originalSessionDir = path.join(originalRoot, String(session));
        if (!(await fs.pathExists(originalSessionDir))) {
            continue;
        }
        const xmlFiles = (await fs.readdir(originalSessionDir))
            .filter((f) => /^d\d{8}\.xml$/i.test(f))
            .sort();
        // parseDebats is necessarily set at this point (see the early return above).
        const transformedSessionDir = path.join(transformedRoot, String(session));
        await fs.ensureDir(transformedSessionDir);
        for (const f of xmlFiles) {
            // File names look like dYYYYMMDD.xml: characters 1..8 are the date.
            const yyyymmdd = f.slice(1, 9);
            const xmlPath = path.join(originalSessionDir, f);
            // 1) Deduce slot(s) from the agenda if it exists
            const agendaInfo = loadAgendaSPSlotsForDate(dataDir, yyyymmdd, session);
            const firstSlotOfDay = pickFirstSlotOfDay(agendaInfo?.slots ?? []);
            // 2) Detect slots from CRI content
            let slotsInCri = [];
            try {
                const raw = await fs.readFile(xmlPath, "utf8");
                const $ = cheerio.load(raw, { xml: false });
                // Map every element under <body> to its position in document order.
                const order = $("body *").toArray();
                const idx = new Map(order.map((el, i) => [el, i]));
                const intervals = computeIntervalsBySlot($, idx, firstSlotOfDay ?? undefined);
                const uniq = new Set();
                for (const iv of intervals)
                    if (iv.slot && iv.slot !== "UNKNOWN")
                        uniq.add(iv.slot);
                slotsInCri = Array.from(uniq);
            }
            catch (e) {
                console.warn(`[CRI] [${session}] Cannot read/parse ${f}:`, e);
                continue;
            }
            if (slotsInCri.length === 0) {
                // Nothing detected in the document: fall back to the agenda's
                // first slot, or MATIN when the agenda gave none.
                slotsInCri = [firstSlotOfDay ?? "MATIN"];
            }
            // 3) Parse & write each slot
            for (const slot of slotsInCri) {
                const outName = `CRSSN${yyyymmdd}-${slot}.json`;
                const cr = await parseCompteRenduSlotFromFile(xmlPath, slot, firstSlotOfDay ?? slot);
                if (!cr) {
                    console.warn(`[CRI] [${session}] Empty or no points for ${yyyymmdd} (${slot}) → skip`);
                    continue;
                }
                const outPath = path.join(transformedSessionDir, outName);
                await fs.writeJSON(outPath, cr, { spaces: 2 });
                try {
                    await linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, cr.uid, cr, session);
                }
                catch (e) {
                    // Linking is best-effort: the JSON has already been written.
                    console.warn(`[AGENDA] [${session}] Could not link CR into grouped for ${yyyymmdd} ${slot}:`, e);
                }
            }
        }
    }
}
|
|
209
|
+
/**
 * CLI entry point: checks that a data directory was given, runs
 * retrieveCriXmlDump with the parsed command-line options, and prints the
 * elapsed time unless the "silent" option is set.
 */
async function main() {
    const { dataDir } = options;
    assert(dataDir, "Missing argument: data directory");
    const timerLabel = "CRI processing time";
    console.time(timerLabel);
    await retrieveCriXmlDump(dataDir, options);
    if (options["silent"]) {
        return;
    }
    console.timeEnd(timerLabel);
}
|
|
218
|
+
// Run the script and turn the outcome into a process exit code:
// 0 on success, 1 (after logging the error) on failure.
void (async () => {
    try {
        await main();
        process.exit(0);
    }
    catch (error) {
        console.error(error);
        process.exit(1);
    }
})();
|
|
224
|
+
/**
 * Records the UID of a compte-rendu in the grouped agenda file of the same
 * date and slot (`RUSN<yyyymmdd>IDS-<slot>.json`).
 *
 * If the file already holds a group for that slot, its `compteRenduRefUid` is
 * overwritten (the first one when several match). Otherwise a new group is
 * appended, with title and object derived from the compte-rendu's sommaire.
 * A missing, unreadable or non-array file is treated as an empty list.
 *
 * @param dataDir Root data directory.
 * @param yyyymmdd Date as an 8-character string.
 * @param slot Slot name used both in the file name and to match groups.
 * @param crUid UID of the compte-rendu to reference.
 * @param cr Parsed compte-rendu; only `metadonnees.sommaire` is read here.
 * @param session Session identifier; only its `toString()` is used, as a folder name.
 */
async function linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, crUid, cr, session) {
    const groupedDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
    fs.ensureDirSync(groupedDir);
    const groupedPath = path.join(groupedDir, `RUSN${yyyymmdd}IDS-${slot}.json`);
    // Load the existing groups, falling back to an empty list.
    let groups = [];
    if (fs.existsSync(groupedPath)) {
        try {
            const parsed = JSON.parse(fs.readFileSync(groupedPath, "utf8"));
            groups = Array.isArray(parsed) ? parsed : [];
        }
        catch (e) {
            console.warn(`[AGENDA] unreadable grouped JSON → ${groupedPath} (${e}) → recreating`);
            groups = [];
        }
    }
    // Look for a group already registered for this slot.
    const candidates = groups.filter((group) => group?.slot === slot);
    if (candidates.length > 1) {
        console.warn(`[AGENDA] multiple groups for ${yyyymmdd} ${slot} in ${groupedPath} → linking the first`);
    }
    const existing = candidates[0] ?? null;
    const dateISO = [yyyymmdd.slice(0, 4), yyyymmdd.slice(4, 6), yyyymmdd.slice(6, 8)].join("-");
    // Derived unconditionally, before branching, exactly as the original does.
    const derived = deriveTitreObjetFromSommaire(cr?.metadonnees?.sommaire, slot);
    const dTitre = derived.titre;
    const dObjet = derived.objet;
    if (existing) {
        existing.compteRenduRefUid = crUid;
    }
    else {
        groups.push({
            uid: makeGroupUid(dateISO, slot),
            chambre: "SN",
            date: dateISO,
            slot,
            type: "Séance publique",
            startTime: null,
            endTime: null,
            captationVideo: false,
            titre: dTitre,
            objet: dObjet || "",
            events: [],
            compteRenduRefUid: crUid,
        });
    }
    await fs.writeJSON(groupedPath, groups, { spaces: 2 });
    if (!options["silent"]) {
        console.log(`[AGENDA] Linked CR ${crUid} → ${path.basename(groupedPath)} [${slot}]`);
    }
}
|