@tricoteuses/senat 2.11.3 → 2.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Tricoteuses-Senat
2
2
 
3
- ## _Retrieve, clean up & handle French Sénat's open data_
3
+ ## _Retrieve, clean up & handle French Sénat's open data_
4
4
 
5
5
  ## Requirements
6
6
 
@@ -42,6 +42,7 @@ npm run data:download ../senat-data -- [--categories All]
42
42
  ```
43
43
 
44
44
  Data from other sources is also available:
45
+
45
46
  ```bash
46
47
  # Retrieval of textes and rapports from Sénat's website
47
48
  # Available options for optional `formats` parameter : xml, html, pdf
@@ -57,8 +58,11 @@ npm run data:parse_textes_lois ../senat-data
57
58
  # Retrieval (& parsing) of agenda from Sénat's website
58
59
  npm run data:retrieve_agenda ../senat-data -- --fromSession 2022 [--parseAgenda]
59
60
 
60
- # Retrieval (& parsing) of comptes-rendus des débats from Sénat's website
61
- npm run data:retrieve_comptes_rendus ../senat-data -- [--parseDebats]
61
+ # Retrieval (& parsing) of comptes-rendus de séance from Sénat's data
62
+ npm run data:retrieve_cr_seance ../senat-data -- [--parseDebats]
63
+
64
+ # Retrieval (& parsing) of comptes-rendus de commissions from Sénat's website
65
+ npm run data:retrieve_cr_commission ../senat-data -- [--parseDebats]
62
66
 
63
67
  # Retrieval of sénateurs' pictures from Sénat's website
64
68
  npm run data:retrieve_senateurs_photos ../senat-data
@@ -2,4 +2,8 @@ import * as cheerio from "cheerio";
2
2
  import { CompteRendu } from "../types/compte_rendu";
3
3
  import { GroupedReunion } from "../types/agenda";
4
4
  export declare function getRemainingTextAfterSpeakerHeader($: cheerio.CheerioAPI, $p: cheerio.Cheerio<any>): string;
5
- export declare function parseCommissionCRFromFile(htmlFilePath: string, best: GroupedReunion): CompteRendu | null;
5
+ export declare function parseCommissionCRFromFile(htmlFilePath: string, best?: GroupedReunion, fallback?: {
6
+ dateISO: string;
7
+ hourShort: string | null;
8
+ organe?: string | null;
9
+ }): CompteRendu | null;
@@ -2,6 +2,8 @@ import * as cheerio from "cheerio";
2
2
  import path from "path";
3
3
  import fs from "fs";
4
4
  import { norm, toCRDate } from "./util";
5
+ import { makeTypeGroupUid } from "../utils/reunion_grouping";
6
+ import { hourShortToStartTime } from "../utils/cr_spliting";
5
7
  const PARA_h3_SEL = "p.sh_justify, p.sh_center, p.sh_marge, p[align], li, h3";
6
8
  function findDayRoot($, targetISO) {
7
9
  let $root = $();
@@ -190,41 +192,50 @@ function frDateToISO(s) {
190
192
  return;
191
193
  return `${y}-${String(mon).padStart(2, "0")}-${String(d).padStart(2, "0")}`;
192
194
  }
193
- export function parseCommissionCRFromFile(htmlFilePath, best) {
195
+ export function parseCommissionCRFromFile(htmlFilePath, best, fallback) {
194
196
  try {
197
+ if (!best && !fallback) {
198
+ console.warn(`[COM-CR][parse] missing both 'best' and 'fallback' for ${path.basename(htmlFilePath)}`);
199
+ return null;
200
+ }
195
201
  const raw = fs.readFileSync(htmlFilePath, "utf8");
196
202
  const $ = cheerio.load(raw, { xmlMode: false });
197
- const dateISO = best.date;
198
- const dateSeance = toCRDate(dateISO, best.startTime);
203
+ // --- champs déterminés depuis best OU fallback (aucun fallback via filename) ---
204
+ const dateISO = best?.date ?? fallback.dateISO;
205
+ const startTime = best?.startTime ?? hourShortToStartTime(fallback.hourShort);
206
+ const organe = best?.organe ?? fallback?.organe ?? undefined;
207
+ // UIDs alignés sur makeTypeGroupUid (RUSN…) ; pour le CR, seul le préfixe « RU » est remplacé par « CRC » (RUSN… → CRCSN…)
208
+ const seanceRef = best?.uid ?? makeTypeGroupUid(dateISO, "COM", fallback.hourShort ?? "NA", organe);
209
+ const uid = seanceRef.replace(/^RU/, "CRC");
210
+ const dateSeance = toCRDate(dateISO, startTime);
211
+ // --- scope du jour ---
199
212
  const $dayRoot = findDayRoot($, dateISO);
200
213
  if ($dayRoot.length === 0) {
201
214
  console.warn(`[COM-CR][parse] day root not found for ${dateISO} in ${path.basename(htmlFilePath)}`);
202
215
  return null;
203
216
  }
204
- let points = [];
205
- // Take all paragraphs/h3 until next h2
217
+ // --- collecte des paragraphes/h3 jusqu’au prochain h2 ---
206
218
  const dayParas = [];
207
219
  let $cursor = $dayRoot.next();
208
220
  while ($cursor.length && !$cursor.is("h2")) {
209
- if ($cursor.is("h3")) {
221
+ if ($cursor.is("h3"))
210
222
  dayParas.push($cursor);
211
- }
212
223
  if ($cursor.is(PARA_h3_SEL)) {
213
224
  dayParas.push($cursor);
214
225
  }
215
226
  else {
216
227
  const $ps = $cursor.find(PARA_h3_SEL);
217
- if ($ps.length) {
228
+ if ($ps.length)
218
229
  $ps.each((_, p) => {
219
230
  dayParas.push($(p));
220
231
  });
221
- }
222
232
  }
223
233
  $cursor = $cursor.next();
224
234
  }
235
+ // --- points ---
225
236
  const allDayPoints = buildPointsFromParagraphs($, dayParas);
226
- if (allDayPoints.length > 0)
227
- points = allDayPoints;
237
+ const points = allDayPoints.length > 0 ? allDayPoints : [];
238
+ // --- session ---
228
239
  const session = dateISO.slice(5, 7) >= "10"
229
240
  ? `${dateISO.slice(0, 4)}-${Number(dateISO.slice(0, 4)) + 1}`
230
241
  : `${Number(dateISO.slice(0, 4)) - 1}-${dateISO.slice(0, 4)}`;
@@ -233,7 +244,7 @@ export function parseCommissionCRFromFile(htmlFilePath, best) {
233
244
  point: points,
234
245
  };
235
246
  const metadonnees = {
236
- dateSeance: dateSeance,
247
+ dateSeance,
237
248
  dateSeanceJour: dateISO,
238
249
  numSeanceJour: "",
239
250
  numSeance: "",
@@ -249,8 +260,8 @@ export function parseCommissionCRFromFile(htmlFilePath, best) {
249
260
  heureGeneration: new Date(),
250
261
  };
251
262
  return {
252
- uid: best.uid.replace(/^RUSN/, "CRC"),
253
- seanceRef: best.uid,
263
+ uid, // ex: CRC20240117IDC…-HHMM
264
+ seanceRef, // ex: RUSN20240117IDC…-HHMM
254
265
  sessionRef: session,
255
266
  metadonnees,
256
267
  contenu,
@@ -13,6 +13,8 @@ export declare function findAuteurs(): Promise<{
13
13
  prenom: any;
14
14
  matricule: any;
15
15
  }[]>;
16
+ export declare function getCodeActeLecture(codeNatureDossier: string, typeLecture: string, assemblee: string): string | null;
17
+ export declare function getCodeActeTexte(codeParent: string | null, texteOrigine: string): string | null;
16
18
  export type DossierLegislatifResult = InferResult<typeof findAllDossiersQuery>[0];
17
19
  export type AuteurResult = InferResult<typeof findAuteursQuery>[0];
18
20
  export {};
@@ -235,3 +235,63 @@ export async function findAuteurs() {
235
235
  return findAuteursQuery
236
236
  .execute();
237
237
  }
238
+ export function getCodeActeLecture(codeNatureDossier, typeLecture, assemblee) {
239
+ const codeAssemblee = assemblee === "Sénat" ? "SN" : assemblee === "Assemblée nationale" ? "AN" : null;
240
+ if (typeLecture === "Commission mixte paritaire") {
241
+ return "CMP";
242
+ }
243
+ if (!codeAssemblee) {
244
+ return null;
245
+ }
246
+ if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Première lecture") {
247
+ return `${codeAssemblee}1`;
248
+ }
249
+ if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Deuxième lecture") {
250
+ return `${codeAssemblee}2`;
251
+ }
252
+ if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Troisième lecture") {
253
+ return `${codeAssemblee}3`;
254
+ }
255
+ if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Quatrième lecture") {
256
+ return `${codeAssemblee}4`;
257
+ }
258
+ if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Nouvelle lecture") {
259
+ return `${codeAssemblee}NLEC`;
260
+ }
261
+ if (["ppl", "pjl", "cvn"].includes(codeNatureDossier) && typeLecture === "Lecture définitive") {
262
+ return `${codeAssemblee}LDEF`;
263
+ }
264
+ if (["ppr"].includes(codeNatureDossier) && typeLecture === "Première lecture") {
265
+ return `${codeAssemblee}LUNI`;
266
+ }
267
+ return null;
268
+ }
269
+ export function getCodeActeTexte(codeParent, texteOrigine) {
270
+ if (codeParent === "CMP") {
271
+ if (texteOrigine === "adopté par l'Assemblée Nationale") {
272
+ return "CMP-DEBATS-SN";
273
+ }
274
+ else if (texteOrigine === "adopté définitivement par le Sénat") {
275
+ return "PROM";
276
+ }
277
+ }
278
+ if (texteOrigine === "transmis au Sénat" || texteOrigine === "déposé au Sénat") {
279
+ return `${codeParent}-DEPOT`;
280
+ }
281
+ // Rajouter une étape similaire -COM-FOND
282
+ if (texteOrigine === "de la commission" || texteOrigine === "de la commission (AN)" || texteOrigine === "résultat des travaux de la commission") {
283
+ return `${codeParent}-DEBATS-SEANCE`;
284
+ }
285
+ if (texteOrigine === "déposé à l'Assemblée Nationale") {
286
+ return `${codeParent}-DEPOT`;
287
+ }
288
+ if (texteOrigine === "retiré par l'auteur") {
289
+ return `${codeParent}-RTRINI`;
290
+ }
291
+ if (texteOrigine === "adopté par le Sénat" || texteOrigine === "adopté par l'Assemblée Nationale" || texteOrigine === "rejeté par le Sénat") {
292
+ return `${codeParent}-DEC`;
293
+ }
294
+ // Rajouter une étape CC-SAISIE
295
+ // Rajouter une étape PROM-
296
+ return null;
297
+ }
@@ -5,6 +5,7 @@ import path from "path";
5
5
  import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
6
6
  import { DATA_ORIGINAL_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
7
7
  import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAuteurs, findSenatRapportUrls, findSenatTexteUrls, } from "../model";
8
+ import { getCodeActeLecture, getCodeActeTexte } from "../model/dosleg";
8
9
  import { UNDEFINED_SESSION } from "../types/sessions";
9
10
  import { getSessionFromDate, getSessionFromSignet } from "./datautil";
10
11
  import { commonOptions } from "./shared/cli_helpers";
@@ -120,8 +121,28 @@ async function convertDatasetDosLeg(dataDir, options) {
120
121
  }
121
122
  loiReorganizedDir = path.join(dossiersReorganizedDir, String(session));
122
123
  fs.ensureDirSync(loiReorganizedDir);
124
+ // Ajout de code_acte à chaque lecture du dossier
125
+ const lecturesWithCodeActe = (loi["lectures"] || []).map((lecture) => {
126
+ const lecturesAssemblee = (lecture["lectures_assemblee"] || []).map((lectureAss) => {
127
+ const codeParent = getCodeActeLecture(loi["code_nature_dossier"], lecture["type_lecture"], lectureAss["assemblee"]);
128
+ const textesWithCodeActe = (lectureAss["textes"] || []).map((texte) => ({
129
+ ...texte,
130
+ code_acte: getCodeActeTexte(codeParent, texte["origine"])
131
+ }));
132
+ return {
133
+ ...lectureAss,
134
+ code_acte: codeParent,
135
+ textes: textesWithCodeActe
136
+ };
137
+ });
138
+ return {
139
+ ...lecture,
140
+ lectures_assemblee: lecturesAssemblee
141
+ };
142
+ });
143
+ const loiWithCodeActe = { ...loi, lectures: lecturesWithCodeActe };
123
144
  const scrutinFileName = `${loi["signet"]}.json`;
124
- fs.writeJSONSync(path.join(loiReorganizedDir, scrutinFileName), loi, {
145
+ fs.writeJSONSync(path.join(loiReorganizedDir, scrutinFileName), loiWithCodeActe, {
125
146
  spaces: 2,
126
147
  });
127
148
  }
@@ -96,12 +96,22 @@ async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPa
96
96
  return;
97
97
  const flatPath = path.join(transformedAgendaSessionDir, `${agendaFileName}.json`);
98
98
  fs.writeJSONSync(flatPath, parsedAgendaEvents, { spaces: 2 });
99
- // 1) SP → groubed by (date, slot)
99
+ // 1) SP → grouped by (date, slot)
100
100
  const spGrouped = groupSeancePubliqueBySlot(parsedAgendaEvents);
101
- if (spGrouped.length > 0) {
102
- writeGroupsAsFiles(transformedAgendaSessionDir, spGrouped);
101
+ // a) on a un Record<TimeSlot, GroupedReunion[]>, on le transforme en array
102
+ const spGroups = Object.values(spGrouped).flat();
103
+ // b) (reco) trier pour stabilité, comme pour les NON-SP
104
+ const PARIS = "Europe/Paris";
105
+ spGroups.sort((a, b) => {
106
+ const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
107
+ const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
108
+ // en cas d’égalité, ordre par slot pour stabilité
109
+ return da - db || (a.slot || "UNKNOWN").localeCompare(b.slot || "UNKNOWN");
110
+ });
111
+ if (spGroups.length > 0) {
112
+ writeGroupsAsFiles(transformedAgendaSessionDir, spGroups);
103
113
  }
104
- // 2) NON-SP → groubed by (date, organe, hour)
114
+ // 2) NON-SP → grouped by (date, organe, hour)
105
115
  const groupedBySuffix = groupNonSPByTypeOrganeHour(parsedAgendaEvents);
106
116
  for (const suffix of ["IDC", "IDM", "IDO", "IDI"]) {
107
117
  const groups = groupedBySuffix[suffix] || [];
@@ -3,7 +3,7 @@ import assert from "assert";
3
3
  import path from "path";
4
4
  import * as cheerio from "cheerio";
5
5
  import { COMMISSION_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
6
- import { createCommissionGroupIfMissing, loadCommissionAgendaForDate, parseCommissionMetadataFromHtml, } from "../utils/cr_spliting";
6
+ import { loadCommissionAgendaForDate, parseCommissionMetadataFromHtml, linkCRtoCommissionGroup, } from "../utils/cr_spliting";
7
7
  import { parseCommissionCRFromFile } from "../model/commission";
8
8
  import commandLineArgs from "command-line-args";
9
9
  import { commonOptions } from "./shared/cli_helpers";
@@ -245,29 +245,40 @@ async function retrieveCommissionCRs(options = {}) {
245
245
  deltaMin = candidates[0].d;
246
246
  }
247
247
  }
248
- if (best) {
249
- const cr = parseCommissionCRFromFile(htmlPath, best);
250
- if (!cr) {
251
- console.warn(`[COM-CR][TRANSFORM] parse failed for ${f} → ${best.uid}`);
252
- }
253
- else {
254
- const fileUid = cr.uid;
255
- const outPath = path.join(transformedSessionDir, `${fileUid}.json`);
256
- await fs.writeJSON(outPath, cr, { spaces: 2 });
257
- const npts = Array.isArray(cr.contenu.point) ? cr.contenu.point.length : cr.contenu.point ? 1 : 0;
258
- if (!options["silent"]) {
259
- console.log(`[COM-CR][TRANSFORM] saved ${path.basename(outPath)} (points=${npts})`);
260
- }
261
- }
248
+ // Parse CR (avec ou sans best)
249
+ const hourShort = toHourShort(day.openTime) ?? "NA";
250
+ const cr = parseCommissionCRFromFile(htmlPath, best ?? undefined, {
251
+ dateISO: day.date,
252
+ hourShort,
253
+ organe: meta.organeDetected ?? null,
254
+ });
255
+ if (!cr) {
256
+ console.warn(`[COM-CR][TRANSFORM] parse failed for ${f} → ${best ? best.uid : "NO-GROUP"}`);
262
257
  }
263
258
  else {
259
+ const fileUid = cr.uid;
260
+ const outPath = path.join(transformedSessionDir, `${fileUid}.json`);
261
+ await fs.writeJSON(outPath, cr, { spaces: 2 });
262
+ const npts = Array.isArray(cr.contenu.point) ? cr.contenu.point.length : cr.contenu.point ? 1 : 0;
263
+ if (!options["silent"]) {
264
+ console.log(`[COM-CR][TRANSFORM] saved ${path.basename(outPath)} (points=${npts})`);
265
+ }
264
266
  const hourShort = toHourShort(day.openTime) ?? "NA";
265
267
  const titreGuess = meta.organeDetected || meta.organeTitleRaw || "Commission";
266
- const { uid, filePath } = await createCommissionGroupIfMissing(dataDir, day.date, meta.organeDetected ?? null, hourShort, titreGuess);
268
+ // Si on a un match agenda, on force le groupUid existant (best.uid)
269
+ const up = await linkCRtoCommissionGroup({
270
+ dataDir,
271
+ session: session,
272
+ dateISO: day.date,
273
+ organeDetected: meta.organeDetected ?? null,
274
+ hourShort,
275
+ crUid: fileUid,
276
+ titreGuess,
277
+ groupUid: best ? best.uid : undefined,
278
+ });
267
279
  if (!options["silent"]) {
268
- console.log(`[COM-CR][PRE-SPLIT][${session}] ${f} | ${day.date}` +
269
- (day.openTime ? ` ${day.openTime}` : ``) +
270
- ` → NO-MATCH → CREATED uid=${uid} file=${path.basename(filePath)}`);
280
+ console.log(`[AGENDA][COM] Linked CR ${fileUid} ${path.basename(up.filePath)} ` +
281
+ `${up.created ? "[created]" : "[updated]"}`);
271
282
  }
272
283
  }
273
284
  }
@@ -9,9 +9,9 @@ import fs from "fs-extra";
9
9
  import path from "path";
10
10
  import StreamZip from "node-stream-zip";
11
11
  import * as cheerio from "cheerio";
12
- import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, } from "../loaders";
12
+ import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
13
13
  import { commonOptions } from "./shared/cli_helpers";
14
- import { deriveTitreObjetFromSommaire, parseCompteRenduSlotFromFile, parseYYYYMMDD, sessionStartYearFromDate } from "../model/seance";
14
+ import { deriveTitreObjetFromSommaire, parseCompteRenduSlotFromFile, parseYYYYMMDD, sessionStartYearFromDate, } from "../model/seance";
15
15
  import { makeGroupUid } from "../utils/reunion_grouping";
16
16
  import { getSessionsFromStart } from "../types/sessions";
17
17
  import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
@@ -22,7 +22,7 @@ const optionsDefinitions = [
22
22
  help: "parse and convert comptes-rendus des débats into JSON",
23
23
  name: "parseDebats",
24
24
  type: Boolean,
25
- }
25
+ },
26
26
  ];
27
27
  const options = commandLineArgs(optionsDefinitions);
28
28
  const CRI_ZIP_URL = "https://data.senat.fr/data/debats/cri.zip";
@@ -151,9 +151,7 @@ export async function retrieveCriXmlDump(dataDir, options = {}) {
151
151
  if (!(await fs.pathExists(originalSessionDir))) {
152
152
  continue;
153
153
  }
154
- const xmlFiles = (await fs.readdir(originalSessionDir))
155
- .filter((f) => /^d\d{8}\.xml$/i.test(f))
156
- .sort();
154
+ const xmlFiles = (await fs.readdir(originalSessionDir)).filter((f) => /^d\d{8}\.xml$/i.test(f)).sort();
157
155
  const transformedSessionDir = path.join(transformedRoot, String(session));
158
156
  if (options["parseDebats"])
159
157
  await fs.ensureDir(transformedSessionDir);
@@ -224,31 +222,30 @@ main()
224
222
  async function linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, crUid, cr, session) {
225
223
  const groupedDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
226
224
  fs.ensureDirSync(groupedDir);
227
- const groupedPath = path.join(groupedDir, 'RUSN' + yyyymmdd + 'IDS-' + slot + '.json');
228
- let groups = [];
225
+ const groupedPath = path.join(groupedDir, `RUSN${yyyymmdd}IDS-${slot}.json`);
226
+ let group = null;
229
227
  if (fs.existsSync(groupedPath)) {
230
228
  try {
231
- groups = JSON.parse(fs.readFileSync(groupedPath, "utf8"));
232
- if (!Array.isArray(groups))
233
- groups = [];
229
+ const parsed = JSON.parse(fs.readFileSync(groupedPath, "utf8"));
230
+ if (Array.isArray(parsed)) {
231
+ // Pick the group whose slot matches; if none matches, fall back to the first group in the array.
232
+ group = parsed.find((g) => g?.slot === slot) ?? parsed[0] ?? null;
233
+ }
234
+ else {
235
+ group = parsed;
236
+ }
234
237
  }
235
238
  catch (e) {
236
239
  console.warn(`[AGENDA] unreadable grouped JSON → ${groupedPath} (${e}) → recreating`);
237
- groups = [];
240
+ group = null;
238
241
  }
239
242
  }
240
- // find existing group with same slot
241
- const sameSlot = groups.filter(g => g?.slot === slot);
242
- let target = null;
243
- if (sameSlot.length > 1) {
244
- console.warn(`[AGENDA] multiple groups for ${yyyymmdd} ${slot} in ${groupedPath} → linking the first`);
245
- }
246
- target = sameSlot[0] ?? null;
247
243
  const dateISO = `${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`;
248
244
  const sommaire = cr?.metadonnees?.sommaire;
249
245
  const { titre: dTitre, objet: dObjet } = deriveTitreObjetFromSommaire(sommaire, slot);
250
- if (!target) {
251
- const newGroup = {
246
+ // Création si manquant
247
+ if (!group) {
248
+ group = {
252
249
  uid: makeGroupUid(dateISO, slot),
253
250
  chambre: "SN",
254
251
  date: dateISO,
@@ -262,13 +259,10 @@ async function linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, crUid, cr,
262
259
  events: [],
263
260
  compteRenduRefUid: crUid,
264
261
  };
265
- groups.push(newGroup);
266
262
  }
267
263
  else {
268
- target.compteRenduRefUid = crUid;
269
- }
270
- await fs.writeJSON(groupedPath, groups, { spaces: 2 });
271
- if (!options["silent"]) {
272
- console.log(`[AGENDA] Linked CR ${crUid} → ${path.basename(groupedPath)} [${slot}]`);
264
+ group.compteRenduRefUid = crUid;
273
265
  }
266
+ await fs.writeJSON(groupedPath, group, { spaces: 2 });
267
+ console.log(`[AGENDA] Linked CR ${crUid} → ${path.basename(groupedPath)} [${slot}]`);
274
268
  }
@@ -18,11 +18,19 @@ export declare function parseCommissionMetadataFromHtml(html: string, sourceFile
18
18
  }[];
19
19
  };
20
20
  export declare function loadCommissionAgendaForDate(dataDir: string, yyyymmdd: string, session: number): Promise<GroupedReunion[]>;
21
- export declare function createCommissionGroupIfMissing(dataDir: string, dateISO: string, // "YYYY-MM-DD"
22
- organeDetected: string | null, // ex. "Commission des finances"
23
- hourShort: string | null, // "HHMM" | "NA"
24
- titreGuess?: string | null): Promise<{
21
+ export declare function hourShortToStartTime(hourShort: string | null): string | null;
22
+ export declare function linkCRtoCommissionGroup(opts: {
23
+ dataDir: string;
24
+ session: number;
25
+ dateISO: string;
26
+ organeDetected: string | null;
27
+ hourShort: string | null;
28
+ crUid: string;
29
+ titreGuess?: string | null;
30
+ groupUid?: string;
31
+ }): Promise<{
25
32
  uid: string;
26
33
  filePath: string;
27
34
  created: boolean;
35
+ updated: boolean;
28
36
  }>;
@@ -2,8 +2,6 @@ import path from "path";
2
2
  import * as cheerio from "cheerio";
3
3
  import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
4
4
  import fs from "fs-extra";
5
- import { makeTypeGroupUid } from "./reunion_grouping";
6
- import { sessionStartYearFromDate } from "../model/seance";
7
5
  export function computeIntervalsBySlot($, idx, firstSlotOfDay) {
8
6
  const all = $("body *").toArray();
9
7
  const cuts = [{ pos: 0, hhmm: undefined }];
@@ -312,7 +310,7 @@ export async function loadCommissionAgendaForDate(dataDir, yyyymmdd, session) {
312
310
  }
313
311
  return out;
314
312
  }
315
- function hourShortToStartTime(hourShort) {
313
+ export function hourShortToStartTime(hourShort) {
316
314
  if (!hourShort || hourShort === "NA")
317
315
  return null;
318
316
  if (!/^\d{4}$/.test(hourShort))
@@ -321,66 +319,52 @@ function hourShortToStartTime(hourShort) {
321
319
  const mm = hourShort.slice(2, 4);
322
320
  return `${hh}:${mm}`;
323
321
  }
324
- export async function createCommissionGroupIfMissing(dataDir, dateISO, // "YYYY-MM-DD"
325
- organeDetected, // ex. "Commission des finances"
326
- hourShort, // "HHMM" | "NA"
327
- titreGuess) {
328
- const uid = makeTypeGroupUid(dateISO, "COM", hourShort ?? "NA", organeDetected ?? undefined);
329
- const session = sessionStartYearFromDate(new Date(dateISO));
330
- const dir = path.join(dataDir, "agenda", "transformed", String(session));
331
- await fs.ensureDir(dir);
332
- const filePath = path.join(dir, `${uid}.json`);
333
- let groups = [];
322
+ export async function linkCRtoCommissionGroup(opts) {
323
+ const { dataDir, session, dateISO, organeDetected, hourShort, crUid, titreGuess, groupUid } = opts;
324
+ const computedUid = crUid.replace(/^CRC/, "RU");
325
+ const uid = groupUid ?? computedUid; // <-- on respecte l’uid existant si fourni
326
+ const groupedDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
327
+ await fs.ensureDir(groupedDir);
328
+ const filePath = path.join(groupedDir, `${uid}.json`);
329
+ let group = null;
334
330
  let created = false;
335
- if (await fs.pathExists(filePath)) {
336
- try {
337
- const raw = await fs.readFile(filePath, "utf8");
338
- groups = JSON.parse(raw);
339
- if (!Array.isArray(groups))
340
- groups = [];
341
- }
342
- catch {
343
- groups = [];
344
- }
345
- const exists = groups.some((g) => g?.uid === uid);
346
- if (!exists) {
347
- groups.push({
348
- uid,
349
- chambre: "SN",
350
- date: dateISO,
351
- slot: null,
352
- type: organeDetected ?? "Commission",
353
- startTime: hourShortToStartTime(hourShort),
354
- endTime: null,
355
- captationVideo: false,
356
- titre: titreGuess ?? null,
357
- objet: null,
358
- reunions: [],
359
- compteRenduRefUid: null,
360
- });
361
- await fs.writeJSON(filePath, groups, { spaces: 2 });
362
- created = true;
331
+ try {
332
+ if (await fs.pathExists(filePath)) {
333
+ group = await fs.readJSON(filePath);
363
334
  }
364
335
  }
336
+ catch (e) {
337
+ console.warn(`[AGENDA][COM] Unreadable JSON → ${filePath} (${e?.message}) → will recreate`);
338
+ }
339
+ if (!group) {
340
+ // group = {
341
+ // uid,
342
+ // chambre: "SN",
343
+ // date: dateISO,
344
+ // type: organeDetected ?? "Commissions",
345
+ // startTime: hourShortToStartTime(hourShort),
346
+ // endTime: null,
347
+ // captationVideo: false,
348
+ // titre: titreGuess ?? "",
349
+ // objet: "",
350
+ // events: [],
351
+ // compteRenduRefUid: crUid,
352
+ // }
353
+ // created = true
354
+ }
365
355
  else {
366
- groups = [
367
- {
368
- uid,
369
- chambre: "SN",
370
- date: dateISO,
371
- slot: null,
372
- type: organeDetected ?? "Commission",
373
- startTime: hourShortToStartTime(hourShort),
374
- endTime: null,
375
- captationVideo: false,
376
- titre: titreGuess ?? null,
377
- objet: null,
378
- reunions: [],
379
- compteRenduRefUid: null,
380
- },
381
- ];
382
- await fs.writeJSON(filePath, groups, { spaces: 2 });
383
- created = true;
356
+ group.compteRenduRefUid = crUid;
384
357
  }
385
- return { uid, filePath, created };
358
+ // Lien CR
359
+ // Enrichir depuis CR si vide
360
+ // const sommaire = cr?.metadonnees?.sommaire as Sommaire | undefined;
361
+ // if (sommaire) {
362
+ // const { titre: dTitre, objet: dObjet } = deriveTitreObjetFromSommaire(sommaire, undefined);
363
+ // if (!group.titre && dTitre) group.titre = dTitre;
364
+ // if ((!group.objet || !group.objet.trim()) && dObjet) group.objet = dObjet;
365
+ // } else if (!group.titre && titreGuess) {
366
+ // group.titre = titreGuess;
367
+ // }
368
+ await fs.writeJSON(filePath, group, { spaces: 2 });
369
+ return { uid, filePath, created, updated: !created };
386
370
  }
@@ -1,7 +1,7 @@
1
1
  import { AgendaEvent, GroupedReunion, TimeSlot } from "../types/agenda";
2
2
  type KnownType = "SP" | "COM" | "MC" | "OD" | "ID";
3
3
  export declare function groupNonSPByTypeOrganeHour(events: AgendaEvent[]): Record<"IDC" | "IDM" | "IDO" | "IDI", GroupedReunion[]>;
4
- export declare function groupSeancePubliqueBySlot(events: AgendaEvent[]): GroupedReunion[];
4
+ export declare function groupSeancePubliqueBySlot(events: AgendaEvent[]): Record<TimeSlot, GroupedReunion[]>;
5
5
  export declare function makeTypeGroupUid(dateISO: string, kind: KnownType, hourShort: string | null, organe?: string | null): string;
6
6
  export declare function makeGroupUid(date: string, slot: TimeSlot): string;
7
7
  export declare function formatYYYYMMDD(dateYYYYMMDD: string): string;
@@ -2,20 +2,30 @@ import { DateTime } from "luxon";
2
2
  import { norm } from "../model/util";
3
3
  const PARIS = "Europe/Paris";
4
4
  const STOPWORDS = new Set([
5
- "de", "du", "des",
6
- "la", "le", "les", "l",
5
+ "de",
6
+ "du",
7
+ "des",
8
+ "la",
9
+ "le",
10
+ "les",
11
+ "l",
7
12
  "d",
8
13
  "et",
9
14
  "en",
10
- "au", "aux",
15
+ "au",
16
+ "aux",
11
17
  "pour",
12
- "sur", "sous", "à", "a", "aux",
18
+ "sur",
19
+ "sous",
20
+ "à",
21
+ "a",
22
+ "aux",
13
23
  ]);
14
24
  export function groupNonSPByTypeOrganeHour(events) {
15
25
  const out = { IDC: [], IDM: [], IDO: [], IDI: [] };
16
26
  if (!events?.length)
17
27
  return out;
18
- const nonSP = events.filter(e => !isSeancePublique(e?.type));
28
+ const nonSP = events.filter((e) => !isSeancePublique(e?.type));
19
29
  if (nonSP.length === 0)
20
30
  return out;
21
31
  const buckets = new Map();
@@ -33,15 +43,17 @@ export function groupNonSPByTypeOrganeHour(events) {
33
43
  for (const [key, list] of buckets) {
34
44
  const [date, kindStr, hourShort] = key.split("|");
35
45
  const kind = kindStr;
36
- const enriched = list.map(ev => {
46
+ const enriched = list
47
+ .map((ev) => {
37
48
  const { startISO, endISO } = deriveTimesForEvent(ev);
38
49
  return { ev, startISO: startISO ?? ev.startTime, endISO: endISO ?? ev.endTime };
39
- }).sort((a, b) => {
40
- const ta = a.startISO ? parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
41
- const tb = b.startISO ? parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
50
+ })
51
+ .sort((a, b) => {
52
+ const ta = a.startISO ? (parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
53
+ const tb = b.startISO ? (parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
42
54
  return ta - tb;
43
55
  });
44
- const startTime = enriched.find(x => !!x.startISO)?.startISO ?? null;
56
+ const startTime = enriched.find((x) => !!x.startISO)?.startISO ?? null;
45
57
  const endTime = enriched.reduce((acc, x) => {
46
58
  const de = x.endISO ? parseISO(x.endISO)?.toMillis() : null;
47
59
  const accMs = acc ? parseISO(acc)?.toMillis() : null;
@@ -52,7 +64,7 @@ export function groupNonSPByTypeOrganeHour(events) {
52
64
  const any = enriched[0]?.ev;
53
65
  const hour = hourShort !== "NA" ? hourShort : (hourShortFromISO(startTime) ?? hourShortFromOriginal(any?.timeOriginal));
54
66
  const uid = makeTypeGroupUid(date, kind, hour ?? null, any?.organe || undefined);
55
- const suffix = (kind === "COM" ? "IDC" : kind === "MC" ? "IDM" : kind === 'OD' ? 'IDO' : "IDI");
67
+ const suffix = (kind === "COM" ? "IDC" : kind === "MC" ? "IDM" : kind === "OD" ? "IDO" : "IDI");
56
68
  const group = {
57
69
  uid,
58
70
  chambre: "SN",
@@ -61,10 +73,10 @@ export function groupNonSPByTypeOrganeHour(events) {
61
73
  organe: any?.organe || undefined,
62
74
  startTime,
63
75
  endTime,
64
- captationVideo: enriched.some(x => x.ev.captationVideo === true),
65
- titre: compactTitleList(enriched.map(x => x.ev.titre || "").filter(Boolean), 8),
66
- objet: joinObjets(enriched.map(x => x.ev)),
67
- events: enriched.map(x => x.ev),
76
+ captationVideo: enriched.some((x) => x.ev.captationVideo === true),
77
+ titre: compactTitleList(enriched.map((x) => x.ev.titre || "").filter(Boolean), 8),
78
+ objet: joinObjets(enriched.map((x) => x.ev)),
79
+ events: enriched.map((x) => x.ev),
68
80
  };
69
81
  out[suffix].push(group);
70
82
  }
@@ -78,11 +90,15 @@ export function groupNonSPByTypeOrganeHour(events) {
78
90
  return out;
79
91
  }
80
92
  export function groupSeancePubliqueBySlot(events) {
93
+ // Résultat à la manière de groupNonSPByTypeOrganeHour : objet de listes, ici indexé par créneau
94
+ const out = {};
95
+ const ensureBucket = (slot) => (out[slot] ??= []);
81
96
  if (!events?.length)
82
- return [];
83
- const sp = events.filter(e => isSeancePublique(e?.type));
97
+ return out;
98
+ const sp = events.filter((e) => isSeancePublique(e?.type));
84
99
  if (sp.length === 0)
85
- return [];
100
+ return out;
101
+ // Regroupement par date
86
102
  const byDate = new Map();
87
103
  for (const e of sp) {
88
104
  const d = norm(e.date);
@@ -92,17 +108,19 @@ export function groupSeancePubliqueBySlot(events) {
92
108
  byDate.set(d, []);
93
109
  byDate.get(d).push(e);
94
110
  }
95
- const out = [];
111
+ // Pour chaque date : enrichir, bucketiser par slot, puis pousser dans out[slot]
96
112
  for (const [date, dayEvents] of byDate) {
97
113
  const enriched = dayEvents.map((e) => {
98
114
  const { startISO, endISO, slot } = deriveTimesForEvent(e);
99
115
  return { ev: e, startISO, endISO, slot };
100
116
  });
117
+ // tri par heure de début connue
101
118
  enriched.sort((a, b) => {
102
- const da = a.startISO ? parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
103
- const db = b.startISO ? parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
119
+ const da = a.startISO ? (parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
120
+ const db = b.startISO ? (parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
104
121
  return da - db;
105
122
  });
123
+ // Bucket par slot (en déduisant le slot UNKNOWN via l'heure si possible)
106
124
  const bySlot = new Map();
107
125
  for (const it of enriched) {
108
126
  let s = it.slot;
@@ -111,14 +129,17 @@ export function groupSeancePubliqueBySlot(events) {
111
129
  if (dt)
112
130
  s = slotOf(dt);
113
131
  }
132
+ if (s === "UNKNOWN")
133
+ continue; // on écarte les inconnus résiduels (option : créer un bucket "UNKNOWN")
114
134
  if (!bySlot.has(s))
115
135
  bySlot.set(s, []);
116
136
  bySlot.get(s).push(it);
117
137
  }
138
+ // Construire les GroupedReunion et les pousser dans out[slot]
118
139
  for (const [slot, list] of bySlot) {
119
140
  const sorted = list.slice().sort((a, b) => {
120
- const da = a.startISO ? parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
121
- const db = b.startISO ? parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER : Number.MAX_SAFE_INTEGER;
141
+ const da = a.startISO ? (parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
142
+ const db = b.startISO ? (parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
122
143
  return da - db;
123
144
  });
124
145
  const startTime = sorted.find((x) => !!x.startISO)?.startISO ?? null;
@@ -131,7 +152,7 @@ export function groupSeancePubliqueBySlot(events) {
131
152
  }, null);
132
153
  const titres = sorted.map((x) => x.ev.titre || "").filter(Boolean);
133
154
  const captationVideo = sorted.some((x) => x.ev.captationVideo === true);
134
- out.push({
155
+ ensureBucket(slot).push({
135
156
  uid: makeGroupUid(date, slot),
136
157
  chambre: "SN",
137
158
  date,
@@ -146,11 +167,15 @@ export function groupSeancePubliqueBySlot(events) {
146
167
  });
147
168
  }
148
169
  }
149
- out.sort((a, b) => {
150
- const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
151
- const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
152
- return da - db || a.slot.localeCompare(b.slot);
153
- });
170
+ // Tri interne de chaque créneau (cohérent avec groupNonSPByTypeOrganeHour)
171
+ for (const s of Object.keys(out)) {
172
+ out[s].sort((a, b) => {
173
+ const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
174
+ const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
175
+ // puis par nom de slot pour stabilité (facultatif)
176
+ return da - db || (a.slot || "UNKNOWN").localeCompare(b.slot || "UNKNOWN");
177
+ });
178
+ }
154
179
  return out;
155
180
  }
156
181
  function normalizeNoAccents(s) {
@@ -179,11 +204,16 @@ function classifyAgendaType(typeLabel) {
179
204
  }
180
205
  function typeToSuffixStrict(kind) {
181
206
  switch (kind) {
182
- case "SP": return "IDS";
183
- case "COM": return "IDC";
184
- case "MC": return "IDM";
185
- case "OD": return "IDO";
186
- case "ID": return "IDI";
207
+ case "SP":
208
+ return "IDS";
209
+ case "COM":
210
+ return "IDC";
211
+ case "MC":
212
+ return "IDM";
213
+ case "OD":
214
+ return "IDO";
215
+ case "ID":
216
+ return "IDI";
187
217
  }
188
218
  }
189
219
  function hourShortFromISO(iso) {
@@ -271,7 +301,7 @@ function trimWords(s, max = 40) {
271
301
  return words.length <= max ? words.join(" ") : words.slice(0, max).join(" ");
272
302
  }
273
303
  function compactTitleList(titres, maxTitles = 5) {
274
- const uniq = Array.from(new Set(titres.map(t => norm(t)).filter(Boolean)));
304
+ const uniq = Array.from(new Set(titres.map((t) => norm(t)).filter(Boolean)));
275
305
  return uniq.slice(0, maxTitles).join(" · ") || "(sans titre)";
276
306
  }
277
307
  export function makeGroupUid(date, slot) {
@@ -288,9 +318,9 @@ export function makeReunionUid(agenda) {
288
318
  }
289
319
  function joinObjets(events) {
290
320
  const objets = events
291
- .map(e => (e.objet || "").trim())
321
+ .map((e) => (e.objet || "").trim())
292
322
  .filter(Boolean)
293
- .map(s => trimWords(s, 40));
323
+ .map((s) => trimWords(s, 40));
294
324
  if (objets.length === 0)
295
325
  return "";
296
326
  return objets.join(" · ");
@@ -328,8 +358,12 @@ function parseTimeOriginalFR(timeOriginal) {
328
358
  }
329
359
  return { start: null, end: null };
330
360
  }
331
- function clampHour(h) { return Math.max(0, Math.min(23, h)); }
332
- function clampMinute(m) { return Math.max(0, Math.min(59, m)); }
361
+ function clampHour(h) {
362
+ return Math.max(0, Math.min(23, h));
363
+ }
364
+ function clampMinute(m) {
365
+ return Math.max(0, Math.min(59, m));
366
+ }
333
367
  function toIsoTime(h, m) {
334
368
  return `${String(h).padStart(2, "0")}:${String(m).padStart(2, "0")}:00.000+02:00`;
335
369
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.11.3",
3
+ "version": "2.11.5",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",