@tricoteuses/senat 2.20.13 → 2.20.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ import { ID_DATE_FORMAT } from "./datautil";
10
10
  import { commonOptions } from "./shared/cli_helpers";
11
11
  import { fetchWithRetry } from "./shared/util";
12
12
  import { groupNonSPByTypeOrganeHour, groupSeancePubliqueBySlot } from "../utils/reunion_grouping";
13
+ import { buildSenatDossierIndex } from "../utils/reunion_odj_building";
13
14
  const optionsDefinitions = [
14
15
  ...commonOptions,
15
16
  {
@@ -26,9 +27,9 @@ class AgendaError extends Error {
26
27
  super(`An error occurred while retrieving Agenda ${agendaName}: ${message}`);
27
28
  }
28
29
  }
29
- async function retrieveAgendas(dataDir, sessions) {
30
+ async function retrieveAgendas(options, sessions) {
30
31
  console.log(`[AGENDA] Retrieving agendas for sessions ${sessions.join(", ")}`);
31
- const agendaRootDir = path.join(dataDir, AGENDA_FOLDER);
32
+ const agendaRootDir = path.join(options["dataDir"], AGENDA_FOLDER);
32
33
  fs.ensureDirSync(agendaRootDir);
33
34
  const originalAgendaDir = path.join(agendaRootDir, DATA_ORIGINAL_FOLDER);
34
35
  fs.ensureDirSync(originalAgendaDir);
@@ -36,14 +37,18 @@ async function retrieveAgendas(dataDir, sessions) {
36
37
  if (options["parseAgenda"]) {
37
38
  fs.ensureDirSync(transformedAgendaDir);
38
39
  }
40
+ let dossierIndex = {};
41
+ dossierIndex = buildSenatDossierIndex(options);
39
42
  for (const session of sessions) {
40
43
  const originalAgendaSessionDir = path.join(originalAgendaDir, `${session}`);
41
44
  fs.ensureDirSync(originalAgendaSessionDir);
42
- fs.emptyDirSync(originalAgendaSessionDir);
45
+ if (!options["keepDir"])
46
+ fs.emptyDirSync(originalAgendaSessionDir);
43
47
  const transformedAgendaSessionDir = path.join(transformedAgendaDir, `${session}`);
44
48
  if (options["parseAgenda"]) {
45
49
  fs.ensureDirSync(transformedAgendaSessionDir);
46
- fs.emptyDirSync(transformedAgendaSessionDir);
50
+ if (!options["keepDir"])
51
+ fs.emptyDirSync(transformedAgendaSessionDir);
47
52
  }
48
53
  const fifteenDaysFromNow = new Date();
49
54
  fifteenDaysFromNow.setDate(fifteenDaysFromNow.getDate() + 15); // Don't download agendas more than 15 days in the future
@@ -54,7 +59,7 @@ async function retrieveAgendas(dataDir, sessions) {
54
59
  try {
55
60
  await downloadAgenda(agendaName, agendaPath);
56
61
  if (options["parseAgenda"]) {
57
- await parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath);
62
+ await parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath, dossierIndex);
58
63
  }
59
64
  }
60
65
  catch (error) {
@@ -90,7 +95,7 @@ function writeGroupsAsFiles(dir, groups) {
90
95
  fs.writeJSONSync(outPath, g, { spaces: 2 });
91
96
  }
92
97
  }
93
- async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath) {
98
+ async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath, dossierBySenatUrl) {
94
99
  if (!options["silent"])
95
100
  console.log(`Parsing Agenda ${agendaPath}…`);
96
101
  const parsedAgendaEvents = await parseAgendaFromFile(agendaPath);
@@ -99,7 +104,7 @@ async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPa
99
104
  const flatPath = path.join(transformedAgendaSessionDir, `${agendaFileName}.json`);
100
105
  fs.writeJSONSync(flatPath, parsedAgendaEvents, { spaces: 2 });
101
106
  // 1) SP → grouped by (date, slot)
102
- const spGrouped = groupSeancePubliqueBySlot(parsedAgendaEvents);
107
+ const spGrouped = groupSeancePubliqueBySlot(parsedAgendaEvents, dossierBySenatUrl);
103
108
  // a) on a un Record<TimeSlot, GroupedReunion[]>, on le transforme en array
104
109
  const spGroups = Object.values(spGrouped).flat();
105
110
  // b) (reco) trier pour stabilité, comme pour les NON-SP
@@ -114,7 +119,7 @@ async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPa
114
119
  writeGroupsAsFiles(transformedAgendaSessionDir, spGroups);
115
120
  }
116
121
  // 2) NON-SP → grouped by (date, organe, hour)
117
- const groupedBySuffix = groupNonSPByTypeOrganeHour(parsedAgendaEvents);
122
+ const groupedBySuffix = groupNonSPByTypeOrganeHour(parsedAgendaEvents, dossierBySenatUrl);
118
123
  for (const suffix of ["IDC", "IDM", "IDO", "IDI"]) {
119
124
  const groups = groupedBySuffix[suffix] || [];
120
125
  if (groups.length > 0) {
@@ -127,7 +132,7 @@ async function main() {
127
132
  assert(dataDir, "Missing argument: data directory");
128
133
  const sessions = getSessionsFromStart(options["fromSession"]);
129
134
  console.time("agenda processing time");
130
- await retrieveAgendas(dataDir, sessions);
135
+ await retrieveAgendas(options, sessions);
131
136
  console.timeEnd("agenda processing time");
132
137
  }
133
138
  main()
@@ -32,4 +32,13 @@ export interface GroupedReunion {
32
32
  transcriptionRef?: string;
33
33
  urlVideo?: string;
34
34
  timecodeDebutVideo?: number;
35
+ odj?: GroupedReunionOdj;
36
+ }
37
/**
 * One item of a grouped meeting's agenda ("ordre du jour").
 *
 * Every field is nullable: a point may lack any of them.
 */
export interface GroupedReunionOdjPoint {
    /** Subject of the point, or `null` when none is available. */
    objet: string | null;
    /** Reference of the related legislative dossier, or `null` when none is linked. */
    dossierLegislatifRef: string | null;
    /** Code of the legislative step discussed, or `null` when it could not be determined. */
    codeEtape: string | null;
}
42
/** Agenda ("ordre du jour") attached to a grouped meeting. */
export interface GroupedReunionOdj {
    /** Agenda points of the meeting. */
    pointsOdj: GroupedReunionOdjPoint[];
}
@@ -1,7 +1,9 @@
1
1
  import { AgendaEvent, GroupedReunion, TimeSlot } from "../types/agenda";
2
+ import { DossierLegislatifResult } from "../model/dosleg";
2
3
  type KnownType = "SP" | "COM" | "MC" | "OD" | "ID";
3
- export declare function groupNonSPByTypeOrganeHour(events: AgendaEvent[]): Record<"IDC" | "IDM" | "IDO" | "IDI", GroupedReunion[]>;
4
- export declare function groupSeancePubliqueBySlot(events: AgendaEvent[]): Record<TimeSlot, GroupedReunion[]>;
4
+ type DossierBySenatUrl = Record<string, DossierLegislatifResult>;
5
+ export declare function groupNonSPByTypeOrganeHour(events: AgendaEvent[], DossierBySenatUrl: DossierBySenatUrl): Record<"IDC" | "IDM" | "IDO" | "IDI", GroupedReunion[]>;
6
+ export declare function groupSeancePubliqueBySlot(events: AgendaEvent[], dossierBySenatUrl: DossierBySenatUrl): Record<TimeSlot, GroupedReunion[]>;
5
7
  export declare function makeTypeGroupUid(dateISO: string, kind: KnownType, agendaEventId: string, organe?: string | null): string;
6
8
  export declare function makeGroupUid(date: string, slot: TimeSlot): string;
7
9
  export declare function formatYYYYMMDD(dateYYYYMMDD: string): string;
@@ -1,5 +1,6 @@
1
1
  import { DateTime } from "luxon";
2
2
  import { norm } from "../model/util";
3
+ import { buildOdj } from "./reunion_odj_building";
3
4
  const PARIS = "Europe/Paris";
4
5
  const STOPWORDS = new Set([
5
6
  "de",
@@ -21,7 +22,7 @@ const STOPWORDS = new Set([
21
22
  "a",
22
23
  "aux",
23
24
  ]);
24
- export function groupNonSPByTypeOrganeHour(events) {
25
+ export function groupNonSPByTypeOrganeHour(events, DossierBySenatUrl) {
25
26
  const out = { IDC: [], IDM: [], IDO: [], IDI: [] };
26
27
  if (!events?.length)
27
28
  return out;
@@ -49,6 +50,7 @@ export function groupNonSPByTypeOrganeHour(events) {
49
50
  titre: e.titre,
50
51
  objet: e.objet || "",
51
52
  events: [e],
53
+ odj: buildOdj([e], DossierBySenatUrl),
52
54
  };
53
55
  out[suffix].push(group);
54
56
  }
@@ -61,8 +63,7 @@ export function groupNonSPByTypeOrganeHour(events) {
61
63
  }
62
64
  return out;
63
65
  }
64
- export function groupSeancePubliqueBySlot(events) {
65
- // Résultat à la manière de groupNonSPByTypeOrganeHour : objet de listes, ici indexé par créneau
66
+ export function groupSeancePubliqueBySlot(events, dossierBySenatUrl) {
66
67
  const out = {};
67
68
  const ensureBucket = (slot) => (out[slot] ??= []);
68
69
  if (!events?.length)
@@ -70,7 +71,6 @@ export function groupSeancePubliqueBySlot(events) {
70
71
  const sp = events.filter((e) => isSeancePublique(e?.type));
71
72
  if (sp.length === 0)
72
73
  return out;
73
- // Regroupement par date
74
74
  const byDate = new Map();
75
75
  for (const e of sp) {
76
76
  const d = norm(e.date);
@@ -124,6 +124,7 @@ export function groupSeancePubliqueBySlot(events) {
124
124
  }, null);
125
125
  const titres = sorted.map((x) => x.ev.titre || "").filter(Boolean);
126
126
  const captationVideo = sorted.some((x) => x.ev.captationVideo === true);
127
+ const eventsForSlot = sorted.map((x) => x.ev);
127
128
  ensureBucket(slot).push({
128
129
  uid: makeGroupUid(date, slot),
129
130
  chambre: "SN",
@@ -137,6 +138,7 @@ export function groupSeancePubliqueBySlot(events) {
137
138
  titre: compactTitleList(titres, 5),
138
139
  objet: joinObjets(sorted.map((x) => x.ev)),
139
140
  events: sorted.map((x) => x.ev),
141
+ odj: buildOdj(eventsForSlot, dossierBySenatUrl),
140
142
  });
141
143
  }
142
144
  }
@@ -189,32 +191,6 @@ function typeToSuffixStrict(kind) {
189
191
  return "IDI";
190
192
  }
191
193
  }
192
- function hourShortFromISO(iso) {
193
- if (!iso)
194
- return null;
195
- const dt = parseISO(iso);
196
- if (!dt)
197
- return null;
198
- const z = DateTime.fromISO(iso, { zone: PARIS });
199
- const H = String(z.hour);
200
- const mm = String(z.minute).padStart(2, "0");
201
- return `${H}${mm}`;
202
- }
203
- function hourShortFromOriginal(s) {
204
- if (!s)
205
- return null;
206
- const clean = normalizeNoAccents(s).toLowerCase();
207
- const m = clean.match(/(\d{1,2})\s*[h:]\s*(\d{2})/);
208
- if (m) {
209
- const H = String(parseInt(m[1], 10));
210
- const mm = m[2].padStart(2, "0");
211
- return `${H}${mm}`;
212
- }
213
- const m2 = clean.match(/(\d{1,2})\s*h\b/);
214
- if (m2)
215
- return `${parseInt(m2[1], 10)}00`;
216
- return null;
217
- }
218
194
  function organeInitials(input, maxLen = 8) {
219
195
  if (!input)
220
196
  return "";
@@ -0,0 +1,5 @@
1
+ import commandLineArgs from "command-line-args";
2
+ import { DossierLegislatifResult } from "../model/dosleg";
3
+ import { AgendaEvent, GroupedReunionOdj } from "../types/agenda";
4
+ export declare function buildOdj(events: AgendaEvent[], dossierBySenatUrl: Record<string, DossierLegislatifResult>): GroupedReunionOdj | undefined;
5
+ export declare function buildSenatDossierIndex(options: commandLineArgs.CommandLineOptions): Record<string, DossierLegislatifResult>;
@@ -0,0 +1,162 @@
1
+ import { getSessionsFromStart } from "../types/sessions";
2
+ import { iterLoadSenatDossiersLegislatifs } from "../loaders";
3
/**
 * Builds the agenda ("ordre du jour") of a grouped meeting from its events.
 *
 * One point is produced per distinct trimmed `objet`, and only for events
 * whose `urlDossierSenat` resolves to a dossier carrying a usable uid.
 * The first such event seen for an `objet` wins.
 *
 * Each point carries the step code computed from the event that created it.
 * Previously a single `codeEtape` variable was shared by the whole loop, so
 * every point received the code of the last event processed.
 *
 * @param events Agenda events of the group; a missing list is treated as empty.
 * @param dossierBySenatUrl Dossiers indexed by normalised Sénat URL; a missing
 *   index means that no dossier can be matched.
 * @returns The agenda, or `undefined` when no point could be built.
 */
export function buildOdj(events, dossierBySenatUrl) {
    // objet -> agenda point; a Map keeps insertion order, so points come out
    // in the order their objet was first matched to a dossier.
    const pointByObjet = new Map();
    for (const ev of events ?? []) {
        const objetKey = (ev.objet ?? "").trim();
        const url = normalizeSenatUrl(ev.urlDossierSenat) ?? undefined;
        if (url)
            console.log(` urlDossierSenat: ${url}`);
        const dossier = url ? dossierBySenatUrl?.[url] : null;
        const dossierUid = dossier ? pickDossierUid(dossier) : undefined;
        if (url)
            console.log(` → matched dossier uid: ${dossierUid}`);
        // A point needs a dossier reference, and only the first match per objet is kept.
        if (!dossierUid || pointByObjet.has(objetKey))
            continue;
        pointByObjet.set(objetKey, {
            objet: objetKey || null,
            dossierLegislatifRef: dossierUid,
            // Computed per point, from the event that produced it.
            codeEtape: computeCodeEtape(ev, dossier),
        });
    }
    if (pointByObjet.size === 0)
        return undefined;
    return { pointsOdj: [...pointByObjet.values()] };
}
36
/**
 * Picks the identifier used to reference a dossier.
 *
 * The trimmed `signet` is preferred; the trimmed string form of `code` is the
 * fallback. Returns `undefined` when both are missing or blank.
 */
function pickDossierUid(d) {
    const signet = d["signet"] ? d["signet"].trim() : "";
    if (signet !== "")
        return signet;
    // `code` may not be a string, hence the explicit conversion.
    const code = d["code"] ? String(d["code"]).trim() : "";
    return code !== "" ? code : undefined;
}
43
/**
 * Normalises a Sénat dossier URL so that it can be used as a lookup key.
 *
 * - Non-string, empty or blank input yields `null`.
 * - Input without an `http(s)://` scheme is returned trimmed, otherwise unchanged.
 * - Absolute URLs get a lower-case `https://` scheme, whatever the case or
 *   protocol of the original, and lose their trailing slashes. Previously
 *   `HTTPS://…` kept its upper-case scheme and so produced a different key
 *   from the same URL written `https://…`.
 *
 * @param url Raw URL, possibly `null`/`undefined`.
 * @returns The normalised URL, or `null` when there is nothing to normalise.
 */
function normalizeSenatUrl(url) {
    if (typeof url !== "string")
        return null;
    const trimmed = url.trim();
    if (!trimmed)
        return null;
    if (!/^https?:\/\//i.test(trimmed))
        return trimmed;
    return trimmed
        .replace(/^https?:\/\//i, "https://") // force https://, lower-cased
        .replace(/\/+$/, "");
}
56
/**
 * Loads the Sénat legislative dossiers and indexes them by normalised URL.
 *
 * Dossiers are read from `options["dataDir"]` for every session returned by
 * `getSessionsFromStart(2015)`. Dossiers without a usable URL are skipped.
 * When several dossiers normalise to the same URL, the last one loaded wins.
 *
 * @param options Command-line options; only `dataDir` is read.
 * @returns A record mapping normalised dossier URL to dossier.
 */
export function buildSenatDossierIndex(options) {
    const dossierByUrl = {};
    for (const session of getSessionsFromStart(2015)) {
        for (const { item: dossier } of iterLoadSenatDossiersLegislatifs(options["dataDir"], session)) {
            const rawUrl = dossier["url"];
            if (!rawUrl)
                continue;
            const key = normalizeSenatUrl(rawUrl);
            if (key)
                dossierByUrl[key] = dossier;
        }
    }
    return dossierByUrl;
}
69
// Reading-level patterns, tried in order against the lower-cased objet.
// The `(?<!\d)` guard stops "2ème"/"3ème" from matching inside larger
// ordinals such as "12ème" or "23ème".
const LECTURE_PATTERNS = [
    [1, /première\s+lecture|(?<!\d)1(?:ère|re)\s+lecture/],
    [2, /(?:deuxième|seconde)\s+lecture|(?<!\d)2(?:ème|e\s+lecture)/],
    [3, /troisième\s+lecture|(?<!\d)3(?:ème|e\s+lecture)/],
];
/**
 * Detects which reading ("lecture") an agenda objet refers to.
 *
 * Matching is case-insensitive. Besides the spelled-out forms it accepts the
 * abbreviations "1ère/1re lecture", "2e lecture", "3e lecture", and the bare
 * "2ème"/"3ème" the original accepted, provided no digit precedes them.
 *
 * @param objet Free-text objet of the event.
 * @returns 1, 2 or 3, or `undefined` when no reading is mentioned or the
 *   input is not a string.
 */
function detectLecture(objet) {
    if (typeof objet !== "string" || objet === "")
        return undefined;
    const text = objet.toLowerCase();
    for (const [lecture, pattern] of LECTURE_PATTERNS) {
        if (pattern.test(text))
            return lecture;
    }
    return undefined;
}
79
/**
 * Finds the code of the legislative step ("code acte") an event relates to.
 *
 * Resolution order:
 *  1. Strict: Sénat acts dated the same day as the event, restricted to the
 *     event's nature (COM / DEBATS) when it is known and to the detected
 *     reading when that keeps at least one candidate. The longest code wins.
 *  2. COM fallback (commission events only): the most recent COM act dated
 *     on or before the event, preferring the detected reading.
 *  3. Reading fallback: the root code of the Sénat reading node matching the
 *     detected reading.
 *
 * Fixes over the previous version: step 3 now only considers nodes whose
 * `assemblee` is "Sénat" (it could return another chamber's code), and an
 * event without a string `date` skips the date-based steps instead of throwing.
 *
 * @param ev Agenda event (`objet`, `organe`, `date` and `id` are read).
 * @param dossier Legislative dossier (`actes_legislatifs` is read).
 * @returns The step code, or `null` when none could be determined.
 */
function computeCodeEtape(ev, dossier) {
    const lecture = detectLecture(ev.objet ?? "");
    const organe = (ev.organe ?? "").toLowerCase();
    const nature = organe.includes("commission")
        ? "COM"
        : organe.includes("séance publique")
            ? "DEBATS"
            : "";
    // Day part only; an empty string can never match an act date.
    const evDate = typeof ev.date === "string" ? ev.date.split("T")[0] : "";
    const flat = buildFlatActes(dossier);
    // 1) Strict match: same date + nature (COM / DEBATS when known)
    let candidates = flat.filter((a) => {
        if (a.date !== evDate)
            return false;
        if (nature && !a.codeActe.includes(nature))
            return false;
        return true;
    });
    console.log(` → candidats STRICT (date==${evDate} & nature=${nature || "ANY"}): ${candidates.length}`);
    candidates.forEach((c) => console.log(` STRICT MATCH: ${c.codeActe} (date=${c.date}, lecture=${c.ordreLecture})`));
    // When a reading was detected, narrow on it only if candidates remain.
    if (lecture !== undefined && candidates.length > 0) {
        const withLecture = candidates.filter((c) => c.ordreLecture === lecture);
        if (withLecture.length > 0) {
            console.log(` → filtre lecture=${lecture} : ${withLecture.length} candidats`);
            candidates = withLecture;
        }
    }
    // Several candidates: keep the longest code (the sort is stable).
    if (candidates.length > 0) {
        candidates.sort((a, b) => b.codeActe.length - a.codeActe.length);
        console.log(` ✔ match FINAL (intervalle) : ${candidates[0].codeActe}`);
        return candidates[0].codeActe;
    }
    // 2) COM fallback: last COM act on or before the event date
    if (nature === "COM") {
        let comActs = flat.filter((a) => a.codeActe.includes("COM") && a.date <= evDate);
        console.log(` → fallback COM: actes COM <= date : ${comActs.length}`);
        if (lecture !== undefined) {
            const byLecture = comActs.filter((a) => a.ordreLecture === lecture);
            if (byLecture.length > 0) {
                comActs = byLecture;
                console.log(` → filtrés lecture=${lecture}: ${comActs.length}`);
            }
        }
        if (comActs.length > 0) {
            // Most recent first; ties broken by the longest code.
            comActs.sort((a, b) => b.date.localeCompare(a.date) || b.codeActe.length - a.codeActe.length);
            console.log(` ✔ match FINAL (fallback COM): ${comActs[0].codeActe}`);
            return comActs[0].codeActe;
        }
    }
    // 3) Reading fallback (SN1, SN2…) from the original structure,
    //    restricted to Sénat nodes like the flattened acts above.
    if (lecture !== undefined) {
        const lectures = dossier["actes_legislatifs"] ?? [];
        const lectureNode = lectures.find((l) => l["assemblee"] === "Sénat" && l.ordre_lecture === lecture);
        const rootCode = lectureNode?.code_acte;
        if (rootCode && typeof rootCode === "string") {
            console.log(` ✔ FALLBACK LECTURE: ${rootCode}`);
            return rootCode;
        }
    }
    console.log(` ✖ aucun code d’étape trouvé pour ev=${ev.id}`);
    return null;
}
143
/**
 * Flattens the Sénat acts of a dossier into `{ codeActe, ordreLecture, date }` records.
 *
 * Only top-level nodes whose `assemblee` is "Sénat" are considered. A node
 * contributes its non-empty `actes_legislatifs` children, or itself when it
 * has none. Acts lacking a code or a date are dropped. `date` is reduced to
 * the part before "T", and `ordreLecture` comes from the enclosing node.
 */
function buildFlatActes(dossier) {
    const flattened = [];
    for (const lectureNode of dossier["actes_legislatifs"] ?? []) {
        if (lectureNode["assemblee"] !== "Sénat")
            continue;
        const children = lectureNode.actes_legislatifs;
        const hasChildren = Array.isArray(children) && children.length > 0;
        for (const acte of hasChildren ? children : [lectureNode]) {
            const day = acte.date?.split("T")[0];
            if (acte.code_acte && day) {
                flattened.push({
                    codeActe: acte.code_acte,
                    ordreLecture: lectureNode.ordre_lecture,
                    date: day,
                });
            }
        }
    }
    return flattened;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.20.13",
3
+ "version": "2.20.15",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",