@tricoteuses/senat 2.22.13 → 2.22.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/lib/src/loaders.d.ts +2 -8
  2. package/lib/src/loaders.js +7 -25
  3. package/lib/tests/test_iter_load.test.js +17 -0
  4. package/package.json +2 -2
  5. package/lib/config.d.ts +0 -21
  6. package/lib/config.js +0 -27
  7. package/lib/databases.d.ts +0 -2
  8. package/lib/databases.js +0 -26
  9. package/lib/datasets.d.ts +0 -34
  10. package/lib/datasets.js +0 -233
  11. package/lib/git.d.ts +0 -26
  12. package/lib/git.js +0 -167
  13. package/lib/index.d.ts +0 -13
  14. package/lib/index.js +0 -1
  15. package/lib/loaders.d.ts +0 -58
  16. package/lib/loaders.js +0 -286
  17. package/lib/model/agenda.d.ts +0 -6
  18. package/lib/model/agenda.js +0 -148
  19. package/lib/model/ameli.d.ts +0 -51
  20. package/lib/model/ameli.js +0 -147
  21. package/lib/model/commission.d.ts +0 -18
  22. package/lib/model/commission.js +0 -269
  23. package/lib/model/debats.d.ts +0 -67
  24. package/lib/model/debats.js +0 -95
  25. package/lib/model/documents.d.ts +0 -12
  26. package/lib/model/documents.js +0 -138
  27. package/lib/model/dosleg.d.ts +0 -7
  28. package/lib/model/dosleg.js +0 -326
  29. package/lib/model/index.d.ts +0 -7
  30. package/lib/model/index.js +0 -7
  31. package/lib/model/questions.d.ts +0 -45
  32. package/lib/model/questions.js +0 -89
  33. package/lib/model/scrutins.d.ts +0 -13
  34. package/lib/model/scrutins.js +0 -114
  35. package/lib/model/seance.d.ts +0 -3
  36. package/lib/model/seance.js +0 -267
  37. package/lib/model/sens.d.ts +0 -146
  38. package/lib/model/sens.js +0 -454
  39. package/lib/model/texte.d.ts +0 -7
  40. package/lib/model/texte.js +0 -228
  41. package/lib/model/util.d.ts +0 -9
  42. package/lib/model/util.js +0 -38
  43. package/lib/parsers/texte.d.ts +0 -7
  44. package/lib/parsers/texte.js +0 -228
  45. package/lib/raw_types/ameli.d.ts +0 -914
  46. package/lib/raw_types/ameli.js +0 -5
  47. package/lib/raw_types/debats.d.ts +0 -207
  48. package/lib/raw_types/debats.js +0 -5
  49. package/lib/raw_types/dosleg.d.ts +0 -1619
  50. package/lib/raw_types/dosleg.js +0 -5
  51. package/lib/raw_types/questions.d.ts +0 -423
  52. package/lib/raw_types/questions.js +0 -5
  53. package/lib/raw_types/senat.d.ts +0 -11372
  54. package/lib/raw_types/senat.js +0 -5
  55. package/lib/raw_types/sens.d.ts +0 -8248
  56. package/lib/raw_types/sens.js +0 -5
  57. package/lib/raw_types_schemats/ameli.d.ts +0 -539
  58. package/lib/raw_types_schemats/ameli.js +0 -2
  59. package/lib/raw_types_schemats/debats.d.ts +0 -127
  60. package/lib/raw_types_schemats/debats.js +0 -2
  61. package/lib/raw_types_schemats/dosleg.d.ts +0 -977
  62. package/lib/raw_types_schemats/dosleg.js +0 -2
  63. package/lib/raw_types_schemats/questions.d.ts +0 -237
  64. package/lib/raw_types_schemats/questions.js +0 -2
  65. package/lib/raw_types_schemats/sens.d.ts +0 -6915
  66. package/lib/raw_types_schemats/sens.js +0 -2
  67. package/lib/scripts/convert_data.js +0 -354
  68. package/lib/scripts/data-download.d.ts +0 -1
  69. package/lib/scripts/data-download.js +0 -12
  70. package/lib/scripts/datautil.d.ts +0 -8
  71. package/lib/scripts/datautil.js +0 -34
  72. package/lib/scripts/parse_textes.d.ts +0 -1
  73. package/lib/scripts/parse_textes.js +0 -44
  74. package/lib/scripts/retrieve_agenda.d.ts +0 -1
  75. package/lib/scripts/retrieve_agenda.js +0 -132
  76. package/lib/scripts/retrieve_cr_commission.d.ts +0 -1
  77. package/lib/scripts/retrieve_cr_commission.js +0 -364
  78. package/lib/scripts/retrieve_cr_seance.d.ts +0 -6
  79. package/lib/scripts/retrieve_cr_seance.js +0 -347
  80. package/lib/scripts/retrieve_documents.d.ts +0 -3
  81. package/lib/scripts/retrieve_documents.js +0 -219
  82. package/lib/scripts/retrieve_open_data.d.ts +0 -1
  83. package/lib/scripts/retrieve_open_data.js +0 -316
  84. package/lib/scripts/retrieve_senateurs_photos.d.ts +0 -1
  85. package/lib/scripts/retrieve_senateurs_photos.js +0 -147
  86. package/lib/scripts/retrieve_videos.d.ts +0 -1
  87. package/lib/scripts/retrieve_videos.js +0 -461
  88. package/lib/scripts/shared/cli_helpers.d.ts +0 -95
  89. package/lib/scripts/shared/cli_helpers.js +0 -91
  90. package/lib/scripts/shared/util.d.ts +0 -4
  91. package/lib/scripts/shared/util.js +0 -35
  92. package/lib/scripts/test_iter_load.d.ts +0 -1
  93. package/lib/scripts/test_iter_load.js +0 -12
  94. package/lib/src/utils/nvs-timecode.d.ts +0 -17
  95. package/lib/src/utils/nvs-timecode.js +0 -79
  96. package/lib/src/utils/weights_scoring_config.d.ts +0 -2
  97. package/lib/src/utils/weights_scoring_config.js +0 -15
  98. package/lib/strings.d.ts +0 -1
  99. package/lib/strings.js +0 -18
  100. package/lib/types/agenda.d.ts +0 -44
  101. package/lib/types/agenda.js +0 -1
  102. package/lib/types/ameli.d.ts +0 -5
  103. package/lib/types/ameli.js +0 -1
  104. package/lib/types/compte_rendu.d.ts +0 -83
  105. package/lib/types/compte_rendu.js +0 -1
  106. package/lib/types/debats.d.ts +0 -2
  107. package/lib/types/debats.js +0 -1
  108. package/lib/types/dosleg.d.ts +0 -70
  109. package/lib/types/dosleg.js +0 -1
  110. package/lib/types/questions.d.ts +0 -2
  111. package/lib/types/questions.js +0 -1
  112. package/lib/types/sens.d.ts +0 -10
  113. package/lib/types/sens.js +0 -1
  114. package/lib/types/sessions.d.ts +0 -5
  115. package/lib/types/sessions.js +0 -84
  116. package/lib/types/texte.d.ts +0 -74
  117. package/lib/types/texte.js +0 -16
  118. package/lib/utils/cr_spliting.d.ts +0 -28
  119. package/lib/utils/cr_spliting.js +0 -265
  120. package/lib/utils/date.d.ts +0 -10
  121. package/lib/utils/date.js +0 -100
  122. package/lib/utils/nvs-timecode.d.ts +0 -7
  123. package/lib/utils/nvs-timecode.js +0 -79
  124. package/lib/utils/reunion_grouping.d.ts +0 -11
  125. package/lib/utils/reunion_grouping.js +0 -337
  126. package/lib/utils/reunion_odj_building.d.ts +0 -5
  127. package/lib/utils/reunion_odj_building.js +0 -154
  128. package/lib/utils/reunion_parsing.d.ts +0 -23
  129. package/lib/utils/reunion_parsing.js +0 -209
  130. package/lib/utils/scoring.d.ts +0 -14
  131. package/lib/utils/scoring.js +0 -147
  132. package/lib/utils/string_cleaning.d.ts +0 -7
  133. package/lib/utils/string_cleaning.js +0 -57
  134. package/lib/validators/config.d.ts +0 -9
  135. package/lib/validators/config.js +0 -10
  136. /package/lib/{scripts/convert_data.d.ts → tests/test_iter_load.test.d.ts} +0 -0
@@ -1,337 +0,0 @@
1
- import { DateTime } from "luxon";
2
- import { norm } from "../model/util";
3
- import { buildOdj } from "./reunion_odj_building";
4
- const PARIS = "Europe/Paris";
5
- const STOPWORDS = new Set([
6
- "de",
7
- "du",
8
- "des",
9
- "la",
10
- "le",
11
- "les",
12
- "l",
13
- "d",
14
- "et",
15
- "en",
16
- "au",
17
- "aux",
18
- "pour",
19
- "sur",
20
- "sous",
21
- "à",
22
- "a",
23
- "aux",
24
- ]);
25
- export function groupNonSPByTypeOrganeHour(events, DossierBySenatUrl) {
26
- const out = { IDC: [], IDM: [], IDO: [], IDI: [] };
27
- if (!events?.length)
28
- return out;
29
- const nonSP = events.filter((e) => !isSeancePublique(e?.type));
30
- if (nonSP.length === 0)
31
- return out;
32
- for (const e of nonSP) {
33
- const kind = classifyAgendaType(e?.type);
34
- if (!kind || kind === "SP")
35
- continue;
36
- const { startISO, endISO } = deriveTimesForEvent(e);
37
- const startTime = startISO ?? e.startTime ?? null;
38
- const endTime = endISO ?? e.endTime ?? null;
39
- const uid = makeTypeGroupUid(e.date, kind, e.id, e.organe ?? null);
40
- const suffix = (kind === "COM" ? "IDC" : kind === "MC" ? "IDM" : kind === "OD" ? "IDO" : "IDI");
41
- const group = {
42
- uid,
43
- chambre: "SN",
44
- date: e.date,
45
- type: e.type || "",
46
- organe: e.organe || undefined,
47
- startTime,
48
- endTime,
49
- captationVideo: e.captationVideo === true,
50
- titre: e.titre,
51
- objet: e.objet || "",
52
- events: [e],
53
- odj: buildOdj([e], DossierBySenatUrl),
54
- };
55
- out[suffix].push(group);
56
- }
57
- for (const k of Object.keys(out)) {
58
- out[k].sort((a, b) => {
59
- const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
60
- const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
61
- return da - db || (a.organe || "").localeCompare(b.organe || "");
62
- });
63
- }
64
- return out;
65
- }
66
- export function groupSeancePubliqueBySlot(events, dossierBySenatUrl) {
67
- const out = {};
68
- const ensureBucket = (slot) => (out[slot] ??= []);
69
- if (!events?.length)
70
- return out;
71
- const sp = events.filter((e) => isSeancePublique(e?.type));
72
- if (sp.length === 0)
73
- return out;
74
- const byDate = new Map();
75
- for (const e of sp) {
76
- const d = norm(e.date);
77
- if (!d)
78
- continue;
79
- if (!byDate.has(d))
80
- byDate.set(d, []);
81
- byDate.get(d).push(e);
82
- }
83
- // Pour chaque date : enrichir, bucketiser par slot, puis pousser dans out[slot]
84
- for (const [date, dayEvents] of byDate) {
85
- const enriched = dayEvents.map((e) => {
86
- const { startISO, endISO, slot } = deriveTimesForEvent(e);
87
- return { ev: e, startISO, endISO, slot };
88
- });
89
- // tri par heure de début connue
90
- enriched.sort((a, b) => {
91
- const da = a.startISO ? (parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
92
- const db = b.startISO ? (parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
93
- return da - db;
94
- });
95
- // Bucket par slot (en déduisant le slot UNKNOWN via l'heure si possible)
96
- const bySlot = new Map();
97
- for (const it of enriched) {
98
- let s = it.slot;
99
- if (s === "UNKNOWN" && it.startISO) {
100
- const dt = parseISO(it.startISO);
101
- if (dt)
102
- s = slotOf(dt);
103
- }
104
- if (s === "UNKNOWN")
105
- continue; // on écarte les inconnus résiduels (option : créer un bucket "UNKNOWN")
106
- if (!bySlot.has(s))
107
- bySlot.set(s, []);
108
- bySlot.get(s).push(it);
109
- }
110
- // Construire les GroupedReunion et les pousser dans out[slot]
111
- for (const [slot, list] of bySlot) {
112
- const sorted = list.slice().sort((a, b) => {
113
- const da = a.startISO ? (parseISO(a.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
114
- const db = b.startISO ? (parseISO(b.startISO)?.toMillis() ?? Number.MAX_SAFE_INTEGER) : Number.MAX_SAFE_INTEGER;
115
- return da - db;
116
- });
117
- const startTime = sorted.find((x) => !!x.startISO)?.startISO ?? null;
118
- const endTime = sorted.reduce((acc, x) => {
119
- const de = x.endISO ? parseISO(x.endISO)?.toMillis() : null;
120
- const accMs = acc ? parseISO(acc)?.toMillis() : null;
121
- if (de != null && (accMs == null || de > accMs))
122
- return x.endISO;
123
- return acc;
124
- }, null);
125
- const titres = sorted.map((x) => x.ev.titre || "").filter(Boolean);
126
- const captationVideo = sorted.some((x) => x.ev.captationVideo === true);
127
- const eventsForSlot = sorted.map((x) => x.ev);
128
- ensureBucket(slot).push({
129
- uid: makeGroupUid(date, slot),
130
- chambre: "SN",
131
- date,
132
- slot,
133
- type: "Séance publique",
134
- startTime,
135
- endTime,
136
- captationVideo,
137
- organe: "Séance publique",
138
- titre: compactTitleList(titres, 5),
139
- objet: joinObjets(sorted.map((x) => x.ev)),
140
- events: sorted.map((x) => x.ev),
141
- odj: buildOdj(eventsForSlot, dossierBySenatUrl),
142
- });
143
- }
144
- }
145
- // Tri interne de chaque créneau (cohérent avec groupNonSPByTypeOrganeHour)
146
- for (const s of Object.keys(out)) {
147
- out[s].sort((a, b) => {
148
- const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
149
- const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
150
- // puis par nom de slot pour stabilité (facultatif)
151
- return da - db || (a.slot || "UNKNOWN").localeCompare(b.slot || "UNKNOWN");
152
- });
153
- }
154
- return out;
155
- }
156
- function normalizeNoAccents(s) {
157
- return (s || "")
158
- .trim()
159
- .normalize("NFKD")
160
- .replace(/[\u0300-\u036f]/g, "");
161
- }
162
- function isSeancePublique(typeLabel) {
163
- const s = normalizeNoAccents(typeLabel || "").toLowerCase();
164
- return /\bseance\b.*\bpublique\b/.test(s);
165
- }
166
- function classifyAgendaType(typeLabel) {
167
- const s = normalizeNoAccents(typeLabel || "").toLowerCase();
168
- if (/\bseance\b.*\bpublique\b/.test(s))
169
- return "SP";
170
- if (/\bcommissions\b/.test(s))
171
- return "COM";
172
- if (/\bmission\b.*\bcontrole\b/.test(s))
173
- return "MC";
174
- if (/\boffices\b|\bdelegations\b/.test(s))
175
- return "OD";
176
- if (/\instances\b|\decisionelles\b/.test(s))
177
- return "ID";
178
- return null;
179
- }
180
- function typeToSuffixStrict(kind) {
181
- switch (kind) {
182
- case "SP":
183
- return "IDS";
184
- case "COM":
185
- return "IDC";
186
- case "MC":
187
- return "IDM";
188
- case "OD":
189
- return "IDO";
190
- case "ID":
191
- return "IDI";
192
- }
193
- }
194
- function organeInitials(input, maxLen = 8) {
195
- if (!input)
196
- return "";
197
- const clean = normalizeNoAccents(input)
198
- .replace(/['’]/g, " ")
199
- .replace(/[^A-Za-z0-9\s]/g, " ")
200
- .replace(/\s+/g, " ")
201
- .trim();
202
- if (!clean)
203
- return "";
204
- const parts = clean.split(" ");
205
- const letters = [];
206
- for (const raw of parts) {
207
- const w = raw.toLowerCase();
208
- if (!w)
209
- continue;
210
- if (STOPWORDS.has(w))
211
- continue;
212
- // Take two first letter if alphanumeric
213
- const two = raw.slice(0, 2);
214
- if (/[A-Za-z0-9]/.test(two))
215
- letters.push(two.toUpperCase());
216
- }
217
- const out = letters.join("");
218
- return out.slice(0, maxLen);
219
- }
220
- export function makeTypeGroupUid(dateISO, kind, agendaEventId, organe) {
221
- const ymd = dateISO ? formatYYYYMMDD(dateISO) : "00000000";
222
- const suffix = typeToSuffixStrict(kind);
223
- const org = organe ? organeInitials(organe) : "";
224
- let base = `RUSN${ymd}${suffix}${org ? org : ""}${agendaEventId}`;
225
- return base;
226
- }
227
- function parseISO(isoLike) {
228
- if (!isoLike)
229
- return null;
230
- const dt = DateTime.fromISO(isoLike, { zone: PARIS });
231
- return dt.isValid ? dt : null;
232
- }
233
- function slotOf(dt) {
234
- if (!dt)
235
- return "UNKNOWN";
236
- const h = dt.hour + dt.minute / 60;
237
- if (h < 12.5)
238
- return "MATIN";
239
- if (h < 19.0)
240
- return "APRES-MIDI";
241
- return "SOIR";
242
- }
243
- function trimWords(s, max = 40) {
244
- const words = norm(s).split(/\s+/).filter(Boolean);
245
- return words.length <= max ? words.join(" ") : words.slice(0, max).join(" ");
246
- }
247
- function compactTitleList(titres, maxTitles = 5) {
248
- const uniq = Array.from(new Set(titres.map((t) => norm(t)).filter(Boolean)));
249
- return uniq.slice(0, maxTitles).join(" · ") || "(sans titre)";
250
- }
251
- export function makeGroupUid(date, slot) {
252
- const ymd = date ? formatYYYYMMDD(date) : "00000000";
253
- return `RUSN${ymd}IDS-${slot}`;
254
- }
255
- export function formatYYYYMMDD(dateYYYYMMDD) {
256
- const [y, m, d] = dateYYYYMMDD.split("-");
257
- return `${y}${m}${d}`;
258
- }
259
- export function makeReunionUid(agenda) {
260
- const ymd = agenda.date ? formatYYYYMMDD(agenda.date) : "00000000";
261
- return `${ymd}-${agenda.id}`;
262
- }
263
- function joinObjets(events) {
264
- const objets = events
265
- .map((e) => (e.objet || "").trim())
266
- .filter(Boolean)
267
- .map((s) => trimWords(s, 40));
268
- if (objets.length === 0)
269
- return "";
270
- return objets.join(" · ");
271
- }
272
- // Extract hours/minutes from French text like "à 10 h 30", "de 10 h à 12 h", etc.
273
- function parseTimeOriginalFR(timeOriginal) {
274
- if (!timeOriginal)
275
- return { start: null, end: null };
276
- const txt = (timeOriginal || "")
277
- .replace(/\u00A0/g, " ") // nbsp → space
278
- .replace(/\s+/g, " ") // espaces multiples
279
- .toLowerCase()
280
- .trim();
281
- // 1) "de 10 h 30 à 12 heures", "de 10h30 à 12h", "de 9 h à 11 h 15", etc.
282
- const reRange = /\bde\s+(\d{1,2})\s*(?:h|:)?\s*(\d{1,2})?\s*(?:heures?)?\s*à\s*(\d{1,2})\s*(?:h|:)?\s*(\d{1,2})?\s*(?:heures?)?/i;
283
- const mRange = txt.match(reRange);
284
- if (mRange) {
285
- const h1 = clampHour(+mRange[1]), m1 = clampMinute(mRange[2] ? +mRange[2] : 0);
286
- const h2 = clampHour(+mRange[3]), m2 = clampMinute(mRange[4] ? +mRange[4] : 0);
287
- return { start: toIsoTime(h1, m1), end: toIsoTime(h2, m2) };
288
- }
289
- // 2) "à 10 h 30", "à 10h", "A 10h30", "A 9 heures", etc.
290
- const reAt = /\b(?:a|à)\s*(\d{1,2})\s*(?:h|:)?\s*(\d{1,2})?\s*(?:heures?)?/i;
291
- const mAt = txt.match(reAt);
292
- if (mAt) {
293
- const h = clampHour(+mAt[1]), m = clampMinute(mAt[2] ? +mAt[2] : 0);
294
- return { start: toIsoTime(h, m), end: null };
295
- }
296
- // 3) "10 h 30", "15h", "9 heures" sans 'à' / 'de ... à ...'
297
- const reBare = /\b(\d{1,2})\s*(?:h|:)?\s*(\d{1,2})?\s*(?:heures?)?\b/;
298
- const mBare = txt.match(reBare);
299
- if (mBare) {
300
- const h = clampHour(+mBare[1]), m = clampMinute(mBare[2] ? +mBare[2] : 0);
301
- return { start: toIsoTime(h, m), end: null };
302
- }
303
- return { start: null, end: null };
304
- }
305
- function clampHour(h) {
306
- return Math.max(0, Math.min(23, h));
307
- }
308
- function clampMinute(m) {
309
- return Math.max(0, Math.min(59, m));
310
- }
311
- function toIsoTime(h, m) {
312
- return `${String(h).padStart(2, "0")}:${String(m).padStart(2, "0")}:00.000+02:00`;
313
- }
314
- function slotFromTimesOrText(startISO, timeOriginal) {
315
- if (startISO) {
316
- const dt = parseISO(startISO);
317
- if (dt)
318
- return slotOf(dt);
319
- }
320
- const t = (timeOriginal || "").toLowerCase();
321
- if (/\b(apr(?:è|e)s[-\s]?midi)\b/.test(t))
322
- return "APRES-MIDI";
323
- if (/\b(soir(?:ée)?)\b/.test(t))
324
- return "SOIR";
325
- if (/\b(matin(?:ée)?)\b/.test(t))
326
- return "MATIN";
327
- return "UNKNOWN";
328
- }
329
- function deriveTimesForEvent(ev) {
330
- const directStart = ev.startTime ?? null;
331
- const directEnd = ev.endTime ?? null;
332
- const fromText = parseTimeOriginalFR(ev.timeOriginal);
333
- const startISO = directStart ?? fromText.start ?? null;
334
- const endISO = directEnd ?? fromText.end ?? null;
335
- const slot = slotFromTimesOrText(startISO, ev.timeOriginal);
336
- return { startISO, endISO, slot };
337
- }
@@ -1,5 +0,0 @@
1
- import commandLineArgs from "command-line-args";
2
- import { DossierLegislatifResult } from "../model/dosleg";
3
- import { AgendaEvent, ReunionOdj } from "../types/agenda";
4
- export declare function buildOdj(events: AgendaEvent[], dossierBySenatUrl: Record<string, DossierLegislatifResult>): ReunionOdj | undefined;
5
- export declare function buildSenatDossierIndex(options: commandLineArgs.CommandLineOptions): Record<string, DossierLegislatifResult>;
@@ -1,154 +0,0 @@
1
- import { getSessionsFromStart } from "../types/sessions";
2
- import { iterLoadSenatDossiersLegislatifs } from "../loaders";
3
- export function buildOdj(events, dossierBySenatUrl) {
4
- const byObjet = new Map(); // objet -> set de dossier uids
5
- let codeEtape = null;
6
- let dossier = null;
7
- for (const ev of events) {
8
- const objetKey = (ev.objet ?? "").trim();
9
- const url = normalizeSenatUrl(ev.urlDossierSenat) ?? undefined;
10
- dossier = url ? dossierBySenatUrl[url] : null;
11
- const dossierUid = dossier ? pickDossierUid(dossier) : undefined;
12
- codeEtape = dossier ? computeCodeEtape(ev, dossier) : null;
13
- // si on n’a ni objet ni dossier, ça ne sert à rien de créer un point
14
- if (!objetKey && !dossierUid)
15
- continue;
16
- if (!byObjet.has(objetKey) && dossierUid) {
17
- byObjet.set(objetKey, dossierUid);
18
- }
19
- }
20
- if (byObjet.size === 0)
21
- return undefined;
22
- const pointsOdj = [];
23
- for (const [objetKey, dossierUid] of byObjet) {
24
- pointsOdj.push({
25
- objet: objetKey || null,
26
- dossierLegislatifRef: dossierUid || null,
27
- codeEtape,
28
- });
29
- }
30
- return { pointsOdj };
31
- }
32
- function pickDossierUid(d) {
33
- if (d["signet"] && d["signet"].trim())
34
- return d["signet"].trim();
35
- if (d["code"] && String(d["code"]).trim())
36
- return String(d["code"]).trim();
37
- return undefined;
38
- }
39
- function normalizeSenatUrl(url) {
40
- if (!url)
41
- return null;
42
- let u = url.trim();
43
- if (!u)
44
- return null;
45
- if (!/^https?:\/\//i.test(u))
46
- return u;
47
- // force https://
48
- u = u.replace(/^http:\/\//i, "https://");
49
- u = u.replace(/\/+$/, "");
50
- return u;
51
- }
52
- export function buildSenatDossierIndex(options) {
53
- const index = {};
54
- const sessions = getSessionsFromStart(2015);
55
- for (const session of sessions) {
56
- for (const item of iterLoadSenatDossiersLegislatifs(options["dataDir"], session)) {
57
- const dossier = item.item;
58
- const url = dossier["url"] ? normalizeSenatUrl(dossier["url"]) : undefined;
59
- if (url)
60
- index[url] = dossier;
61
- }
62
- }
63
- return index;
64
- }
65
- function detectLecture(objet) {
66
- objet = objet.toLowerCase();
67
- if (objet.includes("première lecture"))
68
- return 1;
69
- if (objet.includes("deuxième lecture") || objet.includes("2ème"))
70
- return 2;
71
- if (objet.includes("troisième lecture") || objet.includes("3ème"))
72
- return 3;
73
- return undefined;
74
- }
75
- function computeCodeEtape(ev, dossier) {
76
- // In order to match with stage, we need to remove the '-SEANCE' suffix from the codeActe
77
- const cleanCode = (code) => code.replace(/-SEANCE$/, "");
78
- const lecture = detectLecture(ev.objet ?? "");
79
- const organe = ev.organe ?? "";
80
- const nature = organe.toLowerCase().includes("commission")
81
- ? "COM"
82
- : organe.toLowerCase().includes("séance publique")
83
- ? "DEBATS"
84
- : "";
85
- const evDate = ev.date.split("T")[0];
86
- const flat = buildFlatActes(dossier);
87
- // 1) Strict matching: same date + same nature
88
- let candidates = flat.filter((a) => {
89
- if (a.date !== evDate)
90
- return false;
91
- if (nature && !a.codeActe.includes(nature))
92
- return false;
93
- return true;
94
- });
95
- // If a specific lecture is detected in the agenda event, refine the candidates
96
- if (lecture !== undefined && candidates.length > 0) {
97
- const withLecture = candidates.filter((c) => c.ordreLecture === lecture);
98
- if (withLecture.length > 0) {
99
- candidates = withLecture;
100
- }
101
- }
102
- if (candidates.length > 0) {
103
- // Multiple candidates: pick the most specific one (longest code string)
104
- candidates.sort((a, b) => b.codeActe.length - a.codeActe.length);
105
- return cleanCode(candidates[0].codeActe);
106
- }
107
- // 2) Fallback COM: If no exact date match for a commission event,
108
- // take the latest commission act for this lecture on or before the event date.
109
- if (nature === "COM") {
110
- let comActs = flat.filter((a) => a.codeActe.includes("COM") && a.date <= evDate);
111
- if (lecture !== undefined) {
112
- const byLecture = comActs.filter((a) => a.ordreLecture === lecture);
113
- if (byLecture.length > 0)
114
- comActs = byLecture;
115
- }
116
- if (comActs.length > 0) {
117
- comActs.sort((a, b) => b.date.localeCompare(a.date) || b.codeActe.length - a.codeActe.length);
118
- return cleanCode(comActs[0].codeActe);
119
- }
120
- }
121
- // 3) Fallback general lecture: if nothing else worked but a lecture is identified,
122
- // find any act belonging to that lecture (e.g., SN1-DEPOT).
123
- if (lecture !== undefined) {
124
- const genericActe = flat.find((a) => a.ordreLecture === lecture);
125
- if (genericActe) {
126
- return cleanCode(genericActe.codeActe);
127
- }
128
- }
129
- console.log(`✖ No stage code found for ev=${ev.id} (Date: ${evDate}, Nature: ${nature}, Lecture: ${lecture})`, {
130
- totalActsInDossier: dossier["actes_legislatifs"]?.length || 0,
131
- firstActDate: flat[0]?.date,
132
- });
133
- return null;
134
- }
135
- function buildFlatActes(dossier) {
136
- const actes = dossier["actes_legislatifs"] ?? [];
137
- const res = [];
138
- for (const acte of actes) {
139
- if (acte["chambre"] !== "SN")
140
- continue;
141
- const codeActe = acte.code_acte;
142
- const dateActe = acte.date?.split("T")[0];
143
- if (!codeActe || !dateActe)
144
- continue;
145
- const match = codeActe.match(/^(?:SN|AN)(\d+)/);
146
- const ordreLecture = match ? parseInt(match[1], 10) : undefined;
147
- res.push({
148
- codeActe,
149
- date: dateActe,
150
- ordreLecture,
151
- });
152
- }
153
- return res;
154
- }
@@ -1,23 +0,0 @@
1
- import { DateTime } from "luxon";
2
- import { AgendaEvent, Reunion } from "../types/agenda";
3
- import { DossierLegislatifResult } from "../model/dosleg";
4
- import * as cheerio from "cheerio";
5
- type KnownType = "SP" | "COM" | "MC" | "OD" | "ID";
6
- type DossierBySenatUrl = Record<string, DossierLegislatifResult>;
7
- type ReunionBucket = "IDS" | "IDC" | "IDM" | "IDO" | "IDI";
8
- type ReunionsByBucket = Record<ReunionBucket, Reunion[]>;
9
- export declare function buildReunionsByBucket(events: AgendaEvent[], dossierBySenatUrl: DossierBySenatUrl): ReunionsByBucket;
10
- export declare function makeReunionUid(dateISO: string, kind: KnownType, agendaEventId: string, organe?: string | null): string;
11
- export declare function formatYYYYMMDD(dateYYYYMMDD: string): string;
12
- export declare function deriveTimesForEvent(ev: AgendaEvent): {
13
- startISO: string | null;
14
- endISO: string | null;
15
- };
16
- export type SommaireBlock = {
17
- text: string;
18
- startIndex: number;
19
- targetId?: string | null;
20
- };
21
- export declare function extractSommaireBlocks($: cheerio.CheerioAPI, idx: Map<any, number>): SommaireBlock[];
22
- export declare function parseISO(iso: string | null | undefined): DateTime | null;
23
- export {};