@tricoteuses/senat 2.10.5 → 2.11.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/lib/databases.d.ts +1 -28
  2. package/lib/databases.js +0 -6
  3. package/lib/datasets.d.ts +6 -0
  4. package/lib/datasets.js +233 -0
  5. package/lib/loaders.d.ts +5 -0
  6. package/lib/loaders.js +14 -9
  7. package/lib/model/ameli.d.ts +31 -143
  8. package/lib/model/ameli.js +102 -95
  9. package/lib/model/commission.d.ts +5 -0
  10. package/lib/model/commission.js +263 -0
  11. package/lib/model/debats.d.ts +13 -51
  12. package/lib/model/documents.d.ts +2 -0
  13. package/lib/model/documents.js +37 -0
  14. package/lib/model/dosleg.d.ts +9 -104
  15. package/lib/model/dosleg.js +76 -108
  16. package/lib/model/index.d.ts +4 -2
  17. package/lib/model/index.js +4 -2
  18. package/lib/model/questions.d.ts +10 -458
  19. package/lib/model/scrutins.d.ts +3 -0
  20. package/lib/model/scrutins.js +74 -0
  21. package/lib/model/{compte_rendu.js → seance.js} +47 -28
  22. package/lib/model/sens.d.ts +28 -1002
  23. package/lib/model/sens.js +65 -33
  24. package/lib/model/util.d.ts +1 -0
  25. package/lib/model/util.js +19 -1
  26. package/lib/raw_types/ameli.d.ts +778 -1521
  27. package/lib/raw_types/ameli.js +5 -345
  28. package/lib/raw_types/debats.d.ts +163 -306
  29. package/lib/raw_types/debats.js +5 -84
  30. package/lib/raw_types/dosleg.d.ts +1349 -2293
  31. package/lib/raw_types/dosleg.js +5 -550
  32. package/lib/raw_types/questions.d.ts +374 -519
  33. package/lib/raw_types/questions.js +5 -84
  34. package/lib/raw_types/senat.d.ts +11389 -0
  35. package/lib/raw_types/senat.js +5 -0
  36. package/lib/raw_types/sens.d.ts +6729 -12571
  37. package/lib/raw_types/sens.js +5 -2944
  38. package/lib/raw_types_schemats/ameli.d.ts +2 -2
  39. package/lib/raw_types_schemats/debats.d.ts +2 -2
  40. package/lib/raw_types_schemats/dosleg.d.ts +2 -2
  41. package/lib/raw_types_schemats/questions.d.ts +2 -2
  42. package/lib/raw_types_schemats/sens.d.ts +2 -2
  43. package/lib/scripts/convert_data.js +37 -31
  44. package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
  45. package/lib/scripts/retrieve_cr_commission.js +291 -0
  46. package/lib/scripts/{retrieve_comptes_rendus.js → retrieve_cr_seance.js} +1 -1
  47. package/lib/scripts/retrieve_open_data.js +35 -1
  48. package/lib/utils/cr_spliting.d.ts +22 -1
  49. package/lib/utils/cr_spliting.js +273 -12
  50. package/lib/utils/reunion_grouping.d.ts +3 -0
  51. package/lib/utils/reunion_grouping.js +1 -1
  52. package/package.json +12 -11
  53. /package/lib/model/{compte_rendu.d.ts → seance.d.ts} +0 -0
  54. /package/lib/scripts/{retrieve_comptes_rendus.d.ts → retrieve_cr_seance.d.ts} +0 -0
@@ -1,7 +1,13 @@
1
+ import path from "path";
2
+ import * as cheerio from "cheerio";
3
+ import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
4
+ import fs from "fs-extra";
5
+ import { makeTypeGroupUid } from "./reunion_grouping";
6
+ import { sessionStartYearFromDate } from "../model/seance";
1
7
  export function computeIntervalsBySlot($, idx, firstSlotOfDay) {
2
8
  const all = $("body *").toArray();
3
9
  const cuts = [{ pos: 0, hhmm: undefined }];
4
- $('a[name]').each((_, a) => {
10
+ $("a[name]").each((_, a) => {
5
11
  const name = (a.attribs?.["name"] || "").trim();
6
12
  if (!/^su/i.test(name))
7
13
  return;
@@ -30,7 +36,7 @@ export function computeIntervalsBySlot($, idx, firstSlotOfDay) {
30
36
  continue;
31
37
  // i=0 initialSlot
32
38
  // i>0 : if current cut has SU -> slotOfHHMM, otherwise lastSlot
33
- const slot = i === 0 ? initialSlot : (cuts[i].hhmm ? slotOfHHMM(cuts[i].hhmm) : lastSlot);
39
+ const slot = i === 0 ? initialSlot : cuts[i].hhmm ? slotOfHHMM(cuts[i].hhmm) : lastSlot;
34
40
  intervals.push({ slot, start, end });
35
41
  lastSlot = slot;
36
42
  }
@@ -70,7 +76,11 @@ function extractOpeningHHMM($) {
70
76
  }
71
77
  // Convert "quinze heures trente", "15 heures 30", "dix-sept heures moins le quart", etc. en "HHMM"
72
78
  function parseFrenchClockToHHMM(input) {
73
- const s = (input || "").toLowerCase().normalize("NFKD").replace(/[\u0300-\u036f]/g, "").trim();
79
+ const s = (input || "")
80
+ .toLowerCase()
81
+ .normalize("NFKD")
82
+ .replace(/[\u0300-\u036f]/g, "")
83
+ .trim();
74
84
  if (!s)
75
85
  return undefined;
76
86
  const digitMatch = s.match(/(\d{1,2})\s*heures?(?:\s*(\d{1,2}))?/);
@@ -80,12 +90,41 @@ function parseFrenchClockToHHMM(input) {
80
90
  return `${String(h).padStart(2, "0")}${String(m).padStart(2, "0")}`;
81
91
  }
82
92
  const NUM = new Map([
83
- ["zero", 0], ["une", 1], ["un", 1], ["deux", 2], ["trois", 3], ["quatre", 4], ["cinq", 5], ["six", 6],
84
- ["sept", 7], ["huit", 8], ["neuf", 9], ["dix", 10], ["onze", 11], ["douze", 12], ["treize", 13],
85
- ["quatorze", 14], ["quinze", 15], ["seize", 16], ["dix-sept", 17], ["dix sept", 17], ["dix-huit", 18],
86
- ["dix huit", 18], ["dix-neuf", 19], ["dix neuf", 19], ["vingt", 20], ["vingt et une", 21],
87
- ["vingt-et-une", 21], ["vingt et un", 21], ["vingt-et-un", 21], ["vingt-deux", 22], ["vingt deux", 22],
88
- ["vingt-trois", 23], ["vingt trois", 23], ["vingt-quatre", 24], ["vingt quatre", 24],
93
+ ["zero", 0],
94
+ ["une", 1],
95
+ ["un", 1],
96
+ ["deux", 2],
97
+ ["trois", 3],
98
+ ["quatre", 4],
99
+ ["cinq", 5],
100
+ ["six", 6],
101
+ ["sept", 7],
102
+ ["huit", 8],
103
+ ["neuf", 9],
104
+ ["dix", 10],
105
+ ["onze", 11],
106
+ ["douze", 12],
107
+ ["treize", 13],
108
+ ["quatorze", 14],
109
+ ["quinze", 15],
110
+ ["seize", 16],
111
+ ["dix-sept", 17],
112
+ ["dix sept", 17],
113
+ ["dix-huit", 18],
114
+ ["dix huit", 18],
115
+ ["dix-neuf", 19],
116
+ ["dix neuf", 19],
117
+ ["vingt", 20],
118
+ ["vingt et une", 21],
119
+ ["vingt-et-une", 21],
120
+ ["vingt et un", 21],
121
+ ["vingt-et-un", 21],
122
+ ["vingt-deux", 22],
123
+ ["vingt deux", 22],
124
+ ["vingt-trois", 23],
125
+ ["vingt trois", 23],
126
+ ["vingt-quatre", 24],
127
+ ["vingt quatre", 24],
89
128
  ]);
90
129
  const hourWordMatch = s.match(/([a-z\- ]+?)\s*heures?/);
91
130
  if (!hourWordMatch)
@@ -109,9 +148,21 @@ function parseFrenchClockToHHMM(input) {
109
148
  }
110
149
  else {
111
150
  const MIN = new Map([
112
- ["cinq", 5], ["dix", 10], ["quinze", 15], ["vingt", 20], ["vingt-cinq", 25], ["vingt cinq", 25],
113
- ["trente", 30], ["trente-cinq", 35], ["trente cinq", 35], ["quarante", 40], ["quarante-cinq", 45],
114
- ["quarante cinq", 45], ["cinquante", 50], ["cinquante-cinq", 55], ["cinquante cinq", 55],
151
+ ["cinq", 5],
152
+ ["dix", 10],
153
+ ["quinze", 15],
154
+ ["vingt", 20],
155
+ ["vingt-cinq", 25],
156
+ ["vingt cinq", 25],
157
+ ["trente", 30],
158
+ ["trente-cinq", 35],
159
+ ["trente cinq", 35],
160
+ ["quarante", 40],
161
+ ["quarante-cinq", 45],
162
+ ["quarante cinq", 45],
163
+ ["cinquante", 50],
164
+ ["cinquante-cinq", 55],
165
+ ["cinquante cinq", 55],
115
166
  ]);
116
167
  const minWordMatch = s.match(/heures?\s+([a-z\- ]+?)(?:[).,;]|$)/);
117
168
  if (minWordMatch) {
@@ -123,3 +174,213 @@ function parseFrenchClockToHHMM(input) {
123
174
  }
124
175
  return `${String(hour).padStart(2, "0")}${String(minutes).padStart(2, "0")}`;
125
176
  }
177
+ // Helpers locaux (autonomes)
178
+ function frDateToISO(s) {
179
+ if (!s)
180
+ return;
181
+ const months = {
182
+ janvier: 1,
183
+ février: 2,
184
+ fevrier: 2,
185
+ mars: 3,
186
+ avril: 4,
187
+ mai: 5,
188
+ juin: 6,
189
+ juillet: 7,
190
+ août: 8,
191
+ aout: 8,
192
+ septembre: 9,
193
+ octobre: 10,
194
+ novembre: 11,
195
+ décembre: 12,
196
+ decembre: 12,
197
+ };
198
+ const m = s
199
+ .trim()
200
+ .replace(/\u00A0/g, " ")
201
+ .replace(/ +/g, " ")
202
+ .match(/^(\d{1,2})\s+([a-zéèêîïôûùç]+)\s+(\d{4})$/i);
203
+ if (!m)
204
+ return;
205
+ const d = String(parseInt(m[1], 10)).padStart(2, "0");
206
+ const mon = months[m[2].toLowerCase()];
207
+ if (!mon)
208
+ return;
209
+ const y = m[3];
210
+ return `${y}-${String(mon).padStart(2, "0")}-${d}`;
211
+ }
212
+ function extractWeekStartFromHead($) {
213
+ const og = $('meta[property="og:title"]').attr("content") || $("title").text();
214
+ const m = (og ?? "").toLowerCase().match(/semaine du\s+(\d{1,2}\s+\w+\s+\d{4})/i);
215
+ if (m)
216
+ return frDateToISO(m[1]);
217
+ return undefined;
218
+ }
219
+ function detectOrganeFromTitle(s) {
220
+ const t = (s ?? "").trim();
221
+ if (!t)
222
+ return { organeTitleRaw: undefined, organeDetected: undefined };
223
+ const lower = t.toLowerCase();
224
+ const m = lower.match(/commission(?:\s+des|\s+de|)\s+([^:]+)$/i);
225
+ let organeDetected;
226
+ if (m && m[1]) {
227
+ organeDetected = ("Commission " + m[1])
228
+ .replace(/\s+/g, " ")
229
+ .replace(/\s+:? comptes? rendus?$/i, "")
230
+ .trim();
231
+ organeDetected = organeDetected[0].toUpperCase() + organeDetected.slice(1);
232
+ }
233
+ return { organeTitleRaw: t, organeDetected };
234
+ }
235
+ function extractDaysAndOpenings($) {
236
+ const days = [];
237
+ const h2s = $("h2").toArray();
238
+ for (let i = 0; i < h2s.length; i++) {
239
+ const h = h2s[i];
240
+ const txt = $(h).text().trim();
241
+ const m = txt.match(/(?:Lundi|Mardi|Mercredi|Jeudi|Vendredi|Samedi|Dimanche)\s+(.+)$/i);
242
+ if (!m)
243
+ continue;
244
+ const iso = frDateToISO(m[1]);
245
+ if (!iso)
246
+ continue;
247
+ let openTime;
248
+ let cur = $(h).next();
249
+ while (cur.length && cur[0].tagName !== "h2") {
250
+ const t = cur.text().replace(/\s+/g, " ").trim();
251
+ const mt = t.match(/La réunion est ouverte à\s+(\d{1,2})(?:h(?:\s*(\d{2}))?)?/i);
252
+ if (mt) {
253
+ openTime = `${mt[1].padStart(2, "0")}:${(mt[2] ?? "00").padStart(2, "0")}`;
254
+ break;
255
+ }
256
+ cur = cur.next();
257
+ }
258
+ days.push({ date: iso, openTime, h2Index: i });
259
+ }
260
+ return days;
261
+ }
262
+ function extractOrganeCode($) {
263
+ const names = $("a[name]")
264
+ .toArray()
265
+ .map((a) => ($(a).attr("name") || "").trim());
266
+ return names.find((n) => /^[A-Z]{3,6}$/.test(n));
267
+ }
268
+ export function parseCommissionMetadataFromHtml(html, sourceFileName) {
269
+ const $ = cheerio.load(html);
270
+ const h1 = $("h1.page-title").first().text().trim() || undefined;
271
+ const headTitle = $('meta[property="og:title"]').attr("content") || $("title").text() || undefined;
272
+ const { organeTitleRaw, organeDetected } = detectOrganeFromTitle(h1 || headTitle);
273
+ let weekStart = extractWeekStartFromHead($);
274
+ const days = extractDaysAndOpenings($);
275
+ if (!weekStart && days.length > 0)
276
+ weekStart = days[0].date;
277
+ const organeCode = extractOrganeCode($);
278
+ return {
279
+ sourceFile: sourceFileName ?? null,
280
+ organeTitleRaw: organeTitleRaw ?? null,
281
+ organeDetected: organeDetected ?? null,
282
+ organeCode: organeCode ?? null,
283
+ weekStart: weekStart ?? null,
284
+ days, // [{date, openTime?, h2Index}]
285
+ };
286
+ }
287
+ function isGroupedReunion(o) {
288
+ return o && typeof o === "object" && typeof o.uid === "string" && typeof o.date === "string";
289
+ }
290
+ export async function loadCommissionAgendaForDate(dataDir, yyyymmdd, session) {
291
+ const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
292
+ if (!(await fs.pathExists(baseDir)))
293
+ return [];
294
+ const files = (await fs.readdir(baseDir)).filter((f) => f.startsWith(`RUSN${yyyymmdd}IDC`) && f.toLowerCase().endsWith(".json"));
295
+ const out = [];
296
+ for (const f of files) {
297
+ const p = path.join(baseDir, f);
298
+ try {
299
+ const raw = await fs.readFile(p, "utf8");
300
+ const obj = JSON.parse(raw);
301
+ if (!isGroupedReunion(obj)) {
302
+ continue;
303
+ }
304
+ if (!obj.uid.startsWith(`RUSN${yyyymmdd}IDC`)) {
305
+ continue;
306
+ }
307
+ out.push(obj);
308
+ }
309
+ catch {
310
+ // ignore
311
+ }
312
+ }
313
+ return out;
314
+ }
315
+ function hourShortToStartTime(hourShort) {
316
+ if (!hourShort || hourShort === "NA")
317
+ return null;
318
+ if (!/^\d{4}$/.test(hourShort))
319
+ return null;
320
+ const hh = hourShort.slice(0, 2);
321
+ const mm = hourShort.slice(2, 4);
322
+ return `${hh}:${mm}`;
323
+ }
324
+ export async function createCommissionGroupIfMissing(dataDir, dateISO, // "YYYY-MM-DD"
325
+ organeDetected, // ex. "Commission des finances"
326
+ hourShort, // "HHMM" | "NA"
327
+ titreGuess) {
328
+ const uid = makeTypeGroupUid(dateISO, "COM", hourShort ?? "NA", organeDetected ?? undefined);
329
+ const session = sessionStartYearFromDate(new Date(dateISO));
330
+ const dir = path.join(dataDir, "agenda", "transformed", String(session));
331
+ await fs.ensureDir(dir);
332
+ const filePath = path.join(dir, `${uid}.json`);
333
+ let groups = [];
334
+ let created = false;
335
+ if (await fs.pathExists(filePath)) {
336
+ try {
337
+ const raw = await fs.readFile(filePath, "utf8");
338
+ groups = JSON.parse(raw);
339
+ if (!Array.isArray(groups))
340
+ groups = [];
341
+ }
342
+ catch {
343
+ groups = [];
344
+ }
345
+ const exists = groups.some((g) => g?.uid === uid);
346
+ if (!exists) {
347
+ groups.push({
348
+ uid,
349
+ chambre: "SN",
350
+ date: dateISO,
351
+ slot: null,
352
+ type: organeDetected ?? "Commission",
353
+ startTime: hourShortToStartTime(hourShort),
354
+ endTime: null,
355
+ captationVideo: false,
356
+ titre: titreGuess ?? null,
357
+ objet: null,
358
+ reunions: [],
359
+ compteRenduRefUid: null,
360
+ });
361
+ await fs.writeJSON(filePath, groups, { spaces: 2 });
362
+ created = true;
363
+ }
364
+ }
365
+ else {
366
+ groups = [
367
+ {
368
+ uid,
369
+ chambre: "SN",
370
+ date: dateISO,
371
+ slot: null,
372
+ type: organeDetected ?? "Commission",
373
+ startTime: hourShortToStartTime(hourShort),
374
+ endTime: null,
375
+ captationVideo: false,
376
+ titre: titreGuess ?? null,
377
+ objet: null,
378
+ reunions: [],
379
+ compteRenduRefUid: null,
380
+ },
381
+ ];
382
+ await fs.writeJSON(filePath, groups, { spaces: 2 });
383
+ created = true;
384
+ }
385
+ return { uid, filePath, created };
386
+ }
@@ -1,6 +1,9 @@
1
1
  import { AgendaEvent, GroupedReunion, TimeSlot } from "../types/agenda";
2
+ type KnownType = "SP" | "COM" | "MC" | "OD" | "ID";
2
3
  export declare function groupNonSPByTypeOrganeHour(events: AgendaEvent[]): Record<"IDC" | "IDM" | "IDO" | "IDI", GroupedReunion[]>;
3
4
  export declare function groupSeancePubliqueBySlot(events: AgendaEvent[]): GroupedReunion[];
5
+ export declare function makeTypeGroupUid(dateISO: string, kind: KnownType, hourShort: string | null, organe?: string | null): string;
4
6
  export declare function makeGroupUid(date: string, slot: TimeSlot): string;
5
7
  export declare function formatYYYYMMDD(dateYYYYMMDD: string): string;
6
8
  export declare function makeReunionUid(agenda: AgendaEvent): string;
9
+ export {};
@@ -243,7 +243,7 @@ function organeInitials(input, maxLen = 8) {
243
243
  const out = letters.join("");
244
244
  return out.slice(0, maxLen);
245
245
  }
246
- function makeTypeGroupUid(dateISO, kind, hourShort, organe) {
246
+ export function makeTypeGroupUid(dateISO, kind, hourShort, organe) {
247
247
  const ymd = dateISO ? formatYYYYMMDD(dateISO) : "00000000";
248
248
  const suffix = typeToSuffixStrict(kind);
249
249
  const hh = hourShort ?? "NA";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.10.5",
3
+ "version": "2.11.1",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",
@@ -46,7 +46,8 @@
46
46
  "data:download": "tsx src/scripts/data-download.ts",
47
47
  "data:generate_schemas": "tsx src/scripts/retrieve_open_data.ts --schema",
48
48
  "data:retrieve_agenda": "cross-env TZ='Etc/UTC' tsx src/scripts/retrieve_agenda.ts",
49
- "data:retrieve_comptes_rendus": "tsx src/scripts/retrieve_comptes_rendus.ts",
49
+ "data:retrieve_cr_seance": "tsx src/scripts/retrieve_cr_seance.ts",
50
+ "data:retrieve_cr_commission": "tsx src/scripts/retrieve_cr_commission.ts",
50
51
  "data:retrieve_documents": "tsx src/scripts/retrieve_documents.ts",
51
52
  "data:retrieve_open_data": "tsx src/scripts/retrieve_open_data.ts --all",
52
53
  "data:retrieve_senateurs_photos": "tsx src/scripts/retrieve_senateurs_photos.ts --fetch",
@@ -66,12 +67,12 @@
66
67
  "fs-extra": "^9.1.0",
67
68
  "jsdom": "^26.0.0",
68
69
  "kysely": "^0.27.4",
69
- "luxon": "^3.5.0",
70
+ "luxon": "^3.7.2",
70
71
  "node-stream-zip": "^1.8.2",
71
72
  "pg": "^8.13.1",
72
73
  "pg-cursor": "^2.12.1",
73
74
  "slug": "^11.0.0",
74
- "tsx": "^4.19.4",
75
+ "tsx": "^4.20.6",
75
76
  "windows-1252": "^1.0.0"
76
77
  },
77
78
  "devDependencies": {
@@ -80,19 +81,19 @@
80
81
  "@types/command-line-args": "^5.0.0",
81
82
  "@types/fs-extra": "^9.0.7",
82
83
  "@types/jsdom": "^21.1.7",
83
- "@types/luxon": "^3.4.2",
84
+ "@types/luxon": "^3.7.1",
84
85
  "@types/node": "^20.17.6",
85
- "@types/pg": "^8.11.10",
86
+ "@types/pg": "^8.15.5",
86
87
  "@types/pg-cursor": "^2.7.2",
87
88
  "@types/slug": "^5.0.9",
88
- "@typescript-eslint/eslint-plugin": "^8.13.0",
89
- "@typescript-eslint/parser": "^8.13.0",
90
- "cross-env": "^10.0.0",
89
+ "@typescript-eslint/eslint-plugin": "^8.46.0",
90
+ "@typescript-eslint/parser": "^8.46.0",
91
+ "cross-env": "^10.1.0",
91
92
  "eslint": "^8.57.1",
92
93
  "iconv-lite": "^0.7.0",
93
- "pg-to-ts": "^4.1.1",
94
+ "kysely-codegen": "^0.19.0",
94
95
  "prettier": "^3.5.3",
95
96
  "tslib": "^2.1.0",
96
- "typescript": "^5.8.3"
97
+ "typescript": "^5.9.3"
97
98
  }
98
99
  }
File without changes