@tricoteuses/senat 2.14.8 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/model/commission.js +1 -31
- package/lib/scripts/retrieve_agenda.js +1 -1
- package/lib/scripts/retrieve_cr_commission.js +131 -83
- package/lib/utils/cr_spliting.d.ts +3 -3
- package/lib/utils/cr_spliting.js +37 -28
- package/lib/utils/reunion_grouping.js +4 -9
- package/package.json +1 -1
- package/lib/model/compte_rendu.d.ts +0 -9
- package/lib/model/compte_rendu.js +0 -325
- package/lib/raw_types/db.d.ts +0 -11389
- package/lib/raw_types/db.js +0 -5
- package/lib/scripts/retrieve_comptes_rendus.d.ts +0 -6
- package/lib/scripts/retrieve_comptes_rendus.js +0 -274
package/lib/model/commission.js
CHANGED
|
@@ -3,7 +3,7 @@ import path from "path";
|
|
|
3
3
|
import fs from "fs";
|
|
4
4
|
import { norm, toCRDate } from "./util";
|
|
5
5
|
import { makeTypeGroupUid } from "../utils/reunion_grouping";
|
|
6
|
-
import { hourShortToStartTime } from "../utils/cr_spliting";
|
|
6
|
+
import { frDateToISO, hourShortToStartTime } from "../utils/cr_spliting";
|
|
7
7
|
const PARA_h3_SEL = "p.sh_justify, p.sh_center, p.sh_marge, p[align], li, h3";
|
|
8
8
|
function findDayRoot($, targetISO) {
|
|
9
9
|
let $root = $();
|
|
@@ -162,36 +162,6 @@ function buildPointsFromParagraphs($, paras) {
|
|
|
162
162
|
flush();
|
|
163
163
|
return points;
|
|
164
164
|
}
|
|
165
|
-
function frDateToISO(s) {
|
|
166
|
-
if (!s)
|
|
167
|
-
return;
|
|
168
|
-
const months = {
|
|
169
|
-
janvier: 1,
|
|
170
|
-
fevrier: 2,
|
|
171
|
-
février: 2,
|
|
172
|
-
mars: 3,
|
|
173
|
-
avril: 4,
|
|
174
|
-
mai: 5,
|
|
175
|
-
juin: 6,
|
|
176
|
-
juillet: 7,
|
|
177
|
-
aout: 8,
|
|
178
|
-
août: 8,
|
|
179
|
-
septembre: 9,
|
|
180
|
-
octobre: 10,
|
|
181
|
-
novembre: 11,
|
|
182
|
-
decembre: 12,
|
|
183
|
-
décembre: 12,
|
|
184
|
-
};
|
|
185
|
-
const m = norm(s).match(/^(\d{1,2})\s+([A-Za-zéûôîà]+)\s+(\d{4})$/i);
|
|
186
|
-
if (!m)
|
|
187
|
-
return;
|
|
188
|
-
const d = Number(m[1]);
|
|
189
|
-
const mon = months[m[2].toLowerCase()];
|
|
190
|
-
const y = Number(m[3]);
|
|
191
|
-
if (!mon)
|
|
192
|
-
return;
|
|
193
|
-
return `${y}-${String(mon).padStart(2, "0")}-${String(d).padStart(2, "0")}`;
|
|
194
|
-
}
|
|
195
165
|
export function parseCommissionCRFromFile(htmlFilePath, best, fallback) {
|
|
196
166
|
try {
|
|
197
167
|
if (!best && !fallback) {
|
|
@@ -44,7 +44,7 @@ async function retrieveAgendas(dataDir, sessions) {
|
|
|
44
44
|
fs.ensureDirSync(transformedAgendaSessionDir);
|
|
45
45
|
}
|
|
46
46
|
const fifteenDaysFromNow = new Date();
|
|
47
|
-
fifteenDaysFromNow.setDate(fifteenDaysFromNow.getDate() + 15);
|
|
47
|
+
fifteenDaysFromNow.setDate(fifteenDaysFromNow.getDate() + 15); // Don't download agendas more than 15 days in the future
|
|
48
48
|
for (const date = new Date(session, 9, 1); date <= new Date(session + 1, 8, 30) && date <= fifteenDaysFromNow; date.setDate(date.getDate() + 1)) {
|
|
49
49
|
const agendaName = DateTime.fromJSDate(date).toFormat(EVENT_DATE_FORMAT);
|
|
50
50
|
const agendaFileName = DateTime.fromJSDate(date).toFormat(ID_DATE_FORMAT);
|
|
@@ -3,7 +3,7 @@ import assert from "assert";
|
|
|
3
3
|
import path from "path";
|
|
4
4
|
import * as cheerio from "cheerio";
|
|
5
5
|
import { COMMISSION_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
6
|
-
import {
|
|
6
|
+
import { loadAgendaForDate, parseCommissionMetadataFromHtml, linkCRtoCommissionGroup } from "../utils/cr_spliting";
|
|
7
7
|
import { parseCommissionCRFromFile } from "../model/commission";
|
|
8
8
|
import commandLineArgs from "command-line-args";
|
|
9
9
|
import { commonOptions } from "./shared/cli_helpers";
|
|
@@ -32,12 +32,12 @@ const COMMISSION_HUBS = {
|
|
|
32
32
|
"https://www.senat.fr/compte-rendu-commissions/affaires-etrangeres_archives.html",
|
|
33
33
|
],
|
|
34
34
|
"affaires-economiques": [
|
|
35
|
-
"https://www.senat.fr/compte-rendu-commissions/
|
|
36
|
-
"https://www.senat.fr/compte-rendu-commissions/
|
|
35
|
+
"https://www.senat.fr/compte-rendu-commissions/economie.html",
|
|
36
|
+
"https://www.senat.fr/compte-rendu-commissions/economie_archives.html",
|
|
37
37
|
],
|
|
38
38
|
"amenagement-developpement-durable": [
|
|
39
|
-
"https://www.senat.fr/compte-rendu-commissions/
|
|
40
|
-
"https://www.senat.fr/compte-rendu-commissions/
|
|
39
|
+
"https://www.senat.fr/compte-rendu-commissions/developpement-durable.html",
|
|
40
|
+
"https://www.senat.fr/compte-rendu-commissions/developpement-durable_archives.html",
|
|
41
41
|
],
|
|
42
42
|
culture: [
|
|
43
43
|
"https://www.senat.fr/compte-rendu-commissions/culture.html",
|
|
@@ -122,6 +122,54 @@ async function tryDownload(url) {
|
|
|
122
122
|
const ab = await res.arrayBuffer();
|
|
123
123
|
return Buffer.from(ab);
|
|
124
124
|
}
|
|
125
|
+
function normOrgane(s) {
|
|
126
|
+
return s
|
|
127
|
+
.toLowerCase()
|
|
128
|
+
.normalize("NFD")
|
|
129
|
+
.replace(/[\u0300-\u036f]/g, "")
|
|
130
|
+
.replace(/&/g, " et ")
|
|
131
|
+
.replace(/[^a-z0-9\s-]/g, " ")
|
|
132
|
+
.replace(/\s+/g, " ")
|
|
133
|
+
.trim();
|
|
134
|
+
}
|
|
135
|
+
function toTokens(s) {
|
|
136
|
+
return new Set(normOrgane(s)
|
|
137
|
+
.split(/\s+/)
|
|
138
|
+
.filter((t) => t.length >= 3 && !["commission", "des", "de", "du", "d", "la", "le", "les", "et"].includes(t)));
|
|
139
|
+
}
|
|
140
|
+
function jaccard(a, b) {
|
|
141
|
+
if (!a.size || !b.size)
|
|
142
|
+
return 0;
|
|
143
|
+
let inter = 0;
|
|
144
|
+
for (const t of a)
|
|
145
|
+
if (b.has(t))
|
|
146
|
+
inter++;
|
|
147
|
+
return inter / (a.size + b.size - inter);
|
|
148
|
+
}
|
|
149
|
+
function reunionOrganeCandidates(h) {
|
|
150
|
+
const any = h;
|
|
151
|
+
const out = [any.organeSlug, any.organeKey, any.organe, h.titre].filter(Boolean);
|
|
152
|
+
return Array.from(new Set(out.map(normOrgane)));
|
|
153
|
+
}
|
|
154
|
+
function organeSimilarity(h, commissionKey) {
|
|
155
|
+
const keyTokens = toTokens(commissionKey.replace(/-/g, " "));
|
|
156
|
+
const cand = reunionOrganeCandidates(h).map(toTokens);
|
|
157
|
+
let best = 0;
|
|
158
|
+
for (const B of cand)
|
|
159
|
+
best = Math.max(best, jaccard(keyTokens, B));
|
|
160
|
+
return best; // 0..1
|
|
161
|
+
}
|
|
162
|
+
function timeProximityScore(h, openHHMM, maxDeltaMin) {
|
|
163
|
+
if (!openHHMM)
|
|
164
|
+
return 0;
|
|
165
|
+
const hhmm = (h.startTime ?? null);
|
|
166
|
+
if (!hhmm)
|
|
167
|
+
return 0;
|
|
168
|
+
const d = Math.abs(timeToMinutes(hhmm) - timeToMinutes(openHHMM));
|
|
169
|
+
if (d > maxDeltaMin)
|
|
170
|
+
return 0;
|
|
171
|
+
return 1 - d / maxDeltaMin; // 0..1 (1 = même heure)
|
|
172
|
+
}
|
|
125
173
|
async function retrieveCommissionCRs(options = {}) {
|
|
126
174
|
const dataDir = options["dataDir"];
|
|
127
175
|
const fromSession = Number(options["fromSession"]);
|
|
@@ -132,14 +180,14 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
132
180
|
ensureAndClearDir(originalRoot);
|
|
133
181
|
const discovered = await discoverCommissionWeeklyPages(fromSession);
|
|
134
182
|
console.log(`[COM-CR][discover] ${discovered.length} links (>= session ${fromSession})`);
|
|
135
|
-
const jobs = discovered.map(({ url, yyyymmdd }) => {
|
|
183
|
+
const jobs = discovered.map(({ url, yyyymmdd, commissionKey }) => {
|
|
136
184
|
const d = new Date(Number(yyyymmdd.slice(0, 4)), Number(yyyymmdd.slice(4, 6)) - 1, Number(yyyymmdd.slice(6, 8)));
|
|
137
185
|
const session = sessionStartYearFromDate(d);
|
|
138
|
-
const dir = path.join(originalRoot, String(session));
|
|
186
|
+
const dir = path.join(originalRoot, String(session), commissionKey);
|
|
139
187
|
fs.ensureDirSync(dir);
|
|
140
188
|
const slug = url.replace(/^.*\/(\d{8})\/([^\/]+)\.html$/i, "$2");
|
|
141
189
|
const outPath = path.join(dir, `${yyyymmdd}.${slug}.html`);
|
|
142
|
-
return { url, outPath, yyyymmdd };
|
|
190
|
+
return { url, outPath, yyyymmdd, commissionKey };
|
|
143
191
|
});
|
|
144
192
|
console.log(`[COM-CR] Downloading ${jobs.length} links → ${path.relative(process.cwd(), originalRoot)}`);
|
|
145
193
|
let completed = 0, saved = 0, skipped = 0, notFound = 0;
|
|
@@ -188,100 +236,100 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
188
236
|
fs.ensureDirSync(transformedSessionDir);
|
|
189
237
|
if (!(await fs.pathExists(originalSessionDir)))
|
|
190
238
|
continue;
|
|
191
|
-
const
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
.replace(/[’']/g, "'")
|
|
206
|
-
.split(/\W+/)
|
|
207
|
-
.filter((x) => x.length >= 3 && !["commission", "des", "de", "du", "d", "la", "le", "les", "et"].includes(x));
|
|
208
|
-
const MAX_TIME_DELTA_MIN = 120;
|
|
209
|
-
for (let i = 0; i < meta.days.length; i++) {
|
|
210
|
-
const day = meta.days[i];
|
|
211
|
-
const yyyymmdd = day.date.replace(/-/g, "");
|
|
212
|
-
const dt = new Date(Number(day.date.slice(0, 4)), Number(day.date.slice(5, 7)) - 1, Number(day.date.slice(8, 10)));
|
|
213
|
-
const daySession = sessionStartYearFromDate(dt);
|
|
214
|
-
const hits = await loadCommissionAgendaForDate(dataDir, yyyymmdd, daySession);
|
|
215
|
-
let best = null;
|
|
216
|
-
let reason = "fallback-none";
|
|
217
|
-
let deltaMin;
|
|
218
|
-
// a) score by title and organe keywords
|
|
219
|
-
if (organeKeywords.length && hits.length) {
|
|
220
|
-
const scored = hits
|
|
221
|
-
.map((h) => {
|
|
222
|
-
const t = (h.titre ?? "").toLowerCase();
|
|
223
|
-
const s = organeKeywords.reduce((acc, kw) => acc + (t.includes(kw) ? 1 : 0), 0);
|
|
224
|
-
return { h, s };
|
|
225
|
-
})
|
|
226
|
-
.sort((a, b) => b.s - a.s);
|
|
227
|
-
if (scored[0]?.s > 0) {
|
|
228
|
-
best = scored[0].h;
|
|
229
|
-
reason = "title";
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
// b) otherwise score by time proximity
|
|
233
|
-
if (!best && day.openTime && hits.length) {
|
|
234
|
-
const candidates = hits
|
|
235
|
-
.map((h) => ({ h, hhmm: h.startTime ?? null }))
|
|
236
|
-
.filter((x) => !!x.hhmm)
|
|
237
|
-
.map((x) => ({
|
|
238
|
-
h: x.h,
|
|
239
|
-
d: Math.abs(timeToMinutes(x.hhmm) - timeToMinutes(day.openTime)),
|
|
240
|
-
}))
|
|
241
|
-
.sort((a, b) => a.d - b.d);
|
|
242
|
-
if (candidates[0] && candidates[0].d <= MAX_TIME_DELTA_MIN) {
|
|
243
|
-
best = candidates[0].h;
|
|
244
|
-
reason = "time";
|
|
245
|
-
deltaMin = candidates[0].d;
|
|
246
|
-
}
|
|
239
|
+
const commissionDirs = (await fs.readdir(originalSessionDir, { withFileTypes: true }))
|
|
240
|
+
.filter((d) => d.isDirectory())
|
|
241
|
+
.map((d) => d.name); // ex: "affaires-etrangeres", "finances", etc.
|
|
242
|
+
for (const commissionKey of commissionDirs) {
|
|
243
|
+
const commissionDir = path.join(originalSessionDir, commissionKey);
|
|
244
|
+
const htmlFiles = (await fs.readdir(commissionDir)).filter((f) => /\.html?$/i.test(f)).sort();
|
|
245
|
+
let totalFiles = 0;
|
|
246
|
+
let linkedFiles = 0;
|
|
247
|
+
for (const f of htmlFiles) {
|
|
248
|
+
const htmlPath = path.join(commissionDir, f);
|
|
249
|
+
let meta;
|
|
250
|
+
try {
|
|
251
|
+
const raw = await fs.readFile(htmlPath, "utf8");
|
|
252
|
+
meta = parseCommissionMetadataFromHtml(raw, f);
|
|
247
253
|
}
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
dateISO: day.date,
|
|
252
|
-
hourShort,
|
|
253
|
-
organe: meta.organeDetected ?? null,
|
|
254
|
-
});
|
|
255
|
-
if (!cr) {
|
|
256
|
-
console.warn(`[COM-CR][TRANSFORM] parse failed for ${f} → ${best ? best.uid : "NO-GROUP"}`);
|
|
254
|
+
catch (e) {
|
|
255
|
+
console.warn(`[COM-CR][PRE][${session}] Cannot read/parse ${f}:`, e);
|
|
256
|
+
continue;
|
|
257
257
|
}
|
|
258
|
-
|
|
258
|
+
const organeLabel = meta?.organeTitleRaw;
|
|
259
|
+
for (const day of meta.days ?? []) {
|
|
260
|
+
const yyyymmdd = day.date.replace(/-/g, "");
|
|
261
|
+
const dt = new Date(Number(day.date.slice(0, 4)), Number(day.date.slice(5, 7)) - 1, Number(day.date.slice(8, 10)));
|
|
262
|
+
const daySession = sessionStartYearFromDate(dt);
|
|
263
|
+
let hits = await loadAgendaForDate(dataDir, yyyymmdd, daySession);
|
|
264
|
+
let best = null;
|
|
265
|
+
let reason = "fallback-none";
|
|
266
|
+
let deltaMin;
|
|
267
|
+
// gate + scoring combined
|
|
268
|
+
const MAX_TIME_DELTA_MIN = 120;
|
|
269
|
+
const ORGANE_GATE = 0.55; // minimum similarity organe to be considered
|
|
270
|
+
const W_ORG = 0.7, W_TIM = 0.3;
|
|
271
|
+
if (hits.length) {
|
|
272
|
+
// 1) Gate organe : only keep those above gate, then score with combined organe+time
|
|
273
|
+
const gated = hits
|
|
274
|
+
.map((h) => {
|
|
275
|
+
const sOrg = organeSimilarity(h, commissionKey); // 0..1
|
|
276
|
+
const sTim = timeProximityScore(h, day.openTime ?? null, MAX_TIME_DELTA_MIN); // 0..1
|
|
277
|
+
const total = W_ORG * sOrg + +W_TIM * sTim;
|
|
278
|
+
return { h, sOrg, sTim, total };
|
|
279
|
+
})
|
|
280
|
+
.filter((x) => x.sOrg >= ORGANE_GATE)
|
|
281
|
+
.sort((a, b) => b.total - a.total);
|
|
282
|
+
if (gated[0]) {
|
|
283
|
+
best = gated[0].h;
|
|
284
|
+
reason = gated[0].sOrg >= ORGANE_GATE ? "organe" : "fallback-none";
|
|
285
|
+
if (day.openTime && best?.startTime) {
|
|
286
|
+
deltaMin = Math.abs(timeToMinutes(best.startTime) - timeToMinutes(day.openTime));
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
else {
|
|
290
|
+
best = null;
|
|
291
|
+
reason = "fallback-none";
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
// Parse CR
|
|
295
|
+
const hourShort = toHourShort(day.openTime) ?? "NA";
|
|
296
|
+
const cr = parseCommissionCRFromFile(htmlPath, best ?? undefined, {
|
|
297
|
+
dateISO: day.date,
|
|
298
|
+
hourShort,
|
|
299
|
+
organe: commissionKey,
|
|
300
|
+
});
|
|
301
|
+
if (!cr) {
|
|
302
|
+
console.warn(`[COM-CR][TRANSFORM] parse failed for ${f} → ${best ? best.uid : "NO-GROUP"} (${commissionKey})`);
|
|
303
|
+
continue;
|
|
304
|
+
}
|
|
259
305
|
const fileUid = cr.uid;
|
|
260
306
|
const outPath = path.join(transformedSessionDir, `${fileUid}.json`);
|
|
261
307
|
await fs.writeJSON(outPath, cr, { spaces: 2 });
|
|
262
308
|
const npts = Array.isArray(cr.contenu.point) ? cr.contenu.point.length : cr.contenu.point ? 1 : 0;
|
|
263
309
|
if (!options["silent"]) {
|
|
264
|
-
console.log(`[COM-CR][TRANSFORM] saved ${path.basename(outPath)} (points=${npts})`);
|
|
310
|
+
console.log(`[COM-CR][TRANSFORM] saved ${path.basename(outPath)} (points=${npts}) [${commissionKey}]`);
|
|
265
311
|
}
|
|
266
|
-
const
|
|
267
|
-
const titreGuess = meta.organeDetected || meta.organeTitleRaw || "Commission";
|
|
268
|
-
// Si on a un match agenda, on force le groupUid existant (best.uid)
|
|
312
|
+
const titreGuess = organeLabel || "Commission";
|
|
269
313
|
const up = await linkCRtoCommissionGroup({
|
|
270
314
|
dataDir,
|
|
271
|
-
session: session,
|
|
272
315
|
dateISO: day.date,
|
|
273
|
-
organeDetected:
|
|
316
|
+
organeDetected: best?.organe ?? null,
|
|
274
317
|
hourShort,
|
|
275
318
|
crUid: fileUid,
|
|
276
319
|
titreGuess,
|
|
277
320
|
groupUid: best ? best.uid : undefined,
|
|
278
321
|
});
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
322
|
+
totalFiles++;
|
|
323
|
+
if (up.created || up.updated)
|
|
324
|
+
linkedFiles++;
|
|
325
|
+
else {
|
|
326
|
+
console.warn(`[COM-CR][AGENDA][WARN] CR ${fileUid} not linked to any agenda group (reason=${reason}, delta=${deltaMin ?? "NA"}m)`);
|
|
282
327
|
}
|
|
283
328
|
}
|
|
284
329
|
}
|
|
330
|
+
if (!options["silent"]) {
|
|
331
|
+
console.log(`[COM-CR][SESSION ${session}][${commissionKey}] Processed ${totalFiles} CR files, linked to agenda: ${linkedFiles}`);
|
|
332
|
+
}
|
|
285
333
|
}
|
|
286
334
|
}
|
|
287
335
|
}
|
|
@@ -5,6 +5,7 @@ export declare function computeIntervalsBySlot($: cheerio.CheerioAPI, idx: Map<a
|
|
|
5
5
|
start: number;
|
|
6
6
|
end: number;
|
|
7
7
|
}[];
|
|
8
|
+
export declare function frDateToISO(s?: string): string | undefined;
|
|
8
9
|
export declare function parseCommissionMetadataFromHtml(html: string, sourceFileName?: string): {
|
|
9
10
|
sourceFile: string | null;
|
|
10
11
|
organeTitleRaw: string | null;
|
|
@@ -17,17 +18,16 @@ export declare function parseCommissionMetadataFromHtml(html: string, sourceFile
|
|
|
17
18
|
h2Index: number;
|
|
18
19
|
}[];
|
|
19
20
|
};
|
|
20
|
-
export declare function
|
|
21
|
+
export declare function loadAgendaForDate(dataDir: string, yyyymmdd: string, session: number): Promise<GroupedReunion[]>;
|
|
21
22
|
export declare function hourShortToStartTime(hourShort: string | null): string | null;
|
|
22
23
|
export declare function linkCRtoCommissionGroup(opts: {
|
|
23
24
|
dataDir: string;
|
|
24
|
-
session: number;
|
|
25
25
|
dateISO: string;
|
|
26
26
|
organeDetected: string | null;
|
|
27
27
|
hourShort: string | null;
|
|
28
28
|
crUid: string;
|
|
29
29
|
titreGuess?: string | null;
|
|
30
|
-
groupUid?: string;
|
|
30
|
+
groupUid?: string | null;
|
|
31
31
|
}): Promise<{
|
|
32
32
|
uid: string;
|
|
33
33
|
filePath: string;
|
package/lib/utils/cr_spliting.js
CHANGED
|
@@ -2,6 +2,7 @@ import path from "path";
|
|
|
2
2
|
import * as cheerio from "cheerio";
|
|
3
3
|
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
4
4
|
import fs from "fs-extra";
|
|
5
|
+
import { sessionStartYearFromDate } from "../model/seance";
|
|
5
6
|
export function computeIntervalsBySlot($, idx, firstSlotOfDay) {
|
|
6
7
|
const all = $("body *").toArray();
|
|
7
8
|
const cuts = [{ pos: 0, hhmm: undefined }];
|
|
@@ -172,8 +173,7 @@ function parseFrenchClockToHHMM(input) {
|
|
|
172
173
|
}
|
|
173
174
|
return `${String(hour).padStart(2, "0")}${String(minutes).padStart(2, "0")}`;
|
|
174
175
|
}
|
|
175
|
-
|
|
176
|
-
function frDateToISO(s) {
|
|
176
|
+
export function frDateToISO(s) {
|
|
177
177
|
if (!s)
|
|
178
178
|
return;
|
|
179
179
|
const months = {
|
|
@@ -193,11 +193,11 @@ function frDateToISO(s) {
|
|
|
193
193
|
décembre: 12,
|
|
194
194
|
decembre: 12,
|
|
195
195
|
};
|
|
196
|
-
const
|
|
196
|
+
const cleaned = s
|
|
197
197
|
.trim()
|
|
198
198
|
.replace(/\u00A0/g, " ")
|
|
199
|
-
.replace(/ +/g, " ")
|
|
200
|
-
|
|
199
|
+
.replace(/ +/g, " ");
|
|
200
|
+
const m = cleaned.match(/^(\d{1,2})(?:er)?\s+([a-zéèêîïôûùç]+)\s+(\d{4})$/i);
|
|
201
201
|
if (!m)
|
|
202
202
|
return;
|
|
203
203
|
const d = String(parseInt(m[1], 10)).padStart(2, "0");
|
|
@@ -230,13 +230,19 @@ function detectOrganeFromTitle(s) {
|
|
|
230
230
|
}
|
|
231
231
|
return { organeTitleRaw: t, organeDetected };
|
|
232
232
|
}
|
|
233
|
+
function normalizeSpaces(s) {
|
|
234
|
+
return s
|
|
235
|
+
.replace(/\u00A0/g, " ")
|
|
236
|
+
.replace(/\s+/g, " ")
|
|
237
|
+
.trim();
|
|
238
|
+
}
|
|
233
239
|
function extractDaysAndOpenings($) {
|
|
234
240
|
const days = [];
|
|
235
241
|
const h2s = $("h2").toArray();
|
|
236
242
|
for (let i = 0; i < h2s.length; i++) {
|
|
237
243
|
const h = h2s[i];
|
|
238
|
-
const txt = $(h).text()
|
|
239
|
-
const m = txt.match(
|
|
244
|
+
const txt = normalizeSpaces($(h).text());
|
|
245
|
+
const m = txt.match(/^(?:Lundi|Mardi|Mercredi|Jeudi|Vendredi|Samedi|Dimanche)\s+(.+?)$/i);
|
|
240
246
|
if (!m)
|
|
241
247
|
continue;
|
|
242
248
|
const iso = frDateToISO(m[1]);
|
|
@@ -245,8 +251,8 @@ function extractDaysAndOpenings($) {
|
|
|
245
251
|
let openTime;
|
|
246
252
|
let cur = $(h).next();
|
|
247
253
|
while (cur.length && cur[0].tagName !== "h2") {
|
|
248
|
-
const t = cur.text()
|
|
249
|
-
const mt = t.match(/La réunion est ouverte à\s+(\d{1,2})
|
|
254
|
+
const t = normalizeSpaces(cur.text());
|
|
255
|
+
const mt = t.match(/La réunion est ouverte à\s+(\d{1,2})\s*h(?:\s*(\d{2}))?/i);
|
|
250
256
|
if (mt) {
|
|
251
257
|
openTime = `${mt[1].padStart(2, "0")}:${(mt[2] ?? "00").padStart(2, "0")}`;
|
|
252
258
|
break;
|
|
@@ -285,7 +291,7 @@ export function parseCommissionMetadataFromHtml(html, sourceFileName) {
|
|
|
285
291
|
function isGroupedReunion(o) {
|
|
286
292
|
return o && typeof o === "object" && typeof o.uid === "string" && typeof o.date === "string";
|
|
287
293
|
}
|
|
288
|
-
export async function
|
|
294
|
+
export async function loadAgendaForDate(dataDir, yyyymmdd, session) {
|
|
289
295
|
const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
|
|
290
296
|
if (!(await fs.pathExists(baseDir)))
|
|
291
297
|
return [];
|
|
@@ -320,14 +326,16 @@ export function hourShortToStartTime(hourShort) {
|
|
|
320
326
|
return `${hh}:${mm}`;
|
|
321
327
|
}
|
|
322
328
|
export async function linkCRtoCommissionGroup(opts) {
|
|
323
|
-
const { dataDir,
|
|
329
|
+
const { dataDir, dateISO, organeDetected, hourShort, crUid, titreGuess, groupUid } = opts;
|
|
324
330
|
const computedUid = crUid.replace(/^CRC/, "RU");
|
|
325
|
-
const uid = groupUid ?? computedUid;
|
|
331
|
+
const uid = groupUid ?? computedUid;
|
|
332
|
+
const session = sessionStartYearFromDate(new Date(dateISO));
|
|
326
333
|
const groupedDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
|
|
327
334
|
await fs.ensureDir(groupedDir);
|
|
328
335
|
const filePath = path.join(groupedDir, `${uid}.json`);
|
|
329
336
|
let group = null;
|
|
330
337
|
let created = false;
|
|
338
|
+
let updated = false;
|
|
331
339
|
try {
|
|
332
340
|
if (await fs.pathExists(filePath)) {
|
|
333
341
|
group = await fs.readJSON(filePath);
|
|
@@ -337,24 +345,25 @@ export async function linkCRtoCommissionGroup(opts) {
|
|
|
337
345
|
console.warn(`[AGENDA][COM] Unreadable JSON → ${filePath} (${e?.message}) → will recreate`);
|
|
338
346
|
}
|
|
339
347
|
if (!group) {
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
348
|
+
group = {
|
|
349
|
+
uid,
|
|
350
|
+
chambre: "SN",
|
|
351
|
+
date: dateISO,
|
|
352
|
+
type: organeDetected ?? "Commissions",
|
|
353
|
+
startTime: hourShortToStartTime(hourShort),
|
|
354
|
+
endTime: null,
|
|
355
|
+
captationVideo: false,
|
|
356
|
+
titre: titreGuess ?? "",
|
|
357
|
+
objet: "",
|
|
358
|
+
events: [],
|
|
359
|
+
compteRenduRefUid: crUid,
|
|
360
|
+
};
|
|
361
|
+
created = true;
|
|
362
|
+
console.log(`[AGENDA][COM] Created new group uid=${uid} for CR uid=${crUid}`);
|
|
355
363
|
}
|
|
356
364
|
else {
|
|
357
365
|
group.compteRenduRefUid = crUid;
|
|
366
|
+
updated = true;
|
|
358
367
|
}
|
|
359
368
|
// Lien CR
|
|
360
369
|
// Enrichir depuis CR si vide
|
|
@@ -367,5 +376,5 @@ export async function linkCRtoCommissionGroup(opts) {
|
|
|
367
376
|
// group.titre = titreGuess;
|
|
368
377
|
// }
|
|
369
378
|
await fs.writeJSON(filePath, group, { spaces: 2 });
|
|
370
|
-
return { uid, filePath, created, updated
|
|
379
|
+
return { uid, filePath, created, updated };
|
|
371
380
|
}
|
|
@@ -262,15 +262,10 @@ function organeInitials(input, maxLen = 8) {
|
|
|
262
262
|
continue;
|
|
263
263
|
if (STOPWORDS.has(w))
|
|
264
264
|
continue;
|
|
265
|
-
//
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
}
|
|
270
|
-
// otherwise, take first letter if alphanumeric
|
|
271
|
-
const ch = raw[0];
|
|
272
|
-
if (/[A-Za-z0-9]/.test(ch))
|
|
273
|
-
letters.push(ch.toUpperCase());
|
|
265
|
+
// Take two first letter if alphanumeric
|
|
266
|
+
const two = raw.slice(0, 2);
|
|
267
|
+
if (/[A-Za-z0-9]/.test(two))
|
|
268
|
+
letters.push(two.toUpperCase());
|
|
274
269
|
}
|
|
275
270
|
const out = letters.join("");
|
|
276
271
|
return out.slice(0, maxLen);
|
package/package.json
CHANGED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import { CompteRendu, Sommaire } from "../types/compte_rendu";
|
|
2
|
-
import { TimeSlot } from "../types/agenda";
|
|
3
|
-
export declare function parseCompteRenduSlotFromFile(xmlFilePath: string, wantedSlot: TimeSlot, firstSlotOfDay?: TimeSlot): Promise<CompteRendu | null>;
|
|
4
|
-
export declare function sessionStartYearFromDate(d: Date): number;
|
|
5
|
-
export declare function parseYYYYMMDD(yyyymmdd: string): Date | null;
|
|
6
|
-
export declare function deriveTitreObjetFromSommaire(sommaire: Sommaire | undefined, slot: TimeSlot): {
|
|
7
|
-
titre: string;
|
|
8
|
-
objet: string;
|
|
9
|
-
};
|