@tricoteuses/senat 2.15.6 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/datasets.js CHANGED
@@ -220,7 +220,6 @@ export const datasets = {
220
220
  ],
221
221
  votsen: [
222
222
  { name: "idx_scrnum", columns: ["scrnum"] },
223
- { name: "idx_sesann", columns: ["sesann"] },
224
223
  { name: "idx_titsencod", columns: ["titsencod"] },
225
224
  { name: "idx_stavotidt", columns: ["stavotidt"] },
226
225
  { name: "idx_posvotcod", columns: ["posvotcod"] },
@@ -7,9 +7,7 @@ function eventIsSeance(eventElement) {
7
7
  return eventElement.classList.contains("evt-seance");
8
8
  }
9
9
  function getEventType(eventClasses) {
10
- const typeClass = [...eventClasses]
11
- .find(className => className.startsWith("evt-"))
12
- || null;
10
+ const typeClass = [...eventClasses].find((className) => className.startsWith("evt-")) || null;
13
11
  switch (typeClass) {
14
12
  case "evt-seance":
15
13
  return "Séance publique";
@@ -25,8 +23,7 @@ function getEventType(eventClasses) {
25
23
  return null;
26
24
  }
27
25
  function getUrlDossierSenat(lienElements) {
28
- const urlElement = [...lienElements]
29
- .find(lienElement => lienElement.textContent?.includes("dossier législatif"));
26
+ const urlElement = [...lienElements].find((lienElement) => lienElement.textContent?.includes("dossier législatif"));
30
27
  return urlElement ? urlElement.getAttribute("href") : null;
31
28
  }
32
29
  function getQuantieme(eventElement, seancesElements) {
@@ -60,17 +57,16 @@ function normalizeTime(timeStr) {
60
57
  ?.replace(/^(?:l')?après-midi/i, "16h00") // We chose "après-midi" to mean 16h00
61
58
  ?.replace(/^(?:le )?soir/i, "20h00") // We chose "soir" to mean 20h00
62
59
  ?.replace(/^(?:la )?nuit/i, "22h00") // We chose "nuit" to mean 22h00
63
- ?.replace(/^à\s/ig, "")
64
- ?.replace(/heures/ig, "h00")
60
+ ?.replace(/^à\s/gi, "")
61
+ ?.replace(/heures/gi, "h00")
65
62
  ?.replace(/\set.*/i, "")
66
63
  ?.replace(/,.*/, "")
67
64
  ?.replace(/\s\(hors hémicycle\)/i, "")
68
- ?.replace(/\s*h\s*/ig, "h");
65
+ ?.replace(/\s*h\s*/gi, "h");
69
66
  }
70
67
  function getStartAndEndTimes(timeStr) {
71
68
  const normalizedTime = normalizeTime(timeStr);
72
- const timeMatches = normalizedTime
73
- ?.match(/^de (?<startTime>\d{2}h\d{2}) à (?<endTime>\d{2}h\d{2})$/i);
69
+ const timeMatches = normalizedTime?.match(/^de (?<startTime>\d{2}h\d{2}) à (?<endTime>\d{2}h\d{2})$/i);
74
70
  if (timeMatches?.groups) {
75
71
  const { startTime, endTime } = timeMatches.groups;
76
72
  return {
@@ -86,7 +82,7 @@ function getStartAndEndTimes(timeStr) {
86
82
  function transformAgenda(document, fileName) {
87
83
  const agendaEvents = [];
88
84
  const eventElements = document.querySelectorAll(".evt");
89
- const seanceElements = Array.from(eventElements).filter(eventElement => eventIsSeance(eventElement));
85
+ const seanceElements = Array.from(eventElements).filter((eventElement) => eventIsSeance(eventElement));
90
86
  for (const eventElement of eventElements) {
91
87
  const id = eventElement.previousElementSibling?.getAttribute("name") || null;
92
88
  if (!id) {
@@ -96,12 +92,9 @@ function transformAgenda(document, fileName) {
96
92
  const date = DateTime.fromFormat(fileName, ID_DATE_FORMAT).toFormat(STANDARD_DATE_FORMAT);
97
93
  const timeOriginal = eventElement.querySelector(".time")?.textContent || null;
98
94
  const { startTime, endTime } = getStartAndEndTimes(timeOriginal);
99
- const titre = eventElement.querySelector(".titre")?.textContent?.trim() || null;
95
+ const titre = eventElement.querySelector(".titre")?.textContent?.trim() || "";
100
96
  const organe = eventElement.querySelector(".organe")?.textContent?.trim() || null;
101
- const objet = eventElement.querySelector(".objet")?.textContent
102
- ?.trim()
103
- ?.replace(/^- /, "")
104
- || null;
97
+ const objet = eventElement.querySelector(".objet")?.textContent?.trim()?.replace(/^- /, "") || null;
105
98
  const lieu = eventElement.querySelector(".lieu")?.textContent || null;
106
99
  const videoElement = eventElement.querySelector(".video");
107
100
  const urlDossierSenat = getUrlDossierSenat(eventElement.querySelectorAll(".lien a"));
@@ -2,8 +2,16 @@ import * as cheerio from "cheerio";
2
2
  import { CompteRendu } from "../types/compte_rendu";
3
3
  import { GroupedReunion } from "../types/agenda";
4
4
  export declare function getRemainingTextAfterSpeakerHeader($: cheerio.CheerioAPI, $p: cheerio.Cheerio<any>): string;
5
- export declare function parseCommissionCRFromFile(htmlFilePath: string, best?: GroupedReunion, fallback?: {
5
+ export type DaySection = {
6
+ title: string;
7
+ $start: cheerio.Cheerio<any>;
8
+ };
9
+ export declare function cleanTitle(t: string): string;
10
+ export declare function extractDayH3Sections($: cheerio.CheerioAPI, dateISO: string): DaySection[];
11
+ export declare function parseCommissionCRSectionFromDom($: cheerio.CheerioAPI, htmlFilePath: string, opts: {
6
12
  dateISO: string;
7
13
  hourShort: string | null;
8
14
  organe?: string | null;
15
+ section: DaySection;
16
+ matched?: GroupedReunion;
9
17
  }): CompteRendu | null;
@@ -1,6 +1,5 @@
1
1
  import * as cheerio from "cheerio";
2
2
  import path from "path";
3
- import fs from "fs";
4
3
  import { norm, toCRDate } from "./util";
5
4
  import { makeTypeGroupUid } from "../utils/reunion_grouping";
6
5
  import { frDateToISO, hourShortToStartTime } from "../utils/cr_spliting";
@@ -162,47 +161,68 @@ function buildPointsFromParagraphs($, paras) {
162
161
  flush();
163
162
  return points;
164
163
  }
165
- export function parseCommissionCRFromFile(htmlFilePath, best, fallback) {
166
- try {
167
- if (!best && !fallback) {
168
- console.warn(`[COM-CR][parse] missing both 'best' and 'fallback' for ${path.basename(htmlFilePath)}`);
169
- return null;
164
+ export function cleanTitle(t) {
165
+ return (t || "").replace(/\s+/g, " ").trim();
166
+ }
167
+ export function extractDayH3Sections($, dateISO) {
168
+ const sections = [];
169
+ const $dayRoot = findDayRoot($, dateISO);
170
+ if ($dayRoot.length === 0)
171
+ return sections;
172
+ let $cursor = $dayRoot.next();
173
+ while ($cursor.length && !$cursor.is("h2")) {
174
+ if ($cursor.is("h3")) {
175
+ const title = cleanTitle($cursor.text());
176
+ if (title)
177
+ sections.push({ title, $start: $cursor });
178
+ $cursor = $cursor.next();
179
+ continue;
180
+ }
181
+ const $h3 = $cursor.find("h3").first();
182
+ if ($h3.length) {
183
+ const title = cleanTitle($h3.text());
184
+ if (title)
185
+ sections.push({ title, $start: $h3 });
186
+ $cursor = $cursor.next();
187
+ continue;
170
188
  }
171
- const raw = fs.readFileSync(htmlFilePath, "utf8");
172
- const $ = cheerio.load(raw, { xmlMode: false });
173
- const dateISO = best?.date ?? fallback.dateISO;
174
- const startTime = best?.startTime ?? hourShortToStartTime(fallback.hourShort);
175
- const organe = best?.organe ?? fallback?.organe ?? undefined;
176
- const seanceRef = best?.uid ?? makeTypeGroupUid(dateISO, "COM", fallback.hourShort ?? "NA", organe);
189
+ $cursor = $cursor.next();
190
+ }
191
+ return sections;
192
+ }
193
+ export function parseCommissionCRSectionFromDom($, htmlFilePath, opts) {
194
+ try {
195
+ const { dateISO, hourShort, organe, section, matched } = opts;
196
+ const seanceRef = matched?.uid ?? makeTypeGroupUid(dateISO, "COM", matched?.events[0].id ?? hourShort ?? "", organe ?? undefined);
177
197
  const uid = seanceRef.replace(/^RU/, "CRC");
178
- const dateSeance = toCRDate(dateISO, startTime);
198
+ const dateSeance = toCRDate(dateISO, matched?.startTime ?? hourShortToStartTime(hourShort));
179
199
  const $dayRoot = findDayRoot($, dateISO);
180
200
  if ($dayRoot.length === 0) {
181
201
  console.warn(`[COM-CR][parse] day root not found for ${dateISO} in ${path.basename(htmlFilePath)}`);
182
202
  return null;
183
203
  }
184
- // --- Collect paragraphes/h3 until next h2 ---
185
- const dayParas = [];
186
- let $cursor = $dayRoot.next();
187
- while ($cursor.length && !$cursor.is("h2")) {
188
- if ($cursor.is("h3"))
189
- dayParas.push($cursor);
204
+ const paras = [];
205
+ let $cursor = section.$start;
206
+ // Jump title if we do not want to add it to paragraphes
207
+ $cursor = $cursor.next();
208
+ while ($cursor.length && !$cursor.is("h2") && !$cursor.is("h3")) {
190
209
  if ($cursor.is(PARA_h3_SEL)) {
191
- dayParas.push($cursor);
210
+ paras.push($cursor);
192
211
  }
193
212
  else {
194
213
  const $ps = $cursor.find(PARA_h3_SEL);
195
214
  if ($ps.length)
196
215
  $ps.each((_, p) => {
197
- dayParas.push($(p));
216
+ paras.push($(p));
198
217
  });
199
218
  }
200
219
  $cursor = $cursor.next();
201
220
  }
202
- // --- points ---
203
- const allDayPoints = buildPointsFromParagraphs($, dayParas);
204
- const points = allDayPoints.length > 0 ? allDayPoints : [];
205
- // --- session ---
221
+ const points = buildPointsFromParagraphs($, paras);
222
+ if (points.length < 4) {
223
+ console.warn(`[COM-CR][parse] Insufficient points found for section="${section.title}" date=${dateISO} in ${path.basename(htmlFilePath)}`);
224
+ return null;
225
+ }
206
226
  const session = dateISO.slice(5, 7) >= "10" ? `${dateISO.slice(0, 4)}` : `${Number(dateISO.slice(0, 4)) - 1}`;
207
227
  const contenu = {
208
228
  quantiemes: { journee: dateISO, session },
@@ -224,16 +244,10 @@ export function parseCommissionCRFromFile(htmlFilePath, best, fallback) {
224
244
  environnement: "prod",
225
245
  heureGeneration: new Date(),
226
246
  };
227
- return {
228
- uid,
229
- seanceRef,
230
- sessionRef: session,
231
- metadonnees,
232
- contenu,
233
- };
247
+ return { uid, seanceRef, sessionRef: session, metadonnees, contenu };
234
248
  }
235
249
  catch (e) {
236
- console.error(`[COM-CR][parse] error file=${path.basename(htmlFilePath)}:`, e);
250
+ console.error(`[COM-CR][parse] error section file=${path.basename(htmlFilePath)}:`, e);
237
251
  return null;
238
252
  }
239
253
  }
@@ -1,12 +1,13 @@
1
1
  import { jsonArrayFrom } from "kysely/helpers/postgres";
2
2
  import { dbSenat } from "../databases";
3
3
  import { rtrim, toDateString } from "./util";
4
- function votes(scrutinNum, scrutinSession, scrutinDate) {
4
+ function votes(scrutinNum, scrutinDate) {
5
5
  return jsonArrayFrom(dbSenat
6
6
  .selectFrom("dosleg.votsen")
7
7
  .leftJoin("dosleg.titsen", "dosleg.titsen.titsencod", "dosleg.votsen.titsencod")
8
8
  .leftJoin("dosleg.stavot", "dosleg.stavot.stavotidt", "dosleg.votsen.stavotidt")
9
9
  .leftJoin("dosleg.posvot", "dosleg.posvot.posvotcod", "dosleg.votsen.posvotcod")
10
+ .leftJoin("sens.sen", "dosleg.votsen.senmat", "sens.sen.senmat")
10
11
  .leftJoin("sens.memgrppol", (join) => join
11
12
  .onRef("sens.memgrppol.senmat", "=", "dosleg.votsen.senmat")
12
13
  .onRef("sens.memgrppol.memgrppoldatdeb", "<=", scrutinDate)
@@ -15,7 +16,7 @@ function votes(scrutinNum, scrutinSession, scrutinDate) {
15
16
  eb("sens.memgrppol.memgrppoldatfin", "is", null)
16
17
  ])))
17
18
  .where("dosleg.votsen.scrnum", "=", scrutinNum)
18
- .where("dosleg.votsen.sesann", "=", scrutinSession)
19
+ .where("sens.sen.etasencod", "=", "ACTIF")
19
20
  .select([
20
21
  "dosleg.votsen.senmat as matricule_votant",
21
22
  "dosleg.votsen.senmatdel as matricule_delegant",
@@ -64,7 +65,7 @@ const findAllScrutinsQuery = dbSenat
64
65
  "scr.scrconsea as nombre_contre_seance",
65
66
  "scr.scrpou as nombre_pour",
66
67
  "scr.scrpousea as nombre_pour_seance",
67
- votes(ref("scr.scrnum"), ref("scr.sesann"), ref("scr.scrdat")).as("votes"),
68
+ votes(ref("scr.scrnum"), ref("scr.scrdat")).as("votes"),
68
69
  misesAuPoint(ref("scr.scrnum")).as("mises_au_point"),
69
70
  ])
70
71
  .$narrowType();
@@ -2,7 +2,7 @@ import fs from "fs";
2
2
  import * as cheerio from "cheerio";
3
3
  import path from "path";
4
4
  import { computeIntervalsBySlot } from "../utils/cr_spliting";
5
- import { norm, toCRDate } from "./util";
5
+ import { decodeHtmlEntities, norm, toCRDate } from "./util";
6
6
  const asArray = (x) => (x == null ? [] : Array.isArray(x) ? x : [x]);
7
7
  const toInt = (s) => (Number.isFinite(Number(s)) ? Number(s) : Number.POSITIVE_INFINITY);
8
8
  export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firstSlotOfDay) {
@@ -166,11 +166,6 @@ function dedupeSpeaker(raw) {
166
166
  }
167
167
  return s.replace(/\.\s*$/, "");
168
168
  }
169
- function decodeHtmlEntities(s) {
170
- return s
171
- .replace(/&#(\d+);/g, (_, d) => String.fromCharCode(parseInt(d, 10)))
172
- .replace(/&#x([0-9a-fA-F]+);/g, (_, h) => String.fromCharCode(parseInt(h, 16)));
173
- }
174
169
  function fixApostrophes(s) {
175
170
  let out = s;
176
171
  out = out.replace(/\s*’\s*/g, "’");
@@ -8,3 +8,6 @@ export declare function rtrim(expr: Expression<string | null | undefined>): impo
8
8
  export declare function toDateString(expr: Expression<Date | null | undefined>, format?: Expression<string>): import("kysely").RawBuilder<string>;
9
9
  export declare function norm(s?: string | null): string;
10
10
  export declare function toCRDate(dateISO: string, startTime?: string | null): string;
11
+ export declare function normalizeTitle(t: string): string;
12
+ export declare function jaccardTokenSim(a: string, b: string): number;
13
+ export declare function decodeHtmlEntities(s?: string | null): string;
package/lib/model/util.js CHANGED
@@ -42,3 +42,35 @@ export function toCRDate(dateISO, startTime) {
42
42
  }
43
43
  return `${yyyymmdd}${hh}${mm}${ss}${SSS}`;
44
44
  }
45
+ export function normalizeTitle(t) {
46
+ return (t || "")
47
+ .toLowerCase()
48
+ .normalize("NFD")
49
+ .replace(/\p{Diacritic}/gu, "")
50
+ .replace(/[^a-z0-9\s]/g, " ")
51
+ .replace(/\s+/g, " ")
52
+ .trim();
53
+ }
54
+ export function jaccardTokenSim(a, b) {
55
+ const A = new Set(normalizeTitle(a).split(" ").filter(Boolean));
56
+ const B = new Set(normalizeTitle(b).split(" ").filter(Boolean));
57
+ if (A.size === 0 || B.size === 0)
58
+ return 0;
59
+ let inter = 0;
60
+ for (const x of A)
61
+ if (B.has(x))
62
+ inter++;
63
+ return inter / (A.size + B.size - inter);
64
+ }
65
+ export function decodeHtmlEntities(s) {
66
+ if (!s)
67
+ return "";
68
+ return s
69
+ .replace(/&#x([0-9a-fA-F]+);/g, (_, h) => String.fromCodePoint(parseInt(h, 16)))
70
+ .replace(/&#(\d+);/g, (_, d) => String.fromCodePoint(parseInt(d, 10)))
71
+ .replace(/&amp;/g, "&")
72
+ .replace(/&lt;/g, "<")
73
+ .replace(/&gt;/g, ">")
74
+ .replace(/&quot;/g, '"')
75
+ .replace(/&apos;/g, "'");
76
+ }
@@ -4,12 +4,13 @@ import path from "path";
4
4
  import * as cheerio from "cheerio";
5
5
  import { COMMISSION_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
6
6
  import { loadAgendaForDate, parseCommissionMetadataFromHtml, linkCRtoCommissionGroup } from "../utils/cr_spliting";
7
- import { parseCommissionCRFromFile } from "../model/commission";
7
+ import { cleanTitle, extractDayH3Sections, parseCommissionCRSectionFromDom } from "../model/commission";
8
8
  import commandLineArgs from "command-line-args";
9
9
  import { commonOptions } from "./shared/cli_helpers";
10
10
  import { sessionStartYearFromDate } from "../model/seance";
11
11
  import { getSessionsFromStart } from "../types/sessions";
12
12
  import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
13
+ import { jaccardTokenSim } from "../model/util";
13
14
  class CommissionCRDownloadError extends Error {
14
15
  constructor(message, url) {
15
16
  super(`An error occurred while retrieving Commission CR ${url}: ${message}`);
@@ -27,35 +28,35 @@ const optionsDefinitions = [
27
28
  ];
28
29
  const options = commandLineArgs(optionsDefinitions);
29
30
  const COMMISSION_HUBS = {
30
- "affaires-etrangeres": [
31
+ "Commission des affaires étrangères": [
31
32
  "https://www.senat.fr/compte-rendu-commissions/affaires-etrangeres.html",
32
33
  "https://www.senat.fr/compte-rendu-commissions/affaires-etrangeres_archives.html",
33
34
  ],
34
- "affaires-economiques": [
35
+ "Commission des affaires économiques": [
35
36
  "https://www.senat.fr/compte-rendu-commissions/economie.html",
36
37
  "https://www.senat.fr/compte-rendu-commissions/economie_archives.html",
37
38
  ],
38
- "amenagement-developpement-durable": [
39
+ "Commission de l'amenagement du territoire et du développement durable": [
39
40
  "https://www.senat.fr/compte-rendu-commissions/developpement-durable.html",
40
41
  "https://www.senat.fr/compte-rendu-commissions/developpement-durable_archives.html",
41
42
  ],
42
- culture: [
43
+ "Commission de la culture": [
43
44
  "https://www.senat.fr/compte-rendu-commissions/culture.html",
44
45
  "https://www.senat.fr/compte-rendu-commissions/culture_archives.html",
45
46
  ],
46
- finances: [
47
+ "Commission des finances": [
47
48
  "https://www.senat.fr/compte-rendu-commissions/finances.html",
48
49
  "https://www.senat.fr/compte-rendu-commissions/finances_archives.html",
49
50
  ],
50
- lois: [
51
+ "Commission des lois": [
51
52
  "https://www.senat.fr/compte-rendu-commissions/lois.html",
52
53
  "https://www.senat.fr/compte-rendu-commissions/lois_archives.html",
53
54
  ],
54
- "affaires-sociales": [
55
+ "Commission des affaires sociales": [
55
56
  "https://www.senat.fr/compte-rendu-commissions/affaires-sociales.html",
56
57
  "https://www.senat.fr/compte-rendu-commissions/affaires-sociales_archives.html",
57
58
  ],
58
- "affaires-europeennes": [
59
+ "Commission des affaires européennes": [
59
60
  "https://www.senat.fr/compte-rendu-commissions/affaires-europeennes.html",
60
61
  "https://www.senat.fr/compte-rendu-commissions/affaires-europeennes_archives.html",
61
62
  ],
@@ -170,6 +171,15 @@ function timeProximityScore(h, openHHMM, maxDeltaMin) {
170
171
  return 0;
171
172
  return 1 - d / maxDeltaMin; // 0..1 (1 = même heure)
172
173
  }
174
+ function titleSimilarity(reunion, sectionTitle) {
175
+ const t = reunion.titre ?? "";
176
+ const o = reunion.objet ?? "";
177
+ if (!sectionTitle.trim())
178
+ return 0;
179
+ const sTit = jaccardTokenSim(t, sectionTitle);
180
+ const sObj = jaccardTokenSim(o, sectionTitle);
181
+ return Math.max(sTit, sObj);
182
+ }
173
183
  async function retrieveCommissionCRs(options = {}) {
174
184
  const dataDir = options["dataDir"];
175
185
  const fromSession = Number(options["fromSession"]);
@@ -245,86 +255,94 @@ async function retrieveCommissionCRs(options = {}) {
245
255
  for (const f of htmlFiles) {
246
256
  const htmlPath = path.join(commissionDir, f);
247
257
  let meta;
258
+ let raw = "";
248
259
  try {
249
- const raw = await fs.readFile(htmlPath, "utf8");
260
+ raw = await fs.readFile(htmlPath, "utf8");
250
261
  meta = parseCommissionMetadataFromHtml(raw, f);
251
262
  }
252
263
  catch (e) {
253
264
  console.warn(`[COM-CR][PRE][${session}] Cannot read/parse ${f}:`, e);
254
265
  continue;
255
266
  }
256
- const organeLabel = meta?.organeTitleRaw;
257
- for (const day of meta.days ?? []) {
267
+ if (!meta?.days?.length)
268
+ continue;
269
+ const $ = cheerio.load(raw, { xmlMode: false });
270
+ for (const day of meta.days) {
258
271
  const yyyymmdd = day.date.replace(/-/g, "");
259
272
  const dt = new Date(Number(day.date.slice(0, 4)), Number(day.date.slice(5, 7)) - 1, Number(day.date.slice(8, 10)));
260
273
  const daySession = sessionStartYearFromDate(dt);
261
274
  let hits = await loadAgendaForDate(dataDir, yyyymmdd, daySession);
262
- let best = null;
263
- let reason = "fallback-none";
264
- let deltaMin;
265
- // gate + scoring combined
275
+ console.log(`[COM-CR][TRANSFORM] ${f} ${hits.length} agenda events on ${day.date} :`);
276
+ const sections = extractDayH3Sections($, day.date);
277
+ if (sections.length === 0) {
278
+ console.warn(`[COM-CR][TRANSFORM] no sections found for ${f} on ${day.date}, skipping.`);
279
+ continue;
280
+ }
266
281
  const MAX_TIME_DELTA_MIN = 120;
267
- const ORGANE_GATE = 0.55; // minimum similarity organe to be considered
268
- const W_ORG = 0.7, W_TIM = 0.3;
269
- if (hits.length) {
270
- // 1) Gate organe : only keep those above gate, then score with combined organe+time
271
- const gated = hits
272
- .map((h) => {
273
- const sOrg = organeSimilarity(h, commissionKey); // 0..1
274
- const sTim = timeProximityScore(h, day.openTime ?? null, MAX_TIME_DELTA_MIN); // 0..1
275
- const total = W_ORG * sOrg + +W_TIM * sTim;
276
- return { h, sOrg, sTim, total };
277
- })
278
- .filter((x) => x.sOrg >= ORGANE_GATE)
279
- .sort((a, b) => b.total - a.total);
280
- if (gated[0]) {
281
- best = gated[0].h;
282
- reason = gated[0].sOrg >= ORGANE_GATE ? "organe" : "fallback-none";
283
- if (day.openTime && best?.startTime) {
284
- deltaMin = Math.abs(timeToMinutes(best.startTime) - timeToMinutes(day.openTime));
282
+ const ORGANE_GATE = 0.55;
283
+ const TITLE_GATE = 0.2;
284
+ const W_ORG = 0.5;
285
+ const W_TIM = 0.2;
286
+ const W_TIT = 0.3;
287
+ for (let sIdx = 0; sIdx < sections.length; sIdx++) {
288
+ const sec = sections[sIdx];
289
+ let best = null;
290
+ let reason = "fallback-none";
291
+ if (hits.length) {
292
+ const scored = hits
293
+ .map((h) => {
294
+ const sOrg = organeSimilarity(h, commissionKey); // 0..1
295
+ const sTim = timeProximityScore(h, day.openTime ?? null, MAX_TIME_DELTA_MIN); // 0..1
296
+ const sTit = titleSimilarity(h, sec.title); // 0..1
297
+ const total = W_ORG * sOrg + W_TIM * sTim + W_TIT * sTit;
298
+ return { h, sOrg, sTim, sTit, total };
299
+ })
300
+ .filter((x) => x.sOrg >= ORGANE_GATE && x.sTit >= TITLE_GATE)
301
+ .sort((a, b) => b.total - a.total);
302
+ if (scored[0]) {
303
+ best = scored[0].h;
304
+ reason =
305
+ scored[0].sTit >= Math.max(scored[0].sOrg, scored[0].sTim)
306
+ ? "title"
307
+ : scored[0].sOrg >= scored[0].sTim
308
+ ? "organe"
309
+ : "time";
285
310
  }
286
311
  }
312
+ const hourShort = toHourShort(day.openTime) ?? "NA";
313
+ const cr = parseCommissionCRSectionFromDom($, htmlPath, {
314
+ dateISO: day.date,
315
+ hourShort,
316
+ organe: commissionKey,
317
+ section: sec,
318
+ matched: best ?? undefined,
319
+ });
320
+ if (!cr) {
321
+ console.warn(`[COM-CR][TRANSFORM] parse failed for section#${sIdx} ${path.basename(htmlPath)} → ${best ? best.uid : "NO-GROUP"} (${commissionKey})`);
322
+ continue;
323
+ }
324
+ const fileUid = cr.uid;
325
+ const transformedSessionDir = path.join(transformedRoot, String(daySession));
326
+ fs.ensureDirSync(transformedSessionDir);
327
+ const outPath = path.join(transformedSessionDir, `${fileUid}.json`);
328
+ await fs.writeJSON(outPath, cr, { spaces: 2 });
329
+ const titreGuess = cleanTitle(sections[sIdx].title) || "Commission du " + day.date;
330
+ const up = await linkCRtoCommissionGroup({
331
+ dataDir,
332
+ dateISO: day.date,
333
+ organeDetected: commissionKey,
334
+ hourShort,
335
+ crUid: fileUid,
336
+ titreGuess,
337
+ groupUid: best ? best.uid : undefined,
338
+ });
339
+ totalFiles++;
340
+ if (up.created || up.updated)
341
+ linkedFiles++;
287
342
  else {
288
- best = null;
289
- reason = "fallback-none";
343
+ console.warn(`[COM-CR][AGENDA][WARN] CR ${fileUid} (section#${sIdx}) not linked (reason=${reason})`);
290
344
  }
291
345
  }
292
- // Parse CR
293
- const hourShort = toHourShort(day.openTime) ?? "NA";
294
- const cr = parseCommissionCRFromFile(htmlPath, best ?? undefined, {
295
- dateISO: day.date,
296
- hourShort,
297
- organe: commissionKey,
298
- });
299
- if (!cr) {
300
- console.warn(`[COM-CR][TRANSFORM] parse failed for ${f} → ${best ? best.uid : "NO-GROUP"} (${commissionKey})`);
301
- continue;
302
- }
303
- const fileUid = cr.uid;
304
- const transformedSessionDir = path.join(transformedRoot, String(daySession));
305
- fs.ensureDirSync(transformedSessionDir);
306
- const outPath = path.join(transformedSessionDir, `${fileUid}.json`);
307
- await fs.writeJSON(outPath, cr, { spaces: 2 });
308
- const npts = Array.isArray(cr.contenu.point) ? cr.contenu.point.length : cr.contenu.point ? 1 : 0;
309
- if (!options["silent"]) {
310
- console.log(`[COM-CR][TRANSFORM] saved ${path.basename(outPath)} (points=${npts}) [${commissionKey}]`);
311
- }
312
- const titreGuess = organeLabel || "Commission";
313
- const up = await linkCRtoCommissionGroup({
314
- dataDir,
315
- dateISO: day.date,
316
- organeDetected: best?.organe ?? null,
317
- hourShort,
318
- crUid: fileUid,
319
- titreGuess,
320
- groupUid: best ? best.uid : undefined,
321
- });
322
- totalFiles++;
323
- if (up.created || up.updated)
324
- linkedFiles++;
325
- else {
326
- console.warn(`[COM-CR][AGENDA][WARN] CR ${fileUid} not linked to any agenda group (reason=${reason}, delta=${deltaMin ?? "NA"}m)`);
327
- }
328
346
  }
329
347
  }
330
348
  if (!options["silent"]) {
@@ -1,6 +1,4 @@
1
1
  import { GroupedReunion } from "../types/agenda";
2
- export declare function buildSenatVodMasterM3u8FromNvs(nvsText: string, finalText: string): string | null;
3
- export declare function score(agenda: GroupedReunion, agendaTs: number | null, videoTitle?: string, videoEpoch?: number): number;
4
2
  /**
5
3
  * Build search strategies for senat's videos
6
4
  */