@tricoteuses/senat 2.20.17 → 2.20.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@ import commandLineArgs from "command-line-args";
3
3
  import fs from "fs-extra";
4
4
  import path from "path";
5
5
  import pLimit from "p-limit";
6
+ import * as git from "../git";
6
7
  import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
7
8
  import { DATA_ORIGINAL_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
8
9
  import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findSenatRapportUrls, findSenatTexteUrls, } from "../model";
@@ -17,14 +18,26 @@ const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
17
18
  const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
18
19
  const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
19
20
  const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
/**
 * Optionally commits and pushes one dataset directory, then folds the result
 * into the running exit code.
 *
 * When `options.commit` is falsy nothing is done and `exitCode` is returned
 * unchanged. Otherwise `git.commitAndPush` is called with the fixed commit
 * message "Nouvelle moisson" and `options.remote`, and its return value
 * replaces `exitCode` only in two cases:
 *   - the running code is 10 and the new code is anything other than 10;
 *   - the running code is 0 and the new code is neither 0 nor 10.
 * A running code other than 0 or 10 is never overwritten.
 *
 * NOTE(review): the meaning of code 10 is defined in ../git, which is not
 * visible here; presumably "nothing to commit". Confirm before relying on it.
 *
 * @param datasetDir Directory handed to `git.commitAndPush`.
 * @param options Options object; only `commit` and `remote` are read.
 * @param exitCode Exit code accumulated so far.
 * @returns The exit code after taking this commit attempt into account.
 */
21
+ function commitGit(datasetDir, options, exitCode) {
22
+ if (options.commit) {
23
+ const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options.remote);
24
+ if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
25
+ exitCode = errorCode;
26
+ }
27
+ }
28
+ return exitCode;
29
+ }
20
30
  async function convertData() {
21
31
  const dataDir = options["dataDir"];
22
32
  assert(dataDir, "Missing argument: data directory");
23
33
  const enabledDatasets = getEnabledDatasets(options["categories"]);
24
34
  console.time("data transformation time");
35
+ let exitCode = 0;
25
36
  if (enabledDatasets & EnabledDatasets.Ameli) {
26
37
  try {
27
38
  await convertDatasetAmeli(dataDir, options);
39
+ const ameliDir = path.join(dataDir, datasets.ameli.database);
40
+ exitCode = commitGit(ameliDir, options, exitCode);
28
41
  }
29
42
  catch (error) {
30
43
  console.error(`Error converting Ameli dataset:`, error);
@@ -33,6 +46,8 @@ async function convertData() {
33
46
  if (enabledDatasets & EnabledDatasets.Debats) {
34
47
  try {
35
48
  await convertDatasetDebats(dataDir, options);
49
+ const debatsDir = path.join(dataDir, datasets.debats.database);
50
+ exitCode = commitGit(debatsDir, options, exitCode);
36
51
  }
37
52
  catch (error) {
38
53
  console.error(`Error converting Debats dataset:`, error);
@@ -41,12 +56,16 @@ async function convertData() {
41
56
  if (enabledDatasets & EnabledDatasets.DosLeg) {
42
57
  try {
43
58
  await convertDatasetDosLeg(dataDir, options);
59
+ const doslegDir = path.join(dataDir, datasets.dosleg.database);
60
+ exitCode = commitGit(doslegDir, options, exitCode);
44
61
  }
45
62
  catch (error) {
46
63
  console.error(`Error converting DosLeg dataset:`, error);
47
64
  }
48
65
  try {
49
66
  await convertDatasetScrutins(dataDir, options);
67
+ const scrutinsDir = path.join(dataDir, SCRUTINS_FOLDER);
68
+ exitCode = commitGit(scrutinsDir, options, exitCode);
50
69
  }
51
70
  catch (error) {
52
71
  console.error(`Error converting Scrutins dataset:`, error);
@@ -55,6 +74,8 @@ async function convertData() {
55
74
  if (enabledDatasets & EnabledDatasets.Questions) {
56
75
  try {
57
76
  await convertDatasetQuestions(dataDir);
77
+ const questionsDir = path.join(dataDir, datasets.questions.database);
78
+ exitCode = commitGit(questionsDir, options, exitCode);
58
79
  }
59
80
  catch (error) {
60
81
  console.error(`Error converting Questions dataset:`, error);
@@ -63,6 +84,8 @@ async function convertData() {
63
84
  if (enabledDatasets & EnabledDatasets.Sens) {
64
85
  try {
65
86
  await convertDatasetSens(dataDir);
87
+ const sensDir = path.join(dataDir, datasets.sens.database);
88
+ exitCode = commitGit(sensDir, options, exitCode);
66
89
  }
67
90
  catch (error) {
68
91
  console.error(`Error converting Sens dataset:`, error);
@@ -71,6 +94,7 @@ async function convertData() {
71
94
  if (!options["silent"]) {
72
95
  console.timeEnd("data transformation time");
73
96
  }
97
+ return exitCode;
74
98
  }
75
99
  async function convertDatasetAmeli(dataDir, options) {
76
100
  const dataset = datasets.ameli;
@@ -284,7 +308,7 @@ async function convertDatasetSens(dataDir) {
284
308
  }
285
309
  }
286
310
  convertData()
287
- .then(() => process.exit(0))
311
+ .then((exitCode) => process.exit(exitCode || 0))
288
312
  .catch((error) => {
289
313
  console.log(error);
290
314
  process.exit(1);
@@ -9,7 +9,7 @@ import { getSessionsFromStart } from "../types/sessions";
9
9
  import { ID_DATE_FORMAT } from "./datautil";
10
10
  import { commonOptions } from "./shared/cli_helpers";
11
11
  import { fetchWithRetry } from "./shared/util";
12
- import { groupNonSPByTypeOrganeHour, groupSeancePubliqueBySlot } from "../utils/reunion_grouping";
12
+ import { buildReunionsByBucket } from "../utils/reunion_parsing";
13
13
  import { buildSenatDossierIndex } from "../utils/reunion_odj_building";
14
14
  const optionsDefinitions = [
15
15
  ...commonOptions,
@@ -103,25 +103,14 @@ async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPa
103
103
  return;
104
104
  const flatPath = path.join(transformedAgendaSessionDir, `${agendaFileName}.json`);
105
105
  fs.writeJSONSync(flatPath, parsedAgendaEvents, { spaces: 2 });
106
- // 1) SP → grouped by (date, slot)
107
- const spGrouped = groupSeancePubliqueBySlot(parsedAgendaEvents, dossierBySenatUrl);
108
- // a) on a un Record<TimeSlot, GroupedReunion[]>, on le transforme en array
109
- const spGroups = Object.values(spGrouped).flat();
110
- // b) (reco) trier pour stabilité, comme pour les NON-SP
111
- const PARIS = "Europe/Paris";
112
- spGroups.sort((a, b) => {
113
- const da = DateTime.fromISO(`${a.date}T${a.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
114
- const db = DateTime.fromISO(`${b.date}T${b.startTime || "00:00:00.000+02:00"}`, { zone: PARIS }).toMillis();
115
- // en cas d’égalité, ordre par slot pour stabilité
116
- return da - db || (a.slot || "UNKNOWN").localeCompare(b.slot || "UNKNOWN");
117
- });
118
- if (spGroups.length > 0) {
119
- writeGroupsAsFiles(transformedAgendaSessionDir, spGroups);
106
+ const byBucket = buildReunionsByBucket(parsedAgendaEvents, dossierBySenatUrl);
107
+ // SP
108
+ if (byBucket.IDS.length > 0) {
109
+ writeGroupsAsFiles(transformedAgendaSessionDir, byBucket.IDS);
120
110
  }
121
- // 2) NON-SP → grouped by (date, organe, hour)
122
- const groupedBySuffix = groupNonSPByTypeOrganeHour(parsedAgendaEvents, dossierBySenatUrl);
111
+ // NON-SP
123
112
  for (const suffix of ["IDC", "IDM", "IDO", "IDI"]) {
124
- const groups = groupedBySuffix[suffix] || [];
113
+ const groups = byBucket[suffix];
125
114
  if (groups.length > 0) {
126
115
  writeGroupsAsFiles(transformedAgendaSessionDir, groups);
127
116
  }
@@ -10,7 +10,7 @@ import { commonOptions } from "./shared/cli_helpers";
10
10
  import { sessionStartYearFromDate } from "../model/seance";
11
11
  import { getSessionsFromStart } from "../types/sessions";
12
12
  import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
13
- import { jaccardTokenSim } from "../model/util";
13
+ import { jaccard, jaccardTokenSim } from "../utils/scoring";
14
14
  class CommissionCRDownloadError extends Error {
15
15
  constructor(message, url) {
16
16
  super(`An error occurred while retrieving Commission CR ${url}: ${message}`);
@@ -138,15 +138,6 @@ function toTokens(s) {
138
138
  .split(/\s+/)
139
139
  .filter((t) => t.length >= 3 && !["commission", "des", "de", "du", "d", "la", "le", "les", "et"].includes(t)));
140
140
  }
141
- function jaccard(a, b) {
142
- if (!a.size || !b.size)
143
- return 0;
144
- let inter = 0;
145
- for (const t of a)
146
- if (b.has(t))
147
- inter++;
148
- return inter / (a.size + b.size - inter);
149
- }
150
141
  function reunionOrganeCandidates(h) {
151
142
  const any = h;
152
143
  const out = [any.organeSlug, any.organeKey, any.organe, h.titre].filter(Boolean);
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Needs to be run after retrieve_agenda.ts !
2
+ * Needs to be run after the retrieve_agenda.ts script!
3
3
  * - downloads the ZIP of comptes-rendus des débats (CRI) from data.senat.fr
4
4
  * - extracts XML files, distributes them by session/year
5
5
  */
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Needs to be run after retrieve_agenda.ts !
2
+ * Needs to be run after the retrieve_agenda.ts script!
3
3
  * - downloads the ZIP of comptes-rendus des débats (CRI) from data.senat.fr
4
4
  * - extracts XML files, distributes them by session/year
5
5
  */
@@ -11,11 +11,12 @@ import StreamZip from "node-stream-zip";
11
11
  import * as cheerio from "cheerio";
12
12
  import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
13
13
  import { commonOptions } from "./shared/cli_helpers";
14
- import { deriveTitreObjetFromSommaire, parseCompteRenduSlotFromFile, parseYYYYMMDD, sessionStartYearFromDate, } from "../model/seance";
15
- import { makeGroupUid } from "../utils/reunion_grouping";
14
+ import { parseCompteRenduIntervalFromFile, sessionStartYearFromDate } from "../model/seance";
15
+ import { extractSommaireBlocks, makeReunionUid } from "../utils/reunion_parsing";
16
16
  import { getSessionsFromStart } from "../types/sessions";
17
- import { fetchWithRetry } from "./shared/util";
18
- import { computeIntervalsBySlot } from "../utils/cr_spliting";
17
+ import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
18
+ import { isNoiseBlock, scoreSommaireBlockForEvent } from "../utils/scoring";
19
+ import { parseYYYYMMDD } from "../utils/date";
19
20
  const optionsDefinitions = [
20
21
  ...commonOptions,
21
22
  {
@@ -26,49 +27,11 @@ const optionsDefinitions = [
26
27
  ];
27
28
  const options = commandLineArgs(optionsDefinitions);
28
29
  const CRI_ZIP_URL = "https://data.senat.fr/data/debats/cri.zip";
29
- const SLOT_ORDER = ["MATIN", "APRES-MIDI", "SOIR"];
/**
 * Error type for a failed retrieval. The message is built from the given URL
 * and the underlying failure message.
 *
 * @param message Description of what went wrong.
 * @param url URL that was being retrieved.
 */
30
30
  class CompteRenduError extends Error {
31
31
  constructor(message, url) {
32
32
  super(`An error occurred while retrieving ${url}: ${message}`);
33
33
  }
34
34
  }
35
- function pickFirstSlotOfDay(slots) {
36
- for (const s of SLOT_ORDER)
37
- if (slots.includes(s))
38
- return s;
39
- return null;
40
- }
41
- function loadAgendaSPSlotsForDate(dataDir, yyyymmdd, session) {
42
- const dirPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
43
- if (!fs.existsSync(dirPath)) {
44
- console.warn(`[AGENDA] Directory not found for session ${session} → ${dirPath}`);
45
- return null;
46
- }
47
- const pattern = new RegExp(`^RUSN${yyyymmdd}IDS-(MATIN|APRES-MIDI|SOIR)\\.json$`);
48
- const ALLOWED_SLOTS = new Set(["MATIN", "APRES-MIDI", "SOIR"]);
49
- try {
50
- const files = fs.readdirSync(dirPath);
51
- const matched = files.filter((f) => pattern.test(f));
52
- if (matched.length === 0) {
53
- return null;
54
- }
55
- const found = new Set();
56
- for (const name of matched) {
57
- const m = name.match(pattern);
58
- const raw = (m?.[1] ?? "");
59
- if (ALLOWED_SLOTS.has(raw))
60
- found.add(raw);
61
- }
62
- const slots = Array.from(found);
63
- if (slots.length === 0) {
64
- return null;
65
- }
66
- return { filePath: dirPath, slots };
67
- }
68
- catch {
69
- return null;
70
- }
71
- }
72
35
  async function downloadCriZip(zipPath) {
73
36
  if (!options["silent"])
74
37
  console.log(`Downloading CRI zip ${CRI_ZIP_URL}…`);
@@ -117,10 +80,19 @@ export async function retrieveCriXmlDump(dataDir, options = {}) {
117
80
  const root = path.join(dataDir, COMPTES_RENDUS_FOLDER);
118
81
  ensureDirSync(root);
119
82
  const originalRoot = path.join(root, DATA_ORIGINAL_FOLDER);
120
- fs.ensureDirSync(originalRoot);
83
+ if (!options["keepDir"]) {
84
+ ensureAndClearDir(originalRoot);
85
+ }
86
+ else {
87
+ fs.ensureDirSync(originalRoot);
88
+ }
121
89
  const transformedRoot = path.join(root, DATA_TRANSFORMED_FOLDER);
122
- if (options["parseDebats"])
90
+ if (!options["keepDir"]) {
91
+ ensureAndClearDir(transformedRoot);
92
+ }
93
+ else {
123
94
  fs.ensureDirSync(transformedRoot);
95
+ }
124
96
  const sessions = getSessionsFromStart(options["fromSession"]);
125
97
  // 1) Download the global ZIP and distribute its files by session
126
98
  const zipPath = path.join(dataDir, "cri.zip");
@@ -158,77 +130,208 @@ export async function retrieveCriXmlDump(dataDir, options = {}) {
158
130
  for (const f of xmlFiles) {
159
131
  const yyyymmdd = f.slice(1, 9);
160
132
  const xmlPath = path.join(originalSessionDir, f);
133
+ // === ONLY-RECENT
161
134
  if (options["only-recent"]) {
162
135
  const cutoff = now - options["only-recent"] * 24 * 3600 * 1000;
163
- const seanceTs = Date.parse(yyyymmdd.slice(0, 4) + "-" + yyyymmdd.slice(4, 6) + "-" + yyyymmdd.slice(6, 8));
136
+ const seanceTs = Date.parse(`${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`);
164
137
  if (seanceTs < cutoff) {
165
- // Check if some file exists sarting with CRSSN{yyyymmdd} in transformed dir
166
138
  const files = await fs.readdir(transformedSessionDir);
167
- const dayFiles = files.filter((fn) => fn.startsWith(`CRSSN${yyyymmdd}-`) && fn.endsWith(".json"));
139
+ const dayFiles = files.filter((fn) => fn.startsWith(`CRSSN${yyyymmdd}E`) && fn.endsWith(".json"));
168
140
  if (dayFiles.length > 0) {
169
- // Link existing files to agendas
170
141
  for (const fn of dayFiles) {
171
- const match = fn.match(/^CRSSN(\d{8})-(.+)\.json$/);
172
- const slot = match?.[2];
142
+ const match = fn.match(/^CRSSN(\d{8})E(.+)\.json$/);
143
+ const eventId = match?.[2];
144
+ if (!eventId)
145
+ continue;
173
146
  const crPath = path.join(transformedSessionDir, fn);
174
147
  try {
175
148
  const cr = await fs.readJSON(crPath);
176
- await linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, cr.uid, cr, session);
149
+ await linkCriEventIntoAgenda(dataDir, yyyymmdd, eventId, cr.uid, cr, session);
177
150
  }
178
151
  catch (e) {
179
- console.warn(`[AGENDA] [${session}] Could not link existing CR into grouped for ${yyyymmdd} ${slot}:`, e);
152
+ console.warn(`[CR] [${session}] Could not relink existing CR into a reunion for ${yyyymmdd} event=${eventId}:`, e);
180
153
  }
181
154
  }
182
155
  continue;
183
156
  }
184
157
  }
185
158
  }
186
- // 1) Deduce slot(s) from agenda if it exsits
187
- const agendaInfo = loadAgendaSPSlotsForDate(dataDir, yyyymmdd, session);
188
- const firstSlotOfDay = pickFirstSlotOfDay(agendaInfo?.slots ?? []);
189
- // 2) Detect slots from CRI content
190
- let slotsInCri = [];
159
+ // === Charger les events SP du jour depuis les agendas groupés ===
160
+ const dayEvents = await loadAgendaSpEventsForDate(dataDir, yyyymmdd, session);
161
+ if (dayEvents.length === 0) {
162
+ console.warn(`[CRI] [${session}] No agenda SP events found for ${yyyymmdd} → skip split/link`);
163
+ continue;
164
+ }
165
+ // === Lire XML + construire index DOM ===
166
+ let raw;
167
+ let $;
168
+ let order;
169
+ let idx;
191
170
  try {
192
- const raw = await fs.readFile(xmlPath, "utf8");
193
- const $ = cheerio.load(raw, { xml: false });
194
- const order = $("body *").toArray();
195
- const idx = new Map(order.map((el, i) => [el, i]));
196
- const intervals = computeIntervalsBySlot($, idx, firstSlotOfDay ?? undefined);
197
- const uniq = new Set();
198
- for (const iv of intervals)
199
- if (iv.slot && iv.slot !== "UNKNOWN")
200
- uniq.add(iv.slot);
201
- slotsInCri = Array.from(uniq);
171
+ raw = await fs.readFile(xmlPath, "utf8");
172
+ $ = cheerio.load(raw, { xml: false });
173
+ order = $("body *").toArray();
174
+ idx = new Map(order.map((el, i) => [el, i]));
202
175
  }
203
176
  catch (e) {
204
177
  console.warn(`[CRI] [${session}] Cannot read/parse ${f}:`, e);
205
178
  continue;
206
179
  }
207
- if (slotsInCri.length === 0) {
208
- slotsInCri = [firstSlotOfDay ?? "MATIN"];
180
+ // === Extraire sommaire + matcher vers events agenda ===
181
+ const blocks = extractSommaireBlocks($, idx);
182
+ const intervals = buildIntervalsByAgendaEvents($, idx, order, blocks, dayEvents);
183
+ if (!intervals.length) {
184
+ console.warn(`[CRI] [${session}] No confident split intervals for ${yyyymmdd} → skip`);
185
+ continue;
209
186
  }
210
- // 3) Parse & write each slot
211
- for (const slot of slotsInCri) {
212
- const outName = `CRSSN${yyyymmdd}-${slot}.json`;
213
- const cr = await parseCompteRenduSlotFromFile(xmlPath, slot, firstSlotOfDay ?? slot);
187
+ // === Parser / écrire / linker chaque segment par event ===
188
+ for (const iv of intervals) {
189
+ const outName = `CRSSN${yyyymmdd}E${iv.agendaEventId}.json`;
190
+ const outPath = path.join(transformedSessionDir, outName);
191
+ const cr = await parseCompteRenduIntervalFromFile(xmlPath, iv.startIndex, iv.endIndex, iv.agendaEventId);
214
192
  if (!cr) {
215
- console.warn(`[CRI] [${session}] Empty or no points for ${yyyymmdd} (${slot}) → skip`);
193
+ console.warn(`[CRI] [${session}] Empty or no points for ${yyyymmdd} event=${iv.agendaEventId} → skip`);
216
194
  continue;
217
195
  }
218
- const outDir = transformedSessionDir;
219
- await fs.ensureDir(outDir);
220
- const outPath = path.join(outDir, outName);
196
+ await fs.ensureDir(transformedSessionDir);
221
197
  await fs.writeJSON(outPath, cr, { spaces: 2 });
222
198
  try {
223
- await linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, cr.uid, cr, session);
199
+ await linkCriEventIntoAgenda(dataDir, yyyymmdd, iv.agendaEventId, cr.uid, cr, session);
224
200
  }
225
201
  catch (e) {
226
- console.warn(`[AGENDA] [${session}] Could not link CR into grouped for ${yyyymmdd} ${slot}:`, e);
202
+ console.warn(`[CR] [${session}] Could not link CR into agenda for ${yyyymmdd} event=${iv.agendaEventId}:`, e);
227
203
  }
228
204
  }
229
205
  }
230
206
  }
231
207
  }
/**
 * Records the UID of a compte-rendu on the reunion JSON file that corresponds
 * to one "SP" agenda event.
 *
 * The reunion file is looked up in
 * <dataDir>/<AGENDA_FOLDER>/<DATA_TRANSFORMED_FOLDER>/<session>/ under the name
 * returned by makeReunionUid(dateISO, "SP", agendaEventId, null) plus ".json".
 * If the file is absent or cannot be read as JSON, a warning is printed and
 * nothing is written. Otherwise `compteRenduRefUid` is set to `crUid` and the
 * file is rewritten with 2-space indentation.
 *
 * NOTE(review): the session directory is created by ensureDirSync even when no
 * reunion file is found, and the `cr` parameter is not read by this body.
 *
 * @param dataDir Root data directory.
 * @param yyyymmdd Date as 8 digits; sliced into YYYY-MM-DD.
 * @param agendaEventId Identifier of the agenda event, passed to makeReunionUid.
 * @param crUid UID stored in the reunion's `compteRenduRefUid` field.
 * @param cr Parsed compte-rendu (unused here).
 * @param session Session value; its string form names the directory.
 * @returns A promise that resolves once the file is written or the link is skipped.
 */
208
+ async function linkCriEventIntoAgenda(dataDir, yyyymmdd, agendaEventId, crUid, cr, session) {
209
+ const agendadDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
210
+ fs.ensureDirSync(agendadDir);
211
+ const dateISO = `${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`;
212
+ const agendaUid = makeReunionUid(dateISO, "SP", agendaEventId, null);
213
+ const agendaPath = path.join(agendadDir, `${agendaUid}.json`);
214
+ let agenda = null;
215
+ if (await fs.pathExists(agendaPath)) {
216
+ try {
217
+ agenda = await fs.readJSON(agendaPath);
218
+ }
219
+ catch (e) {
220
+ console.warn(`[CR] unreadable reunion JSON → ${agendaPath} (${e})`);
221
+ agenda = null;
222
+ }
223
+ }
224
+ if (!agenda) {
225
+ console.warn(`[CR] Missing reunion file for SP event=${agendaEventId}: ${agendaPath}`);
226
+ return;
227
+ }
228
+ ;
229
+ agenda.compteRenduRefUid = crUid;
230
+ await fs.writeJSON(agendaPath, agenda, { spaces: 2 });
231
+ console.log(`[CR] Linked CR ${crUid} → ${path.basename(agendaPath)} (event=${agendaEventId})`);
232
+ }
/**
 * Splits the document's element order into one index interval per agenda
 * event, using the sommaire blocks as split points.
 *
 * For every block that isNoiseBlock does not reject, each event of the day is
 * scored with scoreSommaireBlockForEvent, keeping the best event and the
 * second-best score. The block's start position is the index of the element
 * referenced by `targetId` (via resolveTargetIndex), falling back to the
 * block's own `startIndex`. A block is dropped when:
 *   - its start lies before the first `div.intervenant` and no target element
 *     was resolved;
 *   - its best score is below MIN_SCORE (0.65);
 *   - its best score beats the second-best by less than MIN_GAP (0.08).
 * Surviving blocks become pivots. Only the earliest pivot per event is kept,
 * and each interval runs from its pivot to the element just before the next
 * pivot; the last interval ends at `order.length - 1`.
 *
 * NOTE(review): blocks are read as `{ text, targetId, startIndex }` and events
 * as objects with an `id`; both shapes are inferred from usage here, so
 * confirm them against extractSommaireBlocks and the agenda files.
 *
 * @param $ Loaded cheerio document.
 * @param idx Map from element to its position in `order`.
 * @param order Elements of the document in traversal order.
 * @param blocks Sommaire blocks to match.
 * @param dayEvents Agenda events of the day to match against.
 * @returns Array of `{ agendaEventId, startIndex, endIndex, score }` sorted by
 *          `startIndex`, or an empty array when no block passes the thresholds.
 */
233
+ function buildIntervalsByAgendaEvents($, idx, order, blocks, dayEvents) {
234
+ const MIN_SCORE = 0.65;
235
+ const MIN_GAP = 0.08;
236
+ const firstIntervenant = $("div.intervenant").first()[0];
237
+ const firstIntervenantIdx = firstIntervenant ? (idx.get(firstIntervenant) ?? null) : null;
238
+ const pivots = [];
239
+ for (const b of blocks) {
240
+ if (isNoiseBlock(b.text))
241
+ continue;
242
+ let best = null;
243
+ let second = 0;
244
+ for (const ev of dayEvents) {
245
+ const s = scoreSommaireBlockForEvent(b.text, ev);
246
+ if (!best || s > best.score) {
247
+ second = best?.score ?? second;
248
+ best = { ev, score: s };
249
+ }
250
+ else if (s > second) {
251
+ second = s;
252
+ }
253
+ }
254
+ if (!best)
255
+ continue;
256
+ const resolved = resolveTargetIndex($, idx, b.targetId);
257
+ const contentStartIndex = resolved ?? b.startIndex;
258
+ if (firstIntervenantIdx != null && contentStartIndex < firstIntervenantIdx && resolved == null) {
259
+ continue;
260
+ }
261
+ if (best.score < MIN_SCORE)
262
+ continue;
263
+ if (best.score - second < MIN_GAP)
264
+ continue;
265
+ pivots.push({
266
+ agendaEventId: best.ev.id,
267
+ startIndex: contentStartIndex,
268
+ score: best.score,
269
+ });
270
+ }
271
+ if (pivots.length === 0)
272
+ return [];
273
+ // Dedupe by event (keep the first startIndex)
274
+ const byEvent = new Map();
275
+ for (const p of pivots.sort((a, b) => a.startIndex - b.startIndex)) {
276
+ if (!byEvent.has(p.agendaEventId)) {
277
+ byEvent.set(p.agendaEventId, {
278
+ startIndex: p.startIndex,
279
+ score: p.score,
280
+ });
281
+ }
282
+ }
283
+ const sorted = Array.from(byEvent.entries())
284
+ .map(([agendaEventId, v]) => ({
285
+ agendaEventId,
286
+ startIndex: v.startIndex,
287
+ score: v.score,
288
+ }))
289
+ .sort((a, b) => a.startIndex - b.startIndex);
290
+ // Build the intervals
291
+ const intervals = [];
292
+ for (let i = 0; i < sorted.length; i++) {
293
+ const cur = sorted[i];
294
+ const next = sorted[i + 1];
295
+ const endIndex = next ? next.startIndex - 1 : order.length - 1;
296
+ intervals.push({
297
+ agendaEventId: cur.agendaEventId,
298
+ startIndex: cur.startIndex,
299
+ endIndex,
300
+ score: cur.score,
301
+ });
302
+ }
303
+ return intervals;
304
+ }
/**
 * Collects the "Séance publique" agenda events stored for one date.
 *
 * Looks in <dataDir>/<AGENDA_FOLDER>/<DATA_TRANSFORMED_FOLDER>/<session>/ for
 * files whose name starts with `RUSN<yyyymmdd>IDS` and ends with ".json".
 * For each file, only the first entry of its `events` array is considered,
 * and it is kept when its `type` is exactly "Séance publique".
 *
 * Files that cannot be read or parsed are skipped without any message (the
 * catch clause is empty), so a corrupt file is indistinguishable from a file
 * with no matching event.
 *
 * @param dataDir Root data directory.
 * @param yyyymmdd Date as 8 digits, used in the file-name prefix.
 * @param session Session value; its string form names the directory.
 * @returns Promise of the matching events; empty when the directory does not
 *          exist or nothing matches.
 */
305
+ async function loadAgendaSpEventsForDate(dataDir, yyyymmdd, session) {
306
+ const agendasDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
307
+ if (!(await fs.pathExists(agendasDir)))
308
+ return [];
309
+ const files = (await fs.readdir(agendasDir)).filter((fn) => fn.startsWith(`RUSN${yyyymmdd}IDS`) && fn.endsWith(".json"));
310
+ const events = [];
311
+ for (const fn of files) {
312
+ try {
313
+ const g = (await fs.readJSON(path.join(agendasDir, fn)));
314
+ const e = g?.events?.[0];
315
+ if (e && e.type === "Séance publique")
316
+ events.push(e);
317
+ }
318
+ catch { }
319
+ }
320
+ return events;
321
+ }
/**
 * Backslash-escapes every backslash, then every double quote, in `s`.
 *
 * NOTE(review): despite the name, this is not CSS identifier escaping. It only
 * makes the value safe to embed between double quotes, which is how
 * resolveTargetIndex uses it in `[id="..."]` / `[name="..."]` selectors.
 *
 * @param s String to escape.
 * @returns The escaped string.
 */
322
+ function cssEscapeIdent(s) {
323
+ return s.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
324
+ }
/**
 * Finds the position of the element that a sommaire entry points at.
 *
 * The element is the first one whose `id` attribute equals `targetId`, or
 * failing that the first one whose `name` attribute equals it. Its position
 * is then read from `idx`.
 *
 * @param $ Loaded cheerio document.
 * @param idx Map from element to its position in the document order.
 * @param targetId Attribute value to look for; may be empty or missing.
 * @returns The element's index, or null when `targetId` is falsy, no element
 *          matches, or the element is not present in `idx`.
 */
325
+ function resolveTargetIndex($, idx, targetId) {
326
+ if (!targetId)
327
+ return null;
328
+ const safe = cssEscapeIdent(targetId);
329
+ const el = $(`[id="${safe}"]`)[0] || $(`[name="${safe}"]`)[0];
330
+ if (!el)
331
+ return null;
332
+ const i = idx.get(el);
333
+ return i == null ? null : i;
334
+ }
232
335
  async function main() {
233
336
  const dataDir = options["dataDir"];
234
337
  assert(dataDir, "Missing argument: data directory");
@@ -242,50 +345,3 @@ main()
242
345
  console.error(error);
243
346
  process.exit(1);
244
347
  });
245
- async function linkCriSlotIntoAgendaGrouped(dataDir, yyyymmdd, slot, crUid, cr, session) {
246
- const groupedDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, session.toString());
247
- fs.ensureDirSync(groupedDir);
248
- const groupedPath = path.join(groupedDir, `RUSN${yyyymmdd}IDS-${slot}.json`);
249
- let group = null;
250
- if (fs.existsSync(groupedPath)) {
251
- try {
252
- const parsed = JSON.parse(fs.readFileSync(groupedPath, "utf8"));
253
- if (Array.isArray(parsed)) {
254
- // Take correct slot if multiple or first one if no direct match ?
255
- group = parsed.find((g) => g?.slot === slot) ?? parsed[0] ?? null;
256
- }
257
- else {
258
- group = parsed;
259
- }
260
- }
261
- catch (e) {
262
- console.warn(`[AGENDA] unreadable grouped JSON → ${groupedPath} (${e}) → recreating`);
263
- group = null;
264
- }
265
- }
266
- const dateISO = `${yyyymmdd.slice(0, 4)}-${yyyymmdd.slice(4, 6)}-${yyyymmdd.slice(6, 8)}`;
267
- const sommaire = cr?.metadonnees?.sommaire;
268
- const { titre: dTitre, objet: dObjet } = deriveTitreObjetFromSommaire(sommaire, slot);
269
- // Création si manquant
270
- if (!group) {
271
- group = {
272
- uid: makeGroupUid(dateISO, slot),
273
- chambre: "SN",
274
- date: dateISO,
275
- slot,
276
- type: "Séance publique",
277
- startTime: null,
278
- endTime: null,
279
- captationVideo: false,
280
- titre: dTitre,
281
- objet: dObjet || "",
282
- events: [],
283
- compteRenduRefUid: crUid,
284
- };
285
- }
286
- else {
287
- group.compteRenduRefUid = crUid;
288
- }
289
- await fs.writeJSON(groupedPath, group, { spaces: 2 });
290
- console.log(`[AGENDA] Linked CR ${crUid} → ${path.basename(groupedPath)} [${slot}]`);
291
- }
@@ -1 +1 @@
1
- export declare function buildSenatVodMasterM3u8FromNvs(nvsText: string): string | null;
1
+ export {};