@tricoteuses/senat 2.13.1 → 2.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/loaders.d.ts CHANGED
@@ -3,7 +3,7 @@ import { DebatResult } from "./model/debats";
3
3
  import { DossierLegislatifResult } from "./model/dosleg";
4
4
  import { QuestionResult } from "./model/questions";
5
5
  import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
6
- import { AgendaEvent, GroupedReunion } from "./types/agenda";
6
+ import { GroupedReunion } from "./types/agenda";
7
7
  import { FlatTexte } from "./types/texte";
8
8
  import { CompteRendu } from "./types/compte_rendu";
9
9
  export { EnabledDatasets } from "./datasets";
@@ -83,8 +83,6 @@ export declare function loadSenatTexteContent(dataDir: string, textePathFromData
83
83
  export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
84
84
  item: CompteRendu | null;
85
85
  };
86
- export declare function iterLoadSenatAgendas(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AgendaEvent[]>>;
87
- export declare function iterLoadSenatEvenements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AgendaEvent>>;
88
86
  export declare function iterLoadSenatAgendasGrouped(dataDir: string, session: number | undefined): Generator<IterItem<GroupedReunion>>;
89
87
  export declare function iterLoadSenatCirconscriptions(dataDir: string, options?: {}): Generator<IterItem<CirconscriptionResult>>;
90
88
  export declare function iterLoadSenatOrganismes(dataDir: string, options?: {}): Generator<IterItem<OrganismeResult>>;
package/lib/loaders.js CHANGED
@@ -183,18 +183,6 @@ export function loadSenatCompteRenduContent(dataDir, session, debatId) {
183
183
  const json = fs.readFileSync(fullPath, { encoding: "utf8" });
184
184
  return { item: JSON.parse(json) };
185
185
  }
186
- export function* iterLoadSenatAgendas(dataDir, session, options = {}) {
187
- for (const evenementsItem of iterLoadSenatItems(dataDir, AGENDA_FOLDER, session, DATA_TRANSFORMED_FOLDER, options)) {
188
- yield evenementsItem;
189
- }
190
- }
191
- export function* iterLoadSenatEvenements(dataDir, session, options = {}) {
192
- for (const evenementsItem of iterLoadSenatItems(dataDir, AGENDA_FOLDER, session, DATA_TRANSFORMED_FOLDER, options)) {
193
- for (const evenement of evenementsItem.item) {
194
- yield { item: evenement };
195
- }
196
- }
197
- }
198
186
  export function* iterLoadSenatAgendasGrouped(dataDir, session) {
199
187
  const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? ""));
200
188
  if (!fs.existsSync(baseDir))
@@ -1 +1,7 @@
1
- export {};
1
+ import { GroupedReunion } from "../types/agenda";
2
+ export declare function buildSenatVodMasterM3u8FromNvs(nvsText: string, finalText: string): string | null;
3
+ export declare function score(agenda: GroupedReunion, agendaTs: number | null, videoTitle?: string, videoEpoch?: number): number;
4
+ /**
5
+ * Build search strategies for senat's videos
6
+ */
7
+ export declare function buildSearchStrategies(agenda: GroupedReunion): Array<Record<string, string>>;
@@ -4,12 +4,11 @@ import commandLineArgs from "command-line-args";
4
4
  import fs from "fs-extra";
5
5
  import fsp from "fs/promises";
6
6
  import path from "path";
7
- import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatAgendas, } from "../loaders";
7
+ import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatAgendasGrouped } from "../loaders";
8
8
  import { getSessionsFromStart } from "../types/sessions";
9
9
  import { commonOptions } from "./shared/cli_helpers";
10
- import { formatYYYYMMDD, makeReunionUid } from "../utils/reunion_grouping";
11
10
  // ===================== Constants =====================
12
- const MATCH_THRESHOLD = 0.60;
11
+ const MATCH_THRESHOLD = 0.6;
13
12
  const MAX_CANDIDATES = 15;
14
13
  const MAX_PAGES = 3;
15
14
  const STATS = { total: 0, accepted: 0 };
@@ -18,9 +17,7 @@ const SENAT_VIDEOS_SEARCH_AJAX = "https://videos.senat.fr/senat_videos_search.ph
18
17
  const SENAT_DATAS_ROOT = "https://videos.senat.fr/Datas/senat";
19
18
  const SENAT_VOD_HOST = "https://vodsenat.akamaized.net";
20
19
  // ===================== CLI =====================
21
- const optionsDefinitions = [
22
- ...commonOptions,
23
- ];
20
+ const optionsDefinitions = [...commonOptions];
24
21
  const options = commandLineArgs(optionsDefinitions);
25
22
  // ===================== Utils =====================
26
23
  function normalize(s) {
@@ -32,7 +29,9 @@ function normalize(s) {
32
29
  .replace(/\s+/g, " ")
33
30
  .trim();
34
31
  }
35
- function tokens(s) { return normalize(s).split(" ").filter(Boolean); }
32
+ function tokens(s) {
33
+ return normalize(s).split(" ").filter(Boolean);
34
+ }
36
35
  function dice(a, b) {
37
36
  const A = new Set(tokens(a)), B = new Set(tokens(b));
38
37
  if (!A.size || !B.size)
@@ -46,7 +45,7 @@ function dice(a, b) {
46
45
  // Heuristic for Europe/Paris DST: +02:00 ≈ April→October, +01:00 otherwise.
47
46
  function parisOffsetForDate(dateYYYYMMDD) {
48
47
  const m = Number(dateYYYYMMDD.split("-")[1] || "1");
49
- return (m >= 4 && m <= 10) ? "+02:00" : "+01:00";
48
+ return m >= 4 && m <= 10 ? "+02:00" : "+01:00";
50
49
  }
51
50
  function epochToParisDateTime(epochSec) {
52
51
  if (!Number.isFinite(epochSec))
@@ -54,7 +53,7 @@ function epochToParisDateTime(epochSec) {
54
53
  const dUtc = new Date(epochSec * 1000);
55
54
  // Offset heuristic (same logic as parisOffsetForDate)
56
55
  const m = dUtc.getUTCMonth() + 1; // 1..12
57
- const offsetHours = (m >= 4 && m <= 10) ? 2 : 1;
56
+ const offsetHours = m >= 4 && m <= 10 ? 2 : 1;
58
57
  const offsetStr = offsetHours === 2 ? "+02:00" : "+01:00";
59
58
  // Applique l'offset pour obtenir la date/heure locales Paris
60
59
  const localMs = dUtc.getTime() + offsetHours * 3600 * 1000;
@@ -149,7 +148,7 @@ function extractCandidatesFromSearchHtml(html) {
149
148
  out.push({ id, hash, pageUrl, title: t?.[1] });
150
149
  }
151
150
  const seen = new Set();
152
- return out.filter(c => {
151
+ return out.filter((c) => {
153
152
  const k = `${c.id}_${c.hash}`;
154
153
  if (seen.has(k))
155
154
  return false;
@@ -162,46 +161,68 @@ function parseDataNvs(nvs) {
162
161
  const title = nvs.match(/<metadata\s+name="title"\s+value="([^"]+)"/i)?.[1];
163
162
  return { epoch: epoch ? Number(epoch) : undefined, title };
164
163
  }
165
- function buildSenatVodMasterM3u8FromNvs(xml, host = SENAT_VOD_HOST) {
166
- if (!xml)
164
+ // nvsText = contenu texte de data.nvs (utf-8)
165
+ // finalText = contenu texte de finalplayer.nvs (utf-8)
166
+ export function buildSenatVodMasterM3u8FromNvs(nvsText, finalText) {
167
+ // 1) Base Akamai depuis data.nvs (mp4 "serverfiles://senat/YYYY/MM/encoderX_YYYYMMDDHHMMSS_1.mp4")
168
+ const baseMatch = nvsText.match(/serverfiles:\/\/senat\/(\d{4})\/(\d{2})\/(encoder\d)_(\d{14})/i);
169
+ if (!baseMatch)
167
170
  return null;
168
- // (a) Déjà un lien VOD complet en .smil/playlist.m3u8
169
- const mVod = xml.match(/https?:\/\/[^"'<>]*vodsenat[^"'<>]*\.smil\/(?:playlist|master)\.m3u8/i);
170
- if (mVod)
171
- return mVod[0];
172
- // (b) Chemin senat/YYYY/MM/<basename>.smil -> normalise en playlist.m3u8
173
- const mSmilPath = xml.match(/senat\/(\d{4})\/(\d{2})\/([^"'<>\/]+?)\.smil/i);
174
- if (mSmilPath) {
175
- const [, y, m, base] = mSmilPath;
176
- return `${host}/senat/${y}/${m}/${base}.smil/playlist.m3u8`;
171
+ const [, yyyy, mm, encoder, stamp] = baseMatch;
172
+ const base = `https://vodsenat.akamaized.net/senat/${yyyy}/${mm}/${encoder}_${stamp}`;
173
+ // 2) start/end depuis finalplayer.nvs
174
+ let start = null, end = null;
175
+ const playerAttr = finalText.match(/player[^>]*\bstarttime="(\d+)"[^>]*\bendtime="(\d+)"/i);
176
+ if (playerAttr) {
177
+ start = parseInt(playerAttr[1], 10);
178
+ end = parseInt(playerAttr[2], 10);
179
+ }
180
+ else {
181
+ // fallback: prendre le plus petit timecode des <synchro timecode="...">
182
+ const tc = Array.from(finalText.matchAll(/timecode="(\d+)"/g)).map((m) => parseInt(m[1], 10));
183
+ if (tc.length)
184
+ start = Math.min(...tc);
185
+ }
186
+ // 3) si pas d'end, on peut déduire via "duree" (en secondes) de data.nvs
187
+ if (end == null) {
188
+ const durMeta = nvsText.match(/<metadata[^>]*\bname="duree"[^>]*\bvalue="(\d+)"[^>]*>/i);
189
+ if (durMeta && start != null) {
190
+ const durMs = parseInt(durMeta[1], 10) * 1000; // sec → ms
191
+ end = start + durMs;
192
+ }
177
193
  }
178
- // (c) Chemin senat/YYYY/MM/<basename>.mp4 -> transforme en .smil/playlist.m3u8
179
- const mMp4Path = xml.match(/senat\/(\d{4})\/(\d{2})\/([^"'<>\/]+?)\.mp4/i);
180
- if (mMp4Path) {
181
- const [, y, m, base] = mMp4Path;
182
- return `${host}/senat/${y}/${m}/${base}.smil/playlist.m3u8`;
194
+ // 4) Construction de l’URL
195
+ // - si on a start & end → utiliser ps/pd (robuste et conforme à ce que sert le Sénat)
196
+ // - sinon fallback sans suffixe (souvent valide aussi)
197
+ if (start != null && end != null && end > start) {
198
+ const pd = end - start;
199
+ return `${base}_ps${start}_pd${pd}.smil/master.m3u8`;
183
200
  }
184
- // (d) À défaut, n’importe quel .m3u8 présent (faible priorité — peut être du live)
185
- const mAny = xml.match(/https?:\/\/[^"'<>]+\.m3u8/i);
186
- return mAny ? mAny[0] : null;
201
+ return `${base}.smil/master.m3u8`;
187
202
  }
188
- function score(agenda, agendaTs, videoTitle, videoEpoch) {
203
+ export function score(agenda, agendaTs, videoTitle, videoEpoch) {
189
204
  const titleScore = dice(agenda.titre || "", videoTitle || "");
190
205
  let timeScore = 0;
191
206
  if (agendaTs && videoEpoch) {
207
+ // videoEpoch and agendaTs are both in seconds; convert their gap to minutes
192
208
  const deltaMin = Math.abs(videoEpoch - agendaTs) / 60;
193
- timeScore = Math.max(0, 1 - (deltaMin / 180));
209
+ // time score decays linearly from 1 (no gap) to 0 (gap of 180 minutes or more)
210
+ timeScore = Math.max(0, 1 - deltaMin / 180);
194
211
  }
195
212
  let orgBonus = 0;
196
213
  if (agenda.organe && videoTitle) {
197
214
  const o = normalize(agenda.organe);
198
215
  const t = normalize(videoTitle);
199
- if (o && t.includes(o.split(" ")[0]))
216
+ const first = o.split(" ").filter(Boolean)[0];
217
+ if (first && t.includes(first))
200
218
  orgBonus = 0.15;
201
219
  }
202
- return 0.3 * titleScore + 0.7 * timeScore + orgBonus;
220
+ return 0.3 * titleScore + 0.7 * timeScore + orgBonus; // Can be adjusted
203
221
  }
204
- function buildSearchStrategies(agenda) {
222
+ /**
223
+ * Build search strategies for senat's videos
224
+ */
225
+ export function buildSearchStrategies(agenda) {
205
226
  const fr = agenda.date ? toFRDate(agenda.date) : undefined;
206
227
  const kw = simplifyTitleForKeywords(agenda.titre || "");
207
228
  const commission = agenda.organe || undefined;
@@ -239,21 +260,23 @@ async function fetchAllSearchPages(args, baseDir, strategyIndex, maxPages = MAX_
239
260
  }
240
261
  return pages;
241
262
  }
242
- async function processAgenda(agenda, session, dataDir) {
263
+ async function processGroupedReunion(agenda, session, dataDir) {
243
264
  if (!agenda)
244
265
  return;
266
+ // 1) Garde-fous
245
267
  if (!agenda.captationVideo) {
246
268
  if (!options["silent"])
247
- console.log(`[skip] ${agenda.id} captationVideo=false`);
269
+ console.log(`[skip] ${agenda.uid} captationVideo=false`);
248
270
  return;
249
271
  }
250
272
  if (!agenda.date || !agenda.startTime) {
251
273
  if (!options["silent"])
252
- console.log(`[skip] ${agenda.id} date/hour missing`);
274
+ console.log(`[skip] ${agenda.uid} date/hour missing`);
253
275
  return;
254
276
  }
255
277
  STATS.total++;
256
- const reunionUid = makeReunionUid(agenda);
278
+ // 2) Dossier de sortie (utilise directement l'UID)
279
+ const reunionUid = agenda.uid;
257
280
  const baseDir = path.join(dataDir, VIDEOS_ROOT_FOLDER, String(session), reunionUid);
258
281
  await fs.ensureDir(baseDir);
259
282
  const agendaTs = toTargetEpoch(agenda.date, agenda.startTime);
@@ -276,8 +299,9 @@ async function processAgenda(agenda, session, dataDir) {
276
299
  }
277
300
  }
278
301
  if (usedStrategy === -1 || !candidates.length) {
279
- if (!options["silent"])
280
- console.log(`[miss] ${agenda.id} no candidates (triedStrategies=${strategies.length})`);
302
+ if (!options["silent"]) {
303
+ console.log(`[miss] ${agenda.uid} no candidates (triedStrategies=${strategies.length})`);
304
+ }
281
305
  return;
282
306
  }
283
307
  // ==== 2) Enrich via data.nvs + scoring; pick best ====
@@ -295,14 +319,14 @@ async function processAgenda(agenda, session, dataDir) {
295
319
  }
296
320
  if (!best) {
297
321
  if (!options["silent"])
298
- console.log(`[miss] ${agenda.id} candidats without data.nvs`);
322
+ console.log(`[miss] ${agenda.uid} candidates without data.nvs`);
299
323
  return;
300
324
  }
301
325
  const accepted = best.score >= MATCH_THRESHOLD;
302
326
  if (accepted)
303
327
  STATS.accepted++;
304
328
  if (!options["silent"]) {
305
- console.log(`[pick] ${agenda.id} best id=${best.id} hash=${best.hash} score=${best.score.toFixed(2)} accepted=${accepted} (strategy=${usedStrategy})`);
329
+ console.log(`[pick] ${agenda.uid} best id=${best.id} hash=${best.hash} score=${best.score.toFixed(2)} accepted=${accepted} (strategy=${usedStrategy})`);
306
330
  }
307
331
  // ==== 3) Write metadata + NVS of the best candidate (always) ====
308
332
  const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
@@ -317,7 +341,7 @@ async function processAgenda(agenda, session, dataDir) {
317
341
  startTime: agenda.startTime,
318
342
  titre: agenda.titre,
319
343
  organe: agenda.organe ?? undefined,
320
- id: agenda.id,
344
+ uid: agenda.uid,
321
345
  },
322
346
  best: {
323
347
  id: best.id,
@@ -340,37 +364,30 @@ async function processAgenda(agenda, session, dataDir) {
340
364
  if (finalTxt)
341
365
  await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
342
366
  let master = null;
343
- if (dataTxt)
344
- master = buildSenatVodMasterM3u8FromNvs(dataTxt);
367
+ if (dataTxt && finalTxt)
368
+ master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
345
369
  // ==== 4) Update agenda file (only if accepted + m3u8) ====
346
370
  if (accepted && master) {
347
- const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${formatYYYYMMDD(agenda.date)}.json`);
371
+ const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
348
372
  if (await fs.pathExists(agendaJsonPath)) {
349
373
  const raw = await fsp.readFile(agendaJsonPath, "utf-8");
350
- let items;
374
+ let obj;
351
375
  try {
352
- items = JSON.parse(raw);
376
+ obj = JSON.parse(raw);
353
377
  }
354
378
  catch (e) {
355
379
  console.warn(`[warn] invalid JSON in ${agendaJsonPath}:`, e?.message);
356
- items = null;
380
+ obj = null;
357
381
  }
358
- if (Array.isArray(items)) {
359
- const idx = items.findIndex((e) => String(e?.id) === String(agenda.id));
360
- if (idx === -1) {
361
- console.warn(`[warn] agenda id ${agenda.id} not found in ${agendaJsonPath}`);
362
- }
363
- else {
364
- // add/update urlVideo on the matching item
365
- items[idx] = { ...items[idx], urlVideo: master };
366
- await writeIfChanged(agendaJsonPath, JSON.stringify(items, null, 2));
367
- if (!options["silent"]) {
368
- console.log(`[write] ${agenda.id} urlVideo ← ${master}`);
369
- }
382
+ if (obj && typeof obj === "object" && !Array.isArray(obj)) {
383
+ const next = { ...obj, urlVideo: master };
384
+ await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
385
+ if (!options["silent"]) {
386
+ console.log(`[write] ${agenda.uid} urlVideo ← ${master}`);
370
387
  }
371
388
  }
372
389
  else {
373
- console.warn(`[warn] expected an array in ${agendaJsonPath}, got ${typeof items}`);
390
+ console.warn(`[warn] expected an object in ${agendaJsonPath}, got ${Array.isArray(obj) ? "array" : typeof obj}`);
374
391
  }
375
392
  }
376
393
  else {
@@ -379,15 +396,14 @@ async function processAgenda(agenda, session, dataDir) {
379
396
  }
380
397
  }
381
398
  async function processAll(dataDir, sessions) {
399
+ console.log("Process all Agendas and fetch video's url");
382
400
  for (const session of sessions) {
383
- for (const { item: agendas } of iterLoadSenatAgendas(dataDir, session, {})) {
384
- for (const agenda of agendas) {
385
- try {
386
- await processAgenda(agenda, session, dataDir);
387
- }
388
- catch (e) {
389
- console.error(`[error] ${agenda.id}:`, e?.message || e);
390
- }
401
+ for (const { item: agenda } of iterLoadSenatAgendasGrouped(dataDir, session)) {
402
+ try {
403
+ await processGroupedReunion(agenda, session, dataDir);
404
+ }
405
+ catch (e) {
406
+ console.error(`[error] ${agenda?.uid ?? "unknown-uid"}:`, e?.message || e);
391
407
  }
392
408
  }
393
409
  }
@@ -396,17 +412,16 @@ async function main() {
396
412
  const dataDir = options["dataDir"];
397
413
  assert(dataDir, "Missing argument: data directory");
398
414
  const sessions = getSessionsFromStart(options["fromSession"]);
399
- if (!options["silent"])
400
- console.time("senat-agendas→videos start processing time");
415
+ console.time("senat-agendas→videos start processing time");
401
416
  await processAll(dataDir, sessions);
402
- if (!options["silent"])
403
- console.timeEnd("senat-agendas→videos processing time");
404
- if (!options["silent"]) {
405
- const { total, accepted } = STATS;
406
- const ratio = total ? (accepted / total * 100).toFixed(1) : "0.0";
407
- console.log(`[summary] accepted=${accepted} / total=${total} (${ratio}%)`);
408
- }
417
+ console.timeEnd("senat-agendas→videos processing time");
418
+ const { total, accepted } = STATS;
419
+ const ratio = total ? ((accepted / total) * 100).toFixed(1) : "0.0";
420
+ console.log(`[summary] accepted=${accepted} / total=${total} (${ratio}%)`);
409
421
  }
410
422
  main()
411
423
  .then(() => process.exit(0))
412
- .catch((err) => { console.error(err); process.exit(1); });
424
+ .catch((err) => {
425
+ console.error(err);
426
+ process.exit(1);
427
+ });
@@ -337,6 +337,7 @@ export async function linkCRtoCommissionGroup(opts) {
337
337
  console.warn(`[AGENDA][COM] Unreadable JSON → ${filePath} (${e?.message}) → will recreate`);
338
338
  }
339
339
  if (!group) {
340
+ // FIXME: fix the way groups are found, because it currently creates duplicates
340
341
  // group = {
341
342
  // uid,
342
343
  // chambre: "SN",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.13.1",
3
+ "version": "2.13.2",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",