@tricoteuses/senat 2.18.10 → 2.18.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -108,7 +108,10 @@ npm run data:generate_schemas ../senat-data
108
108
  To publish a new version of this package onto npm, bump the package version and publish.
109
109
 
110
110
  ```bash
111
- npm version x.y.z # Bumps version in package.json and creates a new tag x.y.z
111
+ # Increment the version and create a new Git tag automatically (run only ONE of the following)
112
+ npm version patch # +0.0.1 → small fixes
113
+ npm version minor # +0.1.0 → new features
114
+ npm version major # +1.0.0 → breaking changes
112
115
  npx tsc
113
116
  npm publish
114
117
  ```
@@ -65,10 +65,19 @@ function epochToParisDateTime(epochSec) {
65
65
  startTime: `${hh}:${mi}:${ss}.${ms}${offsetStr}`,
66
66
  };
67
67
  }
68
- function toTargetEpoch(time) {
68
+ function toTargetEpoch(time, date) {
69
69
  if (!time)
70
70
  return null;
71
- const dtLocal = DateTime.fromISO(time, { zone: "Europe/Paris" });
71
+ let dtLocal;
72
+ if (time.includes("T")) {
73
+ dtLocal = DateTime.fromISO(time, { zone: "Europe/Paris" });
74
+ }
75
+ else if (date) {
76
+ dtLocal = DateTime.fromISO(`${date}T${time}`, { zone: "Europe/Paris" });
77
+ }
78
+ else {
79
+ return null;
80
+ }
72
81
  if (!dtLocal.isValid)
73
82
  return null;
74
83
  return Math.floor(dtLocal.toUTC().toSeconds());
@@ -126,15 +135,6 @@ function extractCandidatesFromSearchHtml(html) {
126
135
  return true;
127
136
  });
128
137
  }
129
- function parseFinalNvs(nvs) {
130
- const playerTag = nvs.match(/<player\b[^>]*>/i)?.[0];
131
- if (!playerTag)
132
- return {};
133
- const sessionStartStr = playerTag.match(/\bsessionstart="(\d+)"/i)?.[1];
134
- return {
135
- sessionStart: sessionStartStr ? Number(sessionStartStr) : undefined,
136
- };
137
- }
138
138
  function parseDataNvs(nvs) {
139
139
  const epochStr = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
140
140
  const epoch = epochStr ? Number(epochStr) : undefined;
@@ -276,154 +276,173 @@ async function processGroupedReunion(agenda, session, dataDir) {
276
276
  // if (!options["silent"]) console.log(`[skip] ${agenda.uid} date/hour missing`)
277
277
  return;
278
278
  }
279
- const agendaTs = toTargetEpoch(agenda.startTime);
279
+ const agendaTs = toTargetEpoch(agenda.startTime, agenda.date);
280
280
  const now = Date.now();
281
281
  if (agendaTs && agendaTs * 1000 > now) {
282
282
  return;
283
283
  }
284
- STATS.total++;
285
284
  const reunionUid = agenda.uid;
286
285
  const baseDir = path.join(dataDir, VIDEOS_ROOT_FOLDER, String(session), reunionUid);
287
286
  await fs.ensureDir(baseDir);
288
- const searchParams = {
289
- search: "true",
290
- videotype: getAgendaType(agenda),
291
- };
292
- if (agenda.date) {
293
- const fr = toFRDate(agenda.date);
294
- searchParams.period = "custom";
295
- searchParams.begin = fr;
296
- searchParams.end = fr;
297
- }
298
- if (agenda.organe) {
299
- searchParams.organe = agenda.organe;
300
- }
301
- const pages = await fetchAllSearchPages(searchParams);
302
- if (!pages.length) {
303
- if (!options["silent"]) {
304
- console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
287
+ let skipDownload = false;
288
+ if (options["only-recent"]) {
289
+ const now = Date.now();
290
+ const cutoff = now - options["only-recent"] * 24 * 3600 * 1000;
291
+ const reunionTs = Date.parse(agenda.date);
292
+ if (reunionTs < cutoff) {
293
+ // Check if files already exist
294
+ const dataNvsPath = path.join(baseDir, "data.nvs");
295
+ const finalplayerNvsPath = path.join(baseDir, "finalplayer.nvs");
296
+ if (fs.existsSync(dataNvsPath) && fs.existsSync(finalplayerNvsPath)) {
297
+ skipDownload = true;
298
+ }
305
299
  }
306
- return;
307
300
  }
308
- const combinedHtml = pages.join("\n<!-- PAGE SPLIT -->\n");
309
- const candidates = extractCandidatesFromSearchHtml(combinedHtml).slice(0, MAX_CANDIDATES);
310
- if (!candidates.length) {
311
- if (!options["silent"]) {
312
- console.log(`[miss] ${agenda.uid} no candidates after parse (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
301
+ let master = null;
302
+ let accepted = false;
303
+ if (!skipDownload) {
304
+ STATS.total++;
305
+ const searchParams = {
306
+ search: "true",
307
+ videotype: getAgendaType(agenda),
308
+ };
309
+ if (agenda.date) {
310
+ const fr = toFRDate(agenda.date);
311
+ searchParams.period = "custom";
312
+ searchParams.begin = fr;
313
+ searchParams.end = fr;
313
314
  }
314
- return;
315
- }
316
- // ==== 2) Enrich via data.nvs + scoring; pick best ====
317
- let best = null;
318
- for (const c of candidates) {
319
- const dataUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`;
320
- const finalUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/finalplayer.nvs`;
321
- const dataBuf = await fetchBuffer(dataUrl);
322
- if (!dataBuf)
323
- continue;
324
- const meta = parseDataNvs(dataBuf.toString("utf-8"));
325
- let sessionStart;
326
- const finalBuf = await fetchBuffer(finalUrl);
327
- if (finalBuf) {
328
- const finalMeta = parseFinalNvs(finalBuf.toString("utf-8"));
329
- sessionStart = finalMeta.sessionStart;
315
+ if (agenda.organe) {
316
+ searchParams.organe = agenda.organe;
330
317
  }
331
- const videoEpoch = meta.epoch ?? sessionStart;
332
- let sameOrg = false;
333
- // If organes are too different, go to next candidates
334
- if (agenda.organe && meta.organes?.length) {
335
- const agendaOrgNorm = normalize(agenda.organe);
336
- const agendaKey = getOrgKey(agendaOrgNorm);
337
- let bestDice = 0;
338
- let hasSameKey = false;
339
- for (const vo of meta.organes) {
340
- const videoOrgNorm = normalize(vo);
341
- const videoKey = getOrgKey(videoOrgNorm);
342
- const d = dice(agendaOrgNorm, videoOrgNorm);
343
- if (videoKey === agendaKey && videoKey !== "autre") {
344
- hasSameKey = true;
345
- }
346
- if (d > bestDice)
347
- bestDice = d;
318
+ const pages = await fetchAllSearchPages(searchParams);
319
+ if (!pages.length) {
320
+ if (!options["silent"]) {
321
+ console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
348
322
  }
349
- if (hasSameKey) {
350
- sameOrg = true; // we are sure this is the same org
323
+ return;
324
+ }
325
+ const combinedHtml = pages.join("\n<!-- PAGE SPLIT -->\n");
326
+ const candidates = extractCandidatesFromSearchHtml(combinedHtml).slice(0, MAX_CANDIDATES);
327
+ if (!candidates.length) {
328
+ if (!options["silent"]) {
329
+ console.log(`[miss] ${agenda.uid} no candidates after parse (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
351
330
  }
352
- else if (bestDice < 0.8) {
353
- // if diff org and dice too low we skip
331
+ return;
332
+ }
333
+ // ==== 2) Enrich via data.nvs + scoring; pick best ====
334
+ let best = null;
335
+ for (const c of candidates) {
336
+ const dataUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`;
337
+ const finalUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/finalplayer.nvs`;
338
+ const dataBuf = await fetchBuffer(dataUrl);
339
+ if (!dataBuf)
354
340
  continue;
341
+ const meta = parseDataNvs(dataBuf.toString("utf-8"));
342
+ let sameOrg = false;
343
+ // If organes are too different, go to next candidates
344
+ if (agenda.organe && meta.organes?.length) {
345
+ const agendaOrgNorm = normalize(agenda.organe);
346
+ const agendaKey = getOrgKey(agendaOrgNorm);
347
+ let bestDice = 0;
348
+ let hasSameKey = false;
349
+ for (const vo of meta.organes) {
350
+ const videoOrgNorm = normalize(vo);
351
+ const videoKey = getOrgKey(videoOrgNorm);
352
+ const d = dice(agendaOrgNorm, videoOrgNorm);
353
+ if (videoKey === agendaKey && videoKey !== "autre") {
354
+ hasSameKey = true;
355
+ }
356
+ if (d > bestDice)
357
+ bestDice = d;
358
+ }
359
+ if (hasSameKey) {
360
+ sameOrg = true; // we are sure this is the same org
361
+ }
362
+ else if (bestDice < 0.8) {
363
+ // if diff org and dice too low we skip
364
+ continue;
365
+ }
366
+ }
367
+ let videoTitle = c.title;
368
+ if (c.isSeancePublique && meta.firstChapterLabel) {
369
+ videoTitle = meta.firstChapterLabel;
370
+ }
371
+ const s = score(agenda, agendaTs, sameOrg, videoTitle, meta.epoch, meta.organes);
372
+ if (!best || s > best.score) {
373
+ best = {
374
+ id: c.id,
375
+ hash: c.hash,
376
+ pageUrl: c.pageUrl,
377
+ epoch: meta.epoch,
378
+ vtitle: videoTitle,
379
+ score: s,
380
+ vorgane: meta.organes[0],
381
+ };
355
382
  }
356
383
  }
357
- let videoTitle = c.title;
358
- if (c.isSeancePublique && meta.firstChapterLabel) {
359
- videoTitle = meta.firstChapterLabel;
360
- }
361
- const s = score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, meta.organes);
362
- if (!best || s > best.score) {
363
- best = {
364
- id: c.id,
365
- hash: c.hash,
366
- pageUrl: c.pageUrl,
367
- epoch: meta.epoch,
368
- vtitle: videoTitle,
369
- score: s,
370
- vorgane: meta.organes[0],
371
- };
384
+ if (!best) {
385
+ if (!options["silent"])
386
+ console.log(`[miss] ${agenda.uid} No candidate found for this reunion`);
387
+ return;
372
388
  }
373
- }
374
- if (!best) {
375
- if (!options["silent"])
376
- console.log(`[miss] ${agenda.uid} No candidate found for this reunion`);
377
- return;
378
- }
379
- const accepted = best.score >= MATCH_THRESHOLD;
380
- if (accepted)
381
- STATS.accepted++;
382
- if (!options["silent"]) {
383
- console.log(`[pick] ${agenda.uid} score=${best.score.toFixed(2)}
389
+ accepted = best.score >= MATCH_THRESHOLD;
390
+ if (accepted)
391
+ STATS.accepted++;
392
+ if (!options["silent"]) {
393
+ console.log(`[pick] ${agenda.uid} score=${best.score.toFixed(2)}
384
394
  agenda title="${agenda.titre ?? ""}" agenda organe="${agenda.organe ?? ""}" agenda heure=${agenda.startTime}
385
395
  best title="${best.vtitle ?? ""}" best organe="${best.vorgane ?? ""}"
386
396
  accepted=${accepted}`);
397
+ }
398
+ // ==== 3) Write metadata + NVS of the best candidate (always) ====
399
+ const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
400
+ const metadata = {
401
+ reunionUid,
402
+ session,
403
+ accepted,
404
+ threshold: MATCH_THRESHOLD,
405
+ agenda: {
406
+ date: agenda.date,
407
+ startTime: agenda.startTime,
408
+ titre: agenda.titre,
409
+ organe: agenda.organe ?? undefined,
410
+ uid: agenda.uid,
411
+ },
412
+ best: {
413
+ id: best.id,
414
+ hash: best.hash,
415
+ pageUrl: best.pageUrl,
416
+ epoch: best.epoch ?? null,
417
+ date: bestDt?.date ?? null,
418
+ startTime: bestDt?.startTime ?? null,
419
+ title: best.vtitle ?? null,
420
+ score: best.score,
421
+ },
422
+ };
423
+ await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
424
+ const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
425
+ const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
426
+ const dataTxt = await fetchText(dataUrl);
427
+ const finalTxt = await fetchText(finalUrl);
428
+ if (dataTxt)
429
+ await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
430
+ if (finalTxt)
431
+ await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
432
+ if (dataTxt && finalTxt)
433
+ master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
434
+ }
435
+ else {
436
+ // Skipped download, but need to read data.nvs for urlVideo
437
+ try {
438
+ const dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
439
+ const finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
440
+ master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
441
+ }
442
+ catch { }
387
443
  }
388
- // ==== 3) Write metadata + NVS of the best candidate (always) ====
389
- const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
390
- const metadata = {
391
- reunionUid,
392
- session,
393
- accepted,
394
- threshold: MATCH_THRESHOLD,
395
- agenda: {
396
- date: agenda.date,
397
- startTime: agenda.startTime,
398
- titre: agenda.titre,
399
- organe: agenda.organe ?? undefined,
400
- uid: agenda.uid,
401
- },
402
- best: {
403
- id: best.id,
404
- hash: best.hash,
405
- pageUrl: best.pageUrl,
406
- epoch: best.epoch ?? null,
407
- date: bestDt?.date ?? null,
408
- startTime: bestDt?.startTime ?? null,
409
- title: best.vtitle ?? null,
410
- score: best.score,
411
- },
412
- };
413
- await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
414
- const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
415
- const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
416
- const dataTxt = await fetchText(dataUrl);
417
- const finalTxt = await fetchText(finalUrl);
418
- if (dataTxt)
419
- await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
420
- if (finalTxt)
421
- await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
422
- let master = null;
423
- if (dataTxt && finalTxt)
424
- master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
425
444
  // ==== 4) Update agenda file (only if accepted or download skipped, and an m3u8 master was built) ====
426
- if (accepted && master) {
445
+ if ((accepted || skipDownload) && master) {
427
446
  const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
428
447
  if (await fs.pathExists(agendaJsonPath)) {
429
448
  const raw = await fsp.readFile(agendaJsonPath, "utf-8");
@@ -48,13 +48,12 @@ export declare const commonOptions: ({
48
48
  name: string;
49
49
  type: StringConstructor;
50
50
  } | {
51
- defaultValue: number;
51
+ alias: string;
52
52
  help: string;
53
53
  name: string;
54
- type: NumberConstructor;
54
+ type: BooleanConstructor;
55
55
  } | {
56
- alias: string;
57
56
  help: string;
58
57
  name: string;
59
- type: BooleanConstructor;
58
+ type: NumberConstructor;
60
59
  })[];
@@ -35,4 +35,11 @@ export const onlyRecentOption = {
35
35
  name: "only-recent",
36
36
  type: Number,
37
37
  };
38
- export const commonOptions = [categoriesOption, dataDirDefaultOption, fromSessionOption, silentOption, verboseOption];
38
+ export const commonOptions = [
39
+ categoriesOption,
40
+ dataDirDefaultOption,
41
+ fromSessionOption,
42
+ silentOption,
43
+ verboseOption,
44
+ onlyRecentOption,
45
+ ];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.18.10",
3
+ "version": "2.18.12",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",