@tricoteuses/senat 2.18.11 → 2.18.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -108,7 +108,10 @@ npm run data:generate_schemas ../senat-data
108
108
  To publish a new version of this package onto npm, bump the package version and publish.
109
109
 
110
110
  ```bash
111
- npm version x.y.z # Bumps version in package.json and creates a new tag x.y.z
111
+ # Increment the version and create the matching Git tag automatically (run exactly ONE of the following)
112
+ npm version patch # +0.0.1 → small fixes
113
+ npm version minor # +0.1.0 → new features
114
+ npm version major # +1.0.0 → breaking changes
112
115
  npx tsc
113
116
  npm publish
114
117
  ```
@@ -135,15 +135,6 @@ function extractCandidatesFromSearchHtml(html) {
135
135
  return true;
136
136
  });
137
137
  }
138
- function parseFinalNvs(nvs) {
139
- const playerTag = nvs.match(/<player\b[^>]*>/i)?.[0];
140
- if (!playerTag)
141
- return {};
142
- const sessionStartStr = playerTag.match(/\bsessionstart="(\d+)"/i)?.[1];
143
- return {
144
- sessionStart: sessionStartStr ? Number(sessionStartStr) : undefined,
145
- };
146
- }
147
138
  function parseDataNvs(nvs) {
148
139
  const epochStr = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
149
140
  const epoch = epochStr ? Number(epochStr) : undefined;
@@ -207,8 +198,6 @@ function score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, videoOrganes)
207
198
  const titleScore = Math.max(objetS, titleS);
208
199
  let timeScore = 0;
209
200
  if (agendaTs && videoEpoch) {
210
- console.log("agendaTs", agendaTs);
211
- console.log("videoEpoch", videoEpoch);
212
201
  const deltaMin = Math.abs(videoEpoch - agendaTs) / 60;
213
202
  timeScore = Math.exp(-deltaMin / 60);
214
203
  }
@@ -292,142 +281,168 @@ async function processGroupedReunion(agenda, session, dataDir) {
292
281
  if (agendaTs && agendaTs * 1000 > now) {
293
282
  return;
294
283
  }
295
- STATS.total++;
296
284
  const reunionUid = agenda.uid;
297
285
  const baseDir = path.join(dataDir, VIDEOS_ROOT_FOLDER, String(session), reunionUid);
298
286
  await fs.ensureDir(baseDir);
299
- const searchParams = {
300
- search: "true",
301
- videotype: getAgendaType(agenda),
302
- };
303
- if (agenda.date) {
304
- const fr = toFRDate(agenda.date);
305
- searchParams.period = "custom";
306
- searchParams.begin = fr;
307
- searchParams.end = fr;
308
- }
309
- if (agenda.organe) {
310
- searchParams.organe = agenda.organe;
311
- }
312
- const pages = await fetchAllSearchPages(searchParams);
313
- if (!pages.length) {
314
- if (!options["silent"]) {
315
- console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
287
+ let skipDownload = false;
288
+ if (options["only-recent"]) {
289
+ const now = Date.now();
290
+ const cutoff = now - options["only-recent"] * 24 * 3600 * 1000;
291
+ const reunionTs = Date.parse(agenda.date);
292
+ if (reunionTs < cutoff) {
293
+ // Check if files already exist
294
+ const dataNvsPath = path.join(baseDir, "data.nvs");
295
+ const finalplayerNvsPath = path.join(baseDir, "finalplayer.nvs");
296
+ if (fs.existsSync(dataNvsPath) && fs.existsSync(finalplayerNvsPath)) {
297
+ skipDownload = true;
298
+ }
316
299
  }
317
- return;
318
300
  }
319
- const combinedHtml = pages.join("\n<!-- PAGE SPLIT -->\n");
320
- const candidates = extractCandidatesFromSearchHtml(combinedHtml).slice(0, MAX_CANDIDATES);
321
- if (!candidates.length) {
322
- if (!options["silent"]) {
323
- console.log(`[miss] ${agenda.uid} no candidates after parse (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
301
+ let master = null;
302
+ let accepted = false;
303
+ if (!skipDownload) {
304
+ STATS.total++;
305
+ const searchParams = {
306
+ search: "true",
307
+ videotype: getAgendaType(agenda),
308
+ };
309
+ if (agenda.date) {
310
+ const fr = toFRDate(agenda.date);
311
+ searchParams.period = "custom";
312
+ searchParams.begin = fr;
313
+ searchParams.end = fr;
324
314
  }
325
- return;
326
- }
327
- // ==== 2) Enrich via data.nvs + scoring; pick best ====
328
- let best = null;
329
- for (const c of candidates) {
330
- const dataUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`;
331
- const finalUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/finalplayer.nvs`;
332
- const dataBuf = await fetchBuffer(dataUrl);
333
- if (!dataBuf)
334
- continue;
335
- const meta = parseDataNvs(dataBuf.toString("utf-8"));
336
- let sameOrg = false;
337
- // If organes are too different, go to next candidates
338
- if (agenda.organe && meta.organes?.length) {
339
- const agendaOrgNorm = normalize(agenda.organe);
340
- const agendaKey = getOrgKey(agendaOrgNorm);
341
- let bestDice = 0;
342
- let hasSameKey = false;
343
- for (const vo of meta.organes) {
344
- const videoOrgNorm = normalize(vo);
345
- const videoKey = getOrgKey(videoOrgNorm);
346
- const d = dice(agendaOrgNorm, videoOrgNorm);
347
- if (videoKey === agendaKey && videoKey !== "autre") {
348
- hasSameKey = true;
349
- }
350
- if (d > bestDice)
351
- bestDice = d;
315
+ if (agenda.organe) {
316
+ searchParams.organe = agenda.organe;
317
+ }
318
+ const pages = await fetchAllSearchPages(searchParams);
319
+ if (!pages.length) {
320
+ if (!options["silent"]) {
321
+ console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
352
322
  }
353
- if (hasSameKey) {
354
- sameOrg = true; // we are sure this is the same org
323
+ return;
324
+ }
325
+ const combinedHtml = pages.join("\n<!-- PAGE SPLIT -->\n");
326
+ const candidates = extractCandidatesFromSearchHtml(combinedHtml).slice(0, MAX_CANDIDATES);
327
+ if (!candidates.length) {
328
+ if (!options["silent"]) {
329
+ console.log(`[miss] ${agenda.uid} no candidates after parse (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
355
330
  }
356
- else if (bestDice < 0.8) {
357
- // if diff org and dice too low we skip
331
+ return;
332
+ }
333
+ // ==== 2) Enrich via data.nvs + scoring; pick best ====
334
+ let best = null;
335
+ for (const c of candidates) {
336
+ const dataUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`;
337
+ const finalUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/finalplayer.nvs`;
338
+ const dataBuf = await fetchBuffer(dataUrl);
339
+ if (!dataBuf)
358
340
  continue;
341
+ const meta = parseDataNvs(dataBuf.toString("utf-8"));
342
+ let sameOrg = false;
343
+ // If organes are too different, go to next candidates
344
+ if (agenda.organe && meta.organes?.length) {
345
+ const agendaOrgNorm = normalize(agenda.organe);
346
+ const agendaKey = getOrgKey(agendaOrgNorm);
347
+ let bestDice = 0;
348
+ let hasSameKey = false;
349
+ for (const vo of meta.organes) {
350
+ const videoOrgNorm = normalize(vo);
351
+ const videoKey = getOrgKey(videoOrgNorm);
352
+ const d = dice(agendaOrgNorm, videoOrgNorm);
353
+ if (videoKey === agendaKey && videoKey !== "autre") {
354
+ hasSameKey = true;
355
+ }
356
+ if (d > bestDice)
357
+ bestDice = d;
358
+ }
359
+ if (hasSameKey) {
360
+ sameOrg = true; // we are sure this is the same org
361
+ }
362
+ else if (bestDice < 0.8) {
363
+ // if diff org and dice too low we skip
364
+ continue;
365
+ }
366
+ }
367
+ let videoTitle = c.title;
368
+ if (c.isSeancePublique && meta.firstChapterLabel) {
369
+ videoTitle = meta.firstChapterLabel;
370
+ }
371
+ const s = score(agenda, agendaTs, sameOrg, videoTitle, meta.epoch, meta.organes);
372
+ if (!best || s > best.score) {
373
+ best = {
374
+ id: c.id,
375
+ hash: c.hash,
376
+ pageUrl: c.pageUrl,
377
+ epoch: meta.epoch,
378
+ vtitle: videoTitle,
379
+ score: s,
380
+ vorgane: meta.organes[0],
381
+ };
359
382
  }
360
383
  }
361
- let videoTitle = c.title;
362
- if (c.isSeancePublique && meta.firstChapterLabel) {
363
- videoTitle = meta.firstChapterLabel;
364
- }
365
- const s = score(agenda, agendaTs, sameOrg, videoTitle, meta.epoch, meta.organes);
366
- if (!best || s > best.score) {
367
- best = {
368
- id: c.id,
369
- hash: c.hash,
370
- pageUrl: c.pageUrl,
371
- epoch: meta.epoch,
372
- vtitle: videoTitle,
373
- score: s,
374
- vorgane: meta.organes[0],
375
- };
384
+ if (!best) {
385
+ if (!options["silent"])
386
+ console.log(`[miss] ${agenda.uid} No candidate found for this reunion`);
387
+ return;
376
388
  }
377
- }
378
- if (!best) {
379
- if (!options["silent"])
380
- console.log(`[miss] ${agenda.uid} No candidate found for this reunion`);
381
- return;
382
- }
383
- const accepted = best.score >= MATCH_THRESHOLD;
384
- if (accepted)
385
- STATS.accepted++;
386
- if (!options["silent"]) {
387
- console.log(`[pick] ${agenda.uid} score=${best.score.toFixed(2)}
389
+ accepted = best.score >= MATCH_THRESHOLD;
390
+ if (accepted)
391
+ STATS.accepted++;
392
+ if (!options["silent"]) {
393
+ console.log(`[pick] ${agenda.uid} score=${best.score.toFixed(2)}
388
394
  agenda title="${agenda.titre ?? ""}" agenda organe="${agenda.organe ?? ""}" agenda heure=${agenda.startTime}
389
395
  best title="${best.vtitle ?? ""}" best organe="${best.vorgane ?? ""}"
390
396
  accepted=${accepted}`);
397
+ }
398
+ // ==== 3) Write metadata + NVS of the best candidate (always) ====
399
+ const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
400
+ const metadata = {
401
+ reunionUid,
402
+ session,
403
+ accepted,
404
+ threshold: MATCH_THRESHOLD,
405
+ agenda: {
406
+ date: agenda.date,
407
+ startTime: agenda.startTime,
408
+ titre: agenda.titre,
409
+ organe: agenda.organe ?? undefined,
410
+ uid: agenda.uid,
411
+ },
412
+ best: {
413
+ id: best.id,
414
+ hash: best.hash,
415
+ pageUrl: best.pageUrl,
416
+ epoch: best.epoch ?? null,
417
+ date: bestDt?.date ?? null,
418
+ startTime: bestDt?.startTime ?? null,
419
+ title: best.vtitle ?? null,
420
+ score: best.score,
421
+ },
422
+ };
423
+ await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
424
+ const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
425
+ const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
426
+ const dataTxt = await fetchText(dataUrl);
427
+ const finalTxt = await fetchText(finalUrl);
428
+ if (dataTxt)
429
+ await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
430
+ if (finalTxt)
431
+ await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
432
+ if (dataTxt && finalTxt)
433
+ master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
434
+ }
435
+ else {
436
+ // Skipped download, but need to read data.nvs for urlVideo
437
+ try {
438
+ const dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
439
+ const finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
440
+ master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
441
+ }
442
+ catch { }
391
443
  }
392
- // ==== 3) Write metadata + NVS of the best candidate (always) ====
393
- const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
394
- const metadata = {
395
- reunionUid,
396
- session,
397
- accepted,
398
- threshold: MATCH_THRESHOLD,
399
- agenda: {
400
- date: agenda.date,
401
- startTime: agenda.startTime,
402
- titre: agenda.titre,
403
- organe: agenda.organe ?? undefined,
404
- uid: agenda.uid,
405
- },
406
- best: {
407
- id: best.id,
408
- hash: best.hash,
409
- pageUrl: best.pageUrl,
410
- epoch: best.epoch ?? null,
411
- date: bestDt?.date ?? null,
412
- startTime: bestDt?.startTime ?? null,
413
- title: best.vtitle ?? null,
414
- score: best.score,
415
- },
416
- };
417
- await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
418
- const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
419
- const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
420
- const dataTxt = await fetchText(dataUrl);
421
- const finalTxt = await fetchText(finalUrl);
422
- if (dataTxt)
423
- await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
424
- if (finalTxt)
425
- await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
426
- let master = null;
427
- if (dataTxt && finalTxt)
428
- master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
429
444
  // ==== 4) Update agenda file (only if accepted + m3u8) ====
430
- if (accepted && master) {
445
+ if ((accepted || skipDownload) && master) {
431
446
  const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
432
447
  if (await fs.pathExists(agendaJsonPath)) {
433
448
  const raw = await fsp.readFile(agendaJsonPath, "utf-8");
@@ -48,13 +48,12 @@ export declare const commonOptions: ({
48
48
  name: string;
49
49
  type: StringConstructor;
50
50
  } | {
51
- defaultValue: number;
51
+ alias: string;
52
52
  help: string;
53
53
  name: string;
54
- type: NumberConstructor;
54
+ type: BooleanConstructor;
55
55
  } | {
56
- alias: string;
57
56
  help: string;
58
57
  name: string;
59
- type: BooleanConstructor;
58
+ type: NumberConstructor;
60
59
  })[];
@@ -35,4 +35,11 @@ export const onlyRecentOption = {
35
35
  name: "only-recent",
36
36
  type: Number,
37
37
  };
38
- export const commonOptions = [categoriesOption, dataDirDefaultOption, fromSessionOption, silentOption, verboseOption];
38
+ export const commonOptions = [
39
+ categoriesOption,
40
+ dataDirDefaultOption,
41
+ fromSessionOption,
42
+ silentOption,
43
+ verboseOption,
44
+ onlyRecentOption,
45
+ ];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.18.11",
3
+ "version": "2.18.12",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",