@tricoteuses/senat 2.18.10 → 2.18.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -108,7 +108,10 @@ npm run data:generate_schemas ../senat-data
|
|
|
108
108
|
To publish a new version of this package onto npm, bump the package version and publish.
|
|
109
109
|
|
|
110
110
|
```bash
|
|
111
|
-
|
|
111
|
+
# Increment the version and create a new Git tag automatically (run only ONE of the following three commands)
|
|
112
|
+
npm version patch # +0.0.1 → small fixes
|
|
113
|
+
npm version minor # +0.1.0 → new features
|
|
114
|
+
npm version major # +1.0.0 → breaking changes
|
|
112
115
|
npx tsc
|
|
113
116
|
npm publish
|
|
114
117
|
```
|
|
@@ -65,10 +65,19 @@ function epochToParisDateTime(epochSec) {
|
|
|
65
65
|
startTime: `${hh}:${mi}:${ss}.${ms}${offsetStr}`,
|
|
66
66
|
};
|
|
67
67
|
}
|
|
68
|
-
function toTargetEpoch(time) {
|
|
68
|
+
function toTargetEpoch(time, date) {
|
|
69
69
|
if (!time)
|
|
70
70
|
return null;
|
|
71
|
-
|
|
71
|
+
let dtLocal;
|
|
72
|
+
if (time.includes("T")) {
|
|
73
|
+
dtLocal = DateTime.fromISO(time, { zone: "Europe/Paris" });
|
|
74
|
+
}
|
|
75
|
+
else if (date) {
|
|
76
|
+
dtLocal = DateTime.fromISO(`${date}T${time}`, { zone: "Europe/Paris" });
|
|
77
|
+
}
|
|
78
|
+
else {
|
|
79
|
+
return null;
|
|
80
|
+
}
|
|
72
81
|
if (!dtLocal.isValid)
|
|
73
82
|
return null;
|
|
74
83
|
return Math.floor(dtLocal.toUTC().toSeconds());
|
|
@@ -126,15 +135,6 @@ function extractCandidatesFromSearchHtml(html) {
|
|
|
126
135
|
return true;
|
|
127
136
|
});
|
|
128
137
|
}
|
|
129
|
-
function parseFinalNvs(nvs) {
|
|
130
|
-
const playerTag = nvs.match(/<player\b[^>]*>/i)?.[0];
|
|
131
|
-
if (!playerTag)
|
|
132
|
-
return {};
|
|
133
|
-
const sessionStartStr = playerTag.match(/\bsessionstart="(\d+)"/i)?.[1];
|
|
134
|
-
return {
|
|
135
|
-
sessionStart: sessionStartStr ? Number(sessionStartStr) : undefined,
|
|
136
|
-
};
|
|
137
|
-
}
|
|
138
138
|
function parseDataNvs(nvs) {
|
|
139
139
|
const epochStr = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
|
|
140
140
|
const epoch = epochStr ? Number(epochStr) : undefined;
|
|
@@ -276,154 +276,173 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
276
276
|
// if (!options["silent"]) console.log(`[skip] ${agenda.uid} date/hour missing`)
|
|
277
277
|
return;
|
|
278
278
|
}
|
|
279
|
-
const agendaTs = toTargetEpoch(agenda.startTime);
|
|
279
|
+
const agendaTs = toTargetEpoch(agenda.startTime, agenda.date);
|
|
280
280
|
const now = Date.now();
|
|
281
281
|
if (agendaTs && agendaTs * 1000 > now) {
|
|
282
282
|
return;
|
|
283
283
|
}
|
|
284
|
-
STATS.total++;
|
|
285
284
|
const reunionUid = agenda.uid;
|
|
286
285
|
const baseDir = path.join(dataDir, VIDEOS_ROOT_FOLDER, String(session), reunionUid);
|
|
287
286
|
await fs.ensureDir(baseDir);
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
}
|
|
301
|
-
const pages = await fetchAllSearchPages(searchParams);
|
|
302
|
-
if (!pages.length) {
|
|
303
|
-
if (!options["silent"]) {
|
|
304
|
-
console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
|
|
287
|
+
let skipDownload = false;
|
|
288
|
+
if (options["only-recent"]) {
|
|
289
|
+
const now = Date.now();
|
|
290
|
+
const cutoff = now - options["only-recent"] * 24 * 3600 * 1000;
|
|
291
|
+
const reunionTs = Date.parse(agenda.date);
|
|
292
|
+
if (reunionTs < cutoff) {
|
|
293
|
+
// Check if files already exist
|
|
294
|
+
const dataNvsPath = path.join(baseDir, "data.nvs");
|
|
295
|
+
const finalplayerNvsPath = path.join(baseDir, "finalplayer.nvs");
|
|
296
|
+
if (fs.existsSync(dataNvsPath) && fs.existsSync(finalplayerNvsPath)) {
|
|
297
|
+
skipDownload = true;
|
|
298
|
+
}
|
|
305
299
|
}
|
|
306
|
-
return;
|
|
307
300
|
}
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
if (!
|
|
311
|
-
|
|
312
|
-
|
|
301
|
+
let master = null;
|
|
302
|
+
let accepted = false;
|
|
303
|
+
if (!skipDownload) {
|
|
304
|
+
STATS.total++;
|
|
305
|
+
const searchParams = {
|
|
306
|
+
search: "true",
|
|
307
|
+
videotype: getAgendaType(agenda),
|
|
308
|
+
};
|
|
309
|
+
if (agenda.date) {
|
|
310
|
+
const fr = toFRDate(agenda.date);
|
|
311
|
+
searchParams.period = "custom";
|
|
312
|
+
searchParams.begin = fr;
|
|
313
|
+
searchParams.end = fr;
|
|
313
314
|
}
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
// ==== 2) Enrich via data.nvs + scoring; pick best ====
|
|
317
|
-
let best = null;
|
|
318
|
-
for (const c of candidates) {
|
|
319
|
-
const dataUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`;
|
|
320
|
-
const finalUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/finalplayer.nvs`;
|
|
321
|
-
const dataBuf = await fetchBuffer(dataUrl);
|
|
322
|
-
if (!dataBuf)
|
|
323
|
-
continue;
|
|
324
|
-
const meta = parseDataNvs(dataBuf.toString("utf-8"));
|
|
325
|
-
let sessionStart;
|
|
326
|
-
const finalBuf = await fetchBuffer(finalUrl);
|
|
327
|
-
if (finalBuf) {
|
|
328
|
-
const finalMeta = parseFinalNvs(finalBuf.toString("utf-8"));
|
|
329
|
-
sessionStart = finalMeta.sessionStart;
|
|
315
|
+
if (agenda.organe) {
|
|
316
|
+
searchParams.organe = agenda.organe;
|
|
330
317
|
}
|
|
331
|
-
const
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
const agendaOrgNorm = normalize(agenda.organe);
|
|
336
|
-
const agendaKey = getOrgKey(agendaOrgNorm);
|
|
337
|
-
let bestDice = 0;
|
|
338
|
-
let hasSameKey = false;
|
|
339
|
-
for (const vo of meta.organes) {
|
|
340
|
-
const videoOrgNorm = normalize(vo);
|
|
341
|
-
const videoKey = getOrgKey(videoOrgNorm);
|
|
342
|
-
const d = dice(agendaOrgNorm, videoOrgNorm);
|
|
343
|
-
if (videoKey === agendaKey && videoKey !== "autre") {
|
|
344
|
-
hasSameKey = true;
|
|
345
|
-
}
|
|
346
|
-
if (d > bestDice)
|
|
347
|
-
bestDice = d;
|
|
318
|
+
const pages = await fetchAllSearchPages(searchParams);
|
|
319
|
+
if (!pages.length) {
|
|
320
|
+
if (!options["silent"]) {
|
|
321
|
+
console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
|
|
348
322
|
}
|
|
349
|
-
|
|
350
|
-
|
|
323
|
+
return;
|
|
324
|
+
}
|
|
325
|
+
const combinedHtml = pages.join("\n<!-- PAGE SPLIT -->\n");
|
|
326
|
+
const candidates = extractCandidatesFromSearchHtml(combinedHtml).slice(0, MAX_CANDIDATES);
|
|
327
|
+
if (!candidates.length) {
|
|
328
|
+
if (!options["silent"]) {
|
|
329
|
+
console.log(`[miss] ${agenda.uid} no candidates after parse (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
|
|
351
330
|
}
|
|
352
|
-
|
|
353
|
-
|
|
331
|
+
return;
|
|
332
|
+
}
|
|
333
|
+
// ==== 2) Enrich via data.nvs + scoring; pick best ====
|
|
334
|
+
let best = null;
|
|
335
|
+
for (const c of candidates) {
|
|
336
|
+
const dataUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`;
|
|
337
|
+
const finalUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/finalplayer.nvs`;
|
|
338
|
+
const dataBuf = await fetchBuffer(dataUrl);
|
|
339
|
+
if (!dataBuf)
|
|
354
340
|
continue;
|
|
341
|
+
const meta = parseDataNvs(dataBuf.toString("utf-8"));
|
|
342
|
+
let sameOrg = false;
|
|
343
|
+
// If the organes are too different, skip to the next candidate
|
|
344
|
+
if (agenda.organe && meta.organes?.length) {
|
|
345
|
+
const agendaOrgNorm = normalize(agenda.organe);
|
|
346
|
+
const agendaKey = getOrgKey(agendaOrgNorm);
|
|
347
|
+
let bestDice = 0;
|
|
348
|
+
let hasSameKey = false;
|
|
349
|
+
for (const vo of meta.organes) {
|
|
350
|
+
const videoOrgNorm = normalize(vo);
|
|
351
|
+
const videoKey = getOrgKey(videoOrgNorm);
|
|
352
|
+
const d = dice(agendaOrgNorm, videoOrgNorm);
|
|
353
|
+
if (videoKey === agendaKey && videoKey !== "autre") {
|
|
354
|
+
hasSameKey = true;
|
|
355
|
+
}
|
|
356
|
+
if (d > bestDice)
|
|
357
|
+
bestDice = d;
|
|
358
|
+
}
|
|
359
|
+
if (hasSameKey) {
|
|
360
|
+
sameOrg = true; // we are sure this is the same org
|
|
361
|
+
}
|
|
362
|
+
else if (bestDice < 0.8) {
|
|
363
|
+
// No shared org key and the best Dice similarity is below 0.8: skip this candidate
|
|
364
|
+
continue;
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
let videoTitle = c.title;
|
|
368
|
+
if (c.isSeancePublique && meta.firstChapterLabel) {
|
|
369
|
+
videoTitle = meta.firstChapterLabel;
|
|
370
|
+
}
|
|
371
|
+
const s = score(agenda, agendaTs, sameOrg, videoTitle, meta.epoch, meta.organes);
|
|
372
|
+
if (!best || s > best.score) {
|
|
373
|
+
best = {
|
|
374
|
+
id: c.id,
|
|
375
|
+
hash: c.hash,
|
|
376
|
+
pageUrl: c.pageUrl,
|
|
377
|
+
epoch: meta.epoch,
|
|
378
|
+
vtitle: videoTitle,
|
|
379
|
+
score: s,
|
|
380
|
+
vorgane: meta.organes[0],
|
|
381
|
+
};
|
|
355
382
|
}
|
|
356
383
|
}
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
const s = score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, meta.organes);
|
|
362
|
-
if (!best || s > best.score) {
|
|
363
|
-
best = {
|
|
364
|
-
id: c.id,
|
|
365
|
-
hash: c.hash,
|
|
366
|
-
pageUrl: c.pageUrl,
|
|
367
|
-
epoch: meta.epoch,
|
|
368
|
-
vtitle: videoTitle,
|
|
369
|
-
score: s,
|
|
370
|
-
vorgane: meta.organes[0],
|
|
371
|
-
};
|
|
384
|
+
if (!best) {
|
|
385
|
+
if (!options["silent"])
|
|
386
|
+
console.log(`[miss] ${agenda.uid} No candidate found for this reunion`);
|
|
387
|
+
return;
|
|
372
388
|
}
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
}
|
|
379
|
-
const accepted = best.score >= MATCH_THRESHOLD;
|
|
380
|
-
if (accepted)
|
|
381
|
-
STATS.accepted++;
|
|
382
|
-
if (!options["silent"]) {
|
|
383
|
-
console.log(`[pick] ${agenda.uid} score=${best.score.toFixed(2)}
|
|
389
|
+
accepted = best.score >= MATCH_THRESHOLD;
|
|
390
|
+
if (accepted)
|
|
391
|
+
STATS.accepted++;
|
|
392
|
+
if (!options["silent"]) {
|
|
393
|
+
console.log(`[pick] ${agenda.uid} score=${best.score.toFixed(2)}
|
|
384
394
|
agenda title="${agenda.titre ?? ""}" agenda organe="${agenda.organe ?? ""}" agenda heure=${agenda.startTime}
|
|
385
395
|
best title="${best.vtitle ?? ""}" best organe="${best.vorgane ?? ""}"
|
|
386
396
|
accepted=${accepted}`);
|
|
397
|
+
}
|
|
398
|
+
// ==== 3) Write metadata + NVS of the best candidate (always) ====
|
|
399
|
+
const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
|
|
400
|
+
const metadata = {
|
|
401
|
+
reunionUid,
|
|
402
|
+
session,
|
|
403
|
+
accepted,
|
|
404
|
+
threshold: MATCH_THRESHOLD,
|
|
405
|
+
agenda: {
|
|
406
|
+
date: agenda.date,
|
|
407
|
+
startTime: agenda.startTime,
|
|
408
|
+
titre: agenda.titre,
|
|
409
|
+
organe: agenda.organe ?? undefined,
|
|
410
|
+
uid: agenda.uid,
|
|
411
|
+
},
|
|
412
|
+
best: {
|
|
413
|
+
id: best.id,
|
|
414
|
+
hash: best.hash,
|
|
415
|
+
pageUrl: best.pageUrl,
|
|
416
|
+
epoch: best.epoch ?? null,
|
|
417
|
+
date: bestDt?.date ?? null,
|
|
418
|
+
startTime: bestDt?.startTime ?? null,
|
|
419
|
+
title: best.vtitle ?? null,
|
|
420
|
+
score: best.score,
|
|
421
|
+
},
|
|
422
|
+
};
|
|
423
|
+
await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
424
|
+
const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
|
|
425
|
+
const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
|
|
426
|
+
const dataTxt = await fetchText(dataUrl);
|
|
427
|
+
const finalTxt = await fetchText(finalUrl);
|
|
428
|
+
if (dataTxt)
|
|
429
|
+
await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
|
|
430
|
+
if (finalTxt)
|
|
431
|
+
await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
|
|
432
|
+
if (dataTxt && finalTxt)
|
|
433
|
+
master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
|
|
434
|
+
}
|
|
435
|
+
else {
|
|
436
|
+
// Download skipped, but data.nvs and finalplayer.nvs must still be read from disk to rebuild the master playlist (needed for urlVideo)
|
|
437
|
+
try {
|
|
438
|
+
const dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
|
|
439
|
+
const finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
|
|
440
|
+
master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
|
|
441
|
+
}
|
|
442
|
+
catch { }
|
|
387
443
|
}
|
|
388
|
-
// ==== 3) Write metadata + NVS of the best candidate (always) ====
|
|
389
|
-
const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
|
|
390
|
-
const metadata = {
|
|
391
|
-
reunionUid,
|
|
392
|
-
session,
|
|
393
|
-
accepted,
|
|
394
|
-
threshold: MATCH_THRESHOLD,
|
|
395
|
-
agenda: {
|
|
396
|
-
date: agenda.date,
|
|
397
|
-
startTime: agenda.startTime,
|
|
398
|
-
titre: agenda.titre,
|
|
399
|
-
organe: agenda.organe ?? undefined,
|
|
400
|
-
uid: agenda.uid,
|
|
401
|
-
},
|
|
402
|
-
best: {
|
|
403
|
-
id: best.id,
|
|
404
|
-
hash: best.hash,
|
|
405
|
-
pageUrl: best.pageUrl,
|
|
406
|
-
epoch: best.epoch ?? null,
|
|
407
|
-
date: bestDt?.date ?? null,
|
|
408
|
-
startTime: bestDt?.startTime ?? null,
|
|
409
|
-
title: best.vtitle ?? null,
|
|
410
|
-
score: best.score,
|
|
411
|
-
},
|
|
412
|
-
};
|
|
413
|
-
await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
414
|
-
const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
|
|
415
|
-
const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
|
|
416
|
-
const dataTxt = await fetchText(dataUrl);
|
|
417
|
-
const finalTxt = await fetchText(finalUrl);
|
|
418
|
-
if (dataTxt)
|
|
419
|
-
await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
|
|
420
|
-
if (finalTxt)
|
|
421
|
-
await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
|
|
422
|
-
let master = null;
|
|
423
|
-
if (dataTxt && finalTxt)
|
|
424
|
-
master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
|
|
425
444
|
// ==== 4) Update agenda file (only if accepted + m3u8) ====
|
|
426
|
-
if (accepted && master) {
|
|
445
|
+
if ((accepted || skipDownload) && master) {
|
|
427
446
|
const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
|
|
428
447
|
if (await fs.pathExists(agendaJsonPath)) {
|
|
429
448
|
const raw = await fsp.readFile(agendaJsonPath, "utf-8");
|
|
@@ -48,13 +48,12 @@ export declare const commonOptions: ({
|
|
|
48
48
|
name: string;
|
|
49
49
|
type: StringConstructor;
|
|
50
50
|
} | {
|
|
51
|
-
|
|
51
|
+
alias: string;
|
|
52
52
|
help: string;
|
|
53
53
|
name: string;
|
|
54
|
-
type:
|
|
54
|
+
type: BooleanConstructor;
|
|
55
55
|
} | {
|
|
56
|
-
alias: string;
|
|
57
56
|
help: string;
|
|
58
57
|
name: string;
|
|
59
|
-
type:
|
|
58
|
+
type: NumberConstructor;
|
|
60
59
|
})[];
|
|
@@ -35,4 +35,11 @@ export const onlyRecentOption = {
|
|
|
35
35
|
name: "only-recent",
|
|
36
36
|
type: Number,
|
|
37
37
|
};
|
|
38
|
-
export const commonOptions = [
|
|
38
|
+
export const commonOptions = [
|
|
39
|
+
categoriesOption,
|
|
40
|
+
dataDirDefaultOption,
|
|
41
|
+
fromSessionOption,
|
|
42
|
+
silentOption,
|
|
43
|
+
verboseOption,
|
|
44
|
+
onlyRecentOption,
|
|
45
|
+
];
|