@tricoteuses/senat 2.18.11 → 2.18.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -108,7 +108,10 @@ npm run data:generate_schemas ../senat-data
|
|
|
108
108
|
To publish a new version of this package to npm, bump the package version, build, and publish.
|
|
109
109
|
|
|
110
110
|
```bash
|
|
111
|
-
|
|
111
|
+
# Increment the version and create a new Git tag automatically (run exactly ONE of the following)
|
|
112
|
+
npm version patch # +0.0.1 → small fixes
|
|
113
|
+
npm version minor # +0.1.0 → new features
|
|
114
|
+
npm version major # +1.0.0 → breaking changes
|
|
112
115
|
npx tsc
|
|
113
116
|
npm publish
|
|
114
117
|
```
|
|
@@ -135,15 +135,6 @@ function extractCandidatesFromSearchHtml(html) {
|
|
|
135
135
|
return true;
|
|
136
136
|
});
|
|
137
137
|
}
|
|
138
|
-
function parseFinalNvs(nvs) {
|
|
139
|
-
const playerTag = nvs.match(/<player\b[^>]*>/i)?.[0];
|
|
140
|
-
if (!playerTag)
|
|
141
|
-
return {};
|
|
142
|
-
const sessionStartStr = playerTag.match(/\bsessionstart="(\d+)"/i)?.[1];
|
|
143
|
-
return {
|
|
144
|
-
sessionStart: sessionStartStr ? Number(sessionStartStr) : undefined,
|
|
145
|
-
};
|
|
146
|
-
}
|
|
147
138
|
function parseDataNvs(nvs) {
|
|
148
139
|
const epochStr = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
|
|
149
140
|
const epoch = epochStr ? Number(epochStr) : undefined;
|
|
@@ -207,8 +198,6 @@ function score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, videoOrganes)
|
|
|
207
198
|
const titleScore = Math.max(objetS, titleS);
|
|
208
199
|
let timeScore = 0;
|
|
209
200
|
if (agendaTs && videoEpoch) {
|
|
210
|
-
console.log("agendaTs", agendaTs);
|
|
211
|
-
console.log("videoEpoch", videoEpoch);
|
|
212
201
|
const deltaMin = Math.abs(videoEpoch - agendaTs) / 60;
|
|
213
202
|
timeScore = Math.exp(-deltaMin / 60);
|
|
214
203
|
}
|
|
@@ -292,142 +281,168 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
292
281
|
if (agendaTs && agendaTs * 1000 > now) {
|
|
293
282
|
return;
|
|
294
283
|
}
|
|
295
|
-
STATS.total++;
|
|
296
284
|
const reunionUid = agenda.uid;
|
|
297
285
|
const baseDir = path.join(dataDir, VIDEOS_ROOT_FOLDER, String(session), reunionUid);
|
|
298
286
|
await fs.ensureDir(baseDir);
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
}
|
|
312
|
-
const pages = await fetchAllSearchPages(searchParams);
|
|
313
|
-
if (!pages.length) {
|
|
314
|
-
if (!options["silent"]) {
|
|
315
|
-
console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
|
|
287
|
+
let skipDownload = false;
|
|
288
|
+
if (options["only-recent"]) {
|
|
289
|
+
const now = Date.now();
|
|
290
|
+
const cutoff = now - options["only-recent"] * 24 * 3600 * 1000;
|
|
291
|
+
const reunionTs = Date.parse(agenda.date);
|
|
292
|
+
if (reunionTs < cutoff) {
|
|
293
|
+
// Check if files already exist
|
|
294
|
+
const dataNvsPath = path.join(baseDir, "data.nvs");
|
|
295
|
+
const finalplayerNvsPath = path.join(baseDir, "finalplayer.nvs");
|
|
296
|
+
if (fs.existsSync(dataNvsPath) && fs.existsSync(finalplayerNvsPath)) {
|
|
297
|
+
skipDownload = true;
|
|
298
|
+
}
|
|
316
299
|
}
|
|
317
|
-
return;
|
|
318
300
|
}
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
if (!
|
|
322
|
-
|
|
323
|
-
|
|
301
|
+
let master = null;
|
|
302
|
+
let accepted = false;
|
|
303
|
+
if (!skipDownload) {
|
|
304
|
+
STATS.total++;
|
|
305
|
+
const searchParams = {
|
|
306
|
+
search: "true",
|
|
307
|
+
videotype: getAgendaType(agenda),
|
|
308
|
+
};
|
|
309
|
+
if (agenda.date) {
|
|
310
|
+
const fr = toFRDate(agenda.date);
|
|
311
|
+
searchParams.period = "custom";
|
|
312
|
+
searchParams.begin = fr;
|
|
313
|
+
searchParams.end = fr;
|
|
324
314
|
}
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
const dataBuf = await fetchBuffer(dataUrl);
|
|
333
|
-
if (!dataBuf)
|
|
334
|
-
continue;
|
|
335
|
-
const meta = parseDataNvs(dataBuf.toString("utf-8"));
|
|
336
|
-
let sameOrg = false;
|
|
337
|
-
// If organes are too different, go to next candidates
|
|
338
|
-
if (agenda.organe && meta.organes?.length) {
|
|
339
|
-
const agendaOrgNorm = normalize(agenda.organe);
|
|
340
|
-
const agendaKey = getOrgKey(agendaOrgNorm);
|
|
341
|
-
let bestDice = 0;
|
|
342
|
-
let hasSameKey = false;
|
|
343
|
-
for (const vo of meta.organes) {
|
|
344
|
-
const videoOrgNorm = normalize(vo);
|
|
345
|
-
const videoKey = getOrgKey(videoOrgNorm);
|
|
346
|
-
const d = dice(agendaOrgNorm, videoOrgNorm);
|
|
347
|
-
if (videoKey === agendaKey && videoKey !== "autre") {
|
|
348
|
-
hasSameKey = true;
|
|
349
|
-
}
|
|
350
|
-
if (d > bestDice)
|
|
351
|
-
bestDice = d;
|
|
315
|
+
if (agenda.organe) {
|
|
316
|
+
searchParams.organe = agenda.organe;
|
|
317
|
+
}
|
|
318
|
+
const pages = await fetchAllSearchPages(searchParams);
|
|
319
|
+
if (!pages.length) {
|
|
320
|
+
if (!options["silent"]) {
|
|
321
|
+
console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
|
|
352
322
|
}
|
|
353
|
-
|
|
354
|
-
|
|
323
|
+
return;
|
|
324
|
+
}
|
|
325
|
+
const combinedHtml = pages.join("\n<!-- PAGE SPLIT -->\n");
|
|
326
|
+
const candidates = extractCandidatesFromSearchHtml(combinedHtml).slice(0, MAX_CANDIDATES);
|
|
327
|
+
if (!candidates.length) {
|
|
328
|
+
if (!options["silent"]) {
|
|
329
|
+
console.log(`[miss] ${agenda.uid} no candidates after parse (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
|
|
355
330
|
}
|
|
356
|
-
|
|
357
|
-
|
|
331
|
+
return;
|
|
332
|
+
}
|
|
333
|
+
// ==== 2) Enrich via data.nvs + scoring; pick best ====
|
|
334
|
+
let best = null;
|
|
335
|
+
for (const c of candidates) {
|
|
336
|
+
const dataUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`;
|
|
337
|
+
const finalUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/finalplayer.nvs`;
|
|
338
|
+
const dataBuf = await fetchBuffer(dataUrl);
|
|
339
|
+
if (!dataBuf)
|
|
358
340
|
continue;
|
|
341
|
+
const meta = parseDataNvs(dataBuf.toString("utf-8"));
|
|
342
|
+
let sameOrg = false;
|
|
343
|
+
// If organes are too different, go to next candidates
|
|
344
|
+
if (agenda.organe && meta.organes?.length) {
|
|
345
|
+
const agendaOrgNorm = normalize(agenda.organe);
|
|
346
|
+
const agendaKey = getOrgKey(agendaOrgNorm);
|
|
347
|
+
let bestDice = 0;
|
|
348
|
+
let hasSameKey = false;
|
|
349
|
+
for (const vo of meta.organes) {
|
|
350
|
+
const videoOrgNorm = normalize(vo);
|
|
351
|
+
const videoKey = getOrgKey(videoOrgNorm);
|
|
352
|
+
const d = dice(agendaOrgNorm, videoOrgNorm);
|
|
353
|
+
if (videoKey === agendaKey && videoKey !== "autre") {
|
|
354
|
+
hasSameKey = true;
|
|
355
|
+
}
|
|
356
|
+
if (d > bestDice)
|
|
357
|
+
bestDice = d;
|
|
358
|
+
}
|
|
359
|
+
if (hasSameKey) {
|
|
360
|
+
sameOrg = true; // we are sure this is the same org
|
|
361
|
+
}
|
|
362
|
+
else if (bestDice < 0.8) {
|
|
363
|
+
// if diff org and dice too low we skip
|
|
364
|
+
continue;
|
|
365
|
+
}
|
|
366
|
+
}
|
|
367
|
+
let videoTitle = c.title;
|
|
368
|
+
if (c.isSeancePublique && meta.firstChapterLabel) {
|
|
369
|
+
videoTitle = meta.firstChapterLabel;
|
|
370
|
+
}
|
|
371
|
+
const s = score(agenda, agendaTs, sameOrg, videoTitle, meta.epoch, meta.organes);
|
|
372
|
+
if (!best || s > best.score) {
|
|
373
|
+
best = {
|
|
374
|
+
id: c.id,
|
|
375
|
+
hash: c.hash,
|
|
376
|
+
pageUrl: c.pageUrl,
|
|
377
|
+
epoch: meta.epoch,
|
|
378
|
+
vtitle: videoTitle,
|
|
379
|
+
score: s,
|
|
380
|
+
vorgane: meta.organes[0],
|
|
381
|
+
};
|
|
359
382
|
}
|
|
360
383
|
}
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
const s = score(agenda, agendaTs, sameOrg, videoTitle, meta.epoch, meta.organes);
|
|
366
|
-
if (!best || s > best.score) {
|
|
367
|
-
best = {
|
|
368
|
-
id: c.id,
|
|
369
|
-
hash: c.hash,
|
|
370
|
-
pageUrl: c.pageUrl,
|
|
371
|
-
epoch: meta.epoch,
|
|
372
|
-
vtitle: videoTitle,
|
|
373
|
-
score: s,
|
|
374
|
-
vorgane: meta.organes[0],
|
|
375
|
-
};
|
|
384
|
+
if (!best) {
|
|
385
|
+
if (!options["silent"])
|
|
386
|
+
console.log(`[miss] ${agenda.uid} No candidate found for this reunion`);
|
|
387
|
+
return;
|
|
376
388
|
}
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
}
|
|
383
|
-
const accepted = best.score >= MATCH_THRESHOLD;
|
|
384
|
-
if (accepted)
|
|
385
|
-
STATS.accepted++;
|
|
386
|
-
if (!options["silent"]) {
|
|
387
|
-
console.log(`[pick] ${agenda.uid} score=${best.score.toFixed(2)}
|
|
389
|
+
accepted = best.score >= MATCH_THRESHOLD;
|
|
390
|
+
if (accepted)
|
|
391
|
+
STATS.accepted++;
|
|
392
|
+
if (!options["silent"]) {
|
|
393
|
+
console.log(`[pick] ${agenda.uid} score=${best.score.toFixed(2)}
|
|
388
394
|
agenda title="${agenda.titre ?? ""}" agenda organe="${agenda.organe ?? ""}" agenda heure=${agenda.startTime}
|
|
389
395
|
best title="${best.vtitle ?? ""}" best organe="${best.vorgane ?? ""}"
|
|
390
396
|
accepted=${accepted}`);
|
|
397
|
+
}
|
|
398
|
+
// ==== 3) Write metadata + NVS of the best candidate (always) ====
|
|
399
|
+
const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
|
|
400
|
+
const metadata = {
|
|
401
|
+
reunionUid,
|
|
402
|
+
session,
|
|
403
|
+
accepted,
|
|
404
|
+
threshold: MATCH_THRESHOLD,
|
|
405
|
+
agenda: {
|
|
406
|
+
date: agenda.date,
|
|
407
|
+
startTime: agenda.startTime,
|
|
408
|
+
titre: agenda.titre,
|
|
409
|
+
organe: agenda.organe ?? undefined,
|
|
410
|
+
uid: agenda.uid,
|
|
411
|
+
},
|
|
412
|
+
best: {
|
|
413
|
+
id: best.id,
|
|
414
|
+
hash: best.hash,
|
|
415
|
+
pageUrl: best.pageUrl,
|
|
416
|
+
epoch: best.epoch ?? null,
|
|
417
|
+
date: bestDt?.date ?? null,
|
|
418
|
+
startTime: bestDt?.startTime ?? null,
|
|
419
|
+
title: best.vtitle ?? null,
|
|
420
|
+
score: best.score,
|
|
421
|
+
},
|
|
422
|
+
};
|
|
423
|
+
await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
424
|
+
const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
|
|
425
|
+
const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
|
|
426
|
+
const dataTxt = await fetchText(dataUrl);
|
|
427
|
+
const finalTxt = await fetchText(finalUrl);
|
|
428
|
+
if (dataTxt)
|
|
429
|
+
await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
|
|
430
|
+
if (finalTxt)
|
|
431
|
+
await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
|
|
432
|
+
if (dataTxt && finalTxt)
|
|
433
|
+
master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
|
|
434
|
+
}
|
|
435
|
+
else {
|
|
436
|
+
// Skipped download, but need to read data.nvs for urlVideo
|
|
437
|
+
try {
|
|
438
|
+
const dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
|
|
439
|
+
const finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
|
|
440
|
+
master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
|
|
441
|
+
}
|
|
442
|
+
catch { }
|
|
391
443
|
}
|
|
392
|
-
// ==== 3) Write metadata + NVS of the best candidate (always) ====
|
|
393
|
-
const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
|
|
394
|
-
const metadata = {
|
|
395
|
-
reunionUid,
|
|
396
|
-
session,
|
|
397
|
-
accepted,
|
|
398
|
-
threshold: MATCH_THRESHOLD,
|
|
399
|
-
agenda: {
|
|
400
|
-
date: agenda.date,
|
|
401
|
-
startTime: agenda.startTime,
|
|
402
|
-
titre: agenda.titre,
|
|
403
|
-
organe: agenda.organe ?? undefined,
|
|
404
|
-
uid: agenda.uid,
|
|
405
|
-
},
|
|
406
|
-
best: {
|
|
407
|
-
id: best.id,
|
|
408
|
-
hash: best.hash,
|
|
409
|
-
pageUrl: best.pageUrl,
|
|
410
|
-
epoch: best.epoch ?? null,
|
|
411
|
-
date: bestDt?.date ?? null,
|
|
412
|
-
startTime: bestDt?.startTime ?? null,
|
|
413
|
-
title: best.vtitle ?? null,
|
|
414
|
-
score: best.score,
|
|
415
|
-
},
|
|
416
|
-
};
|
|
417
|
-
await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
418
|
-
const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
|
|
419
|
-
const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
|
|
420
|
-
const dataTxt = await fetchText(dataUrl);
|
|
421
|
-
const finalTxt = await fetchText(finalUrl);
|
|
422
|
-
if (dataTxt)
|
|
423
|
-
await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
|
|
424
|
-
if (finalTxt)
|
|
425
|
-
await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
|
|
426
|
-
let master = null;
|
|
427
|
-
if (dataTxt && finalTxt)
|
|
428
|
-
master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
|
|
429
444
|
// ==== 4) Update agenda file (only if accepted + m3u8) ====
|
|
430
|
-
if (accepted && master) {
|
|
445
|
+
if ((accepted || skipDownload) && master) {
|
|
431
446
|
const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
|
|
432
447
|
if (await fs.pathExists(agendaJsonPath)) {
|
|
433
448
|
const raw = await fsp.readFile(agendaJsonPath, "utf-8");
|
|
@@ -48,13 +48,12 @@ export declare const commonOptions: ({
|
|
|
48
48
|
name: string;
|
|
49
49
|
type: StringConstructor;
|
|
50
50
|
} | {
|
|
51
|
-
|
|
51
|
+
alias: string;
|
|
52
52
|
help: string;
|
|
53
53
|
name: string;
|
|
54
|
-
type:
|
|
54
|
+
type: BooleanConstructor;
|
|
55
55
|
} | {
|
|
56
|
-
alias: string;
|
|
57
56
|
help: string;
|
|
58
57
|
name: string;
|
|
59
|
-
type:
|
|
58
|
+
type: NumberConstructor;
|
|
60
59
|
})[];
|
|
@@ -35,4 +35,11 @@ export const onlyRecentOption = {
|
|
|
35
35
|
name: "only-recent",
|
|
36
36
|
type: Number,
|
|
37
37
|
};
|
|
38
|
-
export const commonOptions = [
|
|
38
|
+
export const commonOptions = [
|
|
39
|
+
categoriesOption,
|
|
40
|
+
dataDirDefaultOption,
|
|
41
|
+
fromSessionOption,
|
|
42
|
+
silentOption,
|
|
43
|
+
verboseOption,
|
|
44
|
+
onlyRecentOption,
|
|
45
|
+
];
|