@tricoteuses/senat 2.20.19 → 2.20.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -8,7 +8,7 @@ import * as cheerio from "cheerio";
|
|
|
8
8
|
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatAgendas } from "../loaders";
|
|
9
9
|
import { getSessionsFromStart } from "../types/sessions";
|
|
10
10
|
import { commonOptions } from "./shared/cli_helpers";
|
|
11
|
-
import {
|
|
11
|
+
import { getAgendaSegmentTimecodes } from "../utils/nvs-timecode";
|
|
12
12
|
import { decodeHtmlEntities } from "../utils/string_cleaning";
|
|
13
13
|
import { dice, normalize, scoreVideo } from "../utils/scoring";
|
|
14
14
|
import { epochToParisDateTime, toFRDate, toTargetEpoch } from "../utils/date";
|
|
@@ -379,16 +379,22 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
379
379
|
console.warn(e);
|
|
380
380
|
}
|
|
381
381
|
}
|
|
382
|
-
let timecodeDebutVideo = null;
|
|
383
|
-
if (dataTxt && finalTxt) {
|
|
384
|
-
timecodeDebutVideo = getFirstInterventionStartTimecode(dataTxt, finalTxt);
|
|
385
|
-
if (timecodeDebutVideo === null) {
|
|
386
|
-
console.warn(`[warn] Cannot retrieve start video timecode from reunion` + reunionUid);
|
|
387
|
-
}
|
|
388
|
-
}
|
|
389
382
|
// ==== 4) Update agenda file (only if accepted + m3u8) ====
|
|
390
383
|
if ((accepted || skipDownload) && master) {
|
|
391
384
|
const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
|
|
385
|
+
let timecodeDebutVideo = null;
|
|
386
|
+
let timecodeFinVideo = null;
|
|
387
|
+
if (dataTxt && finalTxt) {
|
|
388
|
+
const agendaKey = agenda.titre || agenda.objet || "";
|
|
389
|
+
const seg = getAgendaSegmentTimecodes(dataTxt, finalTxt, agendaKey);
|
|
390
|
+
if (!seg) {
|
|
391
|
+
console.warn(`[warn] Cannot retrieve agenda segment timecodes from reunion ${reunionUid}`);
|
|
392
|
+
}
|
|
393
|
+
else {
|
|
394
|
+
timecodeDebutVideo = seg.start;
|
|
395
|
+
timecodeFinVideo = seg.end;
|
|
396
|
+
}
|
|
397
|
+
}
|
|
392
398
|
if (await fs.pathExists(agendaJsonPath)) {
|
|
393
399
|
const raw = await fsp.readFile(agendaJsonPath, "utf-8");
|
|
394
400
|
let obj;
|
|
@@ -403,6 +409,7 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
403
409
|
const next = { ...obj, urlVideo: master };
|
|
404
410
|
if (timecodeDebutVideo != null) {
|
|
405
411
|
next.timecodeDebutVideo = timecodeDebutVideo;
|
|
412
|
+
next.timecodeFinVideo = timecodeFinVideo;
|
|
406
413
|
}
|
|
407
414
|
await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
|
|
408
415
|
if (!options["silent"]) {
|
package/lib/types/agenda.d.ts
CHANGED
|
@@ -1 +1,7 @@
|
|
|
1
|
-
export declare function
|
|
1
|
+
/**
 * Locates the video segment of an agenda item inside a recording.
 *
 * The agenda title (or "objet") is matched against the chapters declared in
 * `dataNvs`; the timecodes of the selected chapter and of the chapter that
 * follows it are then looked up in `finalPlayerNvs`.
 *
 * @param dataNvs Raw text of the `data.nvs` chapter description.
 * @param finalPlayerNvs Raw text of the player `.nvs` file holding the chapter timecodes.
 * @param agendaTitleOrObjet Agenda title or "objet" used to select the chapter.
 * @returns The segment description, or `null` when no chapter is available or
 * the start timecode of the selected chapter cannot be resolved.
 */
export declare function getAgendaSegmentTimecodes(
    dataNvs: string,
    finalPlayerNvs: string,
    agendaTitleOrObjet: string,
): {
    /** Timecode of the selected chapter. */
    start: number;
    /** Timecode of the following chapter, or `null` when there is none or it cannot be resolved. */
    end: number | null;
    /** Id of the selected chapter. */
    chapterId: string;
    /** Id of the following chapter, or `null` when the selected chapter is the last one. */
    nextChapterId: string | null;
    /** Similarity score of the selected chapter; `0` when the first chapter was used as a fallback. */
    score: number;
} | null;
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
import { XMLParser } from "fast-xml-parser";
|
|
2
|
+
import { dice, normalize } from "./scoring";
|
|
3
|
+
import { decodeHtmlEntities } from "./string_cleaning";
|
|
4
|
+
const CHAPTER_MATCH_THRESHOLD = 0.5;
|
|
2
5
|
const xmlParser = new XMLParser({
|
|
3
6
|
ignoreAttributes: false,
|
|
4
7
|
attributeNamePrefix: "@_",
|
|
@@ -20,20 +23,57 @@ function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
|
|
|
20
23
|
return null;
|
|
21
24
|
return Math.floor(ms / 1000);
|
|
22
25
|
}
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
return getTimecodeForChapterId(finalPlayerNvs, firstChapterId);
|
|
26
|
+
/**
 * Normalises a "zero, one or many" value into an array.
 *
 * An array is returned as-is (same reference), any other truthy value is
 * wrapped in a one-element array, and every falsy value yields an empty array.
 */
function toArray(v) {
    if (Array.isArray(v)) {
        return v;
    }
    return v ? [v] : [];
}
|
|
29
|
-
function
|
|
31
|
+
function getLevel1Chapters(dataNvs) {
|
|
30
32
|
const xml = xmlParser.parse(dataNvs);
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
+
const root = xml?.data?.chapters?.chapter ?? xml?.chapters?.chapter;
|
|
34
|
+
const roots = toArray(root);
|
|
35
|
+
return roots
|
|
36
|
+
.map((ch, i) => {
|
|
37
|
+
const id = ch?.id ?? ch?.["@_id"];
|
|
38
|
+
const labelRaw = ch?.label ?? ch?.["@_label"] ?? "";
|
|
39
|
+
return {
|
|
40
|
+
id: String(id),
|
|
41
|
+
label: decodeHtmlEntities(String(labelRaw)).trim(),
|
|
42
|
+
index: i,
|
|
43
|
+
};
|
|
44
|
+
})
|
|
45
|
+
.filter((c) => c.id && c.label);
|
|
46
|
+
}
|
|
47
|
+
function pickBestLevel1ChapterForAgenda(chapters, agendaTitle) {
|
|
48
|
+
const q = normalize(agendaTitle);
|
|
49
|
+
let best = null;
|
|
50
|
+
for (const ch of chapters) {
|
|
51
|
+
const s = dice(q, ch.label);
|
|
52
|
+
if (!best || s > best.score)
|
|
53
|
+
best = { chapter: ch, score: s };
|
|
54
|
+
}
|
|
55
|
+
if (!best || best.score < CHAPTER_MATCH_THRESHOLD)
|
|
56
|
+
return { chapter: chapters[0], score: 0 };
|
|
57
|
+
return best;
|
|
58
|
+
}
|
|
59
|
+
export function getAgendaSegmentTimecodes(dataNvs, finalPlayerNvs, agendaTitleOrObjet) {
|
|
60
|
+
const l1 = getLevel1Chapters(dataNvs);
|
|
61
|
+
if (!l1.length)
|
|
62
|
+
return null;
|
|
63
|
+
const best = pickBestLevel1ChapterForAgenda(l1, agendaTitleOrObjet);
|
|
64
|
+
if (!best)
|
|
33
65
|
return null;
|
|
34
|
-
const
|
|
35
|
-
const
|
|
36
|
-
|
|
66
|
+
const chapter = best.chapter;
|
|
67
|
+
const next = l1[chapter.index + 1] ?? null;
|
|
68
|
+
const start = getTimecodeForChapterId(finalPlayerNvs, chapter.id);
|
|
69
|
+
if (start == null)
|
|
37
70
|
return null;
|
|
38
|
-
|
|
71
|
+
const end = next ? getTimecodeForChapterId(finalPlayerNvs, next.id) : null;
|
|
72
|
+
return {
|
|
73
|
+
start,
|
|
74
|
+
end,
|
|
75
|
+
chapterId: chapter.id,
|
|
76
|
+
nextChapterId: next?.id ?? null,
|
|
77
|
+
score: best.score,
|
|
78
|
+
};
|
|
39
79
|
}
|