@tricoteuses/senat 2.20.19 → 2.20.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ import * as cheerio from "cheerio";
8
8
  import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatAgendas } from "../loaders";
9
9
  import { getSessionsFromStart } from "../types/sessions";
10
10
  import { commonOptions } from "./shared/cli_helpers";
11
- import { getFirstInterventionStartTimecode } from "../utils/nvs-timecode";
11
+ import { getAgendaSegmentTimecodes } from "../utils/nvs-timecode";
12
12
  import { decodeHtmlEntities } from "../utils/string_cleaning";
13
13
  import { dice, normalize, scoreVideo } from "../utils/scoring";
14
14
  import { epochToParisDateTime, toFRDate, toTargetEpoch } from "../utils/date";
@@ -379,16 +379,22 @@ async function processGroupedReunion(agenda, session, dataDir) {
379
379
  console.warn(e);
380
380
  }
381
381
  }
382
- let timecodeDebutVideo = null;
383
- if (dataTxt && finalTxt) {
384
- timecodeDebutVideo = getFirstInterventionStartTimecode(dataTxt, finalTxt);
385
- if (timecodeDebutVideo === null) {
386
- console.warn(`[warn] Cannot retrieve start video timecode from reunion` + reunionUid);
387
- }
388
- }
389
382
  // ==== 4) Update agenda file (only if accepted + m3u8) ====
390
383
  if ((accepted || skipDownload) && master) {
391
384
  const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
385
+ let timecodeDebutVideo = null;
386
+ let timecodeFinVideo = null;
387
+ if (dataTxt && finalTxt) {
388
+ const agendaKey = agenda.titre || agenda.objet || "";
389
+ const seg = getAgendaSegmentTimecodes(dataTxt, finalTxt, agendaKey);
390
+ if (!seg) {
391
+ console.warn(`[warn] Cannot retrieve agenda segment timecodes from reunion ${reunionUid}`);
392
+ }
393
+ else {
394
+ timecodeDebutVideo = seg.start;
395
+ timecodeFinVideo = seg.end;
396
+ }
397
+ }
392
398
  if (await fs.pathExists(agendaJsonPath)) {
393
399
  const raw = await fsp.readFile(agendaJsonPath, "utf-8");
394
400
  let obj;
@@ -403,6 +409,7 @@ async function processGroupedReunion(agenda, session, dataDir) {
403
409
  const next = { ...obj, urlVideo: master };
404
410
  if (timecodeDebutVideo != null) {
405
411
  next.timecodeDebutVideo = timecodeDebutVideo;
412
+ next.timecodeFinVideo = timecodeFinVideo;
406
413
  }
407
414
  await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
408
415
  if (!options["silent"]) {
@@ -31,6 +31,7 @@ export interface Reunion {
31
31
  transcriptionRef?: string;
32
32
  urlVideo?: string;
33
33
  timecodeDebutVideo?: number;
34
+ timecodeFinVideo?: number;
34
35
  odj?: ReunionOdj;
35
36
  }
36
37
  export interface ReunionOdjPoint {
@@ -1 +1,7 @@
1
- export declare function getFirstInterventionStartTimecode(dataNvs: string, finalPlayerNvs: string): number | null;
1
/**
 * Locates the video segment that corresponds to one agenda item.
 *
 * The top-level chapters of `dataNvs` are compared against
 * `agendaTitleOrObjet`; the best-scoring chapter provides the segment start
 * and the chapter that follows it (if any) provides the segment end.
 *
 * @param dataNvs - Raw XML text describing the chapters of the video.
 * @param finalPlayerNvs - Raw player data used to resolve a chapter id to a timecode.
 * @param agendaTitleOrObjet - Agenda title (or "objet") matched against chapter labels.
 * @returns `null` when no usable chapter exists or when the start timecode of
 * the selected chapter cannot be resolved. Otherwise:
 * - `start`: timecode of the selected chapter (presumably whole seconds — confirm against the timecode lookup helper);
 * - `end`: timecode of the following chapter, or `null` when there is none or it cannot be resolved;
 * - `chapterId` / `nextChapterId`: ids of the selected and following chapters;
 * - `score`: similarity score of the selected chapter; `0` when no chapter
 *   reached the match threshold and the first chapter was used as a fallback.
 */
export declare function getAgendaSegmentTimecodes(dataNvs: string, finalPlayerNvs: string, agendaTitleOrObjet: string): {
    start: number;
    end: number | null;
    chapterId: string;
    nextChapterId: string | null;
    score: number;
} | null;
@@ -1,4 +1,7 @@
1
1
  import { XMLParser } from "fast-xml-parser";
2
+ import { dice, normalize } from "./scoring";
3
+ import { decodeHtmlEntities } from "./string_cleaning";
4
+ const CHAPTER_MATCH_THRESHOLD = 0.5;
2
5
  const xmlParser = new XMLParser({
3
6
  ignoreAttributes: false,
4
7
  attributeNamePrefix: "@_",
@@ -20,20 +23,57 @@ function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
20
23
  return null;
21
24
  return Math.floor(ms / 1000);
22
25
  }
23
- export function getFirstInterventionStartTimecode(dataNvs, finalPlayerNvs) {
24
- const firstChapterId = getFirstChapterId(dataNvs);
25
- if (!firstChapterId)
26
- return null;
27
- return getTimecodeForChapterId(finalPlayerNvs, firstChapterId);
26
/**
 * Coerces a "zero, one or many" value into an array.
 *
 * Arrays are returned as-is (same reference), any other truthy value is
 * wrapped in a one-element array, and every falsy value (`undefined`,
 * `null`, `""`, `0`, ...) yields an empty array.
 */
function toArray(v) {
    if (Array.isArray(v)) {
        return v;
    }
    return v ? [v] : [];
}
29
- function getFirstChapterId(dataNvs) {
31
/**
 * Extracts the top-level ("level 1") chapters of a data.nvs XML document.
 *
 * Chapters are read from `data.chapters.chapter`, falling back to
 * `chapters.chapter`; the id and label are taken from a child element or,
 * failing that, from the `@_`-prefixed attribute. Chapters without an id or
 * with an empty label are dropped.
 *
 * @param dataNvs Raw XML text.
 * @returns A list of `{ id, label, index }` where `index` is the position of
 * the chapter in the RETURNED list, so `result[chapter.index + 1]` is always
 * the chapter that follows it.
 */
function getLevel1Chapters(dataNvs) {
    const xml = xmlParser.parse(dataNvs);
    const root = xml?.data?.chapters?.chapter ?? xml?.chapters?.chapter;
    const chapters = [];
    for (const ch of toArray(root)) {
        const rawId = ch?.id ?? ch?.["@_id"];
        // A missing id must be rejected BEFORE stringification:
        // String(undefined) is the truthy string "undefined".
        if (rawId == null) {
            continue;
        }
        const id = String(rawId);
        const labelRaw = ch?.label ?? ch?.["@_label"] ?? "";
        const label = decodeHtmlEntities(String(labelRaw)).trim();
        if (!id || !label) {
            continue;
        }
        // Number after filtering so that `index` matches the array position
        // callers use to look up the following chapter.
        chapters.push({ id, label, index: chapters.length });
    }
    return chapters;
}
47
/**
 * Chooses the chapter whose label is most similar to the agenda title.
 *
 * Every chapter label is scored against the normalised title with `dice`;
 * the first chapter with the highest score wins. When the best score is
 * below `CHAPTER_MATCH_THRESHOLD`, the first chapter is returned with a
 * score of `0` so callers can tell a fallback from a real match.
 *
 * @param chapters Chapters as `{ id, label, index }` objects.
 * @param agendaTitle Agenda title (or "objet") to match.
 * @returns `{ chapter, score }`, or `null` when `chapters` is empty.
 */
function pickBestLevel1ChapterForAgenda(chapters, agendaTitle) {
    // Without this guard an empty list produced `{ chapter: undefined, score: 0 }`,
    // which passes a plain `if (!best)` check in the caller.
    if (!chapters || chapters.length === 0) {
        return null;
    }
    const query = normalize(agendaTitle);
    let best = null;
    for (const chapter of chapters) {
        // NOTE(review): the query is normalised but the label is passed raw —
        // confirm that `dice` normalises its inputs itself.
        const score = dice(query, chapter.label);
        if (best === null || score > best.score) {
            best = { chapter, score };
        }
    }
    if (best.score < CHAPTER_MATCH_THRESHOLD) {
        return { chapter: chapters[0], score: 0 };
    }
    return best;
}
59
/**
 * Resolves the start/end video timecodes of the chapter that best matches an
 * agenda item.
 *
 * @param dataNvs Raw XML text listing the chapters.
 * @param finalPlayerNvs Raw player data used to turn a chapter id into a timecode.
 * @param agendaTitleOrObjet Agenda title (or "objet") matched against chapter labels.
 * @returns `null` when there is no usable chapter or the start timecode of
 * the selected chapter cannot be resolved; otherwise
 * `{ start, end, chapterId, nextChapterId, score }`, where `end` and
 * `nextChapterId` are `null` when the selected chapter is the last one
 * (`end` is also `null` when the next chapter's timecode cannot be resolved).
 */
export function getAgendaSegmentTimecodes(dataNvs, finalPlayerNvs, agendaTitleOrObjet) {
    const l1 = getLevel1Chapters(dataNvs);
    if (!l1.length)
        return null;
    const best = pickBestLevel1ChapterForAgenda(l1, agendaTitleOrObjet);
    const chapter = best?.chapter;
    // Guard on the chapter itself: a result object without a chapter is as
    // unusable as no result at all.
    if (!chapter)
        return null;
    const start = getTimecodeForChapterId(finalPlayerNvs, chapter.id);
    if (start == null)
        return null;
    // Find the following chapter by the selected chapter's actual position in
    // `l1`; `chapter.index` may not match that position if entries were
    // dropped while the list was built. Fall back to it only if the chapter
    // object is not an element of `l1`.
    const found = l1.indexOf(chapter);
    const position = found >= 0 ? found : chapter.index;
    const next = l1[position + 1] ?? null;
    const end = next ? getTimecodeForChapterId(finalPlayerNvs, next.id) : null;
    return {
        start,
        end,
        chapterId: chapter.id,
        nextChapterId: next?.id ?? null,
        score: best.score,
    };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.20.19",
3
+ "version": "2.20.21",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",