@tricoteuses/senat 2.21.0 → 2.21.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/src/scripts/retrieve_videos.d.ts +1 -33
- package/lib/src/scripts/retrieve_videos.js +115 -358
- package/lib/src/utils/date.d.ts +1 -0
- package/lib/src/utils/date.js +26 -0
- package/lib/src/utils/nvs-parsing.d.ts +24 -0
- package/lib/src/utils/nvs-parsing.js +112 -0
- package/lib/src/utils/scoring.d.ts +3 -59
- package/lib/src/utils/scoring.js +39 -113
- package/lib/src/videos/config.d.ts +10 -0
- package/lib/src/videos/config.js +20 -0
- package/lib/src/videos/index.d.ts +5 -0
- package/lib/src/videos/index.js +5 -0
- package/lib/src/videos/match.d.ts +18 -0
- package/lib/src/videos/match.js +134 -0
- package/lib/src/videos/pipeline.d.ts +24 -0
- package/lib/src/videos/pipeline.js +130 -0
- package/lib/src/videos/search.d.ts +10 -0
- package/lib/src/videos/search.js +97 -0
- package/lib/src/videos/types.d.ts +70 -0
- package/lib/src/videos/types.js +1 -0
- package/lib/tests/videoMatching.test.js +4 -44
- package/package.json +6 -4
|
@@ -1,33 +1 @@
|
|
|
1
|
-
|
|
2
|
-
export interface SearchParams {
|
|
3
|
-
search: string;
|
|
4
|
-
videotype: string;
|
|
5
|
-
period?: string;
|
|
6
|
-
begin?: string;
|
|
7
|
-
end?: string;
|
|
8
|
-
organe?: string;
|
|
9
|
-
}
|
|
10
|
-
export declare const SENAT_VIDEOS_SEARCH_AJAX = "https://videos.senat.fr/senat_videos_search.php";
|
|
11
|
-
export declare const SENAT_DATAS_ROOT = "https://videos.senat.fr/Datas/senat";
|
|
12
|
-
export declare function fetchText(url: string): Promise<string | null>;
|
|
13
|
-
export declare function fetchBuffer(url: string): Promise<Buffer | null>;
|
|
14
|
-
export declare function queryString(obj: Record<string, string>): string;
|
|
15
|
-
export declare function parseDataNvs(nvs: string): {
|
|
16
|
-
epoch?: number;
|
|
17
|
-
organes: string[];
|
|
18
|
-
firstChapterLabel?: string;
|
|
19
|
-
salle?: string;
|
|
20
|
-
};
|
|
21
|
-
export declare function buildSenatVodMasterM3u8FromNvs(nvsText: string): string | null;
|
|
22
|
-
export declare function isAmbiguousTimeOriginal(timeOriginal?: string | null): boolean;
|
|
23
|
-
export declare function getAgendaType(agenda: Reunion): string;
|
|
24
|
-
export declare function fetchAllSearchPages(args: SearchParams, maxPages?: number): Promise<string[]>;
|
|
25
|
-
export declare function getOrgKey(norm: string): string;
|
|
26
|
-
export type MatchResult = {
|
|
27
|
-
reunionUid: string;
|
|
28
|
-
picked: null | {
|
|
29
|
-
m3u8: string;
|
|
30
|
-
startSec: number;
|
|
31
|
-
score: number;
|
|
32
|
-
};
|
|
33
|
-
};
|
|
1
|
+
export {};
|
|
@@ -4,387 +4,144 @@ import commandLineArgs from "command-line-args";
|
|
|
4
4
|
import fs from "fs-extra";
|
|
5
5
|
import fsp from "fs/promises";
|
|
6
6
|
import path from "path";
|
|
7
|
-
import
|
|
8
|
-
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatAgendas } from "../loaders";
|
|
7
|
+
import { iterLoadSenatAgendas } from "../loaders";
|
|
9
8
|
import { getSessionsFromStart } from "../types/sessions";
|
|
10
9
|
import { commonOptions } from "./shared/cli_helpers";
|
|
11
|
-
import { getAgendaSegmentTimecodes } from "../utils/nvs-
|
|
12
|
-
import {
|
|
13
|
-
import { matchOneReunion } from "../utils/scoring";
|
|
14
|
-
import { epochToParisDateTime, toFRDate, toTargetEpoch } from "../utils/date";
|
|
10
|
+
import { getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs } from "../utils/nvs-parsing";
|
|
11
|
+
import { epochToParisDateTime, isAmbiguousTimeOriginal, toTargetEpoch } from "../utils/date";
|
|
15
12
|
import { pathToFileURL } from "url";
|
|
16
|
-
import {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
const VIDEOS_ROOT_FOLDER = "videos";
|
|
21
|
-
export const SENAT_VIDEOS_SEARCH_AJAX = "https://videos.senat.fr/senat_videos_search.php";
|
|
22
|
-
export const SENAT_DATAS_ROOT = "https://videos.senat.fr/Datas/senat";
|
|
23
|
-
// ===================== CLI =====================
|
|
13
|
+
import { fetchCandidatesForAgenda, fetchText } from "../videos/search";
|
|
14
|
+
import { matchAgendaToVideo } from "../videos/match";
|
|
15
|
+
import { SENAT_DATAS_ROOT, STATS, VIDEOS_ROOT_FOLDER, weights } from "../videos/config";
|
|
16
|
+
import { processBisIfNeeded, processOneReunionMatch, writeIfChanged } from "../videos";
|
|
24
17
|
const optionsDefinitions = [...commonOptions];
|
|
25
18
|
const options = commandLineArgs(optionsDefinitions);
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
const res = await fetch(url);
|
|
29
|
-
if (!res.ok)
|
|
30
|
-
return null;
|
|
31
|
-
return await res.text();
|
|
32
|
-
}
|
|
33
|
-
/**
 * Download `url` and return the response body as a Node.js Buffer.
 *
 * @param url Address to request with the global `fetch`.
 * @returns The body bytes, or `null` when the response status is not OK.
 */
export async function fetchBuffer(url) {
    const response = await fetch(url);
    // A non-2xx answer is reported as "nothing available" rather than as an error.
    return response.ok ? Buffer.from(await response.arrayBuffer()) : null;
}
|
|
40
|
-
/**
 * Write `content` to the file at `p` unless the file already holds exactly that text.
 *
 * @param p Destination file path.
 * @param content UTF-8 text to store.
 */
async function writeIfChanged(p, content) {
    // Only read the current content when the file exists; `&&` keeps that lazy.
    const alreadyUpToDate = (await fs.pathExists(p)) && (await fsp.readFile(p, "utf-8")) === content;
    if (!alreadyUpToDate) {
        await fsp.writeFile(p, content, "utf-8");
    }
}
|
|
49
|
-
/**
 * Serialize a flat object into a URL query string (without the leading "?").
 *
 * @param obj Key/value pairs; both keys and values are percent-encoded.
 * @returns `key=value` pairs joined with "&", in the object's own entry order.
 */
export function queryString(obj) {
    const pairs = [];
    for (const [key, value] of Object.entries(obj)) {
        pairs.push(`${encodeURIComponent(key)}=${encodeURIComponent(value)}`);
    }
    return pairs.join("&");
}
|
|
54
|
-
function extractCandidatesFromSearchHtml(html) {
|
|
55
|
-
const $ = cheerio.load(html);
|
|
56
|
-
const out = [];
|
|
57
|
-
const re = /video\.(\d+)_([a-z0-9]+)/i;
|
|
58
|
-
$('h3.card-title a.stretched-link[href*="video."]').each((_, a) => {
|
|
59
|
-
const href = $(a).attr("href") || "";
|
|
60
|
-
const m = href.match(re);
|
|
61
|
-
if (!m)
|
|
62
|
-
return;
|
|
63
|
-
const id = m[1];
|
|
64
|
-
const hash = m[2];
|
|
65
|
-
const pageUrl = `https://videos.senat.fr/video.${id}_${hash}.html`;
|
|
66
|
-
const title = ($(a).attr("title") || $(a).text() || "").replace(/\s+/g, " ").trim() || undefined;
|
|
67
|
-
const isSeancePublique = title?.toLowerCase().includes("séance publique") ?? false;
|
|
68
|
-
out.push({ id, hash, pageUrl, title, isSeancePublique });
|
|
69
|
-
});
|
|
70
|
-
// dedupe
|
|
71
|
-
const seen = new Set();
|
|
72
|
-
return out.filter((c) => {
|
|
73
|
-
const k = `${c.id}_${c.hash}`;
|
|
74
|
-
if (seen.has(k))
|
|
75
|
-
return false;
|
|
76
|
-
seen.add(k);
|
|
19
|
+
function shouldSkipAgenda(agenda) {
|
|
20
|
+
if (!agenda.captationVideo)
|
|
77
21
|
return true;
|
|
78
|
-
|
|
79
|
-
}
|
|
80
|
-
/**
 * Extract the fields used for matching from the text of a `data.nvs` file.
 *
 * @param nvs Raw text of the NVS document.
 * @returns `epoch` (numeric value of the "date" metadata, if present),
 *   `organes` (decoded labels of every "organes" metadata tag, or
 *   ["Séance publique"] when none is found), `firstChapterLabel` (decoded
 *   label of the first `<chapter>` tag, if any) and `salle`.
 */
export function parseDataNvs(nvs) {
    const rawEpoch = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
    const epoch = rawEpoch ? Number(rawEpoch) : undefined;
    // NOTE(review): when no "salle" metadata exists, `undefined` is handed to
    // decodeHtmlEntities — confirm that helper tolerates it.
    const salle = decodeHtmlEntities(nvs.match(/<metadata\s+name="salle"\s+value="([^"]+)"/i)?.[1]).trim();
    // One video can carry several "organes" metadata tags; collect them all.
    const organes = [];
    for (const [tag] of nvs.matchAll(/<metadata\b[^>]*\bname="organes"[^>]*>/gi)) {
        const rawLabel = tag.match(/\blabel="([^"]+)"/i)?.[1];
        if (!rawLabel) {
            continue;
        }
        const label = decodeHtmlEntities(rawLabel).trim();
        if (label) {
            organes.push(label);
        }
    }
    if (organes.length === 0) {
        organes.push("Séance publique");
    }
    const chapterMatch = nvs.match(/<chapter\b[^>]*\blabel="([^"]+)"/i);
    const firstChapterLabel = chapterMatch ? decodeHtmlEntities(chapterMatch[1]).trim() : undefined;
    return { epoch, organes, firstChapterLabel, salle };
}
|
|
104
|
-
/**
 * Derive the VOD master playlist URL from the text of an NVS file.
 *
 * The NVS text is searched for a reference shaped like
 * `serverfiles://senat/2025/10/encoder10_20251022084451_2.mp4`; the year,
 * month, encoder name and 14-digit stamp are reused to build the URL.
 *
 * @param nvsText Raw text of the NVS document.
 * @returns The `.smil/master.m3u8` URL, or `null` when no such reference is found.
 */
export function buildSenatVodMasterM3u8FromNvs(nvsText) {
    const found = nvsText.match(/serverfiles:\/\/senat\/(\d{4})\/(\d{2})\/(encoder\d+)_([0-9]{14})/i);
    if (found === null) {
        return null;
    }
    const year = found[1];
    const month = found[2];
    const encoder = found[3];
    const stamp = found[4];
    return `https://vodsenat.akamaized.net/senat/${year}/${month}/${encoder}_${stamp}.smil/master.m3u8`;
}
|
|
113
|
-
export function isAmbiguousTimeOriginal(timeOriginal) {
|
|
114
|
-
if (!timeOriginal)
|
|
115
|
-
return false;
|
|
116
|
-
const s = timeOriginal.toLowerCase();
|
|
117
|
-
// Catches "14h", "14 h", "14h30", "14 h 30", "14 heures", "14 heure"
|
|
118
|
-
const timeRe = /\b([01]?\d|2[0-3])\s*(?:h|heures?|heure)\s*(?:([0-5]\d))?\b/g;
|
|
119
|
-
const times = new Set();
|
|
120
|
-
let m;
|
|
121
|
-
while ((m = timeRe.exec(s))) {
|
|
122
|
-
const hh = String(m[1]).padStart(2, "0");
|
|
123
|
-
const mm = m[2] ? String(m[2]).padStart(2, "0") : "00";
|
|
124
|
-
times.add(`${hh}:${mm}`);
|
|
125
|
-
}
|
|
126
|
-
// "midi" / "minuit"
|
|
127
|
-
if (/\bmidi\b/.test(s))
|
|
128
|
-
times.add("12:00");
|
|
129
|
-
if (/\bminuit\b/.test(s))
|
|
130
|
-
times.add("00:00");
|
|
131
|
-
if (times.size >= 2)
|
|
22
|
+
if (!agenda.date || !agenda.startTime)
|
|
132
23
|
return true;
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
24
|
+
if (agenda.uid.endsWith("Bis"))
|
|
25
|
+
return true; // Don't reprocess bis reunions
|
|
26
|
+
const agendaTs = toTargetEpoch(agenda.startTime, agenda.date);
|
|
27
|
+
const now = Date.now();
|
|
28
|
+
if (agendaTs && agendaTs * 1000 > now)
|
|
136
29
|
return true;
|
|
137
30
|
return false;
|
|
138
31
|
}
|
|
139
|
-
|
|
140
|
-
const
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
32
|
+
/**
 * Build the per-reunion working context and make sure its output folder exists.
 *
 * @param agenda Reunion being processed; `uid`, `date` and `startTime` are read.
 * @param session Session identifier, used as a path segment.
 * @param dataDir Root data directory.
 * @returns `{ session, dataDir, baseDir, reunionUid, agendaTs }`, where
 *   `agendaTs` is `null` when the agenda lacks a date or a start time.
 */
async function computeContext(agenda, session, dataDir) {
    let agendaTs = null;
    if (agenda.startTime && agenda.date) {
        agendaTs = toTargetEpoch(agenda.startTime, agenda.date);
    }
    const reunionUid = agenda.uid;
    // Layout: <dataDir>/<VIDEOS_ROOT_FOLDER>/<session>/<reunionUid>
    const baseDir = path.join(dataDir, VIDEOS_ROOT_FOLDER, String(session), reunionUid);
    await fs.ensureDir(baseDir);
    return { session, dataDir, baseDir, reunionUid, agendaTs };
}
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
return pages;
|
|
39
|
+
/**
 * Decide whether matching/downloading can be skipped for a reunion.
 *
 * Skipping only applies when the `only-recent` option is set: a reunion whose
 * date is older than that many days is skipped if both `data.nvs` and
 * `finalplayer.nvs` are already present in `baseDir`.
 *
 * @param agenda Reunion being processed; only `date` is read.
 * @param baseDir Folder holding the reunion's downloaded artifacts.
 * @returns `true` when the existing local files should be reused.
 */
function shouldSkipDownload(agenda, baseDir) {
    const recentDays = options["only-recent"];
    if (!recentDays) {
        return false;
    }
    const cutoff = Date.now() - recentDays * 24 * 3600 * 1000;
    if (Date.parse(agenda.date) >= cutoff) {
        // Recent reunions are always refreshed.
        return false;
    }
    return ["data.nvs", "finalplayer.nvs"].every((fileName) => fs.existsSync(path.join(baseDir, fileName)));
}
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
51
|
+
/**
 * Persist the outcome of a successful match for one reunion.
 *
 * Writes `metadata.json` (only when its content changed) into `ctx.baseDir`,
 * then downloads `data.nvs` and `finalplayer.nvs` of the best candidate and
 * stores whichever of them could be fetched.
 *
 * @param args.agenda Reunion that was matched.
 * @param args.ctx Context from `computeContext` (`baseDir`, `reunionUid`, `session` are read).
 * @param args.best Accepted candidate.
 * @param args.secondBest Runner-up candidate, or a falsy value when there is none.
 */
async function writeMatchArtifacts(args) {
    const { agenda, ctx, best, secondBest } = args;
    // Single serializer so `best` and `secondBest` always expose the same
    // fields, in the same order, in metadata.json.
    const describeCandidate = (candidate) => {
        const dt = candidate.epoch ? epochToParisDateTime(candidate.epoch) : null;
        return {
            id: candidate.id,
            hash: candidate.hash,
            pageUrl: candidate.pageUrl,
            epoch: candidate.epoch ?? null,
            date: dt?.date ?? null,
            startTime: dt?.startTime ?? null,
            title: candidate.vtitle ?? null,
            score: candidate.score,
        };
    };
    const metadata = {
        reunionUid: ctx.reunionUid,
        session: ctx.session,
        accepted: true,
        threshold: weights.minAccept,
        agenda: {
            date: agenda.date,
            startTime: agenda.startTime,
            titre: agenda.titre,
            organe: agenda.organe ?? undefined,
            uid: agenda.uid,
        },
        best: describeCandidate(best),
        secondBest: secondBest ? describeCandidate(secondBest) : null,
    };
    await writeIfChanged(path.join(ctx.baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
    const contentRoot = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content`;
    // The two downloads are independent, so run them concurrently.
    const [dataTxt, finalTxt] = await Promise.all([
        fetchText(`${contentRoot}/data.nvs`),
        fetchText(`${contentRoot}/finalplayer.nvs`),
    ]);
    // A falsy result means the file could not be fetched; keep any previous local copy.
    if (dataTxt) {
        await fsp.writeFile(path.join(ctx.baseDir, "data.nvs"), dataTxt, "utf-8");
    }
    if (finalTxt) {
        await fsp.writeFile(path.join(ctx.baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
    }
}
|
|
197
100
|
async function processGroupedReunion(agenda, session, dataDir) {
|
|
198
101
|
// 1) GuardRails
|
|
199
|
-
if (
|
|
200
|
-
// if (!options["silent"]) console.log(`[skip] ${agenda.uid} captationVideo=false`)
|
|
201
|
-
return;
|
|
202
|
-
}
|
|
203
|
-
if (!agenda.date || !agenda.startTime) {
|
|
204
|
-
// if (!options["silent"]) console.log(`[skip] ${agenda.uid} date/hour missing`)
|
|
205
|
-
return;
|
|
206
|
-
}
|
|
207
|
-
const agendaTs = toTargetEpoch(agenda.startTime, agenda.date);
|
|
208
|
-
const now = Date.now();
|
|
209
|
-
if (agendaTs && agendaTs * 1000 > now) {
|
|
102
|
+
if (shouldSkipAgenda(agenda))
|
|
210
103
|
return;
|
|
211
|
-
|
|
212
|
-
const
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
let
|
|
216
|
-
|
|
217
|
-
const now = Date.now();
|
|
218
|
-
const cutoff = now - options["only-recent"] * 24 * 3600 * 1000;
|
|
219
|
-
const reunionTs = Date.parse(agenda.date);
|
|
220
|
-
if (reunionTs < cutoff) {
|
|
221
|
-
// Check if files already exist
|
|
222
|
-
const dataNvsPath = path.join(baseDir, "data.nvs");
|
|
223
|
-
const finalplayerNvsPath = path.join(baseDir, "finalplayer.nvs");
|
|
224
|
-
if (fs.existsSync(dataNvsPath) && fs.existsSync(finalplayerNvsPath)) {
|
|
225
|
-
skipDownload = true;
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
let master = null;
|
|
230
|
-
let dataTxt = null;
|
|
231
|
-
let finalTxt = null;
|
|
232
|
-
let accepted = false;
|
|
104
|
+
const ctx = await computeContext(agenda, session, dataDir);
|
|
105
|
+
const skipDownload = shouldSkipDownload(agenda, ctx.baseDir);
|
|
106
|
+
let match = null;
|
|
107
|
+
let best = null;
|
|
108
|
+
let secondBest = null;
|
|
109
|
+
// 2) Match + download artifacts (only if not skipped)
|
|
233
110
|
if (!skipDownload) {
|
|
234
111
|
STATS.total++;
|
|
235
|
-
const
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
};
|
|
239
|
-
if (agenda.date) {
|
|
240
|
-
const fr = toFRDate(agenda.date);
|
|
241
|
-
searchParams.period = "custom";
|
|
242
|
-
searchParams.begin = fr;
|
|
243
|
-
searchParams.end = fr;
|
|
244
|
-
}
|
|
245
|
-
if (agenda.organe) {
|
|
246
|
-
searchParams.organe = agenda.organe;
|
|
247
|
-
}
|
|
248
|
-
const pages = await fetchAllSearchPages(searchParams);
|
|
249
|
-
if (!pages.length) {
|
|
250
|
-
if (!options["silent"]) {
|
|
251
|
-
console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
|
|
252
|
-
}
|
|
112
|
+
const candidates = await fetchCandidatesForAgenda(agenda, options);
|
|
113
|
+
if (!candidates) {
|
|
114
|
+
console.log(`[warn] ${agenda.uid} No candidate found for this reunion. Probably VOD not published yet.`);
|
|
253
115
|
return;
|
|
254
116
|
}
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
if (!options["silent"]) {
|
|
259
|
-
console.log(`[miss] ${agenda.uid} no candidates after parse (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
|
|
260
|
-
}
|
|
261
|
-
return;
|
|
262
|
-
}
|
|
263
|
-
// ==== 2) Enrich via data.nvs + scoring; pick best ====
|
|
264
|
-
const timeAmbigious = isAmbiguousTimeOriginal(agenda.events[0].timeOriginal);
|
|
265
|
-
if (timeAmbigious) {
|
|
266
|
-
console.log(`[match] ${agenda.uid} timeOriginal ambiguous => ignoring time scoring: "${agenda.events[0].timeOriginal}"`);
|
|
267
|
-
}
|
|
268
|
-
const best = await matchOneReunion({
|
|
269
|
-
agenda,
|
|
270
|
-
agendaTs,
|
|
271
|
-
timeAmbigious,
|
|
272
|
-
candidates,
|
|
273
|
-
weights,
|
|
274
|
-
fetchDataNvs: (c) => fetchBuffer(`${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`),
|
|
275
|
-
});
|
|
276
|
-
if (!best) {
|
|
277
|
-
if (!options["silent"])
|
|
278
|
-
console.log(`[miss] ${agenda.uid} No candidate found for this reunion`);
|
|
117
|
+
match = await matchAgendaToVideo({ agenda, agendaTs: ctx.agendaTs, candidates, options });
|
|
118
|
+
if (!match) {
|
|
119
|
+
console.log(`[miss] ${agenda.uid} No match found for this reunion`);
|
|
279
120
|
return;
|
|
280
121
|
}
|
|
122
|
+
;
|
|
123
|
+
({ best, secondBest } = match);
|
|
281
124
|
STATS.accepted++;
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
id: best.id,
|
|
303
|
-
hash: best.hash,
|
|
304
|
-
pageUrl: best.pageUrl,
|
|
305
|
-
epoch: best.epoch ?? null,
|
|
306
|
-
date: bestDt?.date ?? null,
|
|
307
|
-
startTime: bestDt?.startTime ?? null,
|
|
308
|
-
title: best.vtitle ?? null,
|
|
309
|
-
score: best.score,
|
|
310
|
-
},
|
|
311
|
-
};
|
|
312
|
-
await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
313
|
-
const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
|
|
314
|
-
const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
|
|
315
|
-
dataTxt = await fetchText(dataUrl);
|
|
316
|
-
finalTxt = await fetchText(finalUrl);
|
|
317
|
-
if (dataTxt)
|
|
318
|
-
await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
|
|
319
|
-
if (finalTxt)
|
|
320
|
-
await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
|
|
321
|
-
if (best.m3u8) {
|
|
322
|
-
master = best.m3u8;
|
|
323
|
-
}
|
|
324
|
-
else {
|
|
325
|
-
console.log("Cannot download data nvs");
|
|
326
|
-
}
|
|
327
|
-
}
|
|
328
|
-
else {
|
|
329
|
-
// Skipped download, but need to read data.nvs for urlVideo
|
|
330
|
-
try {
|
|
331
|
-
dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
|
|
332
|
-
finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
|
|
333
|
-
master = buildSenatVodMasterM3u8FromNvs(dataTxt);
|
|
334
|
-
}
|
|
335
|
-
catch (e) {
|
|
336
|
-
console.warn(e);
|
|
337
|
-
}
|
|
338
|
-
}
|
|
339
|
-
// ==== 4) Update agenda file (only if accepted + m3u8) ====
|
|
340
|
-
if (master) {
|
|
341
|
-
const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
|
|
342
|
-
let timecodeDebutVideo = null;
|
|
343
|
-
let timecodeFinVideo = null;
|
|
344
|
-
if (dataTxt && finalTxt) {
|
|
345
|
-
const agendaKey = agenda.titre || agenda.objet || "";
|
|
346
|
-
const seg = getAgendaSegmentTimecodes(dataTxt, finalTxt, agendaKey);
|
|
347
|
-
if (!seg) {
|
|
348
|
-
console.warn(`[warn] Cannot retrieve agenda segment timecodes from reunion ${reunionUid}`);
|
|
349
|
-
}
|
|
350
|
-
else {
|
|
351
|
-
timecodeDebutVideo = seg.start;
|
|
352
|
-
timecodeFinVideo = seg.end;
|
|
353
|
-
}
|
|
354
|
-
}
|
|
355
|
-
if (await fs.pathExists(agendaJsonPath)) {
|
|
356
|
-
const raw = await fsp.readFile(agendaJsonPath, "utf-8");
|
|
357
|
-
let obj;
|
|
358
|
-
try {
|
|
359
|
-
obj = JSON.parse(raw);
|
|
360
|
-
}
|
|
361
|
-
catch (e) {
|
|
362
|
-
console.warn(`[warn] invalid JSON in ${agendaJsonPath}:`, e?.message);
|
|
363
|
-
obj = null;
|
|
364
|
-
}
|
|
365
|
-
if (obj && typeof obj === "object" && !Array.isArray(obj)) {
|
|
366
|
-
const next = { ...obj, urlVideo: master };
|
|
367
|
-
if (timecodeDebutVideo != null) {
|
|
368
|
-
next.timecodeDebutVideo = timecodeDebutVideo;
|
|
369
|
-
next.timecodeFinVideo = timecodeFinVideo;
|
|
370
|
-
}
|
|
371
|
-
await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
|
|
372
|
-
if (!options["silent"]) {
|
|
373
|
-
console.log(`[write] ${agenda.uid} urlVideo ← ${master}` +
|
|
374
|
-
(timecodeDebutVideo != null ? ` (timecodeDebutVideo ← ${timecodeDebutVideo}s)` : ""));
|
|
375
|
-
}
|
|
376
|
-
}
|
|
377
|
-
else {
|
|
378
|
-
console.warn(`[warn] expected an object in ${agendaJsonPath}, got ${Array.isArray(obj) ? "array" : typeof obj}`);
|
|
379
|
-
}
|
|
380
|
-
}
|
|
381
|
-
else {
|
|
382
|
-
console.warn(`[warn] agenda file not found for update: ${agendaJsonPath}`);
|
|
383
|
-
}
|
|
384
|
-
}
|
|
385
|
-
else {
|
|
386
|
-
console.warn(`[warn] The video url could not be built for reunion `, reunionUid);
|
|
387
|
-
}
|
|
125
|
+
await writeMatchArtifacts({ agenda, ctx, best, secondBest });
|
|
126
|
+
}
|
|
127
|
+
if (best && isAmbiguousTimeOriginal(agenda.events[0].timeOriginal)) {
|
|
128
|
+
if (!options["silent"])
|
|
129
|
+
console.log("If the time is ambiguous, update agenda startTime from matched video");
|
|
130
|
+
agenda = { ...agenda, startTime: epochToParisDateTime(best.epoch)?.startTime ?? agenda.startTime };
|
|
131
|
+
}
|
|
132
|
+
// 3) Always update BEST agenda JSON from local NVS
|
|
133
|
+
await processOneReunionMatch({
|
|
134
|
+
agenda,
|
|
135
|
+
baseDir: ctx.baseDir,
|
|
136
|
+
dataDir: ctx.dataDir,
|
|
137
|
+
session: ctx.session,
|
|
138
|
+
options,
|
|
139
|
+
writeIfChanged,
|
|
140
|
+
getAgendaSegmentTimecodes,
|
|
141
|
+
buildSenatVodMasterM3u8FromNvs,
|
|
142
|
+
});
|
|
143
|
+
// 4) Optional BIS
|
|
144
|
+
await processBisIfNeeded({ agenda, secondBest, ctx, skipDownload, options });
|
|
388
145
|
}
|
|
389
146
|
async function processAll(dataDir, sessions) {
|
|
390
147
|
console.log("Process all Agendas and fetch video's url");
|
package/lib/src/utils/date.d.ts
CHANGED
|
@@ -8,3 +8,4 @@ export declare function epochToParisDateTime(epochSec: number): {
|
|
|
8
8
|
} | null;
|
|
9
9
|
export declare function toTargetEpoch(time: string | null, date?: string | null): number | null;
|
|
10
10
|
export declare function toFRDate(dateYYYYMMDD: string): string;
|
|
11
|
+
export declare function isAmbiguousTimeOriginal(timeOriginal?: string | null): boolean;
|
package/lib/src/utils/date.js
CHANGED
|
@@ -98,3 +98,29 @@ export function toFRDate(dateYYYYMMDD) {
|
|
|
98
98
|
const [y, m, d] = dateYYYYMMDD.split("-");
|
|
99
99
|
return `${d}/${m}/${y}`; // DD/MM/YYYY
|
|
100
100
|
}
|
|
101
|
+
/**
 * Tell whether a free-text agenda time (French) describes more than one moment.
 *
 * The text is ambiguous when it contains at least two distinct times
 * ("10h et 14h30", "midi et 15h"), or exactly one time combined with a
 * day-period word (matin, après-midi, soir, nuit, journée…) and a linking
 * token (",", "et", "ou", "puis").
 *
 * @param timeOriginal Raw time text; may be null, undefined or empty.
 * @returns `true` when the text is ambiguous, `false` otherwise (including for empty input).
 */
export function isAmbiguousTimeOriginal(timeOriginal) {
    if (!timeOriginal) {
        return false;
    }
    // NFC so that accented letters typed as "letter + combining mark" still match.
    const s = timeOriginal.normalize("NFC").toLowerCase();
    // Catches "14h", "14 h", "14h30", "14 h 30", "14 heures", "14 heure"
    const timeRe = /\b([01]?\d|2[0-3])\s*(?:h|heures?)\s*(?:([0-5]\d))?\b/g;
    const times = new Set();
    for (const [, hour, minute] of s.matchAll(timeRe)) {
        const hh = String(hour).padStart(2, "0");
        const mm = minute ? String(minute).padStart(2, "0") : "00";
        times.add(`${hh}:${mm}`);
    }
    // JavaScript's \b only knows ASCII word characters, so it misplaces
    // boundaries around "é"/"è". The patterns below use explicit Unicode-aware
    // lookarounds instead.
    // "midi" / "minuit" — but the "midi" inside "après-midi" is a day period, not noon.
    const noonRe = /(?<![\p{L}\p{N}_])(?<!apr[èe]s[-\s]?)midi(?![\p{L}\p{N}_])/u;
    const midnightRe = /(?<![\p{L}\p{N}_])minuit(?![\p{L}\p{N}_])/u;
    if (noonRe.test(s)) {
        times.add("12:00");
    }
    if (midnightRe.test(s)) {
        times.add("00:00");
    }
    if (times.size >= 2) {
        return true;
    }
    const dayPeriodRe = /(?<![\p{L}\p{N}_])(?:matin(?:ée)?|après[-\s]?midi|soir(?:ée)?|nuit|journée)(?![\p{L}\p{N}_])/u;
    const hasDayPeriod = dayPeriodRe.test(s);
    const hasLinking = /,|\bet\b|\bou\b|\bpuis\b/.test(s);
    return times.size === 1 && hasDayPeriod && hasLinking;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
export type L1Chapter = {
|
|
2
|
+
id: string;
|
|
3
|
+
label: string;
|
|
4
|
+
index: number;
|
|
5
|
+
};
|
|
6
|
+
export declare function getLevel1Chapters(dataNvs: string): L1Chapter[];
|
|
7
|
+
export declare function pickBestLevel1ChapterForAgenda(chapters: L1Chapter[], agendaTitle: string): {
|
|
8
|
+
chapter: L1Chapter;
|
|
9
|
+
score: number;
|
|
10
|
+
} | null;
|
|
11
|
+
export declare function getAgendaSegmentTimecodes(dataNvs: string, finalPlayerNvs: string, agendaTitleOrObjet: string): {
|
|
12
|
+
start: number;
|
|
13
|
+
end: number | null;
|
|
14
|
+
chapterId: string;
|
|
15
|
+
nextChapterId: string | null;
|
|
16
|
+
score: number;
|
|
17
|
+
} | null;
|
|
18
|
+
export declare function parseDataNvs(nvs: string): {
|
|
19
|
+
epoch?: number;
|
|
20
|
+
organes: string[];
|
|
21
|
+
firstChapterLabel?: string;
|
|
22
|
+
salle?: string;
|
|
23
|
+
};
|
|
24
|
+
export declare function buildSenatVodMasterM3u8FromNvs(nvsText: string): string | null;
|