@tricoteuses/senat 2.13.0 → 2.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/loaders.d.ts +1 -3
- package/lib/loaders.js +0 -12
- package/lib/model/commission.js +3 -6
- package/lib/model/seance.js +6 -8
- package/lib/scripts/retrieve_cr_commission.js +1 -1
- package/lib/scripts/retrieve_videos.d.ts +7 -1
- package/lib/scripts/retrieve_videos.js +95 -80
- package/lib/utils/cr_spliting.js +1 -0
- package/package.json +1 -1
- package/lib/model/compte_rendu.d.ts +0 -9
- package/lib/model/compte_rendu.js +0 -325
- package/lib/raw_types/db.d.ts +0 -11389
- package/lib/raw_types/db.js +0 -5
- package/lib/scripts/retrieve_comptes_rendus.d.ts +0 -6
- package/lib/scripts/retrieve_comptes_rendus.js +0 -274
package/lib/loaders.d.ts
CHANGED
|
@@ -3,7 +3,7 @@ import { DebatResult } from "./model/debats";
|
|
|
3
3
|
import { DossierLegislatifResult } from "./model/dosleg";
|
|
4
4
|
import { QuestionResult } from "./model/questions";
|
|
5
5
|
import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
|
|
6
|
-
import {
|
|
6
|
+
import { GroupedReunion } from "./types/agenda";
|
|
7
7
|
import { FlatTexte } from "./types/texte";
|
|
8
8
|
import { CompteRendu } from "./types/compte_rendu";
|
|
9
9
|
export { EnabledDatasets } from "./datasets";
|
|
@@ -83,8 +83,6 @@ export declare function loadSenatTexteContent(dataDir: string, textePathFromData
|
|
|
83
83
|
export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
|
|
84
84
|
item: CompteRendu | null;
|
|
85
85
|
};
|
|
86
|
-
export declare function iterLoadSenatAgendas(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AgendaEvent[]>>;
|
|
87
|
-
export declare function iterLoadSenatEvenements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AgendaEvent>>;
|
|
88
86
|
export declare function iterLoadSenatAgendasGrouped(dataDir: string, session: number | undefined): Generator<IterItem<GroupedReunion>>;
|
|
89
87
|
export declare function iterLoadSenatCirconscriptions(dataDir: string, options?: {}): Generator<IterItem<CirconscriptionResult>>;
|
|
90
88
|
export declare function iterLoadSenatOrganismes(dataDir: string, options?: {}): Generator<IterItem<OrganismeResult>>;
|
package/lib/loaders.js
CHANGED
|
@@ -183,18 +183,6 @@ export function loadSenatCompteRenduContent(dataDir, session, debatId) {
|
|
|
183
183
|
const json = fs.readFileSync(fullPath, { encoding: "utf8" });
|
|
184
184
|
return { item: JSON.parse(json) };
|
|
185
185
|
}
|
|
186
|
-
export function* iterLoadSenatAgendas(dataDir, session, options = {}) {
|
|
187
|
-
for (const evenementsItem of iterLoadSenatItems(dataDir, AGENDA_FOLDER, session, DATA_TRANSFORMED_FOLDER, options)) {
|
|
188
|
-
yield evenementsItem;
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
export function* iterLoadSenatEvenements(dataDir, session, options = {}) {
|
|
192
|
-
for (const evenementsItem of iterLoadSenatItems(dataDir, AGENDA_FOLDER, session, DATA_TRANSFORMED_FOLDER, options)) {
|
|
193
|
-
for (const evenement of evenementsItem.item) {
|
|
194
|
-
yield { item: evenement };
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
186
|
export function* iterLoadSenatAgendasGrouped(dataDir, session) {
|
|
199
187
|
const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? ""));
|
|
200
188
|
if (!fs.existsSync(baseDir))
|
package/lib/model/commission.js
CHANGED
|
@@ -200,21 +200,18 @@ export function parseCommissionCRFromFile(htmlFilePath, best, fallback) {
|
|
|
200
200
|
}
|
|
201
201
|
const raw = fs.readFileSync(htmlFilePath, "utf8");
|
|
202
202
|
const $ = cheerio.load(raw, { xmlMode: false });
|
|
203
|
-
// --- champs déterminés depuis best OU fallback (aucun fallback via filename) ---
|
|
204
203
|
const dateISO = best?.date ?? fallback.dateISO;
|
|
205
204
|
const startTime = best?.startTime ?? hourShortToStartTime(fallback.hourShort);
|
|
206
205
|
const organe = best?.organe ?? fallback?.organe ?? undefined;
|
|
207
|
-
// UIDs alignés sur makeTypeGroupUid (RUSN…) mais CR = RUSN → CRC
|
|
208
206
|
const seanceRef = best?.uid ?? makeTypeGroupUid(dateISO, "COM", fallback.hourShort ?? "NA", organe);
|
|
209
207
|
const uid = seanceRef.replace(/^RU/, "CRC");
|
|
210
208
|
const dateSeance = toCRDate(dateISO, startTime);
|
|
211
|
-
// --- scope du jour ---
|
|
212
209
|
const $dayRoot = findDayRoot($, dateISO);
|
|
213
210
|
if ($dayRoot.length === 0) {
|
|
214
211
|
console.warn(`[COM-CR][parse] day root not found for ${dateISO} in ${path.basename(htmlFilePath)}`);
|
|
215
212
|
return null;
|
|
216
213
|
}
|
|
217
|
-
// ---
|
|
214
|
+
// --- Collect paragraphes/h3 until next h2 ---
|
|
218
215
|
const dayParas = [];
|
|
219
216
|
let $cursor = $dayRoot.next();
|
|
220
217
|
while ($cursor.length && !$cursor.is("h2")) {
|
|
@@ -260,8 +257,8 @@ export function parseCommissionCRFromFile(htmlFilePath, best, fallback) {
|
|
|
260
257
|
heureGeneration: new Date(),
|
|
261
258
|
};
|
|
262
259
|
return {
|
|
263
|
-
uid,
|
|
264
|
-
seanceRef,
|
|
260
|
+
uid,
|
|
261
|
+
seanceRef,
|
|
265
262
|
sessionRef: session,
|
|
266
263
|
metadonnees,
|
|
267
264
|
contenu,
|
package/lib/model/seance.js
CHANGED
|
@@ -22,14 +22,12 @@ export async function parseCompteRenduSlotFromFile(xmlFilePath, wantedSlot, firs
|
|
|
22
22
|
const points = [];
|
|
23
23
|
let ordre = 0;
|
|
24
24
|
const addPoint = (p) => points.push({ ...p, ordre_absolu_seance: String(++ordre) });
|
|
25
|
-
// Titles
|
|
26
|
-
$("cri\\:titreS1 p.titre_S1").each((_, el) => {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
addPoint({ code_grammaire: "TITRE_TEXTE_DISCUSSION", texte: { _: t }, code_style: "Titre" });
|
|
32
|
-
});
|
|
25
|
+
// Titles removed because they are only listed at the top of the file and are not linked to any anchor
|
|
26
|
+
// $("cri\\:titreS1 p.titre_S1").each((_, el) => {
|
|
27
|
+
// if (!elementInAnyInterval(el, idx, intervals)) return
|
|
28
|
+
// const t = normalizeTitle(norm($(el).text() || ""))
|
|
29
|
+
// if (t) addPoint({ code_grammaire: "TITRE_TEXTE_DISCUSSION", texte: { _: t }, code_style: "Titre" })
|
|
30
|
+
// })
|
|
33
31
|
// Interventions
|
|
34
32
|
$("div.intervenant").each((_, block) => {
|
|
35
33
|
if (!elementInAnyInterval(block, idx, intervals))
|
|
@@ -245,7 +245,7 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
245
245
|
deltaMin = candidates[0].d;
|
|
246
246
|
}
|
|
247
247
|
}
|
|
248
|
-
// Parse CR
|
|
248
|
+
// Parse CR
|
|
249
249
|
const hourShort = toHourShort(day.openTime) ?? "NA";
|
|
250
250
|
const cr = parseCommissionCRFromFile(htmlPath, best ?? undefined, {
|
|
251
251
|
dateISO: day.date,
|
|
@@ -1 +1,7 @@
|
|
|
1
|
-
|
|
1
|
+
import { GroupedReunion } from "../types/agenda";
|
|
2
|
+
export declare function buildSenatVodMasterM3u8FromNvs(nvsText: string, finalText: string): string | null;
|
|
3
|
+
export declare function score(agenda: GroupedReunion, agendaTs: number | null, videoTitle?: string, videoEpoch?: number): number;
|
|
4
|
+
/**
|
|
5
|
+
* Build search strategies for senat's videos
|
|
6
|
+
*/
|
|
7
|
+
export declare function buildSearchStrategies(agenda: GroupedReunion): Array<Record<string, string>>;
|
|
@@ -4,12 +4,11 @@ import commandLineArgs from "command-line-args";
|
|
|
4
4
|
import fs from "fs-extra";
|
|
5
5
|
import fsp from "fs/promises";
|
|
6
6
|
import path from "path";
|
|
7
|
-
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER,
|
|
7
|
+
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatAgendasGrouped } from "../loaders";
|
|
8
8
|
import { getSessionsFromStart } from "../types/sessions";
|
|
9
9
|
import { commonOptions } from "./shared/cli_helpers";
|
|
10
|
-
import { formatYYYYMMDD, makeReunionUid } from "../utils/reunion_grouping";
|
|
11
10
|
// ===================== Constants =====================
|
|
12
|
-
const MATCH_THRESHOLD = 0.
|
|
11
|
+
const MATCH_THRESHOLD = 0.6;
|
|
13
12
|
const MAX_CANDIDATES = 15;
|
|
14
13
|
const MAX_PAGES = 3;
|
|
15
14
|
const STATS = { total: 0, accepted: 0 };
|
|
@@ -18,9 +17,7 @@ const SENAT_VIDEOS_SEARCH_AJAX = "https://videos.senat.fr/senat_videos_search.ph
|
|
|
18
17
|
const SENAT_DATAS_ROOT = "https://videos.senat.fr/Datas/senat";
|
|
19
18
|
const SENAT_VOD_HOST = "https://vodsenat.akamaized.net";
|
|
20
19
|
// ===================== CLI =====================
|
|
21
|
-
const optionsDefinitions = [
|
|
22
|
-
...commonOptions,
|
|
23
|
-
];
|
|
20
|
+
const optionsDefinitions = [...commonOptions];
|
|
24
21
|
const options = commandLineArgs(optionsDefinitions);
|
|
25
22
|
// ===================== Utils =====================
|
|
26
23
|
function normalize(s) {
|
|
@@ -32,7 +29,9 @@ function normalize(s) {
|
|
|
32
29
|
.replace(/\s+/g, " ")
|
|
33
30
|
.trim();
|
|
34
31
|
}
|
|
35
|
-
function tokens(s) {
|
|
32
|
+
function tokens(s) {
|
|
33
|
+
return normalize(s).split(" ").filter(Boolean);
|
|
34
|
+
}
|
|
36
35
|
function dice(a, b) {
|
|
37
36
|
const A = new Set(tokens(a)), B = new Set(tokens(b));
|
|
38
37
|
if (!A.size || !B.size)
|
|
@@ -46,7 +45,7 @@ function dice(a, b) {
|
|
|
46
45
|
// Heuristic for Europe/Paris DST: +02:00 ≈ April→October, +01:00 otherwise.
|
|
47
46
|
function parisOffsetForDate(dateYYYYMMDD) {
|
|
48
47
|
const m = Number(dateYYYYMMDD.split("-")[1] || "1");
|
|
49
|
-
return
|
|
48
|
+
return m >= 4 && m <= 10 ? "+02:00" : "+01:00";
|
|
50
49
|
}
|
|
51
50
|
function epochToParisDateTime(epochSec) {
|
|
52
51
|
if (!Number.isFinite(epochSec))
|
|
@@ -54,7 +53,7 @@ function epochToParisDateTime(epochSec) {
|
|
|
54
53
|
const dUtc = new Date(epochSec * 1000);
|
|
55
54
|
// Offset heuristic (same logic as parisOffsetForDate)
|
|
56
55
|
const m = dUtc.getUTCMonth() + 1; // 1..12
|
|
57
|
-
const offsetHours =
|
|
56
|
+
const offsetHours = m >= 4 && m <= 10 ? 2 : 1;
|
|
58
57
|
const offsetStr = offsetHours === 2 ? "+02:00" : "+01:00";
|
|
59
58
|
// Applique l'offset pour obtenir la date/heure locales Paris
|
|
60
59
|
const localMs = dUtc.getTime() + offsetHours * 3600 * 1000;
|
|
@@ -149,7 +148,7 @@ function extractCandidatesFromSearchHtml(html) {
|
|
|
149
148
|
out.push({ id, hash, pageUrl, title: t?.[1] });
|
|
150
149
|
}
|
|
151
150
|
const seen = new Set();
|
|
152
|
-
return out.filter(c => {
|
|
151
|
+
return out.filter((c) => {
|
|
153
152
|
const k = `${c.id}_${c.hash}`;
|
|
154
153
|
if (seen.has(k))
|
|
155
154
|
return false;
|
|
@@ -162,46 +161,68 @@ function parseDataNvs(nvs) {
|
|
|
162
161
|
const title = nvs.match(/<metadata\s+name="title"\s+value="([^"]+)"/i)?.[1];
|
|
163
162
|
return { epoch: epoch ? Number(epoch) : undefined, title };
|
|
164
163
|
}
|
|
165
|
-
|
|
166
|
-
|
|
164
|
+
// nvsText = contenu texte de data.nvs (utf-8)
|
|
165
|
+
// finalText = contenu texte de finalplayer.nvs (utf-8)
|
|
166
|
+
export function buildSenatVodMasterM3u8FromNvs(nvsText, finalText) {
|
|
167
|
+
// 1) Base Akamai depuis data.nvs (mp4 "serverfiles://senat/YYYY/MM/encoderX_YYYYMMDDHHMMSS_1.mp4")
|
|
168
|
+
const baseMatch = nvsText.match(/serverfiles:\/\/senat\/(\d{4})\/(\d{2})\/(encoder\d)_(\d{14})/i);
|
|
169
|
+
if (!baseMatch)
|
|
167
170
|
return null;
|
|
168
|
-
|
|
169
|
-
const
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
171
|
+
const [, yyyy, mm, encoder, stamp] = baseMatch;
|
|
172
|
+
const base = `https://vodsenat.akamaized.net/senat/${yyyy}/${mm}/${encoder}_${stamp}`;
|
|
173
|
+
// 2) start/end depuis finalplayer.nvs
|
|
174
|
+
let start = null, end = null;
|
|
175
|
+
const playerAttr = finalText.match(/player[^>]*\bstarttime="(\d+)"[^>]*\bendtime="(\d+)"/i);
|
|
176
|
+
if (playerAttr) {
|
|
177
|
+
start = parseInt(playerAttr[1], 10);
|
|
178
|
+
end = parseInt(playerAttr[2], 10);
|
|
179
|
+
}
|
|
180
|
+
else {
|
|
181
|
+
// fallback: prendre le plus petit timecode des <synchro timecode="...">
|
|
182
|
+
const tc = Array.from(finalText.matchAll(/timecode="(\d+)"/g)).map((m) => parseInt(m[1], 10));
|
|
183
|
+
if (tc.length)
|
|
184
|
+
start = Math.min(...tc);
|
|
185
|
+
}
|
|
186
|
+
// 3) si pas d'end, on peut déduire via "duree" (en secondes) de data.nvs
|
|
187
|
+
if (end == null) {
|
|
188
|
+
const durMeta = nvsText.match(/<metadata[^>]*\bname="duree"[^>]*\bvalue="(\d+)"[^>]*>/i);
|
|
189
|
+
if (durMeta && start != null) {
|
|
190
|
+
const durMs = parseInt(durMeta[1], 10) * 1000; // sec → ms
|
|
191
|
+
end = start + durMs;
|
|
192
|
+
}
|
|
177
193
|
}
|
|
178
|
-
//
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
194
|
+
// 4) Construction de l’URL
|
|
195
|
+
// - si on a start & end → utiliser ps/pd (robuste et conforme à ce que sert le Sénat)
|
|
196
|
+
// - sinon fallback sans suffixe (souvent valide aussi)
|
|
197
|
+
if (start != null && end != null && end > start) {
|
|
198
|
+
const pd = end - start;
|
|
199
|
+
return `${base}_ps${start}_pd${pd}.smil/master.m3u8`;
|
|
183
200
|
}
|
|
184
|
-
|
|
185
|
-
const mAny = xml.match(/https?:\/\/[^"'<>]+\.m3u8/i);
|
|
186
|
-
return mAny ? mAny[0] : null;
|
|
201
|
+
return `${base}.smil/master.m3u8`;
|
|
187
202
|
}
|
|
188
|
-
function score(agenda, agendaTs, videoTitle, videoEpoch) {
|
|
203
|
+
export function score(agenda, agendaTs, videoTitle, videoEpoch) {
|
|
189
204
|
const titleScore = dice(agenda.titre || "", videoTitle || "");
|
|
190
205
|
let timeScore = 0;
|
|
191
206
|
if (agendaTs && videoEpoch) {
|
|
207
|
+
// second
|
|
192
208
|
const deltaMin = Math.abs(videoEpoch - agendaTs) / 60;
|
|
193
|
-
|
|
209
|
+
// delta : 180min
|
|
210
|
+
timeScore = Math.max(0, 1 - deltaMin / 180);
|
|
194
211
|
}
|
|
195
212
|
let orgBonus = 0;
|
|
196
213
|
if (agenda.organe && videoTitle) {
|
|
197
214
|
const o = normalize(agenda.organe);
|
|
198
215
|
const t = normalize(videoTitle);
|
|
199
|
-
|
|
216
|
+
const first = o.split(" ").filter(Boolean)[0];
|
|
217
|
+
if (first && t.includes(first))
|
|
200
218
|
orgBonus = 0.15;
|
|
201
219
|
}
|
|
202
|
-
return 0.3 * titleScore + 0.7 * timeScore + orgBonus;
|
|
220
|
+
return 0.3 * titleScore + 0.7 * timeScore + orgBonus; // Can be adjusted
|
|
203
221
|
}
|
|
204
|
-
|
|
222
|
+
/**
|
|
223
|
+
* Build search strategies for senat's videos
|
|
224
|
+
*/
|
|
225
|
+
export function buildSearchStrategies(agenda) {
|
|
205
226
|
const fr = agenda.date ? toFRDate(agenda.date) : undefined;
|
|
206
227
|
const kw = simplifyTitleForKeywords(agenda.titre || "");
|
|
207
228
|
const commission = agenda.organe || undefined;
|
|
@@ -239,21 +260,23 @@ async function fetchAllSearchPages(args, baseDir, strategyIndex, maxPages = MAX_
|
|
|
239
260
|
}
|
|
240
261
|
return pages;
|
|
241
262
|
}
|
|
242
|
-
async function
|
|
263
|
+
async function processGroupedReunion(agenda, session, dataDir) {
|
|
243
264
|
if (!agenda)
|
|
244
265
|
return;
|
|
266
|
+
// 1) Garde-fous
|
|
245
267
|
if (!agenda.captationVideo) {
|
|
246
268
|
if (!options["silent"])
|
|
247
|
-
console.log(`[skip] ${agenda.
|
|
269
|
+
console.log(`[skip] ${agenda.uid} captationVideo=false`);
|
|
248
270
|
return;
|
|
249
271
|
}
|
|
250
272
|
if (!agenda.date || !agenda.startTime) {
|
|
251
273
|
if (!options["silent"])
|
|
252
|
-
console.log(`[skip] ${agenda.
|
|
274
|
+
console.log(`[skip] ${agenda.uid} date/hour missing`);
|
|
253
275
|
return;
|
|
254
276
|
}
|
|
255
277
|
STATS.total++;
|
|
256
|
-
|
|
278
|
+
// 2) Dossier de sortie (utilise directement l'UID)
|
|
279
|
+
const reunionUid = agenda.uid;
|
|
257
280
|
const baseDir = path.join(dataDir, VIDEOS_ROOT_FOLDER, String(session), reunionUid);
|
|
258
281
|
await fs.ensureDir(baseDir);
|
|
259
282
|
const agendaTs = toTargetEpoch(agenda.date, agenda.startTime);
|
|
@@ -276,8 +299,9 @@ async function processAgenda(agenda, session, dataDir) {
|
|
|
276
299
|
}
|
|
277
300
|
}
|
|
278
301
|
if (usedStrategy === -1 || !candidates.length) {
|
|
279
|
-
if (!options["silent"])
|
|
280
|
-
console.log(`[miss] ${agenda.
|
|
302
|
+
if (!options["silent"]) {
|
|
303
|
+
console.log(`[miss] ${agenda.uid} no candidates (triedStrategies=${strategies.length})`);
|
|
304
|
+
}
|
|
281
305
|
return;
|
|
282
306
|
}
|
|
283
307
|
// ==== 2) Enrich via data.nvs + scoring; pick best ====
|
|
@@ -295,14 +319,14 @@ async function processAgenda(agenda, session, dataDir) {
|
|
|
295
319
|
}
|
|
296
320
|
if (!best) {
|
|
297
321
|
if (!options["silent"])
|
|
298
|
-
console.log(`[miss] ${agenda.
|
|
322
|
+
console.log(`[miss] ${agenda.uid} candidates without data.nvs`);
|
|
299
323
|
return;
|
|
300
324
|
}
|
|
301
325
|
const accepted = best.score >= MATCH_THRESHOLD;
|
|
302
326
|
if (accepted)
|
|
303
327
|
STATS.accepted++;
|
|
304
328
|
if (!options["silent"]) {
|
|
305
|
-
console.log(`[pick] ${agenda.
|
|
329
|
+
console.log(`[pick] ${agenda.uid} best id=${best.id} hash=${best.hash} score=${best.score.toFixed(2)} accepted=${accepted} (strategy=${usedStrategy})`);
|
|
306
330
|
}
|
|
307
331
|
// ==== 3) Write metadata + NVS of the best candidate (always) ====
|
|
308
332
|
const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
|
|
@@ -317,7 +341,7 @@ async function processAgenda(agenda, session, dataDir) {
|
|
|
317
341
|
startTime: agenda.startTime,
|
|
318
342
|
titre: agenda.titre,
|
|
319
343
|
organe: agenda.organe ?? undefined,
|
|
320
|
-
|
|
344
|
+
uid: agenda.uid,
|
|
321
345
|
},
|
|
322
346
|
best: {
|
|
323
347
|
id: best.id,
|
|
@@ -340,37 +364,30 @@ async function processAgenda(agenda, session, dataDir) {
|
|
|
340
364
|
if (finalTxt)
|
|
341
365
|
await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
|
|
342
366
|
let master = null;
|
|
343
|
-
if (dataTxt)
|
|
344
|
-
master = buildSenatVodMasterM3u8FromNvs(dataTxt);
|
|
367
|
+
if (dataTxt && finalTxt)
|
|
368
|
+
master = buildSenatVodMasterM3u8FromNvs(dataTxt, finalTxt);
|
|
345
369
|
// ==== 4) Update agenda file (only if accepted + m3u8) ====
|
|
346
370
|
if (accepted && master) {
|
|
347
|
-
const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${
|
|
371
|
+
const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
|
|
348
372
|
if (await fs.pathExists(agendaJsonPath)) {
|
|
349
373
|
const raw = await fsp.readFile(agendaJsonPath, "utf-8");
|
|
350
|
-
let
|
|
374
|
+
let obj;
|
|
351
375
|
try {
|
|
352
|
-
|
|
376
|
+
obj = JSON.parse(raw);
|
|
353
377
|
}
|
|
354
378
|
catch (e) {
|
|
355
379
|
console.warn(`[warn] invalid JSON in ${agendaJsonPath}:`, e?.message);
|
|
356
|
-
|
|
380
|
+
obj = null;
|
|
357
381
|
}
|
|
358
|
-
if (Array.isArray(
|
|
359
|
-
const
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
else {
|
|
364
|
-
// add/update urlVideo on the matching item
|
|
365
|
-
items[idx] = { ...items[idx], urlVideo: master };
|
|
366
|
-
await writeIfChanged(agendaJsonPath, JSON.stringify(items, null, 2));
|
|
367
|
-
if (!options["silent"]) {
|
|
368
|
-
console.log(`[write] ${agenda.id} urlVideo ← ${master}`);
|
|
369
|
-
}
|
|
382
|
+
if (obj && typeof obj === "object" && !Array.isArray(obj)) {
|
|
383
|
+
const next = { ...obj, urlVideo: master };
|
|
384
|
+
await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
|
|
385
|
+
if (!options["silent"]) {
|
|
386
|
+
console.log(`[write] ${agenda.uid} urlVideo ← ${master}`);
|
|
370
387
|
}
|
|
371
388
|
}
|
|
372
389
|
else {
|
|
373
|
-
console.warn(`[warn] expected an
|
|
390
|
+
console.warn(`[warn] expected an object in ${agendaJsonPath}, got ${Array.isArray(obj) ? "array" : typeof obj}`);
|
|
374
391
|
}
|
|
375
392
|
}
|
|
376
393
|
else {
|
|
@@ -379,15 +396,14 @@ async function processAgenda(agenda, session, dataDir) {
|
|
|
379
396
|
}
|
|
380
397
|
}
|
|
381
398
|
async function processAll(dataDir, sessions) {
|
|
399
|
+
console.log("Process all Agendas and fetch video's url");
|
|
382
400
|
for (const session of sessions) {
|
|
383
|
-
for (const { item:
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
console.error(`[error] ${agenda.id}:`, e?.message || e);
|
|
390
|
-
}
|
|
401
|
+
for (const { item: agenda } of iterLoadSenatAgendasGrouped(dataDir, session)) {
|
|
402
|
+
try {
|
|
403
|
+
await processGroupedReunion(agenda, session, dataDir);
|
|
404
|
+
}
|
|
405
|
+
catch (e) {
|
|
406
|
+
console.error(`[error] ${agenda?.uid ?? "unknown-uid"}:`, e?.message || e);
|
|
391
407
|
}
|
|
392
408
|
}
|
|
393
409
|
}
|
|
@@ -396,17 +412,16 @@ async function main() {
|
|
|
396
412
|
const dataDir = options["dataDir"];
|
|
397
413
|
assert(dataDir, "Missing argument: data directory");
|
|
398
414
|
const sessions = getSessionsFromStart(options["fromSession"]);
|
|
399
|
-
|
|
400
|
-
console.time("senat-agendas→videos start processing time");
|
|
415
|
+
console.time("senat-agendas→videos start processing time");
|
|
401
416
|
await processAll(dataDir, sessions);
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
const ratio = total ? (accepted / total * 100).toFixed(1) : "0.0";
|
|
407
|
-
console.log(`[summary] accepted=${accepted} / total=${total} (${ratio}%)`);
|
|
408
|
-
}
|
|
417
|
+
console.timeEnd("senat-agendas→videos processing time");
|
|
418
|
+
const { total, accepted } = STATS;
|
|
419
|
+
const ratio = total ? ((accepted / total) * 100).toFixed(1) : "0.0";
|
|
420
|
+
console.log(`[summary] accepted=${accepted} / total=${total} (${ratio}%)`);
|
|
409
421
|
}
|
|
410
422
|
main()
|
|
411
423
|
.then(() => process.exit(0))
|
|
412
|
-
.catch((err) => {
|
|
424
|
+
.catch((err) => {
|
|
425
|
+
console.error(err);
|
|
426
|
+
process.exit(1);
|
|
427
|
+
});
|
package/lib/utils/cr_spliting.js
CHANGED
|
@@ -337,6 +337,7 @@ export async function linkCRtoCommissionGroup(opts) {
|
|
|
337
337
|
console.warn(`[AGENDA][COM] Unreadable JSON → ${filePath} (${e?.message}) → will recreate`);
|
|
338
338
|
}
|
|
339
339
|
if (!group) {
|
|
340
|
+
// FIXME: fix the way groups are looked up, because it currently creates duplicates
|
|
340
341
|
// group = {
|
|
341
342
|
// uid,
|
|
342
343
|
// chambre: "SN",
|
package/package.json
CHANGED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import { CompteRendu, Sommaire } from "../types/compte_rendu";
|
|
2
|
-
import { TimeSlot } from "../types/agenda";
|
|
3
|
-
export declare function parseCompteRenduSlotFromFile(xmlFilePath: string, wantedSlot: TimeSlot, firstSlotOfDay?: TimeSlot): Promise<CompteRendu | null>;
|
|
4
|
-
export declare function sessionStartYearFromDate(d: Date): number;
|
|
5
|
-
export declare function parseYYYYMMDD(yyyymmdd: string): Date | null;
|
|
6
|
-
export declare function deriveTitreObjetFromSommaire(sommaire: Sommaire | undefined, slot: TimeSlot): {
|
|
7
|
-
titre: string;
|
|
8
|
-
objet: string;
|
|
9
|
-
};
|