@tricoteuses/senat 2.22.13 → 2.22.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/lib/src/loaders.d.ts +2 -8
  2. package/lib/src/loaders.js +7 -25
  3. package/lib/tests/test_iter_load.test.js +17 -0
  4. package/package.json +2 -2
  5. package/lib/config.d.ts +0 -21
  6. package/lib/config.js +0 -27
  7. package/lib/databases.d.ts +0 -2
  8. package/lib/databases.js +0 -26
  9. package/lib/datasets.d.ts +0 -34
  10. package/lib/datasets.js +0 -233
  11. package/lib/git.d.ts +0 -26
  12. package/lib/git.js +0 -167
  13. package/lib/index.d.ts +0 -13
  14. package/lib/index.js +0 -1
  15. package/lib/loaders.d.ts +0 -58
  16. package/lib/loaders.js +0 -286
  17. package/lib/model/agenda.d.ts +0 -6
  18. package/lib/model/agenda.js +0 -148
  19. package/lib/model/ameli.d.ts +0 -51
  20. package/lib/model/ameli.js +0 -147
  21. package/lib/model/commission.d.ts +0 -18
  22. package/lib/model/commission.js +0 -269
  23. package/lib/model/debats.d.ts +0 -67
  24. package/lib/model/debats.js +0 -95
  25. package/lib/model/documents.d.ts +0 -12
  26. package/lib/model/documents.js +0 -138
  27. package/lib/model/dosleg.d.ts +0 -7
  28. package/lib/model/dosleg.js +0 -326
  29. package/lib/model/index.d.ts +0 -7
  30. package/lib/model/index.js +0 -7
  31. package/lib/model/questions.d.ts +0 -45
  32. package/lib/model/questions.js +0 -89
  33. package/lib/model/scrutins.d.ts +0 -13
  34. package/lib/model/scrutins.js +0 -114
  35. package/lib/model/seance.d.ts +0 -3
  36. package/lib/model/seance.js +0 -267
  37. package/lib/model/sens.d.ts +0 -146
  38. package/lib/model/sens.js +0 -454
  39. package/lib/model/texte.d.ts +0 -7
  40. package/lib/model/texte.js +0 -228
  41. package/lib/model/util.d.ts +0 -9
  42. package/lib/model/util.js +0 -38
  43. package/lib/parsers/texte.d.ts +0 -7
  44. package/lib/parsers/texte.js +0 -228
  45. package/lib/raw_types/ameli.d.ts +0 -914
  46. package/lib/raw_types/ameli.js +0 -5
  47. package/lib/raw_types/debats.d.ts +0 -207
  48. package/lib/raw_types/debats.js +0 -5
  49. package/lib/raw_types/dosleg.d.ts +0 -1619
  50. package/lib/raw_types/dosleg.js +0 -5
  51. package/lib/raw_types/questions.d.ts +0 -423
  52. package/lib/raw_types/questions.js +0 -5
  53. package/lib/raw_types/senat.d.ts +0 -11372
  54. package/lib/raw_types/senat.js +0 -5
  55. package/lib/raw_types/sens.d.ts +0 -8248
  56. package/lib/raw_types/sens.js +0 -5
  57. package/lib/raw_types_schemats/ameli.d.ts +0 -539
  58. package/lib/raw_types_schemats/ameli.js +0 -2
  59. package/lib/raw_types_schemats/debats.d.ts +0 -127
  60. package/lib/raw_types_schemats/debats.js +0 -2
  61. package/lib/raw_types_schemats/dosleg.d.ts +0 -977
  62. package/lib/raw_types_schemats/dosleg.js +0 -2
  63. package/lib/raw_types_schemats/questions.d.ts +0 -237
  64. package/lib/raw_types_schemats/questions.js +0 -2
  65. package/lib/raw_types_schemats/sens.d.ts +0 -6915
  66. package/lib/raw_types_schemats/sens.js +0 -2
  67. package/lib/scripts/convert_data.js +0 -354
  68. package/lib/scripts/data-download.d.ts +0 -1
  69. package/lib/scripts/data-download.js +0 -12
  70. package/lib/scripts/datautil.d.ts +0 -8
  71. package/lib/scripts/datautil.js +0 -34
  72. package/lib/scripts/parse_textes.d.ts +0 -1
  73. package/lib/scripts/parse_textes.js +0 -44
  74. package/lib/scripts/retrieve_agenda.d.ts +0 -1
  75. package/lib/scripts/retrieve_agenda.js +0 -132
  76. package/lib/scripts/retrieve_cr_commission.d.ts +0 -1
  77. package/lib/scripts/retrieve_cr_commission.js +0 -364
  78. package/lib/scripts/retrieve_cr_seance.d.ts +0 -6
  79. package/lib/scripts/retrieve_cr_seance.js +0 -347
  80. package/lib/scripts/retrieve_documents.d.ts +0 -3
  81. package/lib/scripts/retrieve_documents.js +0 -219
  82. package/lib/scripts/retrieve_open_data.d.ts +0 -1
  83. package/lib/scripts/retrieve_open_data.js +0 -316
  84. package/lib/scripts/retrieve_senateurs_photos.d.ts +0 -1
  85. package/lib/scripts/retrieve_senateurs_photos.js +0 -147
  86. package/lib/scripts/retrieve_videos.d.ts +0 -1
  87. package/lib/scripts/retrieve_videos.js +0 -461
  88. package/lib/scripts/shared/cli_helpers.d.ts +0 -95
  89. package/lib/scripts/shared/cli_helpers.js +0 -91
  90. package/lib/scripts/shared/util.d.ts +0 -4
  91. package/lib/scripts/shared/util.js +0 -35
  92. package/lib/scripts/test_iter_load.d.ts +0 -1
  93. package/lib/scripts/test_iter_load.js +0 -12
  94. package/lib/src/utils/nvs-timecode.d.ts +0 -17
  95. package/lib/src/utils/nvs-timecode.js +0 -79
  96. package/lib/src/utils/weights_scoring_config.d.ts +0 -2
  97. package/lib/src/utils/weights_scoring_config.js +0 -15
  98. package/lib/strings.d.ts +0 -1
  99. package/lib/strings.js +0 -18
  100. package/lib/types/agenda.d.ts +0 -44
  101. package/lib/types/agenda.js +0 -1
  102. package/lib/types/ameli.d.ts +0 -5
  103. package/lib/types/ameli.js +0 -1
  104. package/lib/types/compte_rendu.d.ts +0 -83
  105. package/lib/types/compte_rendu.js +0 -1
  106. package/lib/types/debats.d.ts +0 -2
  107. package/lib/types/debats.js +0 -1
  108. package/lib/types/dosleg.d.ts +0 -70
  109. package/lib/types/dosleg.js +0 -1
  110. package/lib/types/questions.d.ts +0 -2
  111. package/lib/types/questions.js +0 -1
  112. package/lib/types/sens.d.ts +0 -10
  113. package/lib/types/sens.js +0 -1
  114. package/lib/types/sessions.d.ts +0 -5
  115. package/lib/types/sessions.js +0 -84
  116. package/lib/types/texte.d.ts +0 -74
  117. package/lib/types/texte.js +0 -16
  118. package/lib/utils/cr_spliting.d.ts +0 -28
  119. package/lib/utils/cr_spliting.js +0 -265
  120. package/lib/utils/date.d.ts +0 -10
  121. package/lib/utils/date.js +0 -100
  122. package/lib/utils/nvs-timecode.d.ts +0 -7
  123. package/lib/utils/nvs-timecode.js +0 -79
  124. package/lib/utils/reunion_grouping.d.ts +0 -11
  125. package/lib/utils/reunion_grouping.js +0 -337
  126. package/lib/utils/reunion_odj_building.d.ts +0 -5
  127. package/lib/utils/reunion_odj_building.js +0 -154
  128. package/lib/utils/reunion_parsing.d.ts +0 -23
  129. package/lib/utils/reunion_parsing.js +0 -209
  130. package/lib/utils/scoring.d.ts +0 -14
  131. package/lib/utils/scoring.js +0 -147
  132. package/lib/utils/string_cleaning.d.ts +0 -7
  133. package/lib/utils/string_cleaning.js +0 -57
  134. package/lib/validators/config.d.ts +0 -9
  135. package/lib/validators/config.js +0 -10
  136. /package/lib/{scripts/convert_data.d.ts → tests/test_iter_load.test.d.ts} +0 -0
@@ -1,461 +0,0 @@
1
- // scripts/retrieve_senat_videos_from_agendas.ts
2
- import assert from "assert";
3
- import commandLineArgs from "command-line-args";
4
- import fs from "fs-extra";
5
- import fsp from "fs/promises";
6
- import path from "path";
7
- import * as cheerio from "cheerio";
8
- import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatAgendas } from "../loaders";
9
- import { getSessionsFromStart } from "../types/sessions";
10
- import { commonOptions } from "./shared/cli_helpers";
11
- import { getAgendaSegmentTimecodes } from "../utils/nvs-timecode";
12
- import { decodeHtmlEntities } from "../utils/string_cleaning";
13
- import { dice, normalize, scoreVideo } from "../utils/scoring";
14
- import { epochToParisDateTime, toFRDate, toTargetEpoch } from "../utils/date";
15
- // ===================== Constants =====================
16
- const MATCH_THRESHOLD = 0.5;
17
- const MAX_CANDIDATES = 15;
18
- const STATS = { total: 0, accepted: 0 };
19
- const VIDEOS_ROOT_FOLDER = "videos";
20
- const SENAT_VIDEOS_SEARCH_AJAX = "https://videos.senat.fr/senat_videos_search.php";
21
- const SENAT_DATAS_ROOT = "https://videos.senat.fr/Datas/senat";
22
- // ===================== CLI =====================
23
- const optionsDefinitions = [...commonOptions];
24
- const options = commandLineArgs(optionsDefinitions);
25
- // ===================== Utils =====================
26
- async function fetchText(url) {
27
- const res = await fetch(url);
28
- if (!res.ok)
29
- return null;
30
- return await res.text();
31
- }
32
- async function fetchBuffer(url) {
33
- const res = await fetch(url);
34
- if (!res.ok)
35
- return null;
36
- const ab = await res.arrayBuffer();
37
- return Buffer.from(ab);
38
- }
39
- async function writeIfChanged(p, content) {
40
- const exists = await fs.pathExists(p);
41
- if (exists) {
42
- const old = await fsp.readFile(p, "utf-8");
43
- if (old === content)
44
- return;
45
- }
46
- await fsp.writeFile(p, content, "utf-8");
47
- }
48
- function queryString(obj) {
49
- return Object.entries(obj)
50
- .map(([k, v]) => `${encodeURIComponent(k)}=${encodeURIComponent(v)}`)
51
- .join("&");
52
- }
53
- function extractCandidatesFromSearchHtml(html) {
54
- const $ = cheerio.load(html);
55
- const out = [];
56
- const re = /video\.(\d+)_([a-z0-9]+)/i;
57
- $('h3.card-title a.stretched-link[href*="video."]').each((_, a) => {
58
- const href = $(a).attr("href") || "";
59
- const m = href.match(re);
60
- if (!m)
61
- return;
62
- const id = m[1];
63
- const hash = m[2];
64
- const pageUrl = `https://videos.senat.fr/video.${id}_${hash}.html`;
65
- const title = ($(a).attr("title") || $(a).text() || "").replace(/\s+/g, " ").trim() || undefined;
66
- const isSeancePublique = title?.toLowerCase().includes("séance publique") ?? false;
67
- out.push({ id, hash, pageUrl, title, isSeancePublique });
68
- });
69
- // dedupe
70
- const seen = new Set();
71
- return out.filter((c) => {
72
- const k = `${c.id}_${c.hash}`;
73
- if (seen.has(k))
74
- return false;
75
- seen.add(k);
76
- return true;
77
- });
78
- }
79
- function parseDataNvs(nvs) {
80
- const epochStr = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
81
- const epoch = epochStr ? Number(epochStr) : undefined;
82
- // There can be multiple organes for one video in meta
83
- const organes = [];
84
- const organesRegex = /<metadata\b[^>]*\bname="organes"[^>]*>/gi;
85
- let m;
86
- while ((m = organesRegex.exec(nvs)) !== null) {
87
- const tag = m[0];
88
- const label = tag.match(/\blabel="([^"]+)"/i)?.[1];
89
- if (label) {
90
- const decoded = decodeHtmlEntities(label).trim();
91
- if (decoded)
92
- organes.push(decoded);
93
- }
94
- }
95
- if (organes.length === 0) {
96
- organes.push("Séance publique");
97
- }
98
- const firstChapterLabelMatch = nvs.match(/<chapter\b[^>]*\blabel="([^"]+)"/i);
99
- const firstChapterLabel = firstChapterLabelMatch ? decodeHtmlEntities(firstChapterLabelMatch[1]).trim() : undefined;
100
- return { epoch, organes, firstChapterLabel };
101
- }
102
- function buildSenatVodMasterM3u8FromNvs(nvsText) {
103
- // serverfiles://senat/2025/10/encoder10_20251022084451_2.mp4
104
- const m = nvsText.match(/serverfiles:\/\/senat\/(\d{4})\/(\d{2})\/(encoder\d+)_([0-9]{14})/i);
105
- if (!m)
106
- return null;
107
- const [, yyyy, mm, encoder, stamp] = m;
108
- const base = `https://vodsenat.akamaized.net/senat/${yyyy}/${mm}/${encoder}_${stamp}`;
109
- return `${base}.smil/master.m3u8`;
110
- }
111
- function isAmbiguousTimeOriginal(timeOriginal) {
112
- if (!timeOriginal)
113
- return false;
114
- const s = timeOriginal.toLowerCase();
115
- // Catches "14h", "14 h", "14h30", "14 h 30", "14 heures", "14 heure"
116
- const timeRe = /\b([01]?\d|2[0-3])\s*(?:h|heures?|heure)\s*(?:([0-5]\d))?\b/g;
117
- const times = new Set();
118
- let m;
119
- while ((m = timeRe.exec(s))) {
120
- const hh = String(m[1]).padStart(2, "0");
121
- const mm = m[2] ? String(m[2]).padStart(2, "0") : "00";
122
- times.add(`${hh}:${mm}`);
123
- }
124
- // "midi" / "minuit"
125
- if (/\bmidi\b/.test(s))
126
- times.add("12:00");
127
- if (/\bminuit\b/.test(s))
128
- times.add("00:00");
129
- if (times.size >= 2)
130
- return true;
131
- const hasDayPeriod = /\b(matin|après-?midi|soir|nuit|journée|toute la journée)\b/.test(s);
132
- const hasLinking = /,|\bet\b|\bou\b|\bpuis\b/.test(s);
133
- if (times.size === 1 && hasDayPeriod && hasLinking)
134
- return true;
135
- return false;
136
- }
137
- function getAgendaType(agenda) {
138
- const o = agenda.organe || "";
139
- if (/séance publique/i.test(o))
140
- return "Séance publique";
141
- return "Commission";
142
- }
143
- async function fetchAllSearchPages(args, maxPages = 3) {
144
- const pages = [];
145
- for (let p = 1; p <= maxPages; p++) {
146
- const url = `${SENAT_VIDEOS_SEARCH_AJAX}?${queryString({ ...args, page: String(p) })}`;
147
- const html = await fetchText(url);
148
- if (!html)
149
- break;
150
- pages.push(html);
151
- if (!/href="\/?video\.\d+_[a-z0-9]+\./i.test(html))
152
- break;
153
- }
154
- return pages;
155
- }
156
- function getOrgKey(norm) {
157
- if (!norm)
158
- return "autre";
159
- if (norm.includes("seance publique"))
160
- return "seance_publique";
161
- if (norm.includes("culture"))
162
- return "culture";
163
- if (norm.includes("finances"))
164
- return "finances";
165
- if (norm.includes("sociales"))
166
- return "affaires_sociales";
167
- if (norm.includes("economiques"))
168
- return "affaires_economiques";
169
- if (norm.includes("europeennes"))
170
- return "affaires_europeennes";
171
- if (norm.includes("etrangeres") || norm.includes("forces armees") || norm.includes("defense")) {
172
- return "affaires_etrangeres_defense";
173
- }
174
- if (norm.includes("territoire") || norm.includes("durable")) {
175
- return "amenagement_territoire_dd";
176
- }
177
- if (norm.includes("commission des lois"))
178
- return "lois";
179
- if (norm.includes("delegation aux collectivites territoriales") || norm.includes("delegation a la decentralisation"))
180
- return "delegation_collectivites";
181
- if (norm.includes("delegation aux droits des femmes") ||
182
- norm.includes("egalite des chances entre les hommes et les femmes"))
183
- return "delegation_droits_femmes";
184
- if (norm.includes("delegation aux entreprises"))
185
- return "delegation_entreprises";
186
- if (norm.includes("delegation senatoriale aux outre mer") || norm.includes("delegation aux outre mer"))
187
- return "delegation_outre_mer";
188
- if (norm.includes("delegation a la prospective"))
189
- return "delegation_prospective";
190
- if (norm.includes("office parlementaire d evaluation des choix scientifiques et technologiques") ||
191
- norm.includes("opecst"))
192
- return "opecst";
193
- return "autre";
194
- }
195
- async function processGroupedReunion(agenda, session, dataDir) {
196
- // 1) GuardRails
197
- if (!agenda.captationVideo) {
198
- // if (!options["silent"]) console.log(`[skip] ${agenda.uid} captationVideo=false`)
199
- return;
200
- }
201
- if (!agenda.date || !agenda.startTime) {
202
- // if (!options["silent"]) console.log(`[skip] ${agenda.uid} date/hour missing`)
203
- return;
204
- }
205
- const agendaTs = toTargetEpoch(agenda.startTime, agenda.date);
206
- const now = Date.now();
207
- if (agendaTs && agendaTs * 1000 > now) {
208
- return;
209
- }
210
- const reunionUid = agenda.uid;
211
- const baseDir = path.join(dataDir, VIDEOS_ROOT_FOLDER, String(session), reunionUid);
212
- await fs.ensureDir(baseDir);
213
- let skipDownload = false;
214
- if (options["only-recent"]) {
215
- const now = Date.now();
216
- const cutoff = now - options["only-recent"] * 24 * 3600 * 1000;
217
- const reunionTs = Date.parse(agenda.date);
218
- if (reunionTs < cutoff) {
219
- // Check if files already exist
220
- const dataNvsPath = path.join(baseDir, "data.nvs");
221
- const finalplayerNvsPath = path.join(baseDir, "finalplayer.nvs");
222
- if (fs.existsSync(dataNvsPath) && fs.existsSync(finalplayerNvsPath)) {
223
- skipDownload = true;
224
- }
225
- }
226
- }
227
- let master = null;
228
- let dataTxt = null;
229
- let finalTxt = null;
230
- let accepted = false;
231
- if (!skipDownload) {
232
- STATS.total++;
233
- const searchParams = {
234
- search: "true",
235
- videotype: getAgendaType(agenda),
236
- };
237
- if (agenda.date) {
238
- const fr = toFRDate(agenda.date);
239
- searchParams.period = "custom";
240
- searchParams.begin = fr;
241
- searchParams.end = fr;
242
- }
243
- if (agenda.organe) {
244
- searchParams.organe = agenda.organe;
245
- }
246
- const pages = await fetchAllSearchPages(searchParams);
247
- if (!pages.length) {
248
- if (!options["silent"]) {
249
- console.log(`[miss] ${agenda.uid} no candidates (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
250
- }
251
- return;
252
- }
253
- const combinedHtml = pages.join("\n<!-- PAGE SPLIT -->\n");
254
- const candidates = extractCandidatesFromSearchHtml(combinedHtml).slice(0, MAX_CANDIDATES);
255
- if (!candidates.length) {
256
- if (!options["silent"]) {
257
- console.log(`[miss] ${agenda.uid} no candidates after parse (videotype=${searchParams.videotype}, organe=${searchParams.organe || "-"}, date=${searchParams.begin || "-"})`);
258
- }
259
- return;
260
- }
261
- // ==== 2) Enrich via data.nvs + scoring; pick best ====
262
- let best = null;
263
- const timeAmbigious = isAmbiguousTimeOriginal(agenda.events[0].timeOriginal);
264
- if (timeAmbigious) {
265
- console.log(`[match] ${agenda.uid} timeOriginal ambiguous => ignoring time scoring: "${agenda.events[0].timeOriginal}"`);
266
- }
267
- for (const c of candidates) {
268
- const dataUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`;
269
- const finalUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/finalplayer.nvs`;
270
- const dataBuf = await fetchBuffer(dataUrl);
271
- if (!dataBuf)
272
- continue;
273
- const meta = parseDataNvs(dataBuf.toString("utf-8"));
274
- let sameOrg = false;
275
- // If organes are too different, go to next candidates
276
- if (agenda.organe && meta.organes?.length) {
277
- const agendaOrgNorm = normalize(agenda.organe);
278
- const agendaKey = getOrgKey(agendaOrgNorm);
279
- let bestDice = 0;
280
- let hasSameKey = false;
281
- for (const vo of meta.organes) {
282
- const videoOrgNorm = normalize(vo);
283
- const videoKey = getOrgKey(videoOrgNorm);
284
- const d = dice(agendaOrgNorm, videoOrgNorm);
285
- if (videoKey === agendaKey && videoKey !== "autre") {
286
- hasSameKey = true;
287
- }
288
- if (d > bestDice)
289
- bestDice = d;
290
- }
291
- if (hasSameKey) {
292
- sameOrg = true; // we are sure this is the same org
293
- }
294
- else if (bestDice < 0.8) {
295
- // if diff org and dice too low we skip
296
- continue;
297
- }
298
- }
299
- let videoTitle = c.title;
300
- if (c.isSeancePublique && meta.firstChapterLabel) {
301
- videoTitle = meta.firstChapterLabel;
302
- }
303
- const s = scoreVideo(agenda, agendaTs, sameOrg, videoTitle, meta.epoch, meta.organes, timeAmbigious);
304
- if (!best || s > best.score) {
305
- best = {
306
- id: c.id,
307
- hash: c.hash,
308
- pageUrl: c.pageUrl,
309
- epoch: meta.epoch,
310
- vtitle: videoTitle,
311
- score: s,
312
- vorgane: meta.organes[0],
313
- };
314
- }
315
- }
316
- if (!best) {
317
- if (!options["silent"])
318
- console.log(`[miss] ${agenda.uid} No candidate found for this reunion`);
319
- return;
320
- }
321
- accepted = best.score >= MATCH_THRESHOLD;
322
- if (accepted)
323
- STATS.accepted++;
324
- if (!options["silent"]) {
325
- console.log(`[pick] ${agenda.uid} score=${best.score.toFixed(2)}
326
- agenda title="${agenda.titre ?? ""}" agenda organe="${agenda.organe ?? ""}" agenda heure=${agenda.startTime}
327
- best title="${best.vtitle ?? ""}" best organe="${best.vorgane ?? ""}"
328
- accepted=${accepted}`);
329
- }
330
- // ==== 3) Write metadata + NVS of the best candidate (always) ====
331
- const bestDt = best?.epoch ? epochToParisDateTime(best.epoch) : null;
332
- const metadata = {
333
- reunionUid,
334
- session,
335
- accepted,
336
- threshold: MATCH_THRESHOLD,
337
- agenda: {
338
- date: agenda.date,
339
- startTime: agenda.startTime,
340
- titre: agenda.titre,
341
- organe: agenda.organe ?? undefined,
342
- uid: agenda.uid,
343
- },
344
- best: {
345
- id: best.id,
346
- hash: best.hash,
347
- pageUrl: best.pageUrl,
348
- epoch: best.epoch ?? null,
349
- date: bestDt?.date ?? null,
350
- startTime: bestDt?.startTime ?? null,
351
- title: best.vtitle ?? null,
352
- score: best.score,
353
- },
354
- };
355
- await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
356
- const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
357
- const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
358
- dataTxt = await fetchText(dataUrl);
359
- finalTxt = await fetchText(finalUrl);
360
- if (dataTxt)
361
- await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
362
- if (finalTxt)
363
- await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
364
- if (dataTxt) {
365
- master = buildSenatVodMasterM3u8FromNvs(dataTxt);
366
- }
367
- else {
368
- console.log("Cannot download data nvs");
369
- }
370
- }
371
- else {
372
- // Skipped download, but need to read data.nvs for urlVideo
373
- try {
374
- dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
375
- finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
376
- master = buildSenatVodMasterM3u8FromNvs(dataTxt);
377
- }
378
- catch (e) {
379
- console.warn(e);
380
- }
381
- }
382
- // ==== 4) Update agenda file (only if accepted + m3u8) ====
383
- if ((accepted || skipDownload) && master) {
384
- const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
385
- let timecodeDebutVideo = null;
386
- let timecodeFinVideo = null;
387
- if (dataTxt && finalTxt) {
388
- const agendaKey = agenda.titre || agenda.objet || "";
389
- const seg = getAgendaSegmentTimecodes(dataTxt, finalTxt, agendaKey);
390
- if (!seg) {
391
- console.warn(`[warn] Cannot retrieve agenda segment timecodes from reunion ${reunionUid}`);
392
- }
393
- else {
394
- timecodeDebutVideo = seg.start;
395
- timecodeFinVideo = seg.end;
396
- }
397
- }
398
- if (await fs.pathExists(agendaJsonPath)) {
399
- const raw = await fsp.readFile(agendaJsonPath, "utf-8");
400
- let obj;
401
- try {
402
- obj = JSON.parse(raw);
403
- }
404
- catch (e) {
405
- console.warn(`[warn] invalid JSON in ${agendaJsonPath}:`, e?.message);
406
- obj = null;
407
- }
408
- if (obj && typeof obj === "object" && !Array.isArray(obj)) {
409
- const next = { ...obj, urlVideo: master };
410
- if (timecodeDebutVideo != null) {
411
- next.timecodeDebutVideo = timecodeDebutVideo;
412
- next.timecodeFinVideo = timecodeFinVideo;
413
- }
414
- await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
415
- if (!options["silent"]) {
416
- console.log(`[write] ${agenda.uid} urlVideo ← ${master}` +
417
- (timecodeDebutVideo != null ? ` (timecodeDebutVideo ← ${timecodeDebutVideo}s)` : ""));
418
- }
419
- }
420
- else {
421
- console.warn(`[warn] expected an object in ${agendaJsonPath}, got ${Array.isArray(obj) ? "array" : typeof obj}`);
422
- }
423
- }
424
- else {
425
- console.warn(`[warn] agenda file not found for update: ${agendaJsonPath}`);
426
- }
427
- }
428
- else {
429
- console.warn(`[warn] The video url could not be built for reunion `, reunionUid);
430
- }
431
- }
432
- async function processAll(dataDir, sessions) {
433
- console.log("Process all Agendas and fetch video's url");
434
- for (const session of sessions) {
435
- for (const { item: agenda } of iterLoadSenatAgendas(dataDir, session)) {
436
- try {
437
- await processGroupedReunion(agenda, session, dataDir);
438
- }
439
- catch (e) {
440
- console.error(`[error] ${agenda?.uid ?? "unknown-uid"}:`, e?.message || e);
441
- }
442
- }
443
- }
444
- }
445
- async function main() {
446
- const dataDir = options["dataDir"];
447
- assert(dataDir, "Missing argument: data directory");
448
- const sessions = getSessionsFromStart(options["fromSession"]);
449
- console.time("senat-agendas→videos start processing time");
450
- await processAll(dataDir, sessions);
451
- console.timeEnd("senat-agendas→videos processing time");
452
- const { total, accepted } = STATS;
453
- const ratio = total ? ((accepted / total) * 100).toFixed(1) : "0.0";
454
- console.log(`[summary] accepted=${accepted} / total=${total} (${ratio}%)`);
455
- }
456
- main()
457
- .then(() => process.exit(0))
458
- .catch((err) => {
459
- console.error(err);
460
- process.exit(1);
461
- });
@@ -1,95 +0,0 @@
1
- export declare const categoriesOption: {
2
- alias: string;
3
- defaultValue: string[];
4
- help: string;
5
- multiple: boolean;
6
- name: string;
7
- type: StringConstructor;
8
- };
9
- export declare const dataDirDefaultOption: {
10
- defaultOption: boolean;
11
- help: string;
12
- name: string;
13
- type: StringConstructor;
14
- };
15
- export declare const fromSessionOption: {
16
- defaultValue: number;
17
- help: string;
18
- name: string;
19
- type: NumberConstructor;
20
- };
21
- export declare const silentOption: {
22
- alias: string;
23
- help: string;
24
- name: string;
25
- type: BooleanConstructor;
26
- };
27
- export declare const verboseOption: {
28
- alias: string;
29
- help: string;
30
- name: string;
31
- type: BooleanConstructor;
32
- };
33
- export declare const onlyRecentOption: {
34
- help: string;
35
- name: string;
36
- type: NumberConstructor;
37
- };
38
- export declare const keepDirOption: {
39
- help: string;
40
- name: string;
41
- type: BooleanConstructor;
42
- };
43
- export declare const cloneOption: {
44
- alias: string;
45
- help: string;
46
- name: string;
47
- type: StringConstructor;
48
- };
49
- export declare const commitOption: {
50
- help: string;
51
- name: string;
52
- type: BooleanConstructor;
53
- };
54
- export declare const remoteOption: {
55
- alias: string;
56
- help: string;
57
- multiple: boolean;
58
- name: string;
59
- type: StringConstructor;
60
- };
61
- export declare const pullOption: {
62
- alias: string;
63
- help: string;
64
- name: string;
65
- type: BooleanConstructor;
66
- };
67
- export declare const fetchDocumentsOption: {
68
- help: string;
69
- name: string;
70
- type: BooleanConstructor;
71
- };
72
- export declare const parseDocumentsOption: {
73
- help: string;
74
- name: string;
75
- type: BooleanConstructor;
76
- };
77
- export declare const commonOptions: ({
78
- defaultOption: boolean;
79
- help: string;
80
- name: string;
81
- type: StringConstructor;
82
- } | {
83
- help: string;
84
- name: string;
85
- type: NumberConstructor;
86
- } | {
87
- help: string;
88
- name: string;
89
- type: BooleanConstructor;
90
- } | {
91
- alias: string;
92
- help: string;
93
- name: string;
94
- type: StringConstructor;
95
- })[];
@@ -1,91 +0,0 @@
1
- export const categoriesOption = {
2
- alias: "k",
3
- defaultValue: ["All"],
4
- help: "categories of datasets to reorganize; default All",
5
- multiple: true,
6
- name: "categories",
7
- type: String,
8
- };
9
- export const dataDirDefaultOption = {
10
- defaultOption: true,
11
- help: "directory containing Sénat open data files",
12
- name: "dataDir",
13
- type: String,
14
- };
15
- export const fromSessionOption = {
16
- defaultValue: 2022,
17
- help: "session year to retrieve data from; default 2022",
18
- name: "fromSession",
19
- type: Number,
20
- };
21
- export const silentOption = {
22
- alias: "s",
23
- help: "don't log anything",
24
- name: "silent",
25
- type: Boolean,
26
- };
27
- export const verboseOption = {
28
- alias: "v",
29
- help: "verbose logs",
30
- name: "verbose",
31
- type: Boolean,
32
- };
33
- export const onlyRecentOption = {
34
- help: "retrieve only documents created within the last N days (that are not already downloaded)",
35
- name: "only-recent",
36
- type: Number,
37
- };
38
- export const keepDirOption = {
39
- help: "keep directories when cleaning data",
40
- name: "keepDir",
41
- type: Boolean,
42
- };
43
- export const cloneOption = {
44
- alias: "C",
45
- help: "clone repositories from given group (or organization) git URL",
46
- name: "clone",
47
- type: String,
48
- };
49
- export const commitOption = {
50
- help: "commit clean files",
51
- name: "commit",
52
- type: Boolean,
53
- };
54
- export const remoteOption = {
55
- alias: "r",
56
- help: "push commit to given remote",
57
- multiple: true,
58
- name: "remote",
59
- type: String,
60
- };
61
- export const pullOption = {
62
- alias: "p",
63
- help: "pull repositories before proceeding",
64
- name: "pull",
65
- type: Boolean,
66
- };
67
- export const fetchDocumentsOption = {
68
- help: "download documents",
69
- name: "fetchDocuments",
70
- type: Boolean,
71
- };
72
- export const parseDocumentsOption = {
73
- help: "parse documents",
74
- name: "parseDocuments",
75
- type: Boolean,
76
- };
77
- export const commonOptions = [
78
- categoriesOption,
79
- dataDirDefaultOption,
80
- fromSessionOption,
81
- silentOption,
82
- verboseOption,
83
- onlyRecentOption,
84
- keepDirOption,
85
- cloneOption,
86
- commitOption,
87
- remoteOption,
88
- pullOption,
89
- fetchDocumentsOption,
90
- parseDocumentsOption,
91
- ];
@@ -1,4 +0,0 @@
1
- export declare function isOptionEmptyOrHasValue(option: string, value: string): boolean;
2
- export declare function ensureAndClearDirSync(dir: string): void;
3
- export declare function ensureAndClearDir(path: string): void;
4
- export declare function fetchWithRetry(url: string, retries?: number, backoff?: number): Promise<Response>;
@@ -1,35 +0,0 @@
1
- import fs from "fs-extra";
2
- import path from "path";
3
- export function isOptionEmptyOrHasValue(option, value) {
4
- return !option || option.length === 0 || option.includes(value);
5
- }
6
- export function ensureAndClearDirSync(dir) {
7
- fs.ensureDirSync(dir);
8
- for (const name of fs.readdirSync(dir)) {
9
- fs.rmSync(path.join(dir, name), { recursive: true, force: true });
10
- }
11
- }
12
- export function ensureAndClearDir(path) {
13
- if (!fs.existsSync(path)) {
14
- fs.mkdirSync(path, { recursive: true });
15
- }
16
- else {
17
- fs.emptyDirSync(path);
18
- }
19
- }
20
- export async function fetchWithRetry(url, retries = 3, backoff = 300) {
21
- for (let attempt = 0; attempt < retries; attempt++) {
22
- try {
23
- return await fetch(url);
24
- }
25
- catch (error) {
26
- if (attempt === retries - 1) {
27
- throw error;
28
- }
29
- console.warn(`Fetch attempt ${attempt + 1} for ${url} failed. Retrying in ${backoff}ms…`);
30
- await new Promise((resolve) => setTimeout(resolve, backoff));
31
- backoff *= 2;
32
- }
33
- }
34
- throw new Error(`Failed to fetch ${url} after ${retries} attempts`);
35
- }
@@ -1 +0,0 @@
1
- export {};