@tricoteuses/senat 2.9.0 → 2.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/LICENSE.md +22 -22
  2. package/README.md +116 -115
  3. package/lib/loaders.d.ts +1 -1
  4. package/lib/model/agenda.js +2 -0
  5. package/lib/scripts/retrieve_videos.js +420 -0
  6. package/lib/types/agenda.d.ts +2 -0
  7. package/package.json +95 -94
  8. package/lib/aggregates.d.ts +0 -52
  9. package/lib/aggregates.js +0 -949
  10. package/lib/aggregates.mjs +0 -726
  11. package/lib/aggregates.ts +0 -852
  12. package/lib/config.mjs +0 -16
  13. package/lib/config.ts +0 -26
  14. package/lib/databases.mjs +0 -55
  15. package/lib/databases.ts +0 -68
  16. package/lib/datasets.mjs +0 -78
  17. package/lib/datasets.ts +0 -118
  18. package/lib/fields.d.ts +0 -10
  19. package/lib/fields.js +0 -68
  20. package/lib/fields.mjs +0 -22
  21. package/lib/fields.ts +0 -29
  22. package/lib/index.mjs +0 -7
  23. package/lib/index.ts +0 -64
  24. package/lib/inserters.d.ts +0 -98
  25. package/lib/inserters.js +0 -500
  26. package/lib/inserters.mjs +0 -360
  27. package/lib/inserters.ts +0 -521
  28. package/lib/legislatures.json +0 -38
  29. package/lib/loaders.mjs +0 -97
  30. package/lib/loaders.ts +0 -173
  31. package/lib/model/ameli.mjs +0 -57
  32. package/lib/model/ameli.ts +0 -86
  33. package/lib/model/debats.mjs +0 -43
  34. package/lib/model/debats.ts +0 -68
  35. package/lib/model/dosleg.mjs +0 -163
  36. package/lib/model/dosleg.ts +0 -204
  37. package/lib/model/index.mjs +0 -4
  38. package/lib/model/index.ts +0 -13
  39. package/lib/model/questions.mjs +0 -76
  40. package/lib/model/questions.ts +0 -102
  41. package/lib/model/sens.mjs +0 -339
  42. package/lib/model/sens.ts +0 -432
  43. package/lib/model/texte.mjs +0 -156
  44. package/lib/model/texte.ts +0 -174
  45. package/lib/raw_types_kysely/ameli.d.ts +0 -915
  46. package/lib/raw_types_kysely/ameli.js +0 -7
  47. package/lib/raw_types_kysely/ameli.mjs +0 -5
  48. package/lib/raw_types_kysely/ameli.ts +0 -951
  49. package/lib/raw_types_kysely/debats.d.ts +0 -207
  50. package/lib/raw_types_kysely/debats.js +0 -7
  51. package/lib/raw_types_kysely/debats.mjs +0 -5
  52. package/lib/raw_types_kysely/debats.ts +0 -222
  53. package/lib/raw_types_kysely/dosleg.d.ts +0 -3532
  54. package/lib/raw_types_kysely/dosleg.js +0 -7
  55. package/lib/raw_types_kysely/dosleg.mjs +0 -5
  56. package/lib/raw_types_kysely/dosleg.ts +0 -3621
  57. package/lib/raw_types_kysely/questions.d.ts +0 -414
  58. package/lib/raw_types_kysely/questions.js +0 -7
  59. package/lib/raw_types_kysely/questions.mjs +0 -5
  60. package/lib/raw_types_kysely/questions.ts +0 -426
  61. package/lib/raw_types_kysely/sens.d.ts +0 -4394
  62. package/lib/raw_types_kysely/sens.js +0 -7
  63. package/lib/raw_types_kysely/sens.mjs +0 -5
  64. package/lib/raw_types_kysely/sens.ts +0 -4499
  65. package/lib/raw_types_schemats/ameli.mjs +0 -2
  66. package/lib/raw_types_schemats/ameli.ts +0 -601
  67. package/lib/raw_types_schemats/debats.mjs +0 -2
  68. package/lib/raw_types_schemats/debats.ts +0 -145
  69. package/lib/raw_types_schemats/dosleg.mjs +0 -2
  70. package/lib/raw_types_schemats/dosleg.ts +0 -2193
  71. package/lib/raw_types_schemats/questions.mjs +0 -2
  72. package/lib/raw_types_schemats/questions.ts +0 -249
  73. package/lib/raw_types_schemats/sens.mjs +0 -2
  74. package/lib/raw_types_schemats/sens.ts +0 -2907
  75. package/lib/scripts/convert_data.mjs +0 -95
  76. package/lib/scripts/convert_data.ts +0 -119
  77. package/lib/scripts/datautil.mjs +0 -16
  78. package/lib/scripts/datautil.ts +0 -19
  79. package/lib/scripts/images/transparent_150x192.jpg +0 -0
  80. package/lib/scripts/images/transparent_155x225.jpg +0 -0
  81. package/lib/scripts/parse_textes.mjs +0 -38
  82. package/lib/scripts/parse_textes.ts +0 -52
  83. package/lib/scripts/retrieve_documents.mjs +0 -243
  84. package/lib/scripts/retrieve_documents.ts +0 -279
  85. package/lib/scripts/retrieve_open_data.mjs +0 -214
  86. package/lib/scripts/retrieve_open_data.ts +0 -261
  87. package/lib/scripts/retrieve_senateurs_photos.mjs +0 -147
  88. package/lib/scripts/retrieve_senateurs_photos.ts +0 -177
  89. package/lib/scripts/retrieve_textes.mjs +0 -165
  90. package/lib/scripts/retrieve_textes.ts +0 -79
  91. package/lib/scripts/shared/cli_helpers.ts +0 -36
  92. package/lib/scripts/shared/util.ts +0 -33
  93. package/lib/strings.mjs +0 -18
  94. package/lib/strings.ts +0 -26
  95. package/lib/types/ameli.mjs +0 -13
  96. package/lib/types/ameli.ts +0 -21
  97. package/lib/types/debats.mjs +0 -2
  98. package/lib/types/debats.ts +0 -6
  99. package/lib/types/dosleg.mjs +0 -151
  100. package/lib/types/dosleg.ts +0 -284
  101. package/lib/types/questions.mjs +0 -1
  102. package/lib/types/questions.ts +0 -3
  103. package/lib/types/sens.mjs +0 -1
  104. package/lib/types/sens.ts +0 -12
  105. package/lib/types/sessions.mjs +0 -43
  106. package/lib/types/sessions.ts +0 -42
  107. package/lib/types/texte.mjs +0 -16
  108. package/lib/types/texte.ts +0 -66
  109. package/lib/typings/windows-1252.d.js +0 -2
  110. package/lib/typings/windows-1252.d.mjs +0 -2
  111. package/lib/typings/windows-1252.d.ts +0 -11
  112. package/lib/validators/config.mjs +0 -54
  113. package/lib/validators/config.ts +0 -79
  114. package/lib/validators/senat.mjs +0 -24
  115. package/lib/validators/senat.ts +0 -26
  116. /package/lib/scripts/{retrieve_textes.d.ts → retrieve_videos.d.ts} +0 -0
@@ -0,0 +1,420 @@
1
+ // scripts/retrieve_videos.ts
2
+ import assert from "assert";
3
+ import commandLineArgs from "command-line-args";
4
+ import fs from "fs-extra";
5
+ import fsp from "fs/promises";
6
+ import path from "path";
7
+ import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatAgendas, } from "../loaders";
8
+ import { getSessionsFromStart } from "../types/sessions";
9
+ import { commonOptions } from "./shared/cli_helpers";
10
+ // ===================== Constants =====================
11
/** Minimum score the best candidate must reach for the match to be accepted. */
const MATCH_THRESHOLD = 0.6;
/** Maximum number of search candidates that are scored for one agenda entry. */
const MAX_CANDIDATES = 15;
/** Maximum number of result pages requested for one search strategy. */
const MAX_PAGES = 3;
/** Run-wide counters: agenda entries processed, and entries with an accepted match. */
const STATS = { total: 0, accepted: 0 };
/** Sub-folder of the data directory under which per-meeting video files are stored. */
const VIDEOS_ROOT_FOLDER = "videos";
/** Search endpoint of the Sénat video site. */
const SENAT_VIDEOS_SEARCH_AJAX = "https://videos.senat.fr/senat_videos_search.php";
/** Root URL under which each video's `content/*.nvs` descriptors are fetched. */
const SENAT_DATAS_ROOT = "https://videos.senat.fr/Datas/senat";
/** Host used when rebuilding a VOD playlist URL from a relative path. */
const SENAT_VOD_HOST = "https://vodsenat.akamaized.net";
19
+ // ===================== CLI =====================
20
// Command-line definition: this script only accepts the options shared by all scripts.
const optionsDefinitions = [...commonOptions];
// Parsed once at load time; read below through keys such as "dataDir", "fromSession" and "silent".
const options = commandLineArgs(optionsDefinitions);
24
+ // ===================== Utils =====================
25
/**
 * Canonicalises free text for fuzzy comparison.
 *
 * Lower-cases the input, strips diacritics, replaces every character that is
 * not a letter, digit, whitespace or hyphen with a space, then collapses runs
 * of whitespace and trims the result.
 *
 * @param s Text to normalise; `null`/`undefined` is treated as the empty string.
 * @returns The normalised text.
 */
function normalize(s) {
    const lowered = (s ?? "").toLowerCase();
    // NFD splits accented letters into base letter + combining mark; the marks are then dropped.
    const unaccented = lowered.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
    const wordCharsOnly = unaccented.replace(/[^\p{L}\p{N}\s-]/gu, " ");
    return wordCharsOnly.replace(/\s+/g, " ").trim();
}
34
/**
 * Splits a text into its normalised, non-empty words.
 *
 * @param s Text to tokenise (normalised with `normalize` first).
 * @returns The words of the normalised text, in order.
 */
function tokens(s) {
    const words = normalize(s).split(" ");
    return words.filter((word) => word.length > 0);
}
35
/**
 * Dice coefficient computed on the sets of distinct tokens of two texts.
 *
 * @param a First text.
 * @param b Second text.
 * @returns `2 * |A ∩ B| / (|A| + |B|)`, or 0 when either text has no token.
 */
function dice(a, b) {
    const left = new Set(tokens(a));
    const right = new Set(tokens(b));
    if (left.size === 0 || right.size === 0) {
        return 0;
    }
    const shared = [...left].filter((token) => right.has(token)).length;
    return (2 * shared) / (left.size + right.size);
}
45
/**
 * Returns the UTC offset in force in Europe/Paris on a given calendar date.
 *
 * Applies the EU summer-time rule: "+02:00" from the last Sunday of March up
 * to (but excluding) the last Sunday of October, "+01:00" otherwise. The
 * function only receives a date, so on the two transition days it returns the
 * offset that applies to daytime hours (the switch happens during the night).
 *
 * @param dateYYYYMMDD Date formatted as "YYYY-MM-DD".
 * @returns "+02:00" during summer time, "+01:00" otherwise. When the year or
 *   the day cannot be read, falls back to a month-only approximation
 *   (April to October counts as summer time).
 */
function parisOffsetForDate(dateYYYYMMDD) {
    const [yearPart, monthPart, dayPart] = dateYYYYMMDD.split("-");
    const month = Number(monthPart || "1");
    // April to September lie entirely in summer time.
    if (month >= 4 && month <= 9) {
        return "+02:00";
    }
    // Every month other than March and October lies entirely in winter time
    // (this branch also catches an unparsable month).
    if (month !== 3 && month !== 10) {
        return "+01:00";
    }
    const year = Number(yearPart);
    const day = Number(dayPart);
    if (!Number.isInteger(year) || !Number.isInteger(day)) {
        // Not enough information to locate the transition day: month-only approximation.
        return month === 10 ? "+02:00" : "+01:00";
    }
    // March and October both have 31 days; step back from the 31st to the last Sunday.
    const lastSunday = 31 - new Date(Date.UTC(year, month - 1, 31)).getUTCDay();
    if (month === 3) {
        return day >= lastSunday ? "+02:00" : "+01:00";
    }
    return day < lastSunday ? "+02:00" : "+01:00";
}
50
/**
 * Converts a Unix timestamp into a Europe/Paris local date and time.
 *
 * The offset follows the EU summer-time rule: "+02:00" between 01:00 UTC on
 * the last Sunday of March and 01:00 UTC on the last Sunday of October,
 * "+01:00" otherwise.
 *
 * @param epochSec Unix timestamp in seconds.
 * @returns `{ date: "YYYY-MM-DD", startTime: "HH:mm:ss.SSS±HH:MM" }`, or
 *   `null` when `epochSec` is not finite or lies outside the range a `Date`
 *   can represent.
 */
function epochToParisDateTime(epochSec) {
    if (!Number.isFinite(epochSec)) {
        return null;
    }
    const utcMs = epochSec * 1000;
    const utcYear = new Date(utcMs).getUTCFullYear();
    if (Number.isNaN(utcYear)) {
        return null;
    }
    // Instant (UTC ms) of 01:00 UTC on the last Sunday of a 31-day month of that year.
    const lastSundayAt0100Utc = (monthIndex) => {
        const weekdayOf31st = new Date(Date.UTC(utcYear, monthIndex, 31)).getUTCDay();
        return Date.UTC(utcYear, monthIndex, 31 - weekdayOf31st, 1, 0, 0);
    };
    const isSummerTime = utcMs >= lastSundayAt0100Utc(2) && utcMs < lastSundayAt0100Utc(9);
    const offsetHours = isSummerTime ? 2 : 1;
    const offsetStr = isSummerTime ? "+02:00" : "+01:00";
    // Shift the instant by the offset so that the UTC getters read Paris wall-clock values.
    const local = new Date(utcMs + offsetHours * 3600 * 1000);
    const pad = (value, width = 2) => String(value).padStart(width, "0");
    const datePart = `${local.getUTCFullYear()}-${pad(local.getUTCMonth() + 1)}-${pad(local.getUTCDate())}`;
    const timePart = `${pad(local.getUTCHours())}:${pad(local.getUTCMinutes())}:${pad(local.getUTCSeconds())}.${pad(local.getUTCMilliseconds(), 3)}`;
    return {
        date: datePart,
        startTime: `${timePart}${offsetStr}`,
    };
}
73
/**
 * Converts an agenda date and start time into a Unix timestamp (seconds).
 *
 * Accepted time shapes: "H", "HH", "H:mm", "HH:mm", "HH:mm:ss",
 * "HH:mm:ss.SSS", each optionally followed by a zone designator ("Z" or
 * "±HH:MM"). When no designator is present, the Europe/Paris offset returned
 * by `parisOffsetForDate` is appended.
 *
 * @param date Date formatted as "YYYY-MM-DD".
 * @param time Start time; defaults to "00:00" when `null`/`undefined`.
 * @returns Seconds since the Unix epoch, or `null` when `date` is empty or
 *   the resulting string cannot be parsed.
 */
function toTargetEpoch(date, time) {
    if (!date) {
        return null;
    }
    // Zero-pad a single-digit hour ("9:30" -> "09:30"): only the ISO form with
    // a two-digit hour is guaranteed to be understood by Date.parse.
    let t = (time ?? "00:00").trim().replace(/^(\d)(?=:)/, "0$1");
    // A time that already carries a zone (Z or ±HH:MM) is simply prefixed with the date.
    const hasTz = /(?:Z|[+-]\d{2}:\d{2})$/i.test(t);
    let iso;
    if (hasTz) {
        // Example: 2022-10-04T18:00:00.000+02:00
        iso = `${date}T${t}`;
    }
    else {
        // Expand to at least HH:mm:ss.
        if (/^\d{1,2}$/.test(t)) {
            t = `${t.padStart(2, "0")}:00:00`;
        }
        else if (/^\d{2}:\d{2}$/.test(t)) {
            t = `${t}:00`;
        } // otherwise keep as is (HH:mm:ss and HH:mm:ss.SSS)
        // Append the Paris offset for that date.
        iso = `${date}T${t}${parisOffsetForDate(date)}`;
    }
    const ms = Date.parse(iso);
    return Number.isNaN(ms) ? null : Math.floor(ms / 1000);
}
98
/**
 * Downloads a URL and returns its body as text.
 *
 * @param url Address to fetch.
 * @returns The response body, or `null` when the response status is not OK.
 */
async function fetchText(url) {
    const response = await fetch(url);
    return response.ok ? response.text() : null;
}
104
/**
 * Downloads a URL and returns its body as a Node.js `Buffer`.
 *
 * @param url Address to fetch.
 * @returns The raw response bytes, or `null` when the response status is not OK.
 */
async function fetchBuffer(url) {
    const response = await fetch(url);
    if (response.ok) {
        return Buffer.from(await response.arrayBuffer());
    }
    return null;
}
111
/**
 * Writes `content` to file `p` as UTF-8, unless the file already holds
 * exactly that content (in which case the file is left untouched).
 *
 * @param p Path of the file to write.
 * @param content Text the file must contain.
 */
async function writeIfChanged(p, content) {
    const alreadyUpToDate = (await fs.pathExists(p)) && (await fsp.readFile(p, "utf-8")) === content;
    if (!alreadyUpToDate) {
        await fsp.writeFile(p, content, "utf-8");
    }
}
120
/**
 * Serialises an object into a URL query string (without the leading "?").
 *
 * @param obj Key/value pairs; both keys and values go through `encodeURIComponent`.
 * @returns The "k1=v1&k2=v2" string, empty for an empty object.
 */
function queryString(obj) {
    const pairs = [];
    for (const key of Object.keys(obj)) {
        pairs.push(`${encodeURIComponent(key)}=${encodeURIComponent(obj[key])}`);
    }
    return pairs.join("&");
}
125
/**
 * Strips filler words from an agenda title to obtain search keywords.
 *
 * Removes, case-insensitively, "audition de", "reunion" (unaccented spelling
 * only), "sur" and French articles/prepositions, turns dashes into spaces,
 * then collapses whitespace and trims.
 *
 * @param input Title to simplify; any falsy value is treated as "".
 * @returns The remaining words separated by single spaces.
 */
function simplifyTitleForKeywords(input) {
    // Applied in this order; each match is replaced by a single space.
    const fillerPatterns = [
        /\baudition\s+de\b/gi,
        /\breunion\b/gi,
        /\bsur\b/gi,
        /\b(la|le|les|des|de|du|d’|d')\b/gi,
        /[–—-]/g,
    ];
    let text = input || "";
    for (const pattern of fillerPatterns) {
        text = text.replace(pattern, " ");
    }
    return text.replace(/\s+/g, " ").trim();
}
135
/**
 * Reformats a "YYYY-MM-DD" date as "DD/MM/YYYY".
 *
 * @param dateYYYYMMDD Date whose parts are separated by hyphens.
 * @returns The same date in day/month/year order.
 */
function toFRDate(dateYYYYMMDD) {
    const parts = dateYYYYMMDD.split("-");
    return `${parts[2]}/${parts[1]}/${parts[0]}`;
}
139
/**
 * Reformats a "YYYY-MM-DD" date as the compact "YYYYMMDD" form.
 *
 * @param dateYYYYMMDD Date whose parts are separated by hyphens.
 * @returns The first three hyphen-separated parts concatenated.
 */
function formatYYYYMMDD(dateYYYYMMDD) {
    const parts = dateYYYYMMDD.split("-");
    return `${parts[0]}${parts[1]}${parts[2]}`;
}
143
/**
 * Builds the identifier used as the per-meeting folder name.
 *
 * @param agenda Agenda entry; `agenda.date` is expected as "YYYY-MM-DD".
 * @returns "<YYYYMMDD>-<agenda.id>", with "00000000" standing in for a missing date.
 */
function makeReunionUid(agenda) {
    let datePart = "00000000";
    if (agenda.date) {
        datePart = formatYYYYMMDD(agenda.date);
    }
    return `${datePart}-${agenda.id}`;
}
148
/**
 * Extracts the distinct video links found in search-result HTML.
 *
 * Every `href="video.<id>_<hash>.<ext>"` link yields one candidate; when the
 * same id/hash pair appears several times, only its first occurrence is kept.
 * The title is looked up in the 240 characters on either side of the link:
 * first a `title="…"` attribute, otherwise a text node of 10 to 200
 * characters. It is `undefined` when neither is found.
 *
 * @param html Search-result markup.
 * @returns Candidates `{ id, hash, pageUrl, title }` in order of first appearance.
 */
function extractCandidatesFromSearchHtml(html) {
    const contextRadius = 240;
    const linkPattern = /href="\/?video\.(\d+)_([a-z0-9]+)\.[^"]+"/gi;
    // A Map keeps insertion order, which gives "first occurrence wins" de-duplication.
    const candidatesByKey = new Map();
    for (let hit = linkPattern.exec(html); hit !== null; hit = linkPattern.exec(html)) {
        const [, id, hash] = hit;
        const key = `${id}_${hash}`;
        if (candidatesByKey.has(key)) {
            continue;
        }
        const from = Math.max(0, hit.index - contextRadius);
        const to = Math.min(html.length, hit.index + contextRadius);
        const context = html.slice(from, to);
        const titleHit = context.match(/title="([^"]+)"/i) || context.match(/>([^<]{10,200})</);
        candidatesByKey.set(key, {
            id,
            hash,
            pageUrl: `https://videos.senat.fr/video.${id}_${hash}.html`,
            title: titleHit?.[1],
        });
    }
    return [...candidatesByKey.values()];
}
168
/**
 * Reads the "date" and "title" metadata out of a `data.nvs` document.
 *
 * @param nvs Text of the document.
 * @returns `{ epoch, title }`: `epoch` is the numeric value of
 *   `<metadata name="date" value="…">`, `title` the value of
 *   `<metadata name="title" value="…">`; each is `undefined` when absent.
 */
function parseDataNvs(nvs) {
    const dateHit = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i);
    const titleHit = nvs.match(/<metadata\s+name="title"\s+value="([^"]+)"/i);
    const epochText = dateHit?.[1];
    return {
        epoch: epochText ? Number(epochText) : undefined,
        title: titleHit?.[1],
    };
}
173
/**
 * Derives the URL of a video's HLS playlist from the text of an NVS document.
 *
 * Tried in order:
 *  1. a complete "vodsenat" URL ending in `.smil/playlist.m3u8` or `.smil/master.m3u8`;
 *  2. a relative `senat/YYYY/MM/<name>.smil` path, rebuilt on `host`;
 *  3. a relative `senat/YYYY/MM/<name>.mp4` path, rebuilt on `host` as `.smil/playlist.m3u8`;
 *  4. any absolute `.m3u8` URL (lowest priority, may be a live stream).
 *
 * @param xml Text of the NVS document.
 * @param host Scheme and host used for rebuilt URLs; defaults to `SENAT_VOD_HOST`.
 * @returns The playlist URL, or `null` when `xml` is empty or nothing matches.
 */
function buildSenatVodMasterM3u8FromNvs(xml, host = SENAT_VOD_HOST) {
    if (!xml) {
        return null;
    }
    const fullVodUrl = xml.match(/https?:\/\/[^"'<>]*vodsenat[^"'<>]*\.smil\/(?:playlist|master)\.m3u8/i);
    if (fullVodUrl) {
        return fullVodUrl[0];
    }
    // Both relative forms map to the same playlist URL; .smil takes precedence over .mp4.
    const relativePathPatterns = [
        /senat\/(\d{4})\/(\d{2})\/([^"'<>\/]+?)\.smil/i,
        /senat\/(\d{4})\/(\d{2})\/([^"'<>\/]+?)\.mp4/i,
    ];
    for (const pattern of relativePathPatterns) {
        const hit = xml.match(pattern);
        if (hit) {
            return `${host}/senat/${hit[1]}/${hit[2]}/${hit[3]}.smil/playlist.m3u8`;
        }
    }
    const anyPlaylist = xml.match(/https?:\/\/[^"'<>]+\.m3u8/i);
    return anyPlaylist ? anyPlaylist[0] : null;
}
196
/**
 * Rates how well a video matches an agenda entry.
 *
 * The result is `0.3 * title similarity + 0.7 * time proximity + bonus`:
 *  - title similarity is the Dice coefficient of `agenda.titre` and the video title;
 *  - time proximity decreases linearly from 1 (same instant) to 0 (180 minutes
 *    apart or more), and is 0 when either timestamp is missing;
 *  - the bonus is 0.15 when the normalised video title contains the first word
 *    of the normalised `agenda.organe`.
 *
 * @param agenda Agenda entry (`titre` and `organe` are read).
 * @param agendaTs Agenda start as a Unix timestamp in seconds, or a falsy value.
 * @param videoTitle Title of the video, if known.
 * @param videoEpoch Video start as a Unix timestamp in seconds, if known.
 * @returns The combined score (at most 1.15).
 */
function score(agenda, agendaTs, videoTitle, videoEpoch) {
    const titleSimilarity = dice(agenda.titre || "", videoTitle || "");
    const timeProximity = agendaTs && videoEpoch
        ? Math.max(0, 1 - Math.abs(videoEpoch - agendaTs) / 60 / 180)
        : 0;
    let organBonus = 0;
    if (agenda.organe && videoTitle) {
        const organ = normalize(agenda.organe);
        const title = normalize(videoTitle);
        if (organ && title.includes(organ.split(" ")[0])) {
            organBonus = 0.15;
        }
    }
    return 0.3 * titleSimilarity + 0.7 * timeProximity + organBonus;
}
212
/**
 * Builds the list of search queries to try for an agenda entry, from the most
 * to the least specific.
 *
 * Every query carries `search=true` and `videotype=Commission`, plus a
 * one-day custom period when the agenda has a date. In order:
 *  1. keywords (`motscles`) + commission
 *  2. keywords (`motscles`) alone
 *  3. full text (`text`, keywords prefixed with "AND") + commission
 *  4. full text alone
 *  5. no keyword at all
 * Keyword queries are skipped when the simplified title is empty, and
 * commission variants when the agenda has no `organe`.
 *
 * @param agenda Agenda entry (`date`, `titre` and `organe` are read).
 * @returns The query-argument objects, in the order they should be tried.
 */
function buildSearchStrategies(agenda) {
    const frenchDate = agenda.date ? toFRDate(agenda.date) : undefined;
    const keywords = simplifyTitleForKeywords(agenda.titre || "");
    const commission = agenda.organe || undefined;
    // Arguments shared by every query.
    const base = frenchDate
        ? { search: "true", videotype: "Commission", period: "custom", begin: frenchDate, end: frenchDate }
        : { search: "true", videotype: "Commission" };
    const strategies = [];
    if (keywords) {
        const keywordVariants = [{ motscles: keywords }, { text: `AND${keywords}` }];
        for (const variant of keywordVariants) {
            if (commission) {
                strategies.push({ ...base, ...variant, commission });
            }
            strategies.push({ ...base, ...variant });
        }
    }
    // Last resort: type and period only.
    strategies.push({ ...base });
    return strategies;
}
237
/**
 * Downloads the successive result pages of one search query.
 *
 * Stops after `maxPages` pages, at the first page that cannot be fetched, or
 * right after the first page that contains no video link (that page is still
 * part of the result).
 *
 * @param args Query arguments; a `page` argument is added for each request.
 * @param baseDir Not used by this function.
 * @param strategyIndex Not used by this function.
 * @param maxPages Maximum number of pages to request.
 * @returns The HTML of the fetched pages, in page order.
 */
async function fetchAllSearchPages(args, baseDir, strategyIndex, maxPages = MAX_PAGES) {
    const videoLinkPattern = /href="\/?video\.\d+_[a-z0-9]+\./i;
    const collected = [];
    let pageNumber = 1;
    while (pageNumber <= maxPages) {
        const query = queryString({ ...args, page: String(pageNumber) });
        const body = await fetchText(`${SENAT_VIDEOS_SEARCH_AJAX}?${query}`);
        if (!body) {
            break;
        }
        collected.push(body);
        if (!videoLinkPattern.test(body)) {
            break;
        }
        pageNumber += 1;
    }
    return collected;
}
250
/**
 * Looks for the video of one agenda entry and records the outcome.
 *
 * Steps:
 *  1. skip entries without video capture, date or start time;
 *  2. run the search strategies in order until one yields candidates;
 *  3. fetch each candidate's `data.nvs`, score it, keep the best one;
 *  4. always write `metadata.json`, `data.nvs` and `finalplayer.nvs` of the
 *     best candidate under `<dataDir>/videos/<session>/<reunionUid>/`;
 *  5. when the best score reaches `MATCH_THRESHOLD` and a playlist URL could
 *     be derived, set `urlVideo` on the matching item of the agenda JSON file.
 *
 * Progress messages go to the console unless the "silent" option is set;
 * warnings are always printed.
 *
 * @param agenda Agenda entry to process (nothing happens when falsy).
 * @param session Session the entry belongs to (used in folder names).
 * @param dataDir Root data directory.
 */
async function processAgenda(agenda, session, dataDir) {
    if (!agenda) {
        return;
    }
    const verbose = !options["silent"];
    if (!agenda.captationVideo) {
        if (verbose) {
            console.log(`[skip] ${agenda.id} captationVideo=false`);
        }
        return;
    }
    if (!(agenda.date && agenda.startTime)) {
        if (verbose) {
            console.log(`[skip] ${agenda.id} date/hour missing`);
        }
        return;
    }
    STATS.total += 1;
    const reunionUid = makeReunionUid(agenda);
    const baseDir = path.join(dataDir, VIDEOS_ROOT_FOLDER, String(session), reunionUid);
    await fs.ensureDir(baseDir);
    const agendaTs = toTargetEpoch(agenda.date, agenda.startTime);

    // ==== 1) Multi-strategy searches: the first strategy with candidates wins ====
    const strategies = buildSearchStrategies(agenda);
    let usedStrategy = -1;
    let candidates = [];
    for (const [index, strategy] of strategies.entries()) {
        const pages = await fetchAllSearchPages(strategy, baseDir, index + 1, MAX_PAGES);
        if (pages.length === 0) {
            continue;
        }
        const found = extractCandidatesFromSearchHtml(pages.join("\n<!-- PAGE SPLIT -->\n"));
        if (found.length === 0) {
            continue;
        }
        candidates = found.slice(0, MAX_CANDIDATES);
        usedStrategy = index + 1;
        break;
    }
    if (usedStrategy === -1 || candidates.length === 0) {
        if (verbose) {
            console.log(`[miss] ${agenda.id} no candidates (triedStrategies=${strategies.length})`);
        }
        return;
    }

    // ==== 2) Enrich via data.nvs + scoring; pick best ====
    let best = null;
    for (const candidate of candidates) {
        const nvsBuffer = await fetchBuffer(`${SENAT_DATAS_ROOT}/${candidate.id}_${candidate.hash}/content/data.nvs`);
        if (!nvsBuffer) {
            continue;
        }
        const meta = parseDataNvs(nvsBuffer.toString("utf-8"));
        // The title seen in the search results takes precedence over the one in data.nvs.
        const vtitle = candidate.title ?? meta.title;
        const candidateScore = score(agenda, agendaTs, vtitle, meta.epoch);
        if (best === null || candidateScore > best.score) {
            best = {
                id: candidate.id,
                hash: candidate.hash,
                pageUrl: candidate.pageUrl,
                epoch: meta.epoch,
                vtitle,
                score: candidateScore,
            };
        }
    }
    if (best === null) {
        if (verbose) {
            console.log(`[miss] ${agenda.id} candidats without data.nvs`);
        }
        return;
    }
    const accepted = best.score >= MATCH_THRESHOLD;
    if (accepted) {
        STATS.accepted += 1;
    }
    if (verbose) {
        console.log(`[pick] ${agenda.id} best id=${best.id} hash=${best.hash} score=${best.score.toFixed(2)} accepted=${accepted} (strategy=${usedStrategy})`);
    }

    // ==== 3) Write metadata + NVS of the best candidate (always) ====
    const bestDt = best.epoch ? epochToParisDateTime(best.epoch) : null;
    const metadata = {
        reunionUid,
        session,
        accepted,
        threshold: MATCH_THRESHOLD,
        strategy: usedStrategy,
        agenda: {
            date: agenda.date,
            startTime: agenda.startTime,
            titre: agenda.titre,
            organe: agenda.organe ?? undefined,
            id: agenda.id,
        },
        best: {
            id: best.id,
            hash: best.hash,
            pageUrl: best.pageUrl,
            epoch: best.epoch ?? null,
            date: bestDt?.date ?? null,
            startTime: bestDt?.startTime ?? null,
            title: best.vtitle ?? null,
            score: best.score,
        },
    };
    await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
    const contentRoot = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content`;
    const dataTxt = await fetchText(`${contentRoot}/data.nvs`);
    const finalTxt = await fetchText(`${contentRoot}/finalplayer.nvs`);
    if (dataTxt) {
        await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
    }
    if (finalTxt) {
        await fsp.writeFile(path.join(baseDir, "finalplayer.nvs"), finalTxt, "utf-8");
    }
    const master = dataTxt ? buildSenatVodMasterM3u8FromNvs(dataTxt) : null;

    // ==== 4) Update agenda file (only if accepted + m3u8) ====
    if (!accepted || !master) {
        return;
    }
    const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${formatYYYYMMDD(agenda.date)}.json`);
    if (!(await fs.pathExists(agendaJsonPath))) {
        console.warn(`[warn] agenda file not found for update: ${agendaJsonPath}`);
        return;
    }
    const raw = await fsp.readFile(agendaJsonPath, "utf-8");
    let items;
    try {
        items = JSON.parse(raw);
    }
    catch (e) {
        console.warn(`[warn] invalid JSON in ${agendaJsonPath}:`, e?.message);
        items = null;
    }
    if (!Array.isArray(items)) {
        console.warn(`[warn] expected an array in ${agendaJsonPath}, got ${typeof items}`);
        return;
    }
    const idx = items.findIndex((entry) => String(entry?.id) === String(agenda.id));
    if (idx === -1) {
        console.warn(`[warn] agenda id ${agenda.id} not found in ${agendaJsonPath}`);
        return;
    }
    // Add/update urlVideo on the matching item.
    items[idx] = { ...items[idx], urlVideo: master };
    await writeIfChanged(agendaJsonPath, JSON.stringify(items, null, 2));
    if (verbose) {
        console.log(`[write] ${agenda.id} urlVideo ← ${master}`);
    }
}
389
/**
 * Processes, one after the other, every agenda entry of the given sessions.
 *
 * A failure on one entry is logged with `console.error` and does not stop
 * the run.
 *
 * @param dataDir Root data directory.
 * @param sessions Sessions whose agendas are loaded with `iterLoadSenatAgendas`.
 */
async function processAll(dataDir, sessions) {
    for (const session of sessions) {
        for (const loaded of iterLoadSenatAgendas(dataDir, session, {})) {
            for (const agenda of loaded.item) {
                await processAgenda(agenda, session, dataDir).catch((e) => {
                    console.error(`[error] ${agenda.id}:`, e?.message || e);
                });
            }
        }
    }
}
403
/**
 * Script entry point: processes every agenda of the requested sessions, then
 * prints the elapsed time and an acceptance summary.
 *
 * Options read: "dataDir" (required), "fromSession" and "silent" (suppresses
 * the timing and summary output).
 *
 * @throws AssertionError when the "dataDir" option is missing.
 */
async function main() {
    const dataDir = options["dataDir"];
    assert(dataDir, "Missing argument: data directory");
    const sessions = getSessionsFromStart(options["fromSession"]);
    const verbose = !options["silent"];
    // console.time and console.timeEnd must receive the same label, otherwise
    // the elapsed time is never reported.
    const timerLabel = "senat-agendas→videos processing time";
    if (verbose) {
        console.time(timerLabel);
    }
    await processAll(dataDir, sessions);
    if (verbose) {
        console.timeEnd(timerLabel);
        const { total, accepted } = STATS;
        const ratio = total ? (accepted / total * 100).toFixed(1) : "0.0";
        console.log(`[summary] accepted=${accepted} / total=${total} (${ratio}%)`);
    }
}
418
// Run the script: exit with status 0 on success; print the error and exit with status 1 on failure.
const exitSuccessfully = () => process.exit(0);
const reportAndFail = (err) => {
    console.error(err);
    process.exit(1);
};
main().then(exitSuccessfully).catch(reportAndFail);
@@ -12,4 +12,6 @@ export interface AgendaEvent {
12
12
  captationVideo: boolean;
13
13
  urlDossierSenat: string | null;
14
14
  quantieme: string | null;
15
+ urlVideo: string | null;
16
+ timecodeDebutVideo: number | null;
15
17
  }
package/package.json CHANGED
@@ -1,94 +1,95 @@
1
- {
2
- "name": "@tricoteuses/senat",
3
- "version": "2.9.0",
4
- "description": "Handle French Sénat's open data",
5
- "keywords": [
6
- "France",
7
- "open data",
8
- "Parliament",
9
- "Sénat"
10
- ],
11
- "author": "Emmanuel Raviart <emmanuel@raviart.com>",
12
- "bugs": {
13
- "url": "https://git.tricoteuses.fr/logiciels/tricoteuses-senat/issues"
14
- },
15
- "homepage": "https://tricoteuses.fr/",
16
- "license": "AGPL-3.0-or-later",
17
- "repository": {
18
- "type": "git",
19
- "url": "https://git.tricoteuses.fr/logiciels/tricoteuses-senat.git"
20
- },
21
- "type": "module",
22
- "engines": {
23
- "node": ">=22"
24
- },
25
- "files": [
26
- "lib"
27
- ],
28
- "exports": {
29
- ".": {
30
- "import": "./lib/index.js",
31
- "types": "./lib/index.d.ts"
32
- },
33
- "./loaders": {
34
- "import": "./lib/loaders.js",
35
- "types": "./lib/loaders.d.ts"
36
- },
37
- "./package.json": "./package.json"
38
- },
39
- "publishConfig": {
40
- "access": "public"
41
- },
42
- "scripts": {
43
- "build": "tsc",
44
- "build:types": "tsc --emitDeclarationOnly",
45
- "data:convert_data": "tsx src/scripts/convert_data.ts",
46
- "data:download": "tsx src/scripts/data-download.ts",
47
- "data:generate_schemas": "tsx src/scripts/retrieve_open_data.ts --schema",
48
- "data:retrieve_agenda": "cross-env TZ='Etc/UTC' tsx src/scripts/retrieve_agenda.ts",
49
- "data:retrieve_comptes_rendus": "tsx src/scripts/retrieve_comptes_rendus.ts",
50
- "data:retrieve_documents": "tsx src/scripts/retrieve_documents.ts",
51
- "data:retrieve_open_data": "tsx src/scripts/retrieve_open_data.ts --all",
52
- "data:retrieve_senateurs_photos": "tsx src/scripts/retrieve_senateurs_photos.ts --fetch",
53
- "data:parse_textes_lois": "tsx src/scripts/parse_textes.ts",
54
- "prepare": "npm run build",
55
- "prepublishOnly": "npm run build",
56
- "prettier": "prettier --write 'src/**/*.ts' 'tests/**/*.test.ts'",
57
- "type-check": "tsc --noEmit",
58
- "type-check:watch": "npm run type-check -- --watch"
59
- },
60
- "dependencies": {
61
- "@biryani/core": "^0.2.1",
62
- "command-line-args": "^5.1.1",
63
- "dotenv": "^8.2.0",
64
- "fs-extra": "^9.1.0",
65
- "jsdom": "^26.0.0",
66
- "kysely": "^0.27.4",
67
- "luxon": "^3.5.0",
68
- "node-stream-zip": "^1.8.2",
69
- "pg": "^8.13.1",
70
- "pg-cursor": "^2.12.1",
71
- "slug": "^11.0.0",
72
- "tsx": "^4.19.4",
73
- "windows-1252": "^1.0.0"
74
- },
75
- "devDependencies": {
76
- "@typed-code/schemats": "^5.0.1",
77
- "@types/command-line-args": "^5.0.0",
78
- "@types/fs-extra": "^9.0.7",
79
- "@types/jsdom": "^21.1.7",
80
- "@types/luxon": "^3.4.2",
81
- "@types/node": "^20.17.6",
82
- "@types/pg": "^8.11.10",
83
- "@types/pg-cursor": "^2.7.2",
84
- "@types/slug": "^5.0.9",
85
- "@typescript-eslint/eslint-plugin": "^8.13.0",
86
- "@typescript-eslint/parser": "^8.13.0",
87
- "cross-env": "^10.0.0",
88
- "eslint": "^8.57.1",
89
- "kysely-codegen": "^0.18.0",
90
- "prettier": "^3.5.3",
91
- "tslib": "^2.1.0",
92
- "typescript": "^5.8.3"
93
- }
94
- }
1
+ {
2
+ "name": "@tricoteuses/senat",
3
+ "version": "2.9.5",
4
+ "description": "Handle French Sénat's open data",
5
+ "keywords": [
6
+ "France",
7
+ "open data",
8
+ "Parliament",
9
+ "Sénat"
10
+ ],
11
+ "author": "Emmanuel Raviart <emmanuel@raviart.com>",
12
+ "bugs": {
13
+ "url": "https://git.tricoteuses.fr/logiciels/tricoteuses-senat/issues"
14
+ },
15
+ "homepage": "https://tricoteuses.fr/",
16
+ "license": "AGPL-3.0-or-later",
17
+ "repository": {
18
+ "type": "git",
19
+ "url": "https://git.tricoteuses.fr/logiciels/tricoteuses-senat.git"
20
+ },
21
+ "type": "module",
22
+ "engines": {
23
+ "node": ">=22"
24
+ },
25
+ "files": [
26
+ "lib"
27
+ ],
28
+ "exports": {
29
+ ".": {
30
+ "import": "./lib/index.js",
31
+ "types": "./lib/index.d.ts"
32
+ },
33
+ "./loaders": {
34
+ "import": "./lib/loaders.js",
35
+ "types": "./lib/loaders.d.ts"
36
+ },
37
+ "./package.json": "./package.json"
38
+ },
39
+ "publishConfig": {
40
+ "access": "public"
41
+ },
42
+ "scripts": {
43
+ "build": "tsc",
44
+ "build:types": "tsc --emitDeclarationOnly",
45
+ "data:convert_data": "tsx src/scripts/convert_data.ts",
46
+ "data:download": "tsx src/scripts/data-download.ts",
47
+ "data:generate_schemas": "tsx src/scripts/retrieve_open_data.ts --schema",
48
+ "data:retrieve_agenda": "cross-env TZ='Etc/UTC' tsx src/scripts/retrieve_agenda.ts",
49
+ "data:retrieve_comptes_rendus": "tsx src/scripts/retrieve_comptes_rendus.ts",
50
+ "data:retrieve_documents": "tsx src/scripts/retrieve_documents.ts",
51
+ "data:retrieve_open_data": "tsx src/scripts/retrieve_open_data.ts --all",
52
+ "data:retrieve_senateurs_photos": "tsx src/scripts/retrieve_senateurs_photos.ts --fetch",
53
+ "data:retrieve_videos": "tsx src/scripts/retrieve_videos.ts",
54
+ "data:parse_textes_lois": "tsx src/scripts/parse_textes.ts",
55
+ "prepare": "npm run build",
56
+ "prepublishOnly": "npm run build",
57
+ "prettier": "prettier --write 'src/**/*.ts' 'tests/**/*.test.ts'",
58
+ "type-check": "tsc --noEmit",
59
+ "type-check:watch": "npm run type-check -- --watch"
60
+ },
61
+ "dependencies": {
62
+ "@biryani/core": "^0.2.1",
63
+ "command-line-args": "^5.1.1",
64
+ "dotenv": "^8.2.0",
65
+ "fs-extra": "^9.1.0",
66
+ "jsdom": "^26.0.0",
67
+ "kysely": "^0.27.4",
68
+ "luxon": "^3.5.0",
69
+ "node-stream-zip": "^1.8.2",
70
+ "pg": "^8.13.1",
71
+ "pg-cursor": "^2.12.1",
72
+ "slug": "^11.0.0",
73
+ "tsx": "^4.19.4",
74
+ "windows-1252": "^1.0.0"
75
+ },
76
+ "devDependencies": {
77
+ "@typed-code/schemats": "^5.0.1",
78
+ "@types/command-line-args": "^5.0.0",
79
+ "@types/fs-extra": "^9.0.7",
80
+ "@types/jsdom": "^21.1.7",
81
+ "@types/luxon": "^3.4.2",
82
+ "@types/node": "^20.17.6",
83
+ "@types/pg": "^8.11.10",
84
+ "@types/pg-cursor": "^2.7.2",
85
+ "@types/slug": "^5.0.9",
86
+ "@typescript-eslint/eslint-plugin": "^8.13.0",
87
+ "@typescript-eslint/parser": "^8.13.0",
88
+ "cross-env": "^10.0.0",
89
+ "eslint": "^8.57.1",
90
+ "kysely-codegen": "^0.18.0",
91
+ "prettier": "^3.5.3",
92
+ "tslib": "^2.1.0",
93
+ "typescript": "^5.8.3"
94
+ }
95
+ }
@@ -1,52 +0,0 @@
1
- import { Ses, Sub, TxtAmeli } from "./types/ameli";
2
- import { Debat } from "./types/debats";
3
- import { Ass, Aud, Auteur, DateSeance, DecCoc, DenRap, DocAtt, Ecr, EtaLoi, LecAss, LecAssRap, Lecture, Loi, Org, OriTxt, Qua, Rap, Scr, Texte, TypAtt, TypLec, TypLoi, TypTxt, TypUrl } from "./types/dosleg";
4
- import { Question } from "./types/questions";
5
- export declare const allFollows: string[];
6
/**
 * Type declaration of the `Aggregator` class; the implementation is not part
 * of this declaration file.
 *
 * NOTE(review): judging by the member names only, it appears to collect
 * records per table and to track which ids were requested or visited.
 * Confirm against the implementation.
 */
export declare class Aggregator {
    /** Set of names received by the constructor. */
    follow: Set<string>;
    /** Objects indexed by table name, then by id. */
    objectByIdByTableName: Record<string, Record<string, object>>;
    /** Sets of ids indexed by table name. */
    requestedIdsByTableName: Record<string, Set<string>>;
    /** Sets of ids indexed by table name. */
    visitedIdsByTableName: Record<string, Set<string>>;

    constructor(follow: Set<string>);

    // One `add…` method per record type; each takes one record and returns nothing.
    addAss(ass: Ass): void;
    addAud(aud: Aud): void;
    addAuteur(auteur: Auteur): void;
    addDateSeance(dateSeance: DateSeance): void;
    addDebat(debat: Debat): void;
    addDeccoc(deccoc: DecCoc): void;
    addDenrap(denrap: DenRap): void;
    addDocatt(docatt: DocAtt): void;
    addEcr(ecr: Ecr): void;
    addEtaloi(etaloi: EtaLoi): void;
    addLecass(lecass: LecAss): void;
    addLecassrap(lecassrap: LecAssRap): void;
    addLecture(lecture: Lecture): void;
    addLoi(loi: Loi): void;
    addOrg(org: Org): void;
    addOritxt(oritxt: OriTxt): void;
    addQua(qua: Qua): void;
    addQuestion(question: Question): void;
    addRap(rap: Rap): void;
    addScr(scr: Scr): void;
    addSes(ses: Ses): void;
    addSub(sub: Sub): void;
    addTexte(texte: Texte): void;
    addTxtAmeli(txtAmeli: TxtAmeli): void;
    addTypatt(typatt: TypAtt): void;
    addTyplec(typlec: TypLec): void;
    addTyploi(typloi: TypLoi): void;
    addTyptxt(typtxt: TypTxt): void;
    addTypurl(typurl: TypUrl): void;

    /** Asynchronous; resolves with no value. */
    getAll(): Promise<void>;
    /** Takes a table name and an id; returns nothing. */
    request(tableName: string, id: string): void;
    /** Returns a value typed `any`. */
    toJson(): any;
}