@tricoteuses/senat 2.20.8 → 2.20.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -56,10 +56,10 @@ npm run data:parse_textes_lois ../senat-data
56
56
  npm run data:retrieve_agenda ../senat-data -- --fromSession 2022 [--parseAgenda]
57
57
 
58
58
  # Retrieval (& parsing) of comptes-rendus de séance from Sénat's data
59
- npm run data:retrieve_cr_seance ../senat-data -- [--parseDebats]
59
+ npm run data:retrieve_cr_seance ../senat-data -- [--parseDebats] [--keepDir]
60
60
 
61
61
  # Retrieval (& parsing) of comptes-rendus de commissions from Sénat's website
62
- npm run data:retrieve_cr_commission ../senat-data -- [--parseDebats]
62
+ npm run data:retrieve_cr_commission ../senat-data -- [--parseDebats] [--keepDir]
63
63
 
64
64
  # Retrieval of sénateurs' pictures from Sénat's website
65
65
  npm run data:retrieve_senateurs_photos ../senat-data
@@ -123,7 +123,7 @@ const findAllAmendementsQuery = dbSenat
123
123
  "ameli.avigvt.lib as avis_gouvernement",
124
124
  eb.fn.coalesce("ameli.sor.lib", "ameli.irr.libirr").as("sort"),
125
125
  "ameli.amd.rev as revision",
126
- concat(val("https://www.senat.fr/amendements/"), ref("ameli.ses.ann"), val("-"), sql `(ameli.ses.ann + 1)`, val("/"), ref("ameli.txt_ameli.numabs"), val("/Amdt_"), ref("ameli.amd.numabs"), val(".html")).as("url"),
126
+ concat(val("https://www.senat.fr/amendements/"), ref("ameli.ses.ann"), val("-"), sql `(ameli.ses.ann + 1)`, val("/"), ref("ameli.txt_ameli.numabs"), val("/Amdt_"), ref("ameli.amd.num"), val(".html")).as("url"),
127
127
  "ameli.grppol_ameli.lilcou as au_nom_de_groupe_politique",
128
128
  "ameli.com_ameli.lil as au_nom_de_commission",
129
129
  eb.case().when("ameli.cab.entid", "is not", null).then(true).else(false).end().as("auteur_est_gouvernement"),
@@ -34,10 +34,12 @@ function rapports(lectureAssembleeId) {
34
34
  .withSchema("dosleg")
35
35
  .selectFrom("rap")
36
36
  .leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod")
37
+ .leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
37
38
  .leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
38
39
  .where("lecassrap.lecassidt", "=", lectureAssembleeId)
39
40
  .select(({ eb, ref, val }) => [
40
41
  "rap.rapnum as numero",
42
+ "raporg.orgcod as code_organisme",
41
43
  eb
42
44
  .case()
43
45
  .when("rap.typurl", "=", "I")
@@ -84,6 +86,7 @@ function textes(lectureAssembleeId) {
84
86
  .where("texte.lecassidt", "=", lectureAssembleeId)
85
87
  .select(({ eb, ref, val }) => [
86
88
  "texte.texnum as numero",
89
+ "texte.orgcod as code_organisme",
87
90
  eb
88
91
  .case()
89
92
  .when("texte.typurl", "=", "I")
@@ -1,4 +1,4 @@
1
- import fs from "fs-extra";
1
+ import fs, { ensureDir } from "fs-extra";
2
2
  import assert from "assert";
3
3
  import path from "path";
4
4
  import * as cheerio from "cheerio";
@@ -187,7 +187,12 @@ async function retrieveCommissionCRs(options = {}) {
187
187
  const politenessMs = Number(options["politenessMs"] ?? 150);
188
188
  const commissionsRootDir = path.join(dataDir, COMMISSION_FOLDER);
189
189
  const originalRoot = path.join(commissionsRootDir, DATA_ORIGINAL_FOLDER);
190
- ensureAndClearDir(originalRoot);
190
// Prepare the folder that receives the original (downloaded) files.
// With the keepDir option the existing folder is only created if missing;
// otherwise it goes through ensureAndClearDir().
// The synchronous fs-extra variant is used on purpose: the async ensureDir()
// returns a promise that was neither awaited nor caught, so the folder was not
// guaranteed to exist before the code below used it, and a failure would have
// surfaced as an unhandled rejection.
if (options["keepDir"]) {
    fs.ensureDirSync(originalRoot);
}
else {
    ensureAndClearDir(originalRoot);
}
191
196
  const discovered = await discoverCommissionWeeklyPages(fromSession);
192
197
  console.log(`[COM-CR][discover] ${discovered.length} links (>= session ${fromSession})`);
193
198
  const jobs = discovered.map(({ url, yyyymmdd, commissionKey }) => {
@@ -238,7 +243,9 @@ async function retrieveCommissionCRs(options = {}) {
238
243
  const sessions = getSessionsFromStart(options["fromSession"]);
239
244
  const comRoot = path.join(dataDir, COMMISSION_FOLDER);
240
245
  const transformedRoot = path.join(comRoot, DATA_TRANSFORMED_FOLDER);
241
- if (options["parseDebats"])
246
// Prepare the folder that receives the transformed output.
// With the keepDir option the existing folder is only created if missing;
// otherwise it goes through ensureAndClearDir().
// fs.ensureDirSync() replaces the async ensureDir(), whose promise was
// neither awaited nor caught (floating promise).
// NOTE(review): the previous version only reset this folder when
// options["parseDebats"] was set; it is now reset whenever keepDir is
// absent — confirm this is intended.
if (options["keepDir"]) {
    fs.ensureDirSync(transformedRoot);
}
else {
    ensureAndClearDir(transformedRoot);
}
243
250
  for (const session of sessions) {
244
251
  const originalSessionDir = path.join(originalRoot, String(session));
@@ -9,6 +9,7 @@ import { getSessionsFromStart } from "../types/sessions";
9
9
  import { commonOptions } from "./shared/cli_helpers";
10
10
  import { decodeHtmlEntities } from "../model/util";
11
11
  import { DateTime } from "luxon";
12
+ import { getFirstInterventionStartTimecode } from "../utils/nvs-timecode";
12
13
  // ===================== Constants =====================
13
14
  const MATCH_THRESHOLD = 0.5;
14
15
  const MAX_CANDIDATES = 15;
@@ -274,6 +275,8 @@ async function processGroupedReunion(agenda, session, dataDir) {
274
275
  }
275
276
  }
276
277
  let master = null;
278
+ let dataTxt = null;
279
+ let finalTxt = null;
277
280
  let accepted = false;
278
281
  if (!skipDownload) {
279
282
  STATS.total++;
@@ -398,8 +401,8 @@ async function processGroupedReunion(agenda, session, dataDir) {
398
401
  await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
399
402
  const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
400
403
  const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
401
- const dataTxt = await fetchText(dataUrl);
402
- const finalTxt = await fetchText(finalUrl);
404
+ dataTxt = await fetchText(dataUrl);
405
+ finalTxt = await fetchText(finalUrl);
403
406
  if (dataTxt)
404
407
  await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
405
408
  if (finalTxt)
@@ -414,14 +417,21 @@ async function processGroupedReunion(agenda, session, dataDir) {
414
417
  else {
415
418
  // Skipped download, but need to read data.nvs for urlVideo
416
419
  try {
417
- const dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
418
- const finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
420
+ dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
421
+ finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
419
422
  master = buildSenatVodMasterM3u8FromNvs(dataTxt);
420
423
  }
421
424
  catch (e) {
422
425
  console.warn(e);
423
426
  }
424
427
  }
428
// Start offset of the first intervention in the video. It can only be
// computed when both NVS documents (data.nvs and finalplayer.nvs) are
// available, whether freshly downloaded or read back from disk.
let timecodeDebutVideo = null;
if (dataTxt && finalTxt) {
    timecodeDebutVideo = getFirstInterventionStartTimecode(dataTxt, finalTxt);
    if (timecodeDebutVideo === null) {
        // Template literal with an explicit space: the uid used to be glued
        // directly to the word "reunion" in the warning.
        console.warn(`[warn] Cannot retrieve start video timecode from reunion ${reunionUid}`);
    }
}
425
435
  // ==== 4) Update agenda file (only if accepted + m3u8) ====
426
436
  if ((accepted || skipDownload) && master) {
427
437
  const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
@@ -437,9 +447,13 @@ async function processGroupedReunion(agenda, session, dataDir) {
437
447
  }
438
448
  if (obj && typeof obj === "object" && !Array.isArray(obj)) {
439
449
  const next = { ...obj, urlVideo: master };
450
+ if (timecodeDebutVideo != null) {
451
+ next.timecodeDebutVideo = timecodeDebutVideo;
452
+ }
440
453
  await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
441
454
  if (!options["silent"]) {
442
- console.log(`[write] ${agenda.uid} urlVideo ← ${master}`);
455
+ console.log(`[write] ${agenda.uid} urlVideo ← ${master}` +
456
+ (timecodeDebutVideo != null ? ` (timecodeDebutVideo ← ${timecodeDebutVideo}s)` : ""));
443
457
  }
444
458
  }
445
459
  else {
@@ -35,6 +35,11 @@ export declare const onlyRecentOption: {
35
35
  name: string;
36
36
  type: NumberConstructor;
37
37
  };
38
+ export declare const keepDirOption: {
39
+ help: string;
40
+ name: string;
41
+ type: BooleanConstructor;
42
+ };
38
43
  export declare const commonOptions: ({
39
44
  alias: string;
40
45
  defaultValue: string[];
@@ -48,12 +53,11 @@ export declare const commonOptions: ({
48
53
  name: string;
49
54
  type: StringConstructor;
50
55
  } | {
51
- alias: string;
52
56
  help: string;
53
57
  name: string;
54
- type: BooleanConstructor;
58
+ type: NumberConstructor;
55
59
  } | {
56
60
  help: string;
57
61
  name: string;
58
- type: NumberConstructor;
62
+ type: BooleanConstructor;
59
63
  })[];
@@ -35,6 +35,11 @@ export const onlyRecentOption = {
35
35
  name: "only-recent",
36
36
  type: Number,
37
37
  };
38
+ export const keepDirOption = {
39
+ help: "keep directories when cleaning data",
40
+ name: "keepDir",
41
+ type: Boolean,
42
+ };
38
43
  export const commonOptions = [
39
44
  categoriesOption,
40
45
  dataDirDefaultOption,
@@ -42,4 +47,5 @@ export const commonOptions = [
42
47
  silentOption,
43
48
  verboseOption,
44
49
  onlyRecentOption,
50
+ keepDirOption,
45
51
  ];
@@ -0,0 +1 @@
1
+ export declare function getFirstInterventionStartTimecode(dataNvs: string, finalPlayerNvs: string): number | null;
@@ -0,0 +1,62 @@
1
+ import { XMLParser } from "fast-xml-parser";
2
+ const xmlParser = new XMLParser({
3
+ ignoreAttributes: false,
4
+ attributeNamePrefix: "@_",
5
+ });
6
/**
 * Finds the id of the first chapter of a data.nvs document that is an
 * intervention with a speaker.
 *
 * Chapters are visited depth-first, parent before children, in document
 * order. A chapter qualifies when it has a metadata entry named "type" whose
 * value is "IN" or whose label is "Intervention", a truthy `speaker`, and a
 * truthy `id` attribute.
 *
 * @param {string} dataNvs - Raw XML text of data.nvs.
 * @returns {string | null} The matching chapter id as a string, or null when
 *   the document has no chapters or no chapter qualifies.
 */
function getFirstInterventionChapterId(dataNvs) {
    // The parser yields a single object for one element and an array for
    // several; normalise both shapes to an array.
    const asList = (value) => (Array.isArray(value) ? value : [value]);
    const parsed = xmlParser.parse(dataNvs);
    const topLevel = parsed?.data?.chapters?.chapter;
    if (!topLevel) {
        return null;
    }
    // Pre-order search returning the first qualifying id found under `node`.
    const search = (node) => {
        const metaList = node.metadata ? asList(node.metadata) : [];
        const taggedAsIntervention = metaList.some((entry) => entry?.["@_name"] === "type" &&
            (entry?.["@_value"] === "IN" || entry?.["@_label"] === "Intervention"));
        const speakerPresent = Boolean(node.speaker);
        if (taggedAsIntervention && speakerPresent && node["@_id"]) {
            return String(node["@_id"]);
        }
        const nested = node.chapter;
        if (!nested) {
            return null;
        }
        for (const sub of asList(nested)) {
            const hit = search(sub);
            if (hit) {
                return hit;
            }
        }
        return null;
    };
    for (const root of asList(topLevel)) {
        const hit = search(root);
        if (hit) {
            return hit;
        }
    }
    return null;
}
40
/**
 * Looks up, in a finalplayer.nvs document, the `synchro` entry whose id
 * matches `chapterId` and converts its timecode to whole seconds.
 *
 * @param {string} finalPlayerNvs - Raw XML text of finalplayer.nvs.
 * @param {string | number} chapterId - Chapter id to look up; ids are
 *   compared as strings.
 * @returns {number | null} The entry's timecode divided by 1000 and rounded
 *   down (the raw value is treated as milliseconds), or null when there is no
 *   `synchro` entry, no entry with that id, or the timecode is missing, blank
 *   or not a finite number.
 */
function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
    const parsed = xmlParser.parse(finalPlayerNvs);
    const synchros = parsed?.player?.synchro;
    if (!synchros) {
        return null;
    }
    // A single <synchro> is parsed as an object, several as an array.
    const synchroList = Array.isArray(synchros) ? synchros : [synchros];
    const wantedId = String(chapterId);
    const entry = synchroList.find((s) => String(s?.["@_id"]) === wantedId);
    if (!entry) {
        return null;
    }
    const rawTimecode = entry["@_timecode"];
    // Number("") and Number("  ") are 0, so a blank attribute must be rejected
    // explicitly or it would be reported as a valid start at 0 seconds.
    if (rawTimecode == null || String(rawTimecode).trim() === "") {
        return null;
    }
    const ms = Number(rawTimecode);
    // isFinite rejects NaN as well as +/-Infinity.
    if (!Number.isFinite(ms)) {
        return null;
    }
    return Math.floor(ms / 1000);
}
57
/**
 * Returns the start timecode of the first intervention of a video.
 *
 * The chapter id is resolved from data.nvs, then its timecode is looked up
 * in finalplayer.nvs.
 *
 * @param {string} dataNvs - Raw XML text of data.nvs.
 * @param {string} finalPlayerNvs - Raw XML text of finalplayer.nvs.
 * @returns {number | null} The value produced by getTimecodeForChapterId for
 *   that chapter, or null when no intervention chapter id is found.
 */
export function getFirstInterventionStartTimecode(dataNvs, finalPlayerNvs) {
    const interventionId = getFirstInterventionChapterId(dataNvs);
    return interventionId
        ? getTimecodeForChapterId(finalPlayerNvs, interventionId)
        : null;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.20.8",
3
+ "version": "2.20.10",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",
@@ -65,6 +65,7 @@
65
65
  "cheerio": "^1.1.2",
66
66
  "command-line-args": "^6.0.1",
67
67
  "dotenv": "^17.2.3",
68
+ "fast-xml-parser": "^5.3.2",
68
69
  "fs-extra": "^11.3.2",
69
70
  "jsdom": "^27.2.0",
70
71
  "kysely": "^0.28.8",