@tricoteuses/senat 2.20.8 → 2.20.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/lib/model/ameli.js +1 -1
- package/lib/model/dosleg.js +3 -0
- package/lib/scripts/retrieve_cr_commission.js +10 -3
- package/lib/scripts/retrieve_videos.js +19 -5
- package/lib/scripts/shared/cli_helpers.d.ts +7 -3
- package/lib/scripts/shared/cli_helpers.js +6 -0
- package/lib/utils/nvs-timecode.d.ts +1 -0
- package/lib/utils/nvs-timecode.js +62 -0
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -56,10 +56,10 @@ npm run data:parse_textes_lois ../senat-data
|
|
|
56
56
|
npm run data:retrieve_agenda ../senat-data -- --fromSession 2022 [--parseAgenda]
|
|
57
57
|
|
|
58
58
|
# Retrieval (& parsing) of comptes-rendus de séance from Sénat's data
|
|
59
|
-
npm run data:retrieve_cr_seance ../senat-data -- [--parseDebats]
|
|
59
|
+
npm run data:retrieve_cr_seance ../senat-data -- [--parseDebats] [--keepDir]
|
|
60
60
|
|
|
61
61
|
# Retrieval (& parsing) of comptes-rendus de commissions from Sénat's website
|
|
62
|
-
npm run data:retrieve_cr_commission ../senat-data -- [--parseDebats]
|
|
62
|
+
npm run data:retrieve_cr_commission ../senat-data -- [--parseDebats] [--keepDir]
|
|
63
63
|
|
|
64
64
|
# Retrieval of sénateurs' pictures from Sénat's website
|
|
65
65
|
npm run data:retrieve_senateurs_photos ../senat-data
|
package/lib/model/ameli.js
CHANGED
|
@@ -123,7 +123,7 @@ const findAllAmendementsQuery = dbSenat
|
|
|
123
123
|
"ameli.avigvt.lib as avis_gouvernement",
|
|
124
124
|
eb.fn.coalesce("ameli.sor.lib", "ameli.irr.libirr").as("sort"),
|
|
125
125
|
"ameli.amd.rev as revision",
|
|
126
|
-
concat(val("https://www.senat.fr/amendements/"), ref("ameli.ses.ann"), val("-"), sql `(ameli.ses.ann + 1)`, val("/"), ref("ameli.txt_ameli.numabs"), val("/Amdt_"), ref("ameli.amd.
|
|
126
|
+
concat(val("https://www.senat.fr/amendements/"), ref("ameli.ses.ann"), val("-"), sql `(ameli.ses.ann + 1)`, val("/"), ref("ameli.txt_ameli.numabs"), val("/Amdt_"), ref("ameli.amd.num"), val(".html")).as("url"),
|
|
127
127
|
"ameli.grppol_ameli.lilcou as au_nom_de_groupe_politique",
|
|
128
128
|
"ameli.com_ameli.lil as au_nom_de_commission",
|
|
129
129
|
eb.case().when("ameli.cab.entid", "is not", null).then(true).else(false).end().as("auteur_est_gouvernement"),
|
package/lib/model/dosleg.js
CHANGED
|
@@ -34,10 +34,12 @@ function rapports(lectureAssembleeId) {
|
|
|
34
34
|
.withSchema("dosleg")
|
|
35
35
|
.selectFrom("rap")
|
|
36
36
|
.leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod")
|
|
37
|
+
.leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
|
|
37
38
|
.leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
|
|
38
39
|
.where("lecassrap.lecassidt", "=", lectureAssembleeId)
|
|
39
40
|
.select(({ eb, ref, val }) => [
|
|
40
41
|
"rap.rapnum as numero",
|
|
42
|
+
"raporg.orgcod as code_organisme",
|
|
41
43
|
eb
|
|
42
44
|
.case()
|
|
43
45
|
.when("rap.typurl", "=", "I")
|
|
@@ -84,6 +86,7 @@ function textes(lectureAssembleeId) {
|
|
|
84
86
|
.where("texte.lecassidt", "=", lectureAssembleeId)
|
|
85
87
|
.select(({ eb, ref, val }) => [
|
|
86
88
|
"texte.texnum as numero",
|
|
89
|
+
"texte.orgcod as code_organisme",
|
|
87
90
|
eb
|
|
88
91
|
.case()
|
|
89
92
|
.when("texte.typurl", "=", "I")
|
package/lib/scripts/retrieve_cr_commission.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import fs from "fs-extra";
|
|
1
|
+
import fs, { ensureDir } from "fs-extra";
|
|
2
2
|
import assert from "assert";
|
|
3
3
|
import path from "path";
|
|
4
4
|
import * as cheerio from "cheerio";
|
|
@@ -187,7 +187,12 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
187
187
|
const politenessMs = Number(options["politenessMs"] ?? 150);
|
|
188
188
|
const commissionsRootDir = path.join(dataDir, COMMISSION_FOLDER);
|
|
189
189
|
const originalRoot = path.join(commissionsRootDir, DATA_ORIGINAL_FOLDER);
|
|
190
|
-
|
|
190
|
+
if (!options["keepDir"]) {
|
|
191
|
+
ensureAndClearDir(originalRoot);
|
|
192
|
+
}
|
|
193
|
+
else {
|
|
194
|
+
ensureDir(originalRoot);
|
|
195
|
+
}
|
|
191
196
|
const discovered = await discoverCommissionWeeklyPages(fromSession);
|
|
192
197
|
console.log(`[COM-CR][discover] ${discovered.length} links (>= session ${fromSession})`);
|
|
193
198
|
const jobs = discovered.map(({ url, yyyymmdd, commissionKey }) => {
|
|
@@ -238,7 +243,9 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
238
243
|
const sessions = getSessionsFromStart(options["fromSession"]);
|
|
239
244
|
const comRoot = path.join(dataDir, COMMISSION_FOLDER);
|
|
240
245
|
const transformedRoot = path.join(comRoot, DATA_TRANSFORMED_FOLDER);
|
|
241
|
-
if (options["
|
|
246
|
+
if (options["keepDir"])
|
|
247
|
+
ensureDir(transformedRoot);
|
|
248
|
+
else
|
|
242
249
|
ensureAndClearDir(transformedRoot);
|
|
243
250
|
for (const session of sessions) {
|
|
244
251
|
const originalSessionDir = path.join(originalRoot, String(session));
|
package/lib/scripts/retrieve_videos.js
CHANGED
|
@@ -9,6 +9,7 @@ import { getSessionsFromStart } from "../types/sessions";
|
|
|
9
9
|
import { commonOptions } from "./shared/cli_helpers";
|
|
10
10
|
import { decodeHtmlEntities } from "../model/util";
|
|
11
11
|
import { DateTime } from "luxon";
|
|
12
|
+
import { getFirstInterventionStartTimecode } from "../utils/nvs-timecode";
|
|
12
13
|
// ===================== Constants =====================
|
|
13
14
|
const MATCH_THRESHOLD = 0.5;
|
|
14
15
|
const MAX_CANDIDATES = 15;
|
|
@@ -274,6 +275,8 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
274
275
|
}
|
|
275
276
|
}
|
|
276
277
|
let master = null;
|
|
278
|
+
let dataTxt = null;
|
|
279
|
+
let finalTxt = null;
|
|
277
280
|
let accepted = false;
|
|
278
281
|
if (!skipDownload) {
|
|
279
282
|
STATS.total++;
|
|
@@ -398,8 +401,8 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
398
401
|
await writeIfChanged(path.join(baseDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
399
402
|
const dataUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/data.nvs`;
|
|
400
403
|
const finalUrl = `${SENAT_DATAS_ROOT}/${best.id}_${best.hash}/content/finalplayer.nvs`;
|
|
401
|
-
|
|
402
|
-
|
|
404
|
+
dataTxt = await fetchText(dataUrl);
|
|
405
|
+
finalTxt = await fetchText(finalUrl);
|
|
403
406
|
if (dataTxt)
|
|
404
407
|
await fsp.writeFile(path.join(baseDir, "data.nvs"), dataTxt, "utf-8");
|
|
405
408
|
if (finalTxt)
|
|
@@ -414,14 +417,21 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
414
417
|
else {
|
|
415
418
|
// Skipped download, but need to read data.nvs for urlVideo
|
|
416
419
|
try {
|
|
417
|
-
|
|
418
|
-
|
|
420
|
+
dataTxt = await fsp.readFile(path.join(baseDir, "data.nvs"), "utf-8");
|
|
421
|
+
finalTxt = await fsp.readFile(path.join(baseDir, "finalplayer.nvs"), "utf-8");
|
|
419
422
|
master = buildSenatVodMasterM3u8FromNvs(dataTxt);
|
|
420
423
|
}
|
|
421
424
|
catch (e) {
|
|
422
425
|
console.warn(e);
|
|
423
426
|
}
|
|
424
427
|
}
|
|
428
|
+
let timecodeDebutVideo = null;
|
|
429
|
+
if (dataTxt && finalTxt) {
|
|
430
|
+
timecodeDebutVideo = getFirstInterventionStartTimecode(dataTxt, finalTxt);
|
|
431
|
+
if (timecodeDebutVideo === null) {
|
|
432
|
+
console.warn(`[warn] Cannot retrieve start video timecode from reunion` + reunionUid);
|
|
433
|
+
}
|
|
434
|
+
}
|
|
425
435
|
// ==== 4) Update agenda file (only if accepted + m3u8) ====
|
|
426
436
|
if ((accepted || skipDownload) && master) {
|
|
427
437
|
const agendaJsonPath = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${agenda.uid}.json`);
|
|
@@ -437,9 +447,13 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
437
447
|
}
|
|
438
448
|
if (obj && typeof obj === "object" && !Array.isArray(obj)) {
|
|
439
449
|
const next = { ...obj, urlVideo: master };
|
|
450
|
+
if (timecodeDebutVideo != null) {
|
|
451
|
+
next.timecodeDebutVideo = timecodeDebutVideo;
|
|
452
|
+
}
|
|
440
453
|
await writeIfChanged(agendaJsonPath, JSON.stringify(next, null, 2));
|
|
441
454
|
if (!options["silent"]) {
|
|
442
|
-
console.log(`[write] ${agenda.uid} urlVideo ← ${master}`
|
|
455
|
+
console.log(`[write] ${agenda.uid} urlVideo ← ${master}` +
|
|
456
|
+
(timecodeDebutVideo != null ? ` (timecodeDebutVideo ← ${timecodeDebutVideo}s)` : ""));
|
|
443
457
|
}
|
|
444
458
|
}
|
|
445
459
|
else {
|
package/lib/scripts/shared/cli_helpers.d.ts
CHANGED
|
@@ -35,6 +35,11 @@ export declare const onlyRecentOption: {
|
|
|
35
35
|
name: string;
|
|
36
36
|
type: NumberConstructor;
|
|
37
37
|
};
|
|
38
|
+
export declare const keepDirOption: {
|
|
39
|
+
help: string;
|
|
40
|
+
name: string;
|
|
41
|
+
type: BooleanConstructor;
|
|
42
|
+
};
|
|
38
43
|
export declare const commonOptions: ({
|
|
39
44
|
alias: string;
|
|
40
45
|
defaultValue: string[];
|
|
@@ -48,12 +53,11 @@ export declare const commonOptions: ({
|
|
|
48
53
|
name: string;
|
|
49
54
|
type: StringConstructor;
|
|
50
55
|
} | {
|
|
51
|
-
alias: string;
|
|
52
56
|
help: string;
|
|
53
57
|
name: string;
|
|
54
|
-
type:
|
|
58
|
+
type: NumberConstructor;
|
|
55
59
|
} | {
|
|
56
60
|
help: string;
|
|
57
61
|
name: string;
|
|
58
|
-
type:
|
|
62
|
+
type: BooleanConstructor;
|
|
59
63
|
})[];
|
package/lib/scripts/shared/cli_helpers.js
CHANGED
|
@@ -35,6 +35,11 @@ export const onlyRecentOption = {
|
|
|
35
35
|
name: "only-recent",
|
|
36
36
|
type: Number,
|
|
37
37
|
};
|
|
38
|
+
export const keepDirOption = {
|
|
39
|
+
help: "keep directories when cleaning data",
|
|
40
|
+
name: "keepDir",
|
|
41
|
+
type: Boolean,
|
|
42
|
+
};
|
|
38
43
|
export const commonOptions = [
|
|
39
44
|
categoriesOption,
|
|
40
45
|
dataDirDefaultOption,
|
|
@@ -42,4 +47,5 @@ export const commonOptions = [
|
|
|
42
47
|
silentOption,
|
|
43
48
|
verboseOption,
|
|
44
49
|
onlyRecentOption,
|
|
50
|
+
keepDirOption,
|
|
45
51
|
];
|
package/lib/utils/nvs-timecode.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function getFirstInterventionStartTimecode(dataNvs: string, finalPlayerNvs: string): number | null;
|
package/lib/utils/nvs-timecode.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { XMLParser } from "fast-xml-parser";
|
|
2
|
+
const xmlParser = new XMLParser({
|
|
3
|
+
ignoreAttributes: false,
|
|
4
|
+
attributeNamePrefix: "@_",
|
|
5
|
+
});
|
|
6
|
+
function getFirstInterventionChapterId(dataNvs) {
|
|
7
|
+
const xml = xmlParser.parse(dataNvs);
|
|
8
|
+
const rootChapters = xml?.data?.chapters?.chapter;
|
|
9
|
+
if (!rootChapters)
|
|
10
|
+
return null;
|
|
11
|
+
const chaptersArray = Array.isArray(rootChapters) ? rootChapters : [rootChapters];
|
|
12
|
+
let foundId = null;
|
|
13
|
+
function dfsChapter(chapter) {
|
|
14
|
+
if (foundId)
|
|
15
|
+
return;
|
|
16
|
+
const metas = chapter.metadata ? (Array.isArray(chapter.metadata) ? chapter.metadata : [chapter.metadata]) : [];
|
|
17
|
+
const isIntervention = metas.some((m) => m?.["@_name"] === "type" && (m?.["@_value"] === "IN" || m?.["@_label"] === "Intervention"));
|
|
18
|
+
const hasSpeaker = !!chapter.speaker;
|
|
19
|
+
if (isIntervention && hasSpeaker && chapter["@_id"]) {
|
|
20
|
+
foundId = String(chapter["@_id"]);
|
|
21
|
+
return;
|
|
22
|
+
}
|
|
23
|
+
const children = chapter.chapter;
|
|
24
|
+
if (!children)
|
|
25
|
+
return;
|
|
26
|
+
const childArray = Array.isArray(children) ? children : [children];
|
|
27
|
+
for (const child of childArray) {
|
|
28
|
+
dfsChapter(child);
|
|
29
|
+
if (foundId)
|
|
30
|
+
return;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
for (const ch of chaptersArray) {
|
|
34
|
+
dfsChapter(ch);
|
|
35
|
+
if (foundId)
|
|
36
|
+
break;
|
|
37
|
+
}
|
|
38
|
+
return foundId;
|
|
39
|
+
}
|
|
40
|
+
function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
|
|
41
|
+
const xml = xmlParser.parse(finalPlayerNvs);
|
|
42
|
+
const synchros = xml?.player?.synchro;
|
|
43
|
+
if (!synchros)
|
|
44
|
+
return null;
|
|
45
|
+
const synchsArray = Array.isArray(synchros) ? synchros : [synchros];
|
|
46
|
+
const match = synchsArray.find((s) => String(s["@_id"]) === String(chapterId));
|
|
47
|
+
if (!match)
|
|
48
|
+
return null;
|
|
49
|
+
const rawTimecode = match["@_timecode"];
|
|
50
|
+
if (rawTimecode == null)
|
|
51
|
+
return null;
|
|
52
|
+
const ms = Number(rawTimecode);
|
|
53
|
+
if (Number.isNaN(ms))
|
|
54
|
+
return null;
|
|
55
|
+
return Math.floor(ms / 1000);
|
|
56
|
+
}
|
|
57
|
+
export function getFirstInterventionStartTimecode(dataNvs, finalPlayerNvs) {
|
|
58
|
+
const firstChapterId = getFirstInterventionChapterId(dataNvs);
|
|
59
|
+
if (!firstChapterId)
|
|
60
|
+
return null;
|
|
61
|
+
return getTimecodeForChapterId(finalPlayerNvs, firstChapterId);
|
|
62
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tricoteuses/senat",
|
|
3
|
-
"version": "2.20.8",
|
|
3
|
+
"version": "2.20.10",
|
|
4
4
|
"description": "Handle French Sénat's open data",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"France",
|
|
@@ -65,6 +65,7 @@
|
|
|
65
65
|
"cheerio": "^1.1.2",
|
|
66
66
|
"command-line-args": "^6.0.1",
|
|
67
67
|
"dotenv": "^17.2.3",
|
|
68
|
+
"fast-xml-parser": "^5.3.2",
|
|
68
69
|
"fs-extra": "^11.3.2",
|
|
69
70
|
"jsdom": "^27.2.0",
|
|
70
71
|
"kysely": "^0.28.8",
|