@tricoteuses/senat 2.16.7 → 2.18.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/loaders.js +9 -2
- package/lib/model/ameli.js +5 -12
- package/lib/scripts/retrieve_videos.js +42 -22
- package/package.json +1 -1
package/lib/loaders.js
CHANGED
|
@@ -42,8 +42,15 @@ function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, {
|
|
|
42
42
|
if (log) {
|
|
43
43
|
console.log(`Loading file: ${filePath}…`);
|
|
44
44
|
}
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
let item;
|
|
46
|
+
try {
|
|
47
|
+
const itemJson = fs.readFileSync(filePath, { encoding: "utf8" });
|
|
48
|
+
item = JSON.parse(itemJson);
|
|
49
|
+
}
|
|
50
|
+
catch (err) {
|
|
51
|
+
console.warn(`[iterLoadSenatItems] skipped invalid JSON: ${filePath} (${err.message})`);
|
|
52
|
+
continue;
|
|
53
|
+
}
|
|
47
54
|
const filePathFromDataset = filePath.substring(filePath.indexOf(dataName) + dataName.length);
|
|
48
55
|
yield {
|
|
49
56
|
item,
|
package/lib/model/ameli.js
CHANGED
|
@@ -114,20 +114,13 @@ const findAllAmendementsQuery = dbSenat
|
|
|
114
114
|
concat(val("https://www.senat.fr/amendements/"), ref("ameli.ses.lil"), val("/"), ref("ameli.txt_ameli.numabs"), val("/Amdt_"), ref("ameli.amd.numabs"), val(".html")).as("url"),
|
|
115
115
|
"ameli.grppol_ameli.lilcou as au_nom_de_groupe_politique",
|
|
116
116
|
"ameli.com_ameli.lil as au_nom_de_commission",
|
|
117
|
-
eb
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
.else(false)
|
|
122
|
-
.end()
|
|
123
|
-
.as("auteur_est_gouvernement"),
|
|
124
|
-
auteurs(ref("ameli.amd.id")).as("auteurs")
|
|
125
|
-
]);
|
|
117
|
+
eb.case().when("ameli.cab.entid", "is not", null).then(true).else(false).end().as("auteur_est_gouvernement"),
|
|
118
|
+
auteurs(ref("ameli.amd.id")).as("auteurs"),
|
|
119
|
+
])
|
|
120
|
+
.distinctOn("ameli.amd.id");
|
|
126
121
|
export function findAllAmendements(fromSession) {
|
|
127
122
|
if (fromSession !== undefined) {
|
|
128
|
-
return findAllAmendementsQuery
|
|
129
|
-
.where("ameli.ses.ann", ">=", fromSession)
|
|
130
|
-
.stream();
|
|
123
|
+
return findAllAmendementsQuery.where("ameli.ses.ann", ">=", fromSession).stream();
|
|
131
124
|
}
|
|
132
125
|
return findAllAmendementsQuery.stream();
|
|
133
126
|
}
|
package/lib/scripts/retrieve_videos.js
CHANGED
|
@@ -138,15 +138,25 @@ function parseFinalNvs(nvs) {
|
|
|
138
138
|
function parseDataNvs(nvs) {
|
|
139
139
|
const epochStr = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
|
|
140
140
|
const epoch = epochStr ? Number(epochStr) : undefined;
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
141
|
+
// There can be multiple organes for one video in meta
|
|
142
|
+
const organes = [];
|
|
143
|
+
const organesRegex = /<metadata\b[^>]*\bname="organes"[^>]*>/gi;
|
|
144
|
+
let m;
|
|
145
|
+
while ((m = organesRegex.exec(nvs)) !== null) {
|
|
146
|
+
const tag = m[0];
|
|
147
|
+
const label = tag.match(/\blabel="([^"]+)"/i)?.[1];
|
|
148
|
+
if (label) {
|
|
149
|
+
const decoded = decodeHtmlEntities(label).trim();
|
|
150
|
+
if (decoded)
|
|
151
|
+
organes.push(decoded);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
if (organes.length === 0) {
|
|
155
|
+
organes.push("Séance publique");
|
|
145
156
|
}
|
|
146
|
-
const
|
|
147
|
-
const
|
|
148
|
-
|
|
149
|
-
return { epoch, organe, firstChapterLabel };
|
|
157
|
+
const firstChapterLabelMatch = nvs.match(/<chapter\b[^>]*\blabel="([^"]+)"/i);
|
|
158
|
+
const firstChapterLabel = firstChapterLabelMatch ? decodeHtmlEntities(firstChapterLabelMatch[1]).trim() : undefined;
|
|
159
|
+
return { epoch, organes, firstChapterLabel };
|
|
150
160
|
}
|
|
151
161
|
function buildSenatVodMasterM3u8FromNvs(nvsText, finalText) {
|
|
152
162
|
// 1) Base Akamai from data.nvs (mp4 "serverfiles://senat/YYYY/MM/encoderX_YYYYMMDDHHMMSS_1.mp4")
|
|
@@ -182,18 +192,20 @@ function buildSenatVodMasterM3u8FromNvs(nvsText, finalText) {
|
|
|
182
192
|
}
|
|
183
193
|
return `${base}.smil/master.m3u8`;
|
|
184
194
|
}
|
|
185
|
-
function score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch,
|
|
195
|
+
function score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, videoOrganes) {
|
|
186
196
|
const objetS = dice(agenda.objet || "", videoTitle || "");
|
|
187
197
|
const titleS = dice(agenda.titre || "", videoTitle || "");
|
|
188
198
|
const titleScore = Math.max(objetS, titleS);
|
|
189
199
|
let timeScore = 0;
|
|
190
200
|
if (agendaTs && videoEpoch) {
|
|
191
|
-
// second
|
|
192
201
|
const deltaMin = Math.abs(videoEpoch - agendaTs) / 60;
|
|
193
|
-
// delta : 60min
|
|
194
202
|
timeScore = Math.exp(-deltaMin / 60);
|
|
195
203
|
}
|
|
196
|
-
|
|
204
|
+
let orgScore = 0;
|
|
205
|
+
if (agenda.organe && videoOrganes && videoOrganes.length) {
|
|
206
|
+
const agendaOrg = agenda.organe;
|
|
207
|
+
orgScore = Math.max(...videoOrganes.map((v) => dice(agendaOrg, v)));
|
|
208
|
+
}
|
|
197
209
|
return 0.2 * titleScore + 0.4 * timeScore + (sameOrg ? 0.4 : orgScore * 0.4);
|
|
198
210
|
}
|
|
199
211
|
function getAgendaType(agenda) {
|
|
@@ -319,17 +331,25 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
319
331
|
const videoEpoch = sessionStart ?? meta.epoch;
|
|
320
332
|
let sameOrg = false;
|
|
321
333
|
// If organes are too different, go to next candidates
|
|
322
|
-
if (
|
|
323
|
-
const videoOrgNorm = normalize(meta.organe);
|
|
334
|
+
if (agenda.organe && meta.organes?.length) {
|
|
324
335
|
const agendaOrgNorm = normalize(agenda.organe);
|
|
325
|
-
const videoKey = getOrgKey(videoOrgNorm);
|
|
326
336
|
const agendaKey = getOrgKey(agendaOrgNorm);
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
337
|
+
let bestDice = 0;
|
|
338
|
+
let hasSameKey = false;
|
|
339
|
+
for (const vo of meta.organes) {
|
|
340
|
+
const videoOrgNorm = normalize(vo);
|
|
341
|
+
const videoKey = getOrgKey(videoOrgNorm);
|
|
342
|
+
const d = dice(agendaOrgNorm, videoOrgNorm);
|
|
343
|
+
if (videoKey === agendaKey && videoKey !== "autre") {
|
|
344
|
+
hasSameKey = true;
|
|
345
|
+
}
|
|
346
|
+
if (d > bestDice)
|
|
347
|
+
bestDice = d;
|
|
348
|
+
}
|
|
349
|
+
if (hasSameKey) {
|
|
350
|
+
sameOrg = true; // we are sure this is the same org
|
|
331
351
|
}
|
|
332
|
-
else if (
|
|
352
|
+
else if (bestDice < 0.7) {
|
|
333
353
|
// if diff org and dice too low we skip
|
|
334
354
|
continue;
|
|
335
355
|
}
|
|
@@ -338,7 +358,7 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
338
358
|
if (c.isSeancePublique && meta.firstChapterLabel) {
|
|
339
359
|
videoTitle = meta.firstChapterLabel;
|
|
340
360
|
}
|
|
341
|
-
const s = score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, meta.organe);
|
|
361
|
+
const s = score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, meta.organes);
|
|
342
362
|
if (!best || s > best.score) {
|
|
343
363
|
best = {
|
|
344
364
|
id: c.id,
|
|
@@ -347,7 +367,7 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
347
367
|
epoch: meta.epoch,
|
|
348
368
|
vtitle: videoTitle,
|
|
349
369
|
score: s,
|
|
350
|
-
vorgane: meta.organe,
|
|
370
|
+
vorgane: meta.organes[0],
|
|
351
371
|
};
|
|
352
372
|
}
|
|
353
373
|
}
|