@tricoteuses/senat 2.16.8 → 2.18.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@ import { commonOptions } from "./shared/cli_helpers";
10
10
  import { decodeHtmlEntities } from "../model/util";
11
11
  import { DateTime } from "luxon";
12
12
  // ===================== Constants =====================
13
- const MATCH_THRESHOLD = 0.6;
13
+ const MATCH_THRESHOLD = 0.5;
14
14
  const MAX_CANDIDATES = 15;
15
15
  const STATS = { total: 0, accepted: 0 };
16
16
  const VIDEOS_ROOT_FOLDER = "videos";
@@ -138,15 +138,25 @@ function parseFinalNvs(nvs) {
138
138
  function parseDataNvs(nvs) {
139
139
  const epochStr = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
140
140
  const epoch = epochStr ? Number(epochStr) : undefined;
141
- const organesTag = nvs.match(/<metadata\b[^>]*\bname="organes"[^>]*>/i)?.[0];
142
- let organeLabel;
143
- if (organesTag) {
144
- organeLabel = organesTag.match(/\blabel="([^"]+)"/i)?.[1];
141
+ // There can be multiple organes for one video in meta
142
+ const organes = [];
143
+ const organesRegex = /<metadata\b[^>]*\bname="organes"[^>]*>/gi;
144
+ let m;
145
+ while ((m = organesRegex.exec(nvs)) !== null) {
146
+ const tag = m[0];
147
+ const label = tag.match(/\blabel="([^"]+)"/i)?.[1];
148
+ if (label) {
149
+ const decoded = decodeHtmlEntities(label).trim();
150
+ if (decoded)
151
+ organes.push(decoded);
152
+ }
153
+ }
154
+ if (organes.length === 0) {
155
+ organes.push("Séance publique");
145
156
  }
146
- const organeRaw = organeLabel ?? "Séance publique";
147
- const organe = decodeHtmlEntities(organeRaw)?.trim();
148
- const firstChapterLabel = decodeHtmlEntities(nvs.match(/<chapter\b[^>]*\blabel="([^"]+)"/i)[1]).trim();
149
- return { epoch, organe, firstChapterLabel };
157
+ const firstChapterLabelMatch = nvs.match(/<chapter\b[^>]*\blabel="([^"]+)"/i);
158
+ const firstChapterLabel = firstChapterLabelMatch ? decodeHtmlEntities(firstChapterLabelMatch[1]).trim() : undefined;
159
+ return { epoch, organes, firstChapterLabel };
150
160
  }
151
161
  function buildSenatVodMasterM3u8FromNvs(nvsText, finalText) {
152
162
  // 1) Base Akamai from data.nvs (mp4 "serverfiles://senat/YYYY/MM/encoderX_YYYYMMDDHHMMSS_1.mp4")
@@ -182,18 +192,20 @@ function buildSenatVodMasterM3u8FromNvs(nvsText, finalText) {
182
192
  }
183
193
  return `${base}.smil/master.m3u8`;
184
194
  }
185
- function score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, videoOrgane) {
195
+ function score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, videoOrganes) {
186
196
  const objetS = dice(agenda.objet || "", videoTitle || "");
187
197
  const titleS = dice(agenda.titre || "", videoTitle || "");
188
198
  const titleScore = Math.max(objetS, titleS);
189
199
  let timeScore = 0;
190
200
  if (agendaTs && videoEpoch) {
191
- // second
192
201
  const deltaMin = Math.abs(videoEpoch - agendaTs) / 60;
193
- // delta : 60min
194
202
  timeScore = Math.exp(-deltaMin / 60);
195
203
  }
196
- const orgScore = videoOrgane && agenda.organe ? dice(agenda.organe, videoOrgane) : 0;
204
+ let orgScore = 0;
205
+ if (agenda.organe && videoOrganes && videoOrganes.length) {
206
+ const agendaOrg = agenda.organe;
207
+ orgScore = Math.max(...videoOrganes.map((v) => dice(agendaOrg, v)));
208
+ }
197
209
  return 0.2 * titleScore + 0.4 * timeScore + (sameOrg ? 0.4 : orgScore * 0.4);
198
210
  }
199
211
  function getAgendaType(agenda) {
@@ -316,20 +328,28 @@ async function processGroupedReunion(agenda, session, dataDir) {
316
328
  const finalMeta = parseFinalNvs(finalBuf.toString("utf-8"));
317
329
  sessionStart = finalMeta.sessionStart;
318
330
  }
319
- const videoEpoch = sessionStart ?? meta.epoch;
331
+ const videoEpoch = meta.epoch ?? sessionStart;
320
332
  let sameOrg = false;
321
333
  // If organes are too different, go to next candidates
322
- if (meta.organe && agenda.organe) {
323
- const videoOrgNorm = normalize(meta.organe);
334
+ if (agenda.organe && meta.organes?.length) {
324
335
  const agendaOrgNorm = normalize(agenda.organe);
325
- const videoKey = getOrgKey(videoOrgNorm);
326
336
  const agendaKey = getOrgKey(agendaOrgNorm);
327
- const d = dice(agendaOrgNorm, videoOrgNorm);
328
- if (videoKey === agendaKey && videoKey !== "autre") {
329
- // same org we keep it
330
- sameOrg = true;
337
+ let bestDice = 0;
338
+ let hasSameKey = false;
339
+ for (const vo of meta.organes) {
340
+ const videoOrgNorm = normalize(vo);
341
+ const videoKey = getOrgKey(videoOrgNorm);
342
+ const d = dice(agendaOrgNorm, videoOrgNorm);
343
+ if (videoKey === agendaKey && videoKey !== "autre") {
344
+ hasSameKey = true;
345
+ }
346
+ if (d > bestDice)
347
+ bestDice = d;
348
+ }
349
+ if (hasSameKey) {
350
+ sameOrg = true; // we are sure this is the same org
331
351
  }
332
- else if (d < 0.7) {
352
+ else if (bestDice < 0.8) {
333
353
  // if diff org and dice too low we skip
334
354
  continue;
335
355
  }
@@ -338,7 +358,7 @@ async function processGroupedReunion(agenda, session, dataDir) {
338
358
  if (c.isSeancePublique && meta.firstChapterLabel) {
339
359
  videoTitle = meta.firstChapterLabel;
340
360
  }
341
- const s = score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, meta.organe);
361
+ const s = score(agenda, agendaTs, sameOrg, videoTitle, videoEpoch, meta.organes);
342
362
  if (!best || s > best.score) {
343
363
  best = {
344
364
  id: c.id,
@@ -347,7 +367,7 @@ async function processGroupedReunion(agenda, session, dataDir) {
347
367
  epoch: meta.epoch,
348
368
  vtitle: videoTitle,
349
369
  score: s,
350
- vorgane: meta.organe,
370
+ vorgane: meta.organes[0],
351
371
  };
352
372
  }
353
373
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.16.8",
3
+ "version": "2.18.10",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",