@tricoteuses/senat 2.20.17 → 2.20.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -19
- package/lib/git.d.ts +26 -0
- package/lib/git.js +167 -0
- package/lib/index.d.ts +1 -1
- package/lib/loaders.d.ts +3 -2
- package/lib/model/commission.d.ts +2 -2
- package/lib/model/commission.js +5 -4
- package/lib/model/seance.d.ts +2 -8
- package/lib/model/seance.js +28 -113
- package/lib/model/util.d.ts +0 -4
- package/lib/model/util.js +0 -38
- package/lib/scripts/convert_data.js +25 -1
- package/lib/scripts/retrieve_agenda.js +7 -18
- package/lib/scripts/retrieve_cr_commission.js +1 -10
- package/lib/scripts/retrieve_cr_seance.d.ts +1 -1
- package/lib/scripts/retrieve_cr_seance.js +183 -127
- package/lib/scripts/retrieve_videos.d.ts +1 -1
- package/lib/scripts/retrieve_videos.js +46 -92
- package/lib/scripts/shared/cli_helpers.d.ts +25 -3
- package/lib/scripts/shared/cli_helpers.js +28 -0
- package/lib/types/agenda.d.ts +5 -6
- package/lib/utils/cr_spliting.d.ts +2 -10
- package/lib/utils/cr_spliting.js +2 -119
- package/lib/utils/date.d.ts +10 -0
- package/lib/utils/date.js +100 -0
- package/lib/utils/reunion_odj_building.d.ts +2 -2
- package/lib/utils/reunion_odj_building.js +8 -12
- package/lib/utils/reunion_parsing.d.ts +23 -0
- package/lib/utils/reunion_parsing.js +209 -0
- package/lib/utils/scoring.d.ts +14 -0
- package/lib/utils/scoring.js +147 -0
- package/lib/utils/string_cleaning.d.ts +7 -0
- package/lib/utils/string_cleaning.js +57 -0
- package/package.json +1 -1
|
@@ -4,12 +4,14 @@ import commandLineArgs from "command-line-args";
|
|
|
4
4
|
import fs from "fs-extra";
|
|
5
5
|
import fsp from "fs/promises";
|
|
6
6
|
import path from "path";
|
|
7
|
+
import * as cheerio from "cheerio";
|
|
7
8
|
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatAgendas } from "../loaders";
|
|
8
9
|
import { getSessionsFromStart } from "../types/sessions";
|
|
9
10
|
import { commonOptions } from "./shared/cli_helpers";
|
|
10
|
-
import { decodeHtmlEntities } from "../model/util";
|
|
11
|
-
import { DateTime } from "luxon";
|
|
12
11
|
import { getFirstInterventionStartTimecode } from "../utils/nvs-timecode";
|
|
12
|
+
import { decodeHtmlEntities } from "../utils/string_cleaning";
|
|
13
|
+
import { dice, normalize, scoreVideo } from "../utils/scoring";
|
|
14
|
+
import { epochToParisDateTime, toFRDate, toTargetEpoch } from "../utils/date";
|
|
13
15
|
// ===================== Constants =====================
|
|
14
16
|
const MATCH_THRESHOLD = 0.5;
|
|
15
17
|
const MAX_CANDIDATES = 15;
|
|
@@ -21,68 +23,6 @@ const SENAT_DATAS_ROOT = "https://videos.senat.fr/Datas/senat";
|
|
|
21
23
|
const optionsDefinitions = [...commonOptions];
|
|
22
24
|
const options = commandLineArgs(optionsDefinitions);
|
|
23
25
|
// ===================== Utils =====================
|
|
24
|
-
function normalize(s) {
|
|
25
|
-
return (s ?? "")
|
|
26
|
-
.toLowerCase()
|
|
27
|
-
.normalize("NFD")
|
|
28
|
-
.replace(/[\u0300-\u036f]/g, "")
|
|
29
|
-
.replace(/[^\p{L}\p{N}\s-]/gu, " ")
|
|
30
|
-
.replace(/\s+/g, " ")
|
|
31
|
-
.trim();
|
|
32
|
-
}
|
|
33
|
-
function tokens(s) {
|
|
34
|
-
return normalize(s).split(" ").filter(Boolean);
|
|
35
|
-
}
|
|
36
|
-
function dice(a, b) {
|
|
37
|
-
const A = new Set(tokens(a)), B = new Set(tokens(b));
|
|
38
|
-
if (!A.size || !B.size)
|
|
39
|
-
return 0;
|
|
40
|
-
let inter = 0;
|
|
41
|
-
for (const t of A)
|
|
42
|
-
if (B.has(t))
|
|
43
|
-
inter++;
|
|
44
|
-
return (2 * inter) / (A.size + B.size);
|
|
45
|
-
}
|
|
46
|
-
function epochToParisDateTime(epochSec) {
|
|
47
|
-
if (!Number.isFinite(epochSec))
|
|
48
|
-
return null;
|
|
49
|
-
const dUtc = new Date(epochSec * 1000);
|
|
50
|
-
// Offset heuristic (same logique que parisOffsetForDate)
|
|
51
|
-
const m = dUtc.getUTCMonth() + 1; // 1..12
|
|
52
|
-
const offsetHours = m >= 4 && m <= 10 ? 2 : 1;
|
|
53
|
-
const offsetStr = offsetHours === 2 ? "+02:00" : "+01:00";
|
|
54
|
-
// Applique l'offset pour obtenir la date/heure locales Paris
|
|
55
|
-
const localMs = dUtc.getTime() + offsetHours * 3600 * 1000;
|
|
56
|
-
const dl = new Date(localMs);
|
|
57
|
-
const yyyy = String(dl.getUTCFullYear());
|
|
58
|
-
const mm = String(dl.getUTCMonth() + 1).padStart(2, "0");
|
|
59
|
-
const dd = String(dl.getUTCDate()).padStart(2, "0");
|
|
60
|
-
const hh = String(dl.getUTCHours()).padStart(2, "0");
|
|
61
|
-
const mi = String(dl.getUTCMinutes()).padStart(2, "0");
|
|
62
|
-
const ss = String(dl.getUTCSeconds()).padStart(2, "0");
|
|
63
|
-
const ms = String(dl.getUTCMilliseconds()).padStart(3, "0");
|
|
64
|
-
return {
|
|
65
|
-
date: `${yyyy}-${mm}-${dd}`,
|
|
66
|
-
startTime: `${hh}:${mi}:${ss}.${ms}${offsetStr}`,
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
function toTargetEpoch(time, date) {
|
|
70
|
-
if (!time)
|
|
71
|
-
return null;
|
|
72
|
-
let dtLocal;
|
|
73
|
-
if (time.includes("T")) {
|
|
74
|
-
dtLocal = DateTime.fromISO(time, { zone: "Europe/Paris" });
|
|
75
|
-
}
|
|
76
|
-
else if (date) {
|
|
77
|
-
dtLocal = DateTime.fromISO(`${date}T${time}`, { zone: "Europe/Paris" });
|
|
78
|
-
}
|
|
79
|
-
else {
|
|
80
|
-
return null;
|
|
81
|
-
}
|
|
82
|
-
if (!dtLocal.isValid)
|
|
83
|
-
return null;
|
|
84
|
-
return Math.floor(dtLocal.toUTC().toSeconds());
|
|
85
|
-
}
|
|
86
26
|
async function fetchText(url) {
|
|
87
27
|
const res = await fetch(url);
|
|
88
28
|
if (!res.ok)
|
|
@@ -110,23 +50,23 @@ function queryString(obj) {
|
|
|
110
50
|
.map(([k, v]) => `${encodeURIComponent(k)}=${encodeURIComponent(v)}`)
|
|
111
51
|
.join("&");
|
|
112
52
|
}
|
|
113
|
-
function toFRDate(dateYYYYMMDD) {
|
|
114
|
-
const [y, m, d] = dateYYYYMMDD.split("-");
|
|
115
|
-
return `${d}/${m}/${y}`; // DD/MM/YYYY
|
|
116
|
-
}
|
|
117
53
|
function extractCandidatesFromSearchHtml(html) {
|
|
54
|
+
const $ = cheerio.load(html);
|
|
118
55
|
const out = [];
|
|
119
|
-
const re = /
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
const
|
|
56
|
+
const re = /video\.(\d+)_([a-z0-9]+)/i;
|
|
57
|
+
$('h3.card-title a.stretched-link[href*="video."]').each((_, a) => {
|
|
58
|
+
const href = $(a).attr("href") || "";
|
|
59
|
+
const m = href.match(re);
|
|
60
|
+
if (!m)
|
|
61
|
+
return;
|
|
62
|
+
const id = m[1];
|
|
63
|
+
const hash = m[2];
|
|
123
64
|
const pageUrl = `https://videos.senat.fr/video.${id}_${hash}.html`;
|
|
124
|
-
const
|
|
125
|
-
const t = ctx.match(/title="([^"]+)"/i) || ctx.match(/>([^<]{10,200})</);
|
|
126
|
-
const title = t?.[1]?.trim();
|
|
65
|
+
const title = ($(a).attr("title") || $(a).text() || "").replace(/\s+/g, " ").trim() || undefined;
|
|
127
66
|
const isSeancePublique = title?.toLowerCase().includes("séance publique") ?? false;
|
|
128
67
|
out.push({ id, hash, pageUrl, title, isSeancePublique });
|
|
129
|
-
}
|
|
68
|
+
});
|
|
69
|
+
// dedupe
|
|
130
70
|
const seen = new Set();
|
|
131
71
|
return out.filter((c) => {
|
|
132
72
|
const k = `${c.id}_${c.hash}`;
|
|
@@ -159,7 +99,7 @@ function parseDataNvs(nvs) {
|
|
|
159
99
|
const firstChapterLabel = firstChapterLabelMatch ? decodeHtmlEntities(firstChapterLabelMatch[1]).trim() : undefined;
|
|
160
100
|
return { epoch, organes, firstChapterLabel };
|
|
161
101
|
}
|
|
162
|
-
|
|
102
|
+
function buildSenatVodMasterM3u8FromNvs(nvsText) {
|
|
163
103
|
// serverfiles://senat/2025/10/encoder10_20251022084451_2.mp4
|
|
164
104
|
const m = nvsText.match(/serverfiles:\/\/senat\/(\d{4})\/(\d{2})\/(encoder\d+)_([0-9]{14})/i);
|
|
165
105
|
if (!m)
|
|
@@ -168,21 +108,31 @@ export function buildSenatVodMasterM3u8FromNvs(nvsText) {
|
|
|
168
108
|
const base = `https://vodsenat.akamaized.net/senat/${yyyy}/${mm}/${encoder}_${stamp}`;
|
|
169
109
|
return `${base}.smil/master.m3u8`;
|
|
170
110
|
}
|
|
171
|
-
function
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
const
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
orgScore = Math.max(...videoOrganes.map((v) => dice(agendaOrg, v)));
|
|
111
|
+
/**
 * Tells whether a free-text agenda time (e.g. "14h30", "de 9h à 12h",
 * "à 10 h 30 et toute la journée") is too ambiguous to be used for time scoring.
 *
 * A text is ambiguous when:
 *  - it mentions at least two distinct clock times ("midi" counts as 12:00 and
 *    "minuit" as 00:00), or
 *  - it mentions exactly one clock time together with a day period (matin,
 *    après-midi, soir, nuit, journée...) and a linking word or comma.
 *
 * @param {string | null | undefined} timeOriginal raw time text from the agenda
 * @returns {boolean} true when the time text should not be trusted for matching
 */
function isAmbiguousTimeOriginal(timeOriginal) {
    if (!timeOriginal)
        return false;
    const s = timeOriginal.toLowerCase();
    // Catches "14h", "14 h", "14h30", "14 h 30", "14 heures", "14 heure"
    const timeRe = /\b([01]?\d|2[0-3])\s*(?:heures?|h)\s*(?:([0-5]\d))?\b/g;
    const times = new Set();
    for (const m of s.matchAll(timeRe)) {
        const hh = m[1].padStart(2, "0");
        const mm = m[2] ?? "00";
        times.add(`${hh}:${mm}`);
    }
    // ASCII `\b` treats accented letters as non-word characters, so word limits
    // are expressed with Unicode-aware lookarounds instead.
    // "midi" / "minuit" — but not the "midi" inside "après-midi", which is a
    // day period and not a clock time.
    if (/(?<![\p{L}\p{N}_])(?<!après[-\s])midi(?![\p{L}\p{N}_])/u.test(s))
        times.add("12:00");
    if (/(?<![\p{L}\p{N}_])minuit(?![\p{L}\p{N}_])/u.test(s))
        times.add("00:00");
    if (times.size >= 2)
        return true;
    // "matinée" / "soirée" are listed explicitly: they were (accidentally)
    // accepted by the former `\bmatin\b` / `\bsoir\b` patterns and must stay so.
    const hasDayPeriod = /(?<![\p{L}\p{N}_])(?:matin(?:ée)?|après-?midi|soir(?:ée)?|nuit|journée)(?![\p{L}\p{N}_])/u.test(s);
    const hasLinking = /,|(?<![\p{L}\p{N}_])(?:et|ou|puis)(?![\p{L}\p{N}_])/u.test(s);
    return times.size === 1 && hasDayPeriod && hasLinking;
}
|
|
187
137
|
function getAgendaType(agenda) {
|
|
188
138
|
const o = agenda.organe || "";
|
|
@@ -310,6 +260,10 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
310
260
|
}
|
|
311
261
|
// ==== 2) Enrich via data.nvs + scoring; pick best ====
|
|
312
262
|
let best = null;
|
|
263
|
+
const timeAmbigious = isAmbiguousTimeOriginal(agenda.events[0].timeOriginal);
|
|
264
|
+
if (timeAmbigious) {
|
|
265
|
+
console.log(`[match] ${agenda.uid} timeOriginal ambiguous => ignoring time scoring: "${agenda.events[0].timeOriginal}"`);
|
|
266
|
+
}
|
|
313
267
|
for (const c of candidates) {
|
|
314
268
|
const dataUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`;
|
|
315
269
|
const finalUrl = `${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/finalplayer.nvs`;
|
|
@@ -346,7 +300,7 @@ async function processGroupedReunion(agenda, session, dataDir) {
|
|
|
346
300
|
if (c.isSeancePublique && meta.firstChapterLabel) {
|
|
347
301
|
videoTitle = meta.firstChapterLabel;
|
|
348
302
|
}
|
|
349
|
-
const s =
|
|
303
|
+
const s = scoreVideo(agenda, agendaTs, sameOrg, videoTitle, meta.epoch, meta.organes, timeAmbigious);
|
|
350
304
|
if (!best || s > best.score) {
|
|
351
305
|
best = {
|
|
352
306
|
id: c.id,
|
|
@@ -40,14 +40,31 @@ export declare const keepDirOption: {
|
|
|
40
40
|
name: string;
|
|
41
41
|
type: BooleanConstructor;
|
|
42
42
|
};
|
|
43
|
-
export declare const
|
|
43
|
+
export declare const cloneOption: {
|
|
44
|
+
alias: string;
|
|
45
|
+
help: string;
|
|
46
|
+
name: string;
|
|
47
|
+
type: StringConstructor;
|
|
48
|
+
};
|
|
49
|
+
export declare const commitOption: {
|
|
50
|
+
help: string;
|
|
51
|
+
name: string;
|
|
52
|
+
type: BooleanConstructor;
|
|
53
|
+
};
|
|
54
|
+
export declare const remoteOption: {
|
|
44
55
|
alias: string;
|
|
45
|
-
defaultValue: string[];
|
|
46
56
|
help: string;
|
|
47
57
|
multiple: boolean;
|
|
48
58
|
name: string;
|
|
49
59
|
type: StringConstructor;
|
|
50
|
-
}
|
|
60
|
+
};
|
|
61
|
+
export declare const pullOption: {
|
|
62
|
+
alias: string;
|
|
63
|
+
help: string;
|
|
64
|
+
name: string;
|
|
65
|
+
type: BooleanConstructor;
|
|
66
|
+
};
|
|
67
|
+
export declare const commonOptions: ({
|
|
51
68
|
defaultOption: boolean;
|
|
52
69
|
help: string;
|
|
53
70
|
name: string;
|
|
@@ -60,4 +77,9 @@ export declare const commonOptions: ({
|
|
|
60
77
|
help: string;
|
|
61
78
|
name: string;
|
|
62
79
|
type: BooleanConstructor;
|
|
80
|
+
} | {
|
|
81
|
+
alias: string;
|
|
82
|
+
help: string;
|
|
83
|
+
name: string;
|
|
84
|
+
type: StringConstructor;
|
|
63
85
|
})[];
|
|
@@ -40,6 +40,30 @@ export const keepDirOption = {
|
|
|
40
40
|
name: "keepDir",
|
|
41
41
|
type: Boolean,
|
|
42
42
|
};
|
|
43
|
+
export const cloneOption = {
|
|
44
|
+
alias: "C",
|
|
45
|
+
help: "clone repositories from given group (or organization) git URL",
|
|
46
|
+
name: "clone",
|
|
47
|
+
type: String,
|
|
48
|
+
};
|
|
49
|
+
export const commitOption = {
|
|
50
|
+
help: "commit clean files",
|
|
51
|
+
name: "commit",
|
|
52
|
+
type: Boolean,
|
|
53
|
+
};
|
|
54
|
+
export const remoteOption = {
|
|
55
|
+
alias: "r",
|
|
56
|
+
help: "push commit to given remote",
|
|
57
|
+
multiple: true,
|
|
58
|
+
name: "remote",
|
|
59
|
+
type: String,
|
|
60
|
+
};
|
|
61
|
+
export const pullOption = {
|
|
62
|
+
alias: "p",
|
|
63
|
+
help: "pull repositories before proceeding",
|
|
64
|
+
name: "pull",
|
|
65
|
+
type: Boolean,
|
|
66
|
+
};
|
|
43
67
|
export const commonOptions = [
|
|
44
68
|
categoriesOption,
|
|
45
69
|
dataDirDefaultOption,
|
|
@@ -48,4 +72,8 @@ export const commonOptions = [
|
|
|
48
72
|
verboseOption,
|
|
49
73
|
onlyRecentOption,
|
|
50
74
|
keepDirOption,
|
|
75
|
+
cloneOption,
|
|
76
|
+
commitOption,
|
|
77
|
+
remoteOption,
|
|
78
|
+
pullOption,
|
|
51
79
|
];
|
package/lib/types/agenda.d.ts
CHANGED
|
@@ -14,11 +14,10 @@ export interface AgendaEvent {
|
|
|
14
14
|
quantieme: string | null;
|
|
15
15
|
}
|
|
16
16
|
export type TimeSlot = "MATIN" | "APRES-MIDI" | "SOIR" | "UNKNOWN";
|
|
17
|
-
export interface
|
|
17
|
+
export interface Reunion {
|
|
18
18
|
uid: string;
|
|
19
19
|
chambre: "SN";
|
|
20
20
|
date: string;
|
|
21
|
-
slot?: TimeSlot;
|
|
22
21
|
startTime: string | null;
|
|
23
22
|
endTime: string | null;
|
|
24
23
|
captationVideo: boolean;
|
|
@@ -32,13 +31,13 @@ export interface GroupedReunion {
|
|
|
32
31
|
transcriptionRef?: string;
|
|
33
32
|
urlVideo?: string;
|
|
34
33
|
timecodeDebutVideo?: number;
|
|
35
|
-
odj?:
|
|
34
|
+
odj?: ReunionOdj;
|
|
36
35
|
}
|
|
37
|
-
export interface
|
|
36
|
+
export interface ReunionOdjPoint {
|
|
38
37
|
objet: string | null;
|
|
39
38
|
dossierLegislatifRef: string | null;
|
|
40
39
|
codeEtape: string | null;
|
|
41
40
|
}
|
|
42
|
-
export interface
|
|
43
|
-
pointsOdj:
|
|
41
|
+
export interface ReunionOdj {
|
|
42
|
+
pointsOdj: ReunionOdjPoint[];
|
|
44
43
|
}
|
|
@@ -1,11 +1,4 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import * as cheerio from "cheerio";
|
|
3
|
-
export declare function computeIntervalsBySlot($: cheerio.CheerioAPI, idx: Map<any, number>, firstSlotOfDay?: TimeSlot): {
|
|
4
|
-
slot: TimeSlot;
|
|
5
|
-
start: number;
|
|
6
|
-
end: number;
|
|
7
|
-
}[];
|
|
8
|
-
export declare function frDateToISO(s?: string): string | undefined;
|
|
1
|
+
import { Reunion } from "../types/agenda";
|
|
9
2
|
export declare function parseCommissionMetadataFromHtml(html: string, sourceFileName?: string): {
|
|
10
3
|
sourceFile: string | null;
|
|
11
4
|
organeTitleRaw: string | null;
|
|
@@ -18,8 +11,7 @@ export declare function parseCommissionMetadataFromHtml(html: string, sourceFile
|
|
|
18
11
|
h2Index: number;
|
|
19
12
|
}[];
|
|
20
13
|
};
|
|
21
|
-
export declare function loadAgendaForDate(dataDir: string, yyyymmdd: string, session: number): Promise<
|
|
22
|
-
export declare function hourShortToStartTime(hourShort: string | null): string | null;
|
|
14
|
+
export declare function loadAgendaForDate(dataDir: string, yyyymmdd: string, session: number): Promise<Reunion[]>;
|
|
23
15
|
export declare function linkCRtoCommissionGroup(opts: {
|
|
24
16
|
dataDir: string;
|
|
25
17
|
dateISO: string;
|
package/lib/utils/cr_spliting.js
CHANGED
|
@@ -3,76 +3,8 @@ import * as cheerio from "cheerio";
|
|
|
3
3
|
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
4
4
|
import fs from "fs-extra";
|
|
5
5
|
import { sessionStartYearFromDate } from "../model/seance";
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
const cuts = [{ pos: 0, hhmm: undefined }];
|
|
9
|
-
$("a[name]").each((_, a) => {
|
|
10
|
-
const name = (a.attribs?.["name"] || "").trim();
|
|
11
|
-
if (!/^su/i.test(name))
|
|
12
|
-
return;
|
|
13
|
-
const pos = idx.get(a);
|
|
14
|
-
if (pos == null)
|
|
15
|
-
return;
|
|
16
|
-
const hhmm = hhmmFromSuName(name); // "SU1620" -> "1620"
|
|
17
|
-
cuts.push({ pos, hhmm });
|
|
18
|
-
});
|
|
19
|
-
cuts.sort((a, b) => a.pos - b.pos);
|
|
20
|
-
cuts.push({ pos: all.length, hhmm: undefined });
|
|
21
|
-
let initialSlot = firstSlotOfDay;
|
|
22
|
-
if (!initialSlot) {
|
|
23
|
-
const openHHMM = extractOpeningHHMM($);
|
|
24
|
-
if (openHHMM)
|
|
25
|
-
initialSlot = slotOfHHMM(openHHMM);
|
|
26
|
-
}
|
|
27
|
-
if (!initialSlot)
|
|
28
|
-
initialSlot = "MATIN";
|
|
29
|
-
const intervals = [];
|
|
30
|
-
let lastSlot = initialSlot;
|
|
31
|
-
for (let i = 0; i + 1 < cuts.length; i++) {
|
|
32
|
-
const start = cuts[i].pos;
|
|
33
|
-
const end = cuts[i + 1].pos;
|
|
34
|
-
if (end <= start)
|
|
35
|
-
continue;
|
|
36
|
-
// i=0 initialSlot
|
|
37
|
-
// i>0 : if current cut has SU -> slotOfHHMM, otherwise lastSlot
|
|
38
|
-
const slot = i === 0 ? initialSlot : cuts[i].hhmm ? slotOfHHMM(cuts[i].hhmm) : lastSlot;
|
|
39
|
-
intervals.push({ slot, start, end });
|
|
40
|
-
lastSlot = slot;
|
|
41
|
-
}
|
|
42
|
-
return intervals;
|
|
43
|
-
}
|
|
44
|
-
function hhmmFromSuName(name) {
|
|
45
|
-
const m = name.match(/^SU(\d{2})(\d{2})$/i);
|
|
46
|
-
if (!m)
|
|
47
|
-
return;
|
|
48
|
-
return `${m[1]}:${m[2]}`;
|
|
49
|
-
}
|
|
50
|
-
function slotOfHHMM(hhmm) {
|
|
51
|
-
if (!hhmm)
|
|
52
|
-
return "MATIN";
|
|
53
|
-
const [h, m] = hhmm.split(":").map(Number);
|
|
54
|
-
const v = h + m / 60;
|
|
55
|
-
if (v < 12)
|
|
56
|
-
return "MATIN";
|
|
57
|
-
if (v < 18.5)
|
|
58
|
-
return "APRES-MIDI";
|
|
59
|
-
return "SOIR";
|
|
60
|
-
}
|
|
61
|
-
// Looks for text like "(La séance est ouverte à quinze heures.)" and extracts "HH:MM"
|
|
62
|
-
function extractOpeningHHMM($) {
|
|
63
|
-
let txt = "";
|
|
64
|
-
$("span.info_entre_parentheses, .info_entre_parentheses").each((_, el) => {
|
|
65
|
-
const t = ($(el).text() || "").replace(/\s+/g, " ").trim();
|
|
66
|
-
if (!txt && /\bs[eé]ance est ouverte\b/i.test(t))
|
|
67
|
-
txt = t;
|
|
68
|
-
});
|
|
69
|
-
if (!txt)
|
|
70
|
-
return undefined;
|
|
71
|
-
const inner = txt.match(/\(.*?ouverte\s+à\s+([^)]+?)\)/i)?.[1];
|
|
72
|
-
if (!inner)
|
|
73
|
-
return undefined;
|
|
74
|
-
return parseFrenchClockToHHMM(inner);
|
|
75
|
-
}
|
|
6
|
+
import { frDateToISO, hourShortToStartTime } from "./date";
|
|
7
|
+
import { normalizeSpaces } from "./string_cleaning";
|
|
76
8
|
// Converts "quinze heures trente", "15 heures 30", "dix-sept heures moins le quart", etc. to "HHMM"
|
|
77
9
|
function parseFrenchClockToHHMM(input) {
|
|
78
10
|
const s = (input || "")
|
|
@@ -173,40 +105,6 @@ function parseFrenchClockToHHMM(input) {
|
|
|
173
105
|
}
|
|
174
106
|
return `${String(hour).padStart(2, "0")}${String(minutes).padStart(2, "0")}`;
|
|
175
107
|
}
|
|
176
|
-
export function frDateToISO(s) {
|
|
177
|
-
if (!s)
|
|
178
|
-
return;
|
|
179
|
-
const months = {
|
|
180
|
-
janvier: 1,
|
|
181
|
-
février: 2,
|
|
182
|
-
fevrier: 2,
|
|
183
|
-
mars: 3,
|
|
184
|
-
avril: 4,
|
|
185
|
-
mai: 5,
|
|
186
|
-
juin: 6,
|
|
187
|
-
juillet: 7,
|
|
188
|
-
août: 8,
|
|
189
|
-
aout: 8,
|
|
190
|
-
septembre: 9,
|
|
191
|
-
octobre: 10,
|
|
192
|
-
novembre: 11,
|
|
193
|
-
décembre: 12,
|
|
194
|
-
decembre: 12,
|
|
195
|
-
};
|
|
196
|
-
const cleaned = s
|
|
197
|
-
.trim()
|
|
198
|
-
.replace(/\u00A0/g, " ")
|
|
199
|
-
.replace(/ +/g, " ");
|
|
200
|
-
const m = cleaned.match(/^(\d{1,2})(?:er)?\s+([a-zéèêîïôûùç]+)\s+(\d{4})$/i);
|
|
201
|
-
if (!m)
|
|
202
|
-
return;
|
|
203
|
-
const d = String(parseInt(m[1], 10)).padStart(2, "0");
|
|
204
|
-
const mon = months[m[2].toLowerCase()];
|
|
205
|
-
if (!mon)
|
|
206
|
-
return;
|
|
207
|
-
const y = m[3];
|
|
208
|
-
return `${y}-${String(mon).padStart(2, "0")}-${d}`;
|
|
209
|
-
}
|
|
210
108
|
function extractWeekStartFromHead($) {
|
|
211
109
|
const og = $('meta[property="og:title"]').attr("content") || $("title").text();
|
|
212
110
|
const m = (og ?? "").toLowerCase().match(/semaine du\s+(\d{1,2}\s+\w+\s+\d{4})/i);
|
|
@@ -230,12 +128,6 @@ function detectOrganeFromTitle(s) {
|
|
|
230
128
|
}
|
|
231
129
|
return { organeTitleRaw: t, organeDetected };
|
|
232
130
|
}
|
|
233
|
-
function normalizeSpaces(s) {
|
|
234
|
-
return s
|
|
235
|
-
.replace(/\u00A0/g, " ")
|
|
236
|
-
.replace(/\s+/g, " ")
|
|
237
|
-
.trim();
|
|
238
|
-
}
|
|
239
131
|
function extractDaysAndOpenings($) {
|
|
240
132
|
const days = [];
|
|
241
133
|
const h2s = $("h2").toArray();
|
|
@@ -316,15 +208,6 @@ export async function loadAgendaForDate(dataDir, yyyymmdd, session) {
|
|
|
316
208
|
}
|
|
317
209
|
return out;
|
|
318
210
|
}
|
|
319
|
-
export function hourShortToStartTime(hourShort) {
|
|
320
|
-
if (!hourShort || hourShort === "NA")
|
|
321
|
-
return null;
|
|
322
|
-
if (!/^\d{4}$/.test(hourShort))
|
|
323
|
-
return null;
|
|
324
|
-
const hh = hourShort.slice(0, 2);
|
|
325
|
-
const mm = hourShort.slice(2, 4);
|
|
326
|
-
return `${hh}:${mm}`;
|
|
327
|
-
}
|
|
328
211
|
export async function linkCRtoCommissionGroup(opts) {
|
|
329
212
|
const { dataDir, dateISO, organeDetected, hourShort, crUid, titreGuess, groupUid } = opts;
|
|
330
213
|
const computedUid = crUid.replace(/^CRC/, "RU");
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export declare function yyyymmddFromPath(xmlFilePath: string): string;
|
|
2
|
+
export declare function parseYYYYMMDD(yyyymmdd: string): Date | null;
|
|
3
|
+
export declare function frDateToISO(s?: string): string | undefined;
|
|
4
|
+
export declare function hourShortToStartTime(hourShort: string | null): string | null;
|
|
5
|
+
export declare function epochToParisDateTime(epochSec: number): {
|
|
6
|
+
date: string;
|
|
7
|
+
startTime: string;
|
|
8
|
+
} | null;
|
|
9
|
+
export declare function toTargetEpoch(time: string | null, date?: string | null): number | null;
|
|
10
|
+
export declare function toFRDate(dateYYYYMMDD: string): string;
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { DateTime } from "luxon";
|
|
2
|
+
/**
 * Extracts the first run of eight consecutive digits (expected to be a
 * YYYYMMDD date) from a file path.
 *
 * @param {string} xmlFilePath path or file name to inspect
 * @returns {string} the eight digits, or the input unchanged when none is found
 */
export function yyyymmddFromPath(xmlFilePath) {
    const found = xmlFilePath.match(/^.*?(\d{8}).*$/i);
    if (found === null) {
        return xmlFilePath;
    }
    return found[1];
}
|
|
5
|
+
/**
 * Parses a compact "YYYYMMDD" string into a Date at local midnight.
 *
 * @param {string} yyyymmdd eight-digit date string
 * @returns {Date | null} the date, or null when the input is not eight digits
 *   or does not designate a real calendar day (e.g. "20250230", "20251301")
 */
export function parseYYYYMMDD(yyyymmdd) {
    if (typeof yyyymmdd !== "string" || !/^\d{8}$/.test(yyyymmdd))
        return null;
    const y = Number(yyyymmdd.slice(0, 4));
    const m = Number(yyyymmdd.slice(4, 6)) - 1;
    const d = Number(yyyymmdd.slice(6, 8));
    // setFullYear (unlike the Date constructor) does not remap years 0-99 to 19xx.
    const dt = new Date(0);
    dt.setFullYear(y, m, d);
    dt.setHours(0, 0, 0, 0);
    // Date silently rolls over out-of-range fields (30 Feb -> 2 Mar), so a
    // round-trip comparison is the only reliable validity check.
    if (dt.getFullYear() !== y || dt.getMonth() !== m || dt.getDate() !== d)
        return null;
    return dt;
}
|
|
14
|
+
/**
 * Converts a French long-form date ("1er janvier 2025", "15 août 2024")
 * into an ISO "YYYY-MM-DD" string.
 *
 * Month names are matched case-insensitively, with or without accents.
 *
 * @param {string} [s] French date text
 * @returns {string | undefined} the ISO date, or undefined when the input is
 *   empty, does not match "<day> <month> <year>", uses an unknown month name,
 *   or designates a day that does not exist in that month
 */
export function frDateToISO(s) {
    if (!s)
        return;
    // A Map (rather than a plain object) so that words such as "constructor"
    // cannot resolve to inherited Object.prototype members.
    const months = new Map([
        ["janvier", 1],
        ["février", 2],
        ["fevrier", 2],
        ["mars", 3],
        ["avril", 4],
        ["mai", 5],
        ["juin", 6],
        ["juillet", 7],
        ["août", 8],
        ["aout", 8],
        ["septembre", 9],
        ["octobre", 10],
        ["novembre", 11],
        ["décembre", 12],
        ["decembre", 12],
    ]);
    const cleaned = s
        .trim()
        .replace(/\u00A0/g, " ")
        .replace(/ +/g, " ");
    const m = cleaned.match(/^(\d{1,2})(?:er)?\s+([a-zéèêîïôûùç]+)\s+(\d{4})$/i);
    if (!m)
        return;
    const day = Number.parseInt(m[1], 10);
    const mon = months.get(m[2].toLowerCase());
    if (mon === undefined)
        return;
    const y = m[3];
    // Reject impossible days (0, 32, 30 février...) through a round trip.
    const probe = new Date(0);
    probe.setUTCFullYear(Number(y), mon - 1, day);
    if (probe.getUTCMonth() !== mon - 1 || probe.getUTCDate() !== day)
        return;
    return `${y}-${String(mon).padStart(2, "0")}-${String(day).padStart(2, "0")}`;
}
|
|
48
|
+
/**
 * Converts a compact "HHMM" hour (e.g. "1430") into "HH:MM".
 *
 * @param {string | null} hourShort four-digit hour, "NA", or null
 * @returns {string | null} "HH:MM", or null when the input is missing, "NA",
 *   not exactly four digits, or not a valid 24-hour clock time
 */
export function hourShortToStartTime(hourShort) {
    if (!hourShort || hourShort === "NA")
        return null;
    if (typeof hourShort !== "string" || !/^\d{4}$/.test(hourShort))
        return null;
    const hh = hourShort.slice(0, 2);
    const mm = hourShort.slice(2, 4);
    // Four digits alone do not make a clock time: reject "2460", "9999", ...
    if (Number(hh) > 23 || Number(mm) > 59)
        return null;
    return `${hh}:${mm}`;
}
|
|
57
|
+
/**
 * Converts a Unix timestamp (seconds) into the Europe/Paris local date and
 * time, with the UTC offset that actually applies at that instant.
 *
 * The offset comes from the runtime's time-zone database rather than from a
 * month-based guess, so the days around the last Sunday of March and of
 * October (daylight-saving transitions) are handled correctly.
 *
 * @param {number} epochSec seconds since the Unix epoch (may be fractional)
 * @returns {{ date: string, startTime: string } | null} `date` as "YYYY-MM-DD"
 *   and `startTime` as "HH:mm:ss.SSS±HH:MM", or null when `epochSec` is not a
 *   finite number or is outside the range representable by Date
 */
export function epochToParisDateTime(epochSec) {
    if (!Number.isFinite(epochSec))
        return null;
    const instant = new Date(epochSec * 1000);
    if (Number.isNaN(instant.getTime()))
        return null;
    const formatter = new Intl.DateTimeFormat("en-GB", {
        timeZone: "Europe/Paris",
        hourCycle: "h23",
        year: "numeric",
        month: "2-digit",
        day: "2-digit",
        hour: "2-digit",
        minute: "2-digit",
        second: "2-digit",
    });
    const parts = {};
    for (const { type, value } of formatter.formatToParts(instant)) {
        parts[type] = value;
    }
    const millis = instant.getUTCMilliseconds();
    // Read the Paris wall clock back as if it were UTC: the distance to the
    // real instant (truncated to the second) is the UTC offset in force.
    const wallClockAsUtcMs = Date.UTC(Number(parts.year), Number(parts.month) - 1, Number(parts.day), Number(parts.hour), Number(parts.minute), Number(parts.second));
    const offsetMinutes = Math.round((wallClockAsUtcMs - (instant.getTime() - millis)) / 60000);
    const sign = offsetMinutes < 0 ? "-" : "+";
    const absMinutes = Math.abs(offsetMinutes);
    const offsetHH = String(Math.floor(absMinutes / 60)).padStart(2, "0");
    const offsetMM = String(absMinutes % 60).padStart(2, "0");
    const ms = String(millis).padStart(3, "0");
    return {
        date: `${parts.year}-${parts.month}-${parts.day}`,
        startTime: `${parts.hour}:${parts.minute}:${parts.second}.${ms}${sign}${offsetHH}:${offsetMM}`,
    };
}
|
|
80
|
+
/**
 * Turns an agenda time into a Unix timestamp (seconds), reading the wall-clock
 * value in the Europe/Paris zone.
 *
 * @param {string | null} time either a full ISO date-time (contains "T") or a
 *   bare time that is combined with `date`
 * @param {string | null} [date] ISO date used when `time` has no date part
 * @returns {number | null} whole seconds since the epoch, or null when `time`
 *   is missing, when a bare time comes without a date, or when parsing fails
 */
export function toTargetEpoch(time, date) {
    if (!time) {
        return null;
    }
    const carriesDate = time.includes("T");
    if (!carriesDate && !date) {
        return null;
    }
    const isoText = carriesDate ? time : `${date}T${time}`;
    const parisMoment = DateTime.fromISO(isoText, { zone: "Europe/Paris" });
    return parisMoment.isValid ? Math.floor(parisMoment.toUTC().toSeconds()) : null;
}
|
|
97
|
+
/**
 * Reorders an ISO-style "YYYY-MM-DD" date into the French "DD/MM/YYYY" form.
 *
 * @param {string} dateYYYYMMDD date with dash-separated year, month and day
 * @returns {string} the same date written as DD/MM/YYYY
 */
export function toFRDate(dateYYYYMMDD) {
    const pieces = dateYYYYMMDD.split("-");
    const year = pieces[0];
    const month = pieces[1];
    const day = pieces[2];
    return `${day}/${month}/${year}`;
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import commandLineArgs from "command-line-args";
|
|
2
2
|
import { DossierLegislatifResult } from "../model/dosleg";
|
|
3
|
-
import { AgendaEvent,
|
|
4
|
-
export declare function buildOdj(events: AgendaEvent[], dossierBySenatUrl: Record<string, DossierLegislatifResult>):
|
|
3
|
+
import { AgendaEvent, ReunionOdj } from "../types/agenda";
|
|
4
|
+
export declare function buildOdj(events: AgendaEvent[], dossierBySenatUrl: Record<string, DossierLegislatifResult>): ReunionOdj | undefined;
|
|
5
5
|
export declare function buildSenatDossierIndex(options: commandLineArgs.CommandLineOptions): Record<string, DossierLegislatifResult>;
|
|
@@ -7,12 +7,8 @@ export function buildOdj(events, dossierBySenatUrl) {
|
|
|
7
7
|
for (const ev of events) {
|
|
8
8
|
const objetKey = (ev.objet ?? "").trim();
|
|
9
9
|
const url = normalizeSenatUrl(ev.urlDossierSenat) ?? undefined;
|
|
10
|
-
if (url)
|
|
11
|
-
console.log(` urlDossierSenat: ${url}`);
|
|
12
10
|
dossier = url ? dossierBySenatUrl[url] : null;
|
|
13
11
|
const dossierUid = dossier ? pickDossierUid(dossier) : undefined;
|
|
14
|
-
if (url)
|
|
15
|
-
console.log(` → matched dossier uid: ${dossierUid}`);
|
|
16
12
|
codeEtape = dossier ? computeCodeEtape(ev, dossier) : null;
|
|
17
13
|
// si on n’a ni objet ni dossier, ça ne sert à rien de créer un point
|
|
18
14
|
if (!objetKey && !dossierUid)
|
|
@@ -94,36 +90,36 @@ function computeCodeEtape(ev, dossier) {
|
|
|
94
90
|
return false;
|
|
95
91
|
return true;
|
|
96
92
|
});
|
|
97
|
-
console.log(` → candidats STRICT (date==${evDate} & nature=${nature || "ANY"}): ${candidates.length}`)
|
|
98
|
-
candidates.forEach((c) => console.log(` STRICT MATCH: ${c.codeActe} (date=${c.date}, lecture=${c.ordreLecture})`))
|
|
93
|
+
// console.log(` → candidats STRICT (date==${evDate} & nature=${nature || "ANY"}): ${candidates.length}`)
|
|
94
|
+
// candidates.forEach((c) => console.log(` STRICT MATCH: ${c.codeActe} (date=${c.date}, lecture=${c.ordreLecture})`))
|
|
99
95
|
// Si lecture détectée → on filtre si ça garde des candidats
|
|
100
96
|
if (lecture !== undefined && candidates.length > 0) {
|
|
101
97
|
const withLecture = candidates.filter((c) => c.ordreLecture === lecture);
|
|
102
98
|
if (withLecture.length > 0) {
|
|
103
|
-
console.log(` → filtre lecture=${lecture} : ${withLecture.length} candidats`)
|
|
99
|
+
// console.log(` → filtre lecture=${lecture} : ${withLecture.length} candidats`)
|
|
104
100
|
candidates = withLecture;
|
|
105
101
|
}
|
|
106
102
|
}
|
|
107
103
|
// Multiple candidates : we take the longest ?
|
|
108
104
|
if (candidates.length > 0) {
|
|
109
105
|
candidates.sort((a, b) => b.codeActe.length - a.codeActe.length);
|
|
110
|
-
console.log(` ✔ match FINAL (intervalle) : ${candidates[0].codeActe}`)
|
|
106
|
+
// console.log(` ✔ match FINAL (intervalle) : ${candidates[0].codeActe}`)
|
|
111
107
|
return candidates[0].codeActe;
|
|
112
108
|
}
|
|
113
109
|
// 2) fallback COM : dernier acte COM avant la date
|
|
114
110
|
if (nature === "COM") {
|
|
115
111
|
let comActs = flat.filter((a) => a.codeActe.includes("COM") && a.date <= evDate);
|
|
116
|
-
console.log(` → fallback COM: actes COM <= date : ${comActs.length}`)
|
|
112
|
+
// console.log(` → fallback COM: actes COM <= date : ${comActs.length}`)
|
|
117
113
|
if (lecture !== undefined) {
|
|
118
114
|
const byLecture = comActs.filter((a) => a.ordreLecture === lecture);
|
|
119
115
|
if (byLecture.length > 0) {
|
|
120
116
|
comActs = byLecture;
|
|
121
|
-
console.log(` → filtrés lecture=${lecture}: ${comActs.length}`)
|
|
117
|
+
// console.log(` → filtrés lecture=${lecture}: ${comActs.length}`)
|
|
122
118
|
}
|
|
123
119
|
}
|
|
124
120
|
if (comActs.length > 0) {
|
|
125
121
|
comActs.sort((a, b) => b.date.localeCompare(a.date) || b.codeActe.length - a.codeActe.length);
|
|
126
|
-
console.log(` ✔ match FINAL (fallback COM): ${comActs[0].codeActe}`)
|
|
122
|
+
// console.log(` ✔ match FINAL (fallback COM): ${comActs[0].codeActe}`)
|
|
127
123
|
return comActs[0].codeActe;
|
|
128
124
|
}
|
|
129
125
|
}
|
|
@@ -133,7 +129,7 @@ function computeCodeEtape(ev, dossier) {
|
|
|
133
129
|
const lectureNode = lectures.find((l) => l.ordre_lecture === lecture);
|
|
134
130
|
const rootCode = lectureNode?.code_acte;
|
|
135
131
|
if (rootCode && typeof rootCode === "string") {
|
|
136
|
-
console.log(` ✔ FALLBACK LECTURE: ${rootCode}`)
|
|
132
|
+
// console.log(` ✔ FALLBACK LECTURE: ${rootCode}`)
|
|
137
133
|
return rootCode;
|
|
138
134
|
}
|
|
139
135
|
}
|