@tricoteuses/assemblee 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/amendements-CN7bRFdP.js +259 -0
- package/lib/amendements-CN7bRFdP.js.map +1 -0
- package/lib/amendements-Cg1lyCBp.js +735 -0
- package/lib/amendements-Cg1lyCBp.js.map +1 -0
- package/lib/api.d.ts +1 -0
- package/lib/api.d.ts.map +1 -0
- package/lib/cleaners/actes_legislatifs.d.ts +1 -0
- package/lib/cleaners/actes_legislatifs.d.ts.map +1 -0
- package/lib/cleaners/acteurs.d.ts +1 -0
- package/lib/cleaners/acteurs.d.ts.map +1 -0
- package/lib/cleaners/amendements.d.ts +1 -0
- package/lib/cleaners/amendements.d.ts.map +1 -0
- package/lib/cleaners/debats.d.ts +1 -0
- package/lib/cleaners/debats.d.ts.map +1 -0
- package/lib/cleaners/documents.d.ts +1 -0
- package/lib/cleaners/documents.d.ts.map +1 -0
- package/lib/cleaners/dossiers_legislatifs.d.ts +1 -0
- package/lib/cleaners/dossiers_legislatifs.d.ts.map +1 -0
- package/lib/cleaners/index.d.ts +1 -0
- package/lib/cleaners/index.d.ts.map +1 -0
- package/lib/cleaners/organes.d.ts +1 -0
- package/lib/cleaners/organes.d.ts.map +1 -0
- package/lib/cleaners/questions.d.ts +1 -0
- package/lib/cleaners/questions.d.ts.map +1 -0
- package/lib/cleaners/reunions.d.ts +1 -0
- package/lib/cleaners/reunions.d.ts.map +1 -0
- package/lib/cleaners/scrutins.d.ts +1 -0
- package/lib/cleaners/scrutins.d.ts.map +1 -0
- package/lib/cleaners/xml.d.ts +1 -0
- package/lib/cleaners/xml.d.ts.map +1 -0
- package/lib/cleaners.js +1082 -508
- package/lib/cleaners.js.map +1 -0
- package/lib/datasets.d.ts +1 -0
- package/lib/datasets.d.ts.map +1 -0
- package/lib/dates.d.ts +1 -0
- package/lib/dates.d.ts.map +1 -0
- package/lib/debats-BwZYgzXe.js +3978 -0
- package/lib/debats-BwZYgzXe.js.map +1 -0
- package/lib/dossiers_legislatifs.d.ts +1 -0
- package/lib/dossiers_legislatifs.d.ts.map +1 -0
- package/lib/file_systems.d.ts +1 -0
- package/lib/file_systems.d.ts.map +1 -0
- package/lib/git.d.ts +1 -0
- package/lib/git.d.ts.map +1 -0
- package/lib/index.d.ts +1 -0
- package/lib/index.d.ts.map +1 -0
- package/lib/index.js +1211 -808
- package/lib/index.js.map +1 -0
- package/lib/inserters.d.ts +1 -0
- package/lib/inserters.d.ts.map +1 -0
- package/lib/loaders.d.ts +1 -0
- package/lib/loaders.d.ts.map +1 -0
- package/lib/loaders.js +983 -27
- package/lib/loaders.js.map +1 -0
- package/lib/logger.d.ts +1 -0
- package/lib/logger.d.ts.map +1 -0
- package/lib/organes.d.ts +1 -0
- package/lib/organes.d.ts.map +1 -0
- package/lib/parse-CzW8NHW5.js +2850 -0
- package/lib/parse-CzW8NHW5.js.map +1 -0
- package/lib/parsers/index.d.ts +1 -0
- package/lib/parsers/index.d.ts.map +1 -0
- package/lib/parsers/plf.d.ts +1 -0
- package/lib/parsers/plf.d.ts.map +1 -0
- package/lib/parsers/recherche_amendements.d.ts +1 -0
- package/lib/parsers/recherche_amendements.d.ts.map +1 -0
- package/lib/parsers/textes_lois.d.ts +1 -0
- package/lib/parsers/textes_lois.d.ts.map +1 -0
- package/lib/parsers.js +287 -292
- package/lib/parsers.js.map +1 -0
- package/lib/raw_types/acteurs_et_organes.d.ts +1 -0
- package/lib/raw_types/acteurs_et_organes.d.ts.map +1 -0
- package/lib/raw_types/agendas.d.ts +1 -0
- package/lib/raw_types/agendas.d.ts.map +1 -0
- package/lib/raw_types/amendements.d.ts +1 -0
- package/lib/raw_types/amendements.d.ts.map +1 -0
- package/lib/raw_types/debats.d.ts +1 -0
- package/lib/raw_types/debats.d.ts.map +1 -0
- package/lib/raw_types/dossiers_legislatifs.d.ts +1 -0
- package/lib/raw_types/dossiers_legislatifs.d.ts.map +1 -0
- package/lib/raw_types/questions.d.ts +1 -0
- package/lib/raw_types/questions.d.ts.map +1 -0
- package/lib/raw_types/scrutins.d.ts +1 -0
- package/lib/raw_types/scrutins.d.ts.map +1 -0
- package/lib/schemas/acteurs_et_organes.d.json +982 -0
- package/lib/schemas/agendas.d.json +1561 -0
- package/lib/schemas/amendements.d.json +1901 -0
- package/lib/schemas/debats.d.json +623 -0
- package/lib/schemas/dossiers_legislatifs.d.json +3690 -0
- package/lib/schemas/legislatures.d.json +17 -0
- package/lib/schemas/questions.d.json +520 -0
- package/lib/schemas/scrutins.d.json +517 -0
- package/lib/scripts/add_links_to_documents.d.ts +1 -0
- package/lib/scripts/add_links_to_documents.d.ts.map +1 -0
- package/lib/scripts/clean_reorganized_data.d.ts +1 -0
- package/lib/scripts/clean_reorganized_data.d.ts.map +1 -0
- package/lib/scripts/copy-schemas.d.ts +1 -0
- package/lib/scripts/copy-schemas.d.ts.map +1 -0
- package/lib/scripts/diff_amendements.d.ts +1 -0
- package/lib/scripts/diff_amendements.d.ts.map +1 -0
- package/lib/scripts/document_dossiers_legislatifs.d.ts +1 -0
- package/lib/scripts/document_dossiers_legislatifs.d.ts.map +1 -0
- package/lib/scripts/generate-json-schemas.d.ts +1 -0
- package/lib/scripts/generate-json-schemas.d.ts.map +1 -0
- package/lib/scripts/get_today_reunions.d.ts +1 -0
- package/lib/scripts/get_today_reunions.d.ts.map +1 -0
- package/lib/scripts/merge_scrutins.d.ts +1 -0
- package/lib/scripts/merge_scrutins.d.ts.map +1 -0
- package/lib/scripts/process_open_dataset.d.ts +1 -0
- package/lib/scripts/process_open_dataset.d.ts.map +1 -0
- package/lib/scripts/raw_types_from_amendements.d.ts +1 -0
- package/lib/scripts/raw_types_from_amendements.d.ts.map +1 -0
- package/lib/scripts/reorganize_data.d.ts +1 -0
- package/lib/scripts/reorganize_data.d.ts.map +1 -0
- package/lib/scripts/retrieve_deputes_photos.d.ts +1 -0
- package/lib/scripts/retrieve_deputes_photos.d.ts.map +1 -0
- package/lib/scripts/retrieve_documents.d.ts +1 -0
- package/lib/scripts/retrieve_documents.d.ts.map +1 -0
- package/lib/scripts/retrieve_open_data.d.ts +1 -0
- package/lib/scripts/retrieve_open_data.d.ts.map +1 -0
- package/lib/scripts/retrieve_pending_amendments.d.ts +1 -0
- package/lib/scripts/retrieve_pending_amendments.d.ts.map +1 -0
- package/lib/scripts/retrieve_senateurs_photos.d.ts +1 -0
- package/lib/scripts/retrieve_senateurs_photos.d.ts.map +1 -0
- package/lib/scripts/shared/cli_helpers.d.ts +1 -0
- package/lib/scripts/shared/cli_helpers.d.ts.map +1 -0
- package/lib/scripts/test_iter_load.d.ts +1 -0
- package/lib/scripts/test_iter_load.d.ts.map +1 -0
- package/lib/scripts/test_load.d.ts +1 -0
- package/lib/scripts/test_load.d.ts.map +1 -0
- package/lib/scripts/test_load_big_files.d.ts +1 -0
- package/lib/scripts/test_load_big_files.d.ts.map +1 -0
- package/lib/scripts/validate_json.d.ts +1 -0
- package/lib/scripts/validate_json.d.ts.map +1 -0
- package/lib/shared_types/codes_actes.d.ts +1 -0
- package/lib/shared_types/codes_actes.d.ts.map +1 -0
- package/lib/strings.d.ts +1 -0
- package/lib/strings.d.ts.map +1 -0
- package/lib/types/acteurs_et_organes.d.ts +1 -0
- package/lib/types/acteurs_et_organes.d.ts.map +1 -0
- package/lib/types/agendas.d.ts +1 -0
- package/lib/types/agendas.d.ts.map +1 -0
- package/lib/types/amendements.d.ts +1 -0
- package/lib/types/amendements.d.ts.map +1 -0
- package/lib/types/debats.d.ts +1 -0
- package/lib/types/debats.d.ts.map +1 -0
- package/lib/types/dossiers_legislatifs.d.ts +1 -0
- package/lib/types/dossiers_legislatifs.d.ts.map +1 -0
- package/lib/types/legislatures.d.ts +1 -0
- package/lib/types/legislatures.d.ts.map +1 -0
- package/lib/types/questions.d.ts +1 -0
- package/lib/types/questions.d.ts.map +1 -0
- package/lib/types/scrutins.d.ts +1 -0
- package/lib/types/scrutins.d.ts.map +1 -0
- package/lib/urls.d.ts +1 -0
- package/lib/urls.d.ts.map +1 -0
- package/package.json +1 -3
- package/lib/amendements-79bwpkvR.js +0 -154
- package/lib/amendements-CV3s5a0M.js +0 -667
- package/lib/loaders-9mHdTl9L.js +0 -4158
- package/lib/parse-Ccs6wcUg.js +0 -2512
package/lib/parsers.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
import
|
|
2
|
-
import { JSDOM
|
|
3
|
-
import { t as
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import
|
|
7
|
-
import
|
|
8
|
-
import
|
|
9
|
-
import { parse
|
|
10
|
-
import { d as
|
|
11
|
-
import { C as
|
|
12
|
-
function
|
|
13
|
-
const
|
|
1
|
+
import assert from "node:assert";
|
|
2
|
+
import { JSDOM } from "jsdom";
|
|
3
|
+
import { t as toDate, a as addLeadingZeros, p as parse$1, q as fr } from "./parse-CzW8NHW5.js";
|
|
4
|
+
import deepEqual from "deep-equal";
|
|
5
|
+
import fs from "fs-extra";
|
|
6
|
+
import frontMatter from "front-matter";
|
|
7
|
+
import jsYaml from "js-yaml";
|
|
8
|
+
import path from "node:path";
|
|
9
|
+
import { parse } from "node-html-parser";
|
|
10
|
+
import { d as cleanAmendement } from "./amendements-CN7bRFdP.js";
|
|
11
|
+
import { C as Convert } from "./amendements-Cg1lyCBp.js";
|
|
12
|
+
function parseHeader(header) {
|
|
13
|
+
const headersMapping = [
|
|
14
14
|
{ regex: /^(RAPPORT_)?ANNEXE(_|$)|^ETAT_/, level: 0, name: "Annexe" },
|
|
15
15
|
{ regex: /^TOME_/, level: 1, name: "Tome" },
|
|
16
16
|
{
|
|
@@ -39,61 +39,80 @@ function te(i) {
|
|
|
39
39
|
name: "CMP"
|
|
40
40
|
}
|
|
41
41
|
];
|
|
42
|
-
for (const { regex
|
|
43
|
-
if (
|
|
44
|
-
return [
|
|
42
|
+
for (const { regex, level, name } of headersMapping) {
|
|
43
|
+
if (regex.test(header)) {
|
|
44
|
+
return [level, name];
|
|
45
|
+
}
|
|
46
|
+
}
|
|
45
47
|
return [null, ""];
|
|
46
48
|
}
|
|
47
|
-
function
|
|
48
|
-
return
|
|
49
|
+
function cleanText(text) {
|
|
50
|
+
return text?.replace(/[\n\t]+/g, "").trim() || "";
|
|
49
51
|
}
|
|
50
|
-
const
|
|
52
|
+
const excludedAlineas = [
|
|
51
53
|
/^Délibéré en séance publique/,
|
|
52
54
|
/^Fait le/,
|
|
53
55
|
/^La Présidente,$/,
|
|
54
56
|
/^Le Président,$/,
|
|
55
57
|
/^Signé/
|
|
56
58
|
];
|
|
57
|
-
function
|
|
58
|
-
return
|
|
59
|
+
function isExcludedAlinea(text) {
|
|
60
|
+
return excludedAlineas.some((regex) => regex.test(text));
|
|
59
61
|
}
|
|
60
|
-
function
|
|
61
|
-
const
|
|
62
|
-
|
|
63
|
-
const
|
|
64
|
-
|
|
65
|
-
const
|
|
66
|
-
|
|
67
|
-
|
|
62
|
+
function parseTexte(assembleeUrl, page) {
|
|
63
|
+
const html = page.replace(/(<style[\w\W]+style>)/g, "");
|
|
64
|
+
const { window } = new JSDOM(html);
|
|
65
|
+
const document = window.document;
|
|
66
|
+
assert.strictEqual(document.children.length, 1);
|
|
67
|
+
const htmlElement = document.children[0];
|
|
68
|
+
assert.strictEqual(htmlElement.children.length, 2);
|
|
69
|
+
const bodyElement = htmlElement.children[1];
|
|
70
|
+
let currentSection = bodyElement.children?.[1];
|
|
71
|
+
if (bodyElement.children.length < 3 || currentSection.tagName !== "BR") {
|
|
68
72
|
return {
|
|
69
73
|
error: null,
|
|
70
74
|
subdivisions: [],
|
|
71
|
-
url:
|
|
75
|
+
url: assembleeUrl
|
|
72
76
|
};
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
let
|
|
77
|
+
}
|
|
78
|
+
assert.strictEqual(bodyElement.children[0].tagName, "DIV");
|
|
79
|
+
let alineaElement = null;
|
|
80
|
+
let isMultiLinesHeader = false;
|
|
81
|
+
let level = null;
|
|
82
|
+
let state = "section";
|
|
83
|
+
const levels = [];
|
|
84
|
+
const subdivisions = [];
|
|
85
|
+
let subdivisionAlineas = [];
|
|
86
|
+
let subdivisionHeaders = [];
|
|
77
87
|
try {
|
|
78
|
-
|
|
79
|
-
switch (
|
|
88
|
+
while (state !== null) {
|
|
89
|
+
switch (state) {
|
|
80
90
|
case "section":
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
91
|
+
if (!currentSection.nextElementSibling) {
|
|
92
|
+
state = null;
|
|
93
|
+
} else {
|
|
94
|
+
currentSection = currentSection.nextElementSibling;
|
|
95
|
+
if (currentSection.tagName === "DIV") {
|
|
96
|
+
alineaElement = currentSection.firstElementChild;
|
|
97
|
+
if (alineaElement && !currentSection.id.includes("ftn")) {
|
|
98
|
+
state = "alineaElement";
|
|
99
|
+
}
|
|
100
|
+
} else {
|
|
101
|
+
assert(["BR", "HR", "P"].includes(currentSection.tagName), `Unexpected tag name "${currentSection.tagName}" for body child section`);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
85
104
|
break;
|
|
86
105
|
case "alineaElement":
|
|
87
|
-
if (!
|
|
88
|
-
|
|
106
|
+
if (!alineaElement) {
|
|
107
|
+
state = "nextAlineaElement";
|
|
89
108
|
break;
|
|
90
109
|
}
|
|
91
|
-
switch (
|
|
110
|
+
switch (alineaElement.tagName) {
|
|
92
111
|
case "DIV":
|
|
93
112
|
case "OL":
|
|
94
113
|
case "BR":
|
|
95
114
|
case "SPAN":
|
|
96
|
-
|
|
115
|
+
state = "nextAlineaElement";
|
|
97
116
|
break;
|
|
98
117
|
case "H4":
|
|
99
118
|
case "P":
|
|
@@ -103,296 +122,272 @@ function he(i, t) {
|
|
|
103
122
|
return {
|
|
104
123
|
error: {
|
|
105
124
|
code: -2,
|
|
106
|
-
message: `Unexpected tag name for alinea element: ${
|
|
125
|
+
message: `Unexpected tag name for alinea element: ${alineaElement.tagName}`
|
|
107
126
|
}
|
|
108
127
|
};
|
|
109
128
|
}
|
|
110
|
-
const
|
|
111
|
-
|
|
112
|
-
|
|
129
|
+
const nameComputed = cleanText(alineaElement.textContent).normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\(nouveau\)/, "").replace(/\(Pour coordination\)/, "").replace(/\(Supprimés?\)/, "").replace(/ /g, " ").replace(/[\-,.…]/g, "").trim().replace(/ {1,}/g, "_").toUpperCase();
|
|
130
|
+
const [nextLevel, paragraphType] = parseHeader(nameComputed);
|
|
131
|
+
if (!nameComputed || nextLevel !== null && nextLevel < 0) {
|
|
132
|
+
state = "nextAlineaElement";
|
|
113
133
|
break;
|
|
114
134
|
}
|
|
115
|
-
const
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
135
|
+
const lineHtml = cleanText(alineaElement.outerHTML);
|
|
136
|
+
const lineText = cleanText(alineaElement.textContent);
|
|
137
|
+
if (nextLevel === null) {
|
|
138
|
+
if (isExcludedAlinea(lineText)) {
|
|
139
|
+
state = "nextAlineaElement";
|
|
119
140
|
break;
|
|
120
141
|
}
|
|
121
|
-
|
|
142
|
+
if (subdivisionHeaders.length === 0 || isMultiLinesHeader && subdivisionAlineas.length === 0) {
|
|
143
|
+
subdivisionHeaders.push({ texte: lineText, html: lineHtml });
|
|
144
|
+
} else {
|
|
145
|
+
subdivisionAlineas.push({ texte: lineText, html: lineHtml });
|
|
146
|
+
}
|
|
147
|
+
if (isMultiLinesHeader && subdivisionHeaders.length >= 2) {
|
|
148
|
+
isMultiLinesHeader = false;
|
|
149
|
+
}
|
|
122
150
|
} else {
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
151
|
+
level = nextLevel;
|
|
152
|
+
while (levels.length > 0 && level < levels[levels.length - 1]) {
|
|
153
|
+
levels.pop();
|
|
154
|
+
}
|
|
155
|
+
if (levels.length === 0 || level > levels[levels.length - 1]) {
|
|
156
|
+
levels.push(level);
|
|
157
|
+
}
|
|
158
|
+
subdivisionAlineas = [];
|
|
159
|
+
subdivisionHeaders = [{ texte: lineText, html: lineHtml }];
|
|
160
|
+
isMultiLinesHeader = !nameComputed.match(/^ARTICLES?_/) && !nameComputed.match(/^EXPOSE_DES_MOTIFS$/);
|
|
161
|
+
subdivisions.push({
|
|
162
|
+
id: `D_${nameComputed}`,
|
|
163
|
+
type: paragraphType,
|
|
164
|
+
niveau: level + 1,
|
|
165
|
+
niveauRelatif: levels.length,
|
|
166
|
+
titres: subdivisionHeaders,
|
|
167
|
+
alineas: subdivisionAlineas
|
|
132
168
|
});
|
|
133
169
|
}
|
|
134
|
-
|
|
170
|
+
state = "nextAlineaElement";
|
|
135
171
|
break;
|
|
136
172
|
case "nextAlineaElement":
|
|
137
|
-
if (!
|
|
138
|
-
|
|
173
|
+
if (!alineaElement) {
|
|
174
|
+
state = "section";
|
|
139
175
|
break;
|
|
140
176
|
}
|
|
141
|
-
|
|
177
|
+
alineaElement = alineaElement.nextElementSibling;
|
|
178
|
+
state = alineaElement ? "alineaElement" : "section";
|
|
142
179
|
break;
|
|
143
180
|
default:
|
|
144
|
-
throw new Error(`Unexpected state: ${
|
|
181
|
+
throw new Error(`Unexpected state: ${state}`);
|
|
145
182
|
}
|
|
146
|
-
|
|
183
|
+
}
|
|
184
|
+
const result = {
|
|
147
185
|
error: null,
|
|
148
|
-
subdivisions
|
|
149
|
-
url:
|
|
186
|
+
subdivisions,
|
|
187
|
+
url: assembleeUrl
|
|
150
188
|
};
|
|
151
|
-
|
|
189
|
+
subdivisionAlineas = [];
|
|
190
|
+
subdivisionHeaders = [];
|
|
191
|
+
return result;
|
|
152
192
|
} finally {
|
|
153
|
-
|
|
193
|
+
window.close();
|
|
154
194
|
}
|
|
155
195
|
}
|
|
156
|
-
function
|
|
157
|
-
const
|
|
158
|
-
if (isNaN(+
|
|
196
|
+
function formatISO(date, options) {
|
|
197
|
+
const date_ = toDate(date, options?.in);
|
|
198
|
+
if (isNaN(+date_)) {
|
|
159
199
|
throw new RangeError("Invalid time value");
|
|
160
|
-
const r = t?.format ?? "extended", m = t?.representation ?? "complete";
|
|
161
|
-
let d = "", u = "";
|
|
162
|
-
const n = r === "extended" ? "-" : "", o = r === "extended" ? ":" : "";
|
|
163
|
-
if (m !== "time") {
|
|
164
|
-
const s = v(a.getDate(), 2), f = v(a.getMonth() + 1, 2);
|
|
165
|
-
d = `${v(a.getFullYear(), 4)}${n}${f}${n}${s}`;
|
|
166
200
|
}
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
const
|
|
175
|
-
|
|
201
|
+
const format = options?.format ?? "extended";
|
|
202
|
+
const representation = options?.representation ?? "complete";
|
|
203
|
+
let result = "";
|
|
204
|
+
let tzOffset = "";
|
|
205
|
+
const dateDelimiter = format === "extended" ? "-" : "";
|
|
206
|
+
const timeDelimiter = format === "extended" ? ":" : "";
|
|
207
|
+
if (representation !== "time") {
|
|
208
|
+
const day = addLeadingZeros(date_.getDate(), 2);
|
|
209
|
+
const month = addLeadingZeros(date_.getMonth() + 1, 2);
|
|
210
|
+
const year = addLeadingZeros(date_.getFullYear(), 4);
|
|
211
|
+
result = `${year}${dateDelimiter}${month}${dateDelimiter}${day}`;
|
|
176
212
|
}
|
|
177
|
-
|
|
213
|
+
if (representation !== "date") {
|
|
214
|
+
const offset = date_.getTimezoneOffset();
|
|
215
|
+
if (offset !== 0) {
|
|
216
|
+
const absoluteOffset = Math.abs(offset);
|
|
217
|
+
const hourOffset = addLeadingZeros(Math.trunc(absoluteOffset / 60), 2);
|
|
218
|
+
const minuteOffset = addLeadingZeros(absoluteOffset % 60, 2);
|
|
219
|
+
const sign = offset < 0 ? "+" : "-";
|
|
220
|
+
tzOffset = `${sign}${hourOffset}:${minuteOffset}`;
|
|
221
|
+
} else {
|
|
222
|
+
tzOffset = "Z";
|
|
223
|
+
}
|
|
224
|
+
const hour = addLeadingZeros(date_.getHours(), 2);
|
|
225
|
+
const minute = addLeadingZeros(date_.getMinutes(), 2);
|
|
226
|
+
const second = addLeadingZeros(date_.getSeconds(), 2);
|
|
227
|
+
const separator = result === "" ? "" : "T";
|
|
228
|
+
const time = [hour, minute, second].join(timeDelimiter);
|
|
229
|
+
result = `${result}${separator}${time}${tzOffset}`;
|
|
230
|
+
}
|
|
231
|
+
return result;
|
|
178
232
|
}
|
|
179
|
-
async function*
|
|
180
|
-
for await (const [
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
t,
|
|
186
|
-
a,
|
|
187
|
-
r,
|
|
188
|
-
m,
|
|
189
|
-
d
|
|
190
|
-
))
|
|
191
|
-
G(n), yield [
|
|
192
|
-
u,
|
|
193
|
-
ee.toAmendement(JSON.stringify(n))
|
|
233
|
+
async function* iterRechercheAmendements(amendementsSearchCacheDir, url, incremental, minDateDepot, minDateExamen, options = {}) {
|
|
234
|
+
for await (const [amendementUrlPath, amendement] of iterRechercheRawAmendements(amendementsSearchCacheDir, url, incremental, minDateDepot, minDateExamen, options)) {
|
|
235
|
+
cleanAmendement(amendement);
|
|
236
|
+
yield [
|
|
237
|
+
amendementUrlPath,
|
|
238
|
+
Convert.toAmendement(JSON.stringify(amendement))
|
|
194
239
|
];
|
|
240
|
+
}
|
|
195
241
|
}
|
|
196
|
-
async function*
|
|
197
|
-
await
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
)
|
|
202
|
-
/[?&]
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
)
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
let u = 0;
|
|
219
|
-
e: for (; ; ) {
|
|
220
|
-
d.verbose && console.log(`Fetching amendements search page at ${t}…`);
|
|
221
|
-
const n = await fetch(t);
|
|
222
|
-
e(
|
|
223
|
-
n.ok,
|
|
224
|
-
`Retrieval of search page at ${t} failed with error: ${n.status} ${n.statusText}`
|
|
225
|
-
);
|
|
226
|
-
const o = await n.text(), s = X(o);
|
|
227
|
-
if (s.querySelector("div.no-result") !== null)
|
|
242
|
+
async function* iterRechercheRawAmendements(amendementsSearchCacheDir, url, incremental, minDateDepot, minDateExamen, options = {}) {
|
|
243
|
+
await fs.ensureDir(amendementsSearchCacheDir);
|
|
244
|
+
assert.strictEqual(/[?&]date_depot=/.exec(url), null, `URL ${url} already contains a deposit date`);
|
|
245
|
+
assert.strictEqual(/[?&]order=/.exec(url), null, `URL ${url} already contains a sort order`);
|
|
246
|
+
assert.strictEqual(/[?&]page=/.exec(url), null, `URL ${url} already contains a page number`);
|
|
247
|
+
if (incremental) {
|
|
248
|
+
assert.notStrictEqual(/[?&]etat=/.exec(url), null, `In incremental mode, URL ${url} must contain an "etat" query parameter`);
|
|
249
|
+
}
|
|
250
|
+
url += (url.includes("?") ? "&" : "?") + "order=date_depot,desc&page=1";
|
|
251
|
+
if (minDateDepot != null) {
|
|
252
|
+
assert.notStrictEqual(/\d{4}-\d{2}-\d{2}/.exec(minDateDepot), null, `Invalid format for minimum date: ${minDateDepot}`);
|
|
253
|
+
}
|
|
254
|
+
let amendementNumber = 0;
|
|
255
|
+
iterSearchPages: while (true) {
|
|
256
|
+
if (options.verbose) {
|
|
257
|
+
console.log(`Fetching amendements search page at ${url}…`);
|
|
258
|
+
}
|
|
259
|
+
const response = await fetch(url);
|
|
260
|
+
assert(response.ok, `Retrieval of search page at ${url} failed with error: ${response.status} ${response.statusText}`);
|
|
261
|
+
const page = await response.text();
|
|
262
|
+
const html = parse(page);
|
|
263
|
+
if (html.querySelector("div.no-result") !== null) {
|
|
228
264
|
break;
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
);
|
|
234
|
-
|
|
235
|
-
const
|
|
236
|
-
|
|
237
|
-
const
|
|
238
|
-
|
|
239
|
-
const
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
)
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
k(R.text, "d MMMM y", /* @__PURE__ */ new Date(), {
|
|
269
|
-
locale: J
|
|
270
|
-
}),
|
|
271
|
-
{ representation: "date" }
|
|
272
|
-
) < r)
|
|
273
|
-
break e;
|
|
265
|
+
}
|
|
266
|
+
const amendementsCountDiv = html.querySelector("div.mirror-card-subtitle");
|
|
267
|
+
assert.notStrictEqual(amendementsCountDiv, null);
|
|
268
|
+
const amendementsCount = parseInt(/\d+/.exec(amendementsCountDiv.text)[0]);
|
|
269
|
+
const amendementsDiv = html.querySelector("div.amendement-list--results-table");
|
|
270
|
+
assert.notStrictEqual(amendementsDiv, null);
|
|
271
|
+
const amendementsTable = amendementsDiv.querySelector("table");
|
|
272
|
+
assert.notStrictEqual(amendementsTable, null);
|
|
273
|
+
const tfootTr = amendementsTable.querySelector("tfoot > tr");
|
|
274
|
+
assert.notStrictEqual(tfootTr, null);
|
|
275
|
+
const tfootTrCells = tfootTr.querySelectorAll("td, th").map((cell) => cell.text);
|
|
276
|
+
assert(deepEqual(tfootTrCells, [
|
|
277
|
+
"",
|
|
278
|
+
"n°",
|
|
279
|
+
"Dossier législatif",
|
|
280
|
+
"Emplacement",
|
|
281
|
+
"Auteur",
|
|
282
|
+
"État",
|
|
283
|
+
"Sort",
|
|
284
|
+
"Date d'examen",
|
|
285
|
+
"Examiné par",
|
|
286
|
+
"Texte visé",
|
|
287
|
+
"Date de dépôt"
|
|
288
|
+
]), `Unexpected columns in ${JSON.stringify(tfootTrCells, null, 2)}`);
|
|
289
|
+
const amendementsTr = amendementsTable.querySelectorAll("tbody > tr");
|
|
290
|
+
assert.notStrictEqual(amendementsTr.length, 0);
|
|
291
|
+
for (const amendementTr of amendementsTr) {
|
|
292
|
+
amendementNumber++;
|
|
293
|
+
if (minDateDepot != null || minDateExamen != null) {
|
|
294
|
+
const amendementTdList = amendementTr.querySelectorAll("td");
|
|
295
|
+
assert.strictEqual(amendementTdList.length, 11, `Unexpected number of columns in amendment row: ${amendementTr.outerHTML}`);
|
|
296
|
+
if (minDateDepot != null) {
|
|
297
|
+
const dateDepotTd = amendementTdList[10];
|
|
298
|
+
const dateDepot = formatISO(parse$1(dateDepotTd.text, "d MMMM y", /* @__PURE__ */ new Date(), {
|
|
299
|
+
locale: fr
|
|
300
|
+
}), { representation: "date" });
|
|
301
|
+
if (dateDepot < minDateDepot) {
|
|
302
|
+
break iterSearchPages;
|
|
303
|
+
}
|
|
274
304
|
}
|
|
275
|
-
if (
|
|
276
|
-
const
|
|
277
|
-
if (
|
|
278
|
-
|
|
279
|
-
locale:
|
|
280
|
-
}),
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
305
|
+
if (minDateExamen != null) {
|
|
306
|
+
const dateExamenTd = amendementTdList[7];
|
|
307
|
+
if (dateExamenTd.text) {
|
|
308
|
+
const dateExamen = formatISO(parse$1(dateExamenTd.text, "d MMMM y", /* @__PURE__ */ new Date(), {
|
|
309
|
+
locale: fr
|
|
310
|
+
}), { representation: "date" });
|
|
311
|
+
if (dateExamen < minDateExamen) {
|
|
312
|
+
continue;
|
|
313
|
+
}
|
|
314
|
+
}
|
|
284
315
|
}
|
|
285
316
|
}
|
|
286
|
-
const
|
|
287
|
-
|
|
288
|
-
const
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
)
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
)
|
|
302
|
-
await q.ensureDir(F.dirname(A));
|
|
303
|
-
const y = await q.pathExists(
|
|
304
|
-
A
|
|
305
|
-
) ? await q.readFile(A, "utf8") : null, L = T.outerHTML, U = `---
|
|
306
|
-
${W.dump(
|
|
307
|
-
{
|
|
308
|
-
position: l - u,
|
|
309
|
-
search: t.replace(/&page=[\d]+/, "")
|
|
310
|
-
},
|
|
311
|
-
{
|
|
312
|
-
sortKeys: !0
|
|
313
|
-
}
|
|
314
|
-
)}---
|
|
317
|
+
const amendementUrlPath = amendementTr.getAttribute("data-href");
|
|
318
|
+
assert.notStrictEqual(amendementUrlPath, void 0);
|
|
319
|
+
const amendementUrlPathSplitted = amendementUrlPath.split("/");
|
|
320
|
+
assert.strictEqual(amendementUrlPathSplitted[0], "", `Unexpected URL path for amendement: ${amendementUrlPath}`);
|
|
321
|
+
assert.strictEqual(amendementUrlPathSplitted[1], "dyn", `Unexpected URL path for amendement: ${amendementUrlPath}`);
|
|
322
|
+
const amendementSearchCacheFilePath = path.join(amendementsSearchCacheDir, ...amendementUrlPathSplitted.slice(2)) + ".html";
|
|
323
|
+
await fs.ensureDir(path.dirname(amendementSearchCacheFilePath));
|
|
324
|
+
const existingAmendementSearchCache = await fs.pathExists(amendementSearchCacheFilePath) ? await fs.readFile(amendementSearchCacheFilePath, "utf8") : null;
|
|
325
|
+
const amendementTrOuterHtml = amendementTr.outerHTML;
|
|
326
|
+
const amendementSearchCache = `---
|
|
327
|
+
${jsYaml.dump({
|
|
328
|
+
position: amendementsCount - amendementNumber,
|
|
329
|
+
search: url.replace(/&page=[\d]+/, "")
|
|
330
|
+
}, {
|
|
331
|
+
sortKeys: true
|
|
332
|
+
})}---
|
|
315
333
|
|
|
316
|
-
${
|
|
317
|
-
if (
|
|
318
|
-
if (
|
|
319
|
-
break
|
|
334
|
+
${amendementTrOuterHtml}`;
|
|
335
|
+
if (amendementSearchCache === existingAmendementSearchCache) {
|
|
336
|
+
if (incremental) {
|
|
337
|
+
break iterSearchPages;
|
|
338
|
+
}
|
|
320
339
|
continue;
|
|
321
340
|
}
|
|
322
|
-
if (
|
|
341
|
+
if (existingAmendementSearchCache !== null && frontMatter(existingAmendementSearchCache).body === amendementTrOuterHtml) {
|
|
323
342
|
continue;
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
343
|
+
}
|
|
344
|
+
if (options.verbose) {
|
|
345
|
+
if (existingAmendementSearchCache === null) {
|
|
346
|
+
console.log(`Adding amendement search cache: ${amendementSearchCacheFilePath}…`);
|
|
347
|
+
} else {
|
|
348
|
+
console.log(`Updating amendement search cache: ${amendementSearchCacheFilePath}…`);
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
const amendementHtmlUrl = new URL(amendementUrlPath, "https://www.assemblee-nationale.fr/").toString();
|
|
352
|
+
const amendementHtmlResponse = await fetch(amendementHtmlUrl);
|
|
353
|
+
if (amendementHtmlResponse.status === 404) {
|
|
354
|
+
console.log(`Amendement HTML page not found at ${amendementHtmlUrl}. Skipping.`);
|
|
335
355
|
continue;
|
|
336
356
|
}
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
);
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
const R = b.querySelector("a > span");
|
|
347
|
-
return e.notStrictEqual(
|
|
348
|
-
R,
|
|
349
|
-
null,
|
|
350
|
-
`No <span> in <a> found in ${b.toString()}`
|
|
351
|
-
), R.text === "Version JSON";
|
|
357
|
+
assert(amendementHtmlResponse.ok, `Retrieval of amendement HTML page at ${amendementHtmlUrl} failed with error: ${amendementHtmlResponse.status} ${amendementHtmlResponse.statusText}`);
|
|
358
|
+
const amendementHtmlPage = await amendementHtmlResponse.text();
|
|
359
|
+
const amendementHtml = parse(amendementHtmlPage);
|
|
360
|
+
const formatsLi = amendementHtml.querySelectorAll("li.mirror-card-header--options--content--item");
|
|
361
|
+
assert.notStrictEqual(formatsLi.length, 0);
|
|
362
|
+
const jsonLi = formatsLi.find((formatLi) => {
|
|
363
|
+
const formatSpan = formatLi.querySelector("a > span");
|
|
364
|
+
assert.notStrictEqual(formatSpan, null, `No <span> in <a> found in ${formatLi.toString()}`);
|
|
365
|
+
return formatSpan.text === "Version JSON";
|
|
352
366
|
});
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
);
|
|
358
|
-
const
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
);
|
|
364
|
-
const H = C.getAttribute("href");
|
|
365
|
-
e.notStrictEqual(
|
|
366
|
-
H,
|
|
367
|
-
void 0,
|
|
368
|
-
`No URL found for JSON version of amendement: ${I.toString()}`
|
|
369
|
-
);
|
|
370
|
-
const D = new URL(
|
|
371
|
-
H,
|
|
372
|
-
"https://www.assemblee-nationale.fr/"
|
|
373
|
-
).toString(), M = await fetch(D);
|
|
374
|
-
e(
|
|
375
|
-
M.ok,
|
|
376
|
-
`Retrieval of amendement JSON page at ${D} failed with error: ${M.status} ${M.statusText}`
|
|
377
|
-
);
|
|
378
|
-
const B = await M.json();
|
|
379
|
-
yield [h, B], await q.writeFile(
|
|
380
|
-
A,
|
|
381
|
-
U,
|
|
382
|
-
"utf8"
|
|
383
|
-
);
|
|
367
|
+
assert.notStrictEqual(jsonLi, void 0, `No JSON version found for amendement at ${amendementHtmlUrl}`);
|
|
368
|
+
const jsonA = jsonLi.querySelector("a");
|
|
369
|
+
assert.notStrictEqual(jsonA, null, `No <a> found in ${jsonLi.toString()}`);
|
|
370
|
+
const amendementJsonUrlPath = jsonA.getAttribute("href");
|
|
371
|
+
assert.notStrictEqual(amendementJsonUrlPath, void 0, `No URL found for JSON version of amendement: ${jsonLi.toString()}`);
|
|
372
|
+
const amendementJsonUrl = new URL(amendementJsonUrlPath, "https://www.assemblee-nationale.fr/").toString();
|
|
373
|
+
const amendementJsonResponse = await fetch(amendementJsonUrl);
|
|
374
|
+
assert(amendementJsonResponse.ok, `Retrieval of amendement JSON page at ${amendementJsonUrl} failed with error: ${amendementJsonResponse.status} ${amendementJsonResponse.statusText}`);
|
|
375
|
+
const amendement = await amendementJsonResponse.json();
|
|
376
|
+
yield [amendementUrlPath, amendement];
|
|
377
|
+
await fs.writeFile(amendementSearchCacheFilePath, amendementSearchCache, "utf8");
|
|
384
378
|
}
|
|
385
|
-
const
|
|
386
|
-
|
|
387
|
-
);
|
|
388
|
-
if (E === null)
|
|
379
|
+
const paginationA = html.querySelector("div.an-pagination--item > i.an-icons-chevron-right ~ a");
|
|
380
|
+
if (paginationA === null) {
|
|
389
381
|
break;
|
|
390
|
-
|
|
391
|
-
|
|
382
|
+
}
|
|
383
|
+
const urlPath = paginationA.getAttribute("href");
|
|
384
|
+
assert.notStrictEqual(urlPath, void 0);
|
|
385
|
+
url = new URL(urlPath, "https://www.assemblee-nationale.fr/").toString();
|
|
392
386
|
}
|
|
393
387
|
}
|
|
394
388
|
export {
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
389
|
+
iterRechercheAmendements,
|
|
390
|
+
iterRechercheRawAmendements,
|
|
391
|
+
parseTexte
|
|
398
392
|
};
|
|
393
|
+
//# sourceMappingURL=parsers.js.map
|