@tricoteuses/senat 1.3.1 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/config.d.ts +1 -0
- package/lib/config.js +14 -45
- package/lib/databases.js +86 -143
- package/lib/datasets.js +78 -83
- package/lib/index.d.ts +12 -4
- package/lib/index.js +42 -419
- package/lib/loaders.js +149 -654
- package/lib/model/ameli.js +83 -21
- package/lib/model/debats.js +0 -1
- package/lib/model/dosleg.d.ts +1 -1
- package/lib/model/dosleg.js +179 -73
- package/lib/model/index.d.ts +3 -3
- package/lib/model/index.js +12 -46
- package/lib/model/questions.js +68 -39
- package/lib/model/sens.d.ts +1 -1
- package/lib/model/sens.js +383 -113
- package/lib/model/texte.js +220 -290
- package/lib/model/util.js +9 -26
- package/lib/raw_types/ameli.js +5 -6
- package/lib/raw_types/debats.js +5 -6
- package/lib/raw_types/dosleg.js +5 -6
- package/lib/raw_types/questions.js +5 -6
- package/lib/raw_types/sens.js +5 -6
- package/lib/raw_types_schemats/ameli.js +1 -43
- package/lib/raw_types_schemats/debats.js +1 -22
- package/lib/raw_types_schemats/dosleg.js +1 -96
- package/lib/raw_types_schemats/questions.js +1 -22
- package/lib/raw_types_schemats/sens.js +1 -112
- package/lib/scripts/convert_data.js +181 -631
- package/lib/scripts/datautil.js +17 -60
- package/lib/scripts/parse_textes.js +46 -129
- package/lib/scripts/retrieve_documents.js +247 -513
- package/lib/scripts/retrieve_open_data.js +211 -368
- package/lib/scripts/retrieve_senateurs_photos.js +144 -239
- package/lib/scripts/shared/cli_helpers.js +30 -30
- package/lib/scripts/shared/util.js +28 -94
- package/lib/strings.js +20 -45
- package/lib/types/ameli.d.ts +1 -1
- package/lib/types/ameli.js +14 -25
- package/lib/types/debats.d.ts +1 -1
- package/lib/types/debats.js +3 -21
- package/lib/types/dosleg.d.ts +1 -1
- package/lib/types/dosleg.js +152 -119
- package/lib/types/questions.d.ts +1 -1
- package/lib/types/questions.js +1 -13
- package/lib/types/sens.d.ts +1 -1
- package/lib/types/sens.js +1 -13
- package/lib/types/sessions.js +44 -49
- package/lib/types/texte.js +17 -22
- package/lib/validators/config.js +47 -111
- package/lib/validators/senat.js +1 -5
- package/package.json +18 -40
- package/lib/aggregates.d.ts +0 -52
- package/lib/aggregates.mjs +0 -930
- package/lib/aggregates.ts +0 -833
- package/lib/config.mjs +0 -16
- package/lib/config.ts +0 -26
- package/lib/data/legislatures.json +0 -38
- package/lib/databases.mjs +0 -57
- package/lib/databases.ts +0 -71
- package/lib/datasets.mjs +0 -78
- package/lib/datasets.ts +0 -118
- package/lib/fields.d.ts +0 -10
- package/lib/fields.mjs +0 -68
- package/lib/fields.ts +0 -29
- package/lib/index.mjs +0 -4
- package/lib/index.ts +0 -42
- package/lib/inserters.d.ts +0 -98
- package/lib/inserters.mjs +0 -500
- package/lib/inserters.ts +0 -521
- package/lib/loaders.mjs +0 -158
- package/lib/loaders.ts +0 -271
- package/lib/model/ameli.mjs +0 -84
- package/lib/model/ameli.ts +0 -100
- package/lib/model/debats.mjs +0 -1
- package/lib/model/debats.ts +0 -0
- package/lib/model/dosleg.mjs +0 -196
- package/lib/model/dosleg.ts +0 -240
- package/lib/model/index.mjs +0 -4
- package/lib/model/index.ts +0 -14
- package/lib/model/questions.mjs +0 -71
- package/lib/model/questions.ts +0 -93
- package/lib/model/sens.mjs +0 -415
- package/lib/model/sens.ts +0 -516
- package/lib/model/texte.mjs +0 -208
- package/lib/model/texte.ts +0 -229
- package/lib/model/util.mjs +0 -19
- package/lib/model/util.ts +0 -32
- package/lib/raw_types/ameli.mjs +0 -5
- package/lib/raw_types/ameli.ts +0 -951
- package/lib/raw_types/debats.mjs +0 -5
- package/lib/raw_types/debats.ts +0 -222
- package/lib/raw_types/dosleg.mjs +0 -5
- package/lib/raw_types/dosleg.ts +0 -3625
- package/lib/raw_types/questions.mjs +0 -5
- package/lib/raw_types/questions.ts +0 -427
- package/lib/raw_types/sens.mjs +0 -5
- package/lib/raw_types/sens.ts +0 -4499
- package/lib/raw_types_kysely/ameli.d.ts +0 -6
- package/lib/raw_types_kysely/ameli.mjs +0 -7
- package/lib/raw_types_kysely/ameli.ts +0 -6
- package/lib/raw_types_kysely/debats.d.ts +0 -6
- package/lib/raw_types_kysely/debats.mjs +0 -7
- package/lib/raw_types_kysely/debats.ts +0 -6
- package/lib/raw_types_kysely/dosleg.d.ts +0 -6
- package/lib/raw_types_kysely/dosleg.mjs +0 -7
- package/lib/raw_types_kysely/dosleg.ts +0 -6
- package/lib/raw_types_kysely/questions.d.ts +0 -6
- package/lib/raw_types_kysely/questions.mjs +0 -7
- package/lib/raw_types_kysely/questions.ts +0 -6
- package/lib/raw_types_kysely/sens.d.ts +0 -6
- package/lib/raw_types_kysely/sens.mjs +0 -7
- package/lib/raw_types_kysely/sens.ts +0 -6
- package/lib/raw_types_kysely/texte.d.ts +0 -45
- package/lib/raw_types_kysely/texte.mjs +0 -7
- package/lib/raw_types_kysely/texte.ts +0 -53
- package/lib/raw_types_schemats/ameli.mjs +0 -2
- package/lib/raw_types_schemats/ameli.ts +0 -601
- package/lib/raw_types_schemats/debats.mjs +0 -2
- package/lib/raw_types_schemats/debats.ts +0 -145
- package/lib/raw_types_schemats/dosleg.mjs +0 -2
- package/lib/raw_types_schemats/dosleg.ts +0 -2195
- package/lib/raw_types_schemats/questions.mjs +0 -2
- package/lib/raw_types_schemats/questions.ts +0 -251
- package/lib/raw_types_schemats/sens.mjs +0 -2
- package/lib/raw_types_schemats/sens.ts +0 -2907
- package/lib/scripts/convert_data.mjs +0 -181
- package/lib/scripts/convert_data.ts +0 -243
- package/lib/scripts/datautil.mjs +0 -16
- package/lib/scripts/datautil.ts +0 -19
- package/lib/scripts/images/transparent_150x192.jpg +0 -0
- package/lib/scripts/images/transparent_155x225.jpg +0 -0
- package/lib/scripts/parse_textes.mjs +0 -46
- package/lib/scripts/parse_textes.ts +0 -65
- package/lib/scripts/retrieve_documents.mjs +0 -249
- package/lib/scripts/retrieve_documents.ts +0 -298
- package/lib/scripts/retrieve_open_data.mjs +0 -217
- package/lib/scripts/retrieve_open_data.ts +0 -274
- package/lib/scripts/retrieve_senateurs_photos.mjs +0 -147
- package/lib/scripts/retrieve_senateurs_photos.ts +0 -177
- package/lib/scripts/retrieve_textes.d.ts +0 -1
- package/lib/scripts/retrieve_textes.mjs +0 -328
- package/lib/scripts/retrieve_textes.ts +0 -143
- package/lib/scripts/shared/cli_helpers.ts +0 -36
- package/lib/scripts/shared/util.ts +0 -33
- package/lib/src/aggregates.d.ts +0 -52
- package/lib/src/aggregates.mjs +0 -726
- package/lib/src/config.d.ts +0 -2
- package/lib/src/config.mjs +0 -16
- package/lib/src/databases.d.ts +0 -18
- package/lib/src/databases.mjs +0 -55
- package/lib/src/datasets.d.ts +0 -28
- package/lib/src/datasets.mjs +0 -78
- package/lib/src/fields.d.ts +0 -10
- package/lib/src/fields.mjs +0 -22
- package/lib/src/index.d.ts +0 -8
- package/lib/src/index.mjs +0 -7
- package/lib/src/inserters.d.ts +0 -98
- package/lib/src/inserters.mjs +0 -360
- package/lib/src/loaders.d.ts +0 -36
- package/lib/src/loaders.mjs +0 -107
- package/lib/src/model/ameli.d.ts +0 -4
- package/lib/src/model/ameli.js +0 -57
- package/lib/src/model/debats.d.ts +0 -4
- package/lib/src/model/debats.js +0 -43
- package/lib/src/model/dosleg.d.ts +0 -197
- package/lib/src/model/dosleg.js +0 -169
- package/lib/src/model/index.d.ts +0 -4
- package/lib/src/model/index.js +0 -4
- package/lib/src/model/questions.d.ts +0 -89
- package/lib/src/model/questions.js +0 -76
- package/lib/src/model/sens.d.ts +0 -390
- package/lib/src/model/sens.js +0 -339
- package/lib/src/model/texte.d.ts +0 -7
- package/lib/src/model/texte.js +0 -183
- package/lib/src/raw_types_kysely/ameli.d.ts +0 -915
- package/lib/src/raw_types_kysely/ameli.js +0 -5
- package/lib/src/raw_types_kysely/debats.d.ts +0 -207
- package/lib/src/raw_types_kysely/debats.js +0 -5
- package/lib/src/raw_types_kysely/dosleg.d.ts +0 -3532
- package/lib/src/raw_types_kysely/dosleg.js +0 -5
- package/lib/src/raw_types_kysely/questions.d.ts +0 -414
- package/lib/src/raw_types_kysely/questions.js +0 -5
- package/lib/src/raw_types_kysely/sens.d.ts +0 -4394
- package/lib/src/raw_types_kysely/sens.js +0 -5
- package/lib/src/raw_types_schemats/ameli.d.ts +0 -541
- package/lib/src/raw_types_schemats/ameli.js +0 -2
- package/lib/src/raw_types_schemats/debats.d.ts +0 -127
- package/lib/src/raw_types_schemats/debats.js +0 -2
- package/lib/src/raw_types_schemats/dosleg.d.ts +0 -2027
- package/lib/src/raw_types_schemats/dosleg.js +0 -2
- package/lib/src/raw_types_schemats/questions.d.ts +0 -231
- package/lib/src/raw_types_schemats/questions.js +0 -2
- package/lib/src/raw_types_schemats/sens.d.ts +0 -2709
- package/lib/src/raw_types_schemats/sens.js +0 -2
- package/lib/src/scripts/convert_data.d.ts +0 -1
- package/lib/src/scripts/convert_data.js +0 -95
- package/lib/src/scripts/datautil.d.ts +0 -5
- package/lib/src/scripts/datautil.js +0 -16
- package/lib/src/scripts/parse_textes.d.ts +0 -1
- package/lib/src/scripts/parse_textes.js +0 -47
- package/lib/src/scripts/retrieve_documents.d.ts +0 -1
- package/lib/src/scripts/retrieve_documents.js +0 -258
- package/lib/src/scripts/retrieve_open_data.d.ts +0 -1
- package/lib/src/scripts/retrieve_open_data.js +0 -214
- package/lib/src/scripts/retrieve_senateurs_photos.d.ts +0 -1
- package/lib/src/scripts/retrieve_senateurs_photos.js +0 -147
- package/lib/src/scripts/shared/cli_helpers.d.ts +0 -44
- package/lib/src/scripts/shared/cli_helpers.js +0 -32
- package/lib/src/scripts/shared/util.d.ts +0 -3
- package/lib/src/scripts/shared/util.js +0 -28
- package/lib/src/strings.d.ts +0 -1
- package/lib/src/strings.mjs +0 -18
- package/lib/src/types/ameli.d.ts +0 -10
- package/lib/src/types/ameli.js +0 -13
- package/lib/src/types/debats.d.ts +0 -4
- package/lib/src/types/debats.js +0 -2
- package/lib/src/types/dosleg.d.ts +0 -98
- package/lib/src/types/dosleg.js +0 -151
- package/lib/src/types/questions.d.ts +0 -2
- package/lib/src/types/questions.js +0 -1
- package/lib/src/types/sens.d.ts +0 -10
- package/lib/src/types/sens.js +0 -1
- package/lib/src/types/sessions.d.ts +0 -42
- package/lib/src/types/sessions.js +0 -43
- package/lib/src/types/texte.d.ts +0 -61
- package/lib/src/types/texte.js +0 -16
- package/lib/src/validators/config.d.ts +0 -1
- package/lib/src/validators/config.js +0 -54
- package/lib/src/validators/senat.d.ts +0 -0
- package/lib/src/validators/senat.js +0 -24
- package/lib/strings.mjs +0 -18
- package/lib/strings.ts +0 -26
- package/lib/types/ameli.mjs +0 -13
- package/lib/types/ameli.ts +0 -21
- package/lib/types/debats.mjs +0 -2
- package/lib/types/debats.ts +0 -6
- package/lib/types/dosleg.mjs +0 -151
- package/lib/types/dosleg.ts +0 -284
- package/lib/types/questions.mjs +0 -1
- package/lib/types/questions.ts +0 -3
- package/lib/types/sens.mjs +0 -1
- package/lib/types/sens.ts +0 -12
- package/lib/types/sessions.mjs +0 -43
- package/lib/types/sessions.ts +0 -42
- package/lib/types/texte.mjs +0 -16
- package/lib/types/texte.ts +0 -76
- package/lib/typings/windows-1252.d.js +0 -2
- package/lib/typings/windows-1252.d.mjs +0 -2
- package/lib/typings/windows-1252.d.ts +0 -11
- package/lib/validators/config.mjs +0 -54
- package/lib/validators/config.ts +0 -79
- package/lib/validators/senat.mjs +0 -24
- package/lib/validators/senat.ts +0 -26
package/lib/model/texte.mjs
DELETED
|
@@ -1,208 +0,0 @@
|
|
|
1
|
-
import { JSDOM } from "jsdom";
|
|
2
|
-
import { AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP, AKN_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
|
|
3
|
-
import { DivisionType, } from "../types/texte";
|
|
4
|
-
function buildWorklow(metaElement) {
|
|
5
|
-
const stepElements = metaElement.querySelectorAll("workflow step");
|
|
6
|
-
const steps = [];
|
|
7
|
-
for (const stepElement of stepElements) {
|
|
8
|
-
const identification = stepElement.getAttribute("href") ?? "";
|
|
9
|
-
const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
|
|
10
|
-
steps.push({
|
|
11
|
-
eId: stepElement.getAttribute("eId"),
|
|
12
|
-
date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
|
|
13
|
-
type: identificationParts?.type || null,
|
|
14
|
-
session: identificationParts?.session || null,
|
|
15
|
-
numero: identificationParts?.numTexte || null,
|
|
16
|
-
version: identificationParts?.version ? identificationParts.version : null,
|
|
17
|
-
outcome: stepElement.getAttribute("outcome"),
|
|
18
|
-
});
|
|
19
|
-
}
|
|
20
|
-
return steps;
|
|
21
|
-
}
|
|
22
|
-
function buildDivision(node, index) {
|
|
23
|
-
const eId = node.getAttribute("eId");
|
|
24
|
-
const tag = node.nodeName;
|
|
25
|
-
const level = DivisionType[tag];
|
|
26
|
-
const titleNode = node.querySelector("num");
|
|
27
|
-
const subtitleNode = node.querySelector("heading");
|
|
28
|
-
const headings = [
|
|
29
|
-
...(titleNode ? [{
|
|
30
|
-
text: titleNode.textContent?.trim() ?? null,
|
|
31
|
-
html: titleNode.innerHTML?.trim() ?? null,
|
|
32
|
-
}] : []),
|
|
33
|
-
...(subtitleNode ? [{
|
|
34
|
-
text: subtitleNode.textContent?.trim() ?? null,
|
|
35
|
-
html: subtitleNode.innerHTML?.trim() ?? null,
|
|
36
|
-
}] : []),
|
|
37
|
-
];
|
|
38
|
-
const division = {
|
|
39
|
-
index,
|
|
40
|
-
eId,
|
|
41
|
-
tag,
|
|
42
|
-
level,
|
|
43
|
-
headings,
|
|
44
|
-
};
|
|
45
|
-
if (tag === "article") {
|
|
46
|
-
division.alineas = [];
|
|
47
|
-
}
|
|
48
|
-
return division;
|
|
49
|
-
}
|
|
50
|
-
function buildAlinea(contentNode, alineaNode) {
|
|
51
|
-
const eId = alineaNode.getAttribute("eId");
|
|
52
|
-
const heading = {
|
|
53
|
-
text: alineaNode.querySelector("num")?.textContent ?? null,
|
|
54
|
-
};
|
|
55
|
-
const pastille = alineaNode.getAttribute("data:pastille") ?? null;
|
|
56
|
-
return {
|
|
57
|
-
eId,
|
|
58
|
-
heading,
|
|
59
|
-
text: contentNode.textContent?.trim() ?? null,
|
|
60
|
-
html: contentNode.innerHTML?.trim() ?? null,
|
|
61
|
-
pastille,
|
|
62
|
-
};
|
|
63
|
-
}
|
|
64
|
-
function buildEmptyArticle(index) {
|
|
65
|
-
return {
|
|
66
|
-
index: index,
|
|
67
|
-
eId: "",
|
|
68
|
-
tag: "article",
|
|
69
|
-
level: DivisionType["article"],
|
|
70
|
-
headings: [],
|
|
71
|
-
alineas: [],
|
|
72
|
-
};
|
|
73
|
-
}
|
|
74
|
-
function flattenTexte(texteContentRoot) {
|
|
75
|
-
const divisions = [];
|
|
76
|
-
let divisionIndex = 0;
|
|
77
|
-
const iter = (node) => {
|
|
78
|
-
if (node.nodeName === "content") {
|
|
79
|
-
return;
|
|
80
|
-
}
|
|
81
|
-
switch (node.nodeName) {
|
|
82
|
-
case "tome":
|
|
83
|
-
case "part":
|
|
84
|
-
case "book":
|
|
85
|
-
case "title":
|
|
86
|
-
case "subtitle":
|
|
87
|
-
case "chapter":
|
|
88
|
-
case "section":
|
|
89
|
-
case "subsection":
|
|
90
|
-
case "paragraph":
|
|
91
|
-
case "article":
|
|
92
|
-
divisions.push(buildDivision(node, divisionIndex++));
|
|
93
|
-
break;
|
|
94
|
-
}
|
|
95
|
-
if (node.nodeName === "alinea") {
|
|
96
|
-
Array.from(node.childNodes)
|
|
97
|
-
// Find direct content children programmatically
|
|
98
|
-
// because `:scope` selector does not work
|
|
99
|
-
// https://github.com/jsdom/jsdom/issues/2998
|
|
100
|
-
.filter((alineaChildNode) => alineaChildNode.nodeName === "content")
|
|
101
|
-
.forEach((alineaContentNode) => {
|
|
102
|
-
// Hypothesis: alineas should always be enclosed in articles
|
|
103
|
-
let lastArticle = divisions.findLast(division => division.tag === "article");
|
|
104
|
-
if (!lastArticle) {
|
|
105
|
-
lastArticle = buildEmptyArticle(divisionIndex++);
|
|
106
|
-
divisions.push(lastArticle);
|
|
107
|
-
}
|
|
108
|
-
lastArticle.alineas.push(buildAlinea(alineaContentNode, node));
|
|
109
|
-
});
|
|
110
|
-
}
|
|
111
|
-
if (node.hasChildNodes()) {
|
|
112
|
-
node.childNodes.forEach((childNode) => iter(childNode));
|
|
113
|
-
}
|
|
114
|
-
};
|
|
115
|
-
iter(texteContentRoot);
|
|
116
|
-
return divisions;
|
|
117
|
-
}
|
|
118
|
-
export function transformTexte(document) {
|
|
119
|
-
const metaElement = document.querySelector("meta");
|
|
120
|
-
const preambleElement = document.querySelector("preamble");
|
|
121
|
-
const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? "";
|
|
122
|
-
const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
|
|
123
|
-
const bodyElement = document.querySelector("body");
|
|
124
|
-
const datePresentation = metaElement?.querySelector("FRBRdate[name='#presentation']")?.getAttribute("date");
|
|
125
|
-
const dateDepot = metaElement?.querySelector("FRBRdate[name='#depot']")?.getAttribute("date");
|
|
126
|
-
const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date");
|
|
127
|
-
return {
|
|
128
|
-
titre: preambleElement?.querySelector("docTitle")?.textContent || null,
|
|
129
|
-
titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")
|
|
130
|
-
?.getAttribute("value") || null,
|
|
131
|
-
signetDossier: metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")
|
|
132
|
-
?.getAttribute("value") || null,
|
|
133
|
-
urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
|
|
134
|
-
urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
|
|
135
|
-
type: identificationParts?.type || null,
|
|
136
|
-
session: identificationParts?.session || null,
|
|
137
|
-
numero: identificationParts?.numTexte ? parseInt(identificationParts.numTexte) : null,
|
|
138
|
-
datePresentation: datePresentation ? new Date(datePresentation) : null,
|
|
139
|
-
dateDepot: dateDepot ? new Date(dateDepot) : null,
|
|
140
|
-
datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
|
|
141
|
-
version: identificationParts?.version ? identificationParts.version : null,
|
|
142
|
-
workflow: metaElement ? buildWorklow(metaElement) : [],
|
|
143
|
-
divisions: bodyElement ? flattenTexte(bodyElement) : [],
|
|
144
|
-
};
|
|
145
|
-
}
|
|
146
|
-
export function transformExposeDesMotifs(document) {
|
|
147
|
-
const sectionElements = document.querySelectorAll("section");
|
|
148
|
-
const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS");
|
|
149
|
-
for (const sectionElement of sectionElements) {
|
|
150
|
-
const firstParagraph = sectionElement.querySelector("p:first-of-type");
|
|
151
|
-
const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)");
|
|
152
|
-
if (!firstParagraph) {
|
|
153
|
-
continue;
|
|
154
|
-
}
|
|
155
|
-
const firstParagraphContent = firstParagraph.textContent;
|
|
156
|
-
const secondParagraphContent = secondParagraph?.textContent;
|
|
157
|
-
if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
|
|
158
|
-
if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
|
|
159
|
-
continue;
|
|
160
|
-
}
|
|
161
|
-
else {
|
|
162
|
-
sectionElement.removeChild(secondParagraph);
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
sectionElement.removeChild(firstParagraph);
|
|
166
|
-
return {
|
|
167
|
-
text: sectionElement.textContent?.trim() ?? null,
|
|
168
|
-
html: sectionElement.innerHTML?.trim() ?? null,
|
|
169
|
-
};
|
|
170
|
-
}
|
|
171
|
-
return null;
|
|
172
|
-
}
|
|
173
|
-
export function parseTexte(texteXml) {
|
|
174
|
-
try {
|
|
175
|
-
const { document } = (new JSDOM(texteXml, {
|
|
176
|
-
contentType: "text/xml",
|
|
177
|
-
})).window;
|
|
178
|
-
return transformTexte(document);
|
|
179
|
-
}
|
|
180
|
-
catch (error) {
|
|
181
|
-
console.error(`Could not parse texte with error ${error}`);
|
|
182
|
-
}
|
|
183
|
-
return null;
|
|
184
|
-
}
|
|
185
|
-
// Prevent from memory leak
|
|
186
|
-
// https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
|
|
187
|
-
export async function parseTexteFromFile(xmlFilePath) {
|
|
188
|
-
try {
|
|
189
|
-
const { document } = (await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" })).window;
|
|
190
|
-
return transformTexte(document);
|
|
191
|
-
}
|
|
192
|
-
catch (error) {
|
|
193
|
-
console.error(`Could not parse texte with error ${error}`);
|
|
194
|
-
}
|
|
195
|
-
return null;
|
|
196
|
-
}
|
|
197
|
-
export function parseExposeDesMotifs(exposeDesMotifsHtml) {
|
|
198
|
-
const { document } = (new JSDOM(exposeDesMotifsHtml, {
|
|
199
|
-
contentType: "text/html",
|
|
200
|
-
})).window;
|
|
201
|
-
return transformExposeDesMotifs(document);
|
|
202
|
-
}
|
|
203
|
-
// Prevent from memory leak
|
|
204
|
-
// https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
|
|
205
|
-
export async function parseExposeDesMotifsFromFile(htmlFilePath) {
|
|
206
|
-
const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window;
|
|
207
|
-
return transformExposeDesMotifs(document);
|
|
208
|
-
}
|
package/lib/model/texte.ts
DELETED
|
@@ -1,229 +0,0 @@
|
|
|
1
|
-
import { JSDOM } from "jsdom"
|
|
2
|
-
|
|
3
|
-
import { AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP, AKN_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil"
|
|
4
|
-
import {
|
|
5
|
-
Alinea,
|
|
6
|
-
Article,
|
|
7
|
-
Division,
|
|
8
|
-
DivisionContent,
|
|
9
|
-
DivisionTag,
|
|
10
|
-
DivisionType, ExposeDesMotifs,
|
|
11
|
-
FlatTexte, Step,
|
|
12
|
-
Version,
|
|
13
|
-
} from "../types/texte"
|
|
14
|
-
|
|
15
|
-
function buildWorklow (metaElement: HTMLMetaElement): Step[] {
|
|
16
|
-
const stepElements = metaElement.querySelectorAll("workflow step")
|
|
17
|
-
const steps: Step[] = []
|
|
18
|
-
for (const stepElement of stepElements) {
|
|
19
|
-
const identification = stepElement.getAttribute("href") ?? ""
|
|
20
|
-
const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups
|
|
21
|
-
steps.push({
|
|
22
|
-
eId: stepElement.getAttribute("eId")!,
|
|
23
|
-
date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
|
|
24
|
-
type: identificationParts?.type || null,
|
|
25
|
-
session: identificationParts?.session || null,
|
|
26
|
-
numero: identificationParts?.numTexte || null,
|
|
27
|
-
version: identificationParts?.version ? identificationParts.version as Version : null,
|
|
28
|
-
outcome: stepElement.getAttribute("outcome"),
|
|
29
|
-
})
|
|
30
|
-
}
|
|
31
|
-
return steps
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
function buildDivision (node: Node, index: number): Division {
|
|
35
|
-
const eId = (node as Element).getAttribute("eId")!
|
|
36
|
-
const tag = node.nodeName as DivisionTag
|
|
37
|
-
const level = DivisionType[tag]
|
|
38
|
-
const titleNode = (node as Element).querySelector("num")
|
|
39
|
-
const subtitleNode = (node as Element).querySelector("heading")
|
|
40
|
-
const headings: DivisionContent[] = [
|
|
41
|
-
...(titleNode ? [{
|
|
42
|
-
text: titleNode.textContent?.trim() ?? null,
|
|
43
|
-
html: titleNode.innerHTML?.trim() ?? null,
|
|
44
|
-
}] : []),
|
|
45
|
-
...(subtitleNode ? [{
|
|
46
|
-
text: subtitleNode.textContent?.trim() ?? null,
|
|
47
|
-
html: subtitleNode.innerHTML?.trim() ?? null,
|
|
48
|
-
}] : []),
|
|
49
|
-
]
|
|
50
|
-
const division = {
|
|
51
|
-
index,
|
|
52
|
-
eId,
|
|
53
|
-
tag,
|
|
54
|
-
level,
|
|
55
|
-
headings,
|
|
56
|
-
}
|
|
57
|
-
if (tag === "article") {
|
|
58
|
-
(division as Article).alineas = []
|
|
59
|
-
}
|
|
60
|
-
return division
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
function buildAlinea (contentNode: Node, alineaNode: Node): Alinea {
|
|
64
|
-
const eId = (alineaNode as Element).getAttribute("eId")!
|
|
65
|
-
const heading = {
|
|
66
|
-
text: (alineaNode as Element).querySelector("num")?.textContent ?? null,
|
|
67
|
-
}
|
|
68
|
-
const pastille = (alineaNode as Element).getAttribute("data:pastille") ?? null
|
|
69
|
-
return {
|
|
70
|
-
eId,
|
|
71
|
-
heading,
|
|
72
|
-
text: (contentNode as Element).textContent?.trim() ?? null,
|
|
73
|
-
html: (contentNode as Element).innerHTML?.trim() ?? null,
|
|
74
|
-
pastille,
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
function buildEmptyArticle (index: number): Article {
|
|
79
|
-
return {
|
|
80
|
-
index: index,
|
|
81
|
-
eId: "",
|
|
82
|
-
tag: "article",
|
|
83
|
-
level: DivisionType["article"],
|
|
84
|
-
headings: [],
|
|
85
|
-
alineas: [],
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
function flattenTexte (texteContentRoot: Node): Division[] {
|
|
90
|
-
const divisions: Division[] = []
|
|
91
|
-
let divisionIndex = 0
|
|
92
|
-
const iter = (node: Node) => {
|
|
93
|
-
if (node.nodeName === "content") {
|
|
94
|
-
return
|
|
95
|
-
}
|
|
96
|
-
switch (node.nodeName) {
|
|
97
|
-
case "tome":
|
|
98
|
-
case "part":
|
|
99
|
-
case "book":
|
|
100
|
-
case "title":
|
|
101
|
-
case "subtitle":
|
|
102
|
-
case "chapter":
|
|
103
|
-
case "section":
|
|
104
|
-
case "subsection":
|
|
105
|
-
case "paragraph":
|
|
106
|
-
case "article":
|
|
107
|
-
divisions.push(buildDivision(node, divisionIndex++))
|
|
108
|
-
break
|
|
109
|
-
}
|
|
110
|
-
if (node.nodeName === "alinea") {
|
|
111
|
-
Array.from(node.childNodes)
|
|
112
|
-
// Find direct content children programmatically
|
|
113
|
-
// because `:scope` selector does not work
|
|
114
|
-
// https://github.com/jsdom/jsdom/issues/2998
|
|
115
|
-
.filter((alineaChildNode: Node) => alineaChildNode.nodeName === "content")
|
|
116
|
-
.forEach((alineaContentNode: Node) => {
|
|
117
|
-
// Hypothesis: alineas should always be enclosed in articles
|
|
118
|
-
let lastArticle = divisions.findLast(division => division.tag === "article") as Article
|
|
119
|
-
if (!lastArticle) {
|
|
120
|
-
lastArticle = buildEmptyArticle(divisionIndex++)
|
|
121
|
-
divisions.push(lastArticle)
|
|
122
|
-
}
|
|
123
|
-
lastArticle.alineas.push(buildAlinea(alineaContentNode, node))
|
|
124
|
-
})
|
|
125
|
-
}
|
|
126
|
-
if (node.hasChildNodes()) {
|
|
127
|
-
node.childNodes.forEach((childNode: Node) => iter(childNode))
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
iter(texteContentRoot)
|
|
131
|
-
return divisions
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
export function transformTexte (document: Document): FlatTexte | null {
|
|
135
|
-
const metaElement = document.querySelector("meta")
|
|
136
|
-
const preambleElement = document.querySelector("preamble")
|
|
137
|
-
const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? ""
|
|
138
|
-
const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups
|
|
139
|
-
const bodyElement = document.querySelector("body")
|
|
140
|
-
const datePresentation = metaElement?.querySelector("FRBRdate[name='#presentation']")?.getAttribute("date")
|
|
141
|
-
const dateDepot = metaElement?.querySelector("FRBRdate[name='#depot']")?.getAttribute("date")
|
|
142
|
-
const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date")
|
|
143
|
-
return {
|
|
144
|
-
titre: preambleElement?.querySelector("docTitle")?.textContent || null,
|
|
145
|
-
titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")
|
|
146
|
-
?.getAttribute("value") || null,
|
|
147
|
-
signetDossier:
|
|
148
|
-
metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")
|
|
149
|
-
?.getAttribute("value") || null,
|
|
150
|
-
urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
|
|
151
|
-
urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
|
|
152
|
-
type: identificationParts?.type || null,
|
|
153
|
-
session: identificationParts?.session || null,
|
|
154
|
-
numero: identificationParts?.numTexte ? parseInt(identificationParts.numTexte) : null,
|
|
155
|
-
datePresentation: datePresentation ? new Date(datePresentation) : null,
|
|
156
|
-
dateDepot: dateDepot ? new Date(dateDepot) : null,
|
|
157
|
-
datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
|
|
158
|
-
version: identificationParts?.version ? identificationParts.version as Version : null,
|
|
159
|
-
workflow: metaElement ? buildWorklow(metaElement) : [],
|
|
160
|
-
divisions: bodyElement ? flattenTexte(bodyElement) : [],
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
export function transformExposeDesMotifs (document: Document): ExposeDesMotifs | null {
|
|
165
|
-
const sectionElements = document.querySelectorAll("section")
|
|
166
|
-
const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS")
|
|
167
|
-
for (const sectionElement of sectionElements) {
|
|
168
|
-
const firstParagraph = sectionElement.querySelector("p:first-of-type")
|
|
169
|
-
const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)")
|
|
170
|
-
if (!firstParagraph) {
|
|
171
|
-
continue
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
const firstParagraphContent = firstParagraph.textContent
|
|
175
|
-
const secondParagraphContent = secondParagraph?.textContent
|
|
176
|
-
if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
|
|
177
|
-
if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
|
|
178
|
-
continue
|
|
179
|
-
} else {
|
|
180
|
-
sectionElement.removeChild(secondParagraph)
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
sectionElement.removeChild(firstParagraph)
|
|
185
|
-
return {
|
|
186
|
-
text: sectionElement.textContent?.trim() ?? null,
|
|
187
|
-
html: sectionElement.innerHTML?.trim() ?? null,
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
return null
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
export function parseTexte (texteXml: string): FlatTexte | null {
|
|
194
|
-
try {
|
|
195
|
-
const { document } = (new JSDOM(texteXml, {
|
|
196
|
-
contentType: "text/xml",
|
|
197
|
-
})).window
|
|
198
|
-
return transformTexte(document)
|
|
199
|
-
} catch (error: any) {
|
|
200
|
-
console.error(`Could not parse texte with error ${error}`)
|
|
201
|
-
}
|
|
202
|
-
return null
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
// Prevent from memory leak
|
|
206
|
-
// https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
|
|
207
|
-
export async function parseTexteFromFile (xmlFilePath: string): Promise<FlatTexte | null> {
|
|
208
|
-
try {
|
|
209
|
-
const { document } = (await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" })).window
|
|
210
|
-
return transformTexte(document)
|
|
211
|
-
} catch (error: any) {
|
|
212
|
-
console.error(`Could not parse texte with error ${error}`)
|
|
213
|
-
}
|
|
214
|
-
return null
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
export function parseExposeDesMotifs (exposeDesMotifsHtml: string): ExposeDesMotifs | null {
|
|
218
|
-
const { document } = (new JSDOM(exposeDesMotifsHtml, {
|
|
219
|
-
contentType: "text/html",
|
|
220
|
-
})).window
|
|
221
|
-
return transformExposeDesMotifs(document)
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
// Prevent from memory leak
|
|
225
|
-
// https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
|
|
226
|
-
export async function parseExposeDesMotifsFromFile (htmlFilePath: string): Promise<ExposeDesMotifs | null> {
|
|
227
|
-
const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window
|
|
228
|
-
return transformExposeDesMotifs(document)
|
|
229
|
-
}
|
package/lib/model/util.mjs
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import { sql } from "kysely";
|
|
2
|
-
export function concat(...exprs) {
|
|
3
|
-
return sql.join(exprs, sql `||`).$castTo();
|
|
4
|
-
}
|
|
5
|
-
export function expandToRows(expr, regexp) {
|
|
6
|
-
return sql `unnest(regexp_matches(${expr}, ${regexp}, 'g'))`;
|
|
7
|
-
}
|
|
8
|
-
export function removeSubstring(expr, pattern) {
|
|
9
|
-
return replace(expr, pattern, sql.val(""));
|
|
10
|
-
}
|
|
11
|
-
export function replace(expr, pattern, replacement) {
|
|
12
|
-
return sql `replace(${expr}, ${pattern}, ${replacement})`;
|
|
13
|
-
}
|
|
14
|
-
export function rtrim(expr) {
|
|
15
|
-
return sql `rtrim(${expr})`;
|
|
16
|
-
}
|
|
17
|
-
export function toDateString(expr) {
|
|
18
|
-
return sql `to_char(${expr}, 'yyyy-MM-dd')`;
|
|
19
|
-
}
|
package/lib/model/util.ts
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import { Expression, sql } from "kysely"
|
|
2
|
-
|
|
3
|
-
export function concat (...exprs: Expression<number | string | null | undefined>[]) {
|
|
4
|
-
return sql.join(exprs, sql`||`).$castTo<string>()
|
|
5
|
-
}
|
|
6
|
-
|
|
7
|
-
export function expandToRows (expr: Expression<string | null | undefined>, regexp: Expression<string>) {
|
|
8
|
-
return sql`unnest(regexp_matches(${expr}, ${regexp}, 'g'))`
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
export function removeSubstring (
|
|
12
|
-
expr: Expression<string | null | undefined>,
|
|
13
|
-
pattern: Expression<string>
|
|
14
|
-
) {
|
|
15
|
-
return replace(expr, pattern, sql.val(""))
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
export function replace (
|
|
19
|
-
expr: Expression<string | null | undefined>,
|
|
20
|
-
pattern: Expression<string>,
|
|
21
|
-
replacement: Expression<string>
|
|
22
|
-
) {
|
|
23
|
-
return sql<string>`replace(${expr}, ${pattern}, ${replacement})`
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
export function rtrim (expr: Expression<string | null | undefined>) {
|
|
27
|
-
return sql<string>`rtrim(${expr})`
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
export function toDateString (expr: Expression<Date | null | undefined>) {
|
|
31
|
-
return sql<string>`to_char(${expr}, 'yyyy-MM-dd')`
|
|
32
|
-
}
|