@tricoteuses/senat 2.21.5 → 2.21.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/src/conversion_textes.d.ts +11 -0
- package/lib/src/conversion_textes.js +307 -0
- package/lib/src/loaders.js +1 -1
- package/lib/src/parsers/texte.js +7 -7
- package/lib/src/scripts/convert_data.js +1 -1
- package/lib/src/scripts/convert_xml_to_html.d.ts +1 -0
- package/lib/src/scripts/convert_xml_to_html.js +62 -0
- package/lib/src/scripts/retrieve_documents.js +1 -1
- package/lib/src/scripts/test_iter_load.js +4 -4
- package/lib/src/types/texte.d.ts +8 -8
- package/package.json +1 -1
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Header metadata pulled from the `<meta>` section of a Sénat Akoma Ntoso XML
 * document. Every field is `null` when the corresponding element or attribute
 * is absent from the document.
 */
export interface SenatMetadata {
    /** Trailing digits of the `signet-dossier-legislatif-senat` alias value. */
    number: string | null;
    /** First `YYYY-YYYY` pattern found in the `FRBRExpression > FRBRuri` value. */
    session: string | null;
    /** Deposit date (or, failing that, presentation date) rendered in French as `d MMMM yyyy`. */
    date: string | null;
    /** `"PROPOSITION DE LOI"` or `"PROJET DE LOI"`, derived from the `name` attribute of `<bill>`. */
    type: string | null;
    /** `showAs` label of the `TLCPerson` referenced by the work-level author. */
    authors: string | null;
    /** Trimmed text content of `<docTitle>`. */
    title: string | null;
    /** `showAs` label of the matched `TLCOrganization` commission entry. */
    commission: string | null;
}
/** Reads the header metadata out of an already-parsed Sénat XML document. */
export declare function extractMetadata(xmlDoc: Document): SenatMetadata;
/** Converts a Sénat XML text (given as a string) to a standalone HTML file written at `outputFilePath`. */
export declare function convertSenatXmlToHtml(texteXml: string, outputFilePath: string): Promise<void>;
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
import { JSDOM } from "jsdom";
|
|
2
|
+
import fs from "fs-extra";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import { DateTime } from "luxon";
|
|
5
|
+
export function extractMetadata(xmlDoc) {
|
|
6
|
+
const metadata = {
|
|
7
|
+
number: null,
|
|
8
|
+
session: null,
|
|
9
|
+
date: null,
|
|
10
|
+
type: null,
|
|
11
|
+
authors: null,
|
|
12
|
+
title: xmlDoc.querySelector("docTitle")?.textContent?.trim() || null,
|
|
13
|
+
commission: null,
|
|
14
|
+
};
|
|
15
|
+
// Extract Number
|
|
16
|
+
const docIdAlias = xmlDoc.querySelector('FRBRalias[name="signet-dossier-legislatif-senat"]');
|
|
17
|
+
if (docIdAlias) {
|
|
18
|
+
const value = docIdAlias.getAttribute("value");
|
|
19
|
+
if (value) {
|
|
20
|
+
const match = value.match(/\d+$/);
|
|
21
|
+
if (match)
|
|
22
|
+
metadata.number = match[0];
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
// Extract Session
|
|
26
|
+
const sessionUri = xmlDoc.querySelector("FRBRExpression > FRBRuri")?.getAttribute("value");
|
|
27
|
+
if (sessionUri) {
|
|
28
|
+
const match = sessionUri.match(/\d{4}-\d{4}/);
|
|
29
|
+
if (match)
|
|
30
|
+
metadata.session = match[0];
|
|
31
|
+
}
|
|
32
|
+
// Extract Date
|
|
33
|
+
const depotDate = xmlDoc.querySelector('FRBRdate[name="#depot"]')?.getAttribute("date");
|
|
34
|
+
if (depotDate) {
|
|
35
|
+
metadata.date = DateTime.fromISO(depotDate).setLocale("fr").toFormat("d MMMM yyyy");
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
const presentationDate = xmlDoc.querySelector('FRBRdate[name="#presentation"]')?.getAttribute("date");
|
|
39
|
+
if (presentationDate) {
|
|
40
|
+
metadata.date = DateTime.fromISO(presentationDate).setLocale("fr").toFormat("d MMMM yyyy");
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
// Extract Type
|
|
44
|
+
const bill = xmlDoc.querySelector("bill");
|
|
45
|
+
const typeCode = bill?.getAttribute("name");
|
|
46
|
+
if (typeCode === "ppl") {
|
|
47
|
+
metadata.type = "PROPOSITION DE LOI";
|
|
48
|
+
}
|
|
49
|
+
else if (typeCode === "pjl") {
|
|
50
|
+
metadata.type = "PROJET DE LOI";
|
|
51
|
+
}
|
|
52
|
+
// Extract Authors
|
|
53
|
+
const authorRef = xmlDoc.querySelector('FRBRWork > FRBRauthor[as="#auteur"]')?.getAttribute("href");
|
|
54
|
+
if (authorRef) {
|
|
55
|
+
const authorId = authorRef.replace(/^#/, "");
|
|
56
|
+
const authorPerson = xmlDoc.querySelector(`TLCPerson[eId="${authorId}"]`);
|
|
57
|
+
if (authorPerson) {
|
|
58
|
+
const showAs = authorPerson.getAttribute("showAs");
|
|
59
|
+
if (showAs) {
|
|
60
|
+
metadata.authors = showAs.replace(/, Sénateurs$/, ", Sénateurs et Sénatrices");
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
// Extract Commission
|
|
65
|
+
const commissionNode = xmlDoc.querySelector('TLCOrganization[eId="commission-senat"]') ||
|
|
66
|
+
xmlDoc.querySelector('TLCOrganization[eId^="commission-"]:not([eId*="assemblee"])');
|
|
67
|
+
if (commissionNode) {
|
|
68
|
+
metadata.commission = commissionNode.getAttribute("showAs");
|
|
69
|
+
}
|
|
70
|
+
return metadata;
|
|
71
|
+
}
|
|
72
|
+
/** Stylesheet embedded in the `<style>` element of every generated HTML document. */
const SENAT_HTML_STYLE = `
    body {
      font-family: "URW Bookman", "Bookman Old Style", serif;
      max-width: 800px;
      margin: 40px auto;
      line-height: 1.5;
      color: #333;
    }
    .header {
      text-align: center;
      margin-bottom: 40px;
      border-bottom: 2px solid #333;
      padding-bottom: 20px;
    }
    .header-top {
      font-weight: bold;
      font-size: 1.2em;
      margin-bottom: 10px;
    }
    .header-session {
      text-transform: uppercase;
      font-size: 0.9em;
      margin-bottom: 5px;
    }
    .header-date {
      font-size: 0.9em;
      margin-bottom: 5px;
    }
    .header-number {
      font-weight: bold;
      font-size: 1.1em;
      margin-bottom: 20px;
    }
    .header-type {
      font-weight: bold;
      font-size: 1.5em;
      margin-top: 20px;
    }
    .header-authors {
      margin-top: 20px;
      font-style: italic;
    }
    .header-commission {
      margin-top: 15px;
      font-size: 0.9em;
    }
    h1 {
      text-align: center;
      font-size: 1.8em;
      margin-top: 10px;
    }
    p {
      margin: 0.6em 0;
    }
    p.has-alinea {
      position: relative;
      padding-left: 2.5em;
    }
    .alinea {
      position: absolute;
      left: 0;
      top: 0.15em;
      display: inline-flex;
      align-items: center;
      justify-content: center;
      min-width: 1.5em;
      height: 1.5em;
      padding: 0 0.3em;
      margin-right: 0.3em;
      font-size: 0.75em;
      font-weight: bold;
      color: #555;
      background-color: #f0f0f0;
      border: 1px solid #ccc;
      border-radius: 1em;
    }
    .num {
      font-weight: bold;
      margin-right: 0.2em;
    }
    .article {
      margin-top: 2em;
    }
    .article h3 {
      border-bottom: 1px solid #eee;
      padding-bottom: 5px;
    }
  `;
/**
 * Escapes the characters that are significant in HTML so that text taken from
 * the XML document can be interpolated into the HTML template as plain text.
 */
function escapeHtml(text) {
    return String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}
/** Collapses every run of whitespace (including non-breaking spaces) to one space and trims. */
function normalizeWhitespace(text) {
    return text.replace(/[\s\u00A0]+/g, " ").trim();
}
/**
 * Builds the HTML skeleton (head, stylesheet, header block and main title)
 * from the extracted metadata. All metadata values are HTML-escaped.
 */
function buildHtmlSkeleton(metadata) {
    const dateLine = metadata.date
        ? `<div class="header-date">Enregistré à la Présidence du Sénat le ${escapeHtml(metadata.date)}</div>`
        : "";
    const commissionLine = metadata.commission
        ? `<div class="header-commission">Envoyée à la ${escapeHtml(metadata.commission.toLowerCase())}, sous réserve de la constitution éventuelle d'une commission spéciale dans les conditions prévues par le Règlement.</div>`
        : "";
    return `<!DOCTYPE html>
<html lang="fr">
<head>
  <meta charset="utf-8">
  <title>${escapeHtml(metadata.title || "Document Sénat")}</title>
  <style>${SENAT_HTML_STYLE}</style>
</head>
<body>
  <div class="header">
    <div class="header-top">SÉNAT</div>
    <div class="header-session">SESSION ORDINAIRE DE ${escapeHtml(metadata.session || "....")}</div>
    ${dateLine}
    <div class="header-number">N° ${escapeHtml(metadata.number || "....")}</div>
    <div class="header-type">${escapeHtml(metadata.type || "")}</div>
    <div class="header-authors">${escapeHtml(metadata.authors || "")}</div>
    ${commissionLine}
  </div>
  <h1>${escapeHtml(metadata.title || "")}</h1>
</body>
</html>`;
}
/**
 * Converts a Sénat XML text to a standalone HTML file.
 *
 * The XML is parsed with JSDOM, its metadata is rendered as a header, and the
 * content of its `<body>` element is walked recursively and mirrored into HTML
 * elements appended to the HTML body. The result is written to
 * `outputFilePath`, whose parent directory is created when needed.
 *
 * @param texteXml XML source of the text, as a string.
 * @param outputFilePath Path of the HTML file to write.
 */
export async function convertSenatXmlToHtml(texteXml, outputFilePath) {
    const { document: xmlDoc } = new JSDOM(texteXml, { contentType: "text/xml" }).window;
    const metadata = extractMetadata(xmlDoc);
    const xmlBody = xmlDoc.querySelector("body");
    const { document: htmlDoc } = new JSDOM(buildHtmlSkeleton(metadata)).window;
    const body = htmlDoc.body;
    if (xmlBody) {
        /**
         * Mirrors the children of `xmlNode` under `htmlParent`.
         *
         * `alineaData` carries the attributes of the enclosing `<alinea>` (if
         * any) down to the `<p>` elements that render it. Non-alinea children
         * are emitted first, then each `<alinea>` child with its own context.
         */
        const processNode = (xmlNode, htmlParent, alineaData = null) => {
            const alineaChildren = [];
            const otherChildren = [];
            for (const child of Array.from(xmlNode.childNodes)) {
                // nodeType 1 = element
                if (child.nodeType === 1 && child.tagName.toLowerCase() === "alinea") {
                    alineaChildren.push(child);
                }
                else {
                    otherChildren.push(child);
                }
            }
            for (const child of otherChildren) {
                // nodeType 3 = text
                if (child.nodeType === 3) {
                    htmlParent.appendChild(htmlDoc.createTextNode(child.textContent || ""));
                }
                else if (child.nodeType === 1) {
                    const element = child;
                    const tagName = element.tagName.toLowerCase();
                    let htmlElement = null;
                    switch (tagName) {
                        case "article": {
                            htmlElement = htmlDoc.createElement("div");
                            htmlElement.className = "article";
                            const artId = element.getAttribute("eId");
                            if (artId)
                                htmlElement.id = artId;
                            const artGuid = element.getAttribute("GUID");
                            if (artGuid)
                                htmlElement.setAttribute("data-guid", artGuid);
                            break;
                        }
                        case "num": {
                            const parentTagName = element.parentElement?.tagName.toLowerCase();
                            if (parentTagName === "alinea" && alineaData) {
                                // The alinea number is not rendered here: it is
                                // remembered and prepended to the next <p>.
                                alineaData.numText = element.textContent?.trim();
                                continue;
                            }
                            htmlElement = htmlDoc.createElement("span");
                            htmlElement.className = "num";
                            break;
                        }
                        case "heading":
                            htmlElement = htmlDoc.createElement("h4");
                            break;
                        case "p":
                            htmlElement = htmlDoc.createElement("p");
                            if (alineaData) {
                                htmlElement.classList.add("has-alinea");
                                if (alineaData.id)
                                    htmlElement.id = alineaData.id;
                                if (alineaData.guid)
                                    htmlElement.setAttribute("data-guid", alineaData.guid);
                                const pastille = alineaData.pastille;
                                if (pastille) {
                                    htmlElement.setAttribute("data-pastille", pastille);
                                    // Only the first <p> of an alinea shows the marker.
                                    if (!alineaData.pastilleApplied) {
                                        const span = htmlDoc.createElement("span");
                                        span.className = "alinea";
                                        span.setAttribute("data-alinea", pastille);
                                        span.textContent = pastille;
                                        htmlElement.appendChild(span);
                                        alineaData.pastilleApplied = true;
                                    }
                                }
                                if (alineaData.numText) {
                                    const normalizedNum = normalizeWhitespace(alineaData.numText);
                                    const normalizedP = normalizeWhitespace(element.textContent || "");
                                    // Prepend the number only when the paragraph
                                    // text does not already start with it.
                                    if (normalizedNum && !normalizedP.startsWith(normalizedNum)) {
                                        const numSpan = htmlDoc.createElement("span");
                                        numSpan.className = "num";
                                        numSpan.textContent = alineaData.numText + " ";
                                        htmlElement.appendChild(numSpan);
                                    }
                                    alineaData.numText = null;
                                }
                            }
                            break;
                        case "content":
                            // Transparent wrapper: its children go straight into the current parent.
                            processNode(element, htmlParent, alineaData);
                            continue;
                        case "doctitle":
                            // Skipped in the body; the title is already part of the HTML skeleton.
                            continue;
                        case "i":
                        case "b":
                        case "u":
                        case "sup":
                        case "sub":
                            htmlElement = htmlDoc.createElement(tagName);
                            break;
                        default:
                            // Unknown tags are kept as spans that record the original tag name.
                            htmlElement = htmlDoc.createElement("span");
                            htmlElement.setAttribute("data-xml-tag", tagName);
                            break;
                    }
                    if (htmlElement) {
                        htmlParent.appendChild(htmlElement);
                        processNode(element, htmlElement, alineaData);
                    }
                }
            }
            for (const element of alineaChildren) {
                const nextAlineaData = {
                    id: element.getAttribute("eId"),
                    guid: element.getAttribute("GUID"),
                    pastille: element.getAttribute("data:pastille"),
                    pastilleApplied: false,
                };
                processNode(element, htmlParent, nextAlineaData);
            }
        };
        processNode(xmlBody, body);
    }
    const htmlContent = "<!DOCTYPE html>\n" + htmlDoc.documentElement.outerHTML;
    await fs.ensureDir(path.dirname(outputFilePath));
    await fs.outputFile(outputFilePath, htmlContent);
}
|
package/lib/src/loaders.js
CHANGED
|
@@ -199,7 +199,7 @@ export function* iterLoadSenatTextes(dataDir, session, options = {}) {
|
|
|
199
199
|
const texteId = texte["id"];
|
|
200
200
|
const { item: texteContent } = loadSenatTexteContent(dataDir, texte["session"], texteId);
|
|
201
201
|
if (texteContent) {
|
|
202
|
-
texteItem.item
|
|
202
|
+
Object.assign(texteItem.item, texteContent);
|
|
203
203
|
}
|
|
204
204
|
yield texteItem;
|
|
205
205
|
}
|
package/lib/src/parsers/texte.js
CHANGED
|
@@ -118,16 +118,16 @@ export function transformTexte(document) {
|
|
|
118
118
|
const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date");
|
|
119
119
|
return {
|
|
120
120
|
titre: preambleElement?.querySelector("docTitle")?.textContent || null,
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
121
|
+
titre_court: metaElement?.querySelector("FRBRalias[name='intitule-court']")?.getAttribute("value") || null,
|
|
122
|
+
signet_dossier: metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")?.getAttribute("value") || null,
|
|
123
|
+
url_dossier_senat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
|
|
124
|
+
url_dossier_assemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
|
|
125
125
|
type: identificationParts?.["type"] || null,
|
|
126
126
|
session: sessionYears && sessionYears.length > 0 ? sessionYears[0] : null,
|
|
127
127
|
numero: identificationParts?.["numTexte"] ? parseInt(identificationParts["numTexte"]) : null,
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
128
|
+
date_presentation: datePresentation ? new Date(datePresentation) : null,
|
|
129
|
+
date_depot: dateDepot ? new Date(dateDepot) : null,
|
|
130
|
+
date_publication_xml: datePublicationXml ? new Date(datePublicationXml) : null,
|
|
131
131
|
version: identificationParts?.["version"] ? identificationParts["version"] : null,
|
|
132
132
|
divisions: bodyElement ? flattenTexte(bodyElement) : [],
|
|
133
133
|
};
|
|
@@ -234,7 +234,7 @@ async function convertTextes(dataDir, options) {
|
|
|
234
234
|
const texteName = path.parse(texte["url"]).name;
|
|
235
235
|
const texteDir = path.join(originalTextesDir, `${session}`, texteName);
|
|
236
236
|
// oritxtcod = 1 corresponds to "Texte de loi déposé au Sénat"
|
|
237
|
-
const hasExposeDesMotifs = texte["origine"] === "Sénat"
|
|
237
|
+
const hasExposeDesMotifs = texte["origine"] === "déposé au Sénat" || texte["origine"] === "transmis au Sénat";
|
|
238
238
|
const metadata = {
|
|
239
239
|
name: texteName,
|
|
240
240
|
session: texte["session"],
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import fs from "fs-extra";
|
|
2
|
+
import path from "path";
|
|
3
|
+
import commandLineArgs from "command-line-args";
|
|
4
|
+
import { convertSenatXmlToHtml } from "../conversion_textes";
|
|
5
|
+
const optionDefinitions = [
    { name: "input", alias: "i", type: String, defaultOption: true },
    { name: "output", alias: "o", type: String },
    { name: "help", alias: "h", type: Boolean },
];
/** Returns a printable message for any thrown value, not only Error instances. */
function describeError(error) {
    return error instanceof Error ? error.message : String(error);
}
/** Prints the command-line usage. */
function printUsage() {
    console.log("Usage: npx tsx src/scripts/convert_xml_to_html.ts <input_xml_path> [output_html_path]");
    console.log("Options:");
    console.log(" -i, --input <path> Input XML file path (default option)");
    console.log(" -o, --output <path> Output HTML file path");
    console.log(" -h, --help Show this help message");
}
/**
 * Command-line entry point: converts one Sénat XML file to HTML.
 *
 * The input path is the default (positional) option; the output path may be
 * given with `-o` or as a second positional argument, and otherwise defaults
 * to the input path with `.xml` replaced by `.html`.
 */
async function main() {
    let options;
    try {
        options = commandLineArgs(optionDefinitions, { stopAtFirstUnknown: true });
    }
    catch (err) {
        console.error(`Error: ${describeError(err)}`);
        process.exit(1);
    }
    // Handle positional arguments if not using flags
    const argv = options["_unknown"] || [];
    if (!options["output"] && argv.length > 0) {
        options["output"] = argv[0];
    }
    if (options["help"] || !options["input"]) {
        printUsage();
        // A missing input is a usage error: report it through the exit status
        // so calling scripts can detect it. An explicit --help stays a success.
        if (!options["help"]) {
            process.exitCode = 1;
        }
        return;
    }
    const inputPath = path.resolve(options["input"]);
    let outputPath = options["output"];
    if (!outputPath) {
        outputPath = inputPath.replace(/\.xml$/, ".html");
        // Input without a ".xml" suffix: append instead, so the input is never overwritten.
        if (outputPath === inputPath) {
            outputPath += ".html";
        }
    }
    else {
        outputPath = path.resolve(outputPath);
    }
    if (!fs.existsSync(inputPath)) {
        console.error(`Error: Input file not found: ${inputPath}`);
        process.exit(1);
    }
    try {
        const xmlContent = await fs.readFile(inputPath, "utf-8");
        await convertSenatXmlToHtml(xmlContent, outputPath);
        console.log("Successfully converted:");
        console.log(` From: ${inputPath}`);
        console.log(` To: ${outputPath}`);
    }
    catch (error) {
        console.error(`Error during conversion: ${describeError(error)}`);
        process.exit(1);
    }
}
main().catch((error) => {
    console.error(error);
    process.exit(1);
});
|
|
@@ -214,7 +214,7 @@ async function parseDocument(session, transformedTextesDir, textePath, texteName
|
|
|
214
214
|
console.log("Parsing exposé des motifs…");
|
|
215
215
|
}
|
|
216
216
|
const exposeDesMotifsHtml = textDecoder.decode(exposeDesMotifs);
|
|
217
|
-
parsedTexte.
|
|
217
|
+
parsedTexte.expose_motifs = parseExposeDesMotifs(exposeDesMotifsHtml);
|
|
218
218
|
}
|
|
219
219
|
const transformedTexteDir = path.join(transformedTextesDir, `${session ?? UNDEFINED_SESSION}`, texteName);
|
|
220
220
|
await fs.outputJSON(path.join(transformedTexteDir, `${texteName}.json`), parsedTexte, { spaces: 2 });
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { iterLoadSenatTextes, } from "../loaders";
|
|
2
2
|
import commandLineArgs from "command-line-args";
|
|
3
3
|
import { dataDirDefaultOption } from "./shared/cli_helpers";
|
|
4
4
|
const optionsDefinitions = [dataDirDefaultOption];
|
|
5
5
|
const options = commandLineArgs(optionsDefinitions);
|
|
6
|
-
const session =
|
|
6
|
+
const session = 2025;
|
|
7
7
|
const sinceCommit = undefined;
|
|
8
|
-
for (const { item:
|
|
8
|
+
for (const { item: texte } of iterLoadSenatTextes(options["dataDir"], session, {
|
|
9
9
|
sinceCommit: sinceCommit,
|
|
10
10
|
})) {
|
|
11
|
-
console.log(
|
|
11
|
+
console.log(texte);
|
|
12
12
|
}
|
package/lib/src/types/texte.d.ts
CHANGED
|
@@ -24,19 +24,19 @@ export interface DocumentMetadata {
|
|
|
24
24
|
}
|
|
25
25
|
export interface FlatTexte {
|
|
26
26
|
titre: string | null;
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
27
|
+
titre_court: string | null;
|
|
28
|
+
signet_dossier: string | null;
|
|
29
|
+
url_dossier_senat: string | null;
|
|
30
|
+
url_dossier_assemblee: string | null;
|
|
31
31
|
type: string | null;
|
|
32
32
|
session: string | null;
|
|
33
33
|
numero: number | null;
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
date_presentation: Date | null;
|
|
35
|
+
date_depot: Date | null;
|
|
36
|
+
date_publication_xml: Date | null;
|
|
37
37
|
version: Version | null;
|
|
38
38
|
divisions: Division[];
|
|
39
|
-
|
|
39
|
+
expose_motifs?: ExposeDesMotifs | null;
|
|
40
40
|
}
|
|
41
41
|
export type Version = "RECT" | "RECT_BIS" | "RECT_TER" | "RECT_QUATER" | "RECT_QUINQUIES";
|
|
42
42
|
export interface Step {
|