@tricoteuses/senat 2.21.5 → 2.21.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
/**
 * Header metadata read from a Sénat bill XML document by `extractMetadata`.
 *
 * Every field is `null` when the corresponding element or attribute is not
 * found in the document.
 */
export interface SenatMetadata {
  /** Trailing digits of the `signet-dossier-legislatif-senat` alias value. */
  number: string | null;
  /** First `YYYY-YYYY` span found in the `FRBRExpression > FRBRuri` value. */
  session: string | null;
  /** `#depot` date (else `#presentation` date), formatted in French as `d MMMM yyyy`. */
  date: string | null;
  /** "PROPOSITION DE LOI" for `ppl` bills, "PROJET DE LOI" for `pjl` bills. */
  type: string | null;
  /** `showAs` label of the `TLCPerson` referenced by the `#auteur` author. */
  authors: string | null;
  /** Trimmed text of the `docTitle` element. */
  title: string | null;
  /** `showAs` label of the commission `TLCOrganization`. */
  commission: string | null;
}
10
/**
 * Reads the header metadata (number, session, date, type, authors, title,
 * commission) from a parsed Sénat bill XML document.
 *
 * @param xmlDoc - The parsed XML document to query.
 * @returns The metadata, with `null` for every field that could not be found.
 */
export declare function extractMetadata(xmlDoc: Document): SenatMetadata;
11
/**
 * Converts the XML source of a Sénat bill into a standalone HTML page and
 * writes it to disk.
 *
 * @param texteXml - The XML content of the bill, as a string.
 * @param outputFilePath - Path of the HTML file to write.
 * @returns A promise that resolves once the file has been written.
 */
export declare function convertSenatXmlToHtml(texteXml: string, outputFilePath: string): Promise<void>;
@@ -0,0 +1,307 @@
1
+ import { JSDOM } from "jsdom";
2
+ import fs from "fs-extra";
3
+ import path from "path";
4
+ import { DateTime } from "luxon";
5
/**
 * Formats an ISO date as a French long date (`d MMMM yyyy`).
 *
 * @param {string | null | undefined} isoDate ISO 8601 date string.
 * @returns {string | null} The formatted date, or `null` when the input is
 *   missing or is not a valid ISO date.
 */
function formatFrenchDate(isoDate) {
    if (!isoDate) {
        return null;
    }
    const parsed = DateTime.fromISO(isoDate);
    // An invalid DateTime would otherwise be rendered as the literal string
    // "Invalid DateTime" by toFormat().
    return parsed.isValid ? parsed.setLocale("fr").toFormat("d MMMM yyyy") : null;
}

/**
 * Extracts the header metadata of a Sénat bill from its parsed XML document.
 *
 * @param {Document} xmlDoc Parsed XML document.
 * @returns {{number: string|null, session: string|null, date: string|null,
 *   type: string|null, authors: string|null, title: string|null,
 *   commission: string|null}} Metadata; a field is `null` when not found.
 */
export function extractMetadata(xmlDoc) {
    const metadata = {
        number: null,
        session: null,
        date: null,
        type: null,
        authors: null,
        title: xmlDoc.querySelector("docTitle")?.textContent?.trim() || null,
        commission: null,
    };

    // Number: trailing digits of the legislative-file bookmark alias.
    const signet = xmlDoc
        .querySelector('FRBRalias[name="signet-dossier-legislatif-senat"]')
        ?.getAttribute("value");
    metadata.number = signet?.match(/\d+$/)?.[0] ?? null;

    // Session: first "YYYY-YYYY" span of the expression URI.
    const sessionUri = xmlDoc.querySelector("FRBRExpression > FRBRuri")?.getAttribute("value");
    metadata.session = sessionUri?.match(/\d{4}-\d{4}/)?.[0] ?? null;

    // Date: the deposit date wins; the presentation date is used when the
    // deposit date is absent or cannot be parsed.
    metadata.date =
        formatFrenchDate(xmlDoc.querySelector('FRBRdate[name="#depot"]')?.getAttribute("date")) ??
        formatFrenchDate(xmlDoc.querySelector('FRBRdate[name="#presentation"]')?.getAttribute("date"));

    // Type: only the "ppl" and "pjl" bill codes are mapped to a label.
    const typeCode = xmlDoc.querySelector("bill")?.getAttribute("name");
    if (typeCode === "ppl") {
        metadata.type = "PROPOSITION DE LOI";
    }
    else if (typeCode === "pjl") {
        metadata.type = "PROJET DE LOI";
    }

    // Authors: resolve the "#id" reference of the author to its TLCPerson.
    const authorRef = xmlDoc.querySelector('FRBRWork > FRBRauthor[as="#auteur"]')?.getAttribute("href");
    if (authorRef) {
        const authorId = authorRef.replace(/^#/, "");
        // Compare eId values in JavaScript rather than interpolating the id
        // into a CSS selector: an id containing a quote or bracket would make
        // querySelector() throw a SyntaxError.
        const authorPerson = Array.from(xmlDoc.querySelectorAll("TLCPerson")).find((person) => person.getAttribute("eId") === authorId);
        const showAs = authorPerson?.getAttribute("showAs");
        if (showAs) {
            metadata.authors = showAs.replace(/, Sénateurs$/, ", Sénateurs et Sénatrices");
        }
    }

    // Commission: the Sénat commission if declared, otherwise the first
    // "commission-*" organization whose id does not mention "assemblee".
    const commissionNode = xmlDoc.querySelector('TLCOrganization[eId="commission-senat"]') ||
        xmlDoc.querySelector('TLCOrganization[eId^="commission-"]:not([eId*="assemblee"])');
    if (commissionNode) {
        metadata.commission = commissionNode.getAttribute("showAs");
    }
    return metadata;
}
72
/**
 * Stylesheet embedded in the generated HTML page.
 * NOTE(review): it targets `.article h3`, but XML headings are emitted as
 * `<h4>` by the converter below — confirm which one is intended.
 */
const SENAT_HTML_STYLE = `
    body {
      font-family: "URW Bookman", "Bookman Old Style", serif;
      max-width: 800px;
      margin: 40px auto;
      line-height: 1.5;
      color: #333;
    }
    .header {
      text-align: center;
      margin-bottom: 40px;
      border-bottom: 2px solid #333;
      padding-bottom: 20px;
    }
    .header-top {
      font-weight: bold;
      font-size: 1.2em;
      margin-bottom: 10px;
    }
    .header-session {
      text-transform: uppercase;
      font-size: 0.9em;
      margin-bottom: 5px;
    }
    .header-date {
      font-size: 0.9em;
      margin-bottom: 5px;
    }
    .header-number {
      font-weight: bold;
      font-size: 1.1em;
      margin-bottom: 20px;
    }
    .header-type {
      font-weight: bold;
      font-size: 1.5em;
      margin-top: 20px;
    }
    .header-authors {
      margin-top: 20px;
      font-style: italic;
    }
    .header-commission {
      margin-top: 15px;
      font-size: 0.9em;
    }
    h1 {
      text-align: center;
      font-size: 1.8em;
      margin-top: 10px;
    }
    p {
      margin: 0.6em 0;
    }
    p.has-alinea {
      position: relative;
      padding-left: 2.5em;
    }
    .alinea {
      position: absolute;
      left: 0;
      top: 0.15em;
      display: inline-flex;
      align-items: center;
      justify-content: center;
      min-width: 1.5em;
      height: 1.5em;
      padding: 0 0.3em;
      margin-right: 0.3em;
      font-size: 0.75em;
      font-weight: bold;
      color: #555;
      background-color: #f0f0f0;
      border: 1px solid #ccc;
      border-radius: 1em;
    }
    .num {
      font-weight: bold;
      margin-right: 0.2em;
    }
    .article {
      margin-top: 2em;
    }
    .article h3 {
      border-bottom: 1px solid #eee;
      padding-bottom: 5px;
    }
  `;

/**
 * Escapes the characters that are significant in HTML text and attributes,
 * so that values read from the XML cannot inject markup into the page.
 *
 * @param {string} text Raw text.
 * @returns {string} Text safe to interpolate into an HTML template.
 */
function escapeHtml(text) {
    return text
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}

/**
 * Collapses every run of whitespace (no-break spaces included) into a single
 * space and trims both ends.
 *
 * @param {string} text Text to normalize.
 * @returns {string} Normalized text.
 */
function normalizeWhitespace(text) {
    // Single backslashes: a doubled "\\s" inside a regex literal would match
    // a literal backslash and the letter "s" instead of whitespace.
    return text.replace(/[\s\u00A0]+/g, " ").trim();
}

/**
 * Builds the HTML page skeleton (head, stylesheet, header block and title)
 * into which the converted body is appended.
 *
 * @param {object} metadata Result of `extractMetadata`.
 * @returns {string} HTML source of the skeleton.
 */
function buildHtmlSkeleton(metadata) {
    const dateBlock = metadata.date
        ? `<div class="header-date">Enregistré à la Présidence du Sénat le ${escapeHtml(metadata.date)}</div>`
        : "";
    const commissionBlock = metadata.commission
        ? `<div class="header-commission">Envoyée à la ${escapeHtml(metadata.commission.toLowerCase())}, sous réserve de la constitution éventuelle d'une commission spéciale dans les conditions prévues par le Règlement.</div>`
        : "";
    return `<!DOCTYPE html>
<html lang="fr">
<head>
  <meta charset="utf-8">
  <title>${escapeHtml(metadata.title || "Document Sénat")}</title>
  <style>${SENAT_HTML_STYLE}</style>
</head>
<body>
  <div class="header">
    <div class="header-top">SÉNAT</div>
    <div class="header-session">SESSION ORDINAIRE DE ${escapeHtml(metadata.session || "....")}</div>
    ${dateBlock}
    <div class="header-number">N° ${escapeHtml(metadata.number || "....")}</div>
    <div class="header-type">${escapeHtml(metadata.type || "")}</div>
    <div class="header-authors">${escapeHtml(metadata.authors || "")}</div>
    ${commissionBlock}
  </div>
  <h1>${escapeHtml(metadata.title || "")}</h1>
</body>
</html>`;
}

/**
 * Decorates the `<p>` generated for a paragraph that lives inside an alinea:
 * id / GUID, the numbered "pastille" badge (once per alinea) and the alinea
 * number when the paragraph text does not already start with it.
 *
 * @param {Document} htmlDoc Target HTML document (used to create nodes).
 * @param {Element} paragraph The generated `<p>` element.
 * @param {Element} xmlParagraph The source XML `<p>` element.
 * @param {object} alineaData Mutable state of the enclosing alinea.
 */
function decorateAlineaParagraph(htmlDoc, paragraph, xmlParagraph, alineaData) {
    paragraph.classList.add("has-alinea");
    // Every <p> of the same alinea receives the alinea's id and GUID.
    if (alineaData.id) {
        paragraph.id = alineaData.id;
    }
    if (alineaData.guid) {
        paragraph.setAttribute("data-guid", alineaData.guid);
    }
    const pastille = alineaData.pastille;
    if (pastille) {
        paragraph.setAttribute("data-pastille", pastille);
        // The visible badge is only rendered on the first paragraph.
        if (!alineaData.pastilleApplied) {
            const badge = htmlDoc.createElement("span");
            badge.className = "alinea";
            badge.setAttribute("data-alinea", pastille);
            badge.textContent = pastille;
            paragraph.appendChild(badge);
            alineaData.pastilleApplied = true;
        }
    }
    if (alineaData.numText) {
        const normalizedNum = normalizeWhitespace(alineaData.numText);
        const normalizedText = normalizeWhitespace(xmlParagraph.textContent || "");
        // Re-insert the alinea number unless the paragraph already opens with it.
        if (normalizedNum && !normalizedText.startsWith(normalizedNum)) {
            const numSpan = htmlDoc.createElement("span");
            numSpan.className = "num";
            numSpan.textContent = alineaData.numText + " ";
            paragraph.appendChild(numSpan);
        }
        // The number is consumed by the first paragraph that sees it.
        alineaData.numText = null;
    }
}

/**
 * Recursively converts the children of `xmlNode` and appends the result to
 * `htmlParent`.
 *
 * `<alinea>` children are converted after all the other children of the same
 * node, so their output follows the non-alinea siblings.
 *
 * @param {Document} htmlDoc Target HTML document (used to create nodes).
 * @param {Node} xmlNode XML node whose children are converted.
 * @param {Element} htmlParent HTML element receiving the converted nodes.
 * @param {object | null} alineaData State of the enclosing alinea, if any.
 */
function appendConvertedChildren(htmlDoc, xmlNode, htmlParent, alineaData = null) {
    const alineas = [];
    const others = [];
    for (const child of Array.from(xmlNode.childNodes)) {
        const isAlinea = child.nodeType === 1 && child.tagName.toLowerCase() === "alinea";
        (isAlinea ? alineas : others).push(child);
    }
    for (const child of others) {
        if (child.nodeType === 3) {
            // Text node: copied verbatim.
            htmlParent.appendChild(htmlDoc.createTextNode(child.textContent || ""));
            continue;
        }
        if (child.nodeType !== 1) {
            // Comments, processing instructions, etc. are dropped.
            continue;
        }
        const tagName = child.tagName.toLowerCase();
        let htmlElement = null;
        switch (tagName) {
            case "article": {
                htmlElement = htmlDoc.createElement("div");
                htmlElement.className = "article";
                const articleId = child.getAttribute("eId");
                if (articleId) {
                    htmlElement.id = articleId;
                }
                const articleGuid = child.getAttribute("GUID");
                if (articleGuid) {
                    htmlElement.setAttribute("data-guid", articleGuid);
                }
                break;
            }
            case "num": {
                // The number of an alinea is not rendered here: it is kept
                // aside and re-inserted by the alinea's first paragraph.
                if (alineaData && child.parentElement?.tagName.toLowerCase() === "alinea") {
                    alineaData.numText = child.textContent?.trim();
                    continue;
                }
                htmlElement = htmlDoc.createElement("span");
                htmlElement.className = "num";
                break;
            }
            case "heading":
                htmlElement = htmlDoc.createElement("h4");
                break;
            case "p": {
                htmlElement = htmlDoc.createElement("p");
                if (alineaData) {
                    decorateAlineaParagraph(htmlDoc, htmlElement, child, alineaData);
                }
                break;
            }
            case "content":
                // Transparent wrapper: its children go straight into the parent.
                appendConvertedChildren(htmlDoc, child, htmlParent, alineaData);
                continue;
            case "doctitle":
                // Already rendered as the page <h1>.
                continue;
            case "i":
            case "b":
            case "u":
            case "sup":
            case "sub":
                htmlElement = htmlDoc.createElement(tagName);
                break;
            default:
                // Unknown tags are kept as spans that remember their origin.
                htmlElement = htmlDoc.createElement("span");
                htmlElement.setAttribute("data-xml-tag", tagName);
                break;
        }
        htmlParent.appendChild(htmlElement);
        appendConvertedChildren(htmlDoc, child, htmlElement, alineaData);
    }
    for (const alinea of alineas) {
        // The alinea itself produces no element; its state is attached to the
        // paragraphs found underneath.
        appendConvertedChildren(htmlDoc, alinea, htmlParent, {
            id: alinea.getAttribute("eId"),
            guid: alinea.getAttribute("GUID"),
            pastille: alinea.getAttribute("data:pastille"),
            pastilleApplied: false,
        });
    }
}

/**
 * Converts the XML source of a Sénat bill into a standalone HTML page and
 * writes it to `outputFilePath` (parent directories are created as needed).
 *
 * @param {string} texteXml XML content of the bill.
 * @param {string} outputFilePath Path of the HTML file to write.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
export async function convertSenatXmlToHtml(texteXml, outputFilePath) {
    const { document: xmlDoc } = new JSDOM(texteXml, { contentType: "text/xml" }).window;
    const metadata = extractMetadata(xmlDoc);
    const { document: htmlDoc } = new JSDOM(buildHtmlSkeleton(metadata)).window;
    const xmlBody = xmlDoc.querySelector("body");
    if (xmlBody) {
        appendConvertedChildren(htmlDoc, xmlBody, htmlDoc.body);
    }
    const htmlContent = "<!DOCTYPE html>\n" + htmlDoc.documentElement.outerHTML;
    await fs.ensureDir(path.dirname(outputFilePath));
    await fs.outputFile(outputFilePath, htmlContent);
}
@@ -199,7 +199,7 @@ export function* iterLoadSenatTextes(dataDir, session, options = {}) {
199
199
  const texteId = texte["id"];
200
200
  const { item: texteContent } = loadSenatTexteContent(dataDir, texte["session"], texteId);
201
201
  if (texteContent) {
202
- texteItem.item.divisions = texteContent.divisions;
202
+ Object.assign(texteItem.item, texteContent);
203
203
  }
204
204
  yield texteItem;
205
205
  }
@@ -118,16 +118,16 @@ export function transformTexte(document) {
118
118
  const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date");
119
119
  return {
120
120
  titre: preambleElement?.querySelector("docTitle")?.textContent || null,
121
- titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")?.getAttribute("value") || null,
122
- signetDossier: metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")?.getAttribute("value") || null,
123
- urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
124
- urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
121
+ titre_court: metaElement?.querySelector("FRBRalias[name='intitule-court']")?.getAttribute("value") || null,
122
+ signet_dossier: metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")?.getAttribute("value") || null,
123
+ url_dossier_senat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
124
+ url_dossier_assemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
125
125
  type: identificationParts?.["type"] || null,
126
126
  session: sessionYears && sessionYears.length > 0 ? sessionYears[0] : null,
127
127
  numero: identificationParts?.["numTexte"] ? parseInt(identificationParts["numTexte"]) : null,
128
- datePresentation: datePresentation ? new Date(datePresentation) : null,
129
- dateDepot: dateDepot ? new Date(dateDepot) : null,
130
- datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
128
+ date_presentation: datePresentation ? new Date(datePresentation) : null,
129
+ date_depot: dateDepot ? new Date(dateDepot) : null,
130
+ date_publication_xml: datePublicationXml ? new Date(datePublicationXml) : null,
131
131
  version: identificationParts?.["version"] ? identificationParts["version"] : null,
132
132
  divisions: bodyElement ? flattenTexte(bodyElement) : [],
133
133
  };
@@ -234,7 +234,7 @@ async function convertTextes(dataDir, options) {
234
234
  const texteName = path.parse(texte["url"]).name;
235
235
  const texteDir = path.join(originalTextesDir, `${session}`, texteName);
236
236
  // oritxtcod = 1 corresponds to "Texte de loi déposé au Sénat"
237
- const hasExposeDesMotifs = texte["origine"] === "Sénat" && texte["ordre_origine"] === "1";
237
+ const hasExposeDesMotifs = texte["origine"] === "déposé au Sénat" || texte["origine"] === "transmis au Sénat";
238
238
  const metadata = {
239
239
  name: texteName,
240
240
  session: texte["session"],
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,62 @@
1
+ import fs from "fs-extra";
2
+ import path from "path";
3
+ import commandLineArgs from "command-line-args";
4
+ import { convertSenatXmlToHtml } from "../conversion_textes";
5
/** Command-line options accepted by the script; `input` is the default (positional) option. */
const optionDefinitions = [
    { name: "input", alias: "i", type: String, defaultOption: true },
    { name: "output", alias: "o", type: String },
    { name: "help", alias: "h", type: Boolean },
];

/**
 * Prints the usage text through the given logger.
 *
 * @param {(line: string) => void} log `console.log` or `console.error`.
 */
function printUsage(log) {
    log("Usage: npx tsx src/scripts/convert_xml_to_html.ts <input_xml_path> [output_html_path]");
    log("Options:");
    log(" -i, --input <path> Input XML file path (default option)");
    log(" -o, --output <path> Output HTML file path");
    log(" -h, --help Show this help message");
}

/**
 * Returns a printable message for any thrown value (not only `Error`s).
 *
 * @param {unknown} error The caught value.
 * @returns {string} Its message, or its string form.
 */
function describeError(error) {
    return error instanceof Error ? error.message : String(error);
}

/**
 * Entry point: parses the command line, reads the input XML file and writes
 * the converted HTML file next to it (or at the requested output path).
 *
 * Exits with status 1 on invalid options, missing input argument, missing
 * input file or conversion failure.
 */
async function main() {
    let options;
    try {
        options = commandLineArgs(optionDefinitions, { stopAtFirstUnknown: true });
    }
    catch (err) {
        console.error(`Error: ${describeError(err)}`);
        process.exit(1);
    }
    // A second positional argument, if any, is the output path.
    const positional = options["_unknown"] || [];
    if (!options["output"] && positional.length > 0) {
        options["output"] = positional[0];
    }
    if (options["help"]) {
        printUsage(console.log);
        return;
    }
    if (!options["input"]) {
        // Usage error: report on stderr with a failing status so that calling
        // scripts can detect it (explicit --help above still succeeds).
        printUsage(console.error);
        process.exitCode = 1;
        return;
    }
    const inputPath = path.resolve(options["input"]);
    let outputPath = options["output"];
    if (!outputPath) {
        // Default: same path with ".xml" swapped for ".html", or ".html"
        // appended when the input does not end with ".xml".
        outputPath = inputPath.replace(/\.xml$/, ".html");
        if (outputPath === inputPath) {
            outputPath += ".html";
        }
    }
    else {
        outputPath = path.resolve(outputPath);
    }
    if (!fs.existsSync(inputPath)) {
        console.error(`Error: Input file not found: ${inputPath}`);
        process.exit(1);
    }
    try {
        const xmlContent = await fs.readFile(inputPath, "utf-8");
        await convertSenatXmlToHtml(xmlContent, outputPath);
        console.log("Successfully converted:");
        console.log(` From: ${inputPath}`);
        console.log(` To: ${outputPath}`);
    }
    catch (error) {
        console.error(`Error during conversion: ${describeError(error)}`);
        process.exit(1);
    }
}

// Last-resort handler for anything main() did not catch itself.
main().catch((error) => {
    console.error(error);
    process.exit(1);
});
@@ -214,7 +214,7 @@ async function parseDocument(session, transformedTextesDir, textePath, texteName
214
214
  console.log("Parsing exposé des motifs…");
215
215
  }
216
216
  const exposeDesMotifsHtml = textDecoder.decode(exposeDesMotifs);
217
- parsedTexte.exposeDesMotifs = parseExposeDesMotifs(exposeDesMotifsHtml);
217
+ parsedTexte.expose_motifs = parseExposeDesMotifs(exposeDesMotifsHtml);
218
218
  }
219
219
  const transformedTexteDir = path.join(transformedTextesDir, `${session ?? UNDEFINED_SESSION}`, texteName);
220
220
  await fs.outputJSON(path.join(transformedTexteDir, `${texteName}.json`), parsedTexte, { spaces: 2 });
@@ -1,12 +1,12 @@
1
- import { iterLoadSenatRapports, } from "../loaders";
1
+ import { iterLoadSenatTextes, } from "../loaders";
2
2
  import commandLineArgs from "command-line-args";
3
3
  import { dataDirDefaultOption } from "./shared/cli_helpers";
4
4
  const optionsDefinitions = [dataDirDefaultOption];
5
5
  const options = commandLineArgs(optionsDefinitions);
6
- const session = 2024;
6
+ const session = 2025;
7
7
  const sinceCommit = undefined;
8
- for (const { item: rapport } of iterLoadSenatRapports(options["dataDir"], session, {
8
+ for (const { item: texte } of iterLoadSenatTextes(options["dataDir"], session, {
9
9
  sinceCommit: sinceCommit,
10
10
  })) {
11
- console.log(rapport);
11
+ console.log(texte);
12
12
  }
@@ -24,19 +24,19 @@ export interface DocumentMetadata {
24
24
  }
25
25
  export interface FlatTexte {
26
26
  titre: string | null;
27
- titreCourt: string | null;
28
- signetDossier: string | null;
29
- urlDossierSenat: string | null;
30
- urlDossierAssemblee: string | null;
27
+ titre_court: string | null;
28
+ signet_dossier: string | null;
29
+ url_dossier_senat: string | null;
30
+ url_dossier_assemblee: string | null;
31
31
  type: string | null;
32
32
  session: string | null;
33
33
  numero: number | null;
34
- datePresentation: Date | null;
35
- dateDepot: Date | null;
36
- datePublicationXml: Date | null;
34
+ date_presentation: Date | null;
35
+ date_depot: Date | null;
36
+ date_publication_xml: Date | null;
37
37
  version: Version | null;
38
38
  divisions: Division[];
39
- exposeDesMotifs?: ExposeDesMotifs | null;
39
+ expose_motifs?: ExposeDesMotifs | null;
40
40
  }
41
41
  export type Version = "RECT" | "RECT_BIS" | "RECT_TER" | "RECT_QUATER" | "RECT_QUINQUIES";
42
42
  export interface Step {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.21.5",
3
+ "version": "2.21.6",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",