@tricoteuses/senat 2.22.13 → 2.22.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/lib/src/loaders.d.ts +2 -8
  2. package/lib/src/loaders.js +7 -25
  3. package/lib/src/scripts/retrieve_cr_commission.js +1 -1
  4. package/lib/src/scripts/retrieve_cr_seance.js +1 -1
  5. package/lib/src/scripts/retrieve_videos.js +2 -2
  6. package/lib/tests/test_iter_load.test.js +17 -0
  7. package/package.json +2 -2
  8. package/lib/config.d.ts +0 -21
  9. package/lib/config.js +0 -27
  10. package/lib/databases.d.ts +0 -2
  11. package/lib/databases.js +0 -26
  12. package/lib/datasets.d.ts +0 -34
  13. package/lib/datasets.js +0 -233
  14. package/lib/git.d.ts +0 -26
  15. package/lib/git.js +0 -167
  16. package/lib/index.d.ts +0 -13
  17. package/lib/index.js +0 -1
  18. package/lib/loaders.d.ts +0 -58
  19. package/lib/loaders.js +0 -286
  20. package/lib/model/agenda.d.ts +0 -6
  21. package/lib/model/agenda.js +0 -148
  22. package/lib/model/ameli.d.ts +0 -51
  23. package/lib/model/ameli.js +0 -147
  24. package/lib/model/commission.d.ts +0 -18
  25. package/lib/model/commission.js +0 -269
  26. package/lib/model/debats.d.ts +0 -67
  27. package/lib/model/debats.js +0 -95
  28. package/lib/model/documents.d.ts +0 -12
  29. package/lib/model/documents.js +0 -138
  30. package/lib/model/dosleg.d.ts +0 -7
  31. package/lib/model/dosleg.js +0 -326
  32. package/lib/model/index.d.ts +0 -7
  33. package/lib/model/index.js +0 -7
  34. package/lib/model/questions.d.ts +0 -45
  35. package/lib/model/questions.js +0 -89
  36. package/lib/model/scrutins.d.ts +0 -13
  37. package/lib/model/scrutins.js +0 -114
  38. package/lib/model/seance.d.ts +0 -3
  39. package/lib/model/seance.js +0 -267
  40. package/lib/model/sens.d.ts +0 -146
  41. package/lib/model/sens.js +0 -454
  42. package/lib/model/texte.d.ts +0 -7
  43. package/lib/model/texte.js +0 -228
  44. package/lib/model/util.d.ts +0 -9
  45. package/lib/model/util.js +0 -38
  46. package/lib/parsers/texte.d.ts +0 -7
  47. package/lib/parsers/texte.js +0 -228
  48. package/lib/raw_types/ameli.d.ts +0 -914
  49. package/lib/raw_types/ameli.js +0 -5
  50. package/lib/raw_types/debats.d.ts +0 -207
  51. package/lib/raw_types/debats.js +0 -5
  52. package/lib/raw_types/dosleg.d.ts +0 -1619
  53. package/lib/raw_types/dosleg.js +0 -5
  54. package/lib/raw_types/questions.d.ts +0 -423
  55. package/lib/raw_types/questions.js +0 -5
  56. package/lib/raw_types/senat.d.ts +0 -11372
  57. package/lib/raw_types/senat.js +0 -5
  58. package/lib/raw_types/sens.d.ts +0 -8248
  59. package/lib/raw_types/sens.js +0 -5
  60. package/lib/raw_types_schemats/ameli.d.ts +0 -539
  61. package/lib/raw_types_schemats/ameli.js +0 -2
  62. package/lib/raw_types_schemats/debats.d.ts +0 -127
  63. package/lib/raw_types_schemats/debats.js +0 -2
  64. package/lib/raw_types_schemats/dosleg.d.ts +0 -977
  65. package/lib/raw_types_schemats/dosleg.js +0 -2
  66. package/lib/raw_types_schemats/questions.d.ts +0 -237
  67. package/lib/raw_types_schemats/questions.js +0 -2
  68. package/lib/raw_types_schemats/sens.d.ts +0 -6915
  69. package/lib/raw_types_schemats/sens.js +0 -2
  70. package/lib/scripts/convert_data.js +0 -354
  71. package/lib/scripts/data-download.d.ts +0 -1
  72. package/lib/scripts/data-download.js +0 -12
  73. package/lib/scripts/datautil.d.ts +0 -8
  74. package/lib/scripts/datautil.js +0 -34
  75. package/lib/scripts/parse_textes.d.ts +0 -1
  76. package/lib/scripts/parse_textes.js +0 -44
  77. package/lib/scripts/retrieve_agenda.d.ts +0 -1
  78. package/lib/scripts/retrieve_agenda.js +0 -132
  79. package/lib/scripts/retrieve_cr_commission.d.ts +0 -1
  80. package/lib/scripts/retrieve_cr_commission.js +0 -364
  81. package/lib/scripts/retrieve_cr_seance.d.ts +0 -6
  82. package/lib/scripts/retrieve_cr_seance.js +0 -347
  83. package/lib/scripts/retrieve_documents.d.ts +0 -3
  84. package/lib/scripts/retrieve_documents.js +0 -219
  85. package/lib/scripts/retrieve_open_data.d.ts +0 -1
  86. package/lib/scripts/retrieve_open_data.js +0 -316
  87. package/lib/scripts/retrieve_senateurs_photos.d.ts +0 -1
  88. package/lib/scripts/retrieve_senateurs_photos.js +0 -147
  89. package/lib/scripts/retrieve_videos.d.ts +0 -1
  90. package/lib/scripts/retrieve_videos.js +0 -461
  91. package/lib/scripts/shared/cli_helpers.d.ts +0 -95
  92. package/lib/scripts/shared/cli_helpers.js +0 -91
  93. package/lib/scripts/shared/util.d.ts +0 -4
  94. package/lib/scripts/shared/util.js +0 -35
  95. package/lib/scripts/test_iter_load.d.ts +0 -1
  96. package/lib/scripts/test_iter_load.js +0 -12
  97. package/lib/src/utils/nvs-timecode.d.ts +0 -17
  98. package/lib/src/utils/nvs-timecode.js +0 -79
  99. package/lib/src/utils/weights_scoring_config.d.ts +0 -2
  100. package/lib/src/utils/weights_scoring_config.js +0 -15
  101. package/lib/strings.d.ts +0 -1
  102. package/lib/strings.js +0 -18
  103. package/lib/types/agenda.d.ts +0 -44
  104. package/lib/types/agenda.js +0 -1
  105. package/lib/types/ameli.d.ts +0 -5
  106. package/lib/types/ameli.js +0 -1
  107. package/lib/types/compte_rendu.d.ts +0 -83
  108. package/lib/types/compte_rendu.js +0 -1
  109. package/lib/types/debats.d.ts +0 -2
  110. package/lib/types/debats.js +0 -1
  111. package/lib/types/dosleg.d.ts +0 -70
  112. package/lib/types/dosleg.js +0 -1
  113. package/lib/types/questions.d.ts +0 -2
  114. package/lib/types/questions.js +0 -1
  115. package/lib/types/sens.d.ts +0 -10
  116. package/lib/types/sens.js +0 -1
  117. package/lib/types/sessions.d.ts +0 -5
  118. package/lib/types/sessions.js +0 -84
  119. package/lib/types/texte.d.ts +0 -74
  120. package/lib/types/texte.js +0 -16
  121. package/lib/utils/cr_spliting.d.ts +0 -28
  122. package/lib/utils/cr_spliting.js +0 -265
  123. package/lib/utils/date.d.ts +0 -10
  124. package/lib/utils/date.js +0 -100
  125. package/lib/utils/nvs-timecode.d.ts +0 -7
  126. package/lib/utils/nvs-timecode.js +0 -79
  127. package/lib/utils/reunion_grouping.d.ts +0 -11
  128. package/lib/utils/reunion_grouping.js +0 -337
  129. package/lib/utils/reunion_odj_building.d.ts +0 -5
  130. package/lib/utils/reunion_odj_building.js +0 -154
  131. package/lib/utils/reunion_parsing.d.ts +0 -23
  132. package/lib/utils/reunion_parsing.js +0 -209
  133. package/lib/utils/scoring.d.ts +0 -14
  134. package/lib/utils/scoring.js +0 -147
  135. package/lib/utils/string_cleaning.d.ts +0 -7
  136. package/lib/utils/string_cleaning.js +0 -57
  137. package/lib/validators/config.d.ts +0 -9
  138. package/lib/validators/config.js +0 -10
  139. /package/lib/{scripts/convert_data.d.ts → tests/test_iter_load.test.d.ts} +0 -0
@@ -1,228 +0,0 @@
1
- import { JSDOM } from "jsdom";
2
- import { AKN_IDENTIFICATION_STRUCTURE_REGEXP, AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
3
- import { DivisionType, } from "../types/texte";
4
- function buildWorklow(metaElement) {
5
- const stepElements = metaElement.querySelectorAll("workflow step");
6
- const steps = [];
7
- for (const stepElement of stepElements) {
8
- const identification = stepElement.getAttribute("href") ?? "";
9
- const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
10
- steps.push({
11
- eId: stepElement.getAttribute("eId"),
12
- date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
13
- type: identificationParts?.["type"] || null,
14
- session: identificationParts?.["session"] || null,
15
- numero: identificationParts?.["numTexte"] || null,
16
- version: identificationParts?.["version"] ? identificationParts["version"] : null,
17
- outcome: stepElement.getAttribute("outcome"),
18
- });
19
- }
20
- return steps;
21
- }
22
- function buildDivision(node, index) {
23
- const eId = node.getAttribute("eId");
24
- const tag = node.nodeName;
25
- const level = DivisionType[tag];
26
- const titleNode = node.querySelector("num");
27
- const subtitleNode = node.querySelector("heading");
28
- const headings = [
29
- ...(titleNode
30
- ? [
31
- {
32
- text: titleNode.textContent?.trim() ?? null,
33
- html: titleNode.innerHTML?.trim() ?? null,
34
- },
35
- ]
36
- : []),
37
- ...(subtitleNode
38
- ? [
39
- {
40
- text: subtitleNode.textContent?.trim() ?? null,
41
- html: subtitleNode.innerHTML?.trim() ?? null,
42
- },
43
- ]
44
- : []),
45
- ];
46
- const division = {
47
- index,
48
- eId,
49
- tag,
50
- level,
51
- headings,
52
- };
53
- if (tag === "article") {
54
- ;
55
- division.alineas = [];
56
- }
57
- return division;
58
- }
59
- function buildAlinea(contentNode, alineaNode) {
60
- const eId = alineaNode.getAttribute("eId");
61
- const heading = {
62
- text: alineaNode.querySelector("num")?.textContent ?? null,
63
- };
64
- const pastille = alineaNode.getAttribute("data:pastille") ?? null;
65
- return {
66
- eId,
67
- heading,
68
- text: contentNode.textContent?.trim() ?? null,
69
- html: contentNode.innerHTML?.trim() ?? null,
70
- pastille,
71
- };
72
- }
73
- function buildEmptyArticle(index) {
74
- return {
75
- index: index,
76
- eId: "",
77
- tag: "article",
78
- level: DivisionType["article"],
79
- headings: [],
80
- alineas: [],
81
- };
82
- }
83
- function flattenTexte(texteContentRoot) {
84
- const divisions = [];
85
- let divisionIndex = 0;
86
- const iter = (node) => {
87
- if (node.nodeName === "content") {
88
- return;
89
- }
90
- switch (node.nodeName) {
91
- case "tome":
92
- case "part":
93
- case "book":
94
- case "title":
95
- case "subtitle":
96
- case "chapter":
97
- case "section":
98
- case "subsection":
99
- case "paragraph":
100
- case "article":
101
- divisions.push(buildDivision(node, divisionIndex++));
102
- break;
103
- }
104
- if (node.nodeName === "alinea") {
105
- Array.from(node.childNodes)
106
- // Find direct content children programmatically
107
- // because `:scope` selector does not work
108
- // https://github.com/jsdom/jsdom/issues/2998
109
- .filter((alineaChildNode) => alineaChildNode.nodeName === "content")
110
- .forEach((alineaContentNode) => {
111
- // Hypothesis: alineas should always be enclosed in articles
112
- let lastArticle = divisions.findLast((division) => division.tag === "article");
113
- if (!lastArticle) {
114
- lastArticle = buildEmptyArticle(divisionIndex++);
115
- divisions.push(lastArticle);
116
- }
117
- lastArticle.alineas.push(buildAlinea(alineaContentNode, node));
118
- });
119
- }
120
- if (node.hasChildNodes()) {
121
- node.childNodes.forEach((childNode) => iter(childNode));
122
- }
123
- };
124
- iter(texteContentRoot);
125
- return divisions;
126
- }
127
- export function transformTexte(document) {
128
- const metaElement = document.querySelector("meta");
129
- const preambleElement = document.querySelector("preamble");
130
- const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? "";
131
- const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
132
- const bodyElement = document.querySelector("body");
133
- const sessionYears = identificationParts?.["session"]?.split("-") || null;
134
- const datePresentation = metaElement?.querySelector("FRBRdate[name='#presentation']")?.getAttribute("date");
135
- const dateDepot = metaElement?.querySelector("FRBRdate[name='#depot']")?.getAttribute("date");
136
- const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date");
137
- return {
138
- titre: preambleElement?.querySelector("docTitle")?.textContent || null,
139
- titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")?.getAttribute("value") || null,
140
- signetDossier: metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")?.getAttribute("value") || null,
141
- urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
142
- urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
143
- type: identificationParts?.["type"] || null,
144
- session: sessionYears && sessionYears.length > 0 ? sessionYears[0] : null,
145
- numero: identificationParts?.["numTexte"] ? parseInt(identificationParts["numTexte"]) : null,
146
- datePresentation: datePresentation ? new Date(datePresentation) : null,
147
- dateDepot: dateDepot ? new Date(dateDepot) : null,
148
- datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
149
- version: identificationParts?.["version"] ? identificationParts["version"] : null,
150
- workflow: metaElement ? buildWorklow(metaElement) : [],
151
- divisions: bodyElement ? flattenTexte(bodyElement) : [],
152
- };
153
- }
154
- export function transformExposeDesMotifs(document) {
155
- const sectionElements = document.querySelectorAll("section");
156
- const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS");
157
- for (const sectionElement of sectionElements) {
158
- const firstParagraph = sectionElement.querySelector("p:first-of-type");
159
- const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)");
160
- if (!firstParagraph) {
161
- continue;
162
- }
163
- const firstParagraphContent = firstParagraph.textContent;
164
- const secondParagraphContent = secondParagraph?.textContent;
165
- if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
166
- if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
167
- continue;
168
- }
169
- else {
170
- secondParagraph.remove();
171
- }
172
- }
173
- firstParagraph.remove();
174
- return {
175
- text: sectionElement.textContent?.trim() ?? null,
176
- html: sectionElement.innerHTML?.trim() ?? null,
177
- };
178
- }
179
- return null;
180
- }
181
- export function parseTexte(texteXml) {
182
- try {
183
- const { document } = new JSDOM(texteXml, {
184
- contentType: "text/xml",
185
- }).window;
186
- return transformTexte(document);
187
- }
188
- catch (error) {
189
- console.error(`Could not parse texte with error ${error}`);
190
- }
191
- return null;
192
- }
193
- // Prevent from memory leak
194
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
195
- export async function parseTexteFromFile(xmlFilePath) {
196
- try {
197
- const { document } = (await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" })).window;
198
- return transformTexte(document);
199
- }
200
- catch (error) {
201
- console.error(`Could not parse texte with error ${error}`);
202
- }
203
- return null;
204
- }
205
- export function parseExposeDesMotifs(exposeDesMotifsHtml) {
206
- try {
207
- const { document } = new JSDOM(exposeDesMotifsHtml, {
208
- contentType: "text/html",
209
- }).window;
210
- return transformExposeDesMotifs(document);
211
- }
212
- catch (error) {
213
- console.error(`Could not parse exposé des motifs with error ${error}`);
214
- }
215
- return null;
216
- }
217
- // Prevent from memory leak
218
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
219
- export async function parseExposeDesMotifsFromFile(htmlFilePath) {
220
- try {
221
- const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window;
222
- return transformExposeDesMotifs(document);
223
- }
224
- catch (error) {
225
- console.error(`Could not parse exposé des motifs with error ${error}`);
226
- }
227
- return null;
228
- }
@@ -1,9 +0,0 @@
1
- import { Expression } from "kysely";
2
- export declare function concat(...exprs: Expression<number | string | null | undefined>[]): import("kysely").RawBuilder<string>;
3
- export declare function expandToRows(expr: Expression<string | null | undefined>, regexp: Expression<string>): import("kysely").RawBuilder<unknown>;
4
- export declare function nullIf(expr: Expression<number | string | null | undefined>): import("kysely").RawBuilder<unknown>;
5
- export declare function removeSubstring(expr: Expression<string | null | undefined>, pattern: Expression<string>): import("kysely").RawBuilder<string>;
6
- export declare function replace(expr: Expression<string | null | undefined>, pattern: Expression<string>, replacement: Expression<string>): import("kysely").RawBuilder<string>;
7
- export declare function rtrim(expr: Expression<string | null | undefined>): import("kysely").RawBuilder<string>;
8
- export declare function toDateString(expr: Expression<Date | null | undefined>, format?: Expression<string>): import("kysely").RawBuilder<string>;
9
- export declare function toCRDate(dateISO: string, startTime?: string | null): string;
package/lib/model/util.js DELETED
@@ -1,38 +0,0 @@
1
- import { sql } from "kysely";
2
- import { STANDARD_DATE_FORMAT } from "../scripts/datautil";
3
- export function concat(...exprs) {
4
- return sql.join(exprs, sql `||`).$castTo();
5
- }
6
- export function expandToRows(expr, regexp) {
7
- return sql `unnest(regexp_matches(${expr}, ${regexp}, 'g'))`;
8
- }
9
- export function nullIf(expr) {
10
- return sql `nullif(trim(${expr}), '')`;
11
- }
12
- export function removeSubstring(expr, pattern) {
13
- return replace(expr, pattern, sql.val(""));
14
- }
15
- export function replace(expr, pattern, replacement) {
16
- return sql `replace(${expr}, ${pattern}, ${replacement})`;
17
- }
18
- export function rtrim(expr) {
19
- return sql `rtrim(${expr})`;
20
- }
21
- export function toDateString(expr, format = sql.val(STANDARD_DATE_FORMAT)) {
22
- return sql `to_char(${expr}, ${format})`;
23
- }
24
- export function toCRDate(dateISO, startTime) {
25
- const yyyymmdd = dateISO.replace(/-/g, ""); // "20250716"
26
- let hh = "00", mm = "00", ss = "00", SSS = "000";
27
- if (startTime) {
28
- // accepte "HH:MM:SS", "HH:MM:SS.mmm", "HH:MM:SS.mmm+02:00"
29
- const m = startTime.match(/(\d{2}):(\d{2}):(\d{2})(?:\.(\d{3}))?/);
30
- if (m) {
31
- hh = m[1];
32
- mm = m[2];
33
- ss = m[3];
34
- SSS = m[4] || "000";
35
- }
36
- }
37
- return `${yyyymmdd}${hh}${mm}${ss}${SSS}`;
38
- }
@@ -1,7 +0,0 @@
1
- import { ExposeDesMotifs, FlatTexte } from "../types/texte";
2
- export declare function transformTexte(document: Document): FlatTexte | null;
3
- export declare function transformExposeDesMotifs(document: Document): ExposeDesMotifs | null;
4
- export declare function parseTexte(texteXml: string): FlatTexte | null;
5
- export declare function parseTexteFromFile(xmlFilePath: string): Promise<FlatTexte | null>;
6
- export declare function parseExposeDesMotifs(exposeDesMotifsHtml: string): ExposeDesMotifs | null;
7
- export declare function parseExposeDesMotifsFromFile(htmlFilePath: string): Promise<ExposeDesMotifs | null>;
@@ -1,228 +0,0 @@
1
- import { JSDOM } from "jsdom";
2
- import { AKN_IDENTIFICATION_STRUCTURE_REGEXP, AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
3
- import { DivisionType, } from "../types/texte";
4
- function buildWorklow(metaElement) {
5
- const stepElements = metaElement.querySelectorAll("workflow step");
6
- const steps = [];
7
- for (const stepElement of stepElements) {
8
- const identification = stepElement.getAttribute("href") ?? "";
9
- const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
10
- steps.push({
11
- eId: stepElement.getAttribute("eId"),
12
- date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
13
- type: identificationParts?.["type"] || null,
14
- session: identificationParts?.["session"] || null,
15
- numero: identificationParts?.["numTexte"] || null,
16
- version: identificationParts?.["version"] ? identificationParts["version"] : null,
17
- outcome: stepElement.getAttribute("outcome"),
18
- });
19
- }
20
- return steps;
21
- }
22
- function buildDivision(node, index) {
23
- const eId = node.getAttribute("eId");
24
- const tag = node.nodeName;
25
- const level = DivisionType[tag];
26
- const titleNode = node.querySelector("num");
27
- const subtitleNode = node.querySelector("heading");
28
- const headings = [
29
- ...(titleNode
30
- ? [
31
- {
32
- text: titleNode.textContent?.trim() ?? null,
33
- html: titleNode.innerHTML?.trim() ?? null,
34
- },
35
- ]
36
- : []),
37
- ...(subtitleNode
38
- ? [
39
- {
40
- text: subtitleNode.textContent?.trim() ?? null,
41
- html: subtitleNode.innerHTML?.trim() ?? null,
42
- },
43
- ]
44
- : []),
45
- ];
46
- const division = {
47
- index,
48
- eId,
49
- tag,
50
- level,
51
- headings,
52
- };
53
- if (tag === "article") {
54
- ;
55
- division.alineas = [];
56
- }
57
- return division;
58
- }
59
- function buildAlinea(contentNode, alineaNode) {
60
- const eId = alineaNode.getAttribute("eId");
61
- const heading = {
62
- text: alineaNode.querySelector("num")?.textContent ?? null,
63
- };
64
- const pastille = alineaNode.getAttribute("data:pastille") ?? null;
65
- return {
66
- eId,
67
- heading,
68
- text: contentNode.textContent?.trim() ?? null,
69
- html: contentNode.innerHTML?.trim() ?? null,
70
- pastille,
71
- };
72
- }
73
- function buildEmptyArticle(index) {
74
- return {
75
- index: index,
76
- eId: "",
77
- tag: "article",
78
- level: DivisionType["article"],
79
- headings: [],
80
- alineas: [],
81
- };
82
- }
83
- function flattenTexte(texteContentRoot) {
84
- const divisions = [];
85
- let divisionIndex = 0;
86
- const iter = (node) => {
87
- if (node.nodeName === "content") {
88
- return;
89
- }
90
- switch (node.nodeName) {
91
- case "tome":
92
- case "part":
93
- case "book":
94
- case "title":
95
- case "subtitle":
96
- case "chapter":
97
- case "section":
98
- case "subsection":
99
- case "paragraph":
100
- case "article":
101
- divisions.push(buildDivision(node, divisionIndex++));
102
- break;
103
- }
104
- if (node.nodeName === "alinea") {
105
- Array.from(node.childNodes)
106
- // Find direct content children programmatically
107
- // because `:scope` selector does not work
108
- // https://github.com/jsdom/jsdom/issues/2998
109
- .filter((alineaChildNode) => alineaChildNode.nodeName === "content")
110
- .forEach((alineaContentNode) => {
111
- // Hypothesis: alineas should always be enclosed in articles
112
- let lastArticle = divisions.findLast((division) => division.tag === "article");
113
- if (!lastArticle) {
114
- lastArticle = buildEmptyArticle(divisionIndex++);
115
- divisions.push(lastArticle);
116
- }
117
- lastArticle.alineas.push(buildAlinea(alineaContentNode, node));
118
- });
119
- }
120
- if (node.hasChildNodes()) {
121
- node.childNodes.forEach((childNode) => iter(childNode));
122
- }
123
- };
124
- iter(texteContentRoot);
125
- return divisions;
126
- }
127
- export function transformTexte(document) {
128
- const metaElement = document.querySelector("meta");
129
- const preambleElement = document.querySelector("preamble");
130
- const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? "";
131
- const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
132
- const bodyElement = document.querySelector("body");
133
- const sessionYears = identificationParts?.["session"]?.split("-") || null;
134
- const datePresentation = metaElement?.querySelector("FRBRdate[name='#presentation']")?.getAttribute("date");
135
- const dateDepot = metaElement?.querySelector("FRBRdate[name='#depot']")?.getAttribute("date");
136
- const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date");
137
- return {
138
- titre: preambleElement?.querySelector("docTitle")?.textContent || null,
139
- titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")?.getAttribute("value") || null,
140
- signetDossier: metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")?.getAttribute("value") || null,
141
- urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
142
- urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
143
- type: identificationParts?.["type"] || null,
144
- session: sessionYears && sessionYears.length > 0 ? sessionYears[0] : null,
145
- numero: identificationParts?.["numTexte"] ? parseInt(identificationParts["numTexte"]) : null,
146
- datePresentation: datePresentation ? new Date(datePresentation) : null,
147
- dateDepot: dateDepot ? new Date(dateDepot) : null,
148
- datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
149
- version: identificationParts?.["version"] ? identificationParts["version"] : null,
150
- workflow: metaElement ? buildWorklow(metaElement) : [],
151
- divisions: bodyElement ? flattenTexte(bodyElement) : [],
152
- };
153
- }
154
- export function transformExposeDesMotifs(document) {
155
- const sectionElements = document.querySelectorAll("section");
156
- const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS");
157
- for (const sectionElement of sectionElements) {
158
- const firstParagraph = sectionElement.querySelector("p:first-of-type");
159
- const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)");
160
- if (!firstParagraph) {
161
- continue;
162
- }
163
- const firstParagraphContent = firstParagraph.textContent;
164
- const secondParagraphContent = secondParagraph?.textContent;
165
- if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
166
- if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
167
- continue;
168
- }
169
- else {
170
- secondParagraph.remove();
171
- }
172
- }
173
- firstParagraph.remove();
174
- return {
175
- text: sectionElement.textContent?.trim() ?? null,
176
- html: sectionElement.innerHTML?.trim() ?? null,
177
- };
178
- }
179
- return null;
180
- }
181
- export function parseTexte(texteXml) {
182
- try {
183
- const { document } = new JSDOM(texteXml, {
184
- contentType: "text/xml",
185
- }).window;
186
- return transformTexte(document);
187
- }
188
- catch (error) {
189
- console.error(`Could not parse texte with error ${error}`);
190
- }
191
- return null;
192
- }
193
- // Prevent from memory leak
194
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
195
- export async function parseTexteFromFile(xmlFilePath) {
196
- try {
197
- const { document } = (await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" })).window;
198
- return transformTexte(document);
199
- }
200
- catch (error) {
201
- console.error(`Could not parse texte with error ${error}`);
202
- }
203
- return null;
204
- }
205
- export function parseExposeDesMotifs(exposeDesMotifsHtml) {
206
- try {
207
- const { document } = new JSDOM(exposeDesMotifsHtml, {
208
- contentType: "text/html",
209
- }).window;
210
- return transformExposeDesMotifs(document);
211
- }
212
- catch (error) {
213
- console.error(`Could not parse exposé des motifs with error ${error}`);
214
- }
215
- return null;
216
- }
217
- // Prevent from memory leak
218
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
219
- export async function parseExposeDesMotifsFromFile(htmlFilePath) {
220
- try {
221
- const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window;
222
- return transformExposeDesMotifs(document);
223
- }
224
- catch (error) {
225
- console.error(`Could not parse exposé des motifs with error ${error}`);
226
- }
227
- return null;
228
- }