@tricoteuses/senat 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253)
  1. package/lib/config.d.ts +1 -0
  2. package/lib/config.js +14 -45
  3. package/lib/databases.js +86 -143
  4. package/lib/datasets.js +78 -83
  5. package/lib/index.d.ts +7 -4
  6. package/lib/index.js +42 -419
  7. package/lib/loaders.js +149 -654
  8. package/lib/model/ameli.js +83 -21
  9. package/lib/model/debats.js +0 -1
  10. package/lib/model/dosleg.d.ts +1 -1
  11. package/lib/model/dosleg.js +179 -73
  12. package/lib/model/index.d.ts +3 -3
  13. package/lib/model/index.js +12 -46
  14. package/lib/model/questions.js +68 -39
  15. package/lib/model/sens.js +383 -113
  16. package/lib/model/texte.js +220 -290
  17. package/lib/model/util.js +9 -26
  18. package/lib/raw_types/ameli.js +5 -6
  19. package/lib/raw_types/debats.js +5 -6
  20. package/lib/raw_types/dosleg.js +5 -6
  21. package/lib/raw_types/questions.js +5 -6
  22. package/lib/raw_types/sens.js +5 -6
  23. package/lib/raw_types_schemats/ameli.js +1 -43
  24. package/lib/raw_types_schemats/debats.js +1 -22
  25. package/lib/raw_types_schemats/dosleg.js +1 -96
  26. package/lib/raw_types_schemats/questions.js +1 -22
  27. package/lib/raw_types_schemats/sens.js +1 -112
  28. package/lib/scripts/convert_data.js +181 -631
  29. package/lib/scripts/datautil.js +17 -60
  30. package/lib/scripts/parse_textes.js +46 -129
  31. package/lib/scripts/retrieve_documents.js +247 -513
  32. package/lib/scripts/retrieve_open_data.js +211 -368
  33. package/lib/scripts/retrieve_senateurs_photos.js +144 -239
  34. package/lib/scripts/shared/cli_helpers.js +30 -30
  35. package/lib/scripts/shared/util.js +28 -94
  36. package/lib/strings.js +20 -45
  37. package/lib/types/ameli.d.ts +1 -1
  38. package/lib/types/ameli.js +14 -25
  39. package/lib/types/debats.d.ts +1 -1
  40. package/lib/types/debats.js +3 -21
  41. package/lib/types/dosleg.d.ts +1 -1
  42. package/lib/types/dosleg.js +152 -119
  43. package/lib/types/questions.d.ts +1 -1
  44. package/lib/types/questions.js +1 -13
  45. package/lib/types/sens.d.ts +1 -1
  46. package/lib/types/sens.js +1 -13
  47. package/lib/types/sessions.js +44 -49
  48. package/lib/types/texte.js +17 -22
  49. package/lib/validators/config.js +47 -111
  50. package/lib/validators/senat.js +1 -5
  51. package/package.json +16 -38
  52. package/lib/aggregates.d.ts +0 -52
  53. package/lib/aggregates.mjs +0 -930
  54. package/lib/aggregates.ts +0 -833
  55. package/lib/config.mjs +0 -16
  56. package/lib/config.ts +0 -26
  57. package/lib/data/legislatures.json +0 -38
  58. package/lib/databases.mjs +0 -57
  59. package/lib/databases.ts +0 -71
  60. package/lib/datasets.mjs +0 -78
  61. package/lib/datasets.ts +0 -118
  62. package/lib/fields.d.ts +0 -10
  63. package/lib/fields.mjs +0 -68
  64. package/lib/fields.ts +0 -29
  65. package/lib/index.mjs +0 -4
  66. package/lib/index.ts +0 -42
  67. package/lib/inserters.d.ts +0 -98
  68. package/lib/inserters.mjs +0 -500
  69. package/lib/inserters.ts +0 -521
  70. package/lib/loaders.mjs +0 -158
  71. package/lib/loaders.ts +0 -271
  72. package/lib/model/ameli.mjs +0 -84
  73. package/lib/model/ameli.ts +0 -100
  74. package/lib/model/debats.mjs +0 -1
  75. package/lib/model/debats.ts +0 -0
  76. package/lib/model/dosleg.mjs +0 -196
  77. package/lib/model/dosleg.ts +0 -240
  78. package/lib/model/index.mjs +0 -4
  79. package/lib/model/index.ts +0 -14
  80. package/lib/model/questions.mjs +0 -71
  81. package/lib/model/questions.ts +0 -93
  82. package/lib/model/sens.mjs +0 -415
  83. package/lib/model/sens.ts +0 -516
  84. package/lib/model/texte.mjs +0 -208
  85. package/lib/model/texte.ts +0 -229
  86. package/lib/model/util.mjs +0 -19
  87. package/lib/model/util.ts +0 -32
  88. package/lib/raw_types/ameli.mjs +0 -5
  89. package/lib/raw_types/ameli.ts +0 -951
  90. package/lib/raw_types/debats.mjs +0 -5
  91. package/lib/raw_types/debats.ts +0 -222
  92. package/lib/raw_types/dosleg.mjs +0 -5
  93. package/lib/raw_types/dosleg.ts +0 -3625
  94. package/lib/raw_types/questions.mjs +0 -5
  95. package/lib/raw_types/questions.ts +0 -427
  96. package/lib/raw_types/sens.mjs +0 -5
  97. package/lib/raw_types/sens.ts +0 -4499
  98. package/lib/raw_types_kysely/ameli.d.ts +0 -6
  99. package/lib/raw_types_kysely/ameli.mjs +0 -7
  100. package/lib/raw_types_kysely/ameli.ts +0 -6
  101. package/lib/raw_types_kysely/debats.d.ts +0 -6
  102. package/lib/raw_types_kysely/debats.mjs +0 -7
  103. package/lib/raw_types_kysely/debats.ts +0 -6
  104. package/lib/raw_types_kysely/dosleg.d.ts +0 -6
  105. package/lib/raw_types_kysely/dosleg.mjs +0 -7
  106. package/lib/raw_types_kysely/dosleg.ts +0 -6
  107. package/lib/raw_types_kysely/questions.d.ts +0 -6
  108. package/lib/raw_types_kysely/questions.mjs +0 -7
  109. package/lib/raw_types_kysely/questions.ts +0 -6
  110. package/lib/raw_types_kysely/sens.d.ts +0 -6
  111. package/lib/raw_types_kysely/sens.mjs +0 -7
  112. package/lib/raw_types_kysely/sens.ts +0 -6
  113. package/lib/raw_types_kysely/texte.d.ts +0 -45
  114. package/lib/raw_types_kysely/texte.mjs +0 -7
  115. package/lib/raw_types_kysely/texte.ts +0 -53
  116. package/lib/raw_types_schemats/ameli.mjs +0 -2
  117. package/lib/raw_types_schemats/ameli.ts +0 -601
  118. package/lib/raw_types_schemats/debats.mjs +0 -2
  119. package/lib/raw_types_schemats/debats.ts +0 -145
  120. package/lib/raw_types_schemats/dosleg.mjs +0 -2
  121. package/lib/raw_types_schemats/dosleg.ts +0 -2195
  122. package/lib/raw_types_schemats/questions.mjs +0 -2
  123. package/lib/raw_types_schemats/questions.ts +0 -251
  124. package/lib/raw_types_schemats/sens.mjs +0 -2
  125. package/lib/raw_types_schemats/sens.ts +0 -2907
  126. package/lib/scripts/convert_data.mjs +0 -181
  127. package/lib/scripts/convert_data.ts +0 -243
  128. package/lib/scripts/datautil.mjs +0 -16
  129. package/lib/scripts/datautil.ts +0 -19
  130. package/lib/scripts/images/transparent_150x192.jpg +0 -0
  131. package/lib/scripts/images/transparent_155x225.jpg +0 -0
  132. package/lib/scripts/parse_textes.mjs +0 -46
  133. package/lib/scripts/parse_textes.ts +0 -65
  134. package/lib/scripts/retrieve_documents.mjs +0 -249
  135. package/lib/scripts/retrieve_documents.ts +0 -298
  136. package/lib/scripts/retrieve_open_data.mjs +0 -217
  137. package/lib/scripts/retrieve_open_data.ts +0 -274
  138. package/lib/scripts/retrieve_senateurs_photos.mjs +0 -147
  139. package/lib/scripts/retrieve_senateurs_photos.ts +0 -177
  140. package/lib/scripts/retrieve_textes.d.ts +0 -1
  141. package/lib/scripts/retrieve_textes.mjs +0 -328
  142. package/lib/scripts/retrieve_textes.ts +0 -143
  143. package/lib/scripts/shared/cli_helpers.ts +0 -36
  144. package/lib/scripts/shared/util.ts +0 -33
  145. package/lib/src/aggregates.d.ts +0 -52
  146. package/lib/src/aggregates.mjs +0 -726
  147. package/lib/src/config.d.ts +0 -2
  148. package/lib/src/config.mjs +0 -16
  149. package/lib/src/databases.d.ts +0 -18
  150. package/lib/src/databases.mjs +0 -55
  151. package/lib/src/datasets.d.ts +0 -28
  152. package/lib/src/datasets.mjs +0 -78
  153. package/lib/src/fields.d.ts +0 -10
  154. package/lib/src/fields.mjs +0 -22
  155. package/lib/src/index.d.ts +0 -8
  156. package/lib/src/index.mjs +0 -7
  157. package/lib/src/inserters.d.ts +0 -98
  158. package/lib/src/inserters.mjs +0 -360
  159. package/lib/src/loaders.d.ts +0 -36
  160. package/lib/src/loaders.mjs +0 -107
  161. package/lib/src/model/ameli.d.ts +0 -4
  162. package/lib/src/model/ameli.js +0 -57
  163. package/lib/src/model/debats.d.ts +0 -4
  164. package/lib/src/model/debats.js +0 -43
  165. package/lib/src/model/dosleg.d.ts +0 -197
  166. package/lib/src/model/dosleg.js +0 -169
  167. package/lib/src/model/index.d.ts +0 -4
  168. package/lib/src/model/index.js +0 -4
  169. package/lib/src/model/questions.d.ts +0 -89
  170. package/lib/src/model/questions.js +0 -76
  171. package/lib/src/model/sens.d.ts +0 -390
  172. package/lib/src/model/sens.js +0 -339
  173. package/lib/src/model/texte.d.ts +0 -7
  174. package/lib/src/model/texte.js +0 -183
  175. package/lib/src/raw_types_kysely/ameli.d.ts +0 -915
  176. package/lib/src/raw_types_kysely/ameli.js +0 -5
  177. package/lib/src/raw_types_kysely/debats.d.ts +0 -207
  178. package/lib/src/raw_types_kysely/debats.js +0 -5
  179. package/lib/src/raw_types_kysely/dosleg.d.ts +0 -3532
  180. package/lib/src/raw_types_kysely/dosleg.js +0 -5
  181. package/lib/src/raw_types_kysely/questions.d.ts +0 -414
  182. package/lib/src/raw_types_kysely/questions.js +0 -5
  183. package/lib/src/raw_types_kysely/sens.d.ts +0 -4394
  184. package/lib/src/raw_types_kysely/sens.js +0 -5
  185. package/lib/src/raw_types_schemats/ameli.d.ts +0 -541
  186. package/lib/src/raw_types_schemats/ameli.js +0 -2
  187. package/lib/src/raw_types_schemats/debats.d.ts +0 -127
  188. package/lib/src/raw_types_schemats/debats.js +0 -2
  189. package/lib/src/raw_types_schemats/dosleg.d.ts +0 -2027
  190. package/lib/src/raw_types_schemats/dosleg.js +0 -2
  191. package/lib/src/raw_types_schemats/questions.d.ts +0 -231
  192. package/lib/src/raw_types_schemats/questions.js +0 -2
  193. package/lib/src/raw_types_schemats/sens.d.ts +0 -2709
  194. package/lib/src/raw_types_schemats/sens.js +0 -2
  195. package/lib/src/scripts/convert_data.d.ts +0 -1
  196. package/lib/src/scripts/convert_data.js +0 -95
  197. package/lib/src/scripts/datautil.d.ts +0 -5
  198. package/lib/src/scripts/datautil.js +0 -16
  199. package/lib/src/scripts/parse_textes.d.ts +0 -1
  200. package/lib/src/scripts/parse_textes.js +0 -47
  201. package/lib/src/scripts/retrieve_documents.d.ts +0 -1
  202. package/lib/src/scripts/retrieve_documents.js +0 -258
  203. package/lib/src/scripts/retrieve_open_data.d.ts +0 -1
  204. package/lib/src/scripts/retrieve_open_data.js +0 -214
  205. package/lib/src/scripts/retrieve_senateurs_photos.d.ts +0 -1
  206. package/lib/src/scripts/retrieve_senateurs_photos.js +0 -147
  207. package/lib/src/scripts/shared/cli_helpers.d.ts +0 -44
  208. package/lib/src/scripts/shared/cli_helpers.js +0 -32
  209. package/lib/src/scripts/shared/util.d.ts +0 -3
  210. package/lib/src/scripts/shared/util.js +0 -28
  211. package/lib/src/strings.d.ts +0 -1
  212. package/lib/src/strings.mjs +0 -18
  213. package/lib/src/types/ameli.d.ts +0 -10
  214. package/lib/src/types/ameli.js +0 -13
  215. package/lib/src/types/debats.d.ts +0 -4
  216. package/lib/src/types/debats.js +0 -2
  217. package/lib/src/types/dosleg.d.ts +0 -98
  218. package/lib/src/types/dosleg.js +0 -151
  219. package/lib/src/types/questions.d.ts +0 -2
  220. package/lib/src/types/questions.js +0 -1
  221. package/lib/src/types/sens.d.ts +0 -10
  222. package/lib/src/types/sens.js +0 -1
  223. package/lib/src/types/sessions.d.ts +0 -42
  224. package/lib/src/types/sessions.js +0 -43
  225. package/lib/src/types/texte.d.ts +0 -61
  226. package/lib/src/types/texte.js +0 -16
  227. package/lib/src/validators/config.d.ts +0 -1
  228. package/lib/src/validators/config.js +0 -54
  229. package/lib/src/validators/senat.d.ts +0 -0
  230. package/lib/src/validators/senat.js +0 -24
  231. package/lib/strings.mjs +0 -18
  232. package/lib/strings.ts +0 -26
  233. package/lib/types/ameli.mjs +0 -13
  234. package/lib/types/ameli.ts +0 -21
  235. package/lib/types/debats.mjs +0 -2
  236. package/lib/types/debats.ts +0 -6
  237. package/lib/types/dosleg.mjs +0 -151
  238. package/lib/types/dosleg.ts +0 -284
  239. package/lib/types/questions.mjs +0 -1
  240. package/lib/types/questions.ts +0 -3
  241. package/lib/types/sens.mjs +0 -1
  242. package/lib/types/sens.ts +0 -12
  243. package/lib/types/sessions.mjs +0 -43
  244. package/lib/types/sessions.ts +0 -42
  245. package/lib/types/texte.mjs +0 -16
  246. package/lib/types/texte.ts +0 -76
  247. package/lib/typings/windows-1252.d.js +0 -2
  248. package/lib/typings/windows-1252.d.mjs +0 -2
  249. package/lib/typings/windows-1252.d.ts +0 -11
  250. package/lib/validators/config.mjs +0 -54
  251. package/lib/validators/config.ts +0 -79
  252. package/lib/validators/senat.mjs +0 -24
  253. package/lib/validators/senat.ts +0 -26
package/lib/model/texte.mjs DELETED
@@ -1,208 +0,0 @@
1
- import { JSDOM } from "jsdom";
2
- import { AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP, AKN_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
3
- import { DivisionType, } from "../types/texte";
4
- function buildWorklow(metaElement) {
5
- const stepElements = metaElement.querySelectorAll("workflow step");
6
- const steps = [];
7
- for (const stepElement of stepElements) {
8
- const identification = stepElement.getAttribute("href") ?? "";
9
- const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
10
- steps.push({
11
- eId: stepElement.getAttribute("eId"),
12
- date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
13
- type: identificationParts?.type || null,
14
- session: identificationParts?.session || null,
15
- numero: identificationParts?.numTexte || null,
16
- version: identificationParts?.version ? identificationParts.version : null,
17
- outcome: stepElement.getAttribute("outcome"),
18
- });
19
- }
20
- return steps;
21
- }
22
- function buildDivision(node, index) {
23
- const eId = node.getAttribute("eId");
24
- const tag = node.nodeName;
25
- const level = DivisionType[tag];
26
- const titleNode = node.querySelector("num");
27
- const subtitleNode = node.querySelector("heading");
28
- const headings = [
29
- ...(titleNode ? [{
30
- text: titleNode.textContent?.trim() ?? null,
31
- html: titleNode.innerHTML?.trim() ?? null,
32
- }] : []),
33
- ...(subtitleNode ? [{
34
- text: subtitleNode.textContent?.trim() ?? null,
35
- html: subtitleNode.innerHTML?.trim() ?? null,
36
- }] : []),
37
- ];
38
- const division = {
39
- index,
40
- eId,
41
- tag,
42
- level,
43
- headings,
44
- };
45
- if (tag === "article") {
46
- division.alineas = [];
47
- }
48
- return division;
49
- }
50
- function buildAlinea(contentNode, alineaNode) {
51
- const eId = alineaNode.getAttribute("eId");
52
- const heading = {
53
- text: alineaNode.querySelector("num")?.textContent ?? null,
54
- };
55
- const pastille = alineaNode.getAttribute("data:pastille") ?? null;
56
- return {
57
- eId,
58
- heading,
59
- text: contentNode.textContent?.trim() ?? null,
60
- html: contentNode.innerHTML?.trim() ?? null,
61
- pastille,
62
- };
63
- }
64
- function buildEmptyArticle(index) {
65
- return {
66
- index: index,
67
- eId: "",
68
- tag: "article",
69
- level: DivisionType["article"],
70
- headings: [],
71
- alineas: [],
72
- };
73
- }
74
- function flattenTexte(texteContentRoot) {
75
- const divisions = [];
76
- let divisionIndex = 0;
77
- const iter = (node) => {
78
- if (node.nodeName === "content") {
79
- return;
80
- }
81
- switch (node.nodeName) {
82
- case "tome":
83
- case "part":
84
- case "book":
85
- case "title":
86
- case "subtitle":
87
- case "chapter":
88
- case "section":
89
- case "subsection":
90
- case "paragraph":
91
- case "article":
92
- divisions.push(buildDivision(node, divisionIndex++));
93
- break;
94
- }
95
- if (node.nodeName === "alinea") {
96
- Array.from(node.childNodes)
97
- // Find direct content children programmatically
98
- // because `:scope` selector does not work
99
- // https://github.com/jsdom/jsdom/issues/2998
100
- .filter((alineaChildNode) => alineaChildNode.nodeName === "content")
101
- .forEach((alineaContentNode) => {
102
- // Hypothesis: alineas should always be enclosed in articles
103
- let lastArticle = divisions.findLast(division => division.tag === "article");
104
- if (!lastArticle) {
105
- lastArticle = buildEmptyArticle(divisionIndex++);
106
- divisions.push(lastArticle);
107
- }
108
- lastArticle.alineas.push(buildAlinea(alineaContentNode, node));
109
- });
110
- }
111
- if (node.hasChildNodes()) {
112
- node.childNodes.forEach((childNode) => iter(childNode));
113
- }
114
- };
115
- iter(texteContentRoot);
116
- return divisions;
117
- }
118
- export function transformTexte(document) {
119
- const metaElement = document.querySelector("meta");
120
- const preambleElement = document.querySelector("preamble");
121
- const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? "";
122
- const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
123
- const bodyElement = document.querySelector("body");
124
- const datePresentation = metaElement?.querySelector("FRBRdate[name='#presentation']")?.getAttribute("date");
125
- const dateDepot = metaElement?.querySelector("FRBRdate[name='#depot']")?.getAttribute("date");
126
- const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date");
127
- return {
128
- titre: preambleElement?.querySelector("docTitle")?.textContent || null,
129
- titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")
130
- ?.getAttribute("value") || null,
131
- signetDossier: metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")
132
- ?.getAttribute("value") || null,
133
- urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
134
- urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
135
- type: identificationParts?.type || null,
136
- session: identificationParts?.session || null,
137
- numero: identificationParts?.numTexte ? parseInt(identificationParts.numTexte) : null,
138
- datePresentation: datePresentation ? new Date(datePresentation) : null,
139
- dateDepot: dateDepot ? new Date(dateDepot) : null,
140
- datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
141
- version: identificationParts?.version ? identificationParts.version : null,
142
- workflow: metaElement ? buildWorklow(metaElement) : [],
143
- divisions: bodyElement ? flattenTexte(bodyElement) : [],
144
- };
145
- }
146
- export function transformExposeDesMotifs(document) {
147
- const sectionElements = document.querySelectorAll("section");
148
- const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS");
149
- for (const sectionElement of sectionElements) {
150
- const firstParagraph = sectionElement.querySelector("p:first-of-type");
151
- const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)");
152
- if (!firstParagraph) {
153
- continue;
154
- }
155
- const firstParagraphContent = firstParagraph.textContent;
156
- const secondParagraphContent = secondParagraph?.textContent;
157
- if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
158
- if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
159
- continue;
160
- }
161
- else {
162
- sectionElement.removeChild(secondParagraph);
163
- }
164
- }
165
- sectionElement.removeChild(firstParagraph);
166
- return {
167
- text: sectionElement.textContent?.trim() ?? null,
168
- html: sectionElement.innerHTML?.trim() ?? null,
169
- };
170
- }
171
- return null;
172
- }
173
- export function parseTexte(texteXml) {
174
- try {
175
- const { document } = (new JSDOM(texteXml, {
176
- contentType: "text/xml",
177
- })).window;
178
- return transformTexte(document);
179
- }
180
- catch (error) {
181
- console.error(`Could not parse texte with error ${error}`);
182
- }
183
- return null;
184
- }
185
- // Prevent from memory leak
186
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
187
- export async function parseTexteFromFile(xmlFilePath) {
188
- try {
189
- const { document } = (await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" })).window;
190
- return transformTexte(document);
191
- }
192
- catch (error) {
193
- console.error(`Could not parse texte with error ${error}`);
194
- }
195
- return null;
196
- }
197
- export function parseExposeDesMotifs(exposeDesMotifsHtml) {
198
- const { document } = (new JSDOM(exposeDesMotifsHtml, {
199
- contentType: "text/html",
200
- })).window;
201
- return transformExposeDesMotifs(document);
202
- }
203
- // Prevent from memory leak
204
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
205
- export async function parseExposeDesMotifsFromFile(htmlFilePath) {
206
- const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window;
207
- return transformExposeDesMotifs(document);
208
- }
package/lib/model/texte.ts DELETED
@@ -1,229 +0,0 @@
1
- import { JSDOM } from "jsdom"
2
-
3
- import { AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP, AKN_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil"
4
- import {
5
- Alinea,
6
- Article,
7
- Division,
8
- DivisionContent,
9
- DivisionTag,
10
- DivisionType, ExposeDesMotifs,
11
- FlatTexte, Step,
12
- Version,
13
- } from "../types/texte"
14
-
15
- function buildWorklow (metaElement: HTMLMetaElement): Step[] {
16
- const stepElements = metaElement.querySelectorAll("workflow step")
17
- const steps: Step[] = []
18
- for (const stepElement of stepElements) {
19
- const identification = stepElement.getAttribute("href") ?? ""
20
- const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups
21
- steps.push({
22
- eId: stepElement.getAttribute("eId")!,
23
- date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
24
- type: identificationParts?.type || null,
25
- session: identificationParts?.session || null,
26
- numero: identificationParts?.numTexte || null,
27
- version: identificationParts?.version ? identificationParts.version as Version : null,
28
- outcome: stepElement.getAttribute("outcome"),
29
- })
30
- }
31
- return steps
32
- }
33
-
34
- function buildDivision (node: Node, index: number): Division {
35
- const eId = (node as Element).getAttribute("eId")!
36
- const tag = node.nodeName as DivisionTag
37
- const level = DivisionType[tag]
38
- const titleNode = (node as Element).querySelector("num")
39
- const subtitleNode = (node as Element).querySelector("heading")
40
- const headings: DivisionContent[] = [
41
- ...(titleNode ? [{
42
- text: titleNode.textContent?.trim() ?? null,
43
- html: titleNode.innerHTML?.trim() ?? null,
44
- }] : []),
45
- ...(subtitleNode ? [{
46
- text: subtitleNode.textContent?.trim() ?? null,
47
- html: subtitleNode.innerHTML?.trim() ?? null,
48
- }] : []),
49
- ]
50
- const division = {
51
- index,
52
- eId,
53
- tag,
54
- level,
55
- headings,
56
- }
57
- if (tag === "article") {
58
- (division as Article).alineas = []
59
- }
60
- return division
61
- }
62
-
63
- function buildAlinea (contentNode: Node, alineaNode: Node): Alinea {
64
- const eId = (alineaNode as Element).getAttribute("eId")!
65
- const heading = {
66
- text: (alineaNode as Element).querySelector("num")?.textContent ?? null,
67
- }
68
- const pastille = (alineaNode as Element).getAttribute("data:pastille") ?? null
69
- return {
70
- eId,
71
- heading,
72
- text: (contentNode as Element).textContent?.trim() ?? null,
73
- html: (contentNode as Element).innerHTML?.trim() ?? null,
74
- pastille,
75
- }
76
- }
77
-
78
- function buildEmptyArticle (index: number): Article {
79
- return {
80
- index: index,
81
- eId: "",
82
- tag: "article",
83
- level: DivisionType["article"],
84
- headings: [],
85
- alineas: [],
86
- }
87
- }
88
-
89
- function flattenTexte (texteContentRoot: Node): Division[] {
90
- const divisions: Division[] = []
91
- let divisionIndex = 0
92
- const iter = (node: Node) => {
93
- if (node.nodeName === "content") {
94
- return
95
- }
96
- switch (node.nodeName) {
97
- case "tome":
98
- case "part":
99
- case "book":
100
- case "title":
101
- case "subtitle":
102
- case "chapter":
103
- case "section":
104
- case "subsection":
105
- case "paragraph":
106
- case "article":
107
- divisions.push(buildDivision(node, divisionIndex++))
108
- break
109
- }
110
- if (node.nodeName === "alinea") {
111
- Array.from(node.childNodes)
112
- // Find direct content children programmatically
113
- // because `:scope` selector does not work
114
- // https://github.com/jsdom/jsdom/issues/2998
115
- .filter((alineaChildNode: Node) => alineaChildNode.nodeName === "content")
116
- .forEach((alineaContentNode: Node) => {
117
- // Hypothesis: alineas should always be enclosed in articles
118
- let lastArticle = divisions.findLast(division => division.tag === "article") as Article
119
- if (!lastArticle) {
120
- lastArticle = buildEmptyArticle(divisionIndex++)
121
- divisions.push(lastArticle)
122
- }
123
- lastArticle.alineas.push(buildAlinea(alineaContentNode, node))
124
- })
125
- }
126
- if (node.hasChildNodes()) {
127
- node.childNodes.forEach((childNode: Node) => iter(childNode))
128
- }
129
- }
130
- iter(texteContentRoot)
131
- return divisions
132
- }
133
-
134
- export function transformTexte (document: Document): FlatTexte | null {
135
- const metaElement = document.querySelector("meta")
136
- const preambleElement = document.querySelector("preamble")
137
- const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? ""
138
- const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups
139
- const bodyElement = document.querySelector("body")
140
- const datePresentation = metaElement?.querySelector("FRBRdate[name='#presentation']")?.getAttribute("date")
141
- const dateDepot = metaElement?.querySelector("FRBRdate[name='#depot']")?.getAttribute("date")
142
- const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date")
143
- return {
144
- titre: preambleElement?.querySelector("docTitle")?.textContent || null,
145
- titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")
146
- ?.getAttribute("value") || null,
147
- signetDossier:
148
- metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")
149
- ?.getAttribute("value") || null,
150
- urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
151
- urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
152
- type: identificationParts?.type || null,
153
- session: identificationParts?.session || null,
154
- numero: identificationParts?.numTexte ? parseInt(identificationParts.numTexte) : null,
155
- datePresentation: datePresentation ? new Date(datePresentation) : null,
156
- dateDepot: dateDepot ? new Date(dateDepot) : null,
157
- datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
158
- version: identificationParts?.version ? identificationParts.version as Version : null,
159
- workflow: metaElement ? buildWorklow(metaElement) : [],
160
- divisions: bodyElement ? flattenTexte(bodyElement) : [],
161
- }
162
- }
163
-
164
- export function transformExposeDesMotifs (document: Document): ExposeDesMotifs | null {
165
- const sectionElements = document.querySelectorAll("section")
166
- const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS")
167
- for (const sectionElement of sectionElements) {
168
- const firstParagraph = sectionElement.querySelector("p:first-of-type")
169
- const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)")
170
- if (!firstParagraph) {
171
- continue
172
- }
173
-
174
- const firstParagraphContent = firstParagraph.textContent
175
- const secondParagraphContent = secondParagraph?.textContent
176
- if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
177
- if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
178
- continue
179
- } else {
180
- sectionElement.removeChild(secondParagraph)
181
- }
182
- }
183
-
184
- sectionElement.removeChild(firstParagraph)
185
- return {
186
- text: sectionElement.textContent?.trim() ?? null,
187
- html: sectionElement.innerHTML?.trim() ?? null,
188
- }
189
- }
190
- return null
191
- }
192
-
193
- export function parseTexte (texteXml: string): FlatTexte | null {
194
- try {
195
- const { document } = (new JSDOM(texteXml, {
196
- contentType: "text/xml",
197
- })).window
198
- return transformTexte(document)
199
- } catch (error: any) {
200
- console.error(`Could not parse texte with error ${error}`)
201
- }
202
- return null
203
- }
204
-
205
- // Prevent from memory leak
206
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
207
- export async function parseTexteFromFile (xmlFilePath: string): Promise<FlatTexte | null> {
208
- try {
209
- const { document } = (await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" })).window
210
- return transformTexte(document)
211
- } catch (error: any) {
212
- console.error(`Could not parse texte with error ${error}`)
213
- }
214
- return null
215
- }
216
-
217
- export function parseExposeDesMotifs (exposeDesMotifsHtml: string): ExposeDesMotifs | null {
218
- const { document } = (new JSDOM(exposeDesMotifsHtml, {
219
- contentType: "text/html",
220
- })).window
221
- return transformExposeDesMotifs(document)
222
- }
223
-
224
- // Prevent from memory leak
225
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
226
- export async function parseExposeDesMotifsFromFile (htmlFilePath: string): Promise<ExposeDesMotifs | null> {
227
- const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window
228
- return transformExposeDesMotifs(document)
229
- }
package/lib/model/util.mjs DELETED
@@ -1,19 +0,0 @@
1
- import { sql } from "kysely";
2
- export function concat(...exprs) {
3
- return sql.join(exprs, sql `||`).$castTo();
4
- }
5
- export function expandToRows(expr, regexp) {
6
- return sql `unnest(regexp_matches(${expr}, ${regexp}, 'g'))`;
7
- }
8
- export function removeSubstring(expr, pattern) {
9
- return replace(expr, pattern, sql.val(""));
10
- }
11
- export function replace(expr, pattern, replacement) {
12
- return sql `replace(${expr}, ${pattern}, ${replacement})`;
13
- }
14
- export function rtrim(expr) {
15
- return sql `rtrim(${expr})`;
16
- }
17
- export function toDateString(expr) {
18
- return sql `to_char(${expr}, 'yyyy-MM-dd')`;
19
- }
package/lib/model/util.ts DELETED
@@ -1,32 +0,0 @@
1
- import { Expression, sql } from "kysely"
2
-
3
- export function concat (...exprs: Expression<number | string | null | undefined>[]) {
4
- return sql.join(exprs, sql`||`).$castTo<string>()
5
- }
6
-
7
- export function expandToRows (expr: Expression<string | null | undefined>, regexp: Expression<string>) {
8
- return sql`unnest(regexp_matches(${expr}, ${regexp}, 'g'))`
9
- }
10
-
11
- export function removeSubstring (
12
- expr: Expression<string | null | undefined>,
13
- pattern: Expression<string>
14
- ) {
15
- return replace(expr, pattern, sql.val(""))
16
- }
17
-
18
- export function replace (
19
- expr: Expression<string | null | undefined>,
20
- pattern: Expression<string>,
21
- replacement: Expression<string>
22
- ) {
23
- return sql<string>`replace(${expr}, ${pattern}, ${replacement})`
24
- }
25
-
26
- export function rtrim (expr: Expression<string | null | undefined>) {
27
- return sql<string>`rtrim(${expr})`
28
- }
29
-
30
- export function toDateString (expr: Expression<Date | null | undefined>) {
31
- return sql<string>`to_char(${expr}, 'yyyy-MM-dd')`
32
- }
package/lib/raw_types/ameli.mjs DELETED
@@ -1,5 +0,0 @@
1
- /**
2
- * This file was generated by kysely-codegen.
3
- * Please do not edit it manually.
4
- */
5
- export {};