@tricoteuses/senat 1.3.1 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254) hide show
  1. package/lib/config.d.ts +1 -0
  2. package/lib/config.js +14 -45
  3. package/lib/databases.js +86 -143
  4. package/lib/datasets.js +78 -83
  5. package/lib/index.d.ts +12 -4
  6. package/lib/index.js +42 -419
  7. package/lib/loaders.js +149 -654
  8. package/lib/model/ameli.js +83 -21
  9. package/lib/model/debats.js +0 -1
  10. package/lib/model/dosleg.d.ts +1 -1
  11. package/lib/model/dosleg.js +179 -73
  12. package/lib/model/index.d.ts +3 -3
  13. package/lib/model/index.js +12 -46
  14. package/lib/model/questions.js +68 -39
  15. package/lib/model/sens.d.ts +1 -1
  16. package/lib/model/sens.js +383 -113
  17. package/lib/model/texte.js +220 -290
  18. package/lib/model/util.js +9 -26
  19. package/lib/raw_types/ameli.js +5 -6
  20. package/lib/raw_types/debats.js +5 -6
  21. package/lib/raw_types/dosleg.js +5 -6
  22. package/lib/raw_types/questions.js +5 -6
  23. package/lib/raw_types/sens.js +5 -6
  24. package/lib/raw_types_schemats/ameli.js +1 -43
  25. package/lib/raw_types_schemats/debats.js +1 -22
  26. package/lib/raw_types_schemats/dosleg.js +1 -96
  27. package/lib/raw_types_schemats/questions.js +1 -22
  28. package/lib/raw_types_schemats/sens.js +1 -112
  29. package/lib/scripts/convert_data.js +181 -631
  30. package/lib/scripts/datautil.js +17 -60
  31. package/lib/scripts/parse_textes.js +46 -129
  32. package/lib/scripts/retrieve_documents.js +247 -513
  33. package/lib/scripts/retrieve_open_data.js +211 -368
  34. package/lib/scripts/retrieve_senateurs_photos.js +144 -239
  35. package/lib/scripts/shared/cli_helpers.js +30 -30
  36. package/lib/scripts/shared/util.js +28 -94
  37. package/lib/strings.js +20 -45
  38. package/lib/types/ameli.d.ts +1 -1
  39. package/lib/types/ameli.js +14 -25
  40. package/lib/types/debats.d.ts +1 -1
  41. package/lib/types/debats.js +3 -21
  42. package/lib/types/dosleg.d.ts +1 -1
  43. package/lib/types/dosleg.js +152 -119
  44. package/lib/types/questions.d.ts +1 -1
  45. package/lib/types/questions.js +1 -13
  46. package/lib/types/sens.d.ts +1 -1
  47. package/lib/types/sens.js +1 -13
  48. package/lib/types/sessions.js +44 -49
  49. package/lib/types/texte.js +17 -22
  50. package/lib/validators/config.js +47 -111
  51. package/lib/validators/senat.js +1 -5
  52. package/package.json +18 -40
  53. package/lib/aggregates.d.ts +0 -52
  54. package/lib/aggregates.mjs +0 -930
  55. package/lib/aggregates.ts +0 -833
  56. package/lib/config.mjs +0 -16
  57. package/lib/config.ts +0 -26
  58. package/lib/data/legislatures.json +0 -38
  59. package/lib/databases.mjs +0 -57
  60. package/lib/databases.ts +0 -71
  61. package/lib/datasets.mjs +0 -78
  62. package/lib/datasets.ts +0 -118
  63. package/lib/fields.d.ts +0 -10
  64. package/lib/fields.mjs +0 -68
  65. package/lib/fields.ts +0 -29
  66. package/lib/index.mjs +0 -4
  67. package/lib/index.ts +0 -42
  68. package/lib/inserters.d.ts +0 -98
  69. package/lib/inserters.mjs +0 -500
  70. package/lib/inserters.ts +0 -521
  71. package/lib/loaders.mjs +0 -158
  72. package/lib/loaders.ts +0 -271
  73. package/lib/model/ameli.mjs +0 -84
  74. package/lib/model/ameli.ts +0 -100
  75. package/lib/model/debats.mjs +0 -1
  76. package/lib/model/debats.ts +0 -0
  77. package/lib/model/dosleg.mjs +0 -196
  78. package/lib/model/dosleg.ts +0 -240
  79. package/lib/model/index.mjs +0 -4
  80. package/lib/model/index.ts +0 -14
  81. package/lib/model/questions.mjs +0 -71
  82. package/lib/model/questions.ts +0 -93
  83. package/lib/model/sens.mjs +0 -415
  84. package/lib/model/sens.ts +0 -516
  85. package/lib/model/texte.mjs +0 -208
  86. package/lib/model/texte.ts +0 -229
  87. package/lib/model/util.mjs +0 -19
  88. package/lib/model/util.ts +0 -32
  89. package/lib/raw_types/ameli.mjs +0 -5
  90. package/lib/raw_types/ameli.ts +0 -951
  91. package/lib/raw_types/debats.mjs +0 -5
  92. package/lib/raw_types/debats.ts +0 -222
  93. package/lib/raw_types/dosleg.mjs +0 -5
  94. package/lib/raw_types/dosleg.ts +0 -3625
  95. package/lib/raw_types/questions.mjs +0 -5
  96. package/lib/raw_types/questions.ts +0 -427
  97. package/lib/raw_types/sens.mjs +0 -5
  98. package/lib/raw_types/sens.ts +0 -4499
  99. package/lib/raw_types_kysely/ameli.d.ts +0 -6
  100. package/lib/raw_types_kysely/ameli.mjs +0 -7
  101. package/lib/raw_types_kysely/ameli.ts +0 -6
  102. package/lib/raw_types_kysely/debats.d.ts +0 -6
  103. package/lib/raw_types_kysely/debats.mjs +0 -7
  104. package/lib/raw_types_kysely/debats.ts +0 -6
  105. package/lib/raw_types_kysely/dosleg.d.ts +0 -6
  106. package/lib/raw_types_kysely/dosleg.mjs +0 -7
  107. package/lib/raw_types_kysely/dosleg.ts +0 -6
  108. package/lib/raw_types_kysely/questions.d.ts +0 -6
  109. package/lib/raw_types_kysely/questions.mjs +0 -7
  110. package/lib/raw_types_kysely/questions.ts +0 -6
  111. package/lib/raw_types_kysely/sens.d.ts +0 -6
  112. package/lib/raw_types_kysely/sens.mjs +0 -7
  113. package/lib/raw_types_kysely/sens.ts +0 -6
  114. package/lib/raw_types_kysely/texte.d.ts +0 -45
  115. package/lib/raw_types_kysely/texte.mjs +0 -7
  116. package/lib/raw_types_kysely/texte.ts +0 -53
  117. package/lib/raw_types_schemats/ameli.mjs +0 -2
  118. package/lib/raw_types_schemats/ameli.ts +0 -601
  119. package/lib/raw_types_schemats/debats.mjs +0 -2
  120. package/lib/raw_types_schemats/debats.ts +0 -145
  121. package/lib/raw_types_schemats/dosleg.mjs +0 -2
  122. package/lib/raw_types_schemats/dosleg.ts +0 -2195
  123. package/lib/raw_types_schemats/questions.mjs +0 -2
  124. package/lib/raw_types_schemats/questions.ts +0 -251
  125. package/lib/raw_types_schemats/sens.mjs +0 -2
  126. package/lib/raw_types_schemats/sens.ts +0 -2907
  127. package/lib/scripts/convert_data.mjs +0 -181
  128. package/lib/scripts/convert_data.ts +0 -243
  129. package/lib/scripts/datautil.mjs +0 -16
  130. package/lib/scripts/datautil.ts +0 -19
  131. package/lib/scripts/images/transparent_150x192.jpg +0 -0
  132. package/lib/scripts/images/transparent_155x225.jpg +0 -0
  133. package/lib/scripts/parse_textes.mjs +0 -46
  134. package/lib/scripts/parse_textes.ts +0 -65
  135. package/lib/scripts/retrieve_documents.mjs +0 -249
  136. package/lib/scripts/retrieve_documents.ts +0 -298
  137. package/lib/scripts/retrieve_open_data.mjs +0 -217
  138. package/lib/scripts/retrieve_open_data.ts +0 -274
  139. package/lib/scripts/retrieve_senateurs_photos.mjs +0 -147
  140. package/lib/scripts/retrieve_senateurs_photos.ts +0 -177
  141. package/lib/scripts/retrieve_textes.d.ts +0 -1
  142. package/lib/scripts/retrieve_textes.mjs +0 -328
  143. package/lib/scripts/retrieve_textes.ts +0 -143
  144. package/lib/scripts/shared/cli_helpers.ts +0 -36
  145. package/lib/scripts/shared/util.ts +0 -33
  146. package/lib/src/aggregates.d.ts +0 -52
  147. package/lib/src/aggregates.mjs +0 -726
  148. package/lib/src/config.d.ts +0 -2
  149. package/lib/src/config.mjs +0 -16
  150. package/lib/src/databases.d.ts +0 -18
  151. package/lib/src/databases.mjs +0 -55
  152. package/lib/src/datasets.d.ts +0 -28
  153. package/lib/src/datasets.mjs +0 -78
  154. package/lib/src/fields.d.ts +0 -10
  155. package/lib/src/fields.mjs +0 -22
  156. package/lib/src/index.d.ts +0 -8
  157. package/lib/src/index.mjs +0 -7
  158. package/lib/src/inserters.d.ts +0 -98
  159. package/lib/src/inserters.mjs +0 -360
  160. package/lib/src/loaders.d.ts +0 -36
  161. package/lib/src/loaders.mjs +0 -107
  162. package/lib/src/model/ameli.d.ts +0 -4
  163. package/lib/src/model/ameli.js +0 -57
  164. package/lib/src/model/debats.d.ts +0 -4
  165. package/lib/src/model/debats.js +0 -43
  166. package/lib/src/model/dosleg.d.ts +0 -197
  167. package/lib/src/model/dosleg.js +0 -169
  168. package/lib/src/model/index.d.ts +0 -4
  169. package/lib/src/model/index.js +0 -4
  170. package/lib/src/model/questions.d.ts +0 -89
  171. package/lib/src/model/questions.js +0 -76
  172. package/lib/src/model/sens.d.ts +0 -390
  173. package/lib/src/model/sens.js +0 -339
  174. package/lib/src/model/texte.d.ts +0 -7
  175. package/lib/src/model/texte.js +0 -183
  176. package/lib/src/raw_types_kysely/ameli.d.ts +0 -915
  177. package/lib/src/raw_types_kysely/ameli.js +0 -5
  178. package/lib/src/raw_types_kysely/debats.d.ts +0 -207
  179. package/lib/src/raw_types_kysely/debats.js +0 -5
  180. package/lib/src/raw_types_kysely/dosleg.d.ts +0 -3532
  181. package/lib/src/raw_types_kysely/dosleg.js +0 -5
  182. package/lib/src/raw_types_kysely/questions.d.ts +0 -414
  183. package/lib/src/raw_types_kysely/questions.js +0 -5
  184. package/lib/src/raw_types_kysely/sens.d.ts +0 -4394
  185. package/lib/src/raw_types_kysely/sens.js +0 -5
  186. package/lib/src/raw_types_schemats/ameli.d.ts +0 -541
  187. package/lib/src/raw_types_schemats/ameli.js +0 -2
  188. package/lib/src/raw_types_schemats/debats.d.ts +0 -127
  189. package/lib/src/raw_types_schemats/debats.js +0 -2
  190. package/lib/src/raw_types_schemats/dosleg.d.ts +0 -2027
  191. package/lib/src/raw_types_schemats/dosleg.js +0 -2
  192. package/lib/src/raw_types_schemats/questions.d.ts +0 -231
  193. package/lib/src/raw_types_schemats/questions.js +0 -2
  194. package/lib/src/raw_types_schemats/sens.d.ts +0 -2709
  195. package/lib/src/raw_types_schemats/sens.js +0 -2
  196. package/lib/src/scripts/convert_data.d.ts +0 -1
  197. package/lib/src/scripts/convert_data.js +0 -95
  198. package/lib/src/scripts/datautil.d.ts +0 -5
  199. package/lib/src/scripts/datautil.js +0 -16
  200. package/lib/src/scripts/parse_textes.d.ts +0 -1
  201. package/lib/src/scripts/parse_textes.js +0 -47
  202. package/lib/src/scripts/retrieve_documents.d.ts +0 -1
  203. package/lib/src/scripts/retrieve_documents.js +0 -258
  204. package/lib/src/scripts/retrieve_open_data.d.ts +0 -1
  205. package/lib/src/scripts/retrieve_open_data.js +0 -214
  206. package/lib/src/scripts/retrieve_senateurs_photos.d.ts +0 -1
  207. package/lib/src/scripts/retrieve_senateurs_photos.js +0 -147
  208. package/lib/src/scripts/shared/cli_helpers.d.ts +0 -44
  209. package/lib/src/scripts/shared/cli_helpers.js +0 -32
  210. package/lib/src/scripts/shared/util.d.ts +0 -3
  211. package/lib/src/scripts/shared/util.js +0 -28
  212. package/lib/src/strings.d.ts +0 -1
  213. package/lib/src/strings.mjs +0 -18
  214. package/lib/src/types/ameli.d.ts +0 -10
  215. package/lib/src/types/ameli.js +0 -13
  216. package/lib/src/types/debats.d.ts +0 -4
  217. package/lib/src/types/debats.js +0 -2
  218. package/lib/src/types/dosleg.d.ts +0 -98
  219. package/lib/src/types/dosleg.js +0 -151
  220. package/lib/src/types/questions.d.ts +0 -2
  221. package/lib/src/types/questions.js +0 -1
  222. package/lib/src/types/sens.d.ts +0 -10
  223. package/lib/src/types/sens.js +0 -1
  224. package/lib/src/types/sessions.d.ts +0 -42
  225. package/lib/src/types/sessions.js +0 -43
  226. package/lib/src/types/texte.d.ts +0 -61
  227. package/lib/src/types/texte.js +0 -16
  228. package/lib/src/validators/config.d.ts +0 -1
  229. package/lib/src/validators/config.js +0 -54
  230. package/lib/src/validators/senat.d.ts +0 -0
  231. package/lib/src/validators/senat.js +0 -24
  232. package/lib/strings.mjs +0 -18
  233. package/lib/strings.ts +0 -26
  234. package/lib/types/ameli.mjs +0 -13
  235. package/lib/types/ameli.ts +0 -21
  236. package/lib/types/debats.mjs +0 -2
  237. package/lib/types/debats.ts +0 -6
  238. package/lib/types/dosleg.mjs +0 -151
  239. package/lib/types/dosleg.ts +0 -284
  240. package/lib/types/questions.mjs +0 -1
  241. package/lib/types/questions.ts +0 -3
  242. package/lib/types/sens.mjs +0 -1
  243. package/lib/types/sens.ts +0 -12
  244. package/lib/types/sessions.mjs +0 -43
  245. package/lib/types/sessions.ts +0 -42
  246. package/lib/types/texte.mjs +0 -16
  247. package/lib/types/texte.ts +0 -76
  248. package/lib/typings/windows-1252.d.js +0 -2
  249. package/lib/typings/windows-1252.d.mjs +0 -2
  250. package/lib/typings/windows-1252.d.ts +0 -11
  251. package/lib/validators/config.mjs +0 -54
  252. package/lib/validators/config.ts +0 -79
  253. package/lib/validators/senat.mjs +0 -24
  254. package/lib/validators/senat.ts +0 -26
@@ -1,208 +0,0 @@
1
- import { JSDOM } from "jsdom";
2
- import { AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP, AKN_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil";
3
- import { DivisionType, } from "../types/texte";
4
- function buildWorklow(metaElement) {
5
- const stepElements = metaElement.querySelectorAll("workflow step");
6
- const steps = [];
7
- for (const stepElement of stepElements) {
8
- const identification = stepElement.getAttribute("href") ?? "";
9
- const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
10
- steps.push({
11
- eId: stepElement.getAttribute("eId"),
12
- date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
13
- type: identificationParts?.type || null,
14
- session: identificationParts?.session || null,
15
- numero: identificationParts?.numTexte || null,
16
- version: identificationParts?.version ? identificationParts.version : null,
17
- outcome: stepElement.getAttribute("outcome"),
18
- });
19
- }
20
- return steps;
21
- }
22
- function buildDivision(node, index) {
23
- const eId = node.getAttribute("eId");
24
- const tag = node.nodeName;
25
- const level = DivisionType[tag];
26
- const titleNode = node.querySelector("num");
27
- const subtitleNode = node.querySelector("heading");
28
- const headings = [
29
- ...(titleNode ? [{
30
- text: titleNode.textContent?.trim() ?? null,
31
- html: titleNode.innerHTML?.trim() ?? null,
32
- }] : []),
33
- ...(subtitleNode ? [{
34
- text: subtitleNode.textContent?.trim() ?? null,
35
- html: subtitleNode.innerHTML?.trim() ?? null,
36
- }] : []),
37
- ];
38
- const division = {
39
- index,
40
- eId,
41
- tag,
42
- level,
43
- headings,
44
- };
45
- if (tag === "article") {
46
- division.alineas = [];
47
- }
48
- return division;
49
- }
50
- function buildAlinea(contentNode, alineaNode) {
51
- const eId = alineaNode.getAttribute("eId");
52
- const heading = {
53
- text: alineaNode.querySelector("num")?.textContent ?? null,
54
- };
55
- const pastille = alineaNode.getAttribute("data:pastille") ?? null;
56
- return {
57
- eId,
58
- heading,
59
- text: contentNode.textContent?.trim() ?? null,
60
- html: contentNode.innerHTML?.trim() ?? null,
61
- pastille,
62
- };
63
- }
64
- function buildEmptyArticle(index) {
65
- return {
66
- index: index,
67
- eId: "",
68
- tag: "article",
69
- level: DivisionType["article"],
70
- headings: [],
71
- alineas: [],
72
- };
73
- }
74
- function flattenTexte(texteContentRoot) {
75
- const divisions = [];
76
- let divisionIndex = 0;
77
- const iter = (node) => {
78
- if (node.nodeName === "content") {
79
- return;
80
- }
81
- switch (node.nodeName) {
82
- case "tome":
83
- case "part":
84
- case "book":
85
- case "title":
86
- case "subtitle":
87
- case "chapter":
88
- case "section":
89
- case "subsection":
90
- case "paragraph":
91
- case "article":
92
- divisions.push(buildDivision(node, divisionIndex++));
93
- break;
94
- }
95
- if (node.nodeName === "alinea") {
96
- Array.from(node.childNodes)
97
- // Find direct content children programmatically
98
- // because `:scope` selector does not work
99
- // https://github.com/jsdom/jsdom/issues/2998
100
- .filter((alineaChildNode) => alineaChildNode.nodeName === "content")
101
- .forEach((alineaContentNode) => {
102
- // Hypothesis: alineas should always be enclosed in articles
103
- let lastArticle = divisions.findLast(division => division.tag === "article");
104
- if (!lastArticle) {
105
- lastArticle = buildEmptyArticle(divisionIndex++);
106
- divisions.push(lastArticle);
107
- }
108
- lastArticle.alineas.push(buildAlinea(alineaContentNode, node));
109
- });
110
- }
111
- if (node.hasChildNodes()) {
112
- node.childNodes.forEach((childNode) => iter(childNode));
113
- }
114
- };
115
- iter(texteContentRoot);
116
- return divisions;
117
- }
118
- export function transformTexte(document) {
119
- const metaElement = document.querySelector("meta");
120
- const preambleElement = document.querySelector("preamble");
121
- const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? "";
122
- const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups;
123
- const bodyElement = document.querySelector("body");
124
- const datePresentation = metaElement?.querySelector("FRBRdate[name='#presentation']")?.getAttribute("date");
125
- const dateDepot = metaElement?.querySelector("FRBRdate[name='#depot']")?.getAttribute("date");
126
- const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date");
127
- return {
128
- titre: preambleElement?.querySelector("docTitle")?.textContent || null,
129
- titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")
130
- ?.getAttribute("value") || null,
131
- signetDossier: metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")
132
- ?.getAttribute("value") || null,
133
- urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
134
- urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
135
- type: identificationParts?.type || null,
136
- session: identificationParts?.session || null,
137
- numero: identificationParts?.numTexte ? parseInt(identificationParts.numTexte) : null,
138
- datePresentation: datePresentation ? new Date(datePresentation) : null,
139
- dateDepot: dateDepot ? new Date(dateDepot) : null,
140
- datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
141
- version: identificationParts?.version ? identificationParts.version : null,
142
- workflow: metaElement ? buildWorklow(metaElement) : [],
143
- divisions: bodyElement ? flattenTexte(bodyElement) : [],
144
- };
145
- }
146
- export function transformExposeDesMotifs(document) {
147
- const sectionElements = document.querySelectorAll("section");
148
- const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS");
149
- for (const sectionElement of sectionElements) {
150
- const firstParagraph = sectionElement.querySelector("p:first-of-type");
151
- const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)");
152
- if (!firstParagraph) {
153
- continue;
154
- }
155
- const firstParagraphContent = firstParagraph.textContent;
156
- const secondParagraphContent = secondParagraph?.textContent;
157
- if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
158
- if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
159
- continue;
160
- }
161
- else {
162
- sectionElement.removeChild(secondParagraph);
163
- }
164
- }
165
- sectionElement.removeChild(firstParagraph);
166
- return {
167
- text: sectionElement.textContent?.trim() ?? null,
168
- html: sectionElement.innerHTML?.trim() ?? null,
169
- };
170
- }
171
- return null;
172
- }
173
- export function parseTexte(texteXml) {
174
- try {
175
- const { document } = (new JSDOM(texteXml, {
176
- contentType: "text/xml",
177
- })).window;
178
- return transformTexte(document);
179
- }
180
- catch (error) {
181
- console.error(`Could not parse texte with error ${error}`);
182
- }
183
- return null;
184
- }
185
- // Prevent from memory leak
186
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
187
- export async function parseTexteFromFile(xmlFilePath) {
188
- try {
189
- const { document } = (await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" })).window;
190
- return transformTexte(document);
191
- }
192
- catch (error) {
193
- console.error(`Could not parse texte with error ${error}`);
194
- }
195
- return null;
196
- }
197
- export function parseExposeDesMotifs(exposeDesMotifsHtml) {
198
- const { document } = (new JSDOM(exposeDesMotifsHtml, {
199
- contentType: "text/html",
200
- })).window;
201
- return transformExposeDesMotifs(document);
202
- }
203
- // Prevent from memory leak
204
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
205
- export async function parseExposeDesMotifsFromFile(htmlFilePath) {
206
- const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window;
207
- return transformExposeDesMotifs(document);
208
- }
@@ -1,229 +0,0 @@
1
- import { JSDOM } from "jsdom"
2
-
3
- import { AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP, AKN_IDENTIFICATION_STRUCTURE_REGEXP } from "../scripts/datautil"
4
- import {
5
- Alinea,
6
- Article,
7
- Division,
8
- DivisionContent,
9
- DivisionTag,
10
- DivisionType, ExposeDesMotifs,
11
- FlatTexte, Step,
12
- Version,
13
- } from "../types/texte"
14
-
15
- function buildWorklow (metaElement: HTMLMetaElement): Step[] {
16
- const stepElements = metaElement.querySelectorAll("workflow step")
17
- const steps: Step[] = []
18
- for (const stepElement of stepElements) {
19
- const identification = stepElement.getAttribute("href") ?? ""
20
- const identificationParts = AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups
21
- steps.push({
22
- eId: stepElement.getAttribute("eId")!,
23
- date: stepElement.getAttribute("date") ? new Date(stepElement.getAttribute("date") ?? "") : null,
24
- type: identificationParts?.type || null,
25
- session: identificationParts?.session || null,
26
- numero: identificationParts?.numTexte || null,
27
- version: identificationParts?.version ? identificationParts.version as Version : null,
28
- outcome: stepElement.getAttribute("outcome"),
29
- })
30
- }
31
- return steps
32
- }
33
-
34
- function buildDivision (node: Node, index: number): Division {
35
- const eId = (node as Element).getAttribute("eId")!
36
- const tag = node.nodeName as DivisionTag
37
- const level = DivisionType[tag]
38
- const titleNode = (node as Element).querySelector("num")
39
- const subtitleNode = (node as Element).querySelector("heading")
40
- const headings: DivisionContent[] = [
41
- ...(titleNode ? [{
42
- text: titleNode.textContent?.trim() ?? null,
43
- html: titleNode.innerHTML?.trim() ?? null,
44
- }] : []),
45
- ...(subtitleNode ? [{
46
- text: subtitleNode.textContent?.trim() ?? null,
47
- html: subtitleNode.innerHTML?.trim() ?? null,
48
- }] : []),
49
- ]
50
- const division = {
51
- index,
52
- eId,
53
- tag,
54
- level,
55
- headings,
56
- }
57
- if (tag === "article") {
58
- (division as Article).alineas = []
59
- }
60
- return division
61
- }
62
-
63
- function buildAlinea (contentNode: Node, alineaNode: Node): Alinea {
64
- const eId = (alineaNode as Element).getAttribute("eId")!
65
- const heading = {
66
- text: (alineaNode as Element).querySelector("num")?.textContent ?? null,
67
- }
68
- const pastille = (alineaNode as Element).getAttribute("data:pastille") ?? null
69
- return {
70
- eId,
71
- heading,
72
- text: (contentNode as Element).textContent?.trim() ?? null,
73
- html: (contentNode as Element).innerHTML?.trim() ?? null,
74
- pastille,
75
- }
76
- }
77
-
78
- function buildEmptyArticle (index: number): Article {
79
- return {
80
- index: index,
81
- eId: "",
82
- tag: "article",
83
- level: DivisionType["article"],
84
- headings: [],
85
- alineas: [],
86
- }
87
- }
88
-
89
- function flattenTexte (texteContentRoot: Node): Division[] {
90
- const divisions: Division[] = []
91
- let divisionIndex = 0
92
- const iter = (node: Node) => {
93
- if (node.nodeName === "content") {
94
- return
95
- }
96
- switch (node.nodeName) {
97
- case "tome":
98
- case "part":
99
- case "book":
100
- case "title":
101
- case "subtitle":
102
- case "chapter":
103
- case "section":
104
- case "subsection":
105
- case "paragraph":
106
- case "article":
107
- divisions.push(buildDivision(node, divisionIndex++))
108
- break
109
- }
110
- if (node.nodeName === "alinea") {
111
- Array.from(node.childNodes)
112
- // Find direct content children programmatically
113
- // because `:scope` selector does not work
114
- // https://github.com/jsdom/jsdom/issues/2998
115
- .filter((alineaChildNode: Node) => alineaChildNode.nodeName === "content")
116
- .forEach((alineaContentNode: Node) => {
117
- // Hypothesis: alineas should always be enclosed in articles
118
- let lastArticle = divisions.findLast(division => division.tag === "article") as Article
119
- if (!lastArticle) {
120
- lastArticle = buildEmptyArticle(divisionIndex++)
121
- divisions.push(lastArticle)
122
- }
123
- lastArticle.alineas.push(buildAlinea(alineaContentNode, node))
124
- })
125
- }
126
- if (node.hasChildNodes()) {
127
- node.childNodes.forEach((childNode: Node) => iter(childNode))
128
- }
129
- }
130
- iter(texteContentRoot)
131
- return divisions
132
- }
133
-
134
- export function transformTexte (document: Document): FlatTexte | null {
135
- const metaElement = document.querySelector("meta")
136
- const preambleElement = document.querySelector("preamble")
137
- const identification = metaElement?.querySelector("FRBRExpression FRBRuri")?.getAttribute("value") ?? ""
138
- const identificationParts = AKN_IDENTIFICATION_STRUCTURE_REGEXP.exec(identification)?.groups
139
- const bodyElement = document.querySelector("body")
140
- const datePresentation = metaElement?.querySelector("FRBRdate[name='#presentation']")?.getAttribute("date")
141
- const dateDepot = metaElement?.querySelector("FRBRdate[name='#depot']")?.getAttribute("date")
142
- const datePublicationXml = metaElement?.querySelector("FRBRdate[name='#publication-xml']")?.getAttribute("date")
143
- return {
144
- titre: preambleElement?.querySelector("docTitle")?.textContent || null,
145
- titreCourt: metaElement?.querySelector("FRBRalias[name='intitule-court']")
146
- ?.getAttribute("value") || null,
147
- signetDossier:
148
- metaElement?.querySelector("FRBRalias[name='signet-dossier-legislatif-senat']")
149
- ?.getAttribute("value") || null,
150
- urlDossierSenat: metaElement?.querySelector("FRBRalias[name='url-senat']")?.getAttribute("value") || null,
151
- urlDossierAssemblee: metaElement?.querySelector("FRBRalias[name='url-AN']")?.getAttribute("value") || null,
152
- type: identificationParts?.type || null,
153
- session: identificationParts?.session || null,
154
- numero: identificationParts?.numTexte ? parseInt(identificationParts.numTexte) : null,
155
- datePresentation: datePresentation ? new Date(datePresentation) : null,
156
- dateDepot: dateDepot ? new Date(dateDepot) : null,
157
- datePublicationXml: datePublicationXml ? new Date(datePublicationXml) : null,
158
- version: identificationParts?.version ? identificationParts.version as Version : null,
159
- workflow: metaElement ? buildWorklow(metaElement) : [],
160
- divisions: bodyElement ? flattenTexte(bodyElement) : [],
161
- }
162
- }
163
-
164
- export function transformExposeDesMotifs (document: Document): ExposeDesMotifs | null {
165
- const sectionElements = document.querySelectorAll("section")
166
- const exposeDesMotifsRegexp = new RegExp("EXPOS.{1,2}[\\n\\s]DES[\\n\\s]MOTIFS")
167
- for (const sectionElement of sectionElements) {
168
- const firstParagraph = sectionElement.querySelector("p:first-of-type")
169
- const secondParagraph = sectionElement.querySelector("p:nth-of-type(2)")
170
- if (!firstParagraph) {
171
- continue
172
- }
173
-
174
- const firstParagraphContent = firstParagraph.textContent
175
- const secondParagraphContent = secondParagraph?.textContent
176
- if (!firstParagraphContent || !exposeDesMotifsRegexp.test(firstParagraphContent.toUpperCase())) {
177
- if (!secondParagraphContent || !exposeDesMotifsRegexp.test(secondParagraphContent.toUpperCase())) {
178
- continue
179
- } else {
180
- sectionElement.removeChild(secondParagraph)
181
- }
182
- }
183
-
184
- sectionElement.removeChild(firstParagraph)
185
- return {
186
- text: sectionElement.textContent?.trim() ?? null,
187
- html: sectionElement.innerHTML?.trim() ?? null,
188
- }
189
- }
190
- return null
191
- }
192
-
193
- export function parseTexte (texteXml: string): FlatTexte | null {
194
- try {
195
- const { document } = (new JSDOM(texteXml, {
196
- contentType: "text/xml",
197
- })).window
198
- return transformTexte(document)
199
- } catch (error: any) {
200
- console.error(`Could not parse texte with error ${error}`)
201
- }
202
- return null
203
- }
204
-
205
- // Prevent from memory leak
206
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
207
- export async function parseTexteFromFile (xmlFilePath: string): Promise<FlatTexte | null> {
208
- try {
209
- const { document } = (await JSDOM.fromFile(xmlFilePath, { contentType: "text/xml" })).window
210
- return transformTexte(document)
211
- } catch (error: any) {
212
- console.error(`Could not parse texte with error ${error}`)
213
- }
214
- return null
215
- }
216
-
217
- export function parseExposeDesMotifs (exposeDesMotifsHtml: string): ExposeDesMotifs | null {
218
- const { document } = (new JSDOM(exposeDesMotifsHtml, {
219
- contentType: "text/html",
220
- })).window
221
- return transformExposeDesMotifs(document)
222
- }
223
-
224
- // Prevent from memory leak
225
- // https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
226
- export async function parseExposeDesMotifsFromFile (htmlFilePath: string): Promise<ExposeDesMotifs | null> {
227
- const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window
228
- return transformExposeDesMotifs(document)
229
- }
@@ -1,19 +0,0 @@
1
- import { sql } from "kysely";
2
- export function concat(...exprs) {
3
- return sql.join(exprs, sql `||`).$castTo();
4
- }
5
- export function expandToRows(expr, regexp) {
6
- return sql `unnest(regexp_matches(${expr}, ${regexp}, 'g'))`;
7
- }
8
- export function removeSubstring(expr, pattern) {
9
- return replace(expr, pattern, sql.val(""));
10
- }
11
- export function replace(expr, pattern, replacement) {
12
- return sql `replace(${expr}, ${pattern}, ${replacement})`;
13
- }
14
- export function rtrim(expr) {
15
- return sql `rtrim(${expr})`;
16
- }
17
- export function toDateString(expr) {
18
- return sql `to_char(${expr}, 'yyyy-MM-dd')`;
19
- }
package/lib/model/util.ts DELETED
@@ -1,32 +0,0 @@
1
- import { Expression, sql } from "kysely"
2
-
3
- export function concat (...exprs: Expression<number | string | null | undefined>[]) {
4
- return sql.join(exprs, sql`||`).$castTo<string>()
5
- }
6
-
7
- export function expandToRows (expr: Expression<string | null | undefined>, regexp: Expression<string>) {
8
- return sql`unnest(regexp_matches(${expr}, ${regexp}, 'g'))`
9
- }
10
-
11
- export function removeSubstring (
12
- expr: Expression<string | null | undefined>,
13
- pattern: Expression<string>
14
- ) {
15
- return replace(expr, pattern, sql.val(""))
16
- }
17
-
18
- export function replace (
19
- expr: Expression<string | null | undefined>,
20
- pattern: Expression<string>,
21
- replacement: Expression<string>
22
- ) {
23
- return sql<string>`replace(${expr}, ${pattern}, ${replacement})`
24
- }
25
-
26
- export function rtrim (expr: Expression<string | null | undefined>) {
27
- return sql<string>`rtrim(${expr})`
28
- }
29
-
30
- export function toDateString (expr: Expression<Date | null | undefined>) {
31
- return sql<string>`to_char(${expr}, 'yyyy-MM-dd')`
32
- }
@@ -1,5 +0,0 @@
1
- /**
2
- * This file was generated by kysely-codegen.
3
- * Please do not edit it manually.
4
- */
5
- export {};