@tricoteuses/assemblee 1.9.13 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/README.md +0 -5
  2. package/lib/api.js +1 -1
  3. package/lib/api.mjs +1 -1
  4. package/lib/bugs/acteur-00010/plugin.test.js +5 -5
  5. package/lib/bugs/acteur-00010/plugin.test.mjs +1 -1
  6. package/lib/bugs/acteur-00010.js +17 -18
  7. package/lib/bugs/acteur-00010.mjs +1 -1
  8. package/lib/bugs/agenda-00002/plugin.test.js +6 -6
  9. package/lib/bugs/agenda-00002/plugin.test.mjs +1 -1
  10. package/lib/bugs/agenda-00002.js +31 -32
  11. package/lib/bugs/agenda-00002.mjs +3 -3
  12. package/lib/bugs/agenda-00008/plugin.test.js +5 -5
  13. package/lib/bugs/agenda-00008/plugin.test.mjs +1 -1
  14. package/lib/bugs/agenda-00008.js +17 -18
  15. package/lib/bugs/agenda-00008.mjs +1 -1
  16. package/lib/bugs/agenda-00011/plugin.test.js +10 -10
  17. package/lib/bugs/agenda-00011/plugin.test.mjs +1 -1
  18. package/lib/bugs/agenda-00011.js +28 -29
  19. package/lib/bugs/agenda-00011.mjs +3 -3
  20. package/lib/bugs.js +27 -29
  21. package/lib/bugs.mjs +3 -3
  22. package/lib/cleaners/actes_legislatifs.js +5 -5
  23. package/lib/cleaners/actes_legislatifs.mjs +1 -1
  24. package/lib/cleaners/acteurs.js +5 -5
  25. package/lib/cleaners/acteurs.mjs +1 -1
  26. package/lib/cleaners/amendements.js +5 -5
  27. package/lib/cleaners/amendements.mjs +1 -1
  28. package/lib/cleaners/debats.js +1 -1
  29. package/lib/cleaners/debats.mjs +1 -1
  30. package/lib/cleaners/documents.js +5 -5
  31. package/lib/cleaners/documents.mjs +1 -1
  32. package/lib/cleaners/dossiers_legislatifs.js +5 -5
  33. package/lib/cleaners/dossiers_legislatifs.mjs +1 -1
  34. package/lib/cleaners/index.d.ts +0 -1
  35. package/lib/cleaners/index.js +1 -8
  36. package/lib/cleaners/index.mjs +1 -2
  37. package/lib/cleaners/organes.js +2 -2
  38. package/lib/cleaners/organes.mjs +1 -1
  39. package/lib/cleaners/questions.js +1 -1
  40. package/lib/cleaners/questions.mjs +1 -1
  41. package/lib/cleaners/reunions.js +5 -5
  42. package/lib/cleaners/reunions.mjs +1 -1
  43. package/lib/cleaners/scrutins.js +8 -8
  44. package/lib/cleaners/scrutins.mjs +1 -1
  45. package/lib/cleaners/xml.js +8 -8
  46. package/lib/cleaners/xml.mjs +1 -1
  47. package/lib/datasets.d.ts +1 -13
  48. package/lib/datasets.js +10 -29
  49. package/lib/datasets.mjs +2 -21
  50. package/lib/dates.js +3 -4
  51. package/lib/dates.mjs +1 -1
  52. package/lib/dossiers_legislatifs.js +8 -8
  53. package/lib/dossiers_legislatifs.mjs +1 -1
  54. package/lib/file_systems.js +15 -14
  55. package/lib/file_systems.mjs +3 -3
  56. package/lib/git.js +5 -5
  57. package/lib/git.mjs +1 -1
  58. package/lib/index.d.ts +1 -0
  59. package/lib/index.js +8 -1
  60. package/lib/index.mjs +2 -1
  61. package/lib/inserters.js +4 -4
  62. package/lib/inserters.mjs +1 -1
  63. package/lib/loaders.d.ts +0 -5
  64. package/lib/loaders.js +255 -299
  65. package/lib/loaders.mjs +1 -13
  66. package/lib/logger.js +1 -1
  67. package/lib/logger.mjs +1 -1
  68. package/lib/organes.js +1 -1
  69. package/lib/organes.mjs +1 -1
  70. package/lib/parsers/documents.js +11 -11
  71. package/lib/parsers/documents.mjs +1 -1
  72. package/lib/parsers/index.d.ts +1 -0
  73. package/lib/parsers/index.js +8 -1
  74. package/lib/parsers/index.mjs +2 -1
  75. package/lib/parsers/recherche_amendements.js +17 -15
  76. package/lib/parsers/recherche_amendements.mjs +1 -1
  77. package/lib/parsers/textes_lois.d.ts +22 -15
  78. package/lib/parsers/textes_lois.js +264 -752
  79. package/lib/parsers/textes_lois.mjs +249 -597
  80. package/lib/raw_types/acteurs_et_organes.js +9 -9
  81. package/lib/raw_types/acteurs_et_organes.mjs +1 -1
  82. package/lib/raw_types/agendas.d.ts +1 -0
  83. package/lib/raw_types/agendas.js +11 -10
  84. package/lib/raw_types/agendas.mjs +3 -2
  85. package/lib/raw_types/amendements.js +9 -9
  86. package/lib/raw_types/amendements.mjs +1 -1
  87. package/lib/raw_types/debats.js +9 -9
  88. package/lib/raw_types/debats.mjs +1 -1
  89. package/lib/raw_types/dossiers_legislatifs.js +9 -9
  90. package/lib/raw_types/dossiers_legislatifs.mjs +1 -1
  91. package/lib/raw_types/questions.js +9 -9
  92. package/lib/raw_types/questions.mjs +1 -1
  93. package/lib/raw_types/scrutins.js +9 -9
  94. package/lib/raw_types/scrutins.mjs +1 -1
  95. package/lib/scripts/bugs_helper.js +25 -26
  96. package/lib/scripts/bugs_helper.mjs +3 -3
  97. package/lib/scripts/clean_reorganized_data.js +256 -245
  98. package/lib/scripts/clean_reorganized_data.mjs +18 -29
  99. package/lib/scripts/diff_amendements.js +9 -8
  100. package/lib/scripts/diff_amendements.mjs +1 -1
  101. package/lib/scripts/document_dossiers_legislatifs.js +24 -27
  102. package/lib/scripts/document_dossiers_legislatifs.mjs +3 -3
  103. package/lib/scripts/get_today_reunions.js +3 -2
  104. package/lib/scripts/get_today_reunions.mjs +1 -1
  105. package/lib/scripts/merge_scrutins.js +9 -8
  106. package/lib/scripts/merge_scrutins.mjs +1 -1
  107. package/lib/scripts/parse_textes_lois.js +6 -5
  108. package/lib/scripts/parse_textes_lois.mjs +1 -1
  109. package/lib/scripts/process_open_dataset.d.ts +0 -2
  110. package/lib/scripts/process_open_dataset.js +19 -19
  111. package/lib/scripts/process_open_dataset.mjs +3 -5
  112. package/lib/scripts/raw_types_from_amendements.d.ts +0 -1
  113. package/lib/scripts/raw_types_from_amendements.js +122 -198
  114. package/lib/scripts/raw_types_from_amendements.mjs +105 -60
  115. package/lib/scripts/reorganize_data.js +14 -15
  116. package/lib/scripts/reorganize_data.mjs +3 -3
  117. package/lib/scripts/retrieve_deputes_photos.js +40 -31
  118. package/lib/scripts/retrieve_deputes_photos.mjs +10 -2
  119. package/lib/scripts/retrieve_documents.d.ts +1 -1
  120. package/lib/scripts/retrieve_documents.js +230 -138
  121. package/lib/scripts/retrieve_documents.mjs +87 -38
  122. package/lib/scripts/retrieve_open_data.js +27 -30
  123. package/lib/scripts/retrieve_open_data.mjs +5 -8
  124. package/lib/scripts/retrieve_pending_amendments.js +14 -14
  125. package/lib/scripts/retrieve_pending_amendments.mjs +1 -1
  126. package/lib/scripts/retrieve_senateurs_photos.js +10 -10
  127. package/lib/scripts/retrieve_senateurs_photos.mjs +1 -1
  128. package/lib/scripts/retrieve_textes_lois.js +17 -15
  129. package/lib/scripts/retrieve_textes_lois.mjs +1 -1
  130. package/lib/scripts/shared/cli_helpers.d.ts +11 -0
  131. package/lib/scripts/shared/cli_helpers.js +13 -2
  132. package/lib/scripts/shared/cli_helpers.mjs +12 -1
  133. package/lib/scripts/test_iter_load.js +5 -19
  134. package/lib/scripts/test_iter_load.mjs +2 -9
  135. package/lib/scripts/test_load.js +2 -2
  136. package/lib/scripts/test_load.mjs +1 -1
  137. package/lib/scripts/test_load_big_files.js +2 -2
  138. package/lib/scripts/test_load_big_files.mjs +1 -1
  139. package/lib/scripts/validate_json.js +17 -18
  140. package/lib/scripts/validate_json.mjs +3 -3
  141. package/lib/shared_types/codes_actes.js +1 -1
  142. package/lib/shared_types/codes_actes.mjs +1 -1
  143. package/lib/strings.js +1 -1
  144. package/lib/strings.mjs +1 -1
  145. package/lib/types/acteurs_et_organes.js +9 -9
  146. package/lib/types/acteurs_et_organes.mjs +1 -1
  147. package/lib/types/agendas.d.ts +1 -0
  148. package/lib/types/agendas.js +11 -10
  149. package/lib/types/agendas.mjs +3 -2
  150. package/lib/types/amendements.js +9 -9
  151. package/lib/types/amendements.mjs +1 -1
  152. package/lib/types/debats.js +9 -9
  153. package/lib/types/debats.mjs +1 -1
  154. package/lib/types/dossiers_legislatifs.d.ts +13 -0
  155. package/lib/types/dossiers_legislatifs.js +47 -9
  156. package/lib/types/dossiers_legislatifs.mjs +39 -1
  157. package/lib/types/legislatures.js +1 -1
  158. package/lib/types/legislatures.mjs +1 -1
  159. package/lib/types/questions.js +9 -9
  160. package/lib/types/questions.mjs +1 -1
  161. package/lib/types/scrutins.js +9 -9
  162. package/lib/types/scrutins.mjs +1 -1
  163. package/lib/urls.js +2 -2
  164. package/lib/urls.mjs +1 -1
  165. package/package.json +14 -20
  166. package/lib/cleaners/textes_loi.d.ts +0 -1
  167. package/lib/cleaners/textes_loi.js +0 -12
  168. package/lib/cleaners/textes_loi.mjs +0 -5
  169. package/lib/examples/PIONANR5L15B0020/input.d.ts +0 -2
  170. package/lib/examples/PIONANR5L15B0020/input.js +0 -10
  171. package/lib/examples/PIONANR5L15B0020/input.mjs +0 -193
  172. package/lib/examples/PIONANR5L15B0020/input.ts +0 -193
  173. package/lib/examples/PIONANR5L15B0020/loi.json +0 -26
  174. package/lib/examples/PIONANR5L15B0020/loi_populated.json +0 -29
  175. package/lib/examples/PIONANR5L15B0020/meta.json +0 -28
  176. package/lib/examples/PIONANR5L15B0020/motifs.json +0 -5
  177. package/lib/examples/PIONANR5L15B0020/motifs_populated.json +0 -6
  178. package/lib/examples/PIONANR5L15B0020/sommaire.json +0 -3
  179. package/lib/examples/PRJLANR5L16B0914/input.d.ts +0 -2
  180. package/lib/examples/PRJLANR5L16B0914/input.js +0 -10
  181. package/lib/examples/PRJLANR5L16B0914/input.mjs +0 -322
  182. package/lib/examples/PRJLANR5L16B0914/input.ts +0 -322
  183. package/lib/examples/PRJLANR5L16B0914/loi.json +0 -14
  184. package/lib/examples/PRJLANR5L16B0914/loi_populated.json +0 -20
  185. package/lib/examples/PRJLANR5L16B0914/meta.json +0 -35
  186. package/lib/examples/PRJLANR5L16B0914/motifs.json +0 -4
  187. package/lib/examples/PRJLANR5L16B0914/motifs_populated.json +0 -5
  188. package/lib/examples/PRJLANR5L16B0914/sommaire.json +0 -3
  189. package/lib/examples/PRJLANR5L16B2014/README.md +0 -2
  190. package/lib/examples/PRJLANR5L16B2014/input.d.ts +0 -2
  191. package/lib/examples/PRJLANR5L16B2014/input.js +0 -10
  192. package/lib/examples/PRJLANR5L16B2014/input.mjs +0 -577
  193. package/lib/examples/PRJLANR5L16B2014/input.ts +0 -577
  194. package/lib/examples/PRJLANR5L16B2014/loi.json +0 -369
  195. package/lib/examples/PRJLANR5L16B2014/meta.json +0 -31
  196. package/lib/examples/PRJLANR5L16B2014/motifs.json +0 -5
  197. package/lib/examples/PRJLANR5L16B2014/sommaire.json +0 -51
  198. package/lib/examples/PRJLANR5L16B2424/alineas.json +0 -44
  199. package/lib/examples/PRJLANR5L16B2424/input.d.ts +0 -2
  200. package/lib/examples/PRJLANR5L16B2424/input.js +0 -10
  201. package/lib/examples/PRJLANR5L16B2424/input.mjs +0 -322
  202. package/lib/examples/PRJLANR5L16B2424/input.ts +0 -322
  203. package/lib/examples/PRJLANR5L16B2424/loi.json +0 -74
  204. package/lib/examples/PRJLANR5L16B2424/loi_populated.json +0 -87
  205. package/lib/examples/PRJLANR5L16B2424/meta.json +0 -31
  206. package/lib/examples/PRJLANR5L16B2424/motifs.json +0 -4
  207. package/lib/examples/PRJLANR5L16B2424/motifs_populated.json +0 -5
  208. package/lib/examples/PRJLANR5L16B2424/sommaire.json +0 -3
  209. package/lib/examples/PRJLANR5L16B2462/input.d.ts +0 -2
  210. package/lib/examples/PRJLANR5L16B2462/input.js +0 -10
  211. package/lib/examples/PRJLANR5L16B2462/input.mjs +0 -1308
  212. package/lib/examples/PRJLANR5L16B2462/input.ts +0 -1308
  213. package/lib/examples/PRJLANR5L16B2462/loi.json +0 -74
  214. package/lib/examples/PRJLANR5L16B2462/loi_populated.json +0 -310
  215. package/lib/examples/PRJLANR5L16B2462/meta.json +0 -29
  216. package/lib/examples/PRJLANR5L16B2462/motifs.json +0 -6
  217. package/lib/examples/PRJLANR5L16B2462/motifs_populated.json +0 -6
  218. package/lib/examples/PRJLANR5L16B2462/sommaire.json +0 -50
  219. package/lib/examples/README.md +0 -3
  220. package/lib/raw_types/textes_loi.d.ts +0 -93
  221. package/lib/raw_types/textes_loi.js +0 -429
  222. package/lib/raw_types/textes_loi.mjs +0 -378
  223. package/lib/schemas/texte_loi/Alinea.json +0 -36
  224. package/lib/schemas/texte_loi/ExposeMotifs.json +0 -24
  225. package/lib/schemas/texte_loi/MetaDonnees.json +0 -126
  226. package/lib/schemas/texte_loi/ProjetLoi.json +0 -64
  227. package/lib/schemas/texte_loi/Sommaire.json +0 -48
  228. package/lib/schemas/texte_loi/TexteLoi.json +0 -225
  229. package/lib/schemas/texte_loi/TexteLoiPartiel.json +0 -237
  230. package/lib/scripts/process_textes_loi_dataset.d.ts +0 -7
  231. package/lib/scripts/process_textes_loi_dataset.js +0 -373
  232. package/lib/scripts/process_textes_loi_dataset.mjs +0 -239
  233. package/lib/types/textes_loi.d.ts +0 -94
  234. package/lib/types/textes_loi.js +0 -429
  235. package/lib/types/textes_loi.mjs +0 -378
@@ -1,608 +1,260 @@
1
- import { ChatOpenAI } from "@langchain/openai";
2
- import { ChatPromptTemplate } from "@langchain/core/prompts";
3
- import Ajv from "ajv";
4
- import { delimiter, runLogFun } from "../logger.mjs";
5
- const ALINEA_SCHEMA = require("../schemas/texte_loi/Alinea.json");
6
- const EXPOSE_MOTIFS_SCHEMA = require("../schemas/texte_loi/ExposeMotifs.json");
7
- const META_DONNEES_SCHEMA = require("../schemas/texte_loi/MetaDonnees.json");
8
- const PROJET_LOI_SCHEMA = require("../schemas/texte_loi/ProjetLoi.json");
9
- const SOMMAIRE_SCHEMA = require("../schemas/texte_loi/Sommaire.json");
10
-
11
- /**
12
- * Object containing input examples and their output schemas.
13
- * Used to train the LLM.
14
- * @type {Record<string, {motifs: any, input: any, loi: any, meta: any, sommaire: any}>}
15
- */
16
- const EXAMPLES = {
17
- PRJLANR5L16B0914: {
18
- motifs: require("../examples/PRJLANR5L16B0914/motifs.json"),
19
- input: require("../examples/PRJLANR5L16B0914/input"),
20
- loi: require("../examples/PRJLANR5L16B0914/loi.json"),
21
- meta: require("../examples/PRJLANR5L16B0914/meta.json"),
22
- sommaire: require("../examples/PRJLANR5L16B0914/sommaire.json")
23
- },
24
- PRJLANR5L16B2014: {
25
- motifs: require("../examples/PRJLANR5L16B2014/motifs.json"),
26
- input: require("../examples/PRJLANR5L16B2014/input"),
27
- loi: require("../examples/PRJLANR5L16B2014/loi.json"),
28
- meta: require("../examples/PRJLANR5L16B2014/meta.json"),
29
- sommaire: require("../examples/PRJLANR5L16B2014/sommaire.json")
30
- },
31
- PRJLANR5L16B2424: {
32
- alineas: require("../examples/PRJLANR5L16B2424/alineas.json"),
33
- motifs: require("../examples/PRJLANR5L16B2424/motifs.json"),
34
- input: require("../examples/PRJLANR5L16B2424/input"),
35
- loi: require("../examples/PRJLANR5L16B2424/loi.json"),
36
- meta: require("../examples/PRJLANR5L16B2424/meta.json"),
37
- sommaire: require("../examples/PRJLANR5L16B2424/sommaire.json")
38
- }
39
- };
40
- const FULL_TEXTE_SCHEMA = {
41
- $schema: "http://json-schema.org/draft-07/schema#",
42
- title: "Sommaire",
43
- description: "Récupération d'un texte complet",
44
- type: "object",
45
- properties: {
46
- texte: {
47
- description: "Texte complet de la section indiquée.",
48
- type: "string"
49
- }
50
- },
51
- required: ["texte"]
52
- };
53
-
54
- /**
55
- * LLM model.
56
- */
57
- let MODEL = null;
58
- if (process.env.OPENAI_API_KEY) {
59
- MODEL = new ChatOpenAI({
60
- temperature: 0,
61
- topP: 0.0,
62
- model: "gpt-4o-mini",
63
- apiKey: process.env.OPENAI_API_KEY,
64
- maxTokens: -1
65
- });
66
- }
67
-
68
- /**
69
- * System input template for the LLM.
70
- */
71
- const SYSTEM_TEMPLATE = `
72
- # Instructions :
73
-
74
- Tu es un expert de la loi et de l'analyse de pages HTML.
75
- Tu dois analyser un texte de loi afin de séparer et de structurer les éléments qui le composent. Le format d'entrée est HTML.
76
-
77
- Le texte est divisé en deux parties :
78
- A) un exposé des motifs qui indique les raisons pour lesquelles ce projet est soumis au Parlement. Il peut contenir 3 éléments : 1) une citation au début d'un document, 2) le texte principal qui développe les arguments de l'auteur à l'appui de la modification législative ou des dispositions nouvelles qu'il propose, 3) des paragraphes qui présentent les articles.
79
- B) le dispositif qui est la partie normative et rédigée en articles. Il peut comporter des divisions nommées titres, chapitres, sections, sous-sections. Lorsqu'il y a un seul niveau de structure, on retrouvera le chapitre ; s’il y a deux niveaux de structure, on retrouvera des chapitres puis des sections. S’il y a trois niveaux de structure, on retrouvera des titres puis des chapitres puis des sections.
80
-
81
- Si le texte de loi ne comporte pas de division, le sommaire doit rester vide.
82
- Toutes les lignes qui débutent par un '«' ne sont pas à prendre en compte dans les divisions
83
- Ta réponse doit être un JSON structuré avec les éléments du texte de loi en respectant le schéma JSON fourni.
84
- `;
85
- let SILENT_LOG = false;
86
-
87
- /**
88
- * Parses the given HTML string and returns the parsed textes loi.
89
- *
90
- * @param {string} html - The HTML string to parse.
91
- * @return {Promise<{partial: {sommaire: any, loi: any, motifs: any, meta: any}, completed: {sommaire: any, loi: any, motifs: any, meta: any}}>} - The parsed textes loi, including the partial and completed versions.
92
- */
93
- export async function parseTexteLoi(html, options = {
94
- silent: false
95
- }) {
96
- SILENT_LOG = options.silent;
97
- runLogFun(console.info("Start extracting loi..."), options);
98
- const meta = await extractMetaDonnees(html);
99
- const sommaire = await extractSommaire(html);
100
- const partialMotifs = await extractMotifs(html);
101
- const partialLoi = await extractProjetLoi(html);
102
- const exposeMotifs = await populateMotifs(partialMotifs, html);
103
- const loi = await populateProjetLoi(partialLoi, html);
104
- return {
105
- sommaire,
106
- loi,
107
- exposeMotifs,
108
- meta
109
- };
110
- }
111
-
112
- /**
113
- * Cleans the given HTML string by removing unnecessary tags and content.
114
- *
115
- * @param {string} html - The HTML string to be cleaned.
116
- * @return {string} The cleaned HTML string.
117
- */
118
- function cleanHtml(html) {
119
- let cleanedHtml = html.replace(/(<style[\w\W]+style>)/g, "");
120
- cleanedHtml = cleanedHtml.replace(/(<head[\w\W]+head>)/g, "");
121
- cleanedHtml = cleanedHtml.replace(/<img[^>]*>/g, "");
122
- return cleanedHtml;
123
- }
124
-
125
- /**
126
- * Removes all HTML tags from the given HTML string and returns the cleaned text.
127
- *
128
- * @param {string} html - The HTML string from which to remove tags.
129
- * @return {string} The cleaned text without any HTML tags.
130
- */
131
- function removeHtmlTags(html) {
132
- // Regular expression to match HTML tags
133
- const htmlTagRegex = /<[^>]*>/g;
134
-
135
- // Replace all HTML tags with an empty string
136
- const textWithoutTags = html.replace(htmlTagRegex, "");
137
- return textWithoutTags;
138
- }
139
-
140
- /**
141
- * Populates the motifs object with the extracted text from the given HTML.
142
- *
143
- * @param {Record<string, any>} motifs - The motifs object to be populated.
144
- * @param {string} html - The HTML string from which to extract the text.
145
- * @return {Promise<Record<string, any>>} - The populated motifs object.
146
- */
147
- async function populateMotifs(motifs, html) {
148
- runLogFun(console.info("Populating expose motifs..."));
149
- let cleanedHtml = cleanHtml(html);
150
- if (motifs.debut === motifs.fin || !motifs.fin) {
151
- motifs.texte = motifs.debut;
152
- } else {
153
- const systemTemplate = `
154
- # Instructions :
155
-
156
- Tu es un expert de la loi et de l'analyse de pages HTML.
157
- Tu dois analyser un texte de loi afin d'en extraire certaines parties.
158
- Le format d'entrée est du HTML. Le format de sortie est du texte.
159
- Récupère l'intégralité du texte commençant par :
160
-
161
- """
162
- {debut}
163
- """
164
-
165
- Et finissant par :
166
-
167
- """
168
- {fin}
169
- """
170
- `;
171
- const prompt = ChatPromptTemplate.fromMessages([["system", systemTemplate], ["human", "{input}"]]);
172
- const structuredModel = MODEL.withStructuredOutput(FULL_TEXTE_SCHEMA);
173
- const chain = prompt.pipe(structuredModel);
174
- const articlePromptResult = await chain.invoke({
175
- input: cleanedHtml,
176
- debut: motifs.debut,
177
- fin: motifs.fin
178
- });
179
- motifs.texte = articlePromptResult.texte;
180
- }
181
- runLogFun(console.info("Opération de peuplement de l'exposé des motifs terminée ✅"), {
182
- silent: SILENT_LOG
183
- });
184
- delimiter(SILENT_LOG);
185
- delete motifs.debut;
186
- delete motifs.fin;
187
- return motifs;
188
- }
189
-
190
- /**
191
- * Asynchronously populates a projet loi object with the full text of each article and alinea in the given HTML.
192
- *
193
- * @param {Record<string, any>} partial - The partial projet loi object to be populated.
194
- * @param {string} html - The HTML containing the text of the articles and alineas.
195
- * @return {Promise<Record<string, any>>} A Promise that resolves to the fully populated projet loi object.
196
- */
197
- async function populateProjetLoi(partial, html) {
198
- runLogFun(console.group("Peuplement du projet loi..."), {
199
- silent: SILENT_LOG
200
- });
201
- let cleanedHtml = cleanHtml(html);
202
- const articles = partial.articles;
203
- const fullArticles = [];
204
- const systemTemplate = `
205
- # Instructions :
206
-
207
- Tu es un expert de la loi et de l'analyse de pages HTML.
208
- Tu dois analyser un texte de loi afin d'en extraire certaines parties.
209
- Le format d'entrée est du HTML. Le format de sortie est du texte.
210
- Récupère l'intégralité du texte commençant par :
211
-
212
- """
213
- {debut}
214
- """
215
-
216
- Et finissant par :
217
-
218
- """
219
- {fin}
220
- """
221
- `;
222
- const prompt = ChatPromptTemplate.fromMessages([["system", systemTemplate], ["human", "{input}"]]);
223
- const structuredModel = MODEL.withStructuredOutput(FULL_TEXTE_SCHEMA);
224
- const chain = prompt.pipe(structuredModel);
225
- for (const article of articles) {
226
- if (article.debut === article.fin || !article.fin) {
227
- article.texte = article.debut;
228
- } else {
229
- const articlePromptResult = await chain.invoke({
230
- input: cleanedHtml,
231
- debut: article.debut,
232
- fin: article.fin
233
- });
234
- article.texte = articlePromptResult.texte;
1
+ import assert from "assert";
2
+ import { JSDOM } from "jsdom";
3
+ function parseHeader(header) {
4
+ const headersMapping = [{
5
+ regex: /^(RAPPORT_)?ANNEXE(_|$)|^ETAT_/,
6
+ level: 0,
7
+ name: "Annexe"
8
+ }, {
9
+ regex: /^TOME_/,
10
+ level: 1,
11
+ name: "Tome"
12
+ }, {
13
+ regex: /^PARTIE_|^(PREMIERE|SECONDE|DEUXIEME|TROISIEME|QUATRIEME)_PARTIE(_|$)/,
14
+ level: 2,
15
+ name: "Partie"
16
+ }, {
17
+ regex: /^LIVRE_/,
18
+ level: 3,
19
+ name: "Livre"
20
+ }, {
21
+ regex: /^TITRE_/,
22
+ level: 4,
23
+ name: "Titre"
24
+ }, {
25
+ regex: /^SOUSTITRE_/,
26
+ level: 5,
27
+ name: "SousTitre"
28
+ }, {
29
+ regex: /^CHAPITRE_/,
30
+ level: 6,
31
+ name: "Chapitre"
32
+ }, {
33
+ regex: /^SECTION_/,
34
+ level: 7,
35
+ name: "Section"
36
+ }, {
37
+ regex: /^SOUSSECTION_/,
38
+ level: 8,
39
+ name: "SousSection"
40
+ }, {
41
+ regex: /^ARTICLES?_|^EXPOSE_DES_MOTIFS$/,
42
+ level: 9,
43
+ name: "Article"
44
+ }];
45
+ for (let {
46
+ regex,
47
+ level,
48
+ name
49
+ } of headersMapping) {
50
+ if (header.match(regex)) {
51
+ return [level, name];
235
52
  }
236
- for (const alinea of article.alineas) {
237
- if (alinea.debut === alinea.fin || !alinea.fin) {
238
- alinea.texte = alinea.debut;
239
- continue;
240
- }
241
- const alineaPromptResult = await chain.invoke({
242
- input: article.texte,
243
- debut: alinea.debut,
244
- fin: alinea.fin
245
- });
246
- delete alinea.debut;
247
- delete alinea.fin;
248
- alinea.texte = alineaPromptResult.texte;
249
- }
250
- runLogFun(console.info("Opération de peuplement du contenu du projet de loi terminée ✅"), {
251
- silent: SILENT_LOG
252
- });
253
- delete article.debut;
254
- delete article.fin;
255
- fullArticles.push(article);
256
53
  }
257
- const projetLoi = {
258
- ...partial,
259
- articles: fullArticles
260
- };
261
- runLogFun(console.groupEnd(), {
262
- silent: SILENT_LOG
263
- });
264
- delimiter();
265
- return projetLoi;
54
+ return [null, ""];
266
55
  }
267
-
268
- /**
269
- * Asynchronously extracts the "expose motifs" from the given HTML.
270
- *
271
- * @param {string} html - The HTML string from which to extract the "expose motifs".
272
- * @return {Promise<Record<string, any>>} - A Promise that resolves to the extracted "expose motifs" object.
273
- * The object contains the following properties:
274
- * - `debut`: The beginning of the "expose motifs".
275
- * - `fin`: The end of the "expose motifs".
276
- * @throws {Error} - If the extracted "expose motifs" do not validate against the EXPOSE_MOTIFS_SCHEMA.
277
- */
278
- async function extractMotifs(html) {
279
- runLogFun(console.group("Parsing expose motifs..."), {
280
- silent: SILENT_LOG
281
- });
282
- let cleanedHtml = cleanHtml(html);
283
- cleanedHtml = removeHtmlTags(cleanedHtml);
284
- const structuredModel = MODEL.withStructuredOutput(EXPOSE_MOTIFS_SCHEMA);
285
- const motifsSystemTemplate = `
286
- ${SYSTEM_TEMPLATE}
287
-
288
- # Examples:
289
-
290
- ## Exemple 1 User Input :
291
- """
292
- {example1}
293
- """
294
-
295
- ## Exemple 1 JSON Output :
296
- """
297
- {output1}
298
- """
299
-
300
- ## Exemple 2 User Input :
301
- """
302
- {example2}
303
- """
304
-
305
- ## Exemple 2 JSON Output :
306
- """
307
- {output2}
308
- """
309
- `;
310
- const prompt = ChatPromptTemplate.fromMessages([["system", motifsSystemTemplate], ["human", "{input}"]]);
311
- const chain = prompt.pipe(structuredModel);
312
- const motifsPromptResult = await chain.invoke({
313
- input: cleanedHtml,
314
- example1: EXAMPLES.PRJLANR5L16B0914.input,
315
- example2: EXAMPLES.PRJLANR5L16B2424.input,
316
- output1: EXAMPLES.PRJLANR5L16B0914.motifs,
317
- output2: EXAMPLES.PRJLANR5L16B2424.motifs
318
- });
319
-
320
- // console.log(JSON.stringify(motifsPromptResult))
321
- runLogFun(console.table([["Début", motifsPromptResult.debut.substring(0, 30)], ["Fin", motifsPromptResult.fin.substring(0, 30)]]), {
322
- silent: SILENT_LOG
323
- });
324
- runLogFun(console.groupEnd(), {
325
- silent: SILENT_LOG
326
- });
327
- delimiter();
328
- const ajv = new Ajv();
329
- if (ajv.validate(EXPOSE_MOTIFS_SCHEMA, motifsPromptResult)) {
330
- runLogFun(console.info("Exposé des motifs OK 👌"), {
331
- silent: SILENT_LOG
332
- });
333
- return motifsPromptResult;
334
- }
335
- runLogFun(console.warn("Exposé des motifs KO ❌"), {
336
- silent: SILENT_LOG
337
- });
338
- return {};
56
+ function cleanText(text) {
57
+ return text?.replace(/[\n\t]+/g, "").trim() || "";
339
58
  }
340
-
341
- /**
342
- * Asynchronously extracts the metadata from the given HTML.
343
- *
344
- * @param {string} html - The HTML string from which to extract the metadata.
345
- * @return {Promise<Record<string, any>>} - A Promise that resolves to the extracted metadata object.
346
- * The object contains the following properties:
347
- * - `documentRef`: The reference of the document.
348
- * - `titre`: The title of the document.
349
- * - `numeroTexte`: The number of the text.
350
- * - `procedureAcceleree`: The accelerated procedure.
351
- * @throws {Error} - If the extracted metadata do not validate against the META_DONNEES_SCHEMA.
352
- */
353
- async function extractMetaDonnees(html) {
354
- delimiter();
355
- let cleanedHtml = cleanHtml(html);
356
- cleanedHtml = removeHtmlTags(cleanedHtml);
357
- const structuredModel = MODEL.withStructuredOutput(META_DONNEES_SCHEMA);
358
- const metaDonneesSystemTemplate = `
359
- ${SYSTEM_TEMPLATE}
360
- `;
361
- const prompt = ChatPromptTemplate.fromMessages([["system", metaDonneesSystemTemplate], ["human", "{input}"]]);
362
- const chain = prompt.pipe(structuredModel);
363
- const result = await chain.invoke({
364
- input: cleanedHtml,
365
- example1: EXAMPLES.PRJLANR5L16B0914.input,
366
- example2: EXAMPLES.PRJLANR5L16B2424.input,
367
- output1: EXAMPLES.PRJLANR5L16B0914.meta,
368
- output2: EXAMPLES.PRJLANR5L16B2424.meta
369
- });
370
- console.group("Meta-données");
371
- console.table([["Référence", result.documentRef], ["Titre", result.titre], ["Numéro du texte", result.numeroTexte], ["Procédure accéléré", result.procedureAcceleree]]);
372
- console.groupEnd();
373
- delimiter();
374
- const ajv = new Ajv();
375
- if (ajv.validate(META_DONNEES_SCHEMA, result)) {
376
- runLogFun(console.warn("Meta-données OK 👌"), {
377
- silent: SILENT_LOG
378
- });
379
- return result;
380
- }
381
- runLogFun(console.warn("Meta-données non-conforme ❌"), {
382
- silent: SILENT_LOG
383
- });
384
- return {};
385
- }
386
-
387
- /**
388
- * Asynchronously extracts the "projet de loi" from the given HTML.
389
- *
390
- * @param {string} html - The HTML string from which to extract the "projet de loi".
391
- * @return {Promise<Record<string, any>>} - A Promise that resolves to the extracted "projet de loi" object.
392
- * The object contains the following properties:
393
- * - `articles`: An array of article objects, each containing the following properties:
394
- * - `numeroArticle`: The number of the article.
395
- * - `cardinal`: The cardinal of the article.
396
- * - `titre`: The title of the article.
397
- * - `debut`: The beginning of the article.
398
- * - `fin`: The end of the article.
399
- * - `alineas`: An array of alineas.
400
- * @throws {Error} - If the extracted "projet de loi" does not validate against the PROJET_LOI_SCHEMA.
401
- */
402
- async function extractProjetLoi(html) {
403
- runLogFun(console.group("Parsing loi..."), {
404
- silent: SILENT_LOG
405
- });
406
- let cleanedHtml = cleanHtml(html);
407
- const structuredModel = MODEL.withStructuredOutput(PROJET_LOI_SCHEMA);
408
- const projetLoiSystemTemplate = `
409
- ${SYSTEM_TEMPLATE}
410
-
411
- # Examples:
412
-
413
- ## Exemple 1 User Input :
414
- """
415
- {example1}
416
- """
417
-
418
- ## Exemple 1 JSON Output :
419
- """
420
- {output1}
421
- """
422
-
423
- ## Exemple 2 User Input :
424
- """
425
- {example2}
426
- """
427
-
428
- ## Exemple 2 JSON Output :
429
- """
430
- {output2}
431
- """
432
- `;
433
- const prompt = ChatPromptTemplate.fromMessages([["system", projetLoiSystemTemplate], ["human", "{input}"]]);
434
- const chain = prompt.pipe(structuredModel);
435
- const articlesPromptResult = await chain.invoke({
436
- input: cleanedHtml,
437
- example1: EXAMPLES.PRJLANR5L16B0914.input,
438
- example2: EXAMPLES.PRJLANR5L16B2424.input,
439
- output1: EXAMPLES.PRJLANR5L16B0914.loi,
440
- output2: EXAMPLES.PRJLANR5L16B2424.loi
441
- });
442
- runLogFun(console.log("Found ", articlesPromptResult.articles.length, " articles."), {
443
- silent: SILENT_LOG
444
- });
445
- for (const article of articlesPromptResult.articles) {
446
- article.alineas = await extractAlineas(html, article);
447
- }
448
- runLogFun(console.table(articlesPromptResult.articles.map(article => [article.numeroArticle, article.titre, article.debut.substring(0, 10), article.fin.substring(0, 10), `Nombre d'alineas : ${article.alineas.length}`])), {
449
- silent: SILENT_LOG
450
- });
451
- runLogFun(console.groupEnd(), {
452
- silent: SILENT_LOG
453
- });
454
- const ajv = new Ajv();
455
- if (ajv.validate(PROJET_LOI_SCHEMA, articlesPromptResult)) {
456
- runLogFun(console.info("Projet de loi OK 👌"), {
457
- silent: SILENT_LOG
458
- });
459
- delimiter();
460
- return articlesPromptResult;
461
- }
462
- runLogFun(console.warn("Projet de loi KO ❌"), {
463
- silent: SILENT_LOG
464
- });
465
- delimiter();
466
- return {};
467
- }
468
- async function extractAlineas(html, article) {
469
- runLogFun(console.group("Parsing alineas..."), {
470
- silent: SILENT_LOG
471
- });
472
- let cleanedHtml = cleanHtml(html);
473
- const structuredModel = MODEL.withStructuredOutput(ALINEA_SCHEMA);
474
- const alineasSystemTemplate = `
475
- ${SYSTEM_TEMPLATE}
476
-
477
- Extrais tous les alineas de la portion de texte commençant par « {debut} » et finissant par « {fin} ».
478
- Un alinéa contiennent toujours une balise « <img ».
479
-
480
- # Examples:
481
-
482
- ## Exemple 1 User Input :
483
- """
484
- {example1}
485
- """
486
-
487
- ## Exemple 1 JSON Output :
488
- """
489
- {output1}
490
- """
491
- `;
492
- const prompt = ChatPromptTemplate.fromMessages([["system", alineasSystemTemplate], ["human", "{input}"]]);
493
- const chain = prompt.pipe(structuredModel);
494
- const alineasPromptResult = await chain.invoke({
495
- input: cleanedHtml,
496
- debut: article.debut,
497
- fin: article.fin,
498
- example1: EXAMPLES.PRJLANR5L16B2424.input,
499
- output1: EXAMPLES.PRJLANR5L16B2424.alineas
500
- });
501
- const ajv = new Ajv();
502
- if (ajv.validate(ALINEA_SCHEMA, alineasPromptResult)) {
503
- runLogFun(console.info("Found ", alineasPromptResult.alineas.length, " alineas for Article ", article.numeroArticle, article.titre), {
504
- silent: SILENT_LOG
505
- });
506
- runLogFun(console.groupEnd(), {
507
- silent: SILENT_LOG
508
- });
509
- delimiter();
510
- return alineasPromptResult.alineas;
511
- }
512
- runLogFun(console.warn("Alineas KO"), {
513
- silent: SILENT_LOG
514
- });
515
- delimiter();
516
- return {};
517
- }
518
-
519
- /**
520
- * Asynchronously extracts the "sommaire" (outline) from the given HTML.
521
- *
522
- * @param {string} html - The HTML string from which to extract the "sommaire".
523
- * @return {Promise<Record<string, any>>} - A Promise that resolves to the extracted "sommaire" object.
524
- * The object contains the following properties:
525
- * - `sommaire`: An array of main division objects, each containing the following properties:
526
- * - `numeroDivision`: The number of the main division.
527
- * - `titre`: The title of the main division.
528
- * - `typeDivision`: The type of content in the main division.
529
- * - `sequence`: The sequence of the main division.
530
- * - `parent`: The parent of the main division.
531
- * @throws {Error} - If the extracted "sommaire" does not validate against the SOMMAIRE_SCHEMA.
532
- */
533
- async function extractSommaire(html) {
534
- let cleanedHtml = cleanHtml(html);
535
- cleanedHtml = removeHtmlTags(cleanedHtml);
536
- const structuredModel = MODEL.withStructuredOutput(SOMMAIRE_SCHEMA);
537
- const sommaireSystemTemplate = `
538
- ${SYSTEM_TEMPLATE}
539
-
540
- # Examples:
541
-
542
- ## Exemple 1 User Input :
543
- """
544
- {example1}
545
- """
546
-
547
- ## Exemple 1 JSON Output :
548
- """
549
- {output1}
550
- """
551
-
552
- ## Exemple 2 User Input :
553
- """
554
- {example2}
555
- """
556
-
557
- ## Exemple 2 JSON Output :
558
- """
559
- {output2}
560
- """
561
- `;
562
- const prompt = ChatPromptTemplate.fromMessages([["system", sommaireSystemTemplate], ["human", "{input}"]]);
563
- const chain = prompt.pipe(structuredModel);
564
- const sommairePromptResult = await chain.invoke({
565
- input: cleanedHtml,
566
- example1: EXAMPLES.PRJLANR5L16B2014.input,
567
- output1: EXAMPLES.PRJLANR5L16B2014.sommaire,
568
- example2: EXAMPLES.PRJLANR5L16B0914.input,
569
- output2: EXAMPLES.PRJLANR5L16B0914.sommaire
570
- });
571
- if (!sommairePromptResult.sommaire) {
572
- runLogFun(console.warn("Aucun sommaire n'a été trouvé ❓"), {
573
- silent: SILENT_LOG
574
- });
575
- delimiter();
59
+ const excludedAlineas = [/^Délibéré en séance publique/, /^Fait le/, /^Le Président,$/, /^Signé/];
60
+ export function parseTexte(assembleeUrl, page) {
61
+ // Repair HTML.
62
+ let html = page.replace(/(<style[\w\W]+style>)/g, "");
63
+
64
+ // Extract subdivisions from HTML.
65
+ const {
66
+ window
67
+ } = new JSDOM(html);
68
+ const {
69
+ document
70
+ } = window;
71
+ assert.strictEqual(document.children.length, 1);
72
+ const htmlElement = document.children[0];
73
+ assert.strictEqual(htmlElement.children.length, 2);
74
+ const bodyElement = htmlElement.children[1];
75
+ if (bodyElement.children.length < 3) {
76
+ // Occurs in http://www.assemblee-nationale.fr/15/textes/0326.asp.
77
+ window.close(); // Free memory.
576
78
  return {
577
- sommaire: []
79
+ error: {
80
+ code: -1,
81
+ message: "Texte de loi sans contenu"
82
+ },
83
+ html,
84
+ page
578
85
  };
579
86
  }
580
- runLogFun(console.group("Sommaire"), {
581
- silent: SILENT_LOG
582
- });
583
- runLogFun(console.log("Found ", sommairePromptResult.sommaire.length, " main divisions (level 1)."), {
584
- silent: SILENT_LOG
585
- });
586
- runLogFun(console.table(sommairePromptResult.sommaire.map(item => [item.sequence, item.titre, item.typeDivision, item.parent])), {
587
- silent: SILENT_LOG
588
- });
589
- runLogFun(console.groupEnd(), {
590
- silent: SILENT_LOG
591
- });
592
- const ajv = new Ajv();
593
- if (ajv.validate(SOMMAIRE_SCHEMA, sommairePromptResult)) {
594
- runLogFun(console.info("Sommaire OK 👌"), {
595
- silent: SILENT_LOG
596
- });
597
- delimiter();
598
- return sommairePromptResult.sommaire;
87
+ assert.strictEqual(bodyElement.children[0].tagName, "DIV");
88
+ // First child is a DIV describing the document (Assemblée's header). Skip it for now.
89
+ let bodyChild = bodyElement.children[1];
90
+ assert.strictEqual(bodyChild.tagName, "BR");
91
+ let alineaElement = null;
92
+ let isMultiLinesHeader = false;
93
+ let level = null;
94
+ let levels = [];
95
+ let nextParentState = null;
96
+ let state = "nextBodyChild";
97
+ const subdivisions = [];
98
+ let subdivisionAlineas = null;
99
+ let subdivisionHeaders = null;
100
+ while (state !== null) {
101
+ switch (state) {
102
+ case "nextBodyChild":
103
+ if (bodyChild.nextElementSibling === null) {
104
+ // The document has been fully parsed.
105
+ state = null;
106
+ } else {
107
+ bodyChild = bodyChild.nextElementSibling;
108
+ if (bodyChild.tagName === "DIV") {
109
+ alineaElement = bodyChild.children[0];
110
+ if (alineaElement === undefined || bodyChild.id.includes("ftn")) {
111
+ // No alinea in current bodyChild: go to next bodyChild.
112
+ state = "nextBodyChild";
113
+ } else {
114
+ state = "alineaElement";
115
+ }
116
+ } else {
117
+ // <P/> is for footnotes
118
+ assert(["BR", "HR", "P"].includes(bodyChild.tagName), `Unexpected tag name "${bodyChild.tagName}" for body child`);
119
+ // Stay in the same state to go to next bodyChild.
120
+ }
121
+ }
122
+ break;
123
+ case "alineaElement":
124
+ switch (alineaElement.tagName) {
125
+ case "DIV":
126
+ state = "nextAlineaElement";
127
+ break;
128
+ case "H4":
129
+ // TODO
130
+ nextParentState = "nextAlineaElement";
131
+ state = "firstParagraphChild";
132
+ break;
133
+ case "P":
134
+ nextParentState = "nextAlineaElement";
135
+ state = "firstParagraphChild";
136
+ break;
137
+ case "OL":
138
+ // TODO
139
+ state = "nextAlineaElement";
140
+ break;
141
+ case "TABLE":
142
+ nextParentState = "nextAlineaElement";
143
+ state = "firstParagraphChild";
144
+ break;
145
+ default:
146
+ return {
147
+ error: {
148
+ code: -2,
149
+ message: `Unexpected tag name for alinea element: ${alineaElement.tagName}`
150
+ },
151
+ html,
152
+ page
153
+ };
154
+ }
155
+ break;
156
+ case "firstParagraphChild":
157
+ const headerText = alineaElement.textContent;
158
+ const nameComputed = (headerText || "").normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\(nouveau\)/, "").replace(/\(Pour coordination\)/, "").replace(/\(Supprimés?\)/, "").replace(/ /g, " ").replace(/[\-,.…]/g, "").trim().replace(/ {1,}/g, "_");
159
+ const nameUpper = nameComputed.toUpperCase();
160
+ const [nextLevel, paragraphType] = parseHeader(nameUpper);
161
+ if (!nameUpper || nameUpper.match(/^(PROJET|PROPOSITION)_DE_LOI(_|$)/) !== null) {
162
+ // Occurs in:
163
+ // * http://www.assemblee-nationale.fr/15/textes/0232.asp
164
+ // * http://www.assemblee-nationale.fr/15/textes/0626.asp
165
+ // * http://www.assemblee-nationale.fr/15/textes/0676.asp
166
+ state = nextParentState;
167
+ break;
168
+ } else if (nameUpper === "JEUX_OLYMPIQUES_ET_PARALYMPIQUES_DE_2024") {
169
+ // Occurs in http://www.assemblee-nationale.fr/15/textes/0676.asp
170
+ state = nextParentState;
171
+ break;
172
+ } else if (nameUpper === "TEXTE_DE_LA_COMMISSION_MIXTE_PARITAIRE") {
173
+ // Occurs in http://www.assemblee-nationale.fr/15/textes/1294.asp
174
+ state = nextParentState;
175
+ break;
176
+ }
177
+ const lineHtml = cleanText(alineaElement.outerHTML);
178
+ const lineText = cleanText(alineaElement.textContent);
179
+ if (nextLevel === null) {
180
+ // It is a regular alinea
181
+ // Exclude some alineas
182
+ let excludeAlinea = false;
183
+ for (let regex of excludedAlineas) {
184
+ if (regex.test(lineText)) {
185
+ excludeAlinea = true;
186
+ }
187
+ }
188
+ if (excludeAlinea) {
189
+ state = nextParentState;
190
+ break;
191
+ }
192
+ if (subdivisionHeaders.length === 0 || isMultiLinesHeader && subdivisionAlineas.length === 0) {
193
+ subdivisionHeaders.push({
194
+ texte: lineText,
195
+ html: lineHtml
196
+ });
197
+ } else {
198
+ subdivisionAlineas.push({
199
+ texte: lineText,
200
+ html: lineHtml
201
+ });
202
+ }
203
+ if (isMultiLinesHeader && subdivisionHeaders.length >= 2) {
204
+ isMultiLinesHeader = false;
205
+ }
206
+ // Skip paragraph.
207
+ state = nextParentState;
208
+ break;
209
+ } else if (nextLevel !== null) {
210
+ // It is a header
211
+ level = nextLevel;
212
+ while (levels.length > 0 && level < levels[levels.length - 1]) {
213
+ levels.pop();
214
+ }
215
+ if (levels.length === 0 || level > levels[levels.length - 1]) {
216
+ levels.push(level);
217
+ }
218
+ subdivisionAlineas = [];
219
+ subdivisionHeaders = [{
220
+ texte: lineText,
221
+ html: lineHtml
222
+ }];
223
+
224
+ // Articles & "Exposé des motifs" are the only divisions without second title.
225
+ isMultiLinesHeader = nameUpper.match(/^ARTICLES?_/) === null && nameUpper.match(/^EXPOSE_DES_MOTIFS$/) === null;
226
+ subdivisions.push({
227
+ id: "D_" + nameComputed,
228
+ type: paragraphType,
229
+ niveau: level + 1,
230
+ niveauRelatif: levels.length,
231
+ titres: subdivisionHeaders,
232
+ alineas: subdivisionAlineas
233
+ });
234
+ }
235
+ state = nextParentState;
236
+ break;
237
+ case "nextAlineaElement":
238
+ alineaElement = alineaElement.nextElementSibling;
239
+ if (alineaElement === null) {
240
+ // The bodyChild has been fully parsed. Go to next bodyChild
241
+ state = "nextBodyChild";
242
+ } else {
243
+ state = "alineaElement";
244
+ }
245
+ break;
246
+ default:
247
+ throw `Unexpected state: ${state}`;
248
+ }
599
249
  }
600
- runLogFun(console.warn("La construction du sommaire est KO ❌"), {
601
- silent: SILENT_LOG
602
- });
603
- delimiter();
250
+ window.close(); // Free memory.
251
+
604
252
  return {
605
- sommaire: []
253
+ error: null,
254
+ html,
255
+ page,
256
+ subdivisions,
257
+ url: assembleeUrl
606
258
  };
607
259
  }
608
- //# sourceMappingURL=data:application/json;charset=utf-8;base64,{"version":3,"names":["ChatOpenAI","ChatPromptTemplate","Ajv","delimiter","runLogFun","ALINEA_SCHEMA","require","EXPOSE_MOTIFS_SCHEMA","META_DONNEES_SCHEMA","PROJET_LOI_SCHEMA","SOMMAIRE_SCHEMA","EXAMPLES","PRJLANR5L16B0914","motifs","input","loi","meta","sommaire","PRJLANR5L16B2014","PRJLANR5L16B2424","alineas","FULL_TEXTE_SCHEMA","$schema","title","description","type","properties","texte","required","MODEL","process","env","OPENAI_API_KEY","temperature","topP","model","apiKey","maxTokens","SYSTEM_TEMPLATE","SILENT_LOG","parseTexteLoi","html","options","silent","console","info","extractMetaDonnees","extractSommaire","partialMotifs","extractMotifs","partialLoi","extractProjetLoi","exposeMotifs","populateMotifs","populateProjetLoi","cleanHtml","cleanedHtml","replace","removeHtmlTags","htmlTagRegex","textWithoutTags","debut","fin","systemTemplate","prompt","fromMessages","structuredModel","withStructuredOutput","chain","pipe","articlePromptResult","invoke","partial","group","articles","fullArticles","article","alinea","alineaPromptResult","push","projetLoi","groupEnd","motifsSystemTemplate","motifsPromptResult","example1","example2","output1","output2","table","substring","ajv","validate","warn","metaDonneesSystemTemplate","result","documentRef","titre","numeroTexte","procedureAcceleree","projetLoiSystemTemplate","articlesPromptResult","log","length","extractAlineas","map","numeroArticle","alineasSystemTemplate","alineasPromptResult","sommaireSystemTemplate","sommairePromptResult","item","sequence","typeDivision","parent"],"sources":["../../src/parsers/textes_lois.ts"],"sourcesContent":["import { ChatOpenAI } from \"@langchain/openai\"\nimport { ChatPromptTemplate } from \"@langchain/core/prompts\"\nimport Ajv from \"ajv\"\n\nimport { delimiter, runLogFun } from \"../logger\"\n\nconst ALINEA_SCHEMA = require(\"../schemas/texte_loi/Alinea.json\")\nconst EXPOSE_MOTIFS_SCHEMA = require(\"../schemas/texte_loi/ExposeMotifs.json\")\nconst META_DONNEES_SCHEMA = require(\"../schemas/texte_loi/MetaDonnees.json\")\nconst PROJET_LOI_SCHEMA = require(\"../schemas/texte_loi/ProjetLoi.json\")\nconst SOMMAIRE_SCHEMA = require(\"../schemas/texte_loi/Sommaire.json\")\n\n/**\n * Object containing input examples and their output schemas.\n * Used to train the LLM.\n * @type {Record<string, {motifs: any, input: any, loi: any, meta: any, sommaire: any}>}\n */\nconst EXAMPLES = {\n  PRJLANR5L16B0914: {\n    motifs: require(\"../examples/PRJLANR5L16B0914/motifs.json\"),\n    input: require(\"../examples/PRJLANR5L16B0914/input\"),\n    loi: require(\"../examples/PRJLANR5L16B0914/loi.json\"),\n    meta: require(\"../examples/PRJLANR5L16B0914/meta.json\"),\n    sommaire: require(\"../examples/PRJLANR5L16B0914/sommaire.json\"),\n  },\n  PRJLANR5L16B2014: {\n    motifs: require(\"../examples/PRJLANR5L16B2014/motifs.json\"),\n    input: require(\"../examples/PRJLANR5L16B2014/input\"),\n    loi: require(\"../examples/PRJLANR5L16B2014/loi.json\"),\n    meta: require(\"../examples/PRJLANR5L16B2014/meta.json\"),\n    sommaire: require(\"../examples/PRJLANR5L16B2014/sommaire.json\"),\n  },\n  PRJLANR5L16B2424: {\n    alineas: require(\"../examples/PRJLANR5L16B2424/alineas.json\"),\n    motifs: require(\"../examples/PRJLANR5L16B2424/motifs.json\"),\n    input: require(\"../examples/PRJLANR5L16B2424/input\"),\n    loi: require(\"../examples/PRJLANR5L16B2424/loi.json\"),\n    meta: require(\"../examples/PRJLANR5L16B2424/meta.json\"),\n    sommaire: require(\"../examples/PRJLANR5L16B2424/sommaire.json\"),\n  },\n}\n\nconst FULL_TEXTE_SCHEMA = {\n  $schema: \"http://json-schema.org/draft-07/schema#\",\n  title: \"Sommaire\",\n  description: \"Récupération d'un texte complet\",\n  type: \"object\",\n  properties: {\n    texte: {\n      description: \"Texte complet de la section indiquée.\",\n      type: \"string\",\n    },\n  },\n  required: [\"texte\"],\n}\n\n/**\n * LLM model.\n */\nlet MODEL: any = null\n\nif (process.env.OPENAI_API_KEY) {\n  MODEL = new ChatOpenAI({\n    temperature: 0,\n    topP: 0.0,\n    model: \"gpt-4o-mini\",\n    apiKey: process.env.OPENAI_API_KEY,\n    maxTokens: -1,\n  })\n}\n\n/**\n * System input template for the LLM.\n */\nconst SYSTEM_TEMPLATE: string = `\n  # Instructions :\n  \n  Tu es un expert de la loi et de l'analyse de pages HTML.\n  Tu dois analyser un texte de loi afin de séparer et de structurer les éléments qui le composent. Le format d'entrée est HTML.\n\n  Le texte est divisé en deux parties :\n  A) un exposé des motifs qui indique les raisons pour lesquelles ce projet est soumis au Parlement. Il peut contenir 3 éléments : 1) une citation au début d'un document, 2) le texte principal qui développe les arguments de l'auteur à l'appui de la modification législative ou des dispositions nouvelles qu'il propose, 3) des paragraphes qui présentent les articles.\n  B) le dispositif qui est la partie normative et rédigée en articles. Il peut comporter des divisions nommées titres, chapitres, sections, sous-sections. Lorsqu'il y a un seul niveau de structure, on retrouvera le chapitre ; s’il y a deux niveaux de structure, on retrouvera des chapitres puis des sections. S’il y a trois niveaux de structure, on retrouvera des titres puis des chapitres puis des sections.\n\n  Si le texte de loi ne comporte pas de division, le sommaire doit rester vide.\n  Toutes les lignes qui débutent par un '«' ne sont pas à prendre en compte dans les divisions\n  Ta réponse doit être un JSON structuré avec les éléments du texte de loi en respectant le schéma JSON fourni.\n`\n\nlet SILENT_LOG = false\n\n/**\n * Parses the given HTML string and returns the parsed textes loi.\n *\n * @param {string} html - The HTML string to parse.\n * @return {Promise<{partial: {sommaire: any, loi: any, motifs: any, meta: any}, completed: {sommaire: any, loi: any, motifs: any, meta: any}}>} - The parsed textes loi, including the partial and completed versions.\n */\nexport async function parseTexteLoi(html: string, options = { silent: false }) {\n  SILENT_LOG = options.silent\n\n  runLogFun(console.info(\"Start extracting loi...\"), options)\n  const meta = await extractMetaDonnees(html)\n  const sommaire = await extractSommaire(html)\n  const partialMotifs: any = await extractMotifs(html)\n  const partialLoi: any = await extractProjetLoi(html)\n  const exposeMotifs = await populateMotifs(partialMotifs, html)\n  const loi = await populateProjetLoi(partialLoi, html)\n\n  return {\n    sommaire,\n    loi,\n    exposeMotifs,\n    meta,\n  }\n}\n\n/**\n * Cleans the given HTML string by removing unnecessary tags and content.\n *\n * @param {string} html - The HTML string to be cleaned.\n * @return {string} The cleaned HTML string.\n */\nfunction cleanHtml(html: string) {\n  let cleanedHtml = html.replace(/(<style[\\w\\W]+style>)/g, \"\")\n  cleanedHtml = cleanedHtml.replace(/(<head[\\w\\W]+head>)/g, \"\")\n  cleanedHtml = cleanedHtml.replace(/<img[^>]*>/g, \"\")\n\n  return cleanedHtml\n}\n\n/**\n * Removes all HTML tags from the given HTML string and returns the cleaned text.\n *\n * @param {string} html - The HTML string from which to remove tags.\n * @return {string} The cleaned text without any HTML tags.\n */\nfunction removeHtmlTags(html: string): string {\n  // Regular expression to match HTML tags\n  const htmlTagRegex = /<[^>]*>/g\n\n  // Replace all HTML tags with an empty string\n  const textWithoutTags = html.replace(htmlTagRegex, \"\")\n  return textWithoutTags\n}\n\n/**\n * Populates the motifs object with the extracted text from the given HTML.\n *\n * @param {Record<string, any>} motifs - The motifs object to be populated.\n * @param {string} html - The HTML string from which to extract the text.\n * @return {Promise<Record<string, any>>} - The populated motifs object.\n */\nasync function populateMotifs(motifs: Record<string, any>, html: string) {\n  runLogFun(console.info(\"Populating expose motifs...\"))\n  let cleanedHtml = cleanHtml(html)\n\n  if (motifs.debut === motifs.fin || !motifs.fin) {\n    motifs.texte = motifs.debut\n  } else {\n    const systemTemplate = `\n      # Instructions :\n\n      Tu es un expert de la loi et de l'analyse de pages HTML.\n      Tu dois analyser un texte de loi afin d'en extraire certaines parties. \n      Le format d'entrée est du HTML. Le format de sortie est du texte.\n      Récupère l'intégralité du texte commençant par :\n\n      \"\"\"\n      {debut}\n      \"\"\"\n\n      Et finissant par :\n\n      \"\"\"\n      {fin}\n      \"\"\"\n    `\n\n    const prompt = ChatPromptTemplate.fromMessages([\n      [\"system\", systemTemplate],\n      [\"human\", \"{input}\"],\n    ])\n\n    const structuredModel = MODEL.withStructuredOutput(FULL_TEXTE_SCHEMA)\n    const chain = prompt.pipe(structuredModel)\n\n    const articlePromptResult: any = await chain.invoke({\n      input: cleanedHtml,\n      debut: motifs.debut,\n      fin: motifs.fin,\n    })\n\n    motifs.texte = articlePromptResult.texte\n  }\n\n  runLogFun(\n    console.info(\"Opération de peuplement de l'exposé des motifs terminée ✅\"),\n    { silent: SILENT_LOG },\n  )\n  delimiter(SILENT_LOG)\n\n  delete motifs.debut\n  delete motifs.fin\n  return motifs\n}\n\n/**\n * Asynchronously populates a projet loi object with the full text of each article and alinea in the given HTML.\n *\n * @param {Record<string, any>} partial - The partial projet loi object to be populated.\n * @param {string} html - The HTML containing the text of the articles and alineas.\n * @return {Promise<Record<string, any>>} A Promise that resolves to the fully populated projet loi object.\n */\nasync function populateProjetLoi(partial: Record<string, any>, html: string) {\n  runLogFun(console.group(\"Peuplement du projet loi...\"), {\n    silent: SILENT_LOG,\n  })\n  let cleanedHtml = cleanHtml(html)\n\n  const articles: Record<string, any>[] = partial.articles\n  const fullArticles: Record<string, any>[] = []\n\n  const systemTemplate = `\n    # Instructions :\n\n    Tu es un expert de la loi et de l'analyse de pages HTML.\n    Tu dois analyser un texte de loi afin d'en extraire certaines parties. \n    Le format d'entrée est du HTML. Le format de sortie est du texte.\n    Récupère l'intégralité du texte commençant par :\n\n    \"\"\"\n    {debut}\n    \"\"\"\n\n    Et finissant par :\n\n    \"\"\"\n    {fin}\n    \"\"\"\n  `\n\n  const prompt = ChatPromptTemplate.fromMessages([\n    [\"system\", systemTemplate],\n    [\"human\", \"{input}\"],\n  ])\n\n  const structuredModel = MODEL.withStructuredOutput(FULL_TEXTE_SCHEMA)\n  const chain = prompt.pipe(structuredModel)\n\n  for (const article of articles) {\n    if (article.debut === article.fin || !article.fin) {\n      article.texte = article.debut\n    } else {\n      const articlePromptResult: any = await chain.invoke({\n        input: cleanedHtml,\n        debut: article.debut,\n        fin: article.fin,\n      })\n\n      article.texte = articlePromptResult.texte\n    }\n\n    for (const alinea of article.alineas) {\n      if (alinea.debut === alinea.fin || !alinea.fin) {\n        alinea.texte = alinea.debut\n        continue\n      }\n\n      const alineaPromptResult: any = await chain.invoke({\n        input: article.texte,\n        debut: alinea.debut,\n        fin: alinea.fin,\n      })\n\n      delete alinea.debut\n      delete alinea.fin\n      alinea.texte = alineaPromptResult.texte\n    }\n\n    runLogFun(\n      console.info(\n        \"Opération de peuplement du contenu du projet de loi terminée ✅\",\n      ),\n      { silent: SILENT_LOG },\n    )\n    delete article.debut\n    delete article.fin\n    fullArticles.push(article)\n  }\n\n  const projetLoi = {\n    ...partial,\n    articles: fullArticles,\n  }\n\n  runLogFun(console.groupEnd(), { silent: SILENT_LOG })\n  delimiter()\n  return projetLoi\n}\n\n/**\n * Asynchronously extracts the \"expose motifs\" from the given HTML.\n *\n * @param {string} html - The HTML string from which to extract the \"expose motifs\".\n * @return {Promise<Record<string, any>>} - A Promise that resolves to the extracted \"expose motifs\" object.\n * The object contains the following properties:\n * - `debut`: The beginning of the \"expose motifs\".\n * - `fin`: The end of the \"expose motifs\".\n * @throws {Error} - If the extracted \"expose motifs\" do not validate against the EXPOSE_MOTIFS_SCHEMA.\n */\nasync function extractMotifs(html: string) {\n  runLogFun(console.group(\"Parsing expose motifs...\"), { silent: SILENT_LOG })\n  let cleanedHtml = cleanHtml(html)\n  cleanedHtml = removeHtmlTags(cleanedHtml)\n\n  const structuredModel = MODEL.withStructuredOutput(EXPOSE_MOTIFS_SCHEMA)\n\n  const motifsSystemTemplate = `\n    ${SYSTEM_TEMPLATE}\n\n    # Examples: \n\n    ## Exemple 1 User Input :\n    \"\"\"\n    {example1}\n    \"\"\"\n    \n    ## Exemple 1 JSON Output :\n    \"\"\"\n    {output1}\n    \"\"\"\n\n    ## Exemple 2 User Input :\n    \"\"\"\n    {example2}\n    \"\"\"\n    \n    ## Exemple 2 JSON Output :\n    \"\"\"\n    {output2}\n    \"\"\"\n    `\n\n  const prompt = ChatPromptTemplate.fromMessages([\n    [\"system\", motifsSystemTemplate],\n    [\"human\", \"{input}\"],\n  ])\n\n  const chain = prompt.pipe(structuredModel)\n  const motifsPromptResult: any = await chain.invoke({\n    input: cleanedHtml,\n    example1: EXAMPLES.PRJLANR5L16B0914.input,\n    example2: EXAMPLES.PRJLANR5L16B2424.input,\n    output1: EXAMPLES.PRJLANR5L16B0914.motifs,\n    output2: EXAMPLES.PRJLANR5L16B2424.motifs,\n  })\n\n  // console.log(JSON.stringify(motifsPromptResult))\n  runLogFun(\n    console.table([\n      [\"Début\", motifsPromptResult.debut.substring(0, 30)],\n      [\"Fin\", motifsPromptResult.fin.substring(0, 30)],\n    ]),\n    { silent: SILENT_LOG },\n  )\n\n  runLogFun(console.groupEnd(), { silent: SILENT_LOG })\n  delimiter()\n\n  const ajv = new Ajv()\n  if (ajv.validate(EXPOSE_MOTIFS_SCHEMA, motifsPromptResult)) {\n    runLogFun(console.info(\"Exposé des motifs OK 👌\"), { silent: SILENT_LOG })\n    return motifsPromptResult\n  }\n\n  runLogFun(console.warn(\"Exposé des motifs KO ❌\"), { silent: SILENT_LOG })\n  return {}\n}\n\n/**\n * Asynchronously extracts the metadata from the given HTML.\n *\n * @param {string} html - The HTML string from which to extract the metadata.\n * @return {Promise<Record<string, any>>} - A Promise that resolves to the extracted metadata object.\n * The object contains the following properties:\n * - `documentRef`: The reference of the document.\n * - `titre`: The title of the document.\n * - `numeroTexte`: The number of the text.\n * - `procedureAcceleree`: The accelerated procedure.\n * @throws {Error} - If the extracted metadata do not validate against the META_DONNEES_SCHEMA.\n */\nasync function extractMetaDonnees(html: string) {\n  delimiter()\n  let cleanedHtml = cleanHtml(html)\n  cleanedHtml = removeHtmlTags(cleanedHtml)\n\n  const structuredModel = MODEL.withStructuredOutput(META_DONNEES_SCHEMA)\n\n  const metaDonneesSystemTemplate = `\n    ${SYSTEM_TEMPLATE}\n    `\n\n  const prompt = ChatPromptTemplate.fromMessages([\n    [\"system\", metaDonneesSystemTemplate],\n    [\"human\", \"{input}\"],\n  ])\n\n  const chain = prompt.pipe(structuredModel)\n  const result: any = await chain.invoke({\n    input: cleanedHtml,\n    example1: EXAMPLES.PRJLANR5L16B0914.input,\n    example2: EXAMPLES.PRJLANR5L16B2424.input,\n    output1: EXAMPLES.PRJLANR5L16B0914.meta,\n    output2: EXAMPLES.PRJLANR5L16B2424.meta,\n  })\n\n  console.group(\"Meta-données\")\n  console.table([\n    [\"Référence\", result.documentRef],\n    [\"Titre\", result.titre],\n    [\"Numéro du texte\", result.numeroTexte],\n    [\"Procédure accéléré\", result.procedureAcceleree],\n  ])\n  console.groupEnd()\n  delimiter()\n\n  const ajv = new Ajv()\n  if (ajv.validate(META_DONNEES_SCHEMA, result)) {\n    runLogFun(console.warn(\"Meta-données OK 👌\"), { silent: SILENT_LOG })\n    return result\n  }\n\n  runLogFun(console.warn(\"Meta-données non-conforme ❌\"), {\n    silent: SILENT_LOG,\n  })\n  return {}\n}\n\n/**\n * Asynchronously extracts the \"projet de loi\" from the given HTML.\n *\n * @param {string} html - The HTML string from which to extract the \"projet de loi\".\n * @return {Promise<Record<string, any>>} - A Promise that resolves to the extracted \"projet de loi\" object.\n * The object contains the following properties:\n * - `articles`: An array of article objects, each containing the following properties:\n *   - `numeroArticle`: The number of the article.\n *   - `cardinal`: The cardinal of the article.\n *   - `titre`: The title of the article.\n *   - `debut`: The beginning of the article.\n *   - `fin`: The end of the article.\n *   - `alineas`: An array of alineas.\n * @throws {Error} - If the extracted \"projet de loi\" does not validate against the PROJET_LOI_SCHEMA.\n */\nasync function extractProjetLoi(html: string) {\n  runLogFun(console.group(\"Parsing loi...\"), { silent: SILENT_LOG })\n  let cleanedHtml = cleanHtml(html)\n  const structuredModel = MODEL.withStructuredOutput(PROJET_LOI_SCHEMA)\n\n  const projetLoiSystemTemplate = `\n    ${SYSTEM_TEMPLATE}\n\n    # Examples: \n\n    ## Exemple 1 User Input :\n    \"\"\"\n    {example1}\n    \"\"\"\n    \n    ## Exemple 1 JSON Output :\n    \"\"\"\n    {output1}\n    \"\"\"\n\n    ## Exemple 2 User Input :\n    \"\"\"\n    {example2}\n    \"\"\"\n    \n    ## Exemple 2 JSON Output :\n    \"\"\"\n    {output2}\n    \"\"\"\n    `\n\n  const prompt = ChatPromptTemplate.fromMessages([\n    [\"system\", projetLoiSystemTemplate],\n    [\"human\", \"{input}\"],\n  ])\n\n  const chain = prompt.pipe(structuredModel)\n  const articlesPromptResult: any = await chain.invoke({\n    input: cleanedHtml,\n    example1: EXAMPLES.PRJLANR5L16B0914.input,\n    example2: EXAMPLES.PRJLANR5L16B2424.input,\n    output1: EXAMPLES.PRJLANR5L16B0914.loi,\n    output2: EXAMPLES.PRJLANR5L16B2424.loi,\n  })\n\n  runLogFun(\n    console.log(\"Found \", articlesPromptResult.articles.length, \" articles.\"),\n    { silent: SILENT_LOG },\n  )\n\n  for (const article of articlesPromptResult.articles) {\n    article.alineas = await extractAlineas(html, article)\n  }\n\n  runLogFun(\n    console.table(\n      articlesPromptResult.articles.map((article: any) => [\n        article.numeroArticle,\n        article.titre,\n        article.debut.substring(0, 10),\n        article.fin.substring(0, 10),\n        `Nombre d'alineas : ${article.alineas.length}`,\n      ]),\n    ),\n    { silent: SILENT_LOG },\n  )\n  runLogFun(console.groupEnd(), { silent: SILENT_LOG })\n\n  const ajv = new Ajv()\n  if (ajv.validate(PROJET_LOI_SCHEMA, articlesPromptResult)) {\n    runLogFun(console.info(\"Projet de loi OK 👌\"), { silent: SILENT_LOG })\n    delimiter()\n    return articlesPromptResult\n  }\n\n  runLogFun(console.warn(\"Projet de loi KO ❌\"), { silent: SILENT_LOG })\n  delimiter()\n  return {}\n}\n\nasync function extractAlineas(html: string, article: any) {\n  runLogFun(console.group(\"Parsing alineas...\"), { silent: SILENT_LOG })\n  let cleanedHtml = cleanHtml(html)\n\n  const structuredModel = MODEL.withStructuredOutput(ALINEA_SCHEMA)\n\n  const alineasSystemTemplate = `\n    ${SYSTEM_TEMPLATE}\n\n    Extrais tous les alineas de la portion de texte commençant par « {debut} » et finissant par « {fin} ».\n    Un alinéa contiennent toujours une balise « <img ».\n\n    # Examples: \n\n    ## Exemple 1 User Input :\n    \"\"\"\n    {example1}\n    \"\"\"\n    \n    ## Exemple 1 JSON Output :\n    \"\"\"\n    {output1}\n    \"\"\"\n    `\n\n  const prompt = ChatPromptTemplate.fromMessages([\n    [\"system\", alineasSystemTemplate],\n    [\"human\", \"{input}\"],\n  ])\n\n  const chain = prompt.pipe(structuredModel)\n  const alineasPromptResult: any = await chain.invoke({\n    input: cleanedHtml,\n    debut: article.debut,\n    fin: article.fin,\n    example1: EXAMPLES.PRJLANR5L16B2424.input,\n    output1: EXAMPLES.PRJLANR5L16B2424.alineas,\n  })\n\n  const ajv = new Ajv()\n  if (ajv.validate(ALINEA_SCHEMA, alineasPromptResult as any)) {\n    runLogFun(\n      console.info(\n        \"Found \",\n        alineasPromptResult.alineas.length,\n        \" alineas for Article \",\n        article.numeroArticle,\n        article.titre,\n      ),\n      { silent: SILENT_LOG },\n    )\n    runLogFun(console.groupEnd(), { silent: SILENT_LOG })\n    delimiter()\n    return alineasPromptResult.alineas\n  }\n\n  runLogFun(console.warn(\"Alineas KO\"), { silent: SILENT_LOG })\n  delimiter()\n  return {}\n}\n\n/**\n * Asynchronously extracts the \"sommaire\" (outline) from the given HTML.\n *\n * @param {string} html - The HTML string from which to extract the \"sommaire\".\n * @return {Promise<Record<string, any>>} - A Promise that resolves to the extracted \"sommaire\" object.\n * The object contains the following properties:\n * - `sommaire`: An array of main division objects, each containing the following properties:\n *   - `numeroDivision`: The number of the main division.\n *   - `titre`: The title of the main division.\n *   - `typeDivision`: The type of content in the main division.\n *   - `sequence`: The sequence of the main division.\n *   - `parent`: The parent of the main division.\n * @throws {Error} - If the extracted \"sommaire\" does not validate against the SOMMAIRE_SCHEMA.\n */\nasync function extractSommaire(html: string) {\n  let cleanedHtml = cleanHtml(html)\n  cleanedHtml = removeHtmlTags(cleanedHtml)\n\n  const structuredModel = MODEL.withStructuredOutput(SOMMAIRE_SCHEMA)\n\n  const sommaireSystemTemplate = `\n    ${SYSTEM_TEMPLATE}\n\n    # Examples: \n\n    ## Exemple 1 User Input :\n    \"\"\"\n    {example1}\n    \"\"\"\n    \n    ## Exemple 1 JSON Output :\n    \"\"\"\n    {output1}\n    \"\"\"\n\n    ## Exemple 2 User Input :\n    \"\"\"\n    {example2}\n    \"\"\"\n    \n    ## Exemple 2 JSON Output :\n    \"\"\"\n    {output2}\n    \"\"\"\n    `\n\n  const prompt = ChatPromptTemplate.fromMessages([\n    [\"system\", sommaireSystemTemplate],\n    [\"human\", \"{input}\"],\n  ])\n\n  const chain = prompt.pipe(structuredModel)\n  const sommairePromptResult: any = await chain.invoke({\n    input: cleanedHtml,\n    example1: EXAMPLES.PRJLANR5L16B2014.input,\n    output1: EXAMPLES.PRJLANR5L16B2014.sommaire,\n    example2: EXAMPLES.PRJLANR5L16B0914.input,\n    output2: EXAMPLES.PRJLANR5L16B0914.sommaire,\n  })\n\n  if (!sommairePromptResult.sommaire) {\n    runLogFun(console.warn(\"Aucun sommaire n'a été trouvé ❓\"), {\n      silent: SILENT_LOG,\n    })\n    delimiter()\n    return { sommaire: [] }\n  }\n\n  runLogFun(console.group(\"Sommaire\"), { silent: SILENT_LOG })\n  runLogFun(\n    console.log(\n      \"Found \",\n      sommairePromptResult.sommaire.length,\n      \" main divisions (level 1).\",\n    ),\n    { silent: SILENT_LOG },\n  )\n  runLogFun(\n    console.table(\n      sommairePromptResult.sommaire.map((item: any) => [\n        item.sequence,\n        item.titre,\n        item.typeDivision,\n        item.parent,\n      ]),\n    ),\n    { silent: SILENT_LOG },\n  )\n  runLogFun(console.groupEnd(), { silent: SILENT_LOG })\n\n  const ajv = new Ajv()\n  if (ajv.validate(SOMMAIRE_SCHEMA, sommairePromptResult as any)) {\n    runLogFun(console.info(\"Sommaire OK 👌\"), { silent: SILENT_LOG })\n    delimiter()\n    return sommairePromptResult.sommaire\n  }\n\n  runLogFun(console.warn(\"La construction du sommaire est KO ❌\"), {\n    silent: SILENT_LOG,\n  })\n  delimiter()\n  return { sommaire: [] }\n}\n"],"mappings":"AAAA,SAASA,UAAU,QAAQ,mBAAmB;AAC9C,SAASC,kBAAkB,QAAQ,yBAAyB;AAC5D,OAAOC,GAAG,MAAM,KAAK;AAAA,SAEZC,SAAS,EAAEC,SAAS;AAE7B,MAAMC,aAAa,GAAGC,OAAO,CAAC,kCAAkC,CAAC;AACjE,MAAMC,oBAAoB,GAAGD,OAAO,CAAC,wCAAwC,CAAC;AAC9E,MAAME,mBAAmB,GAAGF,OAAO,CAAC,uCAAuC,CAAC;AAC5E,MAAMG,iBAAiB,GAAGH,OAAO,CAAC,qCAAqC,CAAC;AACxE,MAAMI,eAAe,GAAGJ,OAAO,CAAC,oCAAoC,CAAC;;AAErE;AACA;AACA;AACA;AACA;AACA,MAAMK,QAAQ,GAAG;EACfC,gBAAgB,EAAE;IAChBC,MAAM,EAAEP,OAAO,CAAC,0CAA0C,CAAC;IAC3DQ,KAAK,EAAER,OAAO,CAAC,oCAAoC,CAAC;IACpDS,GAAG,EAAET,OAAO,CAAC,uCAAuC,CAAC;IACrDU,IAAI,EAAEV,OAAO,CAAC,wCAAwC,CAAC;IACvDW,QAAQ,EAAEX,OAAO,CAAC,4CAA4C;EAChE,CAAC;EACDY,gBAAgB,EAAE;IAChBL,MAAM,EAAEP,OAAO,CAAC,0CAA0C,CAAC;IAC3DQ,KAAK,EAAER,OAAO,CAAC,oCAAoC,CAAC;IACpDS,GAAG,EAAET,OAAO,CAAC,uCAAuC,CAAC;IACrDU,IAAI,EAAEV,OAAO,CAAC,wCAAwC,CAAC;IACvDW,QAAQ,EAAEX,OAAO,CAAC,4CAA4C;EAChE,CAAC;EACDa,gBAAgB,EAAE;IAChBC,OAAO,EAAEd,OAAO,CAAC,2CAA2C,CAAC;IAC7DO,MAAM,EAAEP,OAAO,CAAC,0CAA0C,CAAC;IAC3DQ,KAAK,EAAER,OAAO,CAAC,oCAAoC,CAAC;IACpDS,GAAG,EAAET,OAAO,CAAC,uCAAuC,CAAC;IACrDU,IAAI,EAAEV,OAAO,CAAC,wCAAwC,CAAC;IACvDW,QAAQ,EAAEX,OAAO,CAAC,4CAA4C;EAChE;AACF,CAAC;AAED,MAAMe,iBAAiB,GAAG;EACxBC,OAAO,EAAE,yCAAyC;EAClDC,KAAK,EAAE,UAAU;EACjBC,WAAW,EAAE,iCAAiC;EAC9CC,IAAI,EAAE,QAAQ;EACdC,UAAU,EAAE;IACVC,KAAK,EAAE;MACLH,WAAW,EAAE,uCAAuC;MACpDC,IAAI,EAAE;IACR;EACF,CAAC;EACDG,QAAQ,EAAE,CAAC,OAAO;AACpB,CAAC;;AAED;AACA;AACA;AACA,IAAIC,KAAU,GAAG,IAAI;AAErB,IAAIC,OAAO,CAACC,GAAG,CAACC,cAAc,EAAE;EAC9BH,KAAK,GAAG,IAAI7B,UAAU,CAAC;IACrBiC,WAAW,EAAE,CAAC;IACdC,IAAI,EAAE,GAAG;IACTC,KAAK,EAAE,aAAa;IACpBC,MAAM,EAAEN,OAAO,CAACC,GAAG,CAACC,cAAc;IAClCK,SAAS,EAAE,CAAC;EACd,CAAC,CAAC;AACJ;;AAEA;AACA;AACA;AACA,MAAMC,eAAuB,GAAI;AACjC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,CAAC;AAED,IAAIC,UAAU,GAAG,KAAK;;AAEtB;AACA;AACA;AACA;AACA;AACA;AACA,OAAO,eAAeC,aAAaA,CAACC,IAAY,EAAEC,OAAO,GAAG;EAAEC,MAAM,EAAE;AAAM,CAAC,EAAE;EAC7EJ,UAAU,GAAGG,OAAO,CAACC,MAAM;EAE3BvC,SAAS,CAACwC,OAAO,CAACC,IAAI,CAAC,yBAAyB,CAAC,EAAEH,OAAO,CAAC;EAC3D,MAAM1B,IAAI,GAAG,MAAM8B,kBAAkB,CAACL,IAAI,CAAC;EAC3C,MAAMxB,QAAQ,GAAG,MAAM8B,eAAe,CAACN,IAAI,CAAC;EAC5C,MAAMO,aAAkB,GAAG,MAAMC,aAAa,CAACR,IAAI,CAAC;EACpD,MAAMS,UAAe,GAAG,MAAMC,gBAAgB,CAACV,IAAI,CAAC;EACpD,MAAMW,YAAY,GAAG,MAAMC,cAAc,CAACL,aAAa,EAAEP,IAAI,CAAC;EAC9D,MAAM1B,GAAG,GAAG,MAAMuC,iBAAiB,CAACJ,UAAU,EAAET,IAAI,CAAC;EAErD,OAAO;IACLxB,QAAQ;IACRF,GAAG;IACHqC,YAAY;IACZpC;EACF,CAAC;AACH;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA,SAASuC,SAASA,CAACd,IAAY,EAAE;EAC/B,IAAIe,WAAW,GAAGf,IAAI,CAACgB,OAAO,CAAC,wBAAwB,EAAE,EAAE,CAAC;EAC5DD,WAAW,GAAGA,WAAW,CAACC,OAAO,CAAC,sBAAsB,EAAE,EAAE,CAAC;EAC7DD,WAAW,GAAGA,WAAW,CAACC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC;EAEpD,OAAOD,WAAW;AACpB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA,SAASE,cAAcA,CAACjB,IAAY,EAAU;EAC5C;EACA,MAAMkB,YAAY,GAAG,UAAU;;EAE/B;EACA,MAAMC,eAAe,GAAGnB,IAAI,CAACgB,OAAO,CAACE,YAAY,EAAE,EAAE,CAAC;EACtD,OAAOC,eAAe;AACxB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAeP,cAAcA,CAACxC,MAA2B,EAAE4B,IAAY,EAAE;EACvErC,SAAS,CAACwC,OAAO,CAACC,IAAI,CAAC,6BAA6B,CAAC,CAAC;EACtD,IAAIW,WAAW,GAAGD,SAAS,CAACd,IAAI,CAAC;EAEjC,IAAI5B,MAAM,CAACgD,KAAK,KAAKhD,MAAM,CAACiD,GAAG,IAAI,CAACjD,MAAM,CAACiD,GAAG,EAAE;IAC9CjD,MAAM,CAACc,KAAK,GAAGd,MAAM,CAACgD,KAAK;EAC7B,CAAC,MAAM;IACL,MAAME,cAAc,GAAI;AAC5B;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,KAAK;IAED,MAAMC,MAAM,GAAG/D,kBAAkB,CAACgE,YAAY,CAAC,CAC7C,CAAC,QAAQ,EAAEF,cAAc,CAAC,EAC1B,CAAC,OAAO,EAAE,SAAS,CAAC,CACrB,CAAC;IAEF,MAAMG,eAAe,GAAGrC,KAAK,CAACsC,oBAAoB,CAAC9C,iBAAiB,CAAC;IACrE,MAAM+C,KAAK,GAAGJ,MAAM,CAACK,IAAI,CAACH,eAAe,CAAC;IAE1C,MAAMI,mBAAwB,GAAG,MAAMF,KAAK,CAACG,MAAM,CAAC;MAClDzD,KAAK,EAAE0C,WAAW;MAClBK,KAAK,EAAEhD,MAAM,CAACgD,KAAK;MACnBC,GAAG,EAAEjD,MAAM,CAACiD;IACd,CAAC,CAAC;IAEFjD,MAAM,CAACc,KAAK,GAAG2C,mBAAmB,CAAC3C,KAAK;EAC1C;EAEAvB,SAAS,CACPwC,OAAO,CAACC,IAAI,CAAC,2DAA2D,CAAC,EACzE;IAAEF,MAAM,EAAEJ;EAAW,CACvB,CAAC;EACDpC,SAAS,CAACoC,UAAU,CAAC;EAErB,OAAO1B,MAAM,CAACgD,KAAK;EACnB,OAAOhD,MAAM,CAACiD,GAAG;EACjB,OAAOjD,MAAM;AACf;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAeyC,iBAAiBA,CAACkB,OAA4B,EAAE/B,IAAY,EAAE;EAC3ErC,SAAS,CAACwC,OAAO,CAAC6B,KAAK,CAAC,6BAA6B,CAAC,EAAE;IACtD9B,MAAM,EAAEJ;EACV,CAAC,CAAC;EACF,IAAIiB,WAAW,GAAGD,SAAS,CAACd,IAAI,CAAC;EAEjC,MAAMiC,QAA+B,GAAGF,OAAO,CAACE,QAAQ;EACxD,MAAMC,YAAmC,GAAG,EAAE;EAE9C,MAAMZ,cAAc,GAAI;AAC1B;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,GAAG;EAED,MAAMC,MAAM,GAAG/D,kBAAkB,CAACgE,YAAY,CAAC,CAC7C,CAAC,QAAQ,EAAEF,cAAc,CAAC,EAC1B,CAAC,OAAO,EAAE,SAAS,CAAC,CACrB,CAAC;EAEF,MAAMG,eAAe,GAAGrC,KAAK,CAACsC,oBAAoB,CAAC9C,iBAAiB,CAAC;EACrE,MAAM+C,KAAK,GAAGJ,MAAM,CAACK,IAAI,CAACH,eAAe,CAAC;EAE1C,KAAK,MAAMU,OAAO,IAAIF,QAAQ,EAAE;IAC9B,IAAIE,OAAO,CAACf,KAAK,KAAKe,OAAO,CAACd,GAAG,IAAI,CAACc,OAAO,CAACd,GAAG,EAAE;MACjDc,OAAO,CAACjD,KAAK,GAAGiD,OAAO,CAACf,KAAK;IAC/B,CAAC,MAAM;MACL,MAAMS,mBAAwB,GAAG,MAAMF,KAAK,CAACG,MAAM,CAAC;QAClDzD,KAAK,EAAE0C,WAAW;QAClBK,KAAK,EAAEe,OAAO,CAACf,KAAK;QACpBC,GAAG,EAAEc,OAAO,CAACd;MACf,CAAC,CAAC;MAEFc,OAAO,CAACjD,KAAK,GAAG2C,mBAAmB,CAAC3C,KAAK;IAC3C;IAEA,KAAK,MAAMkD,MAAM,IAAID,OAAO,CAACxD,OAAO,EAAE;MACpC,IAAIyD,MAAM,CAAChB,KAAK,KAAKgB,MAAM,CAACf,GAAG,IAAI,CAACe,MAAM,CAACf,GAAG,EAAE;QAC9Ce,MAAM,CAAClD,KAAK,GAAGkD,MAAM,CAAChB,KAAK;QAC3B;MACF;MAEA,MAAMiB,kBAAuB,GAAG,MAAMV,KAAK,CAACG,MAAM,CAAC;QACjDzD,KAAK,EAAE8D,OAAO,CAACjD,KAAK;QACpBkC,KAAK,EAAEgB,MAAM,CAAChB,KAAK;QACnBC,GAAG,EAAEe,MAAM,CAACf;MACd,CAAC,CAAC;MAEF,OAAOe,MAAM,CAAChB,KAAK;MACnB,OAAOgB,MAAM,CAACf,GAAG;MACjBe,MAAM,CAAClD,KAAK,GAAGmD,kBAAkB,CAACnD,KAAK;IACzC;IAEAvB,SAAS,CACPwC,OAAO,CAACC,IAAI,CACV,gEACF,CAAC,EACD;MAAEF,MAAM,EAAEJ;IAAW,CACvB,CAAC;IACD,OAAOqC,OAAO,CAACf,KAAK;IACpB,OAAOe,OAAO,CAACd,GAAG;IAClBa,YAAY,CAACI,IAAI,CAACH,OAAO,CAAC;EAC5B;EAEA,MAAMI,SAAS,GAAG;IAChB,GAAGR,OAAO;IACVE,QAAQ,EAAEC;EACZ,CAAC;EAEDvE,SAAS,CAACwC,OAAO,CAACqC,QAAQ,CAAC,CAAC,EAAE;IAAEtC,MAAM,EAAEJ;EAAW,CAAC,CAAC;EACrDpC,SAAS,CAAC,CAAC;EACX,OAAO6E,SAAS;AAClB;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe/B,aAAaA,CAACR,IAAY,EAAE;EACzCrC,SAAS,CAACwC,OAAO,CAAC6B,KAAK,CAAC,0BAA0B,CAAC,EAAE;IAAE9B,MAAM,EAAEJ;EAAW,CAAC,CAAC;EAC5E,IAAIiB,WAAW,GAAGD,SAAS,CAACd,IAAI,CAAC;EACjCe,WAAW,GAAGE,cAAc,CAACF,WAAW,CAAC;EAEzC,MAAMU,eAAe,GAAGrC,KAAK,CAACsC,oBAAoB,CAAC5D,oBAAoB,CAAC;EAExE,MAAM2E,oBAAoB,GAAI;AAChC,MAAM5C,eAAgB;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,KAAK;EAEH,MAAM0B,MAAM,GAAG/D,kBAAkB,CAACgE,YAAY,CAAC,CAC7C,CAAC,QAAQ,EAAEiB,oBAAoB,CAAC,EAChC,CAAC,OAAO,EAAE,SAAS,CAAC,CACrB,CAAC;EAEF,MAAMd,KAAK,GAAGJ,MAAM,CAACK,IAAI,CAACH,eAAe,CAAC;EAC1C,MAAMiB,kBAAuB,GAAG,MAAMf,KAAK,CAACG,MAAM,CAAC;IACjDzD,KAAK,EAAE0C,WAAW;IAClB4B,QAAQ,EAAEzE,QAAQ,CAACC,gBAAgB,CAACE,KAAK;IACzCuE,QAAQ,EAAE1E,QAAQ,CAACQ,gBAAgB,CAACL,KAAK;IACzCwE,OAAO,EAAE3E,QAAQ,CAACC,gBAAgB,CAACC,MAAM;IACzC0E,OAAO,EAAE5E,QAAQ,CAACQ,gBAAgB,CAACN;EACrC,CAAC,CAAC;;EAEF;EACAT,SAAS,CACPwC,OAAO,CAAC4C,KAAK,CAAC,CACZ,CAAC,OAAO,EAAEL,kBAAkB,CAACtB,KAAK,CAAC4B,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EACpD,CAAC,KAAK,EAAEN,kBAAkB,CAACrB,GAAG,CAAC2B,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CACjD,CAAC,EACF;IAAE9C,MAAM,EAAEJ;EAAW,CACvB,CAAC;EAEDnC,SAAS,CAACwC,OAAO,CAACqC,QAAQ,CAAC,CAAC,EAAE;IAAEtC,MAAM,EAAEJ;EAAW,CAAC,CAAC;EACrDpC,SAAS,CAAC,CAAC;EAEX,MAAMuF,GAAG,GAAG,IAAIxF,GAAG,CAAC,CAAC;EACrB,IAAIwF,GAAG,CAACC,QAAQ,CAACpF,oBAAoB,EAAE4E,kBAAkB,CAAC,EAAE;IAC1D/E,SAAS,CAACwC,OAAO,CAACC,IAAI,CAAC,yBAAyB,CAAC,EAAE;MAAEF,MAAM,EAAEJ;IAAW,CAAC,CAAC;IAC1E,OAAO4C,kBAAkB;EAC3B;EAEA/E,SAAS,CAACwC,OAAO,CAACgD,IAAI,CAAC,wBAAwB,CAAC,EAAE;IAAEjD,MAAM,EAAEJ;EAAW,CAAC,CAAC;EACzE,OAAO,CAAC,CAAC;AACX;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAeO,kBAAkBA,CAACL,IAAY,EAAE;EAC9CtC,SAAS,CAAC,CAAC;EACX,IAAIqD,WAAW,GAAGD,SAAS,CAACd,IAAI,CAAC;EACjCe,WAAW,GAAGE,cAAc,CAACF,WAAW,CAAC;EAEzC,MAAMU,eAAe,GAAGrC,KAAK,CAACsC,oBAAoB,CAAC3D,mBAAmB,CAAC;EAEvE,MAAMqF,yBAAyB,GAAI;AACrC,MAAMvD,eAAgB;AACtB,KAAK;EAEH,MAAM0B,MAAM,GAAG/D,kBAAkB,CAACgE,YAAY,CAAC,CAC7C,CAAC,QAAQ,EAAE4B,yBAAyB,CAAC,EACrC,CAAC,OAAO,EAAE,SAAS,CAAC,CACrB,CAAC;EAEF,MAAMzB,KAAK,GAAGJ,MAAM,CAACK,IAAI,CAACH,eAAe,CAAC;EAC1C,MAAM4B,MAAW,GAAG,MAAM1B,KAAK,CAACG,MAAM,CAAC;IACrCzD,KAAK,EAAE0C,WAAW;IAClB4B,QAAQ,EAAEzE,QAAQ,CAACC,gBAAgB,CAACE,KAAK;IACzCuE,QAAQ,EAAE1E,QAAQ,CAACQ,gBAAgB,CAACL,KAAK;IACzCwE,OAAO,EAAE3E,QAAQ,CAACC,gBAAgB,CAACI,IAAI;IACvCuE,OAAO,EAAE5E,QAAQ,CAACQ,gBAAgB,CAACH;EACrC,CAAC,CAAC;EAEF4B,OAAO,CAAC6B,KAAK,CAAC,cAAc,CAAC;EAC7B7B,OAAO,CAAC4C,KAAK,CAAC,CACZ,CAAC,WAAW,EAAEM,MAAM,CAACC,WAAW,CAAC,EACjC,CAAC,OAAO,EAAED,MAAM,CAACE,KAAK,CAAC,EACvB,CAAC,iBAAiB,EAAEF,MAAM,CAACG,WAAW,CAAC,EACvC,CAAC,oBAAoB,EAAEH,MAAM,CAACI,kBAAkB,CAAC,CAClD,CAAC;EACFtD,OAAO,CAACqC,QAAQ,CAAC,CAAC;EAClB9E,SAAS,CAAC,CAAC;EAEX,MAAMuF,GAAG,GAAG,IAAIxF,GAAG,CAAC,CAAC;EACrB,IAAIwF,GAAG,CAACC,QAAQ,CAACnF,mBAAmB,EAAEsF,MAAM,CAAC,EAAE;IAC7C1F,SAAS,CAACwC,OAAO,CAACgD,IAAI,CAAC,oBAAoB,CAAC,EAAE;MAAEjD,MAAM,EAAEJ;IAAW,CAAC,CAAC;IACrE,OAAOuD,MAAM;EACf;EAEA1F,SAAS,CAACwC,OAAO,CAACgD,IAAI,CAAC,6BAA6B,CAAC,EAAE;IACrDjD,MAAM,EAAEJ;EACV,CAAC,CAAC;EACF,OAAO,CAAC,CAAC;AACX;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAeY,gBAAgBA,CAACV,IAAY,EAAE;EAC5CrC,SAAS,CAACwC,OAAO,CAAC6B,KAAK,CAAC,gBAAgB,CAAC,EAAE;IAAE9B,MAAM,EAAEJ;EAAW,CAAC,CAAC;EAClE,IAAIiB,WAAW,GAAGD,SAAS,CAACd,IAAI,CAAC;EACjC,MAAMyB,eAAe,GAAGrC,KAAK,CAACsC,oBAAoB,CAAC1D,iBAAiB,CAAC;EAErE,MAAM0F,uBAAuB,GAAI;AACnC,MAAM7D,eAAgB;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,KAAK;EAEH,MAAM0B,MAAM,GAAG/D,kBAAkB,CAACgE,YAAY,CAAC,CAC7C,CAAC,QAAQ,EAAEkC,uBAAuB,CAAC,EACnC,CAAC,OAAO,EAAE,SAAS,CAAC,CACrB,CAAC;EAEF,MAAM/B,KAAK,GAAGJ,MAAM,CAACK,IAAI,CAACH,eAAe,CAAC;EAC1C,MAAMkC,oBAAyB,GAAG,MAAMhC,KAAK,CAACG,MAAM,CAAC;IACnDzD,KAAK,EAAE0C,WAAW;IAClB4B,QAAQ,EAAEzE,QAAQ,CAACC,gBAAgB,CAACE,KAAK;IACzCuE,QAAQ,EAAE1E,QAAQ,CAACQ,gBAAgB,CAACL,KAAK;IACzCwE,OAAO,EAAE3E,QAAQ,CAACC,gBAAgB,CAACG,GAAG;IACtCwE,OAAO,EAAE5E,QAAQ,CAACQ,gBAAgB,CAACJ;EACrC,CAAC,CAAC;EAEFX,SAAS,CACPwC,OAAO,CAACyD,GAAG,CAAC,QAAQ,EAAED,oBAAoB,CAAC1B,QAAQ,CAAC4B,MAAM,EAAE,YAAY,CAAC,EACzE;IAAE3D,MAAM,EAAEJ;EAAW,CACvB,CAAC;EAED,KAAK,MAAMqC,OAAO,IAAIwB,oBAAoB,CAAC1B,QAAQ,EAAE;IACnDE,OAAO,CAACxD,OAAO,GAAG,MAAMmF,cAAc,CAAC9D,IAAI,EAAEmC,OAAO,CAAC;EACvD;EAEAxE,SAAS,CACPwC,OAAO,CAAC4C,KAAK,CACXY,oBAAoB,CAAC1B,QAAQ,CAAC8B,GAAG,CAAE5B,OAAY,IAAK,CAClDA,OAAO,CAAC6B,aAAa,EACrB7B,OAAO,CAACoB,KAAK,EACbpB,OAAO,CAACf,KAAK,CAAC4B,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,EAC9Bb,OAAO,CAACd,GAAG,CAAC2B,SAAS,CAAC,CAAC,EAAE,EAAE,CAAC,EAC3B,sBAAqBb,OAAO,CAACxD,OAAO,CAACkF,MAAO,EAAC,CAC/C,CACH,CAAC,EACD;IAAE3D,MAAM,EAAEJ;EAAW,CACvB,CAAC;EACDnC,SAAS,CAACwC,OAAO,CAACqC,QAAQ,CAAC,CAAC,EAAE;IAAEtC,MAAM,EAAEJ;EAAW,CAAC,CAAC;EAErD,MAAMmD,GAAG,GAAG,IAAIxF,GAAG,CAAC,CAAC;EACrB,IAAIwF,GAAG,CAACC,QAAQ,CAAClF,iBAAiB,EAAE2F,oBAAoB,CAAC,EAAE;IACzDhG,SAAS,CAACwC,OAAO,CAACC,IAAI,CAAC,qBAAqB,CAAC,EAAE;MAAEF,MAAM,EAAEJ;IAAW,CAAC,CAAC;IACtEpC,SAAS,CAAC,CAAC;IACX,OAAOiG,oBAAoB;EAC7B;EAEAhG,SAAS,CAACwC,OAAO,CAACgD,IAAI,CAAC,oBAAoB,CAAC,EAAE;IAAEjD,MAAM,EAAEJ;EAAW,CAAC,CAAC;EACrEpC,SAAS,CAAC,CAAC;EACX,OAAO,CAAC,CAAC;AACX;AAEA,eAAeoG,cAAcA,CAAC9D,IAAY,EAAEmC,OAAY,EAAE;EACxDxE,SAAS,CAACwC,OAAO,CAAC6B,KAAK,CAAC,oBAAoB,CAAC,EAAE;IAAE9B,MAAM,EAAEJ;EAAW,CAAC,CAAC;EACtE,IAAIiB,WAAW,GAAGD,SAAS,CAACd,IAAI,CAAC;EAEjC,MAAMyB,eAAe,GAAGrC,KAAK,CAACsC,oBAAoB,CAAC9D,aAAa,CAAC;EAEjE,MAAMqG,qBAAqB,GAAI;AACjC,MAAMpE,eAAgB;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,KAAK;EAEH,MAAM0B,MAAM,GAAG/D,kBAAkB,CAACgE,YAAY,CAAC,CAC7C,CAAC,QAAQ,EAAEyC,qBAAqB,CAAC,EACjC,CAAC,OAAO,EAAE,SAAS,CAAC,CACrB,CAAC;EAEF,MAAMtC,KAAK,GAAGJ,MAAM,CAACK,IAAI,CAACH,eAAe,CAAC;EAC1C,MAAMyC,mBAAwB,GAAG,MAAMvC,KAAK,CAACG,MAAM,CAAC;IAClDzD,KAAK,EAAE0C,WAAW;IAClBK,KAAK,EAAEe,OAAO,CAACf,KAAK;IACpBC,GAAG,EAAEc,OAAO,CAACd,GAAG;IAChBsB,QAAQ,EAAEzE,QAAQ,CAACQ,gBAAgB,CAACL,KAAK;IACzCwE,OAAO,EAAE3E,QAAQ,CAACQ,gBAAgB,CAACC;EACrC,CAAC,CAAC;EAEF,MAAMsE,GAAG,GAAG,IAAIxF,GAAG,CAAC,CAAC;EACrB,IAAIwF,GAAG,CAACC,QAAQ,CAACtF,aAAa,EAAEsG,mBAA0B,CAAC,EAAE;IAC3DvG,SAAS,CACPwC,OAAO,CAACC,IAAI,CACV,QAAQ,EACR8D,mBAAmB,CAACvF,OAAO,CAACkF,MAAM,EAClC,uBAAuB,EACvB1B,OAAO,CAAC6B,aAAa,EACrB7B,OAAO,CAACoB,KACV,CAAC,EACD;MAAErD,MAAM,EAAEJ;IAAW,CACvB,CAAC;IACDnC,SAAS,CAACwC,OAAO,CAACqC,QAAQ,CAAC,CAAC,EAAE;MAAEtC,MAAM,EAAEJ;IAAW,CAAC,CAAC;IACrDpC,SAAS,CAAC,CAAC;IACX,OAAOwG,mBAAmB,CAACvF,OAAO;EACpC;EAEAhB,SAAS,CAACwC,OAAO,CAACgD,IAAI,CAAC,YAAY,CAAC,EAAE;IAAEjD,MAAM,EAAEJ;EAAW,CAAC,CAAC;EAC7DpC,SAAS,CAAC,CAAC;EACX,OAAO,CAAC,CAAC;AACX;;AAEA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,eAAe4C,eAAeA,CAACN,IAAY,EAAE;EAC3C,IAAIe,WAAW,GAAGD,SAAS,CAACd,IAAI,CAAC;EACjCe,WAAW,GAAGE,cAAc,CAACF,WAAW,CAAC;EAEzC,MAAMU,eAAe,GAAGrC,KAAK,CAACsC,oBAAoB,CAACzD,eAAe,CAAC;EAEnE,MAAMkG,sBAAsB,GAAI;AAClC,MAAMtE,eAAgB;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA,KAAK;EAEH,MAAM0B,MAAM,GAAG/D,kBAAkB,CAACgE,YAAY,CAAC,CAC7C,CAAC,QAAQ,EAAE2C,sBAAsB,CAAC,EAClC,CAAC,OAAO,EAAE,SAAS,CAAC,CACrB,CAAC;EAEF,MAAMxC,KAAK,GAAGJ,MAAM,CAACK,IAAI,CAACH,eAAe,CAAC;EAC1C,MAAM2C,oBAAyB,GAAG,MAAMzC,KAAK,CAACG,MAAM,CAAC;IACnDzD,KAAK,EAAE0C,WAAW;IAClB4B,QAAQ,EAAEzE,QAAQ,CAACO,gBAAgB,CAACJ,KAAK;IACzCwE,OAAO,EAAE3E,QAAQ,CAACO,gBAAgB,CAACD,QAAQ;IAC3CoE,QAAQ,EAAE1E,QAAQ,CAACC,gBAAgB,CAACE,KAAK;IACzCyE,OAAO,EAAE5E,QAAQ,CAACC,gBAAgB,CAACK;EACrC,CAAC,CAAC;EAEF,IAAI,CAAC4F,oBAAoB,CAAC5F,QAAQ,EAAE;IAClCb,SAAS,CAACwC,OAAO,CAACgD,IAAI,CAAC,iCAAiC,CAAC,EAAE;MACzDjD,MAAM,EAAEJ;IACV,CAAC,CAAC;IACFpC,SAAS,CAAC,CAAC;IACX,OAAO;MAAEc,QAAQ,EAAE;IAAG,CAAC;EACzB;EAEAb,SAAS,CAACwC,OAAO,CAAC6B,KAAK,CAAC,UAAU,CAAC,EAAE;IAAE9B,MAAM,EAAEJ;EAAW,CAAC,CAAC;EAC5DnC,SAAS,CACPwC,OAAO,CAACyD,GAAG,CACT,QAAQ,EACRQ,oBAAoB,CAAC5F,QAAQ,CAACqF,MAAM,EACpC,4BACF,CAAC,EACD;IAAE3D,MAAM,EAAEJ;EAAW,CACvB,CAAC;EACDnC,SAAS,CACPwC,OAAO,CAAC4C,KAAK,CACXqB,oBAAoB,CAAC5F,QAAQ,CAACuF,GAAG,CAAEM,IAAS,IAAK,CAC/CA,IAAI,CAACC,QAAQ,EACbD,IAAI,CAACd,KAAK,EACVc,IAAI,CAACE,YAAY,EACjBF,IAAI,CAACG,MAAM,CACZ,CACH,CAAC,EACD;IAAEtE,MAAM,EAAEJ;EAAW,CACvB,CAAC;EACDnC,SAAS,CAACwC,OAAO,CAACqC,QAAQ,CAAC,CAAC,EAAE;IAAEtC,MAAM,EAAEJ;EAAW,CAAC,CAAC;EAErD,MAAMmD,GAAG,GAAG,IAAIxF,GAAG,CAAC,CAAC;EACrB,IAAIwF,GAAG,CAACC,QAAQ,CAACjF,eAAe,EAAEmG,oBAA2B,CAAC,EAAE;IAC9DzG,SAAS,CAACwC,OAAO,CAACC,IAAI,CAAC,gBAAgB,CAAC,EAAE;MAAEF,MAAM,EAAEJ;IAAW,CAAC,CAAC;IACjEpC,SAAS,CAAC,CAAC;IACX,OAAO0G,oBAAoB,CAAC5F,QAAQ;EACtC;EAEAb,SAAS,CAACwC,OAAO,CAACgD,IAAI,CAAC,sCAAsC,CAAC,EAAE;IAC9DjD,MAAM,EAAEJ;EACV,CAAC,CAAC;EACFpC,SAAS,CAAC,CAAC;EACX,OAAO;IAAEc,QAAQ,EAAE;EAAG,CAAC;AACzB"}
260
+ //# sourceMappingURL=data:application/json;charset=utf-8;base64,{"version":3,"names":["assert","JSDOM","parseHeader","header","headersMapping","regex","level","name","match","cleanText","text","replace","trim","excludedAlineas","parseTexte","assembleeUrl","page","html","window","document","strictEqual","children","length","htmlElement","bodyElement","close","error","code","message","tagName","bodyChild","alineaElement","isMultiLinesHeader","levels","nextParentState","state","subdivisions","subdivisionAlineas","subdivisionHeaders","nextElementSibling","undefined","id","includes","headerText","textContent","nameComputed","normalize","nameUpper","toUpperCase","nextLevel","paragraphType","lineHtml","outerHTML","lineText","excludeAlinea","test","push","texte","pop","type","niveau","niveauRelatif","titres","alineas","url"],"sources":["../../src/parsers/textes_lois.ts"],"sourcesContent":["import assert from \"assert\"\nimport { JSDOM } from \"jsdom\"\n\nfunction parseHeader(header: string): [number | null, string] {\n  const headersMapping: { regex: RegExp; level: number; name: string }[] = [\n    { regex: /^(RAPPORT_)?ANNEXE(_|$)|^ETAT_/, level: 0, name: \"Annexe\" },\n    { regex: /^TOME_/, level: 1, name: \"Tome\" },\n    {\n      regex:\n        /^PARTIE_|^(PREMIERE|SECONDE|DEUXIEME|TROISIEME|QUATRIEME)_PARTIE(_|$)/,\n      level: 2,\n      name: \"Partie\",\n    },\n    { regex: /^LIVRE_/, level: 3, name: \"Livre\" },\n    { regex: /^TITRE_/, level: 4, name: \"Titre\" },\n    { regex: /^SOUSTITRE_/, level: 5, name: \"SousTitre\" },\n    { regex: /^CHAPITRE_/, level: 6, name: \"Chapitre\" },\n    { regex: /^SECTION_/, level: 7, name: \"Section\" },\n    { regex: /^SOUSSECTION_/, level: 8, name: \"SousSection\" },\n    { regex: /^ARTICLES?_|^EXPOSE_DES_MOTIFS$/, level: 9, name: \"Article\" },\n  ]\n\n  for (let { regex, level, name } of headersMapping) {\n    if (header.match(regex)) {\n      return [level, name]\n    }\n  }\n  return [null, \"\"]\n}\n\nfunction cleanText(text: string | null): string {\n  return text?.replace(/[\\n\\t]+/g, \"\").trim() || \"\"\n}\n\nconst excludedAlineas = [\n  /^Délibéré en séance publique/,\n  /^Fait le/,\n  /^Le Président,$/,\n  /^Signé/,\n]\n\nexport function parseTexte(assembleeUrl: string, page: string) {\n  // Repair HTML.\n  let html = page.replace(/(<style[\\w\\W]+style>)/g, \"\")\n\n  // Extract subdivisions from HTML.\n  const { window } = new JSDOM(html)\n  const { document } = window\n  assert.strictEqual(document.children.length, 1)\n  const htmlElement = document.children[0]\n  assert.strictEqual(htmlElement.children.length, 2)\n  const bodyElement = htmlElement.children[1]\n\n  if (bodyElement.children.length < 3) {\n    // Occurs in http://www.assemblee-nationale.fr/15/textes/0326.asp.\n    window.close() // Free memory.\n    return {\n      error: { code: -1, message: \"Texte de loi sans contenu\" },\n      html,\n      page,\n    }\n  }\n  assert.strictEqual(bodyElement.children[0].tagName, \"DIV\")\n  // First child is a DIV describing the document (Assemblée's header). Skip it for now.\n  let bodyChild = bodyElement.children[1]\n\n  assert.strictEqual(bodyChild.tagName, \"BR\")\n  let alineaElement: Element | null = null\n  let isMultiLinesHeader: boolean = false\n  let level: number | null = null\n  let levels: number[] = []\n  let nextParentState = null\n  let state: string | null = \"nextBodyChild\"\n  const subdivisions = []\n  let subdivisionAlineas: object[] | null = null\n  let subdivisionHeaders: object[] | null = null\n  while (state !== null) {\n    switch (state) {\n      case \"nextBodyChild\":\n        if (bodyChild.nextElementSibling === null) {\n          // The document has been fully parsed.\n          state = null\n        } else {\n          bodyChild = bodyChild.nextElementSibling\n          if (bodyChild.tagName === \"DIV\") {\n            alineaElement = bodyChild.children[0]\n\n            if (alineaElement === undefined || bodyChild.id.includes(\"ftn\")) {\n              // No alinea in current bodyChild: go to next bodyChild.\n              state = \"nextBodyChild\"\n            } else {\n              state = \"alineaElement\"\n            }\n          } else {\n            // <P/> is for footnotes\n            assert(\n              [\"BR\", \"HR\", \"P\"].includes(bodyChild.tagName),\n              `Unexpected tag name \"${bodyChild.tagName}\" for body child`,\n            )\n            // Stay in the same state to go to next bodyChild.\n          }\n        }\n        break\n      case \"alineaElement\":\n        switch (alineaElement!.tagName) {\n          case \"DIV\":\n            state = \"nextAlineaElement\"\n            break\n          case \"H4\":\n            // TODO\n            nextParentState = \"nextAlineaElement\"\n            state = \"firstParagraphChild\"\n            break\n          case \"P\":\n            nextParentState = \"nextAlineaElement\"\n            state = \"firstParagraphChild\"\n            break\n          case \"OL\":\n            // TODO\n            state = \"nextAlineaElement\"\n            break\n          case \"TABLE\":\n            nextParentState = \"nextAlineaElement\"\n            state = \"firstParagraphChild\"\n            break\n          default:\n            return {\n              error: {\n                code: -2,\n                message: `Unexpected tag name for alinea element: ${\n                  alineaElement!.tagName\n                }`,\n              },\n              html,\n              page,\n            }\n        }\n        break\n      case \"firstParagraphChild\":\n        const headerText = alineaElement!.textContent\n        const nameComputed = (headerText || \"\")\n          .normalize(\"NFD\")\n          .replace(/[\\u0300-\\u036f]/g, \"\")\n          .replace(/\\(nouveau\\)/, \"\")\n          .replace(/\\(Pour coordination\\)/, \"\")\n          .replace(/\\(Supprimés?\\)/, \"\")\n          .replace(/ /g, \" \")\n          .replace(/[\\-,.…]/g, \"\")\n          .trim()\n          .replace(/ {1,}/g, \"_\")\n\n        const nameUpper = nameComputed.toUpperCase()\n        const [nextLevel, paragraphType] = parseHeader(nameUpper)\n\n        if (\n          !nameUpper ||\n          nameUpper.match(/^(PROJET|PROPOSITION)_DE_LOI(_|$)/) !== null\n        ) {\n          // Occurs in:\n          // * http://www.assemblee-nationale.fr/15/textes/0232.asp\n          // * http://www.assemblee-nationale.fr/15/textes/0626.asp\n          // * http://www.assemblee-nationale.fr/15/textes/0676.asp\n          state = nextParentState\n          break\n        } else if (nameUpper === \"JEUX_OLYMPIQUES_ET_PARALYMPIQUES_DE_2024\") {\n          // Occurs in http://www.assemblee-nationale.fr/15/textes/0676.asp\n          state = nextParentState\n          break\n        } else if (nameUpper === \"TEXTE_DE_LA_COMMISSION_MIXTE_PARITAIRE\") {\n          // Occurs in http://www.assemblee-nationale.fr/15/textes/1294.asp\n          state = nextParentState\n          break\n        }\n\n        const lineHtml = cleanText(alineaElement!.outerHTML)\n        const lineText = cleanText(alineaElement!.textContent)\n\n        if (nextLevel === null) {\n          // It is a regular alinea\n          // Exclude some alineas\n          let excludeAlinea = false\n          for (let regex of excludedAlineas) {\n            if (regex.test(lineText)) {\n              excludeAlinea = true\n            }\n          }\n          if (excludeAlinea) {\n            state = nextParentState\n            break\n          }\n          if (\n            subdivisionHeaders!.length === 0 ||\n            (isMultiLinesHeader && subdivisionAlineas!.length === 0)\n          ) {\n            subdivisionHeaders!.push({\n              texte: lineText,\n              html: lineHtml,\n            })\n          } else {\n            subdivisionAlineas!.push({\n              texte: lineText,\n              html: lineHtml,\n            })\n          }\n          if (isMultiLinesHeader && subdivisionHeaders!.length >= 2) {\n            isMultiLinesHeader = false\n          }\n          // Skip paragraph.\n          state = nextParentState\n          break\n        } else if (nextLevel !== null) {\n          // It is a header\n          level = nextLevel\n          while (levels.length > 0 && level < levels[levels.length - 1]) {\n            levels.pop()\n          }\n          if (levels.length === 0 || level > levels[levels.length - 1]) {\n            levels.push(level)\n          }\n\n          subdivisionAlineas = []\n          subdivisionHeaders = [{ texte: lineText, html: lineHtml }]\n\n          // Articles & \"Exposé des motifs\" are the only divisions without second title.\n          isMultiLinesHeader =\n            nameUpper.match(/^ARTICLES?_/) === null &&\n            nameUpper.match(/^EXPOSE_DES_MOTIFS$/) === null\n\n          subdivisions.push({\n            id: \"D_\" + nameComputed,\n            type: paragraphType,\n            niveau: level + 1,\n            niveauRelatif: levels.length,\n            titres: subdivisionHeaders,\n            alineas: subdivisionAlineas,\n          })\n        }\n\n        state = nextParentState\n        break\n      case \"nextAlineaElement\":\n        alineaElement = alineaElement!.nextElementSibling\n        if (alineaElement === null) {\n          // The bodyChild has been fully parsed. Go to next bodyChild\n          state = \"nextBodyChild\"\n        } else {\n          state = \"alineaElement\"\n        }\n        break\n      default:\n        throw `Unexpected state: ${state}`\n    }\n  }\n\n  window.close() // Free memory.\n\n  return {\n    error: null,\n    html,\n    page,\n    subdivisions,\n    url: assembleeUrl,\n  }\n}\n"],"mappings":"AAAA,OAAOA,MAAM,MAAM,QAAQ;AAC3B,SAASC,KAAK,QAAQ,OAAO;AAE7B,SAASC,WAAWA,CAACC,MAAc,EAA2B;EAC5D,MAAMC,cAAgE,GAAG,CACvE;IAAEC,KAAK,EAAE,gCAAgC;IAAEC,KAAK,EAAE,CAAC;IAAEC,IAAI,EAAE;EAAS,CAAC,EACrE;IAAEF,KAAK,EAAE,QAAQ;IAAEC,KAAK,EAAE,CAAC;IAAEC,IAAI,EAAE;EAAO,CAAC,EAC3C;IACEF,KAAK,EACH,uEAAuE;IACzEC,KAAK,EAAE,CAAC;IACRC,IAAI,EAAE;EACR,CAAC,EACD;IAAEF,KAAK,EAAE,SAAS;IAAEC,KAAK,EAAE,CAAC;IAAEC,IAAI,EAAE;EAAQ,CAAC,EAC7C;IAAEF,KAAK,EAAE,SAAS;IAAEC,KAAK,EAAE,CAAC;IAAEC,IAAI,EAAE;EAAQ,CAAC,EAC7C;IAAEF,KAAK,EAAE,aAAa;IAAEC,KAAK,EAAE,CAAC;IAAEC,IAAI,EAAE;EAAY,CAAC,EACrD;IAAEF,KAAK,EAAE,YAAY;IAAEC,KAAK,EAAE,CAAC;IAAEC,IAAI,EAAE;EAAW,CAAC,EACnD;IAAEF,KAAK,EAAE,WAAW;IAAEC,KAAK,EAAE,CAAC;IAAEC,IAAI,EAAE;EAAU,CAAC,EACjD;IAAEF,KAAK,EAAE,eAAe;IAAEC,KAAK,EAAE,CAAC;IAAEC,IAAI,EAAE;EAAc,CAAC,EACzD;IAAEF,KAAK,EAAE,iCAAiC;IAAEC,KAAK,EAAE,CAAC;IAAEC,IAAI,EAAE;EAAU,CAAC,CACxE;EAED,KAAK,IAAI;IAAEF,KAAK;IAAEC,KAAK;IAAEC;EAAK,CAAC,IAAIH,cAAc,EAAE;IACjD,IAAID,MAAM,CAACK,KAAK,CAACH,KAAK,CAAC,EAAE;MACvB,OAAO,CAACC,KAAK,EAAEC,IAAI,CAAC;IACtB;EACF;EACA,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;AACnB;AAEA,SAASE,SAASA,CAACC,IAAmB,EAAU;EAC9C,OAAOA,IAAI,EAAEC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAACC,IAAI,CAAC,CAAC,IAAI,EAAE;AACnD;AAEA,MAAMC,eAAe,GAAG,CACtB,8BAA8B,EAC9B,UAAU,EACV,iBAAiB,EACjB,QAAQ,CACT;AAED,OAAO,SAASC,UAAUA,CAACC,YAAoB,EAAEC,IAAY,EAAE;EAC7D;EACA,IAAIC,IAAI,GAAGD,IAAI,CAACL,OAAO,CAAC,wBAAwB,EAAE,EAAE,CAAC;;EAErD;EACA,MAAM;IAAEO;EAAO,CAAC,GAAG,IAAIjB,KAAK,CAACgB,IAAI,CAAC;EAClC,MAAM;IAAEE;EAAS,CAAC,GAAGD,MAAM;EAC3BlB,MAAM,CAACoB,WAAW,CAACD,QAAQ,CAACE,QAAQ,CAACC,MAAM,EAAE,CAAC,CAAC;EAC/C,MAAMC,WAAW,GAAGJ,QAAQ,CAACE,QAAQ,CAAC,CAAC,CAAC;EACxCrB,MAAM,CAACoB,WAAW,CAACG,WAAW,CAACF,QAAQ,CAACC,MAAM,EAAE,CAAC,CAAC;EAClD,MAAME,WAAW,GAAGD,WAAW,CAACF,QAAQ,CAAC,CAAC,CAAC;EAE3C,IAAIG,WAAW,CAACH,QAAQ,CAACC,MAAM,GAAG,CAAC,EAAE;IACnC;IACAJ,MAAM,CAACO,KAAK,CAAC,CAAC,EAAC;IACf,OAAO;MACLC,KAAK,EAAE;QAAEC,IAAI,EAAE,CAAC,CAAC;QAAEC,OAAO,EAAE;MAA4B,CAAC;MACzDX,IAAI;MACJD;IACF,CAAC;EACH;EACAhB,MAAM,CAACoB,WAAW,CAACI,WAAW,CAACH,QAAQ,CAAC,CAAC,CAAC,CAACQ,OAAO,EAAE,KAAK,CAAC;EAC1D;EACA,IAAIC,SAAS,GAAGN,WAAW,CAACH,QAAQ,CAAC,CAAC,CAAC;EAEvCrB,MAAM,CAACoB,WAAW,CAACU,SAAS,CAACD,OAAO,EAAE,IAAI,CAAC;EAC3C,IAAIE,aAA6B,GAAG,IAAI;EACxC,IAAIC,kBAA2B,GAAG,KAAK;EACvC,IAAI1B,KAAoB,GAAG,IAAI;EAC/B,IAAI2B,MAAgB,GAAG,EAAE;EACzB,IAAIC,eAAe,GAAG,IAAI;EAC1B,IAAIC,KAAoB,GAAG,eAAe;EAC1C,MAAMC,YAAY,GAAG,EAAE;EACvB,IAAIC,kBAAmC,GAAG,IAAI;EAC9C,IAAIC,kBAAmC,GAAG,IAAI;EAC9C,OAAOH,KAAK,KAAK,IAAI,EAAE;IACrB,QAAQA,KAAK;MACX,KAAK,eAAe;QAClB,IAAIL,SAAS,CAACS,kBAAkB,KAAK,IAAI,EAAE;UACzC;UACAJ,KAAK,GAAG,IAAI;QACd,CAAC,MAAM;UACLL,SAAS,GAAGA,SAAS,CAACS,kBAAkB;UACxC,IAAIT,SAAS,CAACD,OAAO,KAAK,KAAK,EAAE;YAC/BE,aAAa,GAAGD,SAAS,CAACT,QAAQ,CAAC,CAAC,CAAC;YAErC,IAAIU,aAAa,KAAKS,SAAS,IAAIV,SAAS,CAACW,EAAE,CAACC,QAAQ,CAAC,KAAK,CAAC,EAAE;cAC/D;cACAP,KAAK,GAAG,eAAe;YACzB,CAAC,MAAM;cACLA,KAAK,GAAG,eAAe;YACzB;UACF,CAAC,MAAM;YACL;YACAnC,MAAM,CACJ,CAAC,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC0C,QAAQ,CAACZ,SAAS,CAACD,OAAO,CAAC,EAC7C,wBAAwBC,SAAS,CAACD,OAAO,kBAC3C,CAAC;YACD;UACF;QACF;QACA;MACF,KAAK,eAAe;QAClB,QAAQE,aAAa,CAAEF,OAAO;UAC5B,KAAK,KAAK;YACRM,KAAK,GAAG,mBAAmB;YAC3B;UACF,KAAK,IAAI;YACP;YACAD,eAAe,GAAG,mBAAmB;YACrCC,KAAK,GAAG,qBAAqB;YAC7B;UACF,KAAK,GAAG;YACND,eAAe,GAAG,mBAAmB;YACrCC,KAAK,GAAG,qBAAqB;YAC7B;UACF,KAAK,IAAI;YACP;YACAA,KAAK,GAAG,mBAAmB;YAC3B;UACF,KAAK,OAAO;YACVD,eAAe,GAAG,mBAAmB;YACrCC,KAAK,GAAG,qBAAqB;YAC7B;UACF;YACE,OAAO;cACLT,KAAK,EAAE;gBACLC,IAAI,EAAE,CAAC,CAAC;gBACRC,OAAO,EAAE,2CACPG,aAAa,CAAEF,OAAO;cAE1B,CAAC;cACDZ,IAAI;cACJD;YACF,CAAC;QACL;QACA;MACF,KAAK,qBAAqB;QACxB,MAAM2B,UAAU,GAAGZ,aAAa,CAAEa,WAAW;QAC7C,MAAMC,YAAY,GAAG,CAACF,UAAU,IAAI,EAAE,EACnCG,SAAS,CAAC,KAAK,CAAC,CAChBnC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAC/BA,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAC1BA,OAAO,CAAC,uBAAuB,EAAE,EAAE,CAAC,CACpCA,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAC7BA,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAClBA,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CACvBC,IAAI,CAAC,CAAC,CACND,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;QAEzB,MAAMoC,SAAS,GAAGF,YAAY,CAACG,WAAW,CAAC,CAAC;QAC5C,MAAM,CAACC,SAAS,EAAEC,aAAa,CAAC,GAAGhD,WAAW,CAAC6C,SAAS,CAAC;QAEzD,IACE,CAACA,SAAS,IACVA,SAAS,CAACvC,KAAK,CAAC,mCAAmC,CAAC,KAAK,IAAI,EAC7D;UACA;UACA;UACA;UACA;UACA2B,KAAK,GAAGD,eAAe;UACvB;QACF,CAAC,MAAM,IAAIa,SAAS,KAAK,0CAA0C,EAAE;UACnE;UACAZ,KAAK,GAAGD,eAAe;UACvB;QACF,CAAC,MAAM,IAAIa,SAAS,KAAK,wCAAwC,EAAE;UACjE;UACAZ,KAAK,GAAGD,eAAe;UACvB;QACF;QAEA,MAAMiB,QAAQ,GAAG1C,SAAS,CAACsB,aAAa,CAAEqB,SAAS,CAAC;QACpD,MAAMC,QAAQ,GAAG5C,SAAS,CAACsB,aAAa,CAAEa,WAAW,CAAC;QAEtD,IAAIK,SAAS,KAAK,IAAI,EAAE;UACtB;UACA;UACA,IAAIK,aAAa,GAAG,KAAK;UACzB,KAAK,IAAIjD,KAAK,IAAIQ,eAAe,EAAE;YACjC,IAAIR,KAAK,CAACkD,IAAI,CAACF,QAAQ,CAAC,EAAE;cACxBC,aAAa,GAAG,IAAI;YACtB;UACF;UACA,IAAIA,aAAa,EAAE;YACjBnB,KAAK,GAAGD,eAAe;YACvB;UACF;UACA,IACEI,kBAAkB,CAAEhB,MAAM,KAAK,CAAC,IAC/BU,kBAAkB,IAAIK,kBAAkB,CAAEf,MAAM,KAAK,CAAE,EACxD;YACAgB,kBAAkB,CAAEkB,IAAI,CAAC;cACvBC,KAAK,EAAEJ,QAAQ;cACfpC,IAAI,EAAEkC;YACR,CAAC,CAAC;UACJ,CAAC,MAAM;YACLd,kBAAkB,CAAEmB,IAAI,CAAC;cACvBC,KAAK,EAAEJ,QAAQ;cACfpC,IAAI,EAAEkC;YACR,CAAC,CAAC;UACJ;UACA,IAAInB,kBAAkB,IAAIM,kBAAkB,CAAEhB,MAAM,IAAI,CAAC,EAAE;YACzDU,kBAAkB,GAAG,KAAK;UAC5B;UACA;UACAG,KAAK,GAAGD,eAAe;UACvB;QACF,CAAC,MAAM,IAAIe,SAAS,KAAK,IAAI,EAAE;UAC7B;UACA3C,KAAK,GAAG2C,SAAS;UACjB,OAAOhB,MAAM,CAACX,MAAM,GAAG,CAAC,IAAIhB,KAAK,GAAG2B,MAAM,CAACA,MAAM,CAACX,MAAM,GAAG,CAAC,CAAC,EAAE;YAC7DW,MAAM,CAACyB,GAAG,CAAC,CAAC;UACd;UACA,IAAIzB,MAAM,CAACX,MAAM,KAAK,CAAC,IAAIhB,KAAK,GAAG2B,MAAM,CAACA,MAAM,CAACX,MAAM,GAAG,CAAC,CAAC,EAAE;YAC5DW,MAAM,CAACuB,IAAI,CAAClD,KAAK,CAAC;UACpB;UAEA+B,kBAAkB,GAAG,EAAE;UACvBC,kBAAkB,GAAG,CAAC;YAAEmB,KAAK,EAAEJ,QAAQ;YAAEpC,IAAI,EAAEkC;UAAS,CAAC,CAAC;;UAE1D;UACAnB,kBAAkB,GAChBe,SAAS,CAACvC,KAAK,CAAC,aAAa,CAAC,KAAK,IAAI,IACvCuC,SAAS,CAACvC,KAAK,CAAC,qBAAqB,CAAC,KAAK,IAAI;UAEjD4B,YAAY,CAACoB,IAAI,CAAC;YAChBf,EAAE,EAAE,IAAI,GAAGI,YAAY;YACvBc,IAAI,EAAET,aAAa;YACnBU,MAAM,EAAEtD,KAAK,GAAG,CAAC;YACjBuD,aAAa,EAAE5B,MAAM,CAACX,MAAM;YAC5BwC,MAAM,EAAExB,kBAAkB;YAC1ByB,OAAO,EAAE1B;UACX,CAAC,CAAC;QACJ;QAEAF,KAAK,GAAGD,eAAe;QACvB;MACF,KAAK,mBAAmB;QACtBH,aAAa,GAAGA,aAAa,CAAEQ,kBAAkB;QACjD,IAAIR,aAAa,KAAK,IAAI,EAAE;UAC1B;UACAI,KAAK,GAAG,eAAe;QACzB,CAAC,MAAM;UACLA,KAAK,GAAG,eAAe;QACzB;QACA;MACF;QACE,MAAM,qBAAqBA,KAAK,EAAE;IACtC;EACF;EAEAjB,MAAM,CAACO,KAAK,CAAC,CAAC,EAAC;;EAEf,OAAO;IACLC,KAAK,EAAE,IAAI;IACXT,IAAI;IACJD,IAAI;IACJoB,YAAY;IACZ4B,GAAG,EAAEjD;EACP,CAAC;AACH","ignoreList":[]}