@tricoteuses/assemblee 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161):
  1. package/lib/amendements-CN7bRFdP.js +259 -0
  2. package/lib/amendements-CN7bRFdP.js.map +1 -0
  3. package/lib/amendements-Cg1lyCBp.js +735 -0
  4. package/lib/amendements-Cg1lyCBp.js.map +1 -0
  5. package/lib/api.d.ts +1 -0
  6. package/lib/api.d.ts.map +1 -0
  7. package/lib/cleaners/actes_legislatifs.d.ts +1 -0
  8. package/lib/cleaners/actes_legislatifs.d.ts.map +1 -0
  9. package/lib/cleaners/acteurs.d.ts +1 -0
  10. package/lib/cleaners/acteurs.d.ts.map +1 -0
  11. package/lib/cleaners/amendements.d.ts +1 -0
  12. package/lib/cleaners/amendements.d.ts.map +1 -0
  13. package/lib/cleaners/debats.d.ts +1 -0
  14. package/lib/cleaners/debats.d.ts.map +1 -0
  15. package/lib/cleaners/documents.d.ts +1 -0
  16. package/lib/cleaners/documents.d.ts.map +1 -0
  17. package/lib/cleaners/dossiers_legislatifs.d.ts +1 -0
  18. package/lib/cleaners/dossiers_legislatifs.d.ts.map +1 -0
  19. package/lib/cleaners/index.d.ts +1 -0
  20. package/lib/cleaners/index.d.ts.map +1 -0
  21. package/lib/cleaners/organes.d.ts +1 -0
  22. package/lib/cleaners/organes.d.ts.map +1 -0
  23. package/lib/cleaners/questions.d.ts +1 -0
  24. package/lib/cleaners/questions.d.ts.map +1 -0
  25. package/lib/cleaners/reunions.d.ts +1 -0
  26. package/lib/cleaners/reunions.d.ts.map +1 -0
  27. package/lib/cleaners/scrutins.d.ts +1 -0
  28. package/lib/cleaners/scrutins.d.ts.map +1 -0
  29. package/lib/cleaners/xml.d.ts +1 -0
  30. package/lib/cleaners/xml.d.ts.map +1 -0
  31. package/lib/cleaners.js +1082 -508
  32. package/lib/cleaners.js.map +1 -0
  33. package/lib/datasets.d.ts +1 -0
  34. package/lib/datasets.d.ts.map +1 -0
  35. package/lib/dates.d.ts +1 -0
  36. package/lib/dates.d.ts.map +1 -0
  37. package/lib/debats-BwZYgzXe.js +3978 -0
  38. package/lib/debats-BwZYgzXe.js.map +1 -0
  39. package/lib/dossiers_legislatifs.d.ts +1 -0
  40. package/lib/dossiers_legislatifs.d.ts.map +1 -0
  41. package/lib/file_systems.d.ts +1 -0
  42. package/lib/file_systems.d.ts.map +1 -0
  43. package/lib/git.d.ts +1 -0
  44. package/lib/git.d.ts.map +1 -0
  45. package/lib/index.d.ts +1 -0
  46. package/lib/index.d.ts.map +1 -0
  47. package/lib/index.js +1211 -808
  48. package/lib/index.js.map +1 -0
  49. package/lib/inserters.d.ts +1 -0
  50. package/lib/inserters.d.ts.map +1 -0
  51. package/lib/loaders.d.ts +1 -0
  52. package/lib/loaders.d.ts.map +1 -0
  53. package/lib/loaders.js +983 -27
  54. package/lib/loaders.js.map +1 -0
  55. package/lib/logger.d.ts +1 -0
  56. package/lib/logger.d.ts.map +1 -0
  57. package/lib/organes.d.ts +1 -0
  58. package/lib/organes.d.ts.map +1 -0
  59. package/lib/parse-CzW8NHW5.js +2850 -0
  60. package/lib/parse-CzW8NHW5.js.map +1 -0
  61. package/lib/parsers/index.d.ts +1 -0
  62. package/lib/parsers/index.d.ts.map +1 -0
  63. package/lib/parsers/plf.d.ts +1 -0
  64. package/lib/parsers/plf.d.ts.map +1 -0
  65. package/lib/parsers/recherche_amendements.d.ts +1 -0
  66. package/lib/parsers/recherche_amendements.d.ts.map +1 -0
  67. package/lib/parsers/textes_lois.d.ts +1 -0
  68. package/lib/parsers/textes_lois.d.ts.map +1 -0
  69. package/lib/parsers.js +287 -292
  70. package/lib/parsers.js.map +1 -0
  71. package/lib/raw_types/acteurs_et_organes.d.ts +1 -0
  72. package/lib/raw_types/acteurs_et_organes.d.ts.map +1 -0
  73. package/lib/raw_types/agendas.d.ts +1 -0
  74. package/lib/raw_types/agendas.d.ts.map +1 -0
  75. package/lib/raw_types/amendements.d.ts +1 -0
  76. package/lib/raw_types/amendements.d.ts.map +1 -0
  77. package/lib/raw_types/debats.d.ts +1 -0
  78. package/lib/raw_types/debats.d.ts.map +1 -0
  79. package/lib/raw_types/dossiers_legislatifs.d.ts +1 -0
  80. package/lib/raw_types/dossiers_legislatifs.d.ts.map +1 -0
  81. package/lib/raw_types/questions.d.ts +1 -0
  82. package/lib/raw_types/questions.d.ts.map +1 -0
  83. package/lib/raw_types/scrutins.d.ts +1 -0
  84. package/lib/raw_types/scrutins.d.ts.map +1 -0
  85. package/lib/schemas/acteurs_et_organes.d.json +982 -0
  86. package/lib/schemas/agendas.d.json +1561 -0
  87. package/lib/schemas/amendements.d.json +1901 -0
  88. package/lib/schemas/debats.d.json +623 -0
  89. package/lib/schemas/dossiers_legislatifs.d.json +3690 -0
  90. package/lib/schemas/legislatures.d.json +17 -0
  91. package/lib/schemas/questions.d.json +520 -0
  92. package/lib/schemas/scrutins.d.json +517 -0
  93. package/lib/scripts/add_links_to_documents.d.ts +1 -0
  94. package/lib/scripts/add_links_to_documents.d.ts.map +1 -0
  95. package/lib/scripts/clean_reorganized_data.d.ts +1 -0
  96. package/lib/scripts/clean_reorganized_data.d.ts.map +1 -0
  97. package/lib/scripts/copy-schemas.d.ts +1 -0
  98. package/lib/scripts/copy-schemas.d.ts.map +1 -0
  99. package/lib/scripts/diff_amendements.d.ts +1 -0
  100. package/lib/scripts/diff_amendements.d.ts.map +1 -0
  101. package/lib/scripts/document_dossiers_legislatifs.d.ts +1 -0
  102. package/lib/scripts/document_dossiers_legislatifs.d.ts.map +1 -0
  103. package/lib/scripts/generate-json-schemas.d.ts +1 -0
  104. package/lib/scripts/generate-json-schemas.d.ts.map +1 -0
  105. package/lib/scripts/get_today_reunions.d.ts +1 -0
  106. package/lib/scripts/get_today_reunions.d.ts.map +1 -0
  107. package/lib/scripts/merge_scrutins.d.ts +1 -0
  108. package/lib/scripts/merge_scrutins.d.ts.map +1 -0
  109. package/lib/scripts/process_open_dataset.d.ts +1 -0
  110. package/lib/scripts/process_open_dataset.d.ts.map +1 -0
  111. package/lib/scripts/raw_types_from_amendements.d.ts +1 -0
  112. package/lib/scripts/raw_types_from_amendements.d.ts.map +1 -0
  113. package/lib/scripts/reorganize_data.d.ts +1 -0
  114. package/lib/scripts/reorganize_data.d.ts.map +1 -0
  115. package/lib/scripts/retrieve_deputes_photos.d.ts +1 -0
  116. package/lib/scripts/retrieve_deputes_photos.d.ts.map +1 -0
  117. package/lib/scripts/retrieve_documents.d.ts +1 -0
  118. package/lib/scripts/retrieve_documents.d.ts.map +1 -0
  119. package/lib/scripts/retrieve_open_data.d.ts +1 -0
  120. package/lib/scripts/retrieve_open_data.d.ts.map +1 -0
  121. package/lib/scripts/retrieve_pending_amendments.d.ts +1 -0
  122. package/lib/scripts/retrieve_pending_amendments.d.ts.map +1 -0
  123. package/lib/scripts/retrieve_senateurs_photos.d.ts +1 -0
  124. package/lib/scripts/retrieve_senateurs_photos.d.ts.map +1 -0
  125. package/lib/scripts/shared/cli_helpers.d.ts +1 -0
  126. package/lib/scripts/shared/cli_helpers.d.ts.map +1 -0
  127. package/lib/scripts/test_iter_load.d.ts +1 -0
  128. package/lib/scripts/test_iter_load.d.ts.map +1 -0
  129. package/lib/scripts/test_load.d.ts +1 -0
  130. package/lib/scripts/test_load.d.ts.map +1 -0
  131. package/lib/scripts/test_load_big_files.d.ts +1 -0
  132. package/lib/scripts/test_load_big_files.d.ts.map +1 -0
  133. package/lib/scripts/validate_json.d.ts +1 -0
  134. package/lib/scripts/validate_json.d.ts.map +1 -0
  135. package/lib/shared_types/codes_actes.d.ts +1 -0
  136. package/lib/shared_types/codes_actes.d.ts.map +1 -0
  137. package/lib/strings.d.ts +1 -0
  138. package/lib/strings.d.ts.map +1 -0
  139. package/lib/types/acteurs_et_organes.d.ts +1 -0
  140. package/lib/types/acteurs_et_organes.d.ts.map +1 -0
  141. package/lib/types/agendas.d.ts +1 -0
  142. package/lib/types/agendas.d.ts.map +1 -0
  143. package/lib/types/amendements.d.ts +1 -0
  144. package/lib/types/amendements.d.ts.map +1 -0
  145. package/lib/types/debats.d.ts +1 -0
  146. package/lib/types/debats.d.ts.map +1 -0
  147. package/lib/types/dossiers_legislatifs.d.ts +1 -0
  148. package/lib/types/dossiers_legislatifs.d.ts.map +1 -0
  149. package/lib/types/legislatures.d.ts +1 -0
  150. package/lib/types/legislatures.d.ts.map +1 -0
  151. package/lib/types/questions.d.ts +1 -0
  152. package/lib/types/questions.d.ts.map +1 -0
  153. package/lib/types/scrutins.d.ts +1 -0
  154. package/lib/types/scrutins.d.ts.map +1 -0
  155. package/lib/urls.d.ts +1 -0
  156. package/lib/urls.d.ts.map +1 -0
  157. package/package.json +1 -3
  158. package/lib/amendements-79bwpkvR.js +0 -154
  159. package/lib/amendements-CV3s5a0M.js +0 -667
  160. package/lib/loaders-9mHdTl9L.js +0 -4158
  161. package/lib/parse-Ccs6wcUg.js +0 -2512
package/lib/parsers.js CHANGED
@@ -1,16 +1,16 @@
1
- import e from "node:assert";
2
- import { JSDOM as z } from "jsdom";
3
- import { t as Q, a as v, p as k, q as J } from "./parse-Ccs6wcUg.js";
4
- import Z from "deep-equal";
5
- import q from "fs-extra";
6
- import K from "front-matter";
7
- import W from "js-yaml";
8
- import F from "node:path";
9
- import { parse as X } from "node-html-parser";
10
- import { d as G } from "./amendements-79bwpkvR.js";
11
- import { C as ee } from "./amendements-CV3s5a0M.js";
12
- function te(i) {
13
- const t = [
1
+ import assert from "node:assert";
2
+ import { JSDOM } from "jsdom";
3
+ import { t as toDate, a as addLeadingZeros, p as parse$1, q as fr } from "./parse-CzW8NHW5.js";
4
+ import deepEqual from "deep-equal";
5
+ import fs from "fs-extra";
6
+ import frontMatter from "front-matter";
7
+ import jsYaml from "js-yaml";
8
+ import path from "node:path";
9
+ import { parse } from "node-html-parser";
10
+ import { d as cleanAmendement } from "./amendements-CN7bRFdP.js";
11
+ import { C as Convert } from "./amendements-Cg1lyCBp.js";
12
+ function parseHeader(header) {
13
+ const headersMapping = [
14
14
  { regex: /^(RAPPORT_)?ANNEXE(_|$)|^ETAT_/, level: 0, name: "Annexe" },
15
15
  { regex: /^TOME_/, level: 1, name: "Tome" },
16
16
  {
@@ -39,61 +39,80 @@ function te(i) {
39
39
  name: "CMP"
40
40
  }
41
41
  ];
42
- for (const { regex: a, level: r, name: m } of t)
43
- if (a.test(i))
44
- return [r, m];
42
+ for (const { regex, level, name } of headersMapping) {
43
+ if (regex.test(header)) {
44
+ return [level, name];
45
+ }
46
+ }
45
47
  return [null, ""];
46
48
  }
47
- function P(i) {
48
- return i?.replace(/[\n\t]+/g, "").trim() || "";
49
+ function cleanText(text) {
50
+ return text?.replace(/[\n\t]+/g, "").trim() || "";
49
51
  }
50
- const ne = [
52
+ const excludedAlineas = [
51
53
  /^Délibéré en séance publique/,
52
54
  /^Fait le/,
53
55
  /^La Présidente,$/,
54
56
  /^Le Président,$/,
55
57
  /^Signé/
56
58
  ];
57
- function ae(i) {
58
- return ne.some((t) => t.test(i));
59
+ function isExcludedAlinea(text) {
60
+ return excludedAlineas.some((regex) => regex.test(text));
59
61
  }
60
- function he(i, t) {
61
- const a = t.replace(/(<style[\w\W]+style>)/g, ""), { window: r } = new z(a), m = r.document;
62
- e.strictEqual(m.children.length, 1);
63
- const d = m.children[0];
64
- e.strictEqual(d.children.length, 2);
65
- const u = d.children[1];
66
- let n = u.children?.[1];
67
- if (u.children.length < 3 || n.tagName !== "BR")
62
+ function parseTexte(assembleeUrl, page) {
63
+ const html = page.replace(/(<style[\w\W]+style>)/g, "");
64
+ const { window } = new JSDOM(html);
65
+ const document = window.document;
66
+ assert.strictEqual(document.children.length, 1);
67
+ const htmlElement = document.children[0];
68
+ assert.strictEqual(htmlElement.children.length, 2);
69
+ const bodyElement = htmlElement.children[1];
70
+ let currentSection = bodyElement.children?.[1];
71
+ if (bodyElement.children.length < 3 || currentSection.tagName !== "BR") {
68
72
  return {
69
73
  error: null,
70
74
  subdivisions: [],
71
- url: i
75
+ url: assembleeUrl
72
76
  };
73
- e.strictEqual(u.children[0].tagName, "DIV");
74
- let o = null, s = !1, f = null, l = "section";
75
- const c = [], S = [];
76
- let p = [], g = [];
77
+ }
78
+ assert.strictEqual(bodyElement.children[0].tagName, "DIV");
79
+ let alineaElement = null;
80
+ let isMultiLinesHeader = false;
81
+ let level = null;
82
+ let state = "section";
83
+ const levels = [];
84
+ const subdivisions = [];
85
+ let subdivisionAlineas = [];
86
+ let subdivisionHeaders = [];
77
87
  try {
78
- for (; l !== null; )
79
- switch (l) {
88
+ while (state !== null) {
89
+ switch (state) {
80
90
  case "section":
81
- n.nextElementSibling ? (n = n.nextElementSibling, n.tagName === "DIV" ? (o = n.firstElementChild, o && !n.id.includes("ftn") && (l = "alineaElement")) : e(
82
- ["BR", "HR", "P"].includes(n.tagName),
83
- `Unexpected tag name "${n.tagName}" for body child section`
84
- )) : l = null;
91
+ if (!currentSection.nextElementSibling) {
92
+ state = null;
93
+ } else {
94
+ currentSection = currentSection.nextElementSibling;
95
+ if (currentSection.tagName === "DIV") {
96
+ alineaElement = currentSection.firstElementChild;
97
+ if (alineaElement && !currentSection.id.includes("ftn")) {
98
+ state = "alineaElement";
99
+ }
100
+ } else {
101
+ assert(["BR", "HR", "P"].includes(currentSection.tagName), `Unexpected tag name "${currentSection.tagName}" for body child section`);
102
+ }
103
+ }
85
104
  break;
86
105
  case "alineaElement":
87
- if (!o) {
88
- l = "nextAlineaElement";
106
+ if (!alineaElement) {
107
+ state = "nextAlineaElement";
89
108
  break;
90
109
  }
91
- switch (o.tagName) {
110
+ switch (alineaElement.tagName) {
92
111
  case "DIV":
93
112
  case "OL":
94
113
  case "BR":
95
114
  case "SPAN":
96
- l = "nextAlineaElement";
115
+ state = "nextAlineaElement";
97
116
  break;
98
117
  case "H4":
99
118
  case "P":
@@ -103,296 +122,272 @@ function he(i, t) {
103
122
  return {
104
123
  error: {
105
124
  code: -2,
106
- message: `Unexpected tag name for alinea element: ${o.tagName}`
125
+ message: `Unexpected tag name for alinea element: ${alineaElement.tagName}`
107
126
  }
108
127
  };
109
128
  }
110
- const E = P(o.textContent).normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\(nouveau\)/, "").replace(/\(Pour coordination\)/, "").replace(/\(Supprimés?\)/, "").replace(/ /g, " ").replace(/[\-,.…]/g, "").trim().replace(/ {1,}/g, "_").toUpperCase(), [x, T] = te(E);
111
- if (!E || x !== null && x < 0) {
112
- l = "nextAlineaElement";
129
+ const nameComputed = cleanText(alineaElement.textContent).normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\(nouveau\)/, "").replace(/\(Pour coordination\)/, "").replace(/\(Supprimés?\)/, "").replace(/ /g, " ").replace(/[\-,.…]/g, "").trim().replace(/ {1,}/g, "_").toUpperCase();
130
+ const [nextLevel, paragraphType] = parseHeader(nameComputed);
131
+ if (!nameComputed || nextLevel !== null && nextLevel < 0) {
132
+ state = "nextAlineaElement";
113
133
  break;
114
134
  }
115
- const h = P(o.outerHTML), $ = P(o.textContent);
116
- if (x === null) {
117
- if (ae($)) {
118
- l = "nextAlineaElement";
135
+ const lineHtml = cleanText(alineaElement.outerHTML);
136
+ const lineText = cleanText(alineaElement.textContent);
137
+ if (nextLevel === null) {
138
+ if (isExcludedAlinea(lineText)) {
139
+ state = "nextAlineaElement";
119
140
  break;
120
141
  }
121
- g.length === 0 || s && p.length === 0 ? g.push({ texte: $, html: h }) : p.push({ texte: $, html: h }), s && g.length >= 2 && (s = !1);
142
+ if (subdivisionHeaders.length === 0 || isMultiLinesHeader && subdivisionAlineas.length === 0) {
143
+ subdivisionHeaders.push({ texte: lineText, html: lineHtml });
144
+ } else {
145
+ subdivisionAlineas.push({ texte: lineText, html: lineHtml });
146
+ }
147
+ if (isMultiLinesHeader && subdivisionHeaders.length >= 2) {
148
+ isMultiLinesHeader = false;
149
+ }
122
150
  } else {
123
- for (f = x; c.length > 0 && f < c[c.length - 1]; )
124
- c.pop();
125
- (c.length === 0 || f > c[c.length - 1]) && c.push(f), p = [], g = [{ texte: $, html: h }], s = !E.match(/^ARTICLES?_/) && !E.match(/^EXPOSE_DES_MOTIFS$/), S.push({
126
- id: `D_${E}`,
127
- type: T,
128
- niveau: f + 1,
129
- niveauRelatif: c.length,
130
- titres: g,
131
- alineas: p
151
+ level = nextLevel;
152
+ while (levels.length > 0 && level < levels[levels.length - 1]) {
153
+ levels.pop();
154
+ }
155
+ if (levels.length === 0 || level > levels[levels.length - 1]) {
156
+ levels.push(level);
157
+ }
158
+ subdivisionAlineas = [];
159
+ subdivisionHeaders = [{ texte: lineText, html: lineHtml }];
160
+ isMultiLinesHeader = !nameComputed.match(/^ARTICLES?_/) && !nameComputed.match(/^EXPOSE_DES_MOTIFS$/);
161
+ subdivisions.push({
162
+ id: `D_${nameComputed}`,
163
+ type: paragraphType,
164
+ niveau: level + 1,
165
+ niveauRelatif: levels.length,
166
+ titres: subdivisionHeaders,
167
+ alineas: subdivisionAlineas
132
168
  });
133
169
  }
134
- l = "nextAlineaElement";
170
+ state = "nextAlineaElement";
135
171
  break;
136
172
  case "nextAlineaElement":
137
- if (!o) {
138
- l = "section";
173
+ if (!alineaElement) {
174
+ state = "section";
139
175
  break;
140
176
  }
141
- o = o.nextElementSibling, l = o ? "alineaElement" : "section";
177
+ alineaElement = alineaElement.nextElementSibling;
178
+ state = alineaElement ? "alineaElement" : "section";
142
179
  break;
143
180
  default:
144
- throw new Error(`Unexpected state: ${l}`);
181
+ throw new Error(`Unexpected state: ${state}`);
145
182
  }
146
- const w = {
183
+ }
184
+ const result = {
147
185
  error: null,
148
- subdivisions: S,
149
- url: i
186
+ subdivisions,
187
+ url: assembleeUrl
150
188
  };
151
- return p = [], g = [], w;
189
+ subdivisionAlineas = [];
190
+ subdivisionHeaders = [];
191
+ return result;
152
192
  } finally {
153
- r.close();
193
+ window.close();
154
194
  }
155
195
  }
156
- function j(i, t) {
157
- const a = Q(i, t?.in);
158
- if (isNaN(+a))
196
+ function formatISO(date, options) {
197
+ const date_ = toDate(date, options?.in);
198
+ if (isNaN(+date_)) {
159
199
  throw new RangeError("Invalid time value");
160
- const r = t?.format ?? "extended", m = t?.representation ?? "complete";
161
- let d = "", u = "";
162
- const n = r === "extended" ? "-" : "", o = r === "extended" ? ":" : "";
163
- if (m !== "time") {
164
- const s = v(a.getDate(), 2), f = v(a.getMonth() + 1, 2);
165
- d = `${v(a.getFullYear(), 4)}${n}${f}${n}${s}`;
166
200
  }
167
- if (m !== "date") {
168
- const s = a.getTimezoneOffset();
169
- if (s !== 0) {
170
- const g = Math.abs(s), w = v(Math.trunc(g / 60), 2), E = v(g % 60, 2);
171
- u = `${s < 0 ? "+" : "-"}${w}:${E}`;
172
- } else
173
- u = "Z";
174
- const f = v(a.getHours(), 2), l = v(a.getMinutes(), 2), c = v(a.getSeconds(), 2), S = d === "" ? "" : "T", p = [f, l, c].join(o);
175
- d = `${d}${S}${p}${u}`;
201
+ const format = options?.format ?? "extended";
202
+ const representation = options?.representation ?? "complete";
203
+ let result = "";
204
+ let tzOffset = "";
205
+ const dateDelimiter = format === "extended" ? "-" : "";
206
+ const timeDelimiter = format === "extended" ? ":" : "";
207
+ if (representation !== "time") {
208
+ const day = addLeadingZeros(date_.getDate(), 2);
209
+ const month = addLeadingZeros(date_.getMonth() + 1, 2);
210
+ const year = addLeadingZeros(date_.getFullYear(), 4);
211
+ result = `${year}${dateDelimiter}${month}${dateDelimiter}${day}`;
176
212
  }
177
- return d;
213
+ if (representation !== "date") {
214
+ const offset = date_.getTimezoneOffset();
215
+ if (offset !== 0) {
216
+ const absoluteOffset = Math.abs(offset);
217
+ const hourOffset = addLeadingZeros(Math.trunc(absoluteOffset / 60), 2);
218
+ const minuteOffset = addLeadingZeros(absoluteOffset % 60, 2);
219
+ const sign = offset < 0 ? "+" : "-";
220
+ tzOffset = `${sign}${hourOffset}:${minuteOffset}`;
221
+ } else {
222
+ tzOffset = "Z";
223
+ }
224
+ const hour = addLeadingZeros(date_.getHours(), 2);
225
+ const minute = addLeadingZeros(date_.getMinutes(), 2);
226
+ const second = addLeadingZeros(date_.getSeconds(), 2);
227
+ const separator = result === "" ? "" : "T";
228
+ const time = [hour, minute, second].join(timeDelimiter);
229
+ result = `${result}${separator}${time}${tzOffset}`;
230
+ }
231
+ return result;
178
232
  }
179
- async function* Se(i, t, a, r, m, d = {}) {
180
- for await (const [
181
- u,
182
- n
183
- ] of le(
184
- i,
185
- t,
186
- a,
187
- r,
188
- m,
189
- d
190
- ))
191
- G(n), yield [
192
- u,
193
- ee.toAmendement(JSON.stringify(n))
233
+ async function* iterRechercheAmendements(amendementsSearchCacheDir, url, incremental, minDateDepot, minDateExamen, options = {}) {
234
+ for await (const [amendementUrlPath, amendement] of iterRechercheRawAmendements(amendementsSearchCacheDir, url, incremental, minDateDepot, minDateExamen, options)) {
235
+ cleanAmendement(amendement);
236
+ yield [
237
+ amendementUrlPath,
238
+ Convert.toAmendement(JSON.stringify(amendement))
194
239
  ];
240
+ }
195
241
  }
196
- async function* le(i, t, a, r, m, d = {}) {
197
- await q.ensureDir(i), e.strictEqual(
198
- /[?&]date_depot=/.exec(t),
199
- null,
200
- `URL ${t} already contains a deposit date`
201
- ), e.strictEqual(
202
- /[?&]order=/.exec(t),
203
- null,
204
- `URL ${t} already contains a sort order`
205
- ), e.strictEqual(
206
- /[?&]page=/.exec(t),
207
- null,
208
- `URL ${t} already contains a page number`
209
- ), a && e.notStrictEqual(
210
- /[?&]etat=/.exec(t),
211
- null,
212
- `In incremental mode, URL ${t} must contain an "etat" query parameter`
213
- ), t += (t.includes("?") ? "&" : "?") + "order=date_depot,desc&page=1", r != null && e.notStrictEqual(
214
- /\d{4}-\d{2}-\d{2}/.exec(r),
215
- null,
216
- `Invalid format for minimum date: ${r}`
217
- );
218
- let u = 0;
219
- e: for (; ; ) {
220
- d.verbose && console.log(`Fetching amendements search page at ${t}…`);
221
- const n = await fetch(t);
222
- e(
223
- n.ok,
224
- `Retrieval of search page at ${t} failed with error: ${n.status} ${n.statusText}`
225
- );
226
- const o = await n.text(), s = X(o);
227
- if (s.querySelector("div.no-result") !== null)
242
+ async function* iterRechercheRawAmendements(amendementsSearchCacheDir, url, incremental, minDateDepot, minDateExamen, options = {}) {
243
+ await fs.ensureDir(amendementsSearchCacheDir);
244
+ assert.strictEqual(/[?&]date_depot=/.exec(url), null, `URL ${url} already contains a deposit date`);
245
+ assert.strictEqual(/[?&]order=/.exec(url), null, `URL ${url} already contains a sort order`);
246
+ assert.strictEqual(/[?&]page=/.exec(url), null, `URL ${url} already contains a page number`);
247
+ if (incremental) {
248
+ assert.notStrictEqual(/[?&]etat=/.exec(url), null, `In incremental mode, URL ${url} must contain an "etat" query parameter`);
249
+ }
250
+ url += (url.includes("?") ? "&" : "?") + "order=date_depot,desc&page=1";
251
+ if (minDateDepot != null) {
252
+ assert.notStrictEqual(/\d{4}-\d{2}-\d{2}/.exec(minDateDepot), null, `Invalid format for minimum date: ${minDateDepot}`);
253
+ }
254
+ let amendementNumber = 0;
255
+ iterSearchPages: while (true) {
256
+ if (options.verbose) {
257
+ console.log(`Fetching amendements search page at ${url}…`);
258
+ }
259
+ const response = await fetch(url);
260
+ assert(response.ok, `Retrieval of search page at ${url} failed with error: ${response.status} ${response.statusText}`);
261
+ const page = await response.text();
262
+ const html = parse(page);
263
+ if (html.querySelector("div.no-result") !== null) {
228
264
  break;
229
- const f = s.querySelector("div.mirror-card-subtitle");
230
- e.notStrictEqual(f, null);
231
- const l = parseInt(/\d+/.exec(f.text)[0]), c = s.querySelector(
232
- "div.amendement-list--results-table"
233
- );
234
- e.notStrictEqual(c, null);
235
- const S = c.querySelector("table");
236
- e.notStrictEqual(S, null);
237
- const p = S.querySelector("tfoot > tr");
238
- e.notStrictEqual(p, null);
239
- const g = p.querySelectorAll("td, th").map((T) => T.text);
240
- e(
241
- Z(g, [
242
- "",
243
- "",
244
- "Dossier législatif",
245
- "Emplacement",
246
- "Auteur",
247
- "État",
248
- "Sort",
249
- "Date d'examen",
250
- "Examiné par",
251
- "Texte visé",
252
- "Date de dépôt"
253
- ]),
254
- `Unexpected columns in ${JSON.stringify(g, null, 2)}`
255
- );
256
- const w = S.querySelectorAll("tbody > tr");
257
- e.notStrictEqual(w.length, 0);
258
- for (const T of w) {
259
- if (u++, r != null || m != null) {
260
- const b = T.querySelectorAll("td");
261
- if (e.strictEqual(
262
- b.length,
263
- 11,
264
- `Unexpected number of columns in amendment row: ${T.outerHTML}`
265
- ), r != null) {
266
- const R = b[10];
267
- if (j(
268
- k(R.text, "d MMMM y", /* @__PURE__ */ new Date(), {
269
- locale: J
270
- }),
271
- { representation: "date" }
272
- ) < r)
273
- break e;
265
+ }
266
+ const amendementsCountDiv = html.querySelector("div.mirror-card-subtitle");
267
+ assert.notStrictEqual(amendementsCountDiv, null);
268
+ const amendementsCount = parseInt(/\d+/.exec(amendementsCountDiv.text)[0]);
269
+ const amendementsDiv = html.querySelector("div.amendement-list--results-table");
270
+ assert.notStrictEqual(amendementsDiv, null);
271
+ const amendementsTable = amendementsDiv.querySelector("table");
272
+ assert.notStrictEqual(amendementsTable, null);
273
+ const tfootTr = amendementsTable.querySelector("tfoot > tr");
274
+ assert.notStrictEqual(tfootTr, null);
275
+ const tfootTrCells = tfootTr.querySelectorAll("td, th").map((cell) => cell.text);
276
+ assert(deepEqual(tfootTrCells, [
277
+ "",
278
+ "",
279
+ "Dossier législatif",
280
+ "Emplacement",
281
+ "Auteur",
282
+ "État",
283
+ "Sort",
284
+ "Date d'examen",
285
+ "Examiné par",
286
+ "Texte visé",
287
+ "Date de dépôt"
288
+ ]), `Unexpected columns in ${JSON.stringify(tfootTrCells, null, 2)}`);
289
+ const amendementsTr = amendementsTable.querySelectorAll("tbody > tr");
290
+ assert.notStrictEqual(amendementsTr.length, 0);
291
+ for (const amendementTr of amendementsTr) {
292
+ amendementNumber++;
293
+ if (minDateDepot != null || minDateExamen != null) {
294
+ const amendementTdList = amendementTr.querySelectorAll("td");
295
+ assert.strictEqual(amendementTdList.length, 11, `Unexpected number of columns in amendment row: ${amendementTr.outerHTML}`);
296
+ if (minDateDepot != null) {
297
+ const dateDepotTd = amendementTdList[10];
298
+ const dateDepot = formatISO(parse$1(dateDepotTd.text, "d MMMM y", /* @__PURE__ */ new Date(), {
299
+ locale: fr
300
+ }), { representation: "date" });
301
+ if (dateDepot < minDateDepot) {
302
+ break iterSearchPages;
303
+ }
274
304
  }
275
- if (m != null) {
276
- const R = b[7];
277
- if (R.text && j(
278
- k(R.text, "d MMMM y", /* @__PURE__ */ new Date(), {
279
- locale: J
280
- }),
281
- { representation: "date" }
282
- ) < m)
283
- continue;
305
+ if (minDateExamen != null) {
306
+ const dateExamenTd = amendementTdList[7];
307
+ if (dateExamenTd.text) {
308
+ const dateExamen = formatISO(parse$1(dateExamenTd.text, "d MMMM y", /* @__PURE__ */ new Date(), {
309
+ locale: fr
310
+ }), { representation: "date" });
311
+ if (dateExamen < minDateExamen) {
312
+ continue;
313
+ }
314
+ }
284
315
  }
285
316
  }
286
- const h = T.getAttribute("data-href");
287
- e.notStrictEqual(h, void 0);
288
- const $ = h.split("/");
289
- e.strictEqual(
290
- $[0],
291
- "",
292
- `Unexpected URL path for amendement: ${h}`
293
- ), e.strictEqual(
294
- $[1],
295
- "dyn",
296
- `Unexpected URL path for amendement: ${h}`
297
- );
298
- const A = F.join(
299
- i,
300
- ...$.slice(2)
301
- ) + ".html";
302
- await q.ensureDir(F.dirname(A));
303
- const y = await q.pathExists(
304
- A
305
- ) ? await q.readFile(A, "utf8") : null, L = T.outerHTML, U = `---
306
- ${W.dump(
307
- {
308
- position: l - u,
309
- search: t.replace(/&page=[\d]+/, "")
310
- },
311
- {
312
- sortKeys: !0
313
- }
314
- )}---
317
+ const amendementUrlPath = amendementTr.getAttribute("data-href");
318
+ assert.notStrictEqual(amendementUrlPath, void 0);
319
+ const amendementUrlPathSplitted = amendementUrlPath.split("/");
320
+ assert.strictEqual(amendementUrlPathSplitted[0], "", `Unexpected URL path for amendement: ${amendementUrlPath}`);
321
+ assert.strictEqual(amendementUrlPathSplitted[1], "dyn", `Unexpected URL path for amendement: ${amendementUrlPath}`);
322
+ const amendementSearchCacheFilePath = path.join(amendementsSearchCacheDir, ...amendementUrlPathSplitted.slice(2)) + ".html";
323
+ await fs.ensureDir(path.dirname(amendementSearchCacheFilePath));
324
+ const existingAmendementSearchCache = await fs.pathExists(amendementSearchCacheFilePath) ? await fs.readFile(amendementSearchCacheFilePath, "utf8") : null;
325
+ const amendementTrOuterHtml = amendementTr.outerHTML;
326
+ const amendementSearchCache = `---
327
+ ${jsYaml.dump({
328
+ position: amendementsCount - amendementNumber,
329
+ search: url.replace(/&page=[\d]+/, "")
330
+ }, {
331
+ sortKeys: true
332
+ })}---
315
333
 
316
- ${L}`;
317
- if (U === y) {
318
- if (a)
319
- break e;
334
+ ${amendementTrOuterHtml}`;
335
+ if (amendementSearchCache === existingAmendementSearchCache) {
336
+ if (incremental) {
337
+ break iterSearchPages;
338
+ }
320
339
  continue;
321
340
  }
322
- if (y !== null && K(y).body === L)
341
+ if (existingAmendementSearchCache !== null && frontMatter(existingAmendementSearchCache).body === amendementTrOuterHtml) {
323
342
  continue;
324
- d.verbose && console.log(
325
- y === null ? `Adding amendement search cache: ${A}…` : `Updating amendement search cache: ${A}…`
326
- );
327
- const _ = new URL(
328
- h,
329
- "https://www.assemblee-nationale.fr/"
330
- ).toString(), O = await fetch(_);
331
- if (O.status === 404) {
332
- console.log(
333
- `Amendement HTML page not found at ${_}. Skipping.`
334
- );
343
+ }
344
+ if (options.verbose) {
345
+ if (existingAmendementSearchCache === null) {
346
+ console.log(`Adding amendement search cache: ${amendementSearchCacheFilePath}…`);
347
+ } else {
348
+ console.log(`Updating amendement search cache: ${amendementSearchCacheFilePath}…`);
349
+ }
350
+ }
351
+ const amendementHtmlUrl = new URL(amendementUrlPath, "https://www.assemblee-nationale.fr/").toString();
352
+ const amendementHtmlResponse = await fetch(amendementHtmlUrl);
353
+ if (amendementHtmlResponse.status === 404) {
354
+ console.log(`Amendement HTML page not found at ${amendementHtmlUrl}. Skipping.`);
335
355
  continue;
336
356
  }
337
- e(
338
- O.ok,
339
- `Retrieval of amendement HTML page at ${_} failed with error: ${O.status} ${O.statusText}`
340
- );
341
- const V = await O.text(), N = X(V).querySelectorAll(
342
- "li.mirror-card-header--options--content--item"
343
- );
344
- e.notStrictEqual(N.length, 0);
345
- const I = N.find((b) => {
346
- const R = b.querySelector("a > span");
347
- return e.notStrictEqual(
348
- R,
349
- null,
350
- `No <span> in <a> found in ${b.toString()}`
351
- ), R.text === "Version JSON";
357
+ assert(amendementHtmlResponse.ok, `Retrieval of amendement HTML page at ${amendementHtmlUrl} failed with error: ${amendementHtmlResponse.status} ${amendementHtmlResponse.statusText}`);
358
+ const amendementHtmlPage = await amendementHtmlResponse.text();
359
+ const amendementHtml = parse(amendementHtmlPage);
360
+ const formatsLi = amendementHtml.querySelectorAll("li.mirror-card-header--options--content--item");
361
+ assert.notStrictEqual(formatsLi.length, 0);
362
+ const jsonLi = formatsLi.find((formatLi) => {
363
+ const formatSpan = formatLi.querySelector("a > span");
364
+ assert.notStrictEqual(formatSpan, null, `No <span> in <a> found in ${formatLi.toString()}`);
365
+ return formatSpan.text === "Version JSON";
352
366
  });
353
- e.notStrictEqual(
354
- I,
355
- void 0,
356
- `No JSON version found for amendement at ${_}`
357
- );
358
- const C = I.querySelector("a");
359
- e.notStrictEqual(
360
- C,
361
- null,
362
- `No <a> found in ${I.toString()}`
363
- );
364
- const H = C.getAttribute("href");
365
- e.notStrictEqual(
366
- H,
367
- void 0,
368
- `No URL found for JSON version of amendement: ${I.toString()}`
369
- );
370
- const D = new URL(
371
- H,
372
- "https://www.assemblee-nationale.fr/"
373
- ).toString(), M = await fetch(D);
374
- e(
375
- M.ok,
376
- `Retrieval of amendement JSON page at ${D} failed with error: ${M.status} ${M.statusText}`
377
- );
378
- const B = await M.json();
379
- yield [h, B], await q.writeFile(
380
- A,
381
- U,
382
- "utf8"
383
- );
367
+ assert.notStrictEqual(jsonLi, void 0, `No JSON version found for amendement at ${amendementHtmlUrl}`);
368
+ const jsonA = jsonLi.querySelector("a");
369
+ assert.notStrictEqual(jsonA, null, `No <a> found in ${jsonLi.toString()}`);
370
+ const amendementJsonUrlPath = jsonA.getAttribute("href");
371
+ assert.notStrictEqual(amendementJsonUrlPath, void 0, `No URL found for JSON version of amendement: ${jsonLi.toString()}`);
372
+ const amendementJsonUrl = new URL(amendementJsonUrlPath, "https://www.assemblee-nationale.fr/").toString();
373
+ const amendementJsonResponse = await fetch(amendementJsonUrl);
374
+ assert(amendementJsonResponse.ok, `Retrieval of amendement JSON page at ${amendementJsonUrl} failed with error: ${amendementJsonResponse.status} ${amendementJsonResponse.statusText}`);
375
+ const amendement = await amendementJsonResponse.json();
376
+ yield [amendementUrlPath, amendement];
377
+ await fs.writeFile(amendementSearchCacheFilePath, amendementSearchCache, "utf8");
384
378
  }
385
- const E = s.querySelector(
386
- "div.an-pagination--item > i.an-icons-chevron-right ~ a"
387
- );
388
- if (E === null)
379
+ const paginationA = html.querySelector("div.an-pagination--item > i.an-icons-chevron-right ~ a");
380
+ if (paginationA === null) {
389
381
  break;
390
- const x = E.getAttribute("href");
391
- e.notStrictEqual(x, void 0), t = new URL(x, "https://www.assemblee-nationale.fr/").toString();
382
+ }
383
+ const urlPath = paginationA.getAttribute("href");
384
+ assert.notStrictEqual(urlPath, void 0);
385
+ url = new URL(urlPath, "https://www.assemblee-nationale.fr/").toString();
392
386
  }
393
387
  }
394
388
  export {
395
- Se as iterRechercheAmendements,
396
- le as iterRechercheRawAmendements,
397
- he as parseTexte
389
+ iterRechercheAmendements,
390
+ iterRechercheRawAmendements,
391
+ parseTexte
398
392
  };
393
+ //# sourceMappingURL=parsers.js.map