@tricoteuses/senat 2.5.9 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,11 +2,11 @@ import assert from "assert";
2
2
  import commandLineArgs from "command-line-args";
3
3
  import fs from "fs-extra";
4
4
  import path from "path";
5
- import { iterLoadSenatDossiersLegislatifsRapportUrls, iterLoadSenatDossiersLegislatifsTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, } from "../loaders";
6
- import { parseExposeDesMotifs, parseTexte, parseTexteFromFile, } from "../model/texte";
5
+ import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatDossiersLegislatifsRapportUrls, iterLoadSenatDossiersLegislatifsTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
6
+ import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../model/texte";
7
7
  import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
8
8
  import { commonOptions } from "./shared/cli_helpers";
9
- import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue, } from "./shared/util";
9
+ import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util";
10
10
  const optionsDefinitions = [
11
11
  ...commonOptions,
12
12
  {
@@ -35,28 +35,6 @@ const optionsDefinitions = [
35
35
  ];
36
36
  const options = commandLineArgs(optionsDefinitions);
37
37
  const textDecoder = new TextDecoder("utf8");
38
- async function retrieveDocument(documentUrl) {
39
- if (!options["silent"]) {
40
- console.log(`Retrieving document ${documentUrl}…`);
41
- }
42
- try {
43
- const response = await fetchWithRetry(documentUrl);
44
- if (!response.ok) {
45
- if (response.status === 404) {
46
- console.warn(`Texte ${documentUrl} not found`);
47
- }
48
- else {
49
- console.error(`An error occurred while retrieving texte ${documentUrl}: ${response.status}`);
50
- }
51
- return null;
52
- }
53
- return response.arrayBuffer();
54
- }
55
- catch (error) {
56
- console.error(error.message);
57
- return null;
58
- }
59
- }
60
38
  async function retrieveTextes(dataDir, sessions) {
61
39
  const textesDir = path.join(dataDir, TEXTE_FOLDER);
62
40
  fs.ensureDirSync(textesDir);
@@ -74,26 +52,18 @@ async function retrieveTextes(dataDir, sessions) {
74
52
  fs.ensureDirSync(texteDir);
75
53
  let exposeDesMotifsContent = null;
76
54
  if (texteMetadata.url_expose_des_motifs) {
77
- if (!options["silent"]) {
78
- console.log("Retrieving exposé des motifs…");
79
- }
80
- const exposeDesMotifsPath = path.join(texteDir, `${texteMetadata.name}-expose.html`);
81
- exposeDesMotifsContent = await retrieveDocument(texteMetadata.url_expose_des_motifs.toString());
82
- if (!exposeDesMotifsContent) {
83
- continue;
84
- }
85
- fs.writeFileSync(exposeDesMotifsPath, Buffer.from(exposeDesMotifsContent));
55
+ exposeDesMotifsContent = await downloadExposeDesMotifs(texteDir, texteMetadata.name, String(texteMetadata.url_expose_des_motifs));
86
56
  }
87
57
  if (isOptionEmptyOrHasValue(options["formats"], "xml")) {
88
58
  const textePath = path.join(texteDir, `${texteMetadata.name}.xml`);
89
59
  let texteBuffer = null;
90
60
  if (!options["force"] && fs.existsSync(textePath)) {
91
61
  if (!options["silent"]) {
92
- console.info(`Already retrieved texte ${textePath}…`);
62
+ console.info(`Already downloaded texte ${textePath}…`);
93
63
  }
94
64
  }
95
65
  else {
96
- texteBuffer = await retrieveDocument(texteMetadata.url_xml.toString());
66
+ texteBuffer = await downloadDocument(texteMetadata.url_xml.toString());
97
67
  if (!texteBuffer) {
98
68
  texteUrlsNotFoundOrError.push(texteMetadata.url_xml);
99
69
  continue;
@@ -102,43 +72,21 @@ async function retrieveTextes(dataDir, sessions) {
102
72
  retrievedTextesCount++;
103
73
  }
104
74
  if (options["parseDocuments"]) {
105
- if (!options["silent"]) {
106
- console.log(`Parsing texte ${texteMetadata.name}.xml…`);
107
- }
108
- let parsedTexte = null;
109
- if (texteBuffer) {
110
- const texteXml = textDecoder.decode(texteBuffer);
111
- parsedTexte = parseTexte(texteXml);
112
- }
113
- else {
114
- parsedTexte = await parseTexteFromFile(textePath);
115
- }
75
+ const parsedTexte = await parseDocument(texteMetadata.session, transformedTextesDir, textePath, texteMetadata.name, texteBuffer, exposeDesMotifsContent);
116
76
  if (!parsedTexte) {
117
77
  texteUrlsParseError.push(texteMetadata.url_xml);
118
- continue;
119
78
  }
120
- if (exposeDesMotifsContent) {
121
- if (!options["silent"]) {
122
- console.log("Parsing exposé des motifs…");
123
- }
124
- const exposeDesMotifsHtml = textDecoder.decode(exposeDesMotifsContent);
125
- parsedTexte.exposeDesMotifs =
126
- parseExposeDesMotifs(exposeDesMotifsHtml);
127
- }
128
- const transformedTexteDir = path.join(transformedTextesDir, `${texteMetadata.session ?? UNDEFINED_SESSION}`, texteMetadata.name);
129
- fs.ensureDirSync(transformedTexteDir);
130
- fs.writeJSONSync(path.join(transformedTexteDir, `${texteMetadata.name}.json`), parsedTexte, { spaces: 2 });
131
79
  }
132
80
  }
133
81
  if (isOptionEmptyOrHasValue(options["formats"], "html")) {
134
82
  const textePath = path.join(texteDir, `${texteMetadata.name}.html`);
135
83
  if (!options["force"] && fs.existsSync(textePath)) {
136
84
  if (!options["silent"]) {
137
- console.info(`Already retrieved texte ${textePath}…`);
85
+ console.info(`Already downloaded texte ${textePath}…`);
138
86
  }
139
87
  }
140
88
  else {
141
- const texteBuffer = await retrieveDocument(texteMetadata.url_html.toString());
89
+ const texteBuffer = await downloadDocument(texteMetadata.url_html.toString());
142
90
  if (!texteBuffer) {
143
91
  texteUrlsNotFoundOrError.push(texteMetadata.url_html);
144
92
  continue;
@@ -151,11 +99,11 @@ async function retrieveTextes(dataDir, sessions) {
151
99
  const textePath = path.join(texteDir, `${texteMetadata.name}.pdf`);
152
100
  if (!options["force"] && fs.existsSync(textePath)) {
153
101
  if (!options["silent"]) {
154
- console.info(`Already retrieved texte ${textePath}…`);
102
+ console.info(`Already downloaded texte ${textePath}…`);
155
103
  }
156
104
  }
157
105
  else {
158
- const texteBuffer = await retrieveDocument(texteMetadata.url_pdf.toString());
106
+ const texteBuffer = await downloadDocument(texteMetadata.url_pdf.toString());
159
107
  if (!texteBuffer) {
160
108
  texteUrlsNotFoundOrError.push(texteMetadata.url_pdf);
161
109
  continue;
@@ -188,11 +136,11 @@ async function retrieveRapports(dataDir, sessions) {
188
136
  const rapportPath = path.join(rapportDir, `${rapportMetadata.name}.html`);
189
137
  if (!options["force"] && fs.existsSync(rapportPath)) {
190
138
  if (!options["silent"]) {
191
- console.info(`Already retrieved rapport ${rapportPath}…`);
139
+ console.info(`Already downloaded rapport ${rapportPath}…`);
192
140
  }
193
141
  continue;
194
142
  }
195
- const rapportBuffer = await retrieveDocument(rapportMetadata.url_html.toString());
143
+ const rapportBuffer = await downloadDocument(rapportMetadata.url_html.toString());
196
144
  if (!rapportBuffer) {
197
145
  rapportUrlsNotFoundOrError.push(rapportMetadata.url_html);
198
146
  continue;
@@ -204,11 +152,11 @@ async function retrieveRapports(dataDir, sessions) {
204
152
  const rapportPath = path.join(rapportDir, `${rapportMetadata.name}.pdf`);
205
153
  if (!options["force"] && fs.existsSync(rapportPath)) {
206
154
  if (!options["silent"]) {
207
- console.info(`Already retrieved rapport ${rapportPath}…`);
155
+ console.info(`Already downloaded rapport ${rapportPath}…`);
208
156
  }
209
157
  continue;
210
158
  }
211
- const rapportBuffer = await retrieveDocument(rapportMetadata.url_pdf.toString());
159
+ const rapportBuffer = await downloadDocument(rapportMetadata.url_pdf.toString());
212
160
  if (!rapportBuffer) {
213
161
  rapportUrlsNotFoundOrError.push(rapportMetadata.url_pdf);
214
162
  continue;
@@ -223,6 +171,65 @@ async function retrieveRapports(dataDir, sessions) {
223
171
  console.log(`${rapportUrlsNotFoundOrError.length} rapports failed with URLs ${rapportUrlsNotFoundOrError.join(", ")}`);
224
172
  }
225
173
  }
174
+ async function downloadExposeDesMotifs(texteDir, texteName, url) {
175
+ const content = await downloadDocument(url);
176
+ if (!content) {
177
+ return null;
178
+ }
179
+ const exposeDesMotifsPath = path.join(texteDir, `${texteName}-expose.html`);
180
+ fs.writeFileSync(exposeDesMotifsPath, Buffer.from(content));
181
+ return content;
182
+ }
183
+ async function downloadDocument(documentUrl) {
184
+ if (!options["silent"]) {
185
+ console.log(`Downloading document ${documentUrl}…`);
186
+ }
187
+ try {
188
+ const response = await fetchWithRetry(documentUrl);
189
+ if (!response.ok) {
190
+ if (response.status === 404) {
191
+ console.warn(`Texte ${documentUrl} not found`);
192
+ }
193
+ else {
194
+ console.error(`An error occurred while retrieving document ${documentUrl}: ${response.status}`);
195
+ }
196
+ return null;
197
+ }
198
+ return response.arrayBuffer();
199
+ }
200
+ catch (error) {
201
+ console.error(error.message);
202
+ return null;
203
+ }
204
+ }
205
+ async function parseDocument(session, transformedTextesDir, textePath, texteName, texteBuffer, exposeDesMotifs = null) {
206
+ if (!options["silent"]) {
207
+ console.log(`Parsing texte ${textePath}…`);
208
+ }
209
+ let parsedTexte;
210
+ if (texteBuffer) {
211
+ const texteXml = textDecoder.decode(texteBuffer);
212
+ parsedTexte = parseTexte(texteXml);
213
+ }
214
+ else {
215
+ parsedTexte = await parseTexteFromFile(textePath);
216
+ }
217
+ if (!parsedTexte) {
218
+ return null;
219
+ }
220
+ if (exposeDesMotifs) {
221
+ if (!options["silent"]) {
222
+ console.log("Parsing exposé des motifs…");
223
+ }
224
+ const exposeDesMotifsHtml = textDecoder.decode(exposeDesMotifs);
225
+ parsedTexte.exposeDesMotifs =
226
+ parseExposeDesMotifs(exposeDesMotifsHtml);
227
+ }
228
+ const transformedTexteDir = path.join(transformedTextesDir, `${session ?? UNDEFINED_SESSION}`, texteName);
229
+ fs.ensureDirSync(transformedTexteDir);
230
+ fs.writeJSONSync(path.join(transformedTexteDir, `${texteName}.json`), parsedTexte, { spaces: 2 });
231
+ return parsedTexte;
232
+ }
226
233
  async function main() {
227
234
  const dataDir = options["dataDir"];
228
235
  assert(dataDir, "Missing argument: data directory");
@@ -0,0 +1,11 @@
1
+ export interface CompteRendu {
2
+ sections: Section[];
3
+ }
4
+ export interface Section {
5
+ id: string | null;
6
+ interventions: Intervention[];
7
+ }
8
+ export interface Intervention {
9
+ id: string | null;
10
+ texteHtml: string | null;
11
+ }
@@ -0,0 +1 @@
1
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.5.9",
3
+ "version": "2.6.1",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",
@@ -46,6 +46,7 @@
46
46
  "data:download": "bash -c 'npm run data:retrieve_open_data -- $@ && npm run data:convert_data -- $@' bash",
47
47
  "data:generate_schemas": "tsx src/scripts/retrieve_open_data.ts --schema",
48
48
  "data:retrieve_agenda": "TZ='Etc/UTC' tsx src/scripts/retrieve_agenda.ts",
49
+ "data:retrieve_comptes_rendus": "tsx src/scripts/retrieve_comptes_rendus.ts",
49
50
  "data:retrieve_documents": "tsx src/scripts/retrieve_documents.ts",
50
51
  "data:retrieve_open_data": "tsx src/scripts/retrieve_open_data.ts --all",
51
52
  "data:retrieve_senateurs_photos": "tsx src/scripts/retrieve_senateurs_photos.ts --fetch",
@@ -67,7 +68,6 @@
67
68
  "node-stream-zip": "^1.8.2",
68
69
  "pg": "^8.13.1",
69
70
  "pg-cursor": "^2.12.1",
70
- "pg-promise": "^10.9.2",
71
71
  "slug": "^4.0.2",
72
72
  "tsx": "^4.19.4",
73
73
  "windows-1252": "^1.0.0"