@tricoteuses/senat 2.8.1 → 2.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/lib/aggregates.d.ts +52 -0
  2. package/lib/aggregates.js +949 -0
  3. package/lib/aggregates.mjs +726 -0
  4. package/lib/aggregates.ts +852 -0
  5. package/lib/config.mjs +16 -0
  6. package/lib/config.ts +26 -0
  7. package/lib/databases.mjs +55 -0
  8. package/lib/databases.ts +68 -0
  9. package/lib/datasets.mjs +78 -0
  10. package/lib/datasets.ts +118 -0
  11. package/lib/fields.d.ts +10 -0
  12. package/lib/fields.js +68 -0
  13. package/lib/fields.mjs +22 -0
  14. package/lib/fields.ts +29 -0
  15. package/lib/index.mjs +7 -0
  16. package/lib/index.ts +64 -0
  17. package/lib/inserters.d.ts +98 -0
  18. package/lib/inserters.js +500 -0
  19. package/lib/inserters.mjs +360 -0
  20. package/lib/inserters.ts +521 -0
  21. package/lib/loaders.mjs +97 -0
  22. package/lib/loaders.ts +173 -0
  23. package/lib/model/ameli.mjs +57 -0
  24. package/lib/model/ameli.ts +86 -0
  25. package/lib/model/debats.mjs +43 -0
  26. package/lib/model/debats.ts +68 -0
  27. package/lib/model/dosleg.mjs +163 -0
  28. package/lib/model/dosleg.ts +204 -0
  29. package/lib/model/index.mjs +4 -0
  30. package/lib/model/index.ts +13 -0
  31. package/lib/model/questions.d.ts +0 -20
  32. package/lib/model/questions.js +1 -32
  33. package/lib/model/questions.mjs +76 -0
  34. package/lib/model/questions.ts +102 -0
  35. package/lib/model/sens.mjs +339 -0
  36. package/lib/model/sens.ts +432 -0
  37. package/lib/model/texte.mjs +156 -0
  38. package/lib/model/texte.ts +174 -0
  39. package/lib/raw_types/ameli.d.ts +20 -0
  40. package/lib/raw_types/questions.d.ts +4 -70
  41. package/lib/raw_types_kysely/ameli.d.ts +915 -0
  42. package/lib/raw_types_kysely/ameli.js +7 -0
  43. package/lib/raw_types_kysely/ameli.mjs +5 -0
  44. package/lib/raw_types_kysely/ameli.ts +951 -0
  45. package/lib/raw_types_kysely/debats.d.ts +207 -0
  46. package/lib/raw_types_kysely/debats.js +7 -0
  47. package/lib/raw_types_kysely/debats.mjs +5 -0
  48. package/lib/raw_types_kysely/debats.ts +222 -0
  49. package/lib/raw_types_kysely/dosleg.d.ts +3532 -0
  50. package/lib/raw_types_kysely/dosleg.js +7 -0
  51. package/lib/raw_types_kysely/dosleg.mjs +5 -0
  52. package/lib/raw_types_kysely/dosleg.ts +3621 -0
  53. package/lib/raw_types_kysely/questions.d.ts +414 -0
  54. package/lib/raw_types_kysely/questions.js +7 -0
  55. package/lib/raw_types_kysely/questions.mjs +5 -0
  56. package/lib/raw_types_kysely/questions.ts +426 -0
  57. package/lib/raw_types_kysely/sens.d.ts +4394 -0
  58. package/lib/raw_types_kysely/sens.js +7 -0
  59. package/lib/raw_types_kysely/sens.mjs +5 -0
  60. package/lib/raw_types_kysely/sens.ts +4499 -0
  61. package/lib/raw_types_schemats/ameli.mjs +2 -0
  62. package/lib/raw_types_schemats/ameli.ts +601 -0
  63. package/lib/raw_types_schemats/debats.mjs +2 -0
  64. package/lib/raw_types_schemats/debats.ts +145 -0
  65. package/lib/raw_types_schemats/dosleg.mjs +2 -0
  66. package/lib/raw_types_schemats/dosleg.ts +2193 -0
  67. package/lib/raw_types_schemats/questions.mjs +2 -0
  68. package/lib/raw_types_schemats/questions.ts +249 -0
  69. package/lib/raw_types_schemats/sens.mjs +2 -0
  70. package/lib/raw_types_schemats/sens.ts +2907 -0
  71. package/lib/scripts/convert_data.mjs +95 -0
  72. package/lib/scripts/convert_data.ts +119 -0
  73. package/lib/scripts/data-download.d.ts +1 -0
  74. package/lib/scripts/data-download.js +9 -0
  75. package/lib/scripts/datautil.mjs +16 -0
  76. package/lib/scripts/datautil.ts +19 -0
  77. package/lib/scripts/images/transparent_150x192.jpg +0 -0
  78. package/lib/scripts/images/transparent_155x225.jpg +0 -0
  79. package/lib/scripts/parse_textes.mjs +38 -0
  80. package/lib/scripts/parse_textes.ts +52 -0
  81. package/lib/scripts/retrieve_documents.mjs +243 -0
  82. package/lib/scripts/retrieve_documents.ts +279 -0
  83. package/lib/scripts/retrieve_open_data.js +11 -9
  84. package/lib/scripts/retrieve_open_data.mjs +214 -0
  85. package/lib/scripts/retrieve_open_data.ts +261 -0
  86. package/lib/scripts/retrieve_senateurs_photos.mjs +147 -0
  87. package/lib/scripts/retrieve_senateurs_photos.ts +177 -0
  88. package/lib/scripts/retrieve_textes.d.ts +1 -0
  89. package/lib/scripts/retrieve_textes.mjs +165 -0
  90. package/lib/scripts/retrieve_textes.ts +79 -0
  91. package/lib/scripts/shared/cli_helpers.ts +36 -0
  92. package/lib/scripts/shared/util.ts +33 -0
  93. package/lib/strings.mjs +18 -0
  94. package/lib/strings.ts +26 -0
  95. package/lib/types/ameli.mjs +13 -0
  96. package/lib/types/ameli.ts +21 -0
  97. package/lib/types/debats.mjs +2 -0
  98. package/lib/types/debats.ts +6 -0
  99. package/lib/types/dosleg.mjs +151 -0
  100. package/lib/types/dosleg.ts +284 -0
  101. package/lib/types/questions.mjs +1 -0
  102. package/lib/types/questions.ts +3 -0
  103. package/lib/types/sens.mjs +1 -0
  104. package/lib/types/sens.ts +12 -0
  105. package/lib/types/sessions.mjs +43 -0
  106. package/lib/types/sessions.ts +42 -0
  107. package/lib/types/texte.mjs +16 -0
  108. package/lib/types/texte.ts +66 -0
  109. package/lib/typings/windows-1252.d.js +2 -0
  110. package/lib/typings/windows-1252.d.mjs +2 -0
  111. package/lib/typings/windows-1252.d.ts +11 -0
  112. package/lib/validators/config.mjs +54 -0
  113. package/lib/validators/config.ts +79 -0
  114. package/lib/validators/senat.mjs +24 -0
  115. package/lib/validators/senat.ts +26 -0
  116. package/package.json +7 -5
@@ -0,0 +1,279 @@
1
+ import assert from "assert"
2
+ import commandLineArgs from "command-line-args"
3
+ import fs from "fs-extra"
4
+ import path from "path"
5
+
6
+ import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg"
7
+ import { parseTexte, parseTexteFromFile } from "../model/texte"
8
+ import { UNDEFINED_SESSION } from "./datautil"
9
+ import { commonOptions } from "./shared/cli_helpers"
10
+ import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util"
11
+
12
// Command-line options specific to this script. Each one is declared on its
// own and then appended, in the same order, after the shared options.
const sessionsOption = {
  help: "sessions of textes to retrieve; leave empty for all",
  multiple: true,
  name: "sessions",
  type: String,
}

const parseDocumentsOption = {
  help: "parse and convert documents into JSON (textes only for now, requires format xml)",
  name: "parseDocuments",
  type: Boolean,
}

const formatsOption = {
  alias: "F",
  help: "formats of documents to retrieve (xml/html/pdf for textes, html/pdf for rapports); leave empty for all",
  multiple: true,
  name: "formats",
  type: String,
}

const typesOption = {
  help: "types of documents to retrieve (textes/rapports); leave empty for all",
  multiple: true,
  name: "types",
  type: String,
}

const forceOption = {
  help: "force retrieve all documents, even already retrieved ones",
  name: "force",
  type: Boolean,
}

const optionsDefinitions = [
  ...commonOptions,
  sessionsOption,
  parseDocumentsOption,
  formatsOption,
  typesOption,
  forceOption,
]

// Parsed once at module load; every function below reads from this object.
const options = commandLineArgs(optionsDefinitions)

// Base URLs against which the relative document names are resolved.
const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/"
const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/"
const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/"

// Shared decoder used to turn downloaded XML bytes into a string before parsing.
const textDecoder = new TextDecoder("utf8")
51
+
52
/**
 * Downloads one document (texte or rapport) and returns its raw bytes.
 *
 * Failures are logged and reported as `null` so that the caller can record the
 * URL and carry on with the next document instead of aborting the whole run.
 *
 * @param documentUrl Absolute URL of the document to download.
 * @returns The response body, or `null` when the server answered with a
 *   non-2xx status or when the request/body read failed.
 */
async function retrieveDocument (documentUrl: string): Promise<ArrayBuffer | null> {
  if (!options.silent) {
    console.log(`Retrieving document ${documentUrl}…`)
  }

  try {
    // NOTE(review): fetchWithRetry is defined in ./shared/util; presumably it
    // retries transient failures before resolving — confirm there.
    const response = await fetchWithRetry(documentUrl)
    if (!response.ok) {
      if (response.status === 404) {
        console.warn(`Document ${documentUrl} not found`)
      } else {
        console.error(`An error occurred while retrieving document ${documentUrl}: ${response.status}`)
      }
      return null
    }
    // Await inside the try block: reading the body can still fail (connection
    // reset mid-transfer) and that rejection must be handled here, not leak to
    // the caller as an exception.
    return await response.arrayBuffer()
  } catch (error: any) {
    console.error(`An error occurred while retrieving document ${documentUrl}: ${error?.message ?? error}`)
    return null
  }
}
73
+
74
/**
 * Downloads the textes yielded by `findSenatTexteUrls` in every requested
 * format (xml, html, pdf) and optionally converts the XML version to JSON.
 *
 * Layout written under `dataDir`:
 * - `leg/original/<session>/<texteName>/` holds the downloaded files;
 * - `leg/transformed/<session>/<texteName>/<texteName>.akn.json` holds the
 *   parsed XML when the `parseDocuments` option is set.
 *
 * Each format is handled independently: a file that is already present, a
 * failed download or a parse error for one format never prevents the other
 * formats of the same texte from being processed.
 *
 * @param dataDir Root directory of the data set.
 */
async function retrieveTextes (dataDir: string) {
  const textesDir = path.join(dataDir, "leg")
  fs.ensureDirSync(textesDir)
  const originalTextesDir = path.join(textesDir, "original")
  const transformedTextesDir = path.join(textesDir, "transformed")
  ensureAndClearDir(transformedTextesDir)

  let retrievedTextesCount = 0
  const texteUrlsNotFoundOrError: string[] = []
  const texteUrlsParseError: string[] = []

  // Makes sure `filePath` exists, downloading it from `absoluteUrl` unless it
  // is already there (and --force is off). `available` tells whether the file
  // is on disk afterwards; `buffer` is only set when a download just happened.
  const fetchTexteFile = async (
    absoluteUrl: string,
    filePath: string,
  ): Promise<{ available: boolean, buffer: ArrayBuffer | null }> => {
    if (!options.force && fs.existsSync(filePath)) {
      if (!options.silent) {
        console.info(`Already retrieved texte ${filePath}…`)
      }
      return { available: true, buffer: null }
    }

    const buffer = await retrieveDocument(absoluteUrl)
    if (!buffer) {
      texteUrlsNotFoundOrError.push(absoluteUrl)
      return { available: false, buffer: null }
    }
    fs.writeFileSync(filePath, Buffer.from(buffer))
    retrievedTextesCount++
    return { available: true, buffer }
  }

  for await (const texte of findSenatTexteUrls(options.sessions)) {
    const texteName = path.parse(texte.url).name
    const sessionDirName = `${texte.session ?? UNDEFINED_SESSION}`
    const texteDir = path.join(originalTextesDir, sessionDirName, texteName)
    fs.ensureDirSync(texteDir)

    if (isOptionEmptyOrHasValue(options.formats, "xml")) {
      const texteXmlUrl = `${texteName}.akn.xml`
      const texteXmlAbsoluteUrl = new URL(texteXmlUrl, SENAT_TEXTE_XML_BASE_URL).toString()
      const textePath = path.join(texteDir, texteXmlUrl)
      const { available, buffer } = await fetchTexteFile(texteXmlAbsoluteUrl, textePath)

      if (available && options.parseDocuments) {
        if (!options.silent) {
          console.log(`Parsing texte ${texteXmlUrl}…`)
        }

        // Reuse the bytes that were just downloaded; otherwise parse the copy
        // kept on disk from a previous run.
        const parsedTexte = buffer
          ? parseTexte(textDecoder.decode(buffer))
          : await parseTexteFromFile(textePath)

        if (parsedTexte) {
          const transformedTexteDir = path.join(transformedTextesDir, sessionDirName, texteName)
          fs.ensureDirSync(transformedTexteDir)
          fs.writeJSONSync(path.join(transformedTexteDir, `${texteName}.akn.json`), parsedTexte, { spaces: 2 })
        } else {
          texteUrlsParseError.push(texteXmlAbsoluteUrl)
        }
      }
    }

    // HTML and PDF share the same base URL and need no post-processing.
    for (const format of ["html", "pdf"]) {
      if (!isOptionEmptyOrHasValue(options.formats, format)) {
        continue
      }
      const texteFileName = `${texteName}.${format}`
      const texteAbsoluteUrl = new URL(texteFileName, SENAT_TEXTE_BASE_URL).toString()
      await fetchTexteFile(texteAbsoluteUrl, path.join(texteDir, texteFileName))
    }
  }

  if (options.verbose) {
    console.log(`${retrievedTextesCount} textes retrieved`)
    console.log(
      `${texteUrlsNotFoundOrError.length} textes failed to be retrieved with URLs ${texteUrlsNotFoundOrError.join(", ")}`
    )
    if (options.parseDocuments) {
      console.log(`${texteUrlsParseError.length} textes failed to be parsed with URLs ${texteUrlsParseError.join(", ")}`)
    }
  }
  // TODO retrieve exposé des motifs (/leg/exposes-des-motifs)
}
189
+
190
/**
 * Downloads the rapports yielded by `findSenatRapportUrls` in every requested
 * format (html, pdf) into `rap/<session>/<rapportName>/` under `dataDir`.
 *
 * The HTML version is fetched as `<rapportName>_mono.html` and the PDF version
 * as `<rapportName>1.pdf`, both located next to the original rapport URL.
 * Formats are independent: an already-present or failed HTML file does not
 * prevent the PDF from being retrieved.
 *
 * @param dataDir Root directory of the data set.
 */
async function retrieveRapports (dataDir: string) {
  const rapportsDir = path.join(dataDir, "rap")
  fs.ensureDirSync(rapportsDir)

  let retrievedRapportsCount = 0
  const rapportUrlsNotFoundOrError: string[] = []

  for await (const rapport of findSenatRapportUrls(options.sessions)) {
    const parsedRapportUrl = path.parse(rapport.url)
    const rapportName = parsedRapportUrl.name
    const rapportDir = path.join(rapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName)
    fs.ensureDirSync(rapportDir)

    // [format name as given on the command line, file name on the server]
    const rapportFiles = [
      ["html", `${rapportName}_mono.html`],
      ["pdf", `${rapportName}1.pdf`],
    ]

    for (const [format, rapportFileName] of rapportFiles) {
      if (!isOptionEmptyOrHasValue(options.formats, format)) {
        continue
      }

      // Keep the directory part of the original URL and swap the file name.
      const rapportRelativeUrl = path.format({
        dir: parsedRapportUrl.dir,
        base: rapportFileName,
      })
      const rapportAbsoluteUrl = new URL(rapportRelativeUrl, SENAT_RAPPORT_BASE_URL).toString()
      const rapportPath = path.join(rapportDir, rapportFileName)

      // `continue` below only moves on to the next format of this rapport.
      if (!options.force && fs.existsSync(rapportPath)) {
        if (!options.silent) {
          console.info(`Already retrieved rapport ${rapportPath}…`)
        }
        continue
      }

      const rapportBuffer = await retrieveDocument(rapportAbsoluteUrl)
      if (!rapportBuffer) {
        rapportUrlsNotFoundOrError.push(rapportAbsoluteUrl)
        continue
      }
      fs.writeFileSync(rapportPath, Buffer.from(rapportBuffer))
      retrievedRapportsCount++
    }
  }

  if (options.verbose) {
    console.log(`${retrievedRapportsCount} rapports retrieved`)
    console.log(
      `${rapportUrlsNotFoundOrError.length} rapports failed with URLs ${rapportUrlsNotFoundOrError.join(", ")}`
    )
  }
}
261
+
262
/**
 * Script entry point: checks that a data directory was given, then runs the
 * retrievers selected by the `types` option, textes first and rapports second.
 */
async function main() {
  const { dataDir } = options
  assert(dataDir, "Missing argument: data directory")

  const retrievers: Array<[string, (dataDir: string) => Promise<void>]> = [
    ["textes", retrieveTextes],
    ["rapports", retrieveRapports],
  ]

  // Sequential on purpose: one retriever finishes before the next one starts.
  for (const [documentType, retrieve] of retrievers) {
    if (isOptionEmptyOrHasValue(options.types, documentType)) {
      await retrieve(dataDir)
    }
  }
}

// Exit explicitly with a status code reflecting the outcome of the run.
const exitWithSuccess = () => process.exit(0)
const exitWithFailure = (error: unknown) => {
  console.log(error)
  process.exit(1)
}

main()
  .then(exitWithSuccess)
  .catch(exitWithFailure)
@@ -4,11 +4,11 @@ import commandLineArgs from "command-line-args";
4
4
  import fs from "fs-extra";
5
5
  // import fetch from "node-fetch"
6
6
  import path from "path";
7
- // import stream from "stream"
8
7
  import StreamZip from "node-stream-zip";
9
8
  import readline from "readline";
10
- // import util from "util"
11
9
  import windows1252 from "windows-1252";
10
+ import { pipeline } from "stream";
11
+ import { promisify } from "util";
12
12
  import config from "../config";
13
13
  import { datasets, getChosenDatasets, getEnabledDatasets } from "../datasets";
14
14
  import { commonOptions } from "./shared/cli_helpers";
@@ -59,7 +59,14 @@ const optionsDefinitions = [
59
59
  },
60
60
  ];
61
61
  const options = commandLineArgs(optionsDefinitions);
62
- // const pipeline = util.promisify(stream.pipeline)
62
const streamPipeline = promisify(pipeline);
/**
 * Downloads `url` and streams the response body into the file `dest`.
 *
 * @param {string} url Address of the resource to download.
 * @param {string} dest Path of the file to create.
 * @returns {Promise<void>} Resolves once the file has been completely written.
 * @throws {Error} When the server answers with a non-2xx status, when the
 *   response carries no body, or when streaming to disk fails.
 */
async function downloadFile(url, dest) {
    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`Download failed ${response.status} ${response.statusText} for ${url}`);
    }
    if (!response.body) {
        throw new Error(`Download failed: empty response body for ${url}`);
    }
    try {
        await streamPipeline(response.body, fs.createWriteStream(dest));
    }
    catch (error) {
        // Never leave a truncated archive behind: a later run limited to the
        // unzip step would otherwise try to open a corrupt file.
        fs.removeSync(dest);
        throw error;
    }
}
63
70
  async function retrieveDataset(dataDir, dataset) {
64
71
  const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
65
72
  const zipFilePath = path.join(dataDir, zipFilename);
@@ -78,12 +85,7 @@ async function retrieveDataset(dataDir, dataset) {
78
85
  // }
79
86
  // await pipeline(response.body!, fs.createWriteStream(zipFilePath))
80
87
  fs.removeSync(zipFilePath);
81
- execSync(`wget --quiet ${dataset.url}`, {
82
- cwd: dataDir,
83
- env: process.env,
84
- encoding: "utf-8",
85
- // stdio: ["ignore", "ignore", "pipe"],
86
- });
88
+ await downloadFile(dataset.url, zipFilePath);
87
89
  }
88
90
  const sqlFilename = `${dataset.database}.sql`;
89
91
  const sqlFilePath = path.join(dataDir, sqlFilename);
@@ -0,0 +1,214 @@
1
+ import assert from "assert";
2
+ import { execSync } from "child_process";
3
+ import commandLineArgs from "command-line-args";
4
+ import fs from "fs-extra";
5
+ // import fetch from "node-fetch"
6
+ import path from "path";
7
+ // import stream from "stream"
8
+ import StreamZip from "node-stream-zip";
9
+ import readline from "readline";
10
+ // import util from "util"
11
+ import windows1252 from "windows-1252";
12
+ import config from "../config";
13
+ import { getChosenFromEnabledDatasets, } from "../datasets";
14
+ import { commonOptions } from "./shared/cli_helpers";
15
// Matches the characters U+0080–U+009F, which are re-decoded with the
// windows-1252 table when a dump's encoding is repaired.
const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
/**
 * Builds the definition of a boolean command-line flag.
 *
 * @param {string} alias One-letter shortcut of the flag.
 * @param {string} name Long name of the flag.
 * @param {string} help Description of the flag.
 */
function booleanFlag(alias, name, help) {
    return { alias, help, name, type: Boolean };
}
const optionsDefinitions = [
    ...commonOptions,
    booleanFlag("a", "all", "all options: fetch, unzip, repair-encoding, import"),
    booleanFlag("f", "fetch", "fetch datasets instead of retrieving them from files"),
    booleanFlag("z", "unzip", "unzip SQL files"),
    booleanFlag("e", "repairEncoding", "repair Windows CP 1252 encoding of SQL dumps"),
    booleanFlag("i", "import", "import SQL dumps into a freshly (re-)created database"),
    booleanFlag("d", "repairDatabase", "repair database (update schema and types)"),
    booleanFlag("c", "schema", "create TypeScript interfaces from databases schemas into src/raw_types_* directories"),
];
// Parsed once at module load; the functions below read from this object.
const options = commandLineArgs(optionsDefinitions);
62
+ // const pipeline = util.promisify(stream.pipeline)
63
/**
 * Runs the processing steps selected on the command line for one dataset:
 * fetch the ZIP archive, unzip the SQL dump, repair its encoding, import it
 * into PostgreSQL and generate TypeScript definitions from the schema.
 *
 * Every step is optional and driven by `options`; `options.all` enables fetch,
 * unzip, encoding repair and import, while schema generation only runs with
 * `options.schema`.
 *
 * @param {string} dataDir Directory holding the archives and SQL dumps.
 * @param {object} dataset Dataset description; this function reads its `url`,
 *   `title`, `database`, `schema`, `repairEncoding` and optional `repairZip`.
 * @returns {Promise<void>}
 */
async function retrieveDataset(dataDir, dataset) {
    const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
    const zipFilePath = path.join(dataDir, zipFilename);
    if (options.all || options.fetch) {
        // Fetch & save ZIP file.
        if (!options.silent) {
            console.log(`Loading ${dataset.title}: ${zipFilename}…`);
        }
        // wget is used because fetch (and curl) fail against this server with
        // the OpenSSL error "dh key too small".
        fs.removeSync(zipFilePath);
        execSync(`wget --quiet ${dataset.url}`, {
            cwd: dataDir,
            env: process.env,
            encoding: "utf-8",
        });
    }
    const sqlFilename = `${dataset.database}.sql`;
    const sqlFilePath = path.join(dataDir, sqlFilename);
    if (options.all || options.unzip) {
        if (!options.silent) {
            console.log(`Unzipping ${dataset.title}: ${zipFilename}…`);
        }
        fs.removeSync(sqlFilePath);
        const zip = new StreamZip({
            file: zipFilePath,
            storeEntries: true,
        });
        await new Promise((resolve, reject) => {
            // Turn archive errors (missing or corrupt file) into a rejection
            // instead of an unhandled "error" event.
            zip.on("error", reject);
            zip.on("ready", () => {
                zip.extract(null, dataDir, (err, _count) => {
                    zip.close();
                    if (err) {
                        reject(err);
                    }
                    else {
                        resolve(null);
                    }
                });
            });
        });
        if (dataset.repairZip !== undefined) {
            if (!options.silent) {
                console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}…`);
            }
            dataset.repairZip(dataset, dataDir);
        }
    }
    if ((options.all || options.repairEncoding) && dataset.repairEncoding) {
        if (!options.silent) {
            console.log(`Repairing Windows CP1252 encoding of ${dataset.title}: ${sqlFilename}…`);
        }
        const repairedSqlFilePath = sqlFilePath + ".repaired";
        const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
            encoding: "utf8",
        });
        const lineReader = readline.createInterface({
            input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
            crlfDelay: Infinity,
        });
        for await (const line of lineReader) {
            const repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
            // Respect back-pressure so that a large dump is not buffered
            // entirely in memory.
            if (!repairedSqlWriter.write(repairedLine + "\n")) {
                await new Promise((resolve) => repairedSqlWriter.once("drain", resolve));
            }
        }
        // Wait until everything is flushed to disk: moving the file earlier
        // could replace the dump with a truncated copy.
        await new Promise((resolve, reject) => {
            repairedSqlWriter.once("error", reject);
            repairedSqlWriter.once("finish", resolve);
            repairedSqlWriter.end();
        });
        await fs.move(repairedSqlFilePath, sqlFilePath, { overwrite: true });
    }
    if (options.all || options.import) {
        if (!options.silent) {
            console.log(`Importing ${dataset.title}: ${sqlFilename}…`);
        }
        // The database is dropped and re-created so the import starts from scratch.
        execSync(`psql -c "DROP DATABASE IF EXISTS ${dataset.database}"`, {
            cwd: dataDir,
            env: process.env,
            encoding: "utf-8",
        });
        execSync(`psql -c "CREATE DATABASE ${dataset.database} WITH OWNER opendata"`, {
            cwd: dataDir,
            env: process.env,
            encoding: "utf-8",
        });
        execSync(`psql -f ${sqlFilename} ${dataset.database}`, {
            cwd: dataDir,
            env: process.env,
            encoding: "utf-8",
        });
    }
    if (options.schema) {
        let definitionsDir = path.resolve("src", "raw_types_schemats");
        assert(fs.statSync(definitionsDir).isDirectory());
        if (!options.silent) {
            console.log(`Creating TypeScript definitions from schema of database ${dataset.database}…`);
        }
        const dbConnectionString = `postgres://${process.env.PGUSER}:${process.env.PGPASSWORD}@${process.env.PGHOST}:${process.env.PGPORT}/${dataset.database}`;
        let definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
        execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.schema} -o ${definitionFilePath}`, {
            env: process.env,
            encoding: "utf-8",
        });
        // Normalize line endings and drop the generation timestamp from the
        // schemats output.
        const definition = fs.readFileSync(definitionFilePath, { encoding: "utf8" });
        const definitionRepaired = definition
            .replace(/\r\n/g, "\n")
            .replace(/AUTO-GENERATED FILE @ \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/, "AUTO-GENERATED FILE");
        fs.writeFileSync(definitionFilePath, definitionRepaired);
        definitionsDir = path.resolve("src", "raw_types_kysely");
        definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
        execSync(`kysely-codegen --url ${dbConnectionString} --schema=${dataset.schema} --out-file=${definitionFilePath}`, {
            env: process.env,
            encoding: "utf-8",
        });
    }
}
189
/**
 * Script entry point: checks the command-line and database settings, then
 * processes the chosen datasets one after the other.
 *
 * The PG* environment variables take precedence; values from the project
 * configuration are only used for those that are unset or empty.
 */
async function retrieveOpenData() {
    const { dataDir } = options;
    assert(dataDir, "Missing argument: data directory");
    const { PGHOST, PGPORT, PGUSER, PGPASSWORD } = process.env;
    process.env = {
        ...process.env,
        PGHOST: PGHOST || config.db.host,
        PGPORT: PGPORT || config.db.port,
        PGUSER: PGUSER || config.db.user,
        PGPASSWORD: PGPASSWORD || config.db.password,
    };
    const env = process.env;
    assert(env.PGHOST && env.PGPORT && env.PGUSER && env.PGPASSWORD, "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
    // Datasets are handled sequentially, never in parallel.
    for (const dataset of getChosenFromEnabledDatasets(options.categories)) {
        await retrieveDataset(dataDir, dataset);
    }
}
// Exit explicitly with a status code reflecting the outcome of the run.
const exitWithSuccess = () => process.exit(0);
const exitWithFailure = (error) => {
    console.log(error);
    process.exit(1);
};
retrieveOpenData()
    .then(exitWithSuccess)
    .catch(exitWithFailure);