@tricoteuses/senat 2.22.5 → 2.22.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/src/model/ameli.d.ts +4 -0
- package/lib/src/model/ameli.js +3 -3
- package/package.json +1 -1
- package/lib/aggregates.d.ts +0 -52
- package/lib/aggregates.js +0 -930
- package/lib/aggregates.mjs +0 -713
- package/lib/aggregates.ts +0 -833
- package/lib/config.d.ts +0 -10
- package/lib/config.js +0 -16
- package/lib/config.mjs +0 -16
- package/lib/config.ts +0 -26
- package/lib/databases.d.ts +0 -2
- package/lib/databases.js +0 -26
- package/lib/databases.mjs +0 -57
- package/lib/databases.ts +0 -71
- package/lib/datasets.d.ts +0 -34
- package/lib/datasets.js +0 -233
- package/lib/datasets.mjs +0 -78
- package/lib/datasets.ts +0 -118
- package/lib/fields.d.ts +0 -10
- package/lib/fields.js +0 -68
- package/lib/fields.mjs +0 -22
- package/lib/fields.ts +0 -29
- package/lib/git.d.ts +0 -26
- package/lib/git.js +0 -167
- package/lib/index.d.ts +0 -13
- package/lib/index.js +0 -1
- package/lib/index.mjs +0 -7
- package/lib/index.ts +0 -64
- package/lib/inserters.d.ts +0 -98
- package/lib/inserters.js +0 -500
- package/lib/inserters.mjs +0 -360
- package/lib/inserters.ts +0 -521
- package/lib/legislatures.json +0 -38
- package/lib/loaders.d.ts +0 -58
- package/lib/loaders.js +0 -286
- package/lib/loaders.mjs +0 -158
- package/lib/loaders.ts +0 -271
- package/lib/model/agenda.d.ts +0 -6
- package/lib/model/agenda.js +0 -148
- package/lib/model/ameli.d.ts +0 -51
- package/lib/model/ameli.js +0 -149
- package/lib/model/ameli.mjs +0 -84
- package/lib/model/ameli.ts +0 -100
- package/lib/model/commission.d.ts +0 -18
- package/lib/model/commission.js +0 -269
- package/lib/model/debats.d.ts +0 -67
- package/lib/model/debats.js +0 -95
- package/lib/model/debats.mjs +0 -43
- package/lib/model/debats.ts +0 -68
- package/lib/model/documents.d.ts +0 -12
- package/lib/model/documents.js +0 -151
- package/lib/model/dosleg.d.ts +0 -7
- package/lib/model/dosleg.js +0 -326
- package/lib/model/dosleg.mjs +0 -196
- package/lib/model/dosleg.ts +0 -240
- package/lib/model/index.d.ts +0 -7
- package/lib/model/index.js +0 -7
- package/lib/model/index.mjs +0 -5
- package/lib/model/index.ts +0 -15
- package/lib/model/questions.d.ts +0 -45
- package/lib/model/questions.js +0 -89
- package/lib/model/questions.mjs +0 -71
- package/lib/model/questions.ts +0 -93
- package/lib/model/scrutins.d.ts +0 -13
- package/lib/model/scrutins.js +0 -114
- package/lib/model/seance.d.ts +0 -3
- package/lib/model/seance.js +0 -267
- package/lib/model/sens.d.ts +0 -146
- package/lib/model/sens.js +0 -454
- package/lib/model/sens.mjs +0 -415
- package/lib/model/sens.ts +0 -516
- package/lib/model/texte.d.ts +0 -7
- package/lib/model/texte.js +0 -256
- package/lib/model/texte.mjs +0 -208
- package/lib/model/texte.ts +0 -229
- package/lib/model/util.d.ts +0 -9
- package/lib/model/util.js +0 -38
- package/lib/model/util.mjs +0 -19
- package/lib/model/util.ts +0 -32
- package/lib/parsers/texte.d.ts +0 -7
- package/lib/parsers/texte.js +0 -228
- package/lib/raw_types/ameli.d.ts +0 -914
- package/lib/raw_types/ameli.js +0 -5
- package/lib/raw_types/ameli.mjs +0 -163
- package/lib/raw_types/debats.d.ts +0 -207
- package/lib/raw_types/debats.js +0 -5
- package/lib/raw_types/debats.mjs +0 -58
- package/lib/raw_types/dosleg.d.ts +0 -1619
- package/lib/raw_types/dosleg.js +0 -5
- package/lib/raw_types/dosleg.mjs +0 -438
- package/lib/raw_types/questions.d.ts +0 -419
- package/lib/raw_types/questions.js +0 -5
- package/lib/raw_types/questions.mjs +0 -11
- package/lib/raw_types/senat.d.ts +0 -11368
- package/lib/raw_types/senat.js +0 -5
- package/lib/raw_types/sens.d.ts +0 -8248
- package/lib/raw_types/sens.js +0 -5
- package/lib/raw_types/sens.mjs +0 -508
- package/lib/raw_types_kysely/ameli.d.ts +0 -915
- package/lib/raw_types_kysely/ameli.js +0 -7
- package/lib/raw_types_kysely/ameli.mjs +0 -5
- package/lib/raw_types_kysely/ameli.ts +0 -951
- package/lib/raw_types_kysely/debats.d.ts +0 -207
- package/lib/raw_types_kysely/debats.js +0 -7
- package/lib/raw_types_kysely/debats.mjs +0 -5
- package/lib/raw_types_kysely/debats.ts +0 -222
- package/lib/raw_types_kysely/dosleg.d.ts +0 -3532
- package/lib/raw_types_kysely/dosleg.js +0 -7
- package/lib/raw_types_kysely/dosleg.mjs +0 -5
- package/lib/raw_types_kysely/dosleg.ts +0 -3621
- package/lib/raw_types_kysely/questions.d.ts +0 -414
- package/lib/raw_types_kysely/questions.js +0 -7
- package/lib/raw_types_kysely/questions.mjs +0 -5
- package/lib/raw_types_kysely/questions.ts +0 -426
- package/lib/raw_types_kysely/sens.d.ts +0 -4394
- package/lib/raw_types_kysely/sens.js +0 -7
- package/lib/raw_types_kysely/sens.mjs +0 -5
- package/lib/raw_types_kysely/sens.ts +0 -4499
- package/lib/raw_types_schemats/ameli.d.ts +0 -539
- package/lib/raw_types_schemats/ameli.js +0 -2
- package/lib/raw_types_schemats/ameli.mjs +0 -2
- package/lib/raw_types_schemats/ameli.ts +0 -601
- package/lib/raw_types_schemats/debats.d.ts +0 -127
- package/lib/raw_types_schemats/debats.js +0 -2
- package/lib/raw_types_schemats/debats.mjs +0 -2
- package/lib/raw_types_schemats/debats.ts +0 -145
- package/lib/raw_types_schemats/dosleg.d.ts +0 -977
- package/lib/raw_types_schemats/dosleg.js +0 -2
- package/lib/raw_types_schemats/dosleg.mjs +0 -2
- package/lib/raw_types_schemats/dosleg.ts +0 -2193
- package/lib/raw_types_schemats/questions.d.ts +0 -235
- package/lib/raw_types_schemats/questions.js +0 -2
- package/lib/raw_types_schemats/questions.mjs +0 -2
- package/lib/raw_types_schemats/questions.ts +0 -249
- package/lib/raw_types_schemats/sens.d.ts +0 -6915
- package/lib/raw_types_schemats/sens.js +0 -2
- package/lib/raw_types_schemats/sens.mjs +0 -2
- package/lib/raw_types_schemats/sens.ts +0 -2907
- package/lib/scripts/convert_data.d.ts +0 -1
- package/lib/scripts/convert_data.js +0 -354
- package/lib/scripts/convert_data.mjs +0 -181
- package/lib/scripts/convert_data.ts +0 -243
- package/lib/scripts/data-download.d.ts +0 -1
- package/lib/scripts/data-download.js +0 -12
- package/lib/scripts/datautil.d.ts +0 -8
- package/lib/scripts/datautil.js +0 -34
- package/lib/scripts/datautil.mjs +0 -16
- package/lib/scripts/datautil.ts +0 -19
- package/lib/scripts/images/transparent_150x192.jpg +0 -0
- package/lib/scripts/images/transparent_155x225.jpg +0 -0
- package/lib/scripts/parse_textes.d.ts +0 -1
- package/lib/scripts/parse_textes.js +0 -44
- package/lib/scripts/parse_textes.mjs +0 -46
- package/lib/scripts/parse_textes.ts +0 -65
- package/lib/scripts/retrieve_agenda.d.ts +0 -1
- package/lib/scripts/retrieve_agenda.js +0 -132
- package/lib/scripts/retrieve_cr_commission.d.ts +0 -1
- package/lib/scripts/retrieve_cr_commission.js +0 -364
- package/lib/scripts/retrieve_cr_seance.d.ts +0 -6
- package/lib/scripts/retrieve_cr_seance.js +0 -347
- package/lib/scripts/retrieve_documents.d.ts +0 -3
- package/lib/scripts/retrieve_documents.js +0 -219
- package/lib/scripts/retrieve_documents.mjs +0 -249
- package/lib/scripts/retrieve_documents.ts +0 -298
- package/lib/scripts/retrieve_open_data.d.ts +0 -1
- package/lib/scripts/retrieve_open_data.js +0 -315
- package/lib/scripts/retrieve_open_data.mjs +0 -217
- package/lib/scripts/retrieve_open_data.ts +0 -268
- package/lib/scripts/retrieve_senateurs_photos.d.ts +0 -1
- package/lib/scripts/retrieve_senateurs_photos.js +0 -147
- package/lib/scripts/retrieve_senateurs_photos.mjs +0 -147
- package/lib/scripts/retrieve_senateurs_photos.ts +0 -177
- package/lib/scripts/retrieve_videos.d.ts +0 -1
- package/lib/scripts/retrieve_videos.js +0 -461
- package/lib/scripts/shared/cli_helpers.d.ts +0 -95
- package/lib/scripts/shared/cli_helpers.js +0 -91
- package/lib/scripts/shared/cli_helpers.ts +0 -36
- package/lib/scripts/shared/util.d.ts +0 -4
- package/lib/scripts/shared/util.js +0 -35
- package/lib/scripts/shared/util.ts +0 -33
- package/lib/scripts/test_iter_load.d.ts +0 -1
- package/lib/scripts/test_iter_load.js +0 -12
- package/lib/strings.d.ts +0 -1
- package/lib/strings.js +0 -18
- package/lib/strings.mjs +0 -18
- package/lib/strings.ts +0 -26
- package/lib/types/agenda.d.ts +0 -44
- package/lib/types/agenda.js +0 -1
- package/lib/types/ameli.d.ts +0 -5
- package/lib/types/ameli.js +0 -1
- package/lib/types/ameli.mjs +0 -13
- package/lib/types/ameli.ts +0 -21
- package/lib/types/compte_rendu.d.ts +0 -83
- package/lib/types/compte_rendu.js +0 -1
- package/lib/types/debats.d.ts +0 -2
- package/lib/types/debats.js +0 -1
- package/lib/types/debats.mjs +0 -2
- package/lib/types/debats.ts +0 -6
- package/lib/types/dosleg.d.ts +0 -70
- package/lib/types/dosleg.js +0 -1
- package/lib/types/dosleg.mjs +0 -151
- package/lib/types/dosleg.ts +0 -284
- package/lib/types/questions.d.ts +0 -2
- package/lib/types/questions.js +0 -1
- package/lib/types/questions.mjs +0 -1
- package/lib/types/questions.ts +0 -3
- package/lib/types/sens.d.ts +0 -10
- package/lib/types/sens.js +0 -1
- package/lib/types/sens.mjs +0 -1
- package/lib/types/sens.ts +0 -12
- package/lib/types/sessions.d.ts +0 -5
- package/lib/types/sessions.js +0 -84
- package/lib/types/sessions.mjs +0 -43
- package/lib/types/sessions.ts +0 -42
- package/lib/types/texte.d.ts +0 -74
- package/lib/types/texte.js +0 -16
- package/lib/types/texte.mjs +0 -16
- package/lib/types/texte.ts +0 -76
- package/lib/typings/windows-1252.d.js +0 -2
- package/lib/typings/windows-1252.d.mjs +0 -2
- package/lib/typings/windows-1252.d.ts +0 -11
- package/lib/utils/cr_spliting.d.ts +0 -28
- package/lib/utils/cr_spliting.js +0 -265
- package/lib/utils/date.d.ts +0 -10
- package/lib/utils/date.js +0 -100
- package/lib/utils/nvs-timecode.d.ts +0 -7
- package/lib/utils/nvs-timecode.js +0 -79
- package/lib/utils/reunion_grouping.d.ts +0 -9
- package/lib/utils/reunion_grouping.js +0 -361
- package/lib/utils/reunion_odj_building.d.ts +0 -5
- package/lib/utils/reunion_odj_building.js +0 -154
- package/lib/utils/reunion_parsing.d.ts +0 -23
- package/lib/utils/reunion_parsing.js +0 -209
- package/lib/utils/scoring.d.ts +0 -14
- package/lib/utils/scoring.js +0 -147
- package/lib/utils/string_cleaning.d.ts +0 -7
- package/lib/utils/string_cleaning.js +0 -57
- package/lib/validators/config.d.ts +0 -9
- package/lib/validators/config.js +0 -10
- package/lib/validators/config.mjs +0 -54
- package/lib/validators/config.ts +0 -79
- package/lib/validators/senat.d.ts +0 -0
- package/lib/validators/senat.js +0 -28
- package/lib/validators/senat.mjs +0 -24
- package/lib/validators/senat.ts +0 -26
|
@@ -1,315 +0,0 @@
|
|
|
1
|
-
import assert from "assert";
|
|
2
|
-
import { execSync } from "child_process";
|
|
3
|
-
import commandLineArgs from "command-line-args";
|
|
4
|
-
import fs from "fs-extra";
|
|
5
|
-
import path from "path";
|
|
6
|
-
import StreamZip from "node-stream-zip";
|
|
7
|
-
import readline from "readline";
|
|
8
|
-
import * as windows1252 from "windows-1252";
|
|
9
|
-
import { pipeline } from "stream";
|
|
10
|
-
import { promisify } from "util";
|
|
11
|
-
import config from "../config";
|
|
12
|
-
import { getChosenDatasets, getEnabledDatasets } from "../datasets";
|
|
13
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
14
|
-
// Matches the code points U+0080–U+009F; each match is re-decoded through
// windows1252.decode() when an SQL dump's encoding is repaired.
const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
// Builds the definition of an on/off command line switch.
const booleanOption = (name, alias, help) => ({ alias, help, name, type: Boolean });
// Command line options: the shared ones first, then those specific to this script.
const optionsDefinitions = [
    ...commonOptions,
    booleanOption("all", "a", "all options: fetch, unzip, repair-encoding, import"),
    booleanOption("schema", "c", "create TypeScript interfaces from databases schemas into src/raw_types_* directories"),
    booleanOption("repairEncoding", "e", "repair Windows CP 1252 encoding of SQL dumps"),
    booleanOption("fetch", "f", "fetch datasets instead of retrieving them from files"),
    booleanOption("import", "i", "import SQL dumps into a freshly (re-)created database"),
    {
        alias: "S",
        help: "sudo psql commands with given user",
        name: "sudo",
        type: String,
    },
    booleanOption("unzip", "z", "unzip SQL files"),
];
// Parsed once at module load; read by every function below.
const options = commandLineArgs(optionsDefinitions);
// Promise-returning flavour of stream.pipeline, used to save downloads to disk.
const streamPipeline = promisify(pipeline);
|
|
62
|
-
/**
 * Download `url` and stream the response body into the file at `dest`.
 *
 * @param {string} url - Address of the resource to fetch.
 * @param {string} dest - Path of the file to create.
 * @returns {Promise<void>} Resolves once the whole body has been written.
 * @throws {Error} When the response status is not OK or the response carries no body.
 */
async function downloadFile(url, dest) {
    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`Download failed ${response.status} ${response.statusText} for ${url}`);
    }
    // A successful response may still have no body (e.g. 204); fail with a clear
    // message before the destination file gets created.
    if (response.body == null) {
        throw new Error(`Download failed: empty response body for ${url}`);
    }
    await streamPipeline(response.body, fs.createWriteStream(dest));
}
|
|
69
|
-
/**
 * Rewrite references to the `public` schema that appear outside single-quoted
 * SQL string literals so that they name `schema` instead.
 *
 * Quote state is tracked per line only: a string literal spanning several
 * lines is not recognised as such on its continuation lines.
 *
 * @param {string} line - One line of an SQL dump.
 * @param {string} schema - Replacement schema name.
 * @returns {string} The rewritten line.
 */
function replacePublicOutsideStrings(line, schema) {
    // The capturing group keeps every quote as its own part, so walking the
    // parts toggles the "inside a string" state at each quote.
    const parts = line.split(/(')/);
    let inString = false;
    for (let i = 0; i < parts.length; i++) {
        if (parts[i] === "'") {
            inString = !inString;
        }
        else if (!inString) {
            // Only replace outside of strings, including before comma.
            // A replacer function keeps any "$" in the schema name literal.
            parts[i] = parts[i].replace(/\bpublic\b(?=(\s*\.|\s*[,;]|\s|$))/g, () => schema);
        }
    }
    return parts.join("");
}
/**
 * Copy a dataset database to the main Senat database (overwriting its contents).
 *
 * Reads `<dataDir>/<database>.sql`, writes `<dataDir>/<database>_schema_dump.sql`
 * (schema (re-)creation and grants, followed by the dump with `public` replaced
 * by the dataset schema and the client encoding switched to UTF8), then feeds
 * that file to `psql -d senat`.
 *
 * A failing psql run is reported on stderr (unless `options.silent`) but does
 * not reject: the import stays best-effort.
 *
 * @param {object} dataset - Dataset description; only `dataset.database` is read.
 * @param {string} dataDir - Directory holding the SQL dump.
 * @param {object} options - Parsed command line options (`silent`, `sudo`).
 * @returns {Promise<void>}
 * @throws {Error} When the schema dump file cannot be written.
 */
async function copyToSenat(dataset, dataDir, options) {
    if (!options["silent"]) {
        console.log(`Copying ${dataset.database} to Senat database...`);
    }
    const sqlFilePath = path.join(dataDir, `${dataset.database}.sql`);
    const schemaDumpFile = path.join(dataDir, `${dataset.database}_schema_dump.sql`);
    const schemaSqlWriter = fs.createWriteStream(schemaDumpFile, { encoding: "utf8" });
    // Listen for errors from the very first write: an "error" event without a
    // listener would otherwise surface as an uncaught exception.
    let writeError = null;
    schemaSqlWriter.on("error", (error) => {
        writeError = error;
    });
    // Write one line and, when the stream buffer is full, wait until it drains
    // (or the stream closes after a failure) so big dumps are not held in memory.
    const writeLine = async (text) => {
        if (writeError) {
            throw writeError;
        }
        if (schemaSqlWriter.write(`${text}\n`)) {
            return;
        }
        await new Promise((resolve) => {
            const settle = () => {
                schemaSqlWriter.off("drain", settle);
                schemaSqlWriter.off("close", settle);
                resolve();
            };
            schemaSqlWriter.once("drain", settle);
            schemaSqlWriter.once("close", settle);
        });
        if (writeError) {
            throw writeError;
        }
    };
    try {
        // Header: (re-)create the dataset schema and grant read access on it.
        await writeLine(`DROP SCHEMA IF EXISTS ${dataset.database} CASCADE;`);
        await writeLine(`CREATE SCHEMA IF NOT EXISTS ${dataset.database};`);
        await writeLine(`GRANT USAGE ON SCHEMA ${dataset.database} TO ${config.db.user};`);
        await writeLine(`GRANT SELECT ON ALL TABLES IN SCHEMA ${dataset.database} TO ${config.db.user};`);
        await writeLine(`ALTER DEFAULT PRIVILEGES IN SCHEMA ${dataset.database} GRANT SELECT ON TABLES TO ${config.db.user};`);
        // Then stream the rest of the SQL file, line by line.
        const lineReader = readline.createInterface({
            input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
            crlfDelay: Infinity,
        });
        for await (const line of lineReader) {
            const rewritten = replacePublicOutsideStrings(line, dataset.database)
                // Replace SET client_encoding to UTF8
                .replace(/SET client_encoding = 'LATIN1';/i, "SET client_encoding = 'UTF8';");
            await writeLine(rewritten);
        }
        // Flush everything to disk before psql reads the file.
        await new Promise((resolve, reject) => {
            if (writeError) {
                reject(writeError);
                return;
            }
            schemaSqlWriter.once("error", reject);
            schemaSqlWriter.once("finish", resolve);
            schemaSqlWriter.end();
        });
    }
    catch (error) {
        // Do not leave the output file descriptor open on failure.
        schemaSqlWriter.destroy();
        throw error;
    }
    const sudoPrefix = options["sudo"] ? `sudo -u ${options["sudo"]} ` : "";
    try {
        // The path is quoted so that a data directory containing spaces still works.
        execSync(`${sudoPrefix}psql --quiet -d senat -f "${schemaDumpFile}"`, {
            env: process.env,
            encoding: "utf-8",
            stdio: ["ignore", "pipe", "pipe"],
        });
    }
    catch (error) {
        if (!options["silent"]) {
            console.error(`Failed to import ${dataset.database} schema:`);
            if (error.stderr) {
                console.error(error.stderr);
            }
            if (error.stdout) {
                console.error(error.stdout);
            }
        }
    }
}
|
|
138
|
-
/**
 * Run the pipeline steps selected on the command line for one dataset:
 * download its ZIP archive, unzip it, repair the encoding of the SQL dump,
 * import the dump into the `senat` database (and create the configured
 * indexes), and generate TypeScript definitions from the resulting schema.
 *
 * Each step is gated by the module-level `options` (`all`, `fetch`, `unzip`,
 * `repairEncoding`, `import`, `schema`, `silent`, `sudo`).
 *
 * @param {string} dataDir - Directory where archives and SQL dumps are stored.
 * @param {object} dataset - Dataset description; reads `url`, `title`,
 *   `database` and the optional `repairZip`, `repairEncoding` and `indexes`.
 * @returns {Promise<void>}
 * @throws {Error} When the download, the extraction, the encoding repair or a
 *   code generation command fails. Index creation failures are only logged.
 */
async function retrieveDataset(dataDir, dataset) {
    const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
    const zipFilePath = path.join(dataDir, zipFilename);
    if (options["all"] || options["fetch"]) {
        // Fetch & save ZIP file.
        if (!options["silent"]) {
            console.log(`Loading ${dataset.title}: ${zipFilename}…`);
        }
        fs.removeSync(zipFilePath);
        await downloadFile(dataset.url, zipFilePath);
    }
    const sqlFilename = `${dataset.database}.sql`;
    const sqlFilePath = path.join(dataDir, sqlFilename);
    if (options["all"] || options["unzip"]) {
        if (!options["silent"]) {
            console.log(`Unzipping ${dataset.title}: ${zipFilename}…`);
        }
        fs.removeSync(sqlFilePath);
        const zip = new StreamZip({
            file: zipFilePath,
            storeEntries: true,
        });
        await new Promise((resolve, reject) => {
            // Without this listener a missing or corrupt archive would never
            // settle the promise.
            zip.on("error", reject);
            zip.on("ready", () => {
                zip.extract(null, dataDir, (err, _count) => {
                    zip.close();
                    if (err) {
                        reject(err);
                    }
                    else {
                        resolve(null);
                    }
                });
            });
        });
        if (dataset.repairZip !== undefined) {
            if (!options["silent"]) {
                console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}…`);
            }
            dataset.repairZip(dataset, dataDir);
        }
    }
    if ((options["all"] || options["repairEncoding"]) && dataset.repairEncoding) {
        if (!options["silent"]) {
            console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}…`);
        }
        const repairedSqlFilePath = sqlFilePath + ".repaired";
        const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
            encoding: "utf8",
        });
        // Read the file as latin1 (ISO-8859-1/CP1252) and write as UTF-8
        const lineReader = readline.createInterface({
            input: fs.createReadStream(sqlFilePath, { encoding: "latin1" }),
            crlfDelay: Infinity,
        });
        for await (const line of lineReader) {
            // Re-decode the characters in the U+0080–U+009F range as Windows-1252.
            const repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
            repairedSqlWriter.write(repairedLine + "\n");
        }
        // Wait for the writer to flush everything before moving the file:
        // end() only schedules the flush, it does not wait for it.
        await new Promise((resolve, reject) => {
            repairedSqlWriter.once("error", reject);
            repairedSqlWriter.once("finish", resolve);
            repairedSqlWriter.end();
        });
        await fs.move(repairedSqlFilePath, sqlFilePath, { overwrite: true });
    }
    if (options["all"] || options["import"] || options["schema"]) {
        if (!options["silent"]) {
            console.log(`Importing ${dataset.title}: ${sqlFilename}…`);
        }
        await copyToSenat(dataset, dataDir, options);
        // Create indexes programmatically after import
        if (dataset.indexes) {
            for (const [table, indexes] of Object.entries(dataset.indexes)) {
                for (const index of indexes) {
                    const indexName = index.name;
                    const columns = index.columns.join(", ");
                    const schema = dataset.database;
                    const sql = `CREATE INDEX IF NOT EXISTS ${indexName} ON ${schema}.${table} (${columns});`;
                    try {
                        execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -c "${sql}"`, {
                            env: process.env,
                            encoding: "utf-8",
                            stdio: ["ignore", "ignore", "pipe"],
                        });
                        if (!options["silent"]) {
                            console.log(`Created index: ${indexName} on ${schema}.${table} (${columns})`);
                        }
                    }
                    catch (err) {
                        // A failed index is reported but does not abort the import.
                        console.error(`Failed to create index ${indexName} on ${schema}.${table}:`, err);
                    }
                }
            }
        }
    }
    if (options["schema"]) {
        let definitionsDir = path.resolve("src", "raw_types_schemats");
        assert(fs.statSync(definitionsDir).isDirectory());
        if (!options["silent"]) {
            console.log(`Creating TypeScript definitions from schema '${dataset.database}' in database 'senat'…`);
        }
        const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/senat`;
        let definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
        execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.database} -o ${definitionFilePath}`, {
            env: process.env,
            encoding: "utf-8",
        });
        // Normalise line endings and drop the generation timestamp from the
        // header so the generated file does not change on every run.
        const definition = fs.readFileSync(definitionFilePath, { encoding: "utf8" });
        const definitionRepaired = definition
            .replace(/\r\n/g, "\n")
            .replace(/AUTO-GENERATED FILE @ \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/, "AUTO-GENERATED FILE");
        fs.writeFileSync(definitionFilePath, definitionRepaired);
        definitionsDir = path.resolve("src", "raw_types");
        definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
        execSync(`npx kysely-codegen --url '${dbConnectionString}' --default-schema ${dataset.database} --include-pattern '${dataset.database}.*' --out-file ${definitionFilePath}`, {
            env: process.env,
            encoding: "utf-8",
        });
    }
}
|
|
268
|
-
/**
 * Script entry point: check the arguments and the database settings, make
 * sure the `opendata` role and the `senat` database exist, process every
 * chosen dataset and, with `--schema`, generate the Kysely types of the
 * whole `senat` database.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the data directory or a database setting is missing, or
 *   when a dataset step or the code generation fails.
 */
async function retrieveOpenData() {
    const dataDir = options["dataDir"];
    assert(dataDir, "Missing argument: data directory");
    // Environment variables win over the values of the configuration file.
    const { PGHOST, PGPORT, PGUSER, PGPASSWORD } = process.env;
    process.env = {
        ...process.env,
        PGHOST: PGHOST || config.db.host,
        PGPORT: PGPORT || String(config.db.port),
        PGUSER: PGUSER || config.db.user,
        PGPASSWORD: PGPASSWORD || config.db.password,
    };
    assert(process.env["PGHOST"] && process.env["PGPORT"] && process.env["PGUSER"] && process.env["PGPASSWORD"], "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
    console.time("data extraction time");
    const sudoPrefix = options["sudo"] ? `sudo -u ${options["sudo"]} ` : "";
    // Create role 'opendata' and database 'senat' if they do not exist yet;
    // "|| true" turns an "already exists" failure into a success.
    const bootstrapStatements = ["CREATE ROLE opendata", "CREATE DATABASE senat WITH OWNER opendata"];
    for (const statement of bootstrapStatements) {
        execSync(`${sudoPrefix}psql --quiet -c "${statement}" || true`, {
            cwd: dataDir,
            env: process.env,
            encoding: "utf-8",
        });
    }
    // Datasets are processed one after the other.
    const chosenDatasets = getChosenDatasets(getEnabledDatasets(options["categories"]));
    for (const dataset of chosenDatasets) {
        await retrieveDataset(dataDir, dataset);
    }
    if (options["schema"]) {
        const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/senat`;
        const definitionFilePath = path.join(path.resolve("src", "raw_types"), `senat.ts`);
        execSync(`npx kysely-codegen --url '${dbConnectionString}' --out-file ${definitionFilePath}`, {
            env: process.env,
            encoding: "utf-8",
        });
    }
    if (!options["silent"]) {
        console.timeEnd("data extraction time");
    }
}
|
|
310
|
-
// Run the script: exit with status 0 on success, log the error and exit with
// status 1 on failure.
(async () => {
    try {
        await retrieveOpenData();
    }
    catch (error) {
        console.log(error);
        process.exit(1);
    }
    process.exit(0);
})();
|
|
@@ -1,217 +0,0 @@
|
|
|
1
|
-
import assert from "assert";
|
|
2
|
-
import { execSync } from "child_process";
|
|
3
|
-
import commandLineArgs from "command-line-args";
|
|
4
|
-
import fs from "fs-extra";
|
|
5
|
-
// import fetch from "node-fetch"
|
|
6
|
-
import path from "path";
|
|
7
|
-
// import stream from "stream"
|
|
8
|
-
import StreamZip from "node-stream-zip";
|
|
9
|
-
import readline from "readline";
|
|
10
|
-
// import util from "util"
|
|
11
|
-
import windows1252 from "windows-1252";
|
|
12
|
-
import config from "../config";
|
|
13
|
-
import { datasets, getChosenFromEnabledDatasets, } from "../datasets";
|
|
14
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
15
|
-
// Matches the code points U+0080–U+009F; each match is re-decoded through
// windows1252.decode() when an SQL dump's encoding is repaired.
const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
// Command line options: the shared ones first, then those specific to this script.
const optionsDefinitions = [
    ...commonOptions,
    {
        name: "all",
        alias: "a",
        type: Boolean,
        help: "all options: fetch, unzip, repair-encoding, import",
    },
    {
        name: "fetch",
        alias: "f",
        type: Boolean,
        help: "fetch datasets instead of retrieving them from files",
    },
    {
        name: "unzip",
        alias: "z",
        type: Boolean,
        help: "unzip SQL files",
    },
    {
        name: "repairEncoding",
        alias: "e",
        type: Boolean,
        help: "repair Windows CP 1252 encoding of SQL dumps",
    },
    {
        name: "import",
        alias: "i",
        type: Boolean,
        help: "import SQL dumps into a freshly (re-)created database",
    },
    {
        name: "repairDatabase",
        alias: "d",
        type: Boolean,
        help: "repair database (update schema and types)",
    },
    {
        name: "schema",
        alias: "c",
        type: Boolean,
        help: "create TypeScript interfaces from databases schemas into src/raw_types_* directories",
    },
];
// Parsed once at module load; read by every function below.
const options = commandLineArgs(optionsDefinitions);
|
|
62
|
-
// const pipeline = util.promisify(stream.pipeline)
|
|
63
|
-
/**
 * Run the pipeline steps selected on the command line for one dataset:
 * download its ZIP archive with wget, unzip it, repair the encoding of the SQL
 * dump, import the dump into the dataset's own database, and generate
 * TypeScript definitions from that database.
 *
 * Each step is gated by the module-level `options` (`all`, `fetch`, `unzip`,
 * `repairEncoding`, `import`, `schema`, `silent`).
 *
 * @param {string} dataDir - Directory where archives and SQL dumps are stored.
 * @param {object} dataset - Dataset description; reads `url`, `title`,
 *   `database`, `schema` and the optional `repairZip` and `repairEncoding`.
 * @returns {Promise<void>}
 * @throws {Error} When the download, the extraction, the encoding repair, the
 *   import or a code generation command fails.
 */
async function retrieveDataset(dataDir, dataset) {
    const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
    const zipFilePath = path.join(dataDir, zipFilename);
    if (options.all || options.fetch) {
        // Fetch & save ZIP file.
        if (!options.silent) {
            console.log(`Loading ${dataset.title}: ${zipFilename}…`);
        }
        // wget is used because fetch fails with OpenSSL error: dh key too small
        // (so does "curl").
        fs.removeSync(zipFilePath);
        execSync(`wget --quiet ${dataset.url}`, {
            cwd: dataDir,
            env: process.env,
            encoding: "utf-8",
        });
    }
    const sqlFilename = `${dataset.database}.sql`;
    const sqlFilePath = path.join(dataDir, sqlFilename);
    if (options.all || options.unzip) {
        if (!options.silent) {
            console.log(`Unzipping ${dataset.title}: ${zipFilename}…`);
        }
        fs.removeSync(sqlFilePath);
        const zip = new StreamZip({
            file: zipFilePath,
            storeEntries: true,
        });
        await new Promise((resolve, reject) => {
            // Without this listener a missing or corrupt archive would never
            // settle the promise.
            zip.on("error", reject);
            zip.on("ready", () => {
                zip.extract(null, dataDir, (err, _count) => {
                    zip.close();
                    if (err) {
                        reject(err);
                    }
                    else {
                        resolve(null);
                    }
                });
            });
        });
        if (dataset.repairZip !== undefined) {
            if (!options.silent) {
                console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}…`);
            }
            dataset.repairZip(dataset, dataDir);
        }
    }
    if ((options.all || options.repairEncoding) && dataset.repairEncoding) {
        if (!options.silent) {
            console.log(`Repairing Windows CP1252 encoding of ${dataset.title}: ${sqlFilename}…`);
        }
        const repairedSqlFilePath = sqlFilePath + ".repaired";
        const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
            encoding: "utf8",
        });
        const lineReader = readline.createInterface({
            input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
            crlfDelay: Infinity,
        });
        for await (const line of lineReader) {
            // Re-decode the characters in the U+0080–U+009F range as Windows-1252.
            repairedSqlWriter.write(line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" })) + "\n");
        }
        // Wait for the writer to flush everything before moving the file:
        // end() only schedules the flush, it does not wait for it.
        await new Promise((resolve, reject) => {
            repairedSqlWriter.once("error", reject);
            repairedSqlWriter.once("finish", resolve);
            repairedSqlWriter.end();
        });
        await fs.move(repairedSqlFilePath, sqlFilePath, { overwrite: true });
    }
    if (options.all || options.import) {
        if (!options.silent) {
            console.log(`Importing ${dataset.title}: ${sqlFilename}…`);
        }
        execSync(`psql --quiet -d ${dataset.database} -f ${sqlFilename}`, {
            cwd: dataDir,
            env: process.env,
            encoding: "utf-8",
            stdio: ["pipe", "ignore", "ignore"],
        });
    }
    if (options.schema) {
        let definitionsDir = path.resolve("src", "raw_types_schemats");
        assert(fs.statSync(definitionsDir).isDirectory());
        if (!options.silent) {
            console.log(`Creating TypeScript definitions from schema of database ${dataset.database}…`);
        }
        const dbConnectionString = `postgres://${process.env.PGUSER}:${process.env.PGPASSWORD}@${process.env.PGHOST}:${process.env.PGPORT}/${dataset.database}`;
        let definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
        execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.schema} -o ${definitionFilePath}`, {
            env: process.env,
            encoding: "utf-8",
        });
        // Normalise line endings and drop the generation timestamp from the
        // header so the generated file does not change on every run.
        const definition = fs.readFileSync(definitionFilePath, { encoding: "utf8" });
        const definitionRepaired = definition
            .replace(/\r\n/g, "\n")
            .replace(/AUTO-GENERATED FILE @ \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/, "AUTO-GENERATED FILE");
        fs.writeFileSync(definitionFilePath, definitionRepaired);
        definitionsDir = path.resolve("src", "raw_types_kysely");
        definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
        execSync(`kysely-codegen --url ${dbConnectionString} --schema=${dataset.schema} --out-file=${definitionFilePath}`, {
            env: process.env,
            encoding: "utf-8",
        });
    }
}
|
|
177
|
-
/**
 * Script entry point: check the arguments and the database settings, drop and
 * re-create the database of every known dataset, then process the chosen
 * datasets one after the other.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the data directory or a database setting is missing, or
 *   when a psql command or a dataset step fails.
 */
async function retrieveOpenData() {
    const { dataDir } = options;
    assert(dataDir, "Missing argument: data directory");
    // Environment variables win over the values of the configuration file.
    const { PGHOST, PGPORT, PGUSER, PGPASSWORD } = process.env;
    process.env = {
        ...process.env,
        PGHOST: PGHOST || config.db.host,
        PGPORT: PGPORT || config.db.port,
        PGUSER: PGUSER || config.db.user,
        PGPASSWORD: PGPASSWORD || config.db.password,
    };
    const isDatabaseConfigured = process.env.PGHOST
        && process.env.PGPORT
        && process.env.PGUSER
        && process.env.PGPASSWORD;
    assert(isDatabaseConfigured, "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
    console.time("data extraction time");
    // Every dataset database is dropped and re-created, whatever steps were requested.
    for (const dataset of Object.values(datasets)) {
        const statements = [
            `DROP DATABASE IF EXISTS ${dataset.database}`,
            `CREATE DATABASE ${dataset.database} WITH OWNER opendata`,
        ];
        for (const statement of statements) {
            execSync(`psql --quiet -c "${statement}"`, {
                cwd: dataDir,
                env: process.env,
                encoding: "utf-8",
            });
        }
    }
    const selectedDatasets = getChosenFromEnabledDatasets(options.categories);
    for (const dataset of selectedDatasets) {
        await retrieveDataset(dataDir, dataset);
    }
    if (!options.silent) {
        console.timeEnd("data extraction time");
    }
}
|
|
212
|
-
// Run the script: exit with status 0 on success, log the error and exit with
// status 1 on failure.
(async () => {
    try {
        await retrieveOpenData();
    }
    catch (error) {
        console.log(error);
        process.exit(1);
    }
    process.exit(0);
})();
|