@tricoteuses/senat 2.9.9 → 2.9.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/databases.d.ts +1 -0
- package/lib/databases.js +21 -22
- package/lib/datasets.d.ts +0 -1
- package/lib/datasets.js +1 -6
- package/lib/scripts/retrieve_open_data.js +77 -25
- package/package.json +2 -1
package/lib/databases.d.ts
CHANGED
|
@@ -19,6 +19,7 @@ type QuestionsDb = {
|
|
|
19
19
|
type SensDb = {
|
|
20
20
|
[K in keyof SensTableTypes]: SensTableTypes[K]["select"];
|
|
21
21
|
};
|
|
22
|
+
export declare const dbSenat: Kysely<unknown>;
|
|
22
23
|
export declare const dbAmeli: Kysely<AmeliDb>;
|
|
23
24
|
export declare const dbDebats: Kysely<DebatsDb>;
|
|
24
25
|
export declare const dbDosleg: Kysely<DoslegDb>;
|
package/lib/databases.js
CHANGED
|
@@ -9,25 +9,24 @@ import { datasets } from "./datasets";
|
|
|
9
9
|
pg.types.setTypeParser(types.builtins.INT8, (val) => {
|
|
10
10
|
return parseInt(val, 10);
|
|
11
11
|
});
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
}
|
|
12
|
+
const senatPool = new pg.Pool({
|
|
13
|
+
database: "senat",
|
|
14
|
+
host: config.db.host,
|
|
15
|
+
user: config.db.user,
|
|
16
|
+
password: config.db.password,
|
|
17
|
+
port: config.db.port,
|
|
18
|
+
max: 10,
|
|
19
|
+
});
|
|
20
|
+
const senatDialect = new PostgresDialect({
|
|
21
|
+
pool: senatPool,
|
|
22
|
+
cursor: Cursor,
|
|
23
|
+
});
|
|
24
|
+
export const dbSenat = new Kysely({
|
|
25
|
+
log: ["error"],
|
|
26
|
+
dialect: senatDialect,
|
|
27
|
+
});
|
|
28
|
+
export const dbAmeli = dbSenat.withSchema(datasets.ameli.database);
|
|
29
|
+
export const dbDebats = dbSenat.withSchema(datasets.debats.database);
|
|
30
|
+
export const dbDosleg = dbSenat.withSchema(datasets.dosleg.database);
|
|
31
|
+
export const dbQuestions = dbSenat.withSchema(datasets.questions.database);
|
|
32
|
+
export const dbSens = dbSenat.withSchema(datasets.sens.database);
|
package/lib/datasets.d.ts
CHANGED
package/lib/datasets.js
CHANGED
|
@@ -16,35 +16,31 @@ export var EnabledDatasets;
|
|
|
16
16
|
export const datasets = {
|
|
17
17
|
ameli: {
|
|
18
18
|
database: "ameli",
|
|
19
|
-
repairEncoding:
|
|
19
|
+
repairEncoding: true,
|
|
20
20
|
repairZip: (dataset, dataDir) => {
|
|
21
21
|
const sqlFilename = `${dataset.database}.sql`;
|
|
22
22
|
const sqlFilePath = path.join(dataDir, sqlFilename);
|
|
23
23
|
fs.removeSync(sqlFilePath);
|
|
24
24
|
fs.moveSync(path.join(dataDir, "var", "opt", "opendata", sqlFilename), sqlFilePath);
|
|
25
25
|
},
|
|
26
|
-
schema: "public",
|
|
27
26
|
title: "Amendements",
|
|
28
27
|
url: "https://data.senat.fr/data/ameli/ameli.zip",
|
|
29
28
|
},
|
|
30
29
|
debats: {
|
|
31
30
|
database: "debats",
|
|
32
31
|
repairEncoding: true,
|
|
33
|
-
schema: "public",
|
|
34
32
|
title: "Informations relatives aux comptes rendus intégraux de la séance publique",
|
|
35
33
|
url: "https://data.senat.fr/data/debats/debats.zip",
|
|
36
34
|
},
|
|
37
35
|
dosleg: {
|
|
38
36
|
database: "dosleg",
|
|
39
37
|
repairEncoding: true,
|
|
40
|
-
schema: "public",
|
|
41
38
|
title: "Dossiers législatifs",
|
|
42
39
|
url: "https://data.senat.fr/data/dosleg/dosleg.zip",
|
|
43
40
|
},
|
|
44
41
|
questions: {
|
|
45
42
|
database: "questions",
|
|
46
43
|
repairEncoding: true,
|
|
47
|
-
schema: "questions",
|
|
48
44
|
title: "Questions écrites et orales posées par les sénateurs au Gouvernement",
|
|
49
45
|
url: "https://data.senat.fr/data/questions/questions.zip",
|
|
50
46
|
},
|
|
@@ -57,7 +53,6 @@ export const datasets = {
|
|
|
57
53
|
fs.removeSync(sqlFilePath);
|
|
58
54
|
fs.moveSync(path.join(dataDir, "export_sens.sql"), sqlFilePath);
|
|
59
55
|
},
|
|
60
|
-
schema: "public",
|
|
61
56
|
title: "Sénateurs (y compris organes et présence)",
|
|
62
57
|
url: "https://data.senat.fr/data/senateurs/export_sens.zip",
|
|
63
58
|
},
|
package/lib/scripts/retrieve_open_data.js
CHANGED
|
@@ -8,9 +8,10 @@ import StreamZip from "node-stream-zip";
|
|
|
8
8
|
import readline from "readline";
|
|
9
9
|
import windows1252 from "windows-1252";
|
|
10
10
|
import { pipeline } from "stream";
|
|
11
|
+
import iconv from "iconv-lite";
|
|
11
12
|
import { promisify } from "util";
|
|
12
13
|
import config from "../config";
|
|
13
|
-
import {
|
|
14
|
+
import { getChosenDatasets, getEnabledDatasets } from "../datasets";
|
|
14
15
|
import { commonOptions } from "./shared/cli_helpers";
|
|
15
16
|
const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
|
|
16
17
|
const optionsDefinitions = [
|
|
@@ -67,6 +68,62 @@ async function downloadFile(url, dest) {
|
|
|
67
68
|
}
|
|
68
69
|
await streamPipeline(response.body, fs.createWriteStream(dest));
|
|
69
70
|
}
|
|
71
|
+
/**
|
|
72
|
+
* Copy a dataset database to the main Senat database (overwriting its contents).
|
|
73
|
+
*/
|
|
74
|
+
async function copyToSenat(dataset, dataDir, options) {
|
|
75
|
+
if (!options["silent"]) {
|
|
76
|
+
console.log(`Copying ${dataset.database} to Senat database...`);
|
|
77
|
+
}
|
|
78
|
+
const sqlFilePath = path.join(dataDir, `${dataset.database}.sql`);
|
|
79
|
+
const schemaDumpFile = path.join(dataDir, `${dataset.database}_schema_dump.sql`);
|
|
80
|
+
// Write the header and then stream the rest of the SQL file
|
|
81
|
+
const schemaSqlWriter = fs.createWriteStream(schemaDumpFile, { encoding: "utf8" });
|
|
82
|
+
const lineReader = readline.createInterface({
|
|
83
|
+
input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
|
|
84
|
+
crlfDelay: Infinity,
|
|
85
|
+
});
|
|
86
|
+
for await (const line of lineReader) {
|
|
87
|
+
let newLine = line;
|
|
88
|
+
// Replace 'public' schema outside single-quoted strings
|
|
89
|
+
function replacePublicOutsideStrings(line, schema) {
|
|
90
|
+
const parts = line.split(/(')/);
|
|
91
|
+
let inString = false;
|
|
92
|
+
for (let i = 0; i < parts.length; i++) {
|
|
93
|
+
if (parts[i] === "'") {
|
|
94
|
+
inString = !inString;
|
|
95
|
+
}
|
|
96
|
+
else if (!inString) {
|
|
97
|
+
// Only replace outside of strings, including before comma
|
|
98
|
+
parts[i] = parts[i].replace(/\bpublic\b(?=(\s*\.|\s*[,;]|\s|$))/g, schema);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
return parts.join('');
|
|
102
|
+
}
|
|
103
|
+
newLine = replacePublicOutsideStrings(line, dataset.database);
|
|
104
|
+
// Convert to LATIN1, replacing unconvertible characters with '?'
|
|
105
|
+
const latin1Line = iconv.encode(newLine, 'latin1').toString('latin1');
|
|
106
|
+
schemaSqlWriter.write(latin1Line + "\n");
|
|
107
|
+
}
|
|
108
|
+
schemaSqlWriter.end();
|
|
109
|
+
await new Promise((resolve, reject) => {
|
|
110
|
+
schemaSqlWriter.on("finish", () => {
|
|
111
|
+
try {
|
|
112
|
+
execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -f ${schemaDumpFile}`, {
|
|
113
|
+
cwd: dataDir,
|
|
114
|
+
env: process.env,
|
|
115
|
+
encoding: "utf-8",
|
|
116
|
+
});
|
|
117
|
+
}
|
|
118
|
+
finally {
|
|
119
|
+
try { }
|
|
120
|
+
catch { }
|
|
121
|
+
}
|
|
122
|
+
resolve();
|
|
123
|
+
});
|
|
124
|
+
schemaSqlWriter.on("error", reject);
|
|
125
|
+
});
|
|
126
|
+
}
|
|
70
127
|
async function retrieveDataset(dataDir, dataset) {
|
|
71
128
|
const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
|
|
72
129
|
const zipFilePath = path.join(dataDir, zipFilename);
|
|
@@ -120,7 +177,7 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
120
177
|
}
|
|
121
178
|
if ((options["all"] || options["repairEncoding"]) && dataset.repairEncoding) {
|
|
122
179
|
if (!options["silent"]) {
|
|
123
|
-
console.log(`Repairing Windows CP1252 encoding
|
|
180
|
+
console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}…`);
|
|
124
181
|
}
|
|
125
182
|
const repairedSqlFilePath = sqlFilePath + ".repaired";
|
|
126
183
|
const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
|
|
@@ -131,7 +188,9 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
131
188
|
crlfDelay: Infinity,
|
|
132
189
|
});
|
|
133
190
|
for await (const line of lineReader) {
|
|
134
|
-
|
|
191
|
+
// Only repair encoding
|
|
192
|
+
let repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
|
|
193
|
+
repairedSqlWriter.write(repairedLine + "\n");
|
|
135
194
|
}
|
|
136
195
|
repairedSqlWriter.end();
|
|
137
196
|
await fs.move(repairedSqlFilePath, sqlFilePath, { overwrite: true });
|
|
@@ -140,22 +199,17 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
140
199
|
if (!options["silent"]) {
|
|
141
200
|
console.log(`Importing ${dataset.title}: ${sqlFilename}…`);
|
|
142
201
|
}
|
|
143
|
-
|
|
144
|
-
cwd: dataDir,
|
|
145
|
-
env: process.env,
|
|
146
|
-
encoding: "utf-8",
|
|
147
|
-
stdio: ["pipe", "ignore", "ignore"],
|
|
148
|
-
});
|
|
202
|
+
await copyToSenat(dataset, dataDir, options);
|
|
149
203
|
}
|
|
150
204
|
if (options["schema"]) {
|
|
151
205
|
let definitionsDir = path.resolve("src", "raw_types_schemats");
|
|
152
206
|
assert(fs.statSync(definitionsDir).isDirectory());
|
|
153
207
|
if (!options["silent"]) {
|
|
154
|
-
console.log(`Creating TypeScript definitions from schema
|
|
208
|
+
console.log(`Creating TypeScript definitions from schema '${dataset.database}' in database 'senat'…`);
|
|
155
209
|
}
|
|
156
|
-
const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}
|
|
210
|
+
const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/senat`;
|
|
157
211
|
let definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
|
|
158
|
-
execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.
|
|
212
|
+
execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.database} -o ${definitionFilePath}`, {
|
|
159
213
|
// cwd: dataDir,
|
|
160
214
|
env: process.env,
|
|
161
215
|
encoding: "utf-8",
|
|
@@ -168,7 +222,7 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
168
222
|
fs.writeFileSync(definitionFilePath, definitionRepaired);
|
|
169
223
|
definitionsDir = path.resolve("src", "raw_types");
|
|
170
224
|
definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
|
|
171
|
-
execSync(`npx pg-to-ts generate -c '${dbConnectionString}' -s ${dataset.
|
|
225
|
+
execSync(`npx pg-to-ts generate -c '${dbConnectionString}' -s ${dataset.database} -o ${definitionFilePath}`, {
|
|
172
226
|
env: process.env,
|
|
173
227
|
encoding: "utf-8",
|
|
174
228
|
// stdio: ["ignore", "ignore", "pipe"],
|
|
@@ -190,18 +244,16 @@ async function retrieveOpenData() {
|
|
|
190
244
|
process.env["PGUSER"] &&
|
|
191
245
|
process.env["PGPASSWORD"], "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
|
|
192
246
|
console.time("data extraction time");
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
});
|
|
204
|
-
}
|
|
247
|
+
execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "DROP DATABASE IF EXISTS senat"`, {
|
|
248
|
+
cwd: dataDir,
|
|
249
|
+
env: process.env,
|
|
250
|
+
encoding: "utf-8",
|
|
251
|
+
});
|
|
252
|
+
execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "CREATE DATABASE senat WITH OWNER opendata"`, {
|
|
253
|
+
cwd: dataDir,
|
|
254
|
+
env: process.env,
|
|
255
|
+
encoding: "utf-8",
|
|
256
|
+
});
|
|
205
257
|
const enabledDatasets = getEnabledDatasets(options["categories"]);
|
|
206
258
|
const chosenDatasets = getChosenDatasets(enabledDatasets);
|
|
207
259
|
for (const dataset of chosenDatasets) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tricoteuses/senat",
|
|
3
|
-
"version": "2.9.9",
|
|
3
|
+
"version": "2.9.10",
|
|
4
4
|
"description": "Handle French Sénat's open data",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"France",
|
|
@@ -89,6 +89,7 @@
|
|
|
89
89
|
"@typescript-eslint/parser": "^8.13.0",
|
|
90
90
|
"cross-env": "^10.0.0",
|
|
91
91
|
"eslint": "^8.57.1",
|
|
92
|
+
"iconv-lite": "^0.7.0",
|
|
92
93
|
"pg-to-ts": "^4.1.1",
|
|
93
94
|
"prettier": "^3.5.3",
|
|
94
95
|
"tslib": "^2.1.0",
|