@tricoteuses/senat 2.9.9 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/databases.d.ts +1 -0
- package/lib/databases.js +21 -22
- package/lib/datasets.d.ts +0 -1
- package/lib/datasets.js +1 -6
- package/lib/scripts/retrieve_open_data.js +79 -25
- package/package.json +2 -1
package/lib/databases.d.ts
CHANGED
|
@@ -19,6 +19,7 @@ type QuestionsDb = {
|
|
|
19
19
|
type SensDb = {
|
|
20
20
|
[K in keyof SensTableTypes]: SensTableTypes[K]["select"];
|
|
21
21
|
};
|
|
22
|
+
export declare const dbSenat: Kysely<unknown>;
|
|
22
23
|
export declare const dbAmeli: Kysely<AmeliDb>;
|
|
23
24
|
export declare const dbDebats: Kysely<DebatsDb>;
|
|
24
25
|
export declare const dbDosleg: Kysely<DoslegDb>;
|
package/lib/databases.js
CHANGED
|
@@ -9,25 +9,24 @@ import { datasets } from "./datasets";
|
|
|
9
9
|
pg.types.setTypeParser(types.builtins.INT8, (val) => {
|
|
10
10
|
return parseInt(val, 10);
|
|
11
11
|
});
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
}
|
|
12
|
+
const senatPool = new pg.Pool({
|
|
13
|
+
database: "senat",
|
|
14
|
+
host: config.db.host,
|
|
15
|
+
user: config.db.user,
|
|
16
|
+
password: config.db.password,
|
|
17
|
+
port: config.db.port,
|
|
18
|
+
max: 10,
|
|
19
|
+
});
|
|
20
|
+
const senatDialect = new PostgresDialect({
|
|
21
|
+
pool: senatPool,
|
|
22
|
+
cursor: Cursor,
|
|
23
|
+
});
|
|
24
|
+
export const dbSenat = new Kysely({
|
|
25
|
+
log: ["error"],
|
|
26
|
+
dialect: senatDialect,
|
|
27
|
+
});
|
|
28
|
+
export const dbAmeli = dbSenat.withSchema(datasets.ameli.database);
|
|
29
|
+
export const dbDebats = dbSenat.withSchema(datasets.debats.database);
|
|
30
|
+
export const dbDosleg = dbSenat.withSchema(datasets.dosleg.database);
|
|
31
|
+
export const dbQuestions = dbSenat.withSchema(datasets.questions.database);
|
|
32
|
+
export const dbSens = dbSenat.withSchema(datasets.sens.database);
|
package/lib/datasets.d.ts
CHANGED
package/lib/datasets.js
CHANGED
|
@@ -16,35 +16,31 @@ export var EnabledDatasets;
|
|
|
16
16
|
export const datasets = {
|
|
17
17
|
ameli: {
|
|
18
18
|
database: "ameli",
|
|
19
|
-
repairEncoding:
|
|
19
|
+
repairEncoding: true,
|
|
20
20
|
repairZip: (dataset, dataDir) => {
|
|
21
21
|
const sqlFilename = `${dataset.database}.sql`;
|
|
22
22
|
const sqlFilePath = path.join(dataDir, sqlFilename);
|
|
23
23
|
fs.removeSync(sqlFilePath);
|
|
24
24
|
fs.moveSync(path.join(dataDir, "var", "opt", "opendata", sqlFilename), sqlFilePath);
|
|
25
25
|
},
|
|
26
|
-
schema: "public",
|
|
27
26
|
title: "Amendements",
|
|
28
27
|
url: "https://data.senat.fr/data/ameli/ameli.zip",
|
|
29
28
|
},
|
|
30
29
|
debats: {
|
|
31
30
|
database: "debats",
|
|
32
31
|
repairEncoding: true,
|
|
33
|
-
schema: "public",
|
|
34
32
|
title: "Informations relatives aux comptes rendus intégraux de la séance publique",
|
|
35
33
|
url: "https://data.senat.fr/data/debats/debats.zip",
|
|
36
34
|
},
|
|
37
35
|
dosleg: {
|
|
38
36
|
database: "dosleg",
|
|
39
37
|
repairEncoding: true,
|
|
40
|
-
schema: "public",
|
|
41
38
|
title: "Dossiers législatifs",
|
|
42
39
|
url: "https://data.senat.fr/data/dosleg/dosleg.zip",
|
|
43
40
|
},
|
|
44
41
|
questions: {
|
|
45
42
|
database: "questions",
|
|
46
43
|
repairEncoding: true,
|
|
47
|
-
schema: "questions",
|
|
48
44
|
title: "Questions écrites et orales posées par les sénateurs au Gouvernement",
|
|
49
45
|
url: "https://data.senat.fr/data/questions/questions.zip",
|
|
50
46
|
},
|
|
@@ -57,7 +53,6 @@ export const datasets = {
|
|
|
57
53
|
fs.removeSync(sqlFilePath);
|
|
58
54
|
fs.moveSync(path.join(dataDir, "export_sens.sql"), sqlFilePath);
|
|
59
55
|
},
|
|
60
|
-
schema: "public",
|
|
61
56
|
title: "Sénateurs (y compris organes et présence)",
|
|
62
57
|
url: "https://data.senat.fr/data/senateurs/export_sens.zip",
|
|
63
58
|
},
|
|
@@ -8,9 +8,10 @@ import StreamZip from "node-stream-zip";
|
|
|
8
8
|
import readline from "readline";
|
|
9
9
|
import windows1252 from "windows-1252";
|
|
10
10
|
import { pipeline } from "stream";
|
|
11
|
+
import iconv from "iconv-lite";
|
|
11
12
|
import { promisify } from "util";
|
|
12
13
|
import config from "../config";
|
|
13
|
-
import {
|
|
14
|
+
import { getChosenDatasets, getEnabledDatasets } from "../datasets";
|
|
14
15
|
import { commonOptions } from "./shared/cli_helpers";
|
|
15
16
|
const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
|
|
16
17
|
const optionsDefinitions = [
|
|
@@ -67,6 +68,64 @@ async function downloadFile(url, dest) {
|
|
|
67
68
|
}
|
|
68
69
|
await streamPipeline(response.body, fs.createWriteStream(dest));
|
|
69
70
|
}
|
|
71
|
+
/**
|
|
72
|
+
* Copy a dataset database to the main Senat database (overwriting its contents).
|
|
73
|
+
*/
|
|
74
|
+
async function copyToSenat(dataset, dataDir, options) {
|
|
75
|
+
if (!options["silent"]) {
|
|
76
|
+
console.log(`Copying ${dataset.database} to Senat database...`);
|
|
77
|
+
}
|
|
78
|
+
const sqlFilePath = path.join(dataDir, `${dataset.database}.sql`);
|
|
79
|
+
const schemaDumpFile = path.join(dataDir, `${dataset.database}_schema_dump.sql`);
|
|
80
|
+
// Write the header and then stream the rest of the SQL file
|
|
81
|
+
const schemaSqlWriter = fs.createWriteStream(schemaDumpFile, { encoding: "utf8" });
|
|
82
|
+
// Add CREATE SCHEMA statement at the top
|
|
83
|
+
schemaSqlWriter.write(`CREATE SCHEMA IF NOT EXISTS ${dataset.database};\n`);
|
|
84
|
+
const lineReader = readline.createInterface({
|
|
85
|
+
input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
|
|
86
|
+
crlfDelay: Infinity,
|
|
87
|
+
});
|
|
88
|
+
for await (const line of lineReader) {
|
|
89
|
+
let newLine = line;
|
|
90
|
+
// Replace 'public' schema outside single-quoted strings
|
|
91
|
+
function replacePublicOutsideStrings(line, schema) {
|
|
92
|
+
const parts = line.split(/(')/);
|
|
93
|
+
let inString = false;
|
|
94
|
+
for (let i = 0; i < parts.length; i++) {
|
|
95
|
+
if (parts[i] === "'") {
|
|
96
|
+
inString = !inString;
|
|
97
|
+
}
|
|
98
|
+
else if (!inString) {
|
|
99
|
+
// Only replace outside of strings, including before comma
|
|
100
|
+
parts[i] = parts[i].replace(/\bpublic\b(?=(\s*\.|\s*[,;]|\s|$))/g, schema);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
return parts.join('');
|
|
104
|
+
}
|
|
105
|
+
newLine = replacePublicOutsideStrings(line, dataset.database);
|
|
106
|
+
// Convert to LATIN1, replacing unconvertible characters with '?'
|
|
107
|
+
const latin1Line = iconv.encode(newLine, 'latin1').toString('latin1');
|
|
108
|
+
schemaSqlWriter.write(latin1Line + "\n");
|
|
109
|
+
}
|
|
110
|
+
schemaSqlWriter.end();
|
|
111
|
+
await new Promise((resolve, reject) => {
|
|
112
|
+
schemaSqlWriter.on("finish", () => {
|
|
113
|
+
try {
|
|
114
|
+
execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -f ${schemaDumpFile}`, {
|
|
115
|
+
env: process.env,
|
|
116
|
+
encoding: "utf-8",
|
|
117
|
+
stdio: ["ignore", "ignore", "pipe"],
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
finally {
|
|
121
|
+
try { }
|
|
122
|
+
catch { }
|
|
123
|
+
}
|
|
124
|
+
resolve();
|
|
125
|
+
});
|
|
126
|
+
schemaSqlWriter.on("error", reject);
|
|
127
|
+
});
|
|
128
|
+
}
|
|
70
129
|
async function retrieveDataset(dataDir, dataset) {
|
|
71
130
|
const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
|
|
72
131
|
const zipFilePath = path.join(dataDir, zipFilename);
|
|
@@ -120,7 +179,7 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
120
179
|
}
|
|
121
180
|
if ((options["all"] || options["repairEncoding"]) && dataset.repairEncoding) {
|
|
122
181
|
if (!options["silent"]) {
|
|
123
|
-
console.log(`Repairing Windows CP1252 encoding
|
|
182
|
+
console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}…`);
|
|
124
183
|
}
|
|
125
184
|
const repairedSqlFilePath = sqlFilePath + ".repaired";
|
|
126
185
|
const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
|
|
@@ -131,7 +190,9 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
131
190
|
crlfDelay: Infinity,
|
|
132
191
|
});
|
|
133
192
|
for await (const line of lineReader) {
|
|
134
|
-
|
|
193
|
+
// Only repair encoding
|
|
194
|
+
let repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
|
|
195
|
+
repairedSqlWriter.write(repairedLine + "\n");
|
|
135
196
|
}
|
|
136
197
|
repairedSqlWriter.end();
|
|
137
198
|
await fs.move(repairedSqlFilePath, sqlFilePath, { overwrite: true });
|
|
@@ -140,22 +201,17 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
140
201
|
if (!options["silent"]) {
|
|
141
202
|
console.log(`Importing ${dataset.title}: ${sqlFilename}…`);
|
|
142
203
|
}
|
|
143
|
-
|
|
144
|
-
cwd: dataDir,
|
|
145
|
-
env: process.env,
|
|
146
|
-
encoding: "utf-8",
|
|
147
|
-
stdio: ["pipe", "ignore", "ignore"],
|
|
148
|
-
});
|
|
204
|
+
await copyToSenat(dataset, dataDir, options);
|
|
149
205
|
}
|
|
150
206
|
if (options["schema"]) {
|
|
151
207
|
let definitionsDir = path.resolve("src", "raw_types_schemats");
|
|
152
208
|
assert(fs.statSync(definitionsDir).isDirectory());
|
|
153
209
|
if (!options["silent"]) {
|
|
154
|
-
console.log(`Creating TypeScript definitions from schema
|
|
210
|
+
console.log(`Creating TypeScript definitions from schema '${dataset.database}' in database 'senat'…`);
|
|
155
211
|
}
|
|
156
|
-
const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}
|
|
212
|
+
const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/senat`;
|
|
157
213
|
let definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
|
|
158
|
-
execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.
|
|
214
|
+
execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.database} -o ${definitionFilePath}`, {
|
|
159
215
|
// cwd: dataDir,
|
|
160
216
|
env: process.env,
|
|
161
217
|
encoding: "utf-8",
|
|
@@ -168,7 +224,7 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
168
224
|
fs.writeFileSync(definitionFilePath, definitionRepaired);
|
|
169
225
|
definitionsDir = path.resolve("src", "raw_types");
|
|
170
226
|
definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
|
|
171
|
-
execSync(`npx pg-to-ts generate -c '${dbConnectionString}' -s ${dataset.
|
|
227
|
+
execSync(`npx pg-to-ts generate -c '${dbConnectionString}' -s ${dataset.database} -o ${definitionFilePath}`, {
|
|
172
228
|
env: process.env,
|
|
173
229
|
encoding: "utf-8",
|
|
174
230
|
// stdio: ["ignore", "ignore", "pipe"],
|
|
@@ -190,18 +246,16 @@ async function retrieveOpenData() {
|
|
|
190
246
|
process.env["PGUSER"] &&
|
|
191
247
|
process.env["PGPASSWORD"], "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
|
|
192
248
|
console.time("data extraction time");
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
});
|
|
204
|
-
}
|
|
249
|
+
execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "DROP DATABASE IF EXISTS senat"`, {
|
|
250
|
+
cwd: dataDir,
|
|
251
|
+
env: process.env,
|
|
252
|
+
encoding: "utf-8",
|
|
253
|
+
});
|
|
254
|
+
execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "CREATE DATABASE senat WITH OWNER opendata"`, {
|
|
255
|
+
cwd: dataDir,
|
|
256
|
+
env: process.env,
|
|
257
|
+
encoding: "utf-8",
|
|
258
|
+
});
|
|
205
259
|
const enabledDatasets = getEnabledDatasets(options["categories"]);
|
|
206
260
|
const chosenDatasets = getChosenDatasets(enabledDatasets);
|
|
207
261
|
for (const dataset of chosenDatasets) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tricoteuses/senat",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.10.0",
|
|
4
4
|
"description": "Handle French Sénat's open data",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"France",
|
|
@@ -89,6 +89,7 @@
|
|
|
89
89
|
"@typescript-eslint/parser": "^8.13.0",
|
|
90
90
|
"cross-env": "^10.0.0",
|
|
91
91
|
"eslint": "^8.57.1",
|
|
92
|
+
"iconv-lite": "^0.7.0",
|
|
92
93
|
"pg-to-ts": "^4.1.1",
|
|
93
94
|
"prettier": "^3.5.3",
|
|
94
95
|
"tslib": "^2.1.0",
|