@tricoteuses/senat 2.9.9 → 2.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ type QuestionsDb = {
19
19
  type SensDb = {
20
20
  [K in keyof SensTableTypes]: SensTableTypes[K]["select"];
21
21
  };
22
+ export declare const dbSenat: Kysely<unknown>;
22
23
  export declare const dbAmeli: Kysely<AmeliDb>;
23
24
  export declare const dbDebats: Kysely<DebatsDb>;
24
25
  export declare const dbDosleg: Kysely<DoslegDb>;
package/lib/databases.js CHANGED
@@ -9,25 +9,24 @@ import { datasets } from "./datasets";
9
9
  pg.types.setTypeParser(types.builtins.INT8, (val) => {
10
10
  return parseInt(val, 10);
11
11
  });
12
- export const dbAmeli = createDb(datasets.ameli.database, datasets.ameli.schema);
13
- export const dbDebats = createDb(datasets.debats.database, datasets.debats.schema);
14
- export const dbDosleg = createDb(datasets.dosleg.database, datasets.dosleg.schema);
15
- export const dbQuestions = createDb(datasets.questions.database, datasets.questions.schema);
16
- export const dbSens = createDb(datasets.sens.database, datasets.sens.schema);
17
- function createDb(database, schema) {
18
- const dialect = new PostgresDialect({
19
- pool: new pg.Pool({
20
- database: database,
21
- host: config.db.host,
22
- user: config.db.user,
23
- password: config.db.password,
24
- port: config.db.port,
25
- max: 10,
26
- }),
27
- cursor: Cursor,
28
- });
29
- return new Kysely({
30
- log: ["error"],
31
- dialect: dialect,
32
- }).withSchema(schema);
33
- }
/**
 * Kysely handle on the single "senat" PostgreSQL database.
 *
 * Connection settings come from `config.db`; query errors are logged.
 * No schema is selected on this handle: use one of the schema-scoped
 * exports below to query a given dataset.
 */
export const dbSenat = new Kysely({
    log: ["error"],
    dialect: new PostgresDialect({
        pool: new pg.Pool({
            database: "senat",
            host: config.db.host,
            user: config.db.user,
            password: config.db.password,
            port: config.db.port,
            max: 10,
        }),
        cursor: Cursor,
    }),
});

/** Scope the shared handle to the schema named after a dataset's `database` field. */
const forDataset = (dataset) => dbSenat.withSchema(dataset.database);

export const dbAmeli = forDataset(datasets.ameli);
export const dbDebats = forDataset(datasets.debats);
export const dbDosleg = forDataset(datasets.dosleg);
export const dbQuestions = forDataset(datasets.questions);
export const dbSens = forDataset(datasets.sens);
package/lib/datasets.d.ts CHANGED
@@ -2,7 +2,6 @@ export interface Dataset {
2
2
  database: string;
3
3
  repairEncoding: boolean;
4
4
  repairZip?: (dataset: Dataset, dataDir: string) => void;
5
- schema: string;
6
5
  title: string;
7
6
  url: string;
8
7
  }
package/lib/datasets.js CHANGED
@@ -16,35 +16,31 @@ export var EnabledDatasets;
16
16
  export const datasets = {
17
17
  ameli: {
18
18
  database: "ameli",
19
- repairEncoding: false,
19
+ repairEncoding: true,
20
20
  repairZip: (dataset, dataDir) => {
21
21
  const sqlFilename = `${dataset.database}.sql`;
22
22
  const sqlFilePath = path.join(dataDir, sqlFilename);
23
23
  fs.removeSync(sqlFilePath);
24
24
  fs.moveSync(path.join(dataDir, "var", "opt", "opendata", sqlFilename), sqlFilePath);
25
25
  },
26
- schema: "public",
27
26
  title: "Amendements",
28
27
  url: "https://data.senat.fr/data/ameli/ameli.zip",
29
28
  },
30
29
  debats: {
31
30
  database: "debats",
32
31
  repairEncoding: true,
33
- schema: "public",
34
32
  title: "Informations relatives aux comptes rendus intégraux de la séance publique",
35
33
  url: "https://data.senat.fr/data/debats/debats.zip",
36
34
  },
37
35
  dosleg: {
38
36
  database: "dosleg",
39
37
  repairEncoding: true,
40
- schema: "public",
41
38
  title: "Dossiers législatifs",
42
39
  url: "https://data.senat.fr/data/dosleg/dosleg.zip",
43
40
  },
44
41
  questions: {
45
42
  database: "questions",
46
43
  repairEncoding: true,
47
- schema: "questions",
48
44
  title: "Questions écrites et orales posées par les sénateurs au Gouvernement",
49
45
  url: "https://data.senat.fr/data/questions/questions.zip",
50
46
  },
@@ -57,7 +53,6 @@ export const datasets = {
57
53
  fs.removeSync(sqlFilePath);
58
54
  fs.moveSync(path.join(dataDir, "export_sens.sql"), sqlFilePath);
59
55
  },
60
- schema: "public",
61
56
  title: "Sénateurs (y compris organes et présence)",
62
57
  url: "https://data.senat.fr/data/senateurs/export_sens.zip",
63
58
  },
@@ -8,9 +8,10 @@ import StreamZip from "node-stream-zip";
8
8
  import readline from "readline";
9
9
  import windows1252 from "windows-1252";
10
10
  import { pipeline } from "stream";
11
+ import iconv from "iconv-lite";
11
12
  import { promisify } from "util";
12
13
  import config from "../config";
13
- import { datasets, getChosenDatasets, getEnabledDatasets } from "../datasets";
14
+ import { getChosenDatasets, getEnabledDatasets } from "../datasets";
14
15
  import { commonOptions } from "./shared/cli_helpers";
15
16
  const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
16
17
  const optionsDefinitions = [
@@ -67,6 +68,62 @@ async function downloadFile(url, dest) {
67
68
  }
68
69
  await streamPipeline(response.body, fs.createWriteStream(dest));
69
70
  }
// Matches the bare word `public` when it is followed by `.`, `,`, `;`,
// whitespace or the end of the text. Hoisted so it is compiled only once.
const publicSchemaRegex = /\bpublic\b(?=(\s*\.|\s*[,;]|\s|$))/g;

/**
 * Replace references to the `public` schema with `schema`, leaving the
 * contents of single-quoted strings untouched.
 *
 * Quote tracking restarts on every call, i.e. on every line.
 * NOTE(review): a string literal or data row spanning several lines is
 * therefore not recognised as quoted text — confirm against the dump format.
 *
 * @param line One line of SQL.
 * @param schema Name that replaces `public`.
 * @returns The rewritten line.
 */
function replacePublicOutsideStrings(line, schema) {
    // Splitting on a captured quote keeps the quotes as their own parts,
    // so each one toggles the in-string state.
    const parts = line.split(/(')/);
    let inString = false;
    for (let i = 0; i < parts.length; i++) {
        if (parts[i] === "'") {
            inString = !inString;
        }
        else if (!inString) {
            // A replacer function keeps `$` sequences in the schema name literal.
            parts[i] = parts[i].replace(publicSchemaRegex, () => schema);
        }
    }
    return parts.join("");
}

/**
 * Load one dataset's SQL dump into the shared "senat" database, in a schema
 * named after the dataset.
 *
 * Reads `<dataDir>/<dataset.database>.sql` line by line, rewrites references
 * to the `public` schema into `dataset.database`, writes the result to
 * `<dataDir>/<dataset.database>_schema_dump.sql` and then runs that file
 * through `psql -d senat`.
 *
 * @param dataset Dataset descriptor; only `dataset.database` is read here.
 * @param dataDir Directory holding the SQL dump; also used as psql's cwd.
 * @param options CLI options: `silent` suppresses the progress message,
 *   `sudo` (a user name) runs psql through `sudo -u <user>`.
 * @returns A promise that resolves once psql has completed.
 * @throws If the rewritten dump cannot be written or the psql command fails.
 */
async function copyToSenat(dataset, dataDir, options) {
    if (!options["silent"]) {
        console.log(`Copying ${dataset.database} to Senat database...`);
    }
    const sqlFilePath = path.join(dataDir, `${dataset.database}.sql`);
    const schemaDumpFilename = `${dataset.database}_schema_dump.sql`;
    const schemaDumpFile = path.join(dataDir, schemaDumpFilename);
    const schemaSqlWriter = fs.createWriteStream(schemaDumpFile, { encoding: "utf8" });
    // Listen for completion and failure BEFORE the first write, so that an
    // early stream error is captured instead of being an unhandled event.
    const writerFinished = new Promise((resolve, reject) => {
        schemaSqlWriter.on("finish", resolve);
        schemaSqlWriter.on("error", reject);
    });
    // Mark the promise as handled; it is still awaited (and rethrown) below.
    writerFinished.catch(() => { });
    const lineReader = readline.createInterface({
        input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
        crlfDelay: Infinity,
    });
    try {
        for await (const line of lineReader) {
            const newLine = replacePublicOutsideStrings(line, dataset.database);
            // Round-trip through the latin1 codec: as the original comment
            // states, unconvertible characters are replaced with '?'.
            // The stream itself still writes the text as UTF-8.
            const latin1Line = iconv.encode(newLine, 'latin1').toString('latin1');
            if (!schemaSqlWriter.write(latin1Line + "\n")) {
                // Respect backpressure so a large dump is not buffered in
                // memory; also wake up if the writer fails while waiting.
                await Promise.race([
                    new Promise((resolve) => schemaSqlWriter.once("drain", resolve)),
                    writerFinished,
                ]);
            }
        }
        schemaSqlWriter.end();
        await writerFinished;
    }
    catch (error) {
        lineReader.close();
        schemaSqlWriter.destroy();
        throw error;
    }
    // Run psql only after the file is fully flushed. Running it here rather
    // than inside the "finish" listener lets a failure reject this function's
    // promise. The file is given by bare name because psql runs with
    // cwd = dataDir; passing the joined path would resolve twice when dataDir
    // is relative.
    execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -f ${schemaDumpFilename}`, {
        cwd: dataDir,
        env: process.env,
        encoding: "utf-8",
    });
}
70
127
  async function retrieveDataset(dataDir, dataset) {
71
128
  const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
72
129
  const zipFilePath = path.join(dataDir, zipFilename);
@@ -120,7 +177,7 @@ async function retrieveDataset(dataDir, dataset) {
120
177
  }
121
178
  if ((options["all"] || options["repairEncoding"]) && dataset.repairEncoding) {
122
179
  if (!options["silent"]) {
123
- console.log(`Repairing Windows CP1252 encoding of ${dataset.title}: ${sqlFilename}…`);
180
+ console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}…`);
124
181
  }
125
182
  const repairedSqlFilePath = sqlFilePath + ".repaired";
126
183
  const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
@@ -131,7 +188,9 @@ async function retrieveDataset(dataDir, dataset) {
131
188
  crlfDelay: Infinity,
132
189
  });
133
190
  for await (const line of lineReader) {
134
- repairedSqlWriter.write(line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" })) + "\n");
191
+ // Only repair encoding
192
+ let repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
193
+ repairedSqlWriter.write(repairedLine + "\n");
135
194
  }
136
195
  repairedSqlWriter.end();
137
196
  await fs.move(repairedSqlFilePath, sqlFilePath, { overwrite: true });
@@ -140,22 +199,17 @@ async function retrieveDataset(dataDir, dataset) {
140
199
  if (!options["silent"]) {
141
200
  console.log(`Importing ${dataset.title}: ${sqlFilename}…`);
142
201
  }
143
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d ${dataset.database} -f ${sqlFilename}`, {
144
- cwd: dataDir,
145
- env: process.env,
146
- encoding: "utf-8",
147
- stdio: ["pipe", "ignore", "ignore"],
148
- });
202
+ await copyToSenat(dataset, dataDir, options);
149
203
  }
150
204
  if (options["schema"]) {
151
205
  let definitionsDir = path.resolve("src", "raw_types_schemats");
152
206
  assert(fs.statSync(definitionsDir).isDirectory());
153
207
  if (!options["silent"]) {
154
- console.log(`Creating TypeScript definitions from schema of database ${dataset.database}…`);
208
+ console.log(`Creating TypeScript definitions from schema '${dataset.database}' in database 'senat'…`);
155
209
  }
156
- const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/${dataset.database}`;
210
+ const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/senat`;
157
211
  let definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
158
- execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.schema} -o ${definitionFilePath}`, {
212
+ execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.database} -o ${definitionFilePath}`, {
159
213
  // cwd: dataDir,
160
214
  env: process.env,
161
215
  encoding: "utf-8",
@@ -168,7 +222,7 @@ async function retrieveDataset(dataDir, dataset) {
168
222
  fs.writeFileSync(definitionFilePath, definitionRepaired);
169
223
  definitionsDir = path.resolve("src", "raw_types");
170
224
  definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
171
- execSync(`npx pg-to-ts generate -c '${dbConnectionString}' -s ${dataset.schema} -o ${definitionFilePath}`, {
225
+ execSync(`npx pg-to-ts generate -c '${dbConnectionString}' -s ${dataset.database} -o ${definitionFilePath}`, {
172
226
  env: process.env,
173
227
  encoding: "utf-8",
174
228
  // stdio: ["ignore", "ignore", "pipe"],
@@ -190,18 +244,16 @@ async function retrieveOpenData() {
190
244
  process.env["PGUSER"] &&
191
245
  process.env["PGPASSWORD"], "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
192
246
  console.time("data extraction time");
193
- for (const [, dataset] of Object.entries(datasets)) {
194
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "DROP DATABASE IF EXISTS ${dataset.database}"`, {
195
- cwd: dataDir,
196
- env: process.env,
197
- encoding: "utf-8",
198
- });
199
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "CREATE DATABASE ${dataset.database} WITH OWNER opendata"`, {
200
- cwd: dataDir,
201
- env: process.env,
202
- encoding: "utf-8",
203
- });
204
- }
247
+ execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "DROP DATABASE IF EXISTS senat"`, {
248
+ cwd: dataDir,
249
+ env: process.env,
250
+ encoding: "utf-8",
251
+ });
252
+ execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "CREATE DATABASE senat WITH OWNER opendata"`, {
253
+ cwd: dataDir,
254
+ env: process.env,
255
+ encoding: "utf-8",
256
+ });
205
257
  const enabledDatasets = getEnabledDatasets(options["categories"]);
206
258
  const chosenDatasets = getChosenDatasets(enabledDatasets);
207
259
  for (const dataset of chosenDatasets) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.9.9",
3
+ "version": "2.9.10",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",
@@ -89,6 +89,7 @@
89
89
  "@typescript-eslint/parser": "^8.13.0",
90
90
  "cross-env": "^10.0.0",
91
91
  "eslint": "^8.57.1",
92
+ "iconv-lite": "^0.7.0",
92
93
  "pg-to-ts": "^4.1.1",
93
94
  "prettier": "^3.5.3",
94
95
  "tslib": "^2.1.0",