@tricoteuses/senat 2.9.9 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/databases.d.ts CHANGED
@@ -19,6 +19,7 @@ type QuestionsDb = {
19
19
  type SensDb = {
20
20
  [K in keyof SensTableTypes]: SensTableTypes[K]["select"];
21
21
  };
22
+ export declare const dbSenat: Kysely<unknown>;
22
23
  export declare const dbAmeli: Kysely<AmeliDb>;
23
24
  export declare const dbDebats: Kysely<DebatsDb>;
24
25
  export declare const dbDosleg: Kysely<DoslegDb>;
package/lib/databases.js CHANGED
@@ -9,25 +9,24 @@ import { datasets } from "./datasets";
9
9
  pg.types.setTypeParser(types.builtins.INT8, (val) => {
10
10
  return parseInt(val, 10);
11
11
  });
12
- export const dbAmeli = createDb(datasets.ameli.database, datasets.ameli.schema);
13
- export const dbDebats = createDb(datasets.debats.database, datasets.debats.schema);
14
- export const dbDosleg = createDb(datasets.dosleg.database, datasets.dosleg.schema);
15
- export const dbQuestions = createDb(datasets.questions.database, datasets.questions.schema);
16
- export const dbSens = createDb(datasets.sens.database, datasets.sens.schema);
17
- function createDb(database, schema) {
18
- const dialect = new PostgresDialect({
19
- pool: new pg.Pool({
20
- database: database,
21
- host: config.db.host,
22
- user: config.db.user,
23
- password: config.db.password,
24
- port: config.db.port,
25
- max: 10,
26
- }),
27
- cursor: Cursor,
28
- });
29
- return new Kysely({
30
- log: ["error"],
31
- dialect: dialect,
32
- }).withSchema(schema);
33
- }
12
+ const senatPool = new pg.Pool({
13
+ database: "senat",
14
+ host: config.db.host,
15
+ user: config.db.user,
16
+ password: config.db.password,
17
+ port: config.db.port,
18
+ max: 10,
19
+ });
20
+ const senatDialect = new PostgresDialect({
21
+ pool: senatPool,
22
+ cursor: Cursor,
23
+ });
24
+ export const dbSenat = new Kysely({
25
+ log: ["error"],
26
+ dialect: senatDialect,
27
+ });
28
+ export const dbAmeli = dbSenat.withSchema(datasets.ameli.database);
29
+ export const dbDebats = dbSenat.withSchema(datasets.debats.database);
30
+ export const dbDosleg = dbSenat.withSchema(datasets.dosleg.database);
31
+ export const dbQuestions = dbSenat.withSchema(datasets.questions.database);
32
+ export const dbSens = dbSenat.withSchema(datasets.sens.database);
package/lib/datasets.d.ts CHANGED
@@ -2,7 +2,6 @@ export interface Dataset {
2
2
  database: string;
3
3
  repairEncoding: boolean;
4
4
  repairZip?: (dataset: Dataset, dataDir: string) => void;
5
- schema: string;
6
5
  title: string;
7
6
  url: string;
8
7
  }
package/lib/datasets.js CHANGED
@@ -16,35 +16,31 @@ export var EnabledDatasets;
16
16
  export const datasets = {
17
17
  ameli: {
18
18
  database: "ameli",
19
- repairEncoding: false,
19
+ repairEncoding: true,
20
20
  repairZip: (dataset, dataDir) => {
21
21
  const sqlFilename = `${dataset.database}.sql`;
22
22
  const sqlFilePath = path.join(dataDir, sqlFilename);
23
23
  fs.removeSync(sqlFilePath);
24
24
  fs.moveSync(path.join(dataDir, "var", "opt", "opendata", sqlFilename), sqlFilePath);
25
25
  },
26
- schema: "public",
27
26
  title: "Amendements",
28
27
  url: "https://data.senat.fr/data/ameli/ameli.zip",
29
28
  },
30
29
  debats: {
31
30
  database: "debats",
32
31
  repairEncoding: true,
33
- schema: "public",
34
32
  title: "Informations relatives aux comptes rendus intégraux de la séance publique",
35
33
  url: "https://data.senat.fr/data/debats/debats.zip",
36
34
  },
37
35
  dosleg: {
38
36
  database: "dosleg",
39
37
  repairEncoding: true,
40
- schema: "public",
41
38
  title: "Dossiers législatifs",
42
39
  url: "https://data.senat.fr/data/dosleg/dosleg.zip",
43
40
  },
44
41
  questions: {
45
42
  database: "questions",
46
43
  repairEncoding: true,
47
- schema: "questions",
48
44
  title: "Questions écrites et orales posées par les sénateurs au Gouvernement",
49
45
  url: "https://data.senat.fr/data/questions/questions.zip",
50
46
  },
@@ -57,7 +53,6 @@ export const datasets = {
57
53
  fs.removeSync(sqlFilePath);
58
54
  fs.moveSync(path.join(dataDir, "export_sens.sql"), sqlFilePath);
59
55
  },
60
- schema: "public",
61
56
  title: "Sénateurs (y compris organes et présence)",
62
57
  url: "https://data.senat.fr/data/senateurs/export_sens.zip",
63
58
  },
package/lib/scripts/retrieve_open_data.js CHANGED
@@ -8,9 +8,10 @@ import StreamZip from "node-stream-zip";
8
8
  import readline from "readline";
9
9
  import windows1252 from "windows-1252";
10
10
  import { pipeline } from "stream";
11
+ import iconv from "iconv-lite";
11
12
  import { promisify } from "util";
12
13
  import config from "../config";
13
- import { datasets, getChosenDatasets, getEnabledDatasets } from "../datasets";
14
+ import { getChosenDatasets, getEnabledDatasets } from "../datasets";
14
15
  import { commonOptions } from "./shared/cli_helpers";
15
16
  const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
16
17
  const optionsDefinitions = [
@@ -67,6 +68,64 @@ async function downloadFile(url, dest) {
67
68
  }
68
69
  await streamPipeline(response.body, fs.createWriteStream(dest));
69
70
  }
71
+ /**
72
+ * Copy a dataset database to the main Senat database (overwriting its contents).
73
+ */
74
+ async function copyToSenat(dataset, dataDir, options) {
75
+ if (!options["silent"]) {
76
+ console.log(`Copying ${dataset.database} to Senat database...`);
77
+ }
78
+ const sqlFilePath = path.join(dataDir, `${dataset.database}.sql`);
79
+ const schemaDumpFile = path.join(dataDir, `${dataset.database}_schema_dump.sql`);
80
+ // Write the header and then stream the rest of the SQL file
81
+ const schemaSqlWriter = fs.createWriteStream(schemaDumpFile, { encoding: "utf8" });
82
+ // Add CREATE SCHEMA statement at the top
83
+ schemaSqlWriter.write(`CREATE SCHEMA IF NOT EXISTS ${dataset.database};\n`);
84
+ const lineReader = readline.createInterface({
85
+ input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
86
+ crlfDelay: Infinity,
87
+ });
88
+ for await (const line of lineReader) {
89
+ let newLine = line;
90
+ // Replace 'public' schema outside single-quoted strings
91
+ function replacePublicOutsideStrings(line, schema) {
92
+ const parts = line.split(/(')/);
93
+ let inString = false;
94
+ for (let i = 0; i < parts.length; i++) {
95
+ if (parts[i] === "'") {
96
+ inString = !inString;
97
+ }
98
+ else if (!inString) {
99
+ // Only replace outside of strings, including before comma
100
+ parts[i] = parts[i].replace(/\bpublic\b(?=(\s*\.|\s*[,;]|\s|$))/g, schema);
101
+ }
102
+ }
103
+ return parts.join('');
104
+ }
105
+ newLine = replacePublicOutsideStrings(line, dataset.database);
106
+ // Convert to LATIN1, replacing unconvertible characters with '?'
107
+ const latin1Line = iconv.encode(newLine, 'latin1').toString('latin1');
108
+ schemaSqlWriter.write(latin1Line + "\n");
109
+ }
110
+ schemaSqlWriter.end();
111
+ await new Promise((resolve, reject) => {
112
+ schemaSqlWriter.on("finish", () => {
113
+ try {
114
+ execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -f ${schemaDumpFile}`, {
115
+ env: process.env,
116
+ encoding: "utf-8",
117
+ stdio: ["ignore", "ignore", "pipe"],
118
+ });
119
+ }
120
+ finally {
121
+ try { }
122
+ catch { }
123
+ }
124
+ resolve();
125
+ });
126
+ schemaSqlWriter.on("error", reject);
127
+ });
128
+ }
70
129
  async function retrieveDataset(dataDir, dataset) {
71
130
  const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
72
131
  const zipFilePath = path.join(dataDir, zipFilename);
@@ -120,7 +179,7 @@ async function retrieveDataset(dataDir, dataset) {
120
179
  }
121
180
  if ((options["all"] || options["repairEncoding"]) && dataset.repairEncoding) {
122
181
  if (!options["silent"]) {
123
- console.log(`Repairing Windows CP1252 encoding of ${dataset.title}: ${sqlFilename}…`);
182
+ console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}…`);
124
183
  }
125
184
  const repairedSqlFilePath = sqlFilePath + ".repaired";
126
185
  const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
@@ -131,7 +190,9 @@ async function retrieveDataset(dataDir, dataset) {
131
190
  crlfDelay: Infinity,
132
191
  });
133
192
  for await (const line of lineReader) {
134
- repairedSqlWriter.write(line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" })) + "\n");
193
+ // Only repair encoding
194
+ let repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
195
+ repairedSqlWriter.write(repairedLine + "\n");
135
196
  }
136
197
  repairedSqlWriter.end();
137
198
  await fs.move(repairedSqlFilePath, sqlFilePath, { overwrite: true });
@@ -140,22 +201,17 @@ async function retrieveDataset(dataDir, dataset) {
140
201
  if (!options["silent"]) {
141
202
  console.log(`Importing ${dataset.title}: ${sqlFilename}…`);
142
203
  }
143
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d ${dataset.database} -f ${sqlFilename}`, {
144
- cwd: dataDir,
145
- env: process.env,
146
- encoding: "utf-8",
147
- stdio: ["pipe", "ignore", "ignore"],
148
- });
204
+ await copyToSenat(dataset, dataDir, options);
149
205
  }
150
206
  if (options["schema"]) {
151
207
  let definitionsDir = path.resolve("src", "raw_types_schemats");
152
208
  assert(fs.statSync(definitionsDir).isDirectory());
153
209
  if (!options["silent"]) {
154
- console.log(`Creating TypeScript definitions from schema of database ${dataset.database}…`);
210
+ console.log(`Creating TypeScript definitions from schema '${dataset.database}' in database 'senat'…`);
155
211
  }
156
- const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/${dataset.database}`;
212
+ const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/senat`;
157
213
  let definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
158
- execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.schema} -o ${definitionFilePath}`, {
214
+ execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.database} -o ${definitionFilePath}`, {
159
215
  // cwd: dataDir,
160
216
  env: process.env,
161
217
  encoding: "utf-8",
@@ -168,7 +224,7 @@ async function retrieveDataset(dataDir, dataset) {
168
224
  fs.writeFileSync(definitionFilePath, definitionRepaired);
169
225
  definitionsDir = path.resolve("src", "raw_types");
170
226
  definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
171
- execSync(`npx pg-to-ts generate -c '${dbConnectionString}' -s ${dataset.schema} -o ${definitionFilePath}`, {
227
+ execSync(`npx pg-to-ts generate -c '${dbConnectionString}' -s ${dataset.database} -o ${definitionFilePath}`, {
172
228
  env: process.env,
173
229
  encoding: "utf-8",
174
230
  // stdio: ["ignore", "ignore", "pipe"],
@@ -190,18 +246,16 @@ async function retrieveOpenData() {
190
246
  process.env["PGUSER"] &&
191
247
  process.env["PGPASSWORD"], "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
192
248
  console.time("data extraction time");
193
- for (const [, dataset] of Object.entries(datasets)) {
194
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "DROP DATABASE IF EXISTS ${dataset.database}"`, {
195
- cwd: dataDir,
196
- env: process.env,
197
- encoding: "utf-8",
198
- });
199
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "CREATE DATABASE ${dataset.database} WITH OWNER opendata"`, {
200
- cwd: dataDir,
201
- env: process.env,
202
- encoding: "utf-8",
203
- });
204
- }
249
+ execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "DROP DATABASE IF EXISTS senat"`, {
250
+ cwd: dataDir,
251
+ env: process.env,
252
+ encoding: "utf-8",
253
+ });
254
+ execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "CREATE DATABASE senat WITH OWNER opendata"`, {
255
+ cwd: dataDir,
256
+ env: process.env,
257
+ encoding: "utf-8",
258
+ });
205
259
  const enabledDatasets = getEnabledDatasets(options["categories"]);
206
260
  const chosenDatasets = getChosenDatasets(enabledDatasets);
207
261
  for (const dataset of chosenDatasets) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.9.9",
3
+ "version": "2.10.0",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",
@@ -89,6 +89,7 @@
89
89
  "@typescript-eslint/parser": "^8.13.0",
90
90
  "cross-env": "^10.0.0",
91
91
  "eslint": "^8.57.1",
92
+ "iconv-lite": "^0.7.0",
92
93
  "pg-to-ts": "^4.1.1",
93
94
  "prettier": "^3.5.3",
94
95
  "tslib": "^2.1.0",