@tricoteuses/senat 2.22.16 → 2.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +168 -0
- package/lib/aggregates.d.ts +52 -0
- package/lib/aggregates.js +930 -0
- package/lib/aggregates.mjs +713 -0
- package/lib/aggregates.ts +833 -0
- package/lib/config.d.ts +10 -0
- package/lib/config.js +16 -0
- package/lib/config.mjs +16 -0
- package/lib/config.ts +26 -0
- package/lib/databases.d.ts +2 -0
- package/lib/databases.js +26 -0
- package/lib/databases.mjs +57 -0
- package/lib/databases.ts +71 -0
- package/lib/datasets.d.ts +34 -0
- package/lib/datasets.js +233 -0
- package/lib/datasets.mjs +78 -0
- package/lib/datasets.ts +118 -0
- package/lib/fields.d.ts +10 -0
- package/lib/fields.js +68 -0
- package/lib/fields.mjs +22 -0
- package/lib/fields.ts +29 -0
- package/lib/git.d.ts +26 -0
- package/lib/git.js +167 -0
- package/lib/index.d.ts +13 -0
- package/lib/index.js +1 -0
- package/lib/index.mjs +7 -0
- package/lib/index.ts +64 -0
- package/lib/inserters.d.ts +98 -0
- package/lib/inserters.js +500 -0
- package/lib/inserters.mjs +360 -0
- package/lib/inserters.ts +521 -0
- package/lib/legislatures.json +38 -0
- package/lib/loaders.d.ts +58 -0
- package/lib/loaders.js +286 -0
- package/lib/loaders.mjs +158 -0
- package/lib/loaders.ts +271 -0
- package/lib/model/agenda.d.ts +6 -0
- package/lib/model/agenda.js +148 -0
- package/lib/model/ameli.d.ts +51 -0
- package/lib/model/ameli.js +149 -0
- package/lib/model/ameli.mjs +84 -0
- package/lib/model/ameli.ts +100 -0
- package/lib/model/commission.d.ts +18 -0
- package/lib/model/commission.js +269 -0
- package/lib/model/debats.d.ts +67 -0
- package/lib/model/debats.js +95 -0
- package/lib/model/debats.mjs +43 -0
- package/lib/model/debats.ts +68 -0
- package/lib/model/documents.d.ts +12 -0
- package/lib/model/documents.js +151 -0
- package/lib/model/dosleg.d.ts +7 -0
- package/lib/model/dosleg.js +326 -0
- package/lib/model/dosleg.mjs +196 -0
- package/lib/model/dosleg.ts +240 -0
- package/lib/model/index.d.ts +7 -0
- package/lib/model/index.js +7 -0
- package/lib/model/index.mjs +5 -0
- package/lib/model/index.ts +15 -0
- package/lib/model/questions.d.ts +45 -0
- package/lib/model/questions.js +89 -0
- package/lib/model/questions.mjs +71 -0
- package/lib/model/questions.ts +93 -0
- package/lib/model/scrutins.d.ts +13 -0
- package/lib/model/scrutins.js +114 -0
- package/lib/model/seance.d.ts +3 -0
- package/lib/model/seance.js +267 -0
- package/lib/model/sens.d.ts +146 -0
- package/lib/model/sens.js +454 -0
- package/lib/model/sens.mjs +415 -0
- package/lib/model/sens.ts +516 -0
- package/lib/model/texte.d.ts +7 -0
- package/lib/model/texte.js +256 -0
- package/lib/model/texte.mjs +208 -0
- package/lib/model/texte.ts +229 -0
- package/lib/model/util.d.ts +9 -0
- package/lib/model/util.js +38 -0
- package/lib/model/util.mjs +19 -0
- package/lib/model/util.ts +32 -0
- package/lib/parsers/texte.d.ts +7 -0
- package/lib/parsers/texte.js +228 -0
- package/lib/raw_types/ameli.d.ts +914 -0
- package/lib/raw_types/ameli.js +5 -0
- package/lib/raw_types/ameli.mjs +163 -0
- package/lib/raw_types/debats.d.ts +207 -0
- package/lib/raw_types/debats.js +5 -0
- package/lib/raw_types/debats.mjs +58 -0
- package/lib/raw_types/dosleg.d.ts +1619 -0
- package/lib/raw_types/dosleg.js +5 -0
- package/lib/raw_types/dosleg.mjs +438 -0
- package/lib/raw_types/questions.d.ts +419 -0
- package/lib/raw_types/questions.js +5 -0
- package/lib/raw_types/questions.mjs +11 -0
- package/lib/raw_types/senat.d.ts +11368 -0
- package/lib/raw_types/senat.js +5 -0
- package/lib/raw_types/sens.d.ts +8248 -0
- package/lib/raw_types/sens.js +5 -0
- package/lib/raw_types/sens.mjs +508 -0
- package/lib/raw_types_kysely/ameli.d.ts +915 -0
- package/lib/raw_types_kysely/ameli.js +7 -0
- package/lib/raw_types_kysely/ameli.mjs +5 -0
- package/lib/raw_types_kysely/ameli.ts +951 -0
- package/lib/raw_types_kysely/debats.d.ts +207 -0
- package/lib/raw_types_kysely/debats.js +7 -0
- package/lib/raw_types_kysely/debats.mjs +5 -0
- package/lib/raw_types_kysely/debats.ts +222 -0
- package/lib/raw_types_kysely/dosleg.d.ts +3532 -0
- package/lib/raw_types_kysely/dosleg.js +7 -0
- package/lib/raw_types_kysely/dosleg.mjs +5 -0
- package/lib/raw_types_kysely/dosleg.ts +3621 -0
- package/lib/raw_types_kysely/questions.d.ts +414 -0
- package/lib/raw_types_kysely/questions.js +7 -0
- package/lib/raw_types_kysely/questions.mjs +5 -0
- package/lib/raw_types_kysely/questions.ts +426 -0
- package/lib/raw_types_kysely/sens.d.ts +4394 -0
- package/lib/raw_types_kysely/sens.js +7 -0
- package/lib/raw_types_kysely/sens.mjs +5 -0
- package/lib/raw_types_kysely/sens.ts +4499 -0
- package/lib/raw_types_schemats/ameli.d.ts +539 -0
- package/lib/raw_types_schemats/ameli.js +2 -0
- package/lib/raw_types_schemats/ameli.mjs +2 -0
- package/lib/raw_types_schemats/ameli.ts +601 -0
- package/lib/raw_types_schemats/debats.d.ts +127 -0
- package/lib/raw_types_schemats/debats.js +2 -0
- package/lib/raw_types_schemats/debats.mjs +2 -0
- package/lib/raw_types_schemats/debats.ts +145 -0
- package/lib/raw_types_schemats/dosleg.d.ts +977 -0
- package/lib/raw_types_schemats/dosleg.js +2 -0
- package/lib/raw_types_schemats/dosleg.mjs +2 -0
- package/lib/raw_types_schemats/dosleg.ts +2193 -0
- package/lib/raw_types_schemats/questions.d.ts +235 -0
- package/lib/raw_types_schemats/questions.js +2 -0
- package/lib/raw_types_schemats/questions.mjs +2 -0
- package/lib/raw_types_schemats/questions.ts +249 -0
- package/lib/raw_types_schemats/sens.d.ts +6915 -0
- package/lib/raw_types_schemats/sens.js +2 -0
- package/lib/raw_types_schemats/sens.mjs +2 -0
- package/lib/raw_types_schemats/sens.ts +2907 -0
- package/lib/scripts/convert_data.d.ts +1 -0
- package/lib/scripts/convert_data.js +354 -0
- package/lib/scripts/convert_data.mjs +181 -0
- package/lib/scripts/convert_data.ts +243 -0
- package/lib/scripts/data-download.d.ts +1 -0
- package/lib/scripts/data-download.js +12 -0
- package/lib/scripts/datautil.d.ts +8 -0
- package/lib/scripts/datautil.js +34 -0
- package/lib/scripts/datautil.mjs +16 -0
- package/lib/scripts/datautil.ts +19 -0
- package/lib/scripts/images/transparent_150x192.jpg +0 -0
- package/lib/scripts/images/transparent_155x225.jpg +0 -0
- package/lib/scripts/parse_textes.d.ts +1 -0
- package/lib/scripts/parse_textes.js +44 -0
- package/lib/scripts/parse_textes.mjs +46 -0
- package/lib/scripts/parse_textes.ts +65 -0
- package/lib/scripts/retrieve_agenda.d.ts +1 -0
- package/lib/scripts/retrieve_agenda.js +132 -0
- package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
- package/lib/scripts/retrieve_cr_commission.js +364 -0
- package/lib/scripts/retrieve_cr_seance.d.ts +6 -0
- package/lib/scripts/retrieve_cr_seance.js +347 -0
- package/lib/scripts/retrieve_documents.d.ts +3 -0
- package/lib/scripts/retrieve_documents.js +219 -0
- package/lib/scripts/retrieve_documents.mjs +249 -0
- package/lib/scripts/retrieve_documents.ts +298 -0
- package/lib/scripts/retrieve_open_data.d.ts +1 -0
- package/lib/scripts/retrieve_open_data.js +315 -0
- package/lib/scripts/retrieve_open_data.mjs +217 -0
- package/lib/scripts/retrieve_open_data.ts +268 -0
- package/lib/scripts/retrieve_senateurs_photos.d.ts +1 -0
- package/lib/scripts/retrieve_senateurs_photos.js +147 -0
- package/lib/scripts/retrieve_senateurs_photos.mjs +147 -0
- package/lib/scripts/retrieve_senateurs_photos.ts +177 -0
- package/lib/scripts/retrieve_videos.d.ts +1 -0
- package/lib/scripts/retrieve_videos.js +461 -0
- package/lib/scripts/shared/cli_helpers.d.ts +95 -0
- package/lib/scripts/shared/cli_helpers.js +91 -0
- package/lib/scripts/shared/cli_helpers.ts +36 -0
- package/lib/scripts/shared/util.d.ts +4 -0
- package/lib/scripts/shared/util.js +35 -0
- package/lib/scripts/shared/util.ts +33 -0
- package/lib/scripts/test_iter_load.d.ts +1 -0
- package/lib/scripts/test_iter_load.js +12 -0
- package/lib/src/config.d.ts +22 -0
- package/lib/src/config.js +17 -7
- package/lib/src/conversion_textes.js +5 -1
- package/lib/src/databases.d.ts +2 -1
- package/lib/src/databases_postgres.d.ts +4 -0
- package/lib/src/databases_postgres.js +23 -0
- package/lib/src/datasets.d.ts +4 -0
- package/lib/src/datasets.js +16 -2
- package/lib/src/git.d.ts +1 -0
- package/lib/src/git.js +45 -11
- package/lib/src/loaders.js +10 -4
- package/lib/src/model/agenda.js +2 -2
- package/lib/src/model/ameli.d.ts +64 -52
- package/lib/src/model/ameli.js +147 -145
- package/lib/src/model/ameli_postgres.d.ts +67 -0
- package/lib/src/model/ameli_postgres.js +150 -0
- package/lib/src/model/commission.d.ts +3 -2
- package/lib/src/model/commission.js +2 -2
- package/lib/src/model/debats.d.ts +38 -66
- package/lib/src/model/debats.js +110 -93
- package/lib/src/model/documents.d.ts +32 -12
- package/lib/src/model/documents.js +171 -130
- package/lib/src/model/dosleg.d.ts +142 -5
- package/lib/src/model/dosleg.js +298 -156
- package/lib/src/model/questions.d.ts +54 -45
- package/lib/src/model/questions.js +89 -87
- package/lib/src/model/scrutins.d.ts +48 -13
- package/lib/src/model/scrutins.js +118 -111
- package/lib/src/model/seance.js +3 -3
- package/lib/src/model/sens.d.ts +109 -179
- package/lib/src/model/sens.js +384 -484
- package/lib/src/model/util.d.ts +0 -8
- package/lib/src/model/util.js +0 -23
- package/lib/src/parsers/texte.js +7 -7
- package/lib/src/raw_types_schemats/ameli.d.ts +4 -2
- package/lib/src/raw_types_schemats/debats.d.ts +2 -2
- package/lib/src/raw_types_schemats/dosleg.d.ts +2 -2
- package/lib/src/raw_types_schemats/questions.d.ts +2 -2
- package/lib/src/raw_types_schemats/sens.d.ts +10 -4216
- package/lib/src/scripts/convert_data.js +7 -6
- package/lib/src/scripts/convert_xml_to_html.js +2 -2
- package/lib/src/scripts/data-download.js +3 -2
- package/lib/src/scripts/retrieve_agenda.js +21 -9
- package/lib/src/scripts/retrieve_cr_commission.js +17 -17
- package/lib/src/scripts/retrieve_cr_seance.d.ts +14 -1
- package/lib/src/scripts/retrieve_cr_seance.js +10 -11
- package/lib/src/scripts/retrieve_documents.d.ts +11 -2
- package/lib/src/scripts/retrieve_documents.js +25 -14
- package/lib/src/scripts/retrieve_open_data.js +400 -145
- package/lib/src/scripts/retrieve_senateurs_photos.js +25 -11
- package/lib/src/scripts/retrieve_videos.js +12 -11
- package/lib/src/scripts/shared/cli_helpers.d.ts +1 -6
- package/lib/src/scripts/shared/cli_helpers.js +9 -8
- package/lib/src/scripts/shared/incremental_import_sql.d.ts +2 -0
- package/lib/src/scripts/shared/incremental_import_sql.js +894 -0
- package/lib/src/scripts/shared/prefixed_tables.d.ts +7 -0
- package/lib/src/scripts/shared/prefixed_tables.js +30 -0
- package/lib/src/scripts/shared/schema_version.d.ts +3 -0
- package/lib/src/scripts/shared/schema_version.js +97 -0
- package/lib/src/scripts/shared/staging_import.d.ts +3 -0
- package/lib/src/scripts/shared/staging_import.js +80 -0
- package/lib/src/scripts/shared/staging_metadata_sql.d.ts +1 -0
- package/lib/src/scripts/shared/staging_metadata_sql.js +221 -0
- package/lib/src/scripts/validate_prefixed_tables.d.ts +1 -0
- package/lib/src/scripts/validate_prefixed_tables.js +102 -0
- package/lib/src/types/texte.d.ts +1 -1
- package/lib/src/utils/cr_spliting.d.ts +9 -6
- package/lib/src/utils/cr_spliting.js +6 -101
- package/lib/src/utils/reunion_odj_building.d.ts +7 -3
- package/lib/src/utils/reunion_parsing.d.ts +2 -1
- package/lib/src/utils/reunion_parsing.js +2 -2
- package/lib/src/videos/match.js +8 -5
- package/lib/src/videos/pipeline.d.ts +6 -2
- package/lib/src/videos/pipeline.js +21 -8
- package/lib/src/videos/search.js +6 -2
- package/lib/strings.d.ts +1 -0
- package/lib/strings.js +18 -0
- package/lib/strings.mjs +18 -0
- package/lib/strings.ts +26 -0
- package/lib/tests/incrementalImportSql.test.d.ts +1 -0
- package/lib/tests/incrementalImportSql.test.js +155 -0
- package/lib/tests/prefixedTables.test.d.ts +1 -0
- package/lib/tests/prefixedTables.test.js +29 -0
- package/lib/tests/schemaVersion.test.d.ts +1 -0
- package/lib/tests/schemaVersion.test.js +23 -0
- package/lib/tests/validatePrefixedTables.test.d.ts +1 -0
- package/lib/tests/validatePrefixedTables.test.js +14 -0
- package/lib/types/agenda.d.ts +44 -0
- package/lib/types/agenda.js +1 -0
- package/lib/types/ameli.d.ts +5 -0
- package/lib/types/ameli.js +1 -0
- package/lib/types/ameli.mjs +13 -0
- package/lib/types/ameli.ts +21 -0
- package/lib/types/compte_rendu.d.ts +83 -0
- package/lib/types/compte_rendu.js +1 -0
- package/lib/types/debats.d.ts +2 -0
- package/lib/types/debats.js +1 -0
- package/lib/types/debats.mjs +2 -0
- package/lib/types/debats.ts +6 -0
- package/lib/types/dosleg.d.ts +70 -0
- package/lib/types/dosleg.js +1 -0
- package/lib/types/dosleg.mjs +151 -0
- package/lib/types/dosleg.ts +284 -0
- package/lib/types/questions.d.ts +2 -0
- package/lib/types/questions.js +1 -0
- package/lib/types/questions.mjs +1 -0
- package/lib/types/questions.ts +3 -0
- package/lib/types/sens.d.ts +10 -0
- package/lib/types/sens.js +1 -0
- package/lib/types/sens.mjs +1 -0
- package/lib/types/sens.ts +12 -0
- package/lib/types/sessions.d.ts +5 -0
- package/lib/types/sessions.js +84 -0
- package/lib/types/sessions.mjs +43 -0
- package/lib/types/sessions.ts +42 -0
- package/lib/types/texte.d.ts +74 -0
- package/lib/types/texte.js +16 -0
- package/lib/types/texte.mjs +16 -0
- package/lib/types/texte.ts +76 -0
- package/lib/typings/windows-1252.d.js +2 -0
- package/lib/typings/windows-1252.d.mjs +2 -0
- package/lib/typings/windows-1252.d.ts +11 -0
- package/lib/utils/cr_spliting.d.ts +28 -0
- package/lib/utils/cr_spliting.js +265 -0
- package/lib/utils/date.d.ts +10 -0
- package/lib/utils/date.js +100 -0
- package/lib/utils/nvs-timecode.d.ts +7 -0
- package/lib/utils/nvs-timecode.js +79 -0
- package/lib/utils/reunion_grouping.d.ts +9 -0
- package/lib/utils/reunion_grouping.js +361 -0
- package/lib/utils/reunion_odj_building.d.ts +5 -0
- package/lib/utils/reunion_odj_building.js +154 -0
- package/lib/utils/reunion_parsing.d.ts +23 -0
- package/lib/utils/reunion_parsing.js +209 -0
- package/lib/utils/scoring.d.ts +14 -0
- package/lib/utils/scoring.js +147 -0
- package/lib/utils/string_cleaning.d.ts +7 -0
- package/lib/utils/string_cleaning.js +57 -0
- package/lib/validators/config.d.ts +9 -0
- package/lib/validators/config.js +10 -0
- package/lib/validators/config.mjs +54 -0
- package/lib/validators/config.ts +79 -0
- package/lib/validators/senat.d.ts +0 -0
- package/lib/validators/senat.js +28 -0
- package/lib/validators/senat.mjs +24 -0
- package/lib/validators/senat.ts +26 -0
- package/package.json +6 -10
|
@@ -5,12 +5,17 @@ import fs from "fs-extra";
|
|
|
5
5
|
import path from "path";
|
|
6
6
|
import StreamZip from "node-stream-zip";
|
|
7
7
|
import readline from "readline";
|
|
8
|
-
import
|
|
9
|
-
import { pipeline } from "stream";
|
|
8
|
+
import { pipeline, Readable } from "stream";
|
|
10
9
|
import { promisify } from "util";
|
|
10
|
+
import * as windows1252 from "windows-1252";
|
|
11
11
|
import config from "../config";
|
|
12
12
|
import { getChosenDatasets, getEnabledDatasets } from "../datasets";
|
|
13
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
13
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
14
|
+
import { buildIncrementalDatasetImportSql, buildNormalizeStagingSchemaSql } from "./shared/incremental_import_sql";
|
|
15
|
+
import { normalizeGeneratedDefinition, prefixedName, senatSchemaName, stagingSchemaName, } from "./shared/prefixed_tables";
|
|
16
|
+
import { buildEnsureSchemaVersionTableSql, buildIncrementSchemaVersionSql, buildSchemaStructureFingerprintQuery, } from "./shared/schema_version";
|
|
17
|
+
import { buildExportStagingMetadataStatementsQuery } from "./shared/staging_metadata_sql";
|
|
18
|
+
import { isCopyFromStdinLine, rewriteLineForStagingImport } from "./shared/staging_import";
|
|
14
19
|
const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
|
|
15
20
|
const optionsDefinitions = [
|
|
16
21
|
...commonOptions,
|
|
@@ -22,7 +27,7 @@ const optionsDefinitions = [
|
|
|
22
27
|
},
|
|
23
28
|
{
|
|
24
29
|
alias: "c",
|
|
25
|
-
help: "create TypeScript interfaces from
|
|
30
|
+
help: "create TypeScript interfaces from database schemas into src/raw_types_* directories",
|
|
26
31
|
name: "schema",
|
|
27
32
|
type: Boolean,
|
|
28
33
|
},
|
|
@@ -38,15 +43,20 @@ const optionsDefinitions = [
|
|
|
38
43
|
name: "fetch",
|
|
39
44
|
type: Boolean,
|
|
40
45
|
},
|
|
46
|
+
{
|
|
47
|
+
help: "use separate staging database and postgres_fdw for incremental merge into target schema",
|
|
48
|
+
name: "incremental",
|
|
49
|
+
type: Boolean,
|
|
50
|
+
},
|
|
41
51
|
{
|
|
42
52
|
alias: "i",
|
|
43
|
-
help: "import SQL dumps into
|
|
53
|
+
help: "import SQL dumps into PostgreSQL",
|
|
44
54
|
name: "import",
|
|
45
55
|
type: Boolean,
|
|
46
56
|
},
|
|
47
57
|
{
|
|
48
58
|
alias: "S",
|
|
49
|
-
help: "sudo psql commands with given user",
|
|
59
|
+
help: "sudo psql commands with given user (example: --sudo postgres)",
|
|
50
60
|
name: "sudo",
|
|
51
61
|
type: String,
|
|
52
62
|
},
|
|
@@ -59,99 +69,358 @@ const optionsDefinitions = [
|
|
|
59
69
|
];
|
|
60
70
|
const options = commandLineArgs(optionsDefinitions);
|
|
61
71
|
const streamPipeline = promisify(pipeline);
|
|
72
|
+
// Name of the postgres_fdw foreign server that this script creates on the target database to reach the staging database.
const stagingServerName = "staging_server";
|
|
73
|
+
/**
 * Tells whether the `incremental` command-line flag was set.
 *
 * Only the boolean `true` counts; any other value (missing, truthy string, ...) yields false.
 *
 * @param {object} options - Parsed command-line options.
 * @returns {boolean} True when `options.incremental` is exactly `true`.
 */
function isIncrementalImport(options) {
    const { incremental } = options;
    return incremental === true;
}
|
|
76
|
+
/**
 * Wraps a string in single quotes so it can be used as one shell word.
 *
 * Every embedded single quote is emitted as `'"'"'`: close the quoted run,
 * add a double-quoted single quote, then reopen the quoted run.
 *
 * @param {string} value - Raw text to quote.
 * @returns {string} The single-quoted form of `value`.
 */
function shellQuote(value) {
    const escaped = value.split("'").join(`'"'"'`);
    return `'${escaped}'`;
}
|
|
79
|
+
/**
 * Builds the environment for a child process from the current process
 * environment plus the libpq-style PG* variables of one connection.
 *
 * @param {{name: string, host: string, password: string, port: (number|string), user: string}} connection
 *   Connection settings to expose.
 * @returns {object} A new object; `process.env` itself is not modified.
 */
function connectionEnv(connection) {
    const { name, host, password, port, user } = connection;
    const pgVariables = {
        PGDATABASE: name,
        PGHOST: host,
        PGPASSWORD: password,
        PGPORT: String(port),
        PGUSER: user,
    };
    return Object.assign({}, process.env, pgVariables);
}
|
|
89
|
+
/**
 * Escapes text for inclusion between single quotes in a SQL string literal
 * by doubling every single quote.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with each `'` replaced by `''`.
 */
function escapeSqlLiteral(value) {
    return value.split("'").join("''");
}
|
|
92
|
+
/**
 * Returns a promise that resolves (with no value) once the timer fires.
 *
 * @param {number} delayMs - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(delayMs) {
    return new Promise((done) => {
        setTimeout(done, delayMs);
    });
}
|
|
95
|
+
/**
 * Joins the `stderr` and `stdout` properties of a thrown error into one string
 * (stderr first, then a newline, then stdout). A missing or null stream
 * contributes an empty string.
 *
 * @param {object} error - Value caught from a failed command.
 * @returns {string} `"<stderr>\n<stdout>"`.
 */
function getExecSyncErrorOutput(error) {
    const { stderr, stdout } = error;
    return [stderr ?? "", stdout ?? ""].join("\n");
}
|
|
99
|
+
/**
 * Tells whether a failed command's output contains one of the lock or
 * serialization messages this script treats as worth retrying.
 *
 * @param {object} error - Value caught from a failed command.
 * @returns {boolean} True when any known marker appears in the combined output.
 */
function isRetryablePostgresError(error) {
    const output = getExecSyncErrorOutput(error);
    const transientMarkers = [
        "deadlock detected",
        "could not obtain lock",
        "canceling statement due to lock timeout",
        "could not serialize access",
    ];
    return transientMarkers.some((marker) => output.includes(marker));
}
|
|
106
|
+
/**
 * Tells whether a failure output contains one of the messages after which
 * the caller keeps the existing staging database instead of recreating it.
 *
 * @param {object} error - Value caught from a failed command.
 * @returns {boolean} True when any known marker appears in the combined output.
 */
function canReuseExistingStagingDatabase(error) {
    const output = getExecSyncErrorOutput(error);
    const reusableMarkers = [
        "permission denied to terminate process",
        "database is being accessed by other users",
        "cannot drop the currently open database",
    ];
    return reusableMarkers.some((marker) => output.includes(marker));
}
|
|
112
|
+
/**
 * Tells whether a failure output reports that the named foreign server
 * does not exist.
 *
 * @param {object} error - Value caught from a failed command.
 * @param {string} serverName - Foreign server name expected in the message.
 * @returns {boolean} True when `server "<serverName>" does not exist` appears in the output.
 */
function isMissingForeignServerError(error, serverName) {
    const output = getExecSyncErrorOutput(error);
    const expectedMessage = `server "${serverName}" does not exist`;
    return output.includes(expectedMessage);
}
|
|
115
|
+
/**
 * Runs an operation, retrying it with exponential back-off while it fails
 * with an error that `isRetryablePostgresError` recognises.
 *
 * The operation may be synchronous or return a promise: its result is awaited
 * inside the `try` block so that asynchronous rejections are retried exactly
 * like synchronous throws.
 *
 * @param {Function} operation - Zero-argument function to run.
 * @param {object} options - Parsed command-line options; `silent` suppresses the retry warning.
 * @param {{attempts: number, delayMs: number, label: string}} retryOptions
 *   Maximum number of attempts, initial delay (doubled after each retry) and
 *   a label used in the warning message.
 * @returns {Promise<*>} The value produced by `operation`.
 * @throws The last error when it is not retryable or the attempts are exhausted.
 */
async function runWithRetry(operation, options, retryOptions) {
    let attempt = 1;
    let delayMs = retryOptions.delayMs;
    while (true) {
        try {
            // Without `await`, a rejected promise would leave this function
            // before the catch clause could see it and no retry would happen.
            return await operation();
        }
        catch (error) {
            if (!isRetryablePostgresError(error) || attempt >= retryOptions.attempts) {
                throw error;
            }
            if (!options["silent"]) {
                console.warn(`${retryOptions.label} hit a transient PostgreSQL lock error on attempt ${attempt}/${retryOptions.attempts}; retrying in ${delayMs}ms...`);
            }
            await sleep(delayMs);
            attempt += 1;
            delayMs *= 2;
        }
    }
}
|
|
135
|
+
/**
 * Builds the shell command line that runs `psql --quiet` against a connection.
 *
 * Host, port, user and database are each passed through `shellQuote`. The
 * optional `--sudo` user is quoted as well, so a value containing spaces or
 * shell metacharacters stays a single argument of `sudo -u`.
 *
 * @param {string} baseArgs - Extra psql arguments, already shell-safe, appended verbatim.
 * @param {{host: string, port: (number|string), user: string, name: string}} connection
 *   Connection to target.
 * @param {object} options - Parsed command-line options; `sudo` names the user to run psql as.
 * @returns {string} The complete command line.
 */
function buildPsqlCommand(baseArgs, connection, options) {
    const sudoUser = options["sudo"];
    const sudoPrefix = sudoUser ? `sudo -u ${shellQuote(String(sudoUser))} ` : "";
    const connectionArgs = [
        `-h ${shellQuote(connection.host)}`,
        `-p ${shellQuote(String(connection.port))}`,
        `-U ${shellQuote(connection.user)}`,
        `-d ${shellQuote(connection.name)}`,
    ].join(" ");
    return `${sudoPrefix}psql --quiet ${connectionArgs} ${baseArgs}`;
}
|
|
144
|
+
/**
 * Runs a SQL file through psql synchronously.
 *
 * @param {string} sqlFilePath - File handed to `psql -f`.
 * @param {string} dataDir - Working directory for the child process.
 * @param {object} options - Parsed command-line options (forwarded to `buildPsqlCommand`).
 * @param {object} connection - Connection settings used for both arguments and PG* environment.
 * @param {boolean} [stopOnError=true] - When true, adds `-v ON_ERROR_STOP=1`.
 * @throws When `execSync` reports a failure; stdout and stderr are captured on the error.
 */
function runPsqlFile(sqlFilePath, dataDir, options, connection, stopOnError = true) {
    const psqlArgs = [];
    if (stopOnError) {
        psqlArgs.push("-v ON_ERROR_STOP=1");
    }
    psqlArgs.push(`-f ${shellQuote(sqlFilePath)}`);
    const commandLine = buildPsqlCommand(psqlArgs.join(" "), connection, options);
    const execOptions = {
        cwd: dataDir,
        env: connectionEnv(connection),
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "pipe"],
    };
    execSync(commandLine, execOptions);
}
|
|
153
|
+
/**
 * Runs one SQL command string through `psql -c` synchronously, discarding its output.
 *
 * @param {string} command - SQL text; it is shell-quoted before being passed to psql.
 * @param {string} dataDir - Working directory for the child process.
 * @param {object} options - Parsed command-line options (forwarded to `buildPsqlCommand`).
 * @param {object} connection - Connection settings used for both arguments and PG* environment.
 * @param {boolean} [stopOnError=true] - When true, adds `-v ON_ERROR_STOP=1`.
 * @throws When `execSync` reports a failure; stdout and stderr are captured on the error.
 */
function runPsqlCommand(command, dataDir, options, connection, stopOnError = true) {
    const psqlArgs = [];
    if (stopOnError) {
        psqlArgs.push("-v ON_ERROR_STOP=1");
    }
    psqlArgs.push(`-c ${shellQuote(command)}`);
    const commandLine = buildPsqlCommand(psqlArgs.join(" "), connection, options);
    const execOptions = {
        cwd: dataDir,
        env: connectionEnv(connection),
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "pipe"],
    };
    execSync(commandLine, execOptions);
}
|
|
162
|
+
/**
 * Runs one SQL query through `psql -At -c` synchronously and returns what psql printed.
 *
 * @param {string} command - SQL text; it is shell-quoted before being passed to psql.
 * @param {string} dataDir - Working directory for the child process.
 * @param {object} options - Parsed command-line options (forwarded to `buildPsqlCommand`).
 * @param {object} connection - Connection settings used for both arguments and PG* environment.
 * @param {boolean} [stopOnError=true] - When true, adds `-v ON_ERROR_STOP=1`.
 * @returns {string} Captured standard output, decoded as UTF-8.
 * @throws When `execSync` reports a failure; stdout and stderr are captured on the error.
 */
function runPsqlQuery(command, dataDir, options, connection, stopOnError = true) {
    const psqlArgs = [];
    if (stopOnError) {
        psqlArgs.push("-v ON_ERROR_STOP=1");
    }
    psqlArgs.push("-At", `-c ${shellQuote(command)}`);
    const commandLine = buildPsqlCommand(psqlArgs.join(" "), connection, options);
    const execOptions = {
        cwd: dataDir,
        env: connectionEnv(connection),
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "pipe"],
    };
    return execSync(commandLine, execOptions);
}
|
|
171
|
+
/**
 * Asserts that the staging schema of a dataset contains at least one table.
 *
 * @param {{database: string}} dataset - Dataset whose staging schema is checked.
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 * @param {object} connection - Connection on which `pg_tables` is queried.
 * @throws {Error} Assertion failure when the count is not a positive number.
 */
function ensureStagingSchemaHasTables(dataset, dataDir, options, connection) {
    const stagingSchema = stagingSchemaName(dataset.database);
    const countQuery = `SELECT count(*) FROM pg_tables WHERE schemaname = '${escapeSqlLiteral(stagingSchema)}'`;
    const rawCount = runPsqlQuery(countQuery, dataDir, options, connection).trim();
    const tableCount = Number.parseInt(rawCount, 10);
    assert(tableCount > 0, `Staging schema ${stagingSchema} is empty after importing ${dataset.database}. Aborting incremental merge to protect ${senatSchemaName}.`);
}
|
|
176
|
+
/**
 * Recreates the staging database from scratch: terminates other sessions on
 * it, drops it, then creates it owned by the staging user.
 *
 * Commands run against a maintenance database (`PGDATABASE` from the
 * environment, or `postgres`) using the staging connection's other settings.
 * When recreation fails with an error `canReuseExistingStagingDatabase`
 * accepts, the existing database is kept and only its existence is ensured.
 *
 * Database and role names are emitted as double-quoted SQL identifiers so
 * names containing upper-case letters, hyphens or other special characters
 * are handled and cannot alter the statement.
 *
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options; `silent` suppresses the warning.
 * @param {{staging: {name: string, user: string}}} runtime - Runtime connections.
 * @throws Any psql failure that does not allow reusing the existing database.
 */
function ensureStagingDatabase(dataDir, options, runtime) {
    // Standard SQL identifier quoting: wrap in double quotes, double embedded ones.
    const quoteIdentifier = (identifier) => `"${String(identifier).replace(/"/g, '""')}"`;
    const stagingName = runtime.staging.name;
    const maintenanceDb = process.env["PGDATABASE"] || "postgres";
    const maintenanceConnection = {
        ...runtime.staging,
        name: maintenanceDb,
    };
    try {
        runPsqlCommand(`SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${escapeSqlLiteral(stagingName)}' AND pid <> pg_backend_pid();`, dataDir, options, maintenanceConnection, false);
        runPsqlCommand(`DROP DATABASE IF EXISTS ${quoteIdentifier(stagingName)}`, dataDir, options, maintenanceConnection, false);
        runPsqlCommand(`CREATE DATABASE ${quoteIdentifier(stagingName)} WITH OWNER ${quoteIdentifier(runtime.staging.user)}`, dataDir, options, maintenanceConnection);
    }
    catch (error) {
        if (!canReuseExistingStagingDatabase(error)) {
            throw error;
        }
        if (!options["silent"]) {
            console.warn(`Could not recreate staging database ${stagingName}; reusing the existing database instead.`);
        }
        ensureDatabaseExists(runtime.staging, dataDir, options);
    }
}
|
|
197
|
+
/**
 * Creates a database, owned by the connection's user, unless `pg_database`
 * already lists it.
 *
 * Commands run against a maintenance database (`PGDATABASE` from the
 * environment, or `postgres`) using the connection's other settings. The
 * database and owner names are emitted as double-quoted SQL identifiers so
 * the created name matches `connection.name` exactly and special characters
 * cannot alter the statement.
 *
 * @param {{name: string, user: string}} connection - Connection describing the database to ensure.
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 */
function ensureDatabaseExists(connection, dataDir, options) {
    // Standard SQL identifier quoting: wrap in double quotes, double embedded ones.
    const quoteIdentifier = (identifier) => `"${String(identifier).replace(/"/g, '""')}"`;
    const maintenanceDb = process.env["PGDATABASE"] || "postgres";
    const maintenanceConnection = {
        ...connection,
        name: maintenanceDb,
    };
    const exists = runPsqlQuery(`SELECT 1 FROM pg_database WHERE datname = '${escapeSqlLiteral(connection.name)}'`, dataDir, options, maintenanceConnection).trim();
    if (exists === "1") {
        return;
    }
    runPsqlCommand(`CREATE DATABASE ${quoteIdentifier(connection.name)} WITH OWNER ${quoteIdentifier(connection.user)}`, dataDir, options, maintenanceConnection);
}
|
|
209
|
+
/**
 * (Re)creates, on the target database, the postgres_fdw foreign server and
 * the current user's mapping that point at the staging database.
 *
 * Steps, all run on `runtime.target`: ensure the extension, drop any previous
 * server (CASCADE), create the server, create the user mapping.
 *
 * NOTE(review): the staging password is embedded in the SQL passed to
 * `psql -c`, so it is part of the child process command line.
 *
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 * @param {{target: object, staging: object}} runtime - Runtime connections.
 */
function ensureForeignStagingServer(dataDir, options, runtime) {
    const onTarget = (sql, stopOnError = true) => runPsqlCommand(sql, dataDir, options, runtime.target, stopOnError);
    onTarget(`CREATE EXTENSION IF NOT EXISTS postgres_fdw`);
    onTarget(`DROP SERVER IF EXISTS ${stagingServerName} CASCADE`, false);
    const { staging } = runtime;
    const serverOptions = `host '${escapeSqlLiteral(staging.host)}', dbname '${escapeSqlLiteral(staging.name)}', port '${escapeSqlLiteral(String(staging.port))}'`;
    onTarget(`CREATE SERVER ${stagingServerName} FOREIGN DATA WRAPPER postgres_fdw OPTIONS (${serverOptions})`);
    const mappingOptions = `user '${escapeSqlLiteral(staging.user)}', password '${escapeSqlLiteral(staging.password)}'`;
    onTarget(`CREATE USER MAPPING FOR CURRENT_USER SERVER ${stagingServerName} OPTIONS (${mappingOptions})`);
}
|
|
222
|
+
/**
 * Drops the staging foreign server (and dependents, via CASCADE) from the
 * target database if it exists. Run without `ON_ERROR_STOP`.
 *
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 * @param {{target: object}} runtime - Runtime connections.
 */
function cleanupForeignStagingServer(dataDir, options, runtime) {
    const dropServerSql = `DROP SERVER IF EXISTS ${stagingServerName} CASCADE`;
    const stopOnError = false;
    runPsqlCommand(dropServerSql, dataDir, options, runtime.target, stopOnError);
}
|
|
225
|
+
/**
 * Exposes a dataset's staging schema inside the target database through the
 * foreign server.
 *
 * The local schema of the same name is dropped and recreated on the target,
 * then filled with `IMPORT FOREIGN SCHEMA`. If the import fails because the
 * foreign server does not exist, the server is recreated and the import is
 * attempted once more.
 *
 * @param {{database: string}} dataset - Dataset whose staging schema is mounted.
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options; `silent` suppresses the warning.
 * @param {{target: object}} runtime - Runtime connections.
 * @throws Any psql failure other than the missing-server case on the first import.
 */
function mountForeignStagingSchema(dataset, dataDir, options, runtime) {
    const schema = stagingSchemaName(dataset.database);
    const importSql = `IMPORT FOREIGN SCHEMA ${schema} FROM SERVER ${stagingServerName} INTO ${schema}`;
    const importSchema = () => runPsqlCommand(importSql, dataDir, options, runtime.target);
    runPsqlCommand(`DROP SCHEMA IF EXISTS ${schema} CASCADE`, dataDir, options, runtime.target, false);
    runPsqlCommand(`CREATE SCHEMA ${schema}`, dataDir, options, runtime.target);
    try {
        importSchema();
        return;
    }
    catch (error) {
        const serverIsMissing = isMissingForeignServerError(error, stagingServerName);
        if (!serverIsMissing) {
            throw error;
        }
    }
    // Only reached when the first import failed because the server was missing.
    if (!options["silent"]) {
        console.warn(`Foreign server ${stagingServerName} disappeared before schema import; recreating it.`);
    }
    ensureForeignStagingServer(dataDir, options, runtime);
    importSchema();
}
|
|
244
|
+
/**
 * Runs, on the target database, the SQL produced by
 * `buildEnsureSchemaVersionTableSql` for the main schema.
 *
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 * @param {{target: object}} runtime - Runtime connections.
 */
function ensureSchemaVersionTable(dataDir, options, runtime) {
    const ensureTableSql = buildEnsureSchemaVersionTableSql(senatSchemaName);
    runPsqlCommand(ensureTableSql, dataDir, options, runtime.target);
}
|
|
247
|
+
/**
 * Runs the fingerprint query built by `buildSchemaStructureFingerprintQuery`
 * for the main schema on the target database.
 *
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 * @param {{target: object}} runtime - Runtime connections.
 * @returns {string} The query output with surrounding whitespace removed.
 */
function getSchemaStructureFingerprint(dataDir, options, runtime) {
    const fingerprintQuery = buildSchemaStructureFingerprintQuery(senatSchemaName);
    const rawOutput = runPsqlQuery(fingerprintQuery, dataDir, options, runtime.target);
    return rawOutput.trim();
}
|
|
250
|
+
/**
 * Reads `number` from the main schema's `version` table on the target database.
 *
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 * @param {{target: object}} runtime - Runtime connections.
 * @returns {number} The value parsed as a base-10 integer (NaN when the output is not numeric).
 */
function getSchemaVersionNumber(dataDir, options, runtime) {
    const versionQuery = `SELECT number FROM ${senatSchemaName}.version`;
    const rawVersion = runPsqlQuery(versionQuery, dataDir, options, runtime.target);
    return Number.parseInt(rawVersion.trim(), 10);
}
|
|
254
|
+
/**
 * Increments the stored schema version when the schema structure fingerprint
 * differs from the one captured earlier, then (unless silent) logs the
 * resulting version number.
 *
 * @param {string} previousFingerprint - Fingerprint taken before the import.
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options; `silent` suppresses logging.
 * @param {{target: object}} runtime - Runtime connections.
 */
function bumpSchemaVersionIfNeeded(previousFingerprint, dataDir, options, runtime) {
    const currentFingerprint = getSchemaStructureFingerprint(dataDir, options, runtime);
    const structureChanged = currentFingerprint !== previousFingerprint;
    if (structureChanged) {
        runPsqlCommand(buildIncrementSchemaVersionSql(senatSchemaName), dataDir, options, runtime.target);
    }
    if (options["silent"]) {
        return;
    }
    // The version is only queried when it is going to be reported.
    const versionNumber = getSchemaVersionNumber(dataDir, options, runtime);
    const message = structureChanged
        ? `Incremented ${senatSchemaName}.version to ${versionNumber} after schema structure change.`
        : `Current ${senatSchemaName}.version: ${versionNumber}.`;
    console.log(message);
}
|
|
269
|
+
/**
 * Writes the dataset's post-import SQL to `<database>_post_import.sql` in the
 * data directory and runs it on the target database, then drops the dataset's
 * staging schema from the target.
 *
 * In incremental mode the staging schema is first mounted on the target
 * through the foreign server.
 *
 * @param {{database: string, mergeKeys: *, rowMultisetMergeTables: *}} dataset - Dataset being finalized.
 * @param {string} dataDir - Directory receiving the SQL file; also psql's working directory.
 * @param {object} options - Parsed command-line options.
 * @param {{target: {user: string}}} runtime - Runtime connections.
 */
function finalizeDatasetImport(dataset, dataDir, options, runtime) {
    const { database } = dataset;
    const sqlFile = path.join(dataDir, `${database}_post_import.sql`);
    const sqlText = buildIncrementalDatasetImportSql(database, runtime.target.user, dataset.mergeKeys, dataset.rowMultisetMergeTables);
    const schema = stagingSchemaName(database);
    fs.writeFileSync(sqlFile, sqlText, { encoding: "utf8" });
    const incremental = isIncrementalImport(options);
    if (incremental) {
        mountForeignStagingSchema(dataset, dataDir, options, runtime);
    }
    runPsqlFile(sqlFile, dataDir, options, runtime.target);
    const dropSchemaSql = `DROP SCHEMA IF EXISTS ${schema} CASCADE`;
    runPsqlCommand(dropSchemaSql, dataDir, options, runtime.target, false);
}
|
|
280
|
+
/**
 * Exports metadata statements from the staging database and replays them on
 * the target database.
 *
 * The export query is run on `runtime.staging`; each non-empty output line is
 * decoded from hexadecimal to UTF-8 text and executed on `runtime.target`,
 * one psql invocation per statement, in output order.
 *
 * @param {{database: string}} dataset - Dataset whose staging schema is exported.
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 * @param {{staging: object, target: object}} runtime - Runtime connections.
 */
function applyStagingMetadataToTarget(dataset, dataDir, options, runtime) {
    const schema = stagingSchemaName(dataset.database);
    const exportQuery = buildExportStagingMetadataStatementsQuery(schema, senatSchemaName);
    const exported = runPsqlQuery(exportQuery, dataDir, options, runtime.staging);
    const hexLines = [];
    for (const rawLine of exported.split("\n")) {
        const hexLine = rawLine.trim();
        if (hexLine.length > 0) {
            hexLines.push(hexLine);
        }
    }
    hexLines.forEach((hexLine) => {
        const sql = Buffer.from(hexLine, "hex").toString("utf8");
        runPsqlCommand(sql, dataDir, options, runtime.target);
    });
}
|
|
291
|
+
/**
 * Lists the tables of one schema, sorted by name, as reported by `pg_tables`.
 *
 * The schema name is escaped with `escapeSqlLiteral` before being placed in
 * the SQL string literal, matching the other `pg_tables` / `pg_database`
 * lookups in this file.
 *
 * @param {string} schemaName - Schema to inspect.
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 * @param {object} connection - Connection on which the query runs.
 * @returns {string[]} Trimmed, non-empty table names in query order.
 */
function listTablesInSchema(schemaName, dataDir, options, connection) {
    const query = [
        "SELECT tablename",
        "FROM pg_tables",
        `WHERE schemaname = '${escapeSqlLiteral(schemaName)}'`,
        "ORDER BY tablename",
    ].join("\n");
    const output = runPsqlQuery(query, dataDir, options, connection);
    return output
        .split("\n")
        .map((tableName) => tableName.trim())
        .filter((tableName) => tableName.length > 0);
}
|
|
299
|
+
/**
 * Creates the dataset's configured indexes on its staging tables.
 *
 * Does nothing when the dataset declares no `indexes`. Tables named in the
 * configuration but absent from the staging schema are skipped (with a
 * warning unless silent). A failing `CREATE INDEX` is reported with
 * `console.error` and does not stop the remaining indexes.
 *
 * @param {{database: string, indexes: (Object<string, Array<{name: string, columns: string[]}>>|undefined)}} dataset
 *   Dataset and its per-table index definitions.
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options; `silent` suppresses warnings and progress logs.
 * @param {{staging: object}} runtime - Runtime connections.
 */
function createManagedIndexesInStaging(dataset, dataDir, options, runtime) {
    if (!dataset.indexes) {
        return;
    }
    const schema = stagingSchemaName(dataset.database);
    // The table listing is taken before the schema is (idempotently) created.
    const existingTables = new Set(listTablesInSchema(schema, dataDir, options, runtime.staging));
    runPsqlCommand(`CREATE SCHEMA IF NOT EXISTS ${schema};`, dataDir, options, runtime.staging);
    for (const [table, indexes] of Object.entries(dataset.indexes)) {
        if (existingTables.has(table)) {
            for (const index of indexes) {
                const indexName = prefixedName(dataset.database, `${table}_${index.name}`);
                const columnList = index.columns.join(", ");
                const createIndexSql = `CREATE INDEX IF NOT EXISTS ${indexName} ON ${schema}.${table} (${columnList});`;
                let created = true;
                try {
                    runPsqlCommand(createIndexSql, dataDir, options, runtime.staging);
                }
                catch (err) {
                    created = false;
                    console.error(`Failed to create managed index ${indexName} on ${schema}.${table}:`, err);
                }
                if (created && !options["silent"]) {
                    console.log(`Prepared managed index ${indexName} on ${schema}.${table}`);
                }
            }
        }
        else if (!options["silent"]) {
            console.warn(`Skipping managed indexes for missing table ${schema}.${table}`);
        }
    }
}
|
|
330
|
+
/**
 * Lists the tables of the main schema on the target database whose names
 * start with `<dataset.database>_`, sorted by name.
 *
 * The prefix is matched literally: every LIKE metacharacter in it (`\`, `%`,
 * `_`) is escaped with the backslash declared in the `ESCAPE` clause, and
 * single quotes are doubled so the prefix cannot terminate the SQL literal.
 *
 * @param {{database: string}} dataset - Dataset whose prefixed tables are listed.
 * @param {string} dataDir - Working directory for psql.
 * @param {object} options - Parsed command-line options.
 * @param {{target: object}} runtime - Runtime connections.
 * @returns {string[]} Trimmed, non-empty table names in query order.
 */
function listPrefixedTables(dataset, dataDir, options, runtime) {
    const prefix = `${dataset.database}_`;
    const likePrefix = prefix
        .replace(/[\\%_]/g, "\\$&")
        .replace(/'/g, "''");
    const query = [
        "SELECT tablename",
        "FROM pg_tables",
        `WHERE schemaname = '${senatSchemaName}'`,
        ` AND tablename LIKE '${likePrefix}%' ESCAPE '\\'`,
        "ORDER BY tablename",
    ].join("\n");
    const output = runPsqlQuery(query, dataDir, options, runtime.target);
    return output
        .split("\n")
        .map((tableName) => tableName.trim())
        .filter((tableName) => tableName.length > 0);
}
|
|
62
345
|
/**
 * Download `url` with `fetch` and stream the response body into the file `dest`.
 *
 * @param {string} url - Address to fetch.
 * @param {string} dest - Path of the file to write.
 * @returns {Promise<void>} Settles once the stream pipeline has completed.
 * @throws {Error} When the response status is not OK, or (assertion) when the response has no body.
 */
async function downloadFile(url, dest) {
    const reply = await fetch(url);
    const { ok, status, statusText } = reply;
    if (!ok) {
        throw new Error(`Download failed ${status} ${statusText} for ${url}`);
    }
    const payload = reply.body;
    assert(payload, `Empty response body for ${url}`);
    // Bridge the web stream to a Node stream so it can be piped to disk.
    const source = Readable.fromWeb(payload);
    const sink = fs.createWriteStream(dest);
    await streamPipeline(source, sink);
}
|
|
69
|
-
|
|
70
|
-
* Copy a dataset database to the main Senat database (overwriting its contents).
|
|
71
|
-
*/
|
|
72
|
-
async function copyToSenat(dataset, dataDir, options) {
|
|
353
|
+
/**
 * Import a dataset's SQL dump into its staging schema.
 *
 * Steps:
 *  1. Rewrite `<dataDir>/<database>.sql` line by line into
 *     `<dataDir>/<database>_schema_dump.sql`, prefixed with statements that drop
 *     and recreate the staging schema.
 *  2. Run the rewritten dump with psql on `runtime.staging`.
 *  3. Create the dataset's managed indexes in staging.
 *  4. Write and run (with retries) the normalization script
 *     `<dataDir>/<database>_normalize_staging.sql`.
 *  5. Check the staging schema with `ensureStagingSchemaHasTables`.
 *
 * @param {object} dataset - Dataset descriptor; reads `database`.
 * @param {string} dataDir - Directory holding the SQL dump and receiving the generated files.
 * @param {object} options - Command-line options; reads `silent`.
 * @param {object} runtime - Runtime context; reads `runtime.staging`.
 * @returns {Promise<void>}
 * @throws Rejects with the write-stream error, the psql/file error of steps 2-4, or whatever the retry/check helpers throw.
 */
async function importIntoStaging(dataset, dataDir, options, runtime) {
    if (!options["silent"]) {
        console.log(`Importing ${dataset.database} into staging database ${runtime.staging.name}...`);
    }
    const sqlFilePath = path.join(dataDir, `${dataset.database}.sql`);
    const schemaDumpFile = path.join(dataDir, `${dataset.database}_schema_dump.sql`);
    const normalizeSqlFile = path.join(dataDir, `${dataset.database}_normalize_staging.sql`);
    const stagingSchema = stagingSchemaName(dataset.database);
    const schemaSqlWriter = fs.createWriteStream(schemaDumpFile, { encoding: "utf8" });
    // Listen for completion/failure BEFORE the first write: a stream "error"
    // emitted while no listener is attached would crash the process.
    let writerError = null;
    const writerFinished = new Promise((resolve, reject) => {
        schemaSqlWriter.on("finish", resolve);
        schemaSqlWriter.on("error", (error) => {
            writerError = error;
            reject(error);
        });
    });
    // The rejection is observed by the `await` below; this only prevents an
    // early failure from being reported as an unhandled rejection meanwhile.
    writerFinished.catch(() => { });
    schemaSqlWriter.write(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE;\n`);
    schemaSqlWriter.write(`CREATE SCHEMA IF NOT EXISTS ${stagingSchema};\n`);
    const lineReader = readline.createInterface({
        input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
        crlfDelay: Infinity,
    });
    // True while reading the data rows that follow a line accepted by
    // isCopyFromStdinLine, up to the terminating "\." line.
    let inCopyData = false;
    for await (const line of lineReader) {
        if (writerError) {
            // The output file is unusable; stop reading the dump.
            break;
        }
        let newLine = rewriteLineForStagingImport(line, dataset, stagingSchema, inCopyData);
        if (!inCopyData) {
            // The rewritten file is written as UTF-8, so declare it as such.
            newLine = newLine.replace(/SET client_encoding = 'LATIN1';/i, "SET client_encoding = 'UTF8';");
            if (isCopyFromStdinLine(newLine)) {
                inCopyData = true;
            }
        }
        else if (line === "\\.") {
            inCopyData = false;
        }
        schemaSqlWriter.write(newLine + "\n");
    }
    schemaSqlWriter.end();
    await writerFinished;
    try {
        // NOTE(review): the meaning of the trailing `false` is defined by runPsqlFile — confirm there.
        runPsqlFile(schemaDumpFile, dataDir, options, runtime.staging, false);
        createManagedIndexesInStaging(dataset, dataDir, options, runtime);
        fs.writeFileSync(normalizeSqlFile, buildNormalizeStagingSchemaSql(dataset.database), { encoding: "utf8" });
    }
    catch (error) {
        const execError = error;
        if (!options["silent"]) {
            console.error(`Failed to import ${dataset.database} data into staging:`);
            if (execError.stderr) {
                console.error(execError.stderr);
            }
            if (execError.stdout) {
                console.error(execError.stdout);
            }
        }
        throw error;
    }
    await runWithRetry(() => runPsqlFile(normalizeSqlFile, dataDir, options, runtime.staging), options, {
        attempts: 4,
        delayMs: 500,
        label: `Staging normalization for ${dataset.database}`,
    });
    ensureStagingSchemaHasTables(dataset, dataDir, options, runtime.staging);
}
|
|
138
|
-
async function retrieveDataset(dataDir, dataset) {
|
|
417
|
+
async function retrieveDataset(dataDir, dataset, options, runtime) {
|
|
139
418
|
const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
|
|
140
419
|
const zipFilePath = path.join(dataDir, zipFilename);
|
|
141
420
|
if (options["all"] || options["fetch"]) {
|
|
142
|
-
// Fetch & save ZIP file.
|
|
143
421
|
if (!options["silent"]) {
|
|
144
|
-
console.log(`Loading ${dataset.title}: ${zipFilename}
|
|
422
|
+
console.log(`Loading ${dataset.title}: ${zipFilename}...`);
|
|
145
423
|
}
|
|
146
|
-
// Fetch fails with OpenSSL error: dh key too small.
|
|
147
|
-
// (so does "curl").
|
|
148
|
-
// const response = await fetch(dataset.url)
|
|
149
|
-
// if (!response.ok) {
|
|
150
|
-
// console.error(response.status, response.statusText)
|
|
151
|
-
// console.error(await response.text())
|
|
152
|
-
// throw new Error(`Fetch failed: ${dataset.url}`)
|
|
153
|
-
// }
|
|
154
|
-
// await pipeline(response.body!, fs.createWriteStream(zipFilePath))
|
|
155
424
|
fs.removeSync(zipFilePath);
|
|
156
425
|
await downloadFile(dataset.url, zipFilePath);
|
|
157
426
|
}
|
|
@@ -159,7 +428,7 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
159
428
|
const sqlFilePath = path.join(dataDir, sqlFilename);
|
|
160
429
|
if (options["all"] || options["unzip"]) {
|
|
161
430
|
if (!options["silent"]) {
|
|
162
|
-
console.log(`Unzipping ${dataset.title}: ${zipFilename}
|
|
431
|
+
console.log(`Unzipping ${dataset.title}: ${zipFilename}...`);
|
|
163
432
|
}
|
|
164
433
|
fs.removeSync(sqlFilePath);
|
|
165
434
|
const zip = new StreamZip({
|
|
@@ -168,7 +437,7 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
168
437
|
});
|
|
169
438
|
await new Promise((resolve, reject) => {
|
|
170
439
|
zip.on("ready", () => {
|
|
171
|
-
zip.extract(null, dataDir, (err
|
|
440
|
+
zip.extract(null, dataDir, (err) => {
|
|
172
441
|
zip.close();
|
|
173
442
|
if (err) {
|
|
174
443
|
reject(err);
|
|
@@ -181,27 +450,25 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
181
450
|
});
|
|
182
451
|
if (dataset.repairZip !== undefined) {
|
|
183
452
|
if (!options["silent"]) {
|
|
184
|
-
console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}
|
|
453
|
+
console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}...`);
|
|
185
454
|
}
|
|
186
455
|
dataset.repairZip(dataset, dataDir);
|
|
187
456
|
}
|
|
188
457
|
}
|
|
189
458
|
if ((options["all"] || options["repairEncoding"]) && dataset.repairEncoding) {
|
|
190
459
|
if (!options["silent"]) {
|
|
191
|
-
console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}
|
|
460
|
+
console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}...`);
|
|
192
461
|
}
|
|
193
462
|
const repairedSqlFilePath = sqlFilePath + ".repaired";
|
|
194
463
|
const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
|
|
195
464
|
encoding: "utf8",
|
|
196
465
|
});
|
|
197
|
-
// Read the file as latin1 (ISO-8859-1/CP1252) and write as UTF-8
|
|
198
466
|
const lineReader = readline.createInterface({
|
|
199
467
|
input: fs.createReadStream(sqlFilePath, { encoding: "latin1" }),
|
|
200
468
|
crlfDelay: Infinity,
|
|
201
469
|
});
|
|
202
470
|
for await (const line of lineReader) {
|
|
203
|
-
|
|
204
|
-
let repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
|
|
471
|
+
const repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
|
|
205
472
|
repairedSqlWriter.write(repairedLine + "\n");
|
|
206
473
|
}
|
|
207
474
|
repairedSqlWriter.end();
|
|
@@ -209,103 +476,91 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
209
476
|
}
|
|
210
477
|
if (options["all"] || options["import"] || options["schema"]) {
|
|
211
478
|
if (!options["silent"]) {
|
|
212
|
-
console.log(`
|
|
479
|
+
console.log(`Merging ${dataset.title}: ${sqlFilename} into ${runtime.target.name}.${senatSchemaName}...`);
|
|
213
480
|
}
|
|
214
|
-
await
|
|
215
|
-
|
|
216
|
-
if (
|
|
217
|
-
|
|
218
|
-
for (const index of indexes) {
|
|
219
|
-
const indexName = index.name;
|
|
220
|
-
const columns = index.columns.join(", ");
|
|
221
|
-
const schema = dataset.database;
|
|
222
|
-
const sql = `CREATE INDEX IF NOT EXISTS ${indexName} ON ${schema}.${table} (${columns});`;
|
|
223
|
-
try {
|
|
224
|
-
execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -c "${sql}"`, {
|
|
225
|
-
env: process.env,
|
|
226
|
-
encoding: "utf-8",
|
|
227
|
-
stdio: ["ignore", "ignore", "pipe"],
|
|
228
|
-
});
|
|
229
|
-
if (!options["silent"]) {
|
|
230
|
-
console.log(`Created index: ${indexName} on ${schema}.${table} (${columns})`);
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
catch (err) {
|
|
234
|
-
console.error(`Failed to create index ${indexName} on ${schema}.${table}:`, err);
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
}
|
|
481
|
+
await importIntoStaging(dataset, dataDir, options, runtime);
|
|
482
|
+
finalizeDatasetImport(dataset, dataDir, options, runtime);
|
|
483
|
+
if (isIncrementalImport(options)) {
|
|
484
|
+
applyStagingMetadataToTarget(dataset, dataDir, options, runtime);
|
|
238
485
|
}
|
|
239
486
|
}
|
|
240
487
|
if (options["schema"]) {
|
|
241
|
-
|
|
488
|
+
const definitionsDir = path.resolve("src", "raw_types_schemats");
|
|
242
489
|
assert(fs.statSync(definitionsDir).isDirectory());
|
|
243
490
|
if (!options["silent"]) {
|
|
244
|
-
console.log(`Creating TypeScript definitions from
|
|
491
|
+
console.log(`Creating TypeScript definitions from prefixed ${senatSchemaName} tables ` +
|
|
492
|
+
`for '${dataset.database}' in database '${runtime.target.name}'...`);
|
|
245
493
|
}
|
|
246
|
-
const dbConnectionString = `postgres://${
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
494
|
+
const dbConnectionString = `postgres://${runtime.target.user}:${runtime.target.password}` +
|
|
495
|
+
`@${runtime.target.host}:${runtime.target.port}/${runtime.target.name}`;
|
|
496
|
+
const definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
|
|
497
|
+
const tables = listPrefixedTables(dataset, dataDir, options, runtime);
|
|
498
|
+
const tableOptions = tables.map((tableName) => `-t ${tableName}`).join(" ");
|
|
499
|
+
execSync(`npx schemats generate -c ${dbConnectionString} -s ${senatSchemaName} ${tableOptions} -o ${definitionFilePath}`, {
|
|
250
500
|
env: process.env,
|
|
251
501
|
encoding: "utf-8",
|
|
252
|
-
// stdio: ["ignore", "ignore", "pipe"],
|
|
253
502
|
});
|
|
254
503
|
const definition = fs.readFileSync(definitionFilePath, { encoding: "utf8" });
|
|
255
|
-
const definitionRepaired = definition
|
|
256
|
-
.replace(/\r\n/g, "\n")
|
|
257
|
-
.replace(/AUTO-GENERATED FILE @ \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/, "AUTO-GENERATED FILE");
|
|
504
|
+
const definitionRepaired = normalizeGeneratedDefinition(definition, dataset.database);
|
|
258
505
|
fs.writeFileSync(definitionFilePath, definitionRepaired);
|
|
259
|
-
definitionsDir = path.resolve("src", "raw_types");
|
|
260
|
-
definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
|
|
261
|
-
execSync(`npx kysely-codegen --url '${dbConnectionString}' --default-schema ${dataset.database} --include-pattern '${dataset.database}.*' --out-file ${definitionFilePath}`, {
|
|
262
|
-
env: process.env,
|
|
263
|
-
encoding: "utf-8",
|
|
264
|
-
// stdio: ["ignore", "ignore", "pipe"],
|
|
265
|
-
});
|
|
266
506
|
}
|
|
267
507
|
}
|
|
508
|
+
/**
 * Build the connection settings used by the import from the module-level
 * `config` and `options`.
 *
 * For a non-incremental import the staging connection is the very same object
 * as the target connection, and `config.stagingDb` is not read. For an
 * incremental import, staging is built from `config.stagingDb`.
 *
 * @returns {{staging: object, target: object}} Connections, each with `host`, `name`, `password`, `port` and `user`.
 */
function buildRuntimeContext() {
    // Copy only the connection fields out of a database configuration entry.
    const pickConnection = ({ host, name, password, port, user }) => ({ host, name, password, port, user });
    const target = pickConnection(config.db);
    if (isIncrementalImport(options)) {
        return { staging: pickConnection(config.stagingDb), target };
    }
    return { staging: target, target };
}
|
|
268
531
|
/**
 * Entry point of the script: validate the configuration, prepare the
 * database(s), import every chosen dataset and finally bump the schema
 * version if needed.
 *
 * Reads the module-level `options` (`dataDir`, `sudo`, `categories`, `silent`).
 * For an incremental import, the staging database and the foreign staging
 * server are also set up before the datasets are processed, and the foreign
 * staging server is cleaned up afterwards whether the import succeeded or not.
 *
 * @returns {Promise<void>}
 * @throws {AssertionError} When the options or the database configuration are invalid.
 * @throws Whatever the preparation or import helpers throw. If the import fails
 *   and the cleanup fails too, the import error is the one propagated (the
 *   cleanup error is only logged).
 */
async function retrieveOpenData() {
    const dataDir = options["dataDir"];
    assertExistingDirectory(dataDir, "data directory");
    assert(!options["sudo"] || !options["sudo"].startsWith("-"), "Option --sudo expects a Unix user name, for example: --sudo postgres");
    const runtime = buildRuntimeContext();
    assert(runtime.target.host && runtime.target.port && runtime.target.user && runtime.target.password, "Missing target database configuration: DB_* in .env file");
    const incremental = isIncrementalImport(options);
    if (incremental) {
        assert(runtime.staging.host && runtime.staging.port && runtime.staging.user && runtime.staging.password, "Missing staging database configuration: STAGING_DB_* in .env file");
        assert(runtime.target.name !== runtime.staging.name, "Target and staging databases must be different");
    }
    // Only start the timer when its result will actually be reported.
    const timed = !options["silent"];
    if (timed) {
        console.time("data extraction time");
    }
    let importFailed = false;
    try {
        ensureDatabaseExists(runtime.target, dataDir, options);
        if (incremental) {
            ensureStagingDatabase(dataDir, options, runtime);
            ensureForeignStagingServer(dataDir, options, runtime);
        }
        ensureSchemaVersionTable(dataDir, options, runtime);
        // Captured before any dataset is imported so it can be compared afterwards.
        const initialSchemaFingerprint = getSchemaStructureFingerprint(dataDir, options, runtime);
        const enabledDatasets = getEnabledDatasets(options["categories"]);
        const chosenDatasets = getChosenDatasets(enabledDatasets);
        for (const dataset of chosenDatasets) {
            // Datasets are imported one at a time, in order.
            await retrieveDataset(dataDir, dataset, options, runtime);
        }
        bumpSchemaVersionIfNeeded(initialSchemaFingerprint, dataDir, options, runtime);
    }
    catch (error) {
        importFailed = true;
        throw error;
    }
    finally {
        try {
            if (incremental) {
                cleanupForeignStagingServer(dataDir, options, runtime);
            }
        }
        catch (cleanupError) {
            if (!importFailed) {
                throw cleanupError;
            }
            // Do not let a cleanup failure hide the error that aborted the import.
            console.error("Failed to clean up foreign staging server after import failure:", cleanupError);
        }
        finally {
            if (timed) {
                console.timeEnd("data extraction time");
            }
        }
    }
}
|
|
311
566
|
retrieveOpenData()
|