@tricoteuses/senat 2.22.16 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +168 -0
- package/lib/aggregates.d.ts +52 -0
- package/lib/aggregates.js +930 -0
- package/lib/aggregates.mjs +713 -0
- package/lib/aggregates.ts +833 -0
- package/lib/config.d.ts +10 -0
- package/lib/config.js +16 -0
- package/lib/config.mjs +16 -0
- package/lib/config.ts +26 -0
- package/lib/databases.d.ts +2 -0
- package/lib/databases.js +26 -0
- package/lib/databases.mjs +57 -0
- package/lib/databases.ts +71 -0
- package/lib/datasets.d.ts +34 -0
- package/lib/datasets.js +233 -0
- package/lib/datasets.mjs +78 -0
- package/lib/datasets.ts +118 -0
- package/lib/fields.d.ts +10 -0
- package/lib/fields.js +68 -0
- package/lib/fields.mjs +22 -0
- package/lib/fields.ts +29 -0
- package/lib/git.d.ts +26 -0
- package/lib/git.js +167 -0
- package/lib/index.d.ts +13 -0
- package/lib/index.js +1 -0
- package/lib/index.mjs +7 -0
- package/lib/index.ts +64 -0
- package/lib/inserters.d.ts +98 -0
- package/lib/inserters.js +500 -0
- package/lib/inserters.mjs +360 -0
- package/lib/inserters.ts +521 -0
- package/lib/legislatures.json +38 -0
- package/lib/loaders.d.ts +58 -0
- package/lib/loaders.js +286 -0
- package/lib/loaders.mjs +158 -0
- package/lib/loaders.ts +271 -0
- package/lib/model/agenda.d.ts +6 -0
- package/lib/model/agenda.js +148 -0
- package/lib/model/ameli.d.ts +51 -0
- package/lib/model/ameli.js +149 -0
- package/lib/model/ameli.mjs +84 -0
- package/lib/model/ameli.ts +100 -0
- package/lib/model/commission.d.ts +18 -0
- package/lib/model/commission.js +269 -0
- package/lib/model/debats.d.ts +67 -0
- package/lib/model/debats.js +95 -0
- package/lib/model/debats.mjs +43 -0
- package/lib/model/debats.ts +68 -0
- package/lib/model/documents.d.ts +12 -0
- package/lib/model/documents.js +151 -0
- package/lib/model/dosleg.d.ts +7 -0
- package/lib/model/dosleg.js +326 -0
- package/lib/model/dosleg.mjs +196 -0
- package/lib/model/dosleg.ts +240 -0
- package/lib/model/index.d.ts +7 -0
- package/lib/model/index.js +7 -0
- package/lib/model/index.mjs +5 -0
- package/lib/model/index.ts +15 -0
- package/lib/model/questions.d.ts +45 -0
- package/lib/model/questions.js +89 -0
- package/lib/model/questions.mjs +71 -0
- package/lib/model/questions.ts +93 -0
- package/lib/model/scrutins.d.ts +13 -0
- package/lib/model/scrutins.js +114 -0
- package/lib/model/seance.d.ts +3 -0
- package/lib/model/seance.js +267 -0
- package/lib/model/sens.d.ts +146 -0
- package/lib/model/sens.js +454 -0
- package/lib/model/sens.mjs +415 -0
- package/lib/model/sens.ts +516 -0
- package/lib/model/texte.d.ts +7 -0
- package/lib/model/texte.js +256 -0
- package/lib/model/texte.mjs +208 -0
- package/lib/model/texte.ts +229 -0
- package/lib/model/util.d.ts +9 -0
- package/lib/model/util.js +38 -0
- package/lib/model/util.mjs +19 -0
- package/lib/model/util.ts +32 -0
- package/lib/parsers/texte.d.ts +7 -0
- package/lib/parsers/texte.js +228 -0
- package/lib/raw_types/ameli.d.ts +914 -0
- package/lib/raw_types/ameli.js +5 -0
- package/lib/raw_types/ameli.mjs +163 -0
- package/lib/raw_types/debats.d.ts +207 -0
- package/lib/raw_types/debats.js +5 -0
- package/lib/raw_types/debats.mjs +58 -0
- package/lib/raw_types/dosleg.d.ts +1619 -0
- package/lib/raw_types/dosleg.js +5 -0
- package/lib/raw_types/dosleg.mjs +438 -0
- package/lib/raw_types/questions.d.ts +419 -0
- package/lib/raw_types/questions.js +5 -0
- package/lib/raw_types/questions.mjs +11 -0
- package/lib/raw_types/senat.d.ts +11368 -0
- package/lib/raw_types/senat.js +5 -0
- package/lib/raw_types/sens.d.ts +8248 -0
- package/lib/raw_types/sens.js +5 -0
- package/lib/raw_types/sens.mjs +508 -0
- package/lib/raw_types_kysely/ameli.d.ts +915 -0
- package/lib/raw_types_kysely/ameli.js +7 -0
- package/lib/raw_types_kysely/ameli.mjs +5 -0
- package/lib/raw_types_kysely/ameli.ts +951 -0
- package/lib/raw_types_kysely/debats.d.ts +207 -0
- package/lib/raw_types_kysely/debats.js +7 -0
- package/lib/raw_types_kysely/debats.mjs +5 -0
- package/lib/raw_types_kysely/debats.ts +222 -0
- package/lib/raw_types_kysely/dosleg.d.ts +3532 -0
- package/lib/raw_types_kysely/dosleg.js +7 -0
- package/lib/raw_types_kysely/dosleg.mjs +5 -0
- package/lib/raw_types_kysely/dosleg.ts +3621 -0
- package/lib/raw_types_kysely/questions.d.ts +414 -0
- package/lib/raw_types_kysely/questions.js +7 -0
- package/lib/raw_types_kysely/questions.mjs +5 -0
- package/lib/raw_types_kysely/questions.ts +426 -0
- package/lib/raw_types_kysely/sens.d.ts +4394 -0
- package/lib/raw_types_kysely/sens.js +7 -0
- package/lib/raw_types_kysely/sens.mjs +5 -0
- package/lib/raw_types_kysely/sens.ts +4499 -0
- package/lib/raw_types_schemats/ameli.d.ts +539 -0
- package/lib/raw_types_schemats/ameli.js +2 -0
- package/lib/raw_types_schemats/ameli.mjs +2 -0
- package/lib/raw_types_schemats/ameli.ts +601 -0
- package/lib/raw_types_schemats/debats.d.ts +127 -0
- package/lib/raw_types_schemats/debats.js +2 -0
- package/lib/raw_types_schemats/debats.mjs +2 -0
- package/lib/raw_types_schemats/debats.ts +145 -0
- package/lib/raw_types_schemats/dosleg.d.ts +977 -0
- package/lib/raw_types_schemats/dosleg.js +2 -0
- package/lib/raw_types_schemats/dosleg.mjs +2 -0
- package/lib/raw_types_schemats/dosleg.ts +2193 -0
- package/lib/raw_types_schemats/questions.d.ts +235 -0
- package/lib/raw_types_schemats/questions.js +2 -0
- package/lib/raw_types_schemats/questions.mjs +2 -0
- package/lib/raw_types_schemats/questions.ts +249 -0
- package/lib/raw_types_schemats/sens.d.ts +6915 -0
- package/lib/raw_types_schemats/sens.js +2 -0
- package/lib/raw_types_schemats/sens.mjs +2 -0
- package/lib/raw_types_schemats/sens.ts +2907 -0
- package/lib/scripts/convert_data.d.ts +1 -0
- package/lib/scripts/convert_data.js +354 -0
- package/lib/scripts/convert_data.mjs +181 -0
- package/lib/scripts/convert_data.ts +243 -0
- package/lib/scripts/data-download.d.ts +1 -0
- package/lib/scripts/data-download.js +12 -0
- package/lib/scripts/datautil.d.ts +8 -0
- package/lib/scripts/datautil.js +34 -0
- package/lib/scripts/datautil.mjs +16 -0
- package/lib/scripts/datautil.ts +19 -0
- package/lib/scripts/images/transparent_150x192.jpg +0 -0
- package/lib/scripts/images/transparent_155x225.jpg +0 -0
- package/lib/scripts/parse_textes.d.ts +1 -0
- package/lib/scripts/parse_textes.js +44 -0
- package/lib/scripts/parse_textes.mjs +46 -0
- package/lib/scripts/parse_textes.ts +65 -0
- package/lib/scripts/retrieve_agenda.d.ts +1 -0
- package/lib/scripts/retrieve_agenda.js +132 -0
- package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
- package/lib/scripts/retrieve_cr_commission.js +364 -0
- package/lib/scripts/retrieve_cr_seance.d.ts +6 -0
- package/lib/scripts/retrieve_cr_seance.js +347 -0
- package/lib/scripts/retrieve_documents.d.ts +3 -0
- package/lib/scripts/retrieve_documents.js +219 -0
- package/lib/scripts/retrieve_documents.mjs +249 -0
- package/lib/scripts/retrieve_documents.ts +298 -0
- package/lib/scripts/retrieve_open_data.d.ts +1 -0
- package/lib/scripts/retrieve_open_data.js +315 -0
- package/lib/scripts/retrieve_open_data.mjs +217 -0
- package/lib/scripts/retrieve_open_data.ts +268 -0
- package/lib/scripts/retrieve_senateurs_photos.d.ts +1 -0
- package/lib/scripts/retrieve_senateurs_photos.js +147 -0
- package/lib/scripts/retrieve_senateurs_photos.mjs +147 -0
- package/lib/scripts/retrieve_senateurs_photos.ts +177 -0
- package/lib/scripts/retrieve_videos.d.ts +1 -0
- package/lib/scripts/retrieve_videos.js +461 -0
- package/lib/scripts/shared/cli_helpers.d.ts +95 -0
- package/lib/scripts/shared/cli_helpers.js +91 -0
- package/lib/scripts/shared/cli_helpers.ts +36 -0
- package/lib/scripts/shared/util.d.ts +4 -0
- package/lib/scripts/shared/util.js +35 -0
- package/lib/scripts/shared/util.ts +33 -0
- package/lib/scripts/test_iter_load.d.ts +1 -0
- package/lib/scripts/test_iter_load.js +12 -0
- package/lib/src/config.d.ts +22 -0
- package/lib/src/config.js +17 -7
- package/lib/src/conversion_textes.js +5 -1
- package/lib/src/databases.d.ts +2 -1
- package/lib/src/databases_postgres.d.ts +4 -0
- package/lib/src/databases_postgres.js +23 -0
- package/lib/src/datasets.d.ts +4 -0
- package/lib/src/datasets.js +16 -2
- package/lib/src/git.d.ts +1 -0
- package/lib/src/git.js +45 -11
- package/lib/src/index.d.ts +19 -8
- package/lib/src/index.js +6 -1
- package/lib/src/loaders.js +10 -4
- package/lib/src/model/agenda.js +2 -2
- package/lib/src/model/ameli.d.ts +64 -52
- package/lib/src/model/ameli.js +147 -145
- package/lib/src/model/ameli_postgres.d.ts +67 -0
- package/lib/src/model/ameli_postgres.js +150 -0
- package/lib/src/model/commission.d.ts +3 -2
- package/lib/src/model/commission.js +2 -2
- package/lib/src/model/debats.d.ts +38 -66
- package/lib/src/model/debats.js +110 -93
- package/lib/src/model/documents.d.ts +32 -12
- package/lib/src/model/documents.js +171 -130
- package/lib/src/model/dosleg.d.ts +142 -5
- package/lib/src/model/dosleg.js +298 -156
- package/lib/src/model/questions.d.ts +54 -45
- package/lib/src/model/questions.js +89 -87
- package/lib/src/model/scrutins.d.ts +48 -13
- package/lib/src/model/scrutins.js +118 -111
- package/lib/src/model/seance.js +3 -3
- package/lib/src/model/sens.d.ts +109 -179
- package/lib/src/model/sens.js +384 -484
- package/lib/src/model/util.d.ts +0 -8
- package/lib/src/model/util.js +0 -23
- package/lib/src/parsers/texte.js +7 -7
- package/lib/src/raw_types/ameli.d.ts +1651 -803
- package/lib/src/raw_types/ameli.js +1816 -5
- package/lib/src/raw_types/debats.d.ts +353 -180
- package/lib/src/raw_types/debats.js +517 -5
- package/lib/src/raw_types/dosleg.d.ts +2862 -1527
- package/lib/src/raw_types/dosleg.js +4354 -5
- package/lib/src/raw_types/questions.d.ts +671 -395
- package/lib/src/raw_types/questions.js +1303 -5
- package/lib/src/raw_types/sens.d.ts +7743 -8148
- package/lib/src/raw_types/sens.js +10429 -5
- package/lib/src/raw_types_schemats/ameli.d.ts +4 -2
- package/lib/src/raw_types_schemats/debats.d.ts +2 -2
- package/lib/src/raw_types_schemats/dosleg.d.ts +2 -2
- package/lib/src/raw_types_schemats/questions.d.ts +2 -2
- package/lib/src/raw_types_schemats/sens.d.ts +10 -4216
- package/lib/src/scripts/convert_data.js +7 -6
- package/lib/src/scripts/convert_xml_to_html.js +2 -2
- package/lib/src/scripts/data-download.js +3 -2
- package/lib/src/scripts/retrieve_agenda.js +21 -9
- package/lib/src/scripts/retrieve_cr_commission.js +17 -17
- package/lib/src/scripts/retrieve_cr_seance.d.ts +14 -1
- package/lib/src/scripts/retrieve_cr_seance.js +10 -11
- package/lib/src/scripts/retrieve_documents.d.ts +11 -2
- package/lib/src/scripts/retrieve_documents.js +25 -14
- package/lib/src/scripts/retrieve_open_data.js +514 -153
- package/lib/src/scripts/retrieve_senateurs_photos.js +25 -11
- package/lib/src/scripts/retrieve_videos.js +12 -11
- package/lib/src/scripts/shared/cli_helpers.d.ts +1 -6
- package/lib/src/scripts/shared/cli_helpers.js +9 -8
- package/lib/src/scripts/shared/incremental_import_sql.d.ts +2 -0
- package/lib/src/scripts/shared/incremental_import_sql.js +894 -0
- package/lib/src/scripts/shared/prefixed_tables.d.ts +10 -0
- package/lib/src/scripts/shared/prefixed_tables.js +36 -0
- package/lib/src/scripts/shared/schema_version.d.ts +3 -0
- package/lib/src/scripts/shared/schema_version.js +97 -0
- package/lib/src/scripts/shared/staging_import.d.ts +3 -0
- package/lib/src/scripts/shared/staging_import.js +80 -0
- package/lib/src/scripts/shared/staging_metadata_sql.d.ts +1 -0
- package/lib/src/scripts/shared/staging_metadata_sql.js +221 -0
- package/lib/src/scripts/validate_prefixed_tables.d.ts +1 -0
- package/lib/src/scripts/validate_prefixed_tables.js +101 -0
- package/lib/src/types/ameli.d.ts +4 -4
- package/lib/src/types/debats.d.ts +2 -2
- package/lib/src/types/dosleg.d.ts +39 -39
- package/lib/src/types/questions.d.ts +2 -2
- package/lib/src/types/sens.d.ts +0 -2
- package/lib/src/types/texte.d.ts +1 -1
- package/lib/src/utils/cr_spliting.d.ts +9 -6
- package/lib/src/utils/cr_spliting.js +6 -101
- package/lib/src/utils/reunion_odj_building.d.ts +7 -3
- package/lib/src/utils/reunion_parsing.d.ts +2 -1
- package/lib/src/utils/reunion_parsing.js +2 -2
- package/lib/src/videos/match.js +8 -5
- package/lib/src/videos/pipeline.d.ts +6 -2
- package/lib/src/videos/pipeline.js +21 -8
- package/lib/src/videos/search.js +6 -2
- package/lib/strings.d.ts +1 -0
- package/lib/strings.js +18 -0
- package/lib/strings.mjs +18 -0
- package/lib/strings.ts +26 -0
- package/lib/tests/incrementalImportSql.test.d.ts +1 -0
- package/lib/tests/incrementalImportSql.test.js +155 -0
- package/lib/tests/prefixedTables.test.d.ts +1 -0
- package/lib/tests/prefixedTables.test.js +22 -0
- package/lib/tests/schemaVersion.test.d.ts +1 -0
- package/lib/tests/schemaVersion.test.js +23 -0
- package/lib/tests/validatePrefixedTables.test.d.ts +1 -0
- package/lib/tests/validatePrefixedTables.test.js +14 -0
- package/lib/types/agenda.d.ts +44 -0
- package/lib/types/agenda.js +1 -0
- package/lib/types/ameli.d.ts +5 -0
- package/lib/types/ameli.js +1 -0
- package/lib/types/ameli.mjs +13 -0
- package/lib/types/ameli.ts +21 -0
- package/lib/types/compte_rendu.d.ts +83 -0
- package/lib/types/compte_rendu.js +1 -0
- package/lib/types/debats.d.ts +2 -0
- package/lib/types/debats.js +1 -0
- package/lib/types/debats.mjs +2 -0
- package/lib/types/debats.ts +6 -0
- package/lib/types/dosleg.d.ts +70 -0
- package/lib/types/dosleg.js +1 -0
- package/lib/types/dosleg.mjs +151 -0
- package/lib/types/dosleg.ts +284 -0
- package/lib/types/questions.d.ts +2 -0
- package/lib/types/questions.js +1 -0
- package/lib/types/questions.mjs +1 -0
- package/lib/types/questions.ts +3 -0
- package/lib/types/sens.d.ts +10 -0
- package/lib/types/sens.js +1 -0
- package/lib/types/sens.mjs +1 -0
- package/lib/types/sens.ts +12 -0
- package/lib/types/sessions.d.ts +5 -0
- package/lib/types/sessions.js +84 -0
- package/lib/types/sessions.mjs +43 -0
- package/lib/types/sessions.ts +42 -0
- package/lib/types/texte.d.ts +74 -0
- package/lib/types/texte.js +16 -0
- package/lib/types/texte.mjs +16 -0
- package/lib/types/texte.ts +76 -0
- package/lib/typings/windows-1252.d.js +2 -0
- package/lib/typings/windows-1252.d.mjs +2 -0
- package/lib/typings/windows-1252.d.ts +11 -0
- package/lib/utils/cr_spliting.d.ts +28 -0
- package/lib/utils/cr_spliting.js +265 -0
- package/lib/utils/date.d.ts +10 -0
- package/lib/utils/date.js +100 -0
- package/lib/utils/nvs-timecode.d.ts +7 -0
- package/lib/utils/nvs-timecode.js +79 -0
- package/lib/utils/reunion_grouping.d.ts +9 -0
- package/lib/utils/reunion_grouping.js +361 -0
- package/lib/utils/reunion_odj_building.d.ts +5 -0
- package/lib/utils/reunion_odj_building.js +154 -0
- package/lib/utils/reunion_parsing.d.ts +23 -0
- package/lib/utils/reunion_parsing.js +209 -0
- package/lib/utils/scoring.d.ts +14 -0
- package/lib/utils/scoring.js +147 -0
- package/lib/utils/string_cleaning.d.ts +7 -0
- package/lib/utils/string_cleaning.js +57 -0
- package/lib/validators/config.d.ts +9 -0
- package/lib/validators/config.js +10 -0
- package/lib/validators/config.mjs +54 -0
- package/lib/validators/config.ts +79 -0
- package/lib/validators/senat.d.ts +0 -0
- package/lib/validators/senat.js +28 -0
- package/lib/validators/senat.mjs +24 -0
- package/lib/validators/senat.ts +26 -0
- package/package.json +11 -11
|
@@ -1,16 +1,23 @@
|
|
|
1
1
|
import assert from "assert";
|
|
2
|
-
import {
|
|
2
|
+
import { execFileSync } from "child_process";
|
|
3
3
|
import commandLineArgs from "command-line-args";
|
|
4
4
|
import fs from "fs-extra";
|
|
5
|
+
import { formatWithPrettier, makePgTsGenerator, markAsGenerated, processDatabase } from "kanel";
|
|
6
|
+
import { makeGenerateZodSchemas } from "kanel-zod";
|
|
5
7
|
import path from "path";
|
|
6
8
|
import StreamZip from "node-stream-zip";
|
|
7
9
|
import readline from "readline";
|
|
8
|
-
import
|
|
9
|
-
import { pipeline } from "stream";
|
|
10
|
+
import { pipeline, Readable } from "stream";
|
|
10
11
|
import { promisify } from "util";
|
|
12
|
+
import * as windows1252 from "windows-1252";
|
|
11
13
|
import config from "../config";
|
|
12
14
|
import { getChosenDatasets, getEnabledDatasets } from "../datasets";
|
|
13
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
15
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
16
|
+
import { buildIncrementalDatasetImportSql, buildNormalizeStagingSchemaSql } from "./shared/incremental_import_sql";
|
|
17
|
+
import { buildGeneratedTableManifest, getGeneratedDefinitionPath, getGeneratedTableManifestPath, prefixedName, rawTypesDir, senatSchemaName, stagingSchemaName, stripDatasetPrefix, } from "./shared/prefixed_tables";
|
|
18
|
+
import { buildEnsureSchemaVersionTableSql, buildIncrementSchemaVersionSql, buildSchemaStructureFingerprintQuery, } from "./shared/schema_version";
|
|
19
|
+
import { buildExportStagingMetadataStatementsQuery } from "./shared/staging_metadata_sql";
|
|
20
|
+
import { isCopyFromStdinLine, rewriteLineForStagingImport } from "./shared/staging_import";
|
|
14
21
|
const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
|
|
15
22
|
const optionsDefinitions = [
|
|
16
23
|
...commonOptions,
|
|
@@ -22,7 +29,7 @@ const optionsDefinitions = [
|
|
|
22
29
|
},
|
|
23
30
|
{
|
|
24
31
|
alias: "c",
|
|
25
|
-
help: "create TypeScript interfaces
|
|
32
|
+
help: "create TypeScript interfaces and Zod schemas from database tables into src/raw_types",
|
|
26
33
|
name: "schema",
|
|
27
34
|
type: Boolean,
|
|
28
35
|
},
|
|
@@ -38,15 +45,20 @@ const optionsDefinitions = [
|
|
|
38
45
|
name: "fetch",
|
|
39
46
|
type: Boolean,
|
|
40
47
|
},
|
|
48
|
+
{
|
|
49
|
+
help: "use separate staging database and postgres_fdw for incremental merge into target schema",
|
|
50
|
+
name: "incremental",
|
|
51
|
+
type: Boolean,
|
|
52
|
+
},
|
|
41
53
|
{
|
|
42
54
|
alias: "i",
|
|
43
|
-
help: "import SQL dumps into
|
|
55
|
+
help: "import SQL dumps into PostgreSQL",
|
|
44
56
|
name: "import",
|
|
45
57
|
type: Boolean,
|
|
46
58
|
},
|
|
47
59
|
{
|
|
48
60
|
alias: "S",
|
|
49
|
-
help: "sudo psql commands with given user",
|
|
61
|
+
help: "sudo psql commands with given user (example: --sudo postgres)",
|
|
50
62
|
name: "sudo",
|
|
51
63
|
type: String,
|
|
52
64
|
},
|
|
@@ -59,99 +71,473 @@ const optionsDefinitions = [
|
|
|
59
71
|
];
|
|
60
72
|
const options = commandLineArgs(optionsDefinitions);
|
|
61
73
|
const streamPipeline = promisify(pipeline);
|
|
74
|
+
const stagingServerName = "staging_server";
|
|
75
|
+
function isIncrementalImport(options) {
|
|
76
|
+
return options["incremental"] === true;
|
|
77
|
+
}
|
|
78
|
+
function connectionEnv(connection) {
|
|
79
|
+
return {
|
|
80
|
+
...process.env,
|
|
81
|
+
PGDATABASE: connection.name,
|
|
82
|
+
PGHOST: connection.host,
|
|
83
|
+
PGPASSWORD: connection.password,
|
|
84
|
+
PGPORT: String(connection.port),
|
|
85
|
+
PGUSER: connection.user,
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
function escapeSqlLiteral(value) {
|
|
89
|
+
return value.replace(/'/g, "''");
|
|
90
|
+
}
|
|
91
|
+
function sleep(delayMs) {
|
|
92
|
+
return new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
93
|
+
}
|
|
94
|
+
function getExecSyncErrorOutput(error) {
|
|
95
|
+
const execError = error;
|
|
96
|
+
return `${execError.stderr ?? ""}\n${execError.stdout ?? ""}`;
|
|
97
|
+
}
|
|
98
|
+
function isRetryablePostgresError(error) {
|
|
99
|
+
const output = getExecSyncErrorOutput(error);
|
|
100
|
+
return (output.includes("deadlock detected") ||
|
|
101
|
+
output.includes("could not obtain lock") ||
|
|
102
|
+
output.includes("canceling statement due to lock timeout") ||
|
|
103
|
+
output.includes("could not serialize access"));
|
|
104
|
+
}
|
|
105
|
+
function canReuseExistingStagingDatabase(error) {
|
|
106
|
+
const output = getExecSyncErrorOutput(error);
|
|
107
|
+
return (output.includes("permission denied to terminate process") ||
|
|
108
|
+
output.includes("database is being accessed by other users") ||
|
|
109
|
+
output.includes("cannot drop the currently open database"));
|
|
110
|
+
}
|
|
111
|
+
function isMissingForeignServerError(error, serverName) {
|
|
112
|
+
return getExecSyncErrorOutput(error).includes(`server "${serverName}" does not exist`);
|
|
113
|
+
}
|
|
114
|
+
async function runWithRetry(operation, options, retryOptions) {
|
|
115
|
+
let attempt = 1;
|
|
116
|
+
let delayMs = retryOptions.delayMs;
|
|
117
|
+
while (true) {
|
|
118
|
+
try {
|
|
119
|
+
return operation();
|
|
120
|
+
}
|
|
121
|
+
catch (error) {
|
|
122
|
+
if (!isRetryablePostgresError(error) || attempt >= retryOptions.attempts) {
|
|
123
|
+
throw error;
|
|
124
|
+
}
|
|
125
|
+
if (!options["silent"]) {
|
|
126
|
+
console.warn(`${retryOptions.label} hit a transient PostgreSQL lock error ` +
|
|
127
|
+
`on attempt ${attempt}/${retryOptions.attempts}; retrying in ${delayMs}ms...`);
|
|
128
|
+
}
|
|
129
|
+
await sleep(delayMs);
|
|
130
|
+
attempt += 1;
|
|
131
|
+
delayMs *= 2;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
function buildPsqlInvocation(baseArgs, connection, options) {
|
|
136
|
+
const psqlArgs = [
|
|
137
|
+
"--quiet",
|
|
138
|
+
"-h",
|
|
139
|
+
connection.host,
|
|
140
|
+
"-p",
|
|
141
|
+
String(connection.port),
|
|
142
|
+
"-U",
|
|
143
|
+
connection.user,
|
|
144
|
+
"-d",
|
|
145
|
+
connection.name,
|
|
146
|
+
...baseArgs,
|
|
147
|
+
];
|
|
148
|
+
if (!options["sudo"]) {
|
|
149
|
+
return { command: "psql", args: psqlArgs };
|
|
150
|
+
}
|
|
151
|
+
return {
|
|
152
|
+
command: "sudo",
|
|
153
|
+
args: ["-u", options["sudo"], "psql", ...psqlArgs],
|
|
154
|
+
};
|
|
155
|
+
}
|
|
156
|
+
function runPsqlFile(sqlFilePath, dataDir, options, connection, stopOnError = true) {
|
|
157
|
+
const { command, args } = buildPsqlInvocation([...(stopOnError ? ["-v", "ON_ERROR_STOP=1"] : []), "-f", sqlFilePath], connection, options);
|
|
158
|
+
execFileSync(command, args, {
|
|
159
|
+
cwd: dataDir,
|
|
160
|
+
env: connectionEnv(connection),
|
|
161
|
+
encoding: "utf-8",
|
|
162
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
163
|
+
});
|
|
164
|
+
}
|
|
165
|
+
function runPsqlCommand(command, dataDir, options, connection, stopOnError = true) {
|
|
166
|
+
const psqlCommand = command;
|
|
167
|
+
const { command: binary, args } = buildPsqlInvocation([...(stopOnError ? ["-v", "ON_ERROR_STOP=1"] : []), "-c", psqlCommand], connection, options);
|
|
168
|
+
execFileSync(binary, args, {
|
|
169
|
+
cwd: dataDir,
|
|
170
|
+
env: connectionEnv(connection),
|
|
171
|
+
encoding: "utf-8",
|
|
172
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
function runPsqlQuery(command, dataDir, options, connection, stopOnError = true) {
|
|
176
|
+
const psqlCommand = command;
|
|
177
|
+
const { command: binary, args } = buildPsqlInvocation([...(stopOnError ? ["-v", "ON_ERROR_STOP=1"] : []), "-At", "-c", psqlCommand], connection, options);
|
|
178
|
+
return execFileSync(binary, args, {
|
|
179
|
+
cwd: dataDir,
|
|
180
|
+
env: connectionEnv(connection),
|
|
181
|
+
encoding: "utf-8",
|
|
182
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
function ensureStagingSchemaHasTables(dataset, dataDir, options, connection) {
|
|
186
|
+
const stagingSchema = stagingSchemaName(dataset.database);
|
|
187
|
+
const tableCount = Number.parseInt(runPsqlQuery(`SELECT count(*) FROM pg_tables WHERE schemaname = '${escapeSqlLiteral(stagingSchema)}'`, dataDir, options, connection).trim(), 10);
|
|
188
|
+
assert(tableCount > 0, `Staging schema ${stagingSchema} is empty after importing ${dataset.database}. ` +
|
|
189
|
+
`Aborting incremental merge to protect ${senatSchemaName}.`);
|
|
190
|
+
}
|
|
191
|
+
function ensureStagingDatabase(dataDir, options, runtime) {
|
|
192
|
+
const maintenanceDb = process.env["PGDATABASE"] || "postgres";
|
|
193
|
+
const maintenanceConnection = {
|
|
194
|
+
...runtime.staging,
|
|
195
|
+
name: maintenanceDb,
|
|
196
|
+
};
|
|
197
|
+
try {
|
|
198
|
+
runPsqlCommand(`SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${runtime.staging.name.replace(/'/g, "''")}' AND pid <> pg_backend_pid();`, dataDir, options, maintenanceConnection, false);
|
|
199
|
+
runPsqlCommand(`DROP DATABASE IF EXISTS ${runtime.staging.name}`, dataDir, options, maintenanceConnection, false);
|
|
200
|
+
runPsqlCommand(`CREATE DATABASE ${runtime.staging.name} WITH OWNER ${runtime.staging.user}`, dataDir, options, maintenanceConnection);
|
|
201
|
+
}
|
|
202
|
+
catch (error) {
|
|
203
|
+
if (!canReuseExistingStagingDatabase(error)) {
|
|
204
|
+
throw error;
|
|
205
|
+
}
|
|
206
|
+
if (!options["silent"]) {
|
|
207
|
+
console.warn(`Could not recreate staging database ${runtime.staging.name}; reusing the existing database instead.`);
|
|
208
|
+
}
|
|
209
|
+
ensureDatabaseExists(runtime.staging, dataDir, options);
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
function ensureDatabaseExists(connection, dataDir, options) {
|
|
213
|
+
const maintenanceDb = process.env["PGDATABASE"] || "postgres";
|
|
214
|
+
const maintenanceConnection = {
|
|
215
|
+
...connection,
|
|
216
|
+
name: maintenanceDb,
|
|
217
|
+
};
|
|
218
|
+
const exists = runPsqlQuery(`SELECT 1 FROM pg_database WHERE datname = '${escapeSqlLiteral(connection.name)}'`, dataDir, options, maintenanceConnection).trim();
|
|
219
|
+
if (exists === "1") {
|
|
220
|
+
return;
|
|
221
|
+
}
|
|
222
|
+
runPsqlCommand(`CREATE DATABASE ${connection.name} WITH OWNER ${connection.user}`, dataDir, options, maintenanceConnection);
|
|
223
|
+
}
|
|
224
|
+
function ensureForeignStagingServer(dataDir, options, runtime) {
|
|
225
|
+
runPsqlCommand("CREATE EXTENSION IF NOT EXISTS postgres_fdw", dataDir, options, runtime.target);
|
|
226
|
+
runPsqlCommand(`DROP SERVER IF EXISTS ${stagingServerName} CASCADE`, dataDir, options, runtime.target, false);
|
|
227
|
+
runPsqlCommand([
|
|
228
|
+
`CREATE SERVER ${stagingServerName}`,
|
|
229
|
+
"FOREIGN DATA WRAPPER postgres_fdw",
|
|
230
|
+
[
|
|
231
|
+
"OPTIONS (",
|
|
232
|
+
`host '${escapeSqlLiteral(runtime.staging.host)}', `,
|
|
233
|
+
`dbname '${escapeSqlLiteral(runtime.staging.name)}', `,
|
|
234
|
+
`port '${escapeSqlLiteral(String(runtime.staging.port))}'`,
|
|
235
|
+
")",
|
|
236
|
+
].join(""),
|
|
237
|
+
].join(" "), dataDir, options, runtime.target);
|
|
238
|
+
runPsqlCommand([
|
|
239
|
+
`CREATE USER MAPPING FOR CURRENT_USER SERVER ${stagingServerName}`,
|
|
240
|
+
[
|
|
241
|
+
"OPTIONS (",
|
|
242
|
+
`user '${escapeSqlLiteral(runtime.staging.user)}', `,
|
|
243
|
+
`password '${escapeSqlLiteral(runtime.staging.password)}'`,
|
|
244
|
+
")",
|
|
245
|
+
].join(""),
|
|
246
|
+
].join(" "), dataDir, options, runtime.target);
|
|
247
|
+
}
|
|
248
|
+
function cleanupForeignStagingServer(dataDir, options, runtime) {
|
|
249
|
+
runPsqlCommand(`DROP SERVER IF EXISTS ${stagingServerName} CASCADE`, dataDir, options, runtime.target, false);
|
|
250
|
+
}
|
|
251
|
+
function mountForeignStagingSchema(dataset, dataDir, options, runtime) {
|
|
252
|
+
const stagingSchema = stagingSchemaName(dataset.database);
|
|
253
|
+
const importForeignSchemaCommand = `IMPORT FOREIGN SCHEMA ${stagingSchema} ` + `FROM SERVER ${stagingServerName} INTO ${stagingSchema}`;
|
|
254
|
+
runPsqlCommand(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE`, dataDir, options, runtime.target, false);
|
|
255
|
+
runPsqlCommand(`CREATE SCHEMA ${stagingSchema}`, dataDir, options, runtime.target);
|
|
256
|
+
try {
|
|
257
|
+
runPsqlCommand(importForeignSchemaCommand, dataDir, options, runtime.target);
|
|
258
|
+
}
|
|
259
|
+
catch (error) {
|
|
260
|
+
if (!isMissingForeignServerError(error, stagingServerName)) {
|
|
261
|
+
throw error;
|
|
262
|
+
}
|
|
263
|
+
if (!options["silent"]) {
|
|
264
|
+
console.warn(`Foreign server ${stagingServerName} disappeared before schema import; recreating it.`);
|
|
265
|
+
}
|
|
266
|
+
ensureForeignStagingServer(dataDir, options, runtime);
|
|
267
|
+
runPsqlCommand(importForeignSchemaCommand, dataDir, options, runtime.target);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
function ensureSchemaVersionTable(dataDir, options, runtime) {
|
|
271
|
+
runPsqlCommand(buildEnsureSchemaVersionTableSql(senatSchemaName), dataDir, options, runtime.target);
|
|
272
|
+
}
|
|
273
|
+
function getSchemaStructureFingerprint(dataDir, options, runtime) {
|
|
274
|
+
return runPsqlQuery(buildSchemaStructureFingerprintQuery(senatSchemaName), dataDir, options, runtime.target).trim();
|
|
275
|
+
}
|
|
276
|
+
function getSchemaVersionNumber(dataDir, options, runtime) {
|
|
277
|
+
const version = runPsqlQuery(`SELECT number FROM ${senatSchemaName}.version`, dataDir, options, runtime.target).trim();
|
|
278
|
+
return Number.parseInt(version, 10);
|
|
279
|
+
}
|
|
280
|
+
function bumpSchemaVersionIfNeeded(previousFingerprint, dataDir, options, runtime) {
|
|
281
|
+
const currentFingerprint = getSchemaStructureFingerprint(dataDir, options, runtime);
|
|
282
|
+
if (currentFingerprint !== previousFingerprint) {
|
|
283
|
+
runPsqlCommand(buildIncrementSchemaVersionSql(senatSchemaName), dataDir, options, runtime.target);
|
|
284
|
+
}
|
|
285
|
+
if (!options["silent"]) {
|
|
286
|
+
const versionNumber = getSchemaVersionNumber(dataDir, options, runtime);
|
|
287
|
+
if (currentFingerprint !== previousFingerprint) {
|
|
288
|
+
console.log(`Incremented ${senatSchemaName}.version to ${versionNumber} after schema structure change.`);
|
|
289
|
+
}
|
|
290
|
+
else {
|
|
291
|
+
console.log(`Current ${senatSchemaName}.version: ${versionNumber}.`);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
function finalizeDatasetImport(dataset, dataDir, options, runtime) {
|
|
296
|
+
const postImportFilePath = path.join(dataDir, `${dataset.database}_post_import.sql`);
|
|
297
|
+
const postImportSql = buildIncrementalDatasetImportSql(dataset.database, runtime.target.user, dataset.mergeKeys, dataset.rowMultisetMergeTables);
|
|
298
|
+
const stagingSchema = stagingSchemaName(dataset.database);
|
|
299
|
+
fs.writeFileSync(postImportFilePath, postImportSql, { encoding: "utf8" });
|
|
300
|
+
if (isIncrementalImport(options)) {
|
|
301
|
+
mountForeignStagingSchema(dataset, dataDir, options, runtime);
|
|
302
|
+
}
|
|
303
|
+
runPsqlFile(postImportFilePath, dataDir, options, runtime.target);
|
|
304
|
+
runPsqlCommand(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE`, dataDir, options, runtime.target, false);
|
|
305
|
+
}
|
|
306
|
+
function applyStagingMetadataToTarget(dataset, dataDir, options, runtime) {
|
|
307
|
+
const stagingSchema = stagingSchemaName(dataset.database);
|
|
308
|
+
const encodedStatements = runPsqlQuery(buildExportStagingMetadataStatementsQuery(stagingSchema, senatSchemaName), dataDir, options, runtime.staging)
|
|
309
|
+
.split("\n")
|
|
310
|
+
.map((line) => line.trim())
|
|
311
|
+
.filter((line) => line.length > 0);
|
|
312
|
+
for (const encodedStatement of encodedStatements) {
|
|
313
|
+
const statement = Buffer.from(encodedStatement, "hex").toString("utf8");
|
|
314
|
+
runPsqlCommand(statement, dataDir, options, runtime.target);
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
function listTablesInSchema(schemaName, dataDir, options, connection) {
|
|
318
|
+
const query = ["SELECT tablename", "FROM pg_tables", `WHERE schemaname = '${schemaName}'`, "ORDER BY tablename"].join("\n");
|
|
319
|
+
const output = runPsqlQuery(query, dataDir, options, connection);
|
|
320
|
+
return output
|
|
321
|
+
.split("\n")
|
|
322
|
+
.map((tableName) => tableName.trim())
|
|
323
|
+
.filter((tableName) => tableName.length > 0);
|
|
324
|
+
}
|
|
325
|
+
/**
 * Create the dataset's declared indexes on the tables already imported into
 * the staging schema.
 *
 * Tables declared in `dataset.indexes` but absent from staging are skipped
 * with a warning; individual index-creation failures are logged and do not
 * abort the remaining indexes.
 */
function createManagedIndexesInStaging(dataset, dataDir, options, runtime) {
    const managedIndexes = dataset.indexes;
    if (!managedIndexes) {
        return;
    }
    const schema = stagingSchemaName(dataset.database);
    const existingTables = new Set(listTablesInSchema(schema, dataDir, options, runtime.staging));
    // NOTE(review): the schema is ensured only AFTER listing its tables above, so a
    // freshly created schema always yields an empty listing — confirm this ordering is intentional.
    runPsqlCommand(`CREATE SCHEMA IF NOT EXISTS ${schema};`, dataDir, options, runtime.staging);
    for (const [table, tableIndexes] of Object.entries(managedIndexes)) {
        if (!existingTables.has(table)) {
            if (!options["silent"]) {
                console.warn(`Skipping managed indexes for missing table ${schema}.${table}`);
            }
            continue;
        }
        for (const indexSpec of tableIndexes) {
            const indexName = prefixedName(dataset.database, `${table}_${indexSpec.name}`);
            const columnList = indexSpec.columns.join(", ");
            const createIndexSql = `CREATE INDEX IF NOT EXISTS ${indexName} ON ${schema}.${table} (${columnList});`;
            try {
                runPsqlCommand(createIndexSql, dataDir, options, runtime.staging);
            }
            catch (error) {
                // A single failed index must not prevent the others from being created.
                console.error(`Failed to create managed index ${indexName} on ${schema}.${table}:`, error);
                continue;
            }
            if (!options["silent"]) {
                console.log(`Prepared managed index ${indexName} on ${schema}.${table}`);
            }
        }
    }
}
|
|
356
|
+
/**
 * List the target-database tables of the Senat schema whose names carry this
 * dataset's `<database>_` prefix.
 *
 * @returns {string[]} Sorted, trimmed, non-empty table names.
 */
function listPrefixedTables(dataset, dataDir, options, runtime) {
    const tablePrefix = `${dataset.database}_`;
    // Escape underscores so LIKE matches them literally instead of as single-character wildcards.
    const likePattern = `${tablePrefix.replace(/_/g, "\\_")}%`;
    const query = [
        "SELECT tablename",
        "FROM pg_tables",
        `WHERE schemaname = '${senatSchemaName}'`,
        ` AND tablename LIKE '${likePattern}' ESCAPE '\\'`,
        "ORDER BY tablename",
    ].join("\n");
    return runPsqlQuery(query, dataDir, options, runtime.target)
        .split("\n")
        .map((name) => name.trim())
        .filter((name) => name !== "");
}
|
|
371
|
+
/**
 * Convert a snake_case identifier to PascalCase.
 *
 * Each alphanumeric character at the start of the string or right after an
 * underscore is uppercased, and that underscore is dropped. Characters not
 * at such a boundary are left untouched (no lowercasing of the remainder).
 *
 * @param {string} text - Identifier such as a table or column name.
 * @returns {string} PascalCase form, e.g. "amd_amendement" -> "AmdAmendement".
 */
function toPascalCase(text) {
    const wordBoundary = /(^|_)([a-z0-9])/gi;
    return text.replace(wordBoundary, (_match, _separator, character) => character.toUpperCase());
}
|
|
374
|
+
/**
 * Build a TypeScript identifier for a table/column pair by PascalCasing
 * both parts and concatenating them, e.g. ("amendement", "id") -> "AmendementId".
 */
function toIdentifierName(tableName, columnName) {
    const parts = [tableName, columnName].map(toPascalCase);
    return parts.join("");
}
|
|
377
|
+
/**
 * Normalize an optional SQL comment: trim whitespace and collapse empty or
 * missing values to `undefined`.
 *
 * @param {string | null | undefined} comment
 * @returns {string | undefined} The trimmed text, or undefined when blank/absent.
 */
function trimComment(comment) {
    if (comment === undefined || comment === null) {
        return undefined;
    }
    const trimmed = comment.trim();
    return trimmed.length > 0 ? trimmed : undefined;
}
|
|
381
|
+
/**
 * Generate TypeScript raw types (and Zod schemas) for a dataset's prefixed
 * tables in the target database, writing them under `rawTypesDir`, plus a
 * manifest of the covered tables.
 *
 * Uses `processDatabase`/`makePgTsGenerator` (presumably a kanel-style
 * introspection code generator — TODO confirm) restricted to tables of the
 * Senat schema whose names start with `<database>_`.
 *
 * @param {object} dataset - Dataset descriptor; `dataset.database` is the table-name prefix.
 * @param {object} runtime - Runtime context; `runtime.target` holds the target DB connection.
 * @param {string[]} prefixedTables - Table names to record in the generated manifest.
 */
async function generateRawTypes(dataset, runtime, prefixedTables) {
    await fs.ensureDir(rawTypesDir);
    const definitionFilePath = getGeneratedDefinitionPath(dataset.database);
    const manifestFilePath = getGeneratedTableManifestPath(dataset.database);
    const datasetPrefix = `${dataset.database}_`;
    const datasetOutputPath = path.join(rawTypesDir, dataset.database);
    // Zod schema generation: one `<Table><Column>Schema` identifier per column,
    // and one `<Table>[<Kind>]Schema` per table variant ("selector" maps to the bare name).
    const generateZodSchemas = makeGenerateZodSchemas({
        castToSchema: true,
        getZodIdentifierMetadata: (column, details) => ({
            name: `${toIdentifierName(stripDatasetPrefix(details.name, dataset.database), column.name)}Schema`,
        }),
        getZodSchemaMetadata: (details, generateFor) => {
            const baseName = toPascalCase(stripDatasetPrefix(details.name, dataset.database));
            const suffix = generateFor === "selector" || generateFor === undefined ? "" : toPascalCase(generateFor);
            return {
                name: `${baseName}${suffix}Schema`,
                path: datasetOutputPath,
            };
        },
    });
    // Start from a clean slate so stale definitions never survive a re-run.
    await fs.remove(definitionFilePath);
    await fs.remove(manifestFilePath);
    await processDatabase({
        connection: {
            database: runtime.target.name,
            host: runtime.target.host,
            password: runtime.target.password,
            port: runtime.target.port,
            user: runtime.target.user,
        },
        // Only generate for this dataset's prefixed tables in the Senat schema.
        filter: (pgType) => pgType.schemaName === senatSchemaName && pgType.name.startsWith(datasetPrefix),
        generators: [
            makePgTsGenerator({
                filter: (pgType) => pgType.schemaName === senatSchemaName && pgType.name.startsWith(datasetPrefix),
                generateIdentifierType: (column, details, builtinType) => {
                    const tableName = stripDatasetPrefix(details.name, dataset.database);
                    return {
                        ...builtinType,
                        comment: undefined,
                        name: toIdentifierName(tableName, column.name),
                        // Strip branded/flavored type wrappers so identifiers stay plain aliases.
                        typeDefinition: builtinType.typeDefinition.map((typeDefinition) => typeDefinition.replace(/ & \{ __flavor\?: '[^']+' \}/g, "").replace(/ & \{ __brand: '[^']+' \}/g, "")),
                    };
                },
                getMetadata: (details, generateFor, builtinMetadata) => {
                    const baseName = toPascalCase(stripDatasetPrefix(details.name, dataset.database));
                    // "selector" (or no variant) keeps the bare PascalCase table name.
                    const suffix = generateFor === "selector" || generateFor === undefined ? "" : toPascalCase(generateFor);
                    const tableComment = trimComment(details.comment);
                    return {
                        ...builtinMetadata,
                        comment: tableComment ? [tableComment] : undefined,
                        exportAs: "named",
                        name: `${baseName}${suffix}`,
                        path: datasetOutputPath,
                    };
                },
                getPropertyMetadata: (property, _details, generateFor, builtinMetadata) => {
                    const comment = trimComment(property.comment);
                    // Initializer types additionally document the column's SQL default value.
                    const defaultComment = generateFor === "initializer" && property.defaultValue !== null && property.defaultValue !== undefined
                        ? `Default value: ${property.defaultValue}`
                        : undefined;
                    const comments = [comment, defaultComment].filter((value) => value !== undefined);
                    return {
                        ...builtinMetadata,
                        comment: comments.length > 0 ? comments : undefined,
                    };
                },
                preRenderHooks: [generateZodSchemas],
            }),
        ],
        outputPath: rawTypesDir,
        postRenderHooks: [markAsGenerated, formatWithPrettier],
        schemaNames: [senatSchemaName],
        typescriptConfig: {
            enumStyle: "literal-union",
            tsModuleFormat: "esm",
        },
    });
    // Normalize line endings (generator output may be CRLF on some platforms).
    const generatedDefinition = await fs.readFile(definitionFilePath, { encoding: "utf8" });
    await fs.writeFile(definitionFilePath, generatedDefinition.replace(/\r\n/g, "\n"));
    await fs.writeFile(manifestFilePath, buildGeneratedTableManifest(dataset.database, prefixedTables));
}
|
|
62
462
|
/**
 * Download a URL and stream its body to a local file.
 *
 * @param {string} url - Source URL, fetched with the global `fetch`.
 * @param {string} dest - Destination file path (overwritten via a write stream).
 * @throws {Error} When the HTTP response is not ok, or (via assert) when the body is empty.
 */
async function downloadFile(url, dest) {
    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`Download failed ${response.status} ${response.statusText} for ${url}`);
    }
    assert(response.body, `Empty response body for ${url}`);
    // Bridge the WHATWG body stream into a Node stream and pipe it to disk.
    const bodyStream = Readable.fromWeb(response.body);
    const fileStream = fs.createWriteStream(dest);
    await streamPipeline(bodyStream, fileStream);
}
|
|
69
|
-
|
|
70
|
-
* Copy a dataset database to the main Senat database (overwriting its contents).
|
|
71
|
-
*/
|
|
72
|
-
async function copyToSenat(dataset, dataDir, options) {
|
|
470
|
+
/**
 * Import a dataset's SQL dump into its staging schema of the staging database.
 *
 * Rewrites the dump line by line into `<database>_schema_dump.sql` (retargeting
 * it to the staging schema and forcing UTF-8 client encoding), loads it through
 * psql, creates the dataset's managed indexes, then runs a generated
 * normalization script with retries and finally checks the staging schema is
 * not empty.
 *
 * @param {object} dataset - Dataset descriptor (`database`, optional `indexes`, ...).
 * @param {string} dataDir - Directory holding the extracted `<database>.sql` dump.
 * @param {object} options - CLI options (`silent`, psql-related flags).
 * @param {object} runtime - Runtime context with `staging` connection info.
 */
async function importIntoStaging(dataset, dataDir, options, runtime) {
    if (!options["silent"]) {
        console.log(`Importing ${dataset.database} into staging database ${runtime.staging.name}...`);
    }
    const sqlFilePath = path.join(dataDir, `${dataset.database}.sql`);
    const schemaDumpFile = path.join(dataDir, `${dataset.database}_schema_dump.sql`);
    const normalizeSqlFile = path.join(dataDir, `${dataset.database}_normalize_staging.sql`);
    const stagingSchema = stagingSchemaName(dataset.database);
    const schemaSqlWriter = fs.createWriteStream(schemaDumpFile, { encoding: "utf8" });
    // Always rebuild the staging schema from scratch.
    schemaSqlWriter.write(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE;\n`);
    schemaSqlWriter.write(`CREATE SCHEMA IF NOT EXISTS ${stagingSchema};\n`);
    const lineReader = readline.createInterface({
        input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
        crlfDelay: Infinity,
    });
    // Tracks whether we are inside a COPY ... FROM stdin data section, where
    // lines are raw data and must not be treated as SQL.
    let inCopyData = false;
    for await (const line of lineReader) {
        let newLine = rewriteLineForStagingImport(line, dataset, stagingSchema, inCopyData);
        if (!inCopyData) {
            // Dumps may declare LATIN1; the file content is (re)written as UTF-8.
            newLine = newLine.replace(/SET client_encoding = 'LATIN1';/i, "SET client_encoding = 'UTF8';");
            if (isCopyFromStdinLine(newLine)) {
                inCopyData = true;
            }
        }
        else if (line === "\\.") {
            // "\." terminates a COPY data section in psql dumps.
            inCopyData = false;
        }
        schemaSqlWriter.write(newLine + "\n");
    }
    schemaSqlWriter.end();
    // Wait for the rewritten dump to be fully flushed, then load and normalize it.
    await new Promise((resolve, reject) => {
        schemaSqlWriter.on("finish", () => {
            try {
                runPsqlFile(schemaDumpFile, dataDir, options, runtime.staging, false);
                createManagedIndexesInStaging(dataset, dataDir, options, runtime);
                fs.writeFileSync(normalizeSqlFile, buildNormalizeStagingSchemaSql(dataset.database), { encoding: "utf8" });
                // Normalization is retried; transient failures are presumably lock/contention
                // related — TODO confirm why 4 attempts / 500 ms was chosen.
                runWithRetry(() => runPsqlFile(normalizeSqlFile, dataDir, options, runtime.staging), options, {
                    attempts: 4,
                    delayMs: 500,
                    label: `Staging normalization for ${dataset.database}`,
                })
                    .then(() => {
                        ensureStagingSchemaHasTables(dataset, dataDir, options, runtime.staging);
                        resolve();
                    })
                    .catch(reject);
            }
            catch (error) {
                // psql exec errors carry stdout/stderr buffers worth surfacing.
                const execError = error;
                if (!options["silent"]) {
                    console.error(`Failed to import ${dataset.database} data into staging:`);
                    if (execError.stderr) {
                        console.error(execError.stderr);
                    }
                    if (execError.stdout) {
                        console.error(execError.stdout);
                    }
                }
                reject(error);
            }
        });
        schemaSqlWriter.on("error", reject);
    });
}
|
|
138
|
-
async function retrieveDataset(dataDir, dataset) {
|
|
534
|
+
async function retrieveDataset(dataDir, dataset, options, runtime) {
|
|
139
535
|
const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
|
|
140
536
|
const zipFilePath = path.join(dataDir, zipFilename);
|
|
141
537
|
if (options["all"] || options["fetch"]) {
|
|
142
|
-
// Fetch & save ZIP file.
|
|
143
538
|
if (!options["silent"]) {
|
|
144
|
-
console.log(`Loading ${dataset.title}: ${zipFilename}
|
|
539
|
+
console.log(`Loading ${dataset.title}: ${zipFilename}...`);
|
|
145
540
|
}
|
|
146
|
-
// Fetch fails with OpenSSL error: dh key too small.
|
|
147
|
-
// (so does "curl").
|
|
148
|
-
// const response = await fetch(dataset.url)
|
|
149
|
-
// if (!response.ok) {
|
|
150
|
-
// console.error(response.status, response.statusText)
|
|
151
|
-
// console.error(await response.text())
|
|
152
|
-
// throw new Error(`Fetch failed: ${dataset.url}`)
|
|
153
|
-
// }
|
|
154
|
-
// await pipeline(response.body!, fs.createWriteStream(zipFilePath))
|
|
155
541
|
fs.removeSync(zipFilePath);
|
|
156
542
|
await downloadFile(dataset.url, zipFilePath);
|
|
157
543
|
}
|
|
@@ -159,7 +545,7 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
159
545
|
const sqlFilePath = path.join(dataDir, sqlFilename);
|
|
160
546
|
if (options["all"] || options["unzip"]) {
|
|
161
547
|
if (!options["silent"]) {
|
|
162
|
-
console.log(`Unzipping ${dataset.title}: ${zipFilename}
|
|
548
|
+
console.log(`Unzipping ${dataset.title}: ${zipFilename}...`);
|
|
163
549
|
}
|
|
164
550
|
fs.removeSync(sqlFilePath);
|
|
165
551
|
const zip = new StreamZip({
|
|
@@ -168,7 +554,7 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
168
554
|
});
|
|
169
555
|
await new Promise((resolve, reject) => {
|
|
170
556
|
zip.on("ready", () => {
|
|
171
|
-
zip.extract(null, dataDir, (err
|
|
557
|
+
zip.extract(null, dataDir, (err) => {
|
|
172
558
|
zip.close();
|
|
173
559
|
if (err) {
|
|
174
560
|
reject(err);
|
|
@@ -181,131 +567,106 @@ async function retrieveDataset(dataDir, dataset) {
|
|
|
181
567
|
});
|
|
182
568
|
if (dataset.repairZip !== undefined) {
|
|
183
569
|
if (!options["silent"]) {
|
|
184
|
-
console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}
|
|
570
|
+
console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}...`);
|
|
185
571
|
}
|
|
186
572
|
dataset.repairZip(dataset, dataDir);
|
|
187
573
|
}
|
|
188
574
|
}
|
|
189
575
|
if ((options["all"] || options["repairEncoding"]) && dataset.repairEncoding) {
|
|
190
576
|
if (!options["silent"]) {
|
|
191
|
-
console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}
|
|
577
|
+
console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}...`);
|
|
192
578
|
}
|
|
193
579
|
const repairedSqlFilePath = sqlFilePath + ".repaired";
|
|
194
580
|
const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
|
|
195
581
|
encoding: "utf8",
|
|
196
582
|
});
|
|
197
|
-
// Read the file as latin1 (ISO-8859-1/CP1252) and write as UTF-8
|
|
198
583
|
const lineReader = readline.createInterface({
|
|
199
584
|
input: fs.createReadStream(sqlFilePath, { encoding: "latin1" }),
|
|
200
585
|
crlfDelay: Infinity,
|
|
201
586
|
});
|
|
202
587
|
for await (const line of lineReader) {
|
|
203
|
-
|
|
204
|
-
let repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
|
|
588
|
+
const repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
|
|
205
589
|
repairedSqlWriter.write(repairedLine + "\n");
|
|
206
590
|
}
|
|
207
591
|
repairedSqlWriter.end();
|
|
208
592
|
await fs.move(repairedSqlFilePath, sqlFilePath, { overwrite: true });
|
|
209
593
|
}
|
|
210
|
-
if (options["all"] || options["import"]
|
|
594
|
+
if (options["all"] || options["import"]) {
|
|
211
595
|
if (!options["silent"]) {
|
|
212
|
-
console.log(`
|
|
596
|
+
console.log(`Merging ${dataset.title}: ${sqlFilename} into ${runtime.target.name}.${senatSchemaName}...`);
|
|
213
597
|
}
|
|
214
|
-
await
|
|
215
|
-
|
|
216
|
-
if (
|
|
217
|
-
|
|
218
|
-
for (const index of indexes) {
|
|
219
|
-
const indexName = index.name;
|
|
220
|
-
const columns = index.columns.join(", ");
|
|
221
|
-
const schema = dataset.database;
|
|
222
|
-
const sql = `CREATE INDEX IF NOT EXISTS ${indexName} ON ${schema}.${table} (${columns});`;
|
|
223
|
-
try {
|
|
224
|
-
execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -c "${sql}"`, {
|
|
225
|
-
env: process.env,
|
|
226
|
-
encoding: "utf-8",
|
|
227
|
-
stdio: ["ignore", "ignore", "pipe"],
|
|
228
|
-
});
|
|
229
|
-
if (!options["silent"]) {
|
|
230
|
-
console.log(`Created index: ${indexName} on ${schema}.${table} (${columns})`);
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
catch (err) {
|
|
234
|
-
console.error(`Failed to create index ${indexName} on ${schema}.${table}:`, err);
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
}
|
|
598
|
+
await importIntoStaging(dataset, dataDir, options, runtime);
|
|
599
|
+
finalizeDatasetImport(dataset, dataDir, options, runtime);
|
|
600
|
+
if (isIncrementalImport(options)) {
|
|
601
|
+
applyStagingMetadataToTarget(dataset, dataDir, options, runtime);
|
|
238
602
|
}
|
|
239
603
|
}
|
|
240
604
|
if (options["schema"]) {
|
|
241
|
-
|
|
242
|
-
assert(fs.statSync(definitionsDir).isDirectory());
|
|
605
|
+
await fs.ensureDir(rawTypesDir);
|
|
243
606
|
if (!options["silent"]) {
|
|
244
|
-
console.log(`Creating TypeScript definitions from
|
|
607
|
+
console.log(`Creating TypeScript definitions from prefixed ${senatSchemaName} tables ` +
|
|
608
|
+
`for '${dataset.database}' in database '${runtime.target.name}'...`);
|
|
245
609
|
}
|
|
246
|
-
const
|
|
247
|
-
|
|
248
|
-
execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.database} -o ${definitionFilePath}`, {
|
|
249
|
-
// cwd: dataDir,
|
|
250
|
-
env: process.env,
|
|
251
|
-
encoding: "utf-8",
|
|
252
|
-
// stdio: ["ignore", "ignore", "pipe"],
|
|
253
|
-
});
|
|
254
|
-
const definition = fs.readFileSync(definitionFilePath, { encoding: "utf8" });
|
|
255
|
-
const definitionRepaired = definition
|
|
256
|
-
.replace(/\r\n/g, "\n")
|
|
257
|
-
.replace(/AUTO-GENERATED FILE @ \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/, "AUTO-GENERATED FILE");
|
|
258
|
-
fs.writeFileSync(definitionFilePath, definitionRepaired);
|
|
259
|
-
definitionsDir = path.resolve("src", "raw_types");
|
|
260
|
-
definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
|
|
261
|
-
execSync(`npx kysely-codegen --url '${dbConnectionString}' --default-schema ${dataset.database} --include-pattern '${dataset.database}.*' --out-file ${definitionFilePath}`, {
|
|
262
|
-
env: process.env,
|
|
263
|
-
encoding: "utf-8",
|
|
264
|
-
// stdio: ["ignore", "ignore", "pipe"],
|
|
265
|
-
});
|
|
610
|
+
const prefixedTables = listPrefixedTables(dataset, dataDir, options, runtime);
|
|
611
|
+
await generateRawTypes(dataset, runtime, prefixedTables);
|
|
266
612
|
}
|
|
267
613
|
}
|
|
614
|
+
/**
 * Build the runtime connection context from the module-level config.
 *
 * Non-incremental imports stage directly in the target database, so both
 * `staging` and `target` then reference the same connection descriptor;
 * incremental imports use the dedicated `config.stagingDb` connection.
 *
 * @returns {{staging: object, target: object}} Connection descriptors.
 */
function buildRuntimeContext() {
    const toConnection = (db) => ({
        host: db.host,
        name: db.name,
        password: db.password,
        port: db.port,
        user: db.user,
    });
    const target = toConnection(config.db);
    if (!isIncrementalImport(options)) {
        return { staging: target, target };
    }
    return { staging: toConnection(config.stagingDb), target };
}
|
|
268
637
|
/**
 * Top-level entry point: validate configuration, prepare the target (and, in
 * incremental mode, staging) database, then retrieve and import every chosen
 * dataset, bumping the schema version if the structure changed.
 *
 * The foreign staging server is cleaned up in `finally` so a failing dataset
 * import does not leave it behind.
 */
async function retrieveOpenData() {
    const dataDir = options["dataDir"];
    assertExistingDirectory(dataDir, "data directory");
    // Guard against a flag value being mistaken for the sudo user name.
    assert(!options["sudo"] || !options["sudo"].startsWith("-"), "Option --sudo expects a Unix user name, for example: --sudo postgres");
    const runtime = buildRuntimeContext();
    assert(runtime.target.host && runtime.target.port && runtime.target.user && runtime.target.password, "Missing target database configuration: DB_* in .env file");
    if (isIncrementalImport(options)) {
        assert(runtime.staging.host && runtime.staging.port && runtime.staging.user && runtime.staging.password, "Missing staging database configuration: STAGING_DB_* in .env file");
        // Incremental mode merges staging into target; they must not be the same DB.
        assert(runtime.target.name !== runtime.staging.name, "Target and staging databases must be different");
    }
    // NOTE(review): the timer always starts, but timeEnd only runs when not silent —
    // silent runs leave a dangling console.time label; confirm this is intended.
    console.time("data extraction time");
    try {
        ensureDatabaseExists(runtime.target, dataDir, options);
        if (isIncrementalImport(options)) {
            ensureStagingDatabase(dataDir, options, runtime);
            ensureForeignStagingServer(dataDir, options, runtime);
        }
        ensureSchemaVersionTable(dataDir, options, runtime);
        // Fingerprint before importing so structural changes can be detected afterwards.
        const initialSchemaFingerprint = getSchemaStructureFingerprint(dataDir, options, runtime);
        const enabledDatasets = getEnabledDatasets(options["categories"]);
        const chosenDatasets = getChosenDatasets(enabledDatasets);
        // Datasets are imported sequentially (each run mutates shared DB state).
        for (const dataset of chosenDatasets) {
            await retrieveDataset(dataDir, dataset, options, runtime);
        }
        bumpSchemaVersionIfNeeded(initialSchemaFingerprint, dataDir, options, runtime);
    }
    finally {
        if (isIncrementalImport(options)) {
            cleanupForeignStagingServer(dataDir, options, runtime);
        }
        if (!options["silent"]) {
            console.timeEnd("data extraction time");
        }
    }
}
|
|
311
672
|
retrieveOpenData()
|