@tricoteuses/senat 2.22.16 → 2.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. package/README.md +168 -0
  2. package/lib/aggregates.d.ts +52 -0
  3. package/lib/aggregates.js +930 -0
  4. package/lib/aggregates.mjs +713 -0
  5. package/lib/aggregates.ts +833 -0
  6. package/lib/config.d.ts +10 -0
  7. package/lib/config.js +16 -0
  8. package/lib/config.mjs +16 -0
  9. package/lib/config.ts +26 -0
  10. package/lib/databases.d.ts +2 -0
  11. package/lib/databases.js +26 -0
  12. package/lib/databases.mjs +57 -0
  13. package/lib/databases.ts +71 -0
  14. package/lib/datasets.d.ts +34 -0
  15. package/lib/datasets.js +233 -0
  16. package/lib/datasets.mjs +78 -0
  17. package/lib/datasets.ts +118 -0
  18. package/lib/fields.d.ts +10 -0
  19. package/lib/fields.js +68 -0
  20. package/lib/fields.mjs +22 -0
  21. package/lib/fields.ts +29 -0
  22. package/lib/git.d.ts +26 -0
  23. package/lib/git.js +167 -0
  24. package/lib/index.d.ts +13 -0
  25. package/lib/index.js +1 -0
  26. package/lib/index.mjs +7 -0
  27. package/lib/index.ts +64 -0
  28. package/lib/inserters.d.ts +98 -0
  29. package/lib/inserters.js +500 -0
  30. package/lib/inserters.mjs +360 -0
  31. package/lib/inserters.ts +521 -0
  32. package/lib/legislatures.json +38 -0
  33. package/lib/loaders.d.ts +58 -0
  34. package/lib/loaders.js +286 -0
  35. package/lib/loaders.mjs +158 -0
  36. package/lib/loaders.ts +271 -0
  37. package/lib/model/agenda.d.ts +6 -0
  38. package/lib/model/agenda.js +148 -0
  39. package/lib/model/ameli.d.ts +51 -0
  40. package/lib/model/ameli.js +149 -0
  41. package/lib/model/ameli.mjs +84 -0
  42. package/lib/model/ameli.ts +100 -0
  43. package/lib/model/commission.d.ts +18 -0
  44. package/lib/model/commission.js +269 -0
  45. package/lib/model/debats.d.ts +67 -0
  46. package/lib/model/debats.js +95 -0
  47. package/lib/model/debats.mjs +43 -0
  48. package/lib/model/debats.ts +68 -0
  49. package/lib/model/documents.d.ts +12 -0
  50. package/lib/model/documents.js +151 -0
  51. package/lib/model/dosleg.d.ts +7 -0
  52. package/lib/model/dosleg.js +326 -0
  53. package/lib/model/dosleg.mjs +196 -0
  54. package/lib/model/dosleg.ts +240 -0
  55. package/lib/model/index.d.ts +7 -0
  56. package/lib/model/index.js +7 -0
  57. package/lib/model/index.mjs +5 -0
  58. package/lib/model/index.ts +15 -0
  59. package/lib/model/questions.d.ts +45 -0
  60. package/lib/model/questions.js +89 -0
  61. package/lib/model/questions.mjs +71 -0
  62. package/lib/model/questions.ts +93 -0
  63. package/lib/model/scrutins.d.ts +13 -0
  64. package/lib/model/scrutins.js +114 -0
  65. package/lib/model/seance.d.ts +3 -0
  66. package/lib/model/seance.js +267 -0
  67. package/lib/model/sens.d.ts +146 -0
  68. package/lib/model/sens.js +454 -0
  69. package/lib/model/sens.mjs +415 -0
  70. package/lib/model/sens.ts +516 -0
  71. package/lib/model/texte.d.ts +7 -0
  72. package/lib/model/texte.js +256 -0
  73. package/lib/model/texte.mjs +208 -0
  74. package/lib/model/texte.ts +229 -0
  75. package/lib/model/util.d.ts +9 -0
  76. package/lib/model/util.js +38 -0
  77. package/lib/model/util.mjs +19 -0
  78. package/lib/model/util.ts +32 -0
  79. package/lib/parsers/texte.d.ts +7 -0
  80. package/lib/parsers/texte.js +228 -0
  81. package/lib/raw_types/ameli.d.ts +914 -0
  82. package/lib/raw_types/ameli.js +5 -0
  83. package/lib/raw_types/ameli.mjs +163 -0
  84. package/lib/raw_types/debats.d.ts +207 -0
  85. package/lib/raw_types/debats.js +5 -0
  86. package/lib/raw_types/debats.mjs +58 -0
  87. package/lib/raw_types/dosleg.d.ts +1619 -0
  88. package/lib/raw_types/dosleg.js +5 -0
  89. package/lib/raw_types/dosleg.mjs +438 -0
  90. package/lib/raw_types/questions.d.ts +419 -0
  91. package/lib/raw_types/questions.js +5 -0
  92. package/lib/raw_types/questions.mjs +11 -0
  93. package/lib/raw_types/senat.d.ts +11368 -0
  94. package/lib/raw_types/senat.js +5 -0
  95. package/lib/raw_types/sens.d.ts +8248 -0
  96. package/lib/raw_types/sens.js +5 -0
  97. package/lib/raw_types/sens.mjs +508 -0
  98. package/lib/raw_types_kysely/ameli.d.ts +915 -0
  99. package/lib/raw_types_kysely/ameli.js +7 -0
  100. package/lib/raw_types_kysely/ameli.mjs +5 -0
  101. package/lib/raw_types_kysely/ameli.ts +951 -0
  102. package/lib/raw_types_kysely/debats.d.ts +207 -0
  103. package/lib/raw_types_kysely/debats.js +7 -0
  104. package/lib/raw_types_kysely/debats.mjs +5 -0
  105. package/lib/raw_types_kysely/debats.ts +222 -0
  106. package/lib/raw_types_kysely/dosleg.d.ts +3532 -0
  107. package/lib/raw_types_kysely/dosleg.js +7 -0
  108. package/lib/raw_types_kysely/dosleg.mjs +5 -0
  109. package/lib/raw_types_kysely/dosleg.ts +3621 -0
  110. package/lib/raw_types_kysely/questions.d.ts +414 -0
  111. package/lib/raw_types_kysely/questions.js +7 -0
  112. package/lib/raw_types_kysely/questions.mjs +5 -0
  113. package/lib/raw_types_kysely/questions.ts +426 -0
  114. package/lib/raw_types_kysely/sens.d.ts +4394 -0
  115. package/lib/raw_types_kysely/sens.js +7 -0
  116. package/lib/raw_types_kysely/sens.mjs +5 -0
  117. package/lib/raw_types_kysely/sens.ts +4499 -0
  118. package/lib/raw_types_schemats/ameli.d.ts +539 -0
  119. package/lib/raw_types_schemats/ameli.js +2 -0
  120. package/lib/raw_types_schemats/ameli.mjs +2 -0
  121. package/lib/raw_types_schemats/ameli.ts +601 -0
  122. package/lib/raw_types_schemats/debats.d.ts +127 -0
  123. package/lib/raw_types_schemats/debats.js +2 -0
  124. package/lib/raw_types_schemats/debats.mjs +2 -0
  125. package/lib/raw_types_schemats/debats.ts +145 -0
  126. package/lib/raw_types_schemats/dosleg.d.ts +977 -0
  127. package/lib/raw_types_schemats/dosleg.js +2 -0
  128. package/lib/raw_types_schemats/dosleg.mjs +2 -0
  129. package/lib/raw_types_schemats/dosleg.ts +2193 -0
  130. package/lib/raw_types_schemats/questions.d.ts +235 -0
  131. package/lib/raw_types_schemats/questions.js +2 -0
  132. package/lib/raw_types_schemats/questions.mjs +2 -0
  133. package/lib/raw_types_schemats/questions.ts +249 -0
  134. package/lib/raw_types_schemats/sens.d.ts +6915 -0
  135. package/lib/raw_types_schemats/sens.js +2 -0
  136. package/lib/raw_types_schemats/sens.mjs +2 -0
  137. package/lib/raw_types_schemats/sens.ts +2907 -0
  138. package/lib/scripts/convert_data.d.ts +1 -0
  139. package/lib/scripts/convert_data.js +354 -0
  140. package/lib/scripts/convert_data.mjs +181 -0
  141. package/lib/scripts/convert_data.ts +243 -0
  142. package/lib/scripts/data-download.d.ts +1 -0
  143. package/lib/scripts/data-download.js +12 -0
  144. package/lib/scripts/datautil.d.ts +8 -0
  145. package/lib/scripts/datautil.js +34 -0
  146. package/lib/scripts/datautil.mjs +16 -0
  147. package/lib/scripts/datautil.ts +19 -0
  148. package/lib/scripts/images/transparent_150x192.jpg +0 -0
  149. package/lib/scripts/images/transparent_155x225.jpg +0 -0
  150. package/lib/scripts/parse_textes.d.ts +1 -0
  151. package/lib/scripts/parse_textes.js +44 -0
  152. package/lib/scripts/parse_textes.mjs +46 -0
  153. package/lib/scripts/parse_textes.ts +65 -0
  154. package/lib/scripts/retrieve_agenda.d.ts +1 -0
  155. package/lib/scripts/retrieve_agenda.js +132 -0
  156. package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
  157. package/lib/scripts/retrieve_cr_commission.js +364 -0
  158. package/lib/scripts/retrieve_cr_seance.d.ts +6 -0
  159. package/lib/scripts/retrieve_cr_seance.js +347 -0
  160. package/lib/scripts/retrieve_documents.d.ts +3 -0
  161. package/lib/scripts/retrieve_documents.js +219 -0
  162. package/lib/scripts/retrieve_documents.mjs +249 -0
  163. package/lib/scripts/retrieve_documents.ts +298 -0
  164. package/lib/scripts/retrieve_open_data.d.ts +1 -0
  165. package/lib/scripts/retrieve_open_data.js +315 -0
  166. package/lib/scripts/retrieve_open_data.mjs +217 -0
  167. package/lib/scripts/retrieve_open_data.ts +268 -0
  168. package/lib/scripts/retrieve_senateurs_photos.d.ts +1 -0
  169. package/lib/scripts/retrieve_senateurs_photos.js +147 -0
  170. package/lib/scripts/retrieve_senateurs_photos.mjs +147 -0
  171. package/lib/scripts/retrieve_senateurs_photos.ts +177 -0
  172. package/lib/scripts/retrieve_videos.d.ts +1 -0
  173. package/lib/scripts/retrieve_videos.js +461 -0
  174. package/lib/scripts/shared/cli_helpers.d.ts +95 -0
  175. package/lib/scripts/shared/cli_helpers.js +91 -0
  176. package/lib/scripts/shared/cli_helpers.ts +36 -0
  177. package/lib/scripts/shared/util.d.ts +4 -0
  178. package/lib/scripts/shared/util.js +35 -0
  179. package/lib/scripts/shared/util.ts +33 -0
  180. package/lib/scripts/test_iter_load.d.ts +1 -0
  181. package/lib/scripts/test_iter_load.js +12 -0
  182. package/lib/src/config.d.ts +22 -0
  183. package/lib/src/config.js +17 -7
  184. package/lib/src/conversion_textes.js +5 -1
  185. package/lib/src/databases.d.ts +2 -1
  186. package/lib/src/databases_postgres.d.ts +4 -0
  187. package/lib/src/databases_postgres.js +23 -0
  188. package/lib/src/datasets.d.ts +4 -0
  189. package/lib/src/datasets.js +16 -2
  190. package/lib/src/git.d.ts +1 -0
  191. package/lib/src/git.js +45 -11
  192. package/lib/src/loaders.js +10 -4
  193. package/lib/src/model/agenda.js +2 -2
  194. package/lib/src/model/ameli.d.ts +64 -52
  195. package/lib/src/model/ameli.js +147 -145
  196. package/lib/src/model/ameli_postgres.d.ts +67 -0
  197. package/lib/src/model/ameli_postgres.js +150 -0
  198. package/lib/src/model/commission.d.ts +3 -2
  199. package/lib/src/model/commission.js +2 -2
  200. package/lib/src/model/debats.d.ts +38 -66
  201. package/lib/src/model/debats.js +110 -93
  202. package/lib/src/model/documents.d.ts +32 -12
  203. package/lib/src/model/documents.js +171 -130
  204. package/lib/src/model/dosleg.d.ts +142 -5
  205. package/lib/src/model/dosleg.js +298 -156
  206. package/lib/src/model/questions.d.ts +54 -45
  207. package/lib/src/model/questions.js +89 -87
  208. package/lib/src/model/scrutins.d.ts +48 -13
  209. package/lib/src/model/scrutins.js +118 -111
  210. package/lib/src/model/seance.js +3 -3
  211. package/lib/src/model/sens.d.ts +109 -179
  212. package/lib/src/model/sens.js +384 -484
  213. package/lib/src/model/util.d.ts +0 -8
  214. package/lib/src/model/util.js +0 -23
  215. package/lib/src/parsers/texte.js +7 -7
  216. package/lib/src/raw_types_schemats/ameli.d.ts +4 -2
  217. package/lib/src/raw_types_schemats/debats.d.ts +2 -2
  218. package/lib/src/raw_types_schemats/dosleg.d.ts +2 -2
  219. package/lib/src/raw_types_schemats/questions.d.ts +2 -2
  220. package/lib/src/raw_types_schemats/sens.d.ts +10 -4216
  221. package/lib/src/scripts/convert_data.js +7 -6
  222. package/lib/src/scripts/convert_xml_to_html.js +2 -2
  223. package/lib/src/scripts/data-download.js +3 -2
  224. package/lib/src/scripts/retrieve_agenda.js +21 -9
  225. package/lib/src/scripts/retrieve_cr_commission.js +17 -17
  226. package/lib/src/scripts/retrieve_cr_seance.d.ts +14 -1
  227. package/lib/src/scripts/retrieve_cr_seance.js +10 -11
  228. package/lib/src/scripts/retrieve_documents.d.ts +11 -2
  229. package/lib/src/scripts/retrieve_documents.js +25 -14
  230. package/lib/src/scripts/retrieve_open_data.js +400 -145
  231. package/lib/src/scripts/retrieve_senateurs_photos.js +25 -11
  232. package/lib/src/scripts/retrieve_videos.js +12 -11
  233. package/lib/src/scripts/shared/cli_helpers.d.ts +1 -6
  234. package/lib/src/scripts/shared/cli_helpers.js +9 -8
  235. package/lib/src/scripts/shared/incremental_import_sql.d.ts +2 -0
  236. package/lib/src/scripts/shared/incremental_import_sql.js +894 -0
  237. package/lib/src/scripts/shared/prefixed_tables.d.ts +7 -0
  238. package/lib/src/scripts/shared/prefixed_tables.js +30 -0
  239. package/lib/src/scripts/shared/schema_version.d.ts +3 -0
  240. package/lib/src/scripts/shared/schema_version.js +97 -0
  241. package/lib/src/scripts/shared/staging_import.d.ts +3 -0
  242. package/lib/src/scripts/shared/staging_import.js +80 -0
  243. package/lib/src/scripts/shared/staging_metadata_sql.d.ts +1 -0
  244. package/lib/src/scripts/shared/staging_metadata_sql.js +221 -0
  245. package/lib/src/scripts/validate_prefixed_tables.d.ts +1 -0
  246. package/lib/src/scripts/validate_prefixed_tables.js +102 -0
  247. package/lib/src/types/texte.d.ts +1 -1
  248. package/lib/src/utils/cr_spliting.d.ts +9 -6
  249. package/lib/src/utils/cr_spliting.js +6 -101
  250. package/lib/src/utils/reunion_odj_building.d.ts +7 -3
  251. package/lib/src/utils/reunion_parsing.d.ts +2 -1
  252. package/lib/src/utils/reunion_parsing.js +2 -2
  253. package/lib/src/videos/match.js +8 -5
  254. package/lib/src/videos/pipeline.d.ts +6 -2
  255. package/lib/src/videos/pipeline.js +21 -8
  256. package/lib/src/videos/search.js +6 -2
  257. package/lib/strings.d.ts +1 -0
  258. package/lib/strings.js +18 -0
  259. package/lib/strings.mjs +18 -0
  260. package/lib/strings.ts +26 -0
  261. package/lib/tests/incrementalImportSql.test.d.ts +1 -0
  262. package/lib/tests/incrementalImportSql.test.js +155 -0
  263. package/lib/tests/prefixedTables.test.d.ts +1 -0
  264. package/lib/tests/prefixedTables.test.js +29 -0
  265. package/lib/tests/schemaVersion.test.d.ts +1 -0
  266. package/lib/tests/schemaVersion.test.js +23 -0
  267. package/lib/tests/validatePrefixedTables.test.d.ts +1 -0
  268. package/lib/tests/validatePrefixedTables.test.js +14 -0
  269. package/lib/types/agenda.d.ts +44 -0
  270. package/lib/types/agenda.js +1 -0
  271. package/lib/types/ameli.d.ts +5 -0
  272. package/lib/types/ameli.js +1 -0
  273. package/lib/types/ameli.mjs +13 -0
  274. package/lib/types/ameli.ts +21 -0
  275. package/lib/types/compte_rendu.d.ts +83 -0
  276. package/lib/types/compte_rendu.js +1 -0
  277. package/lib/types/debats.d.ts +2 -0
  278. package/lib/types/debats.js +1 -0
  279. package/lib/types/debats.mjs +2 -0
  280. package/lib/types/debats.ts +6 -0
  281. package/lib/types/dosleg.d.ts +70 -0
  282. package/lib/types/dosleg.js +1 -0
  283. package/lib/types/dosleg.mjs +151 -0
  284. package/lib/types/dosleg.ts +284 -0
  285. package/lib/types/questions.d.ts +2 -0
  286. package/lib/types/questions.js +1 -0
  287. package/lib/types/questions.mjs +1 -0
  288. package/lib/types/questions.ts +3 -0
  289. package/lib/types/sens.d.ts +10 -0
  290. package/lib/types/sens.js +1 -0
  291. package/lib/types/sens.mjs +1 -0
  292. package/lib/types/sens.ts +12 -0
  293. package/lib/types/sessions.d.ts +5 -0
  294. package/lib/types/sessions.js +84 -0
  295. package/lib/types/sessions.mjs +43 -0
  296. package/lib/types/sessions.ts +42 -0
  297. package/lib/types/texte.d.ts +74 -0
  298. package/lib/types/texte.js +16 -0
  299. package/lib/types/texte.mjs +16 -0
  300. package/lib/types/texte.ts +76 -0
  301. package/lib/typings/windows-1252.d.js +2 -0
  302. package/lib/typings/windows-1252.d.mjs +2 -0
  303. package/lib/typings/windows-1252.d.ts +11 -0
  304. package/lib/utils/cr_spliting.d.ts +28 -0
  305. package/lib/utils/cr_spliting.js +265 -0
  306. package/lib/utils/date.d.ts +10 -0
  307. package/lib/utils/date.js +100 -0
  308. package/lib/utils/nvs-timecode.d.ts +7 -0
  309. package/lib/utils/nvs-timecode.js +79 -0
  310. package/lib/utils/reunion_grouping.d.ts +9 -0
  311. package/lib/utils/reunion_grouping.js +361 -0
  312. package/lib/utils/reunion_odj_building.d.ts +5 -0
  313. package/lib/utils/reunion_odj_building.js +154 -0
  314. package/lib/utils/reunion_parsing.d.ts +23 -0
  315. package/lib/utils/reunion_parsing.js +209 -0
  316. package/lib/utils/scoring.d.ts +14 -0
  317. package/lib/utils/scoring.js +147 -0
  318. package/lib/utils/string_cleaning.d.ts +7 -0
  319. package/lib/utils/string_cleaning.js +57 -0
  320. package/lib/validators/config.d.ts +9 -0
  321. package/lib/validators/config.js +10 -0
  322. package/lib/validators/config.mjs +54 -0
  323. package/lib/validators/config.ts +79 -0
  324. package/lib/validators/senat.d.ts +0 -0
  325. package/lib/validators/senat.js +28 -0
  326. package/lib/validators/senat.mjs +24 -0
  327. package/lib/validators/senat.ts +26 -0
  328. package/package.json +6 -10
@@ -5,12 +5,17 @@ import fs from "fs-extra";
5
5
  import path from "path";
6
6
  import StreamZip from "node-stream-zip";
7
7
  import readline from "readline";
8
- import * as windows1252 from "windows-1252";
9
- import { pipeline } from "stream";
8
+ import { pipeline, Readable } from "stream";
10
9
  import { promisify } from "util";
10
+ import * as windows1252 from "windows-1252";
11
11
  import config from "../config";
12
12
  import { getChosenDatasets, getEnabledDatasets } from "../datasets";
13
- import { commonOptions } from "./shared/cli_helpers";
13
+ import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
14
+ import { buildIncrementalDatasetImportSql, buildNormalizeStagingSchemaSql } from "./shared/incremental_import_sql";
15
+ import { normalizeGeneratedDefinition, prefixedName, senatSchemaName, stagingSchemaName, } from "./shared/prefixed_tables";
16
+ import { buildEnsureSchemaVersionTableSql, buildIncrementSchemaVersionSql, buildSchemaStructureFingerprintQuery, } from "./shared/schema_version";
17
+ import { buildExportStagingMetadataStatementsQuery } from "./shared/staging_metadata_sql";
18
+ import { isCopyFromStdinLine, rewriteLineForStagingImport } from "./shared/staging_import";
14
19
  const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
15
20
  const optionsDefinitions = [
16
21
  ...commonOptions,
@@ -22,7 +27,7 @@ const optionsDefinitions = [
22
27
  },
23
28
  {
24
29
  alias: "c",
25
- help: "create TypeScript interfaces from databases schemas into src/raw_types_* directories",
30
+ help: "create TypeScript interfaces from database schemas into src/raw_types_* directories",
26
31
  name: "schema",
27
32
  type: Boolean,
28
33
  },
@@ -38,15 +43,20 @@ const optionsDefinitions = [
38
43
  name: "fetch",
39
44
  type: Boolean,
40
45
  },
46
+ {
47
+ help: "use separate staging database and postgres_fdw for incremental merge into target schema",
48
+ name: "incremental",
49
+ type: Boolean,
50
+ },
41
51
  {
42
52
  alias: "i",
43
- help: "import SQL dumps into a freshly (re-)created database",
53
+ help: "import SQL dumps into PostgreSQL",
44
54
  name: "import",
45
55
  type: Boolean,
46
56
  },
47
57
  {
48
58
  alias: "S",
49
- help: "sudo psql commands with given user",
59
+ help: "sudo psql commands with given user (example: --sudo postgres)",
50
60
  name: "sudo",
51
61
  type: String,
52
62
  },
@@ -59,99 +69,358 @@ const optionsDefinitions = [
59
69
  ];
60
70
  const options = commandLineArgs(optionsDefinitions);
61
71
  const streamPipeline = promisify(pipeline);
72
+ const stagingServerName = "staging_server";
73
+ function isIncrementalImport(options) {
74
+ return options["incremental"] === true;
75
+ }
76
+ function shellQuote(value) {
77
+ return `'${value.replace(/'/g, `'"'"'`)}'`;
78
+ }
79
+ function connectionEnv(connection) {
80
+ return {
81
+ ...process.env,
82
+ PGDATABASE: connection.name,
83
+ PGHOST: connection.host,
84
+ PGPASSWORD: connection.password,
85
+ PGPORT: String(connection.port),
86
+ PGUSER: connection.user,
87
+ };
88
+ }
89
+ function escapeSqlLiteral(value) {
90
+ return value.replace(/'/g, "''");
91
+ }
92
+ function sleep(delayMs) {
93
+ return new Promise((resolve) => setTimeout(resolve, delayMs));
94
+ }
95
+ function getExecSyncErrorOutput(error) {
96
+ const execError = error;
97
+ return `${execError.stderr ?? ""}\n${execError.stdout ?? ""}`;
98
+ }
99
+ function isRetryablePostgresError(error) {
100
+ const output = getExecSyncErrorOutput(error);
101
+ return (output.includes("deadlock detected") ||
102
+ output.includes("could not obtain lock") ||
103
+ output.includes("canceling statement due to lock timeout") ||
104
+ output.includes("could not serialize access"));
105
+ }
106
+ function canReuseExistingStagingDatabase(error) {
107
+ const output = getExecSyncErrorOutput(error);
108
+ return (output.includes("permission denied to terminate process") ||
109
+ output.includes("database is being accessed by other users") ||
110
+ output.includes("cannot drop the currently open database"));
111
+ }
112
+ function isMissingForeignServerError(error, serverName) {
113
+ return getExecSyncErrorOutput(error).includes(`server "${serverName}" does not exist`);
114
+ }
115
+ async function runWithRetry(operation, options, retryOptions) {
116
+ let attempt = 1;
117
+ let delayMs = retryOptions.delayMs;
118
+ while (true) {
119
+ try {
120
+ return operation();
121
+ }
122
+ catch (error) {
123
+ if (!isRetryablePostgresError(error) || attempt >= retryOptions.attempts) {
124
+ throw error;
125
+ }
126
+ if (!options["silent"]) {
127
+ console.warn(`${retryOptions.label} hit a transient PostgreSQL lock error on attempt ${attempt}/${retryOptions.attempts}; retrying in ${delayMs}ms...`);
128
+ }
129
+ await sleep(delayMs);
130
+ attempt += 1;
131
+ delayMs *= 2;
132
+ }
133
+ }
134
+ }
135
+ function buildPsqlCommand(baseArgs, connection, options) {
136
+ const sudoPrefix = options["sudo"] ? `sudo -u ${options["sudo"]} ` : "";
137
+ return (`${sudoPrefix}psql --quiet ` +
138
+ `-h ${shellQuote(connection.host)} ` +
139
+ `-p ${shellQuote(String(connection.port))} ` +
140
+ `-U ${shellQuote(connection.user)} ` +
141
+ `-d ${shellQuote(connection.name)} ` +
142
+ baseArgs);
143
+ }
144
+ function runPsqlFile(sqlFilePath, dataDir, options, connection, stopOnError = true) {
145
+ const onErrorFlag = stopOnError ? "-v ON_ERROR_STOP=1 " : "";
146
+ execSync(buildPsqlCommand(`${onErrorFlag}-f ${shellQuote(sqlFilePath)}`, connection, options), {
147
+ cwd: dataDir,
148
+ env: connectionEnv(connection),
149
+ encoding: "utf-8",
150
+ stdio: ["ignore", "pipe", "pipe"],
151
+ });
152
+ }
153
+ function runPsqlCommand(command, dataDir, options, connection, stopOnError = true) {
154
+ const onErrorFlag = stopOnError ? "-v ON_ERROR_STOP=1 " : "";
155
+ execSync(buildPsqlCommand(`${onErrorFlag}-c ${shellQuote(command)}`, connection, options), {
156
+ cwd: dataDir,
157
+ env: connectionEnv(connection),
158
+ encoding: "utf-8",
159
+ stdio: ["ignore", "pipe", "pipe"],
160
+ });
161
+ }
162
+ function runPsqlQuery(command, dataDir, options, connection, stopOnError = true) {
163
+ const onErrorFlag = stopOnError ? "-v ON_ERROR_STOP=1 " : "";
164
+ return execSync(buildPsqlCommand(`${onErrorFlag}-At -c ${shellQuote(command)}`, connection, options), {
165
+ cwd: dataDir,
166
+ env: connectionEnv(connection),
167
+ encoding: "utf-8",
168
+ stdio: ["ignore", "pipe", "pipe"],
169
+ });
170
+ }
171
+ function ensureStagingSchemaHasTables(dataset, dataDir, options, connection) {
172
+ const stagingSchema = stagingSchemaName(dataset.database);
173
+ const tableCount = Number.parseInt(runPsqlQuery(`SELECT count(*) FROM pg_tables WHERE schemaname = '${escapeSqlLiteral(stagingSchema)}'`, dataDir, options, connection).trim(), 10);
174
+ assert(tableCount > 0, `Staging schema ${stagingSchema} is empty after importing ${dataset.database}. Aborting incremental merge to protect ${senatSchemaName}.`);
175
+ }
176
+ function ensureStagingDatabase(dataDir, options, runtime) {
177
+ const maintenanceDb = process.env["PGDATABASE"] || "postgres";
178
+ const maintenanceConnection = {
179
+ ...runtime.staging,
180
+ name: maintenanceDb,
181
+ };
182
+ try {
183
+ runPsqlCommand(`SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${runtime.staging.name.replace(/'/g, "''")}' AND pid <> pg_backend_pid();`, dataDir, options, maintenanceConnection, false);
184
+ runPsqlCommand(`DROP DATABASE IF EXISTS ${runtime.staging.name}`, dataDir, options, maintenanceConnection, false);
185
+ runPsqlCommand(`CREATE DATABASE ${runtime.staging.name} WITH OWNER ${runtime.staging.user}`, dataDir, options, maintenanceConnection);
186
+ }
187
+ catch (error) {
188
+ if (!canReuseExistingStagingDatabase(error)) {
189
+ throw error;
190
+ }
191
+ if (!options["silent"]) {
192
+ console.warn(`Could not recreate staging database ${runtime.staging.name}; reusing the existing database instead.`);
193
+ }
194
+ ensureDatabaseExists(runtime.staging, dataDir, options);
195
+ }
196
+ }
197
+ function ensureDatabaseExists(connection, dataDir, options) {
198
+ const maintenanceDb = process.env["PGDATABASE"] || "postgres";
199
+ const maintenanceConnection = {
200
+ ...connection,
201
+ name: maintenanceDb,
202
+ };
203
+ const exists = runPsqlQuery(`SELECT 1 FROM pg_database WHERE datname = '${escapeSqlLiteral(connection.name)}'`, dataDir, options, maintenanceConnection).trim();
204
+ if (exists === "1") {
205
+ return;
206
+ }
207
+ runPsqlCommand(`CREATE DATABASE ${connection.name} WITH OWNER ${connection.user}`, dataDir, options, maintenanceConnection);
208
+ }
209
+ function ensureForeignStagingServer(dataDir, options, runtime) {
210
+ runPsqlCommand(`CREATE EXTENSION IF NOT EXISTS postgres_fdw`, dataDir, options, runtime.target);
211
+ runPsqlCommand(`DROP SERVER IF EXISTS ${stagingServerName} CASCADE`, dataDir, options, runtime.target, false);
212
+ runPsqlCommand([
213
+ `CREATE SERVER ${stagingServerName}`,
214
+ `FOREIGN DATA WRAPPER postgres_fdw`,
215
+ `OPTIONS (host '${escapeSqlLiteral(runtime.staging.host)}', dbname '${escapeSqlLiteral(runtime.staging.name)}', port '${escapeSqlLiteral(String(runtime.staging.port))}')`,
216
+ ].join(" "), dataDir, options, runtime.target);
217
+ runPsqlCommand([
218
+ `CREATE USER MAPPING FOR CURRENT_USER SERVER ${stagingServerName}`,
219
+ `OPTIONS (user '${escapeSqlLiteral(runtime.staging.user)}', password '${escapeSqlLiteral(runtime.staging.password)}')`,
220
+ ].join(" "), dataDir, options, runtime.target);
221
+ }
222
+ function cleanupForeignStagingServer(dataDir, options, runtime) {
223
+ runPsqlCommand(`DROP SERVER IF EXISTS ${stagingServerName} CASCADE`, dataDir, options, runtime.target, false);
224
+ }
225
+ function mountForeignStagingSchema(dataset, dataDir, options, runtime) {
226
+ const stagingSchema = stagingSchemaName(dataset.database);
227
+ const importForeignSchemaCommand = `IMPORT FOREIGN SCHEMA ${stagingSchema} FROM SERVER ${stagingServerName} INTO ${stagingSchema}`;
228
+ runPsqlCommand(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE`, dataDir, options, runtime.target, false);
229
+ runPsqlCommand(`CREATE SCHEMA ${stagingSchema}`, dataDir, options, runtime.target);
230
+ try {
231
+ runPsqlCommand(importForeignSchemaCommand, dataDir, options, runtime.target);
232
+ }
233
+ catch (error) {
234
+ if (!isMissingForeignServerError(error, stagingServerName)) {
235
+ throw error;
236
+ }
237
+ if (!options["silent"]) {
238
+ console.warn(`Foreign server ${stagingServerName} disappeared before schema import; recreating it.`);
239
+ }
240
+ ensureForeignStagingServer(dataDir, options, runtime);
241
+ runPsqlCommand(importForeignSchemaCommand, dataDir, options, runtime.target);
242
+ }
243
+ }
244
+ function ensureSchemaVersionTable(dataDir, options, runtime) {
245
+ runPsqlCommand(buildEnsureSchemaVersionTableSql(senatSchemaName), dataDir, options, runtime.target);
246
+ }
247
+ function getSchemaStructureFingerprint(dataDir, options, runtime) {
248
+ return runPsqlQuery(buildSchemaStructureFingerprintQuery(senatSchemaName), dataDir, options, runtime.target).trim();
249
+ }
250
+ function getSchemaVersionNumber(dataDir, options, runtime) {
251
+ const version = runPsqlQuery(`SELECT number FROM ${senatSchemaName}.version`, dataDir, options, runtime.target).trim();
252
+ return Number.parseInt(version, 10);
253
+ }
254
+ function bumpSchemaVersionIfNeeded(previousFingerprint, dataDir, options, runtime) {
255
+ const currentFingerprint = getSchemaStructureFingerprint(dataDir, options, runtime);
256
+ if (currentFingerprint !== previousFingerprint) {
257
+ runPsqlCommand(buildIncrementSchemaVersionSql(senatSchemaName), dataDir, options, runtime.target);
258
+ }
259
+ if (!options["silent"]) {
260
+ const versionNumber = getSchemaVersionNumber(dataDir, options, runtime);
261
+ if (currentFingerprint !== previousFingerprint) {
262
+ console.log(`Incremented ${senatSchemaName}.version to ${versionNumber} after schema structure change.`);
263
+ }
264
+ else {
265
+ console.log(`Current ${senatSchemaName}.version: ${versionNumber}.`);
266
+ }
267
+ }
268
+ }
269
+ function finalizeDatasetImport(dataset, dataDir, options, runtime) {
270
+ const postImportFilePath = path.join(dataDir, `${dataset.database}_post_import.sql`);
271
+ const postImportSql = buildIncrementalDatasetImportSql(dataset.database, runtime.target.user, dataset.mergeKeys, dataset.rowMultisetMergeTables);
272
+ const stagingSchema = stagingSchemaName(dataset.database);
273
+ fs.writeFileSync(postImportFilePath, postImportSql, { encoding: "utf8" });
274
+ if (isIncrementalImport(options)) {
275
+ mountForeignStagingSchema(dataset, dataDir, options, runtime);
276
+ }
277
+ runPsqlFile(postImportFilePath, dataDir, options, runtime.target);
278
+ runPsqlCommand(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE`, dataDir, options, runtime.target, false);
279
+ }
280
+ function applyStagingMetadataToTarget(dataset, dataDir, options, runtime) {
281
+ const stagingSchema = stagingSchemaName(dataset.database);
282
+ const encodedStatements = runPsqlQuery(buildExportStagingMetadataStatementsQuery(stagingSchema, senatSchemaName), dataDir, options, runtime.staging)
283
+ .split("\n")
284
+ .map((line) => line.trim())
285
+ .filter((line) => line.length > 0);
286
+ for (const encodedStatement of encodedStatements) {
287
+ const statement = Buffer.from(encodedStatement, "hex").toString("utf8");
288
+ runPsqlCommand(statement, dataDir, options, runtime.target);
289
+ }
290
+ }
291
+ function listTablesInSchema(schemaName, dataDir, options, connection) {
292
+ const query = ["SELECT tablename", "FROM pg_tables", `WHERE schemaname = '${schemaName}'`, "ORDER BY tablename"].join("\n");
293
+ const output = runPsqlQuery(query, dataDir, options, connection);
294
+ return output
295
+ .split("\n")
296
+ .map((tableName) => tableName.trim())
297
+ .filter((tableName) => tableName.length > 0);
298
+ }
299
+ function createManagedIndexesInStaging(dataset, dataDir, options, runtime) {
300
+ if (!dataset.indexes) {
301
+ return;
302
+ }
303
+ const stagingSchema = stagingSchemaName(dataset.database);
304
+ const importedTables = new Set(listTablesInSchema(stagingSchema, dataDir, options, runtime.staging));
305
+ runPsqlCommand(`CREATE SCHEMA IF NOT EXISTS ${stagingSchema};`, dataDir, options, runtime.staging);
306
+ for (const [table, indexes] of Object.entries(dataset.indexes)) {
307
+ if (!importedTables.has(table)) {
308
+ if (!options["silent"]) {
309
+ console.warn(`Skipping managed indexes for missing table ${stagingSchema}.${table}`);
310
+ }
311
+ continue;
312
+ }
313
+ for (const index of indexes) {
314
+ const indexName = prefixedName(dataset.database, `${table}_${index.name}`);
315
+ const columns = index.columns.join(", ");
316
+ const sql = `CREATE INDEX IF NOT EXISTS ${indexName} ON ${stagingSchema}.${table} (${columns});`;
317
+ try {
318
+ runPsqlCommand(sql, dataDir, options, runtime.staging);
319
+ }
320
+ catch (err) {
321
+ console.error(`Failed to create managed index ${indexName} on ${stagingSchema}.${table}:`, err);
322
+ continue;
323
+ }
324
+ if (!options["silent"]) {
325
+ console.log(`Prepared managed index ${indexName} on ${stagingSchema}.${table}`);
326
+ }
327
+ }
328
+ }
329
+ }
330
/**
 * List the tables of the Senat schema whose names start with the dataset's
 * database-name prefix, sorted alphabetically.
 *
 * Underscores in the prefix are escaped so LIKE matches them literally.
 */
function listPrefixedTables(dataset, dataDir, options, runtime) {
    const prefix = `${dataset.database}_`;
    const escapedPrefix = prefix.replace(/_/g, "\\_");
    const query = [
        "SELECT tablename",
        "FROM pg_tables",
        `WHERE schemaname = '${senatSchemaName}'`,
        ` AND tablename LIKE '${escapedPrefix}%' ESCAPE '\\'`,
        "ORDER BY tablename",
    ].join("\n");
    return runPsqlQuery(query, dataDir, options, runtime.target)
        .split("\n")
        .map((row) => row.trim())
        .filter((row) => row !== "");
}
62
345
/**
 * Download `url` and stream its body into the local file `dest`.
 *
 * Throws when the HTTP status is not OK or the response carries no body.
 */
async function downloadFile(url, dest) {
    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`Download failed ${response.status} ${response.statusText} for ${url}`);
    }
    assert(response.body, `Empty response body for ${url}`);
    // Bridge the web ReadableStream to a Node stream before piping to disk.
    const body = Readable.fromWeb(response.body);
    const sink = fs.createWriteStream(dest);
    await streamPipeline(body, sink);
}
69
- /**
70
- * Copy a dataset database to the main Senat database (overwriting its contents).
71
- */
72
- async function copyToSenat(dataset, dataDir, options) {
353
/**
 * Import one dataset's raw SQL dump into the staging database.
 *
 * Rewrites the dump line by line into a `<dataset>_schema_dump.sql` file
 * targeted at the dataset's staging schema (dropping/recreating that schema
 * first), loads it with psql, creates the managed indexes, then runs a
 * normalization script against the staging schema with retries.
 *
 * @param dataset Dataset descriptor (at least `database` is read here).
 * @param dataDir Directory containing `<database>.sql` and receiving the
 *   generated dump and normalization files.
 * @param options CLI options (`silent` is read here; the rest is passed on).
 * @param runtime Connection context; only `runtime.staging` is used.
 */
async function importIntoStaging(dataset, dataDir, options, runtime) {
    if (!options["silent"]) {
        console.log(`Importing ${dataset.database} into staging database ${runtime.staging.name}...`);
    }
    const sqlFilePath = path.join(dataDir, `${dataset.database}.sql`);
    const schemaDumpFile = path.join(dataDir, `${dataset.database}_schema_dump.sql`);
    const normalizeSqlFile = path.join(dataDir, `${dataset.database}_normalize_staging.sql`);
    const stagingSchema = stagingSchemaName(dataset.database);
    const schemaSqlWriter = fs.createWriteStream(schemaDumpFile, { encoding: "utf8" });
    // Start from a clean slate: the generated dump recreates the whole
    // staging schema before the rewritten statements are appended.
    schemaSqlWriter.write(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE;\n`);
    schemaSqlWriter.write(`CREATE SCHEMA IF NOT EXISTS ${stagingSchema};\n`);
    const lineReader = readline.createInterface({
        input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
        crlfDelay: Infinity,
    });
    // Tracks whether we are inside a COPY ... FROM stdin data section, where
    // lines are raw data rows rather than SQL statements.
    let inCopyData = false;
    for await (const line of lineReader) {
        // NOTE(review): rewriteLineForStagingImport is defined elsewhere;
        // presumably it retargets statements at the staging schema — confirm.
        let newLine = rewriteLineForStagingImport(line, dataset, stagingSchema, inCopyData);
        if (!inCopyData) {
            // Source dumps may declare LATIN1; force UTF8 since the file was
            // re-encoded upstream. Only rewrite outside COPY data sections.
            newLine = newLine.replace(/SET client_encoding = 'LATIN1';/i, "SET client_encoding = 'UTF8';");
            if (isCopyFromStdinLine(newLine)) {
                inCopyData = true;
            }
        }
        else if (line === "\\.") {
            // "\." terminates a COPY data section in psql dumps.
            inCopyData = false;
        }
        schemaSqlWriter.write(newLine + "\n");
    }
    schemaSqlWriter.end();
    // Wait for the dump file to be fully flushed before feeding it to psql.
    await new Promise((resolve, reject) => {
        schemaSqlWriter.on("finish", () => {
            try {
                runPsqlFile(schemaDumpFile, dataDir, options, runtime.staging, false);
                createManagedIndexesInStaging(dataset, dataDir, options, runtime);
                fs.writeFileSync(normalizeSqlFile, buildNormalizeStagingSchemaSql(dataset.database), { encoding: "utf8" });
                // Normalization is retried; its failure path goes through
                // .catch(reject) below, NOT through the catch block, which only
                // covers the synchronous calls above.
                runWithRetry(() => runPsqlFile(normalizeSqlFile, dataDir, options, runtime.staging), options, {
                    attempts: 4,
                    delayMs: 500,
                    label: `Staging normalization for ${dataset.database}`,
                })
                    .then(() => {
                    ensureStagingSchemaHasTables(dataset, dataDir, options, runtime.staging);
                    resolve();
                })
                    .catch(reject);
            }
            catch (error) {
                // Exec-style errors carry captured stderr/stdout; surface both.
                const execError = error;
                if (!options["silent"]) {
                    console.error(`Failed to import ${dataset.database} data into staging:`);
                    if (execError.stderr) {
                        console.error(execError.stderr);
                    }
                    if (execError.stdout) {
                        console.error(execError.stdout);
                    }
                }
                reject(error);
            }
        });
        schemaSqlWriter.on("error", reject);
    });
}
138
- async function retrieveDataset(dataDir, dataset) {
417
+ async function retrieveDataset(dataDir, dataset, options, runtime) {
139
418
  const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
140
419
  const zipFilePath = path.join(dataDir, zipFilename);
141
420
  if (options["all"] || options["fetch"]) {
142
- // Fetch & save ZIP file.
143
421
  if (!options["silent"]) {
144
- console.log(`Loading ${dataset.title}: ${zipFilename}…`);
422
+ console.log(`Loading ${dataset.title}: ${zipFilename}...`);
145
423
  }
146
- // Fetch fails with OpenSSL error: dh key too small.
147
- // (so does "curl").
148
- // const response = await fetch(dataset.url)
149
- // if (!response.ok) {
150
- // console.error(response.status, response.statusText)
151
- // console.error(await response.text())
152
- // throw new Error(`Fetch failed: ${dataset.url}`)
153
- // }
154
- // await pipeline(response.body!, fs.createWriteStream(zipFilePath))
155
424
  fs.removeSync(zipFilePath);
156
425
  await downloadFile(dataset.url, zipFilePath);
157
426
  }
@@ -159,7 +428,7 @@ async function retrieveDataset(dataDir, dataset) {
159
428
  const sqlFilePath = path.join(dataDir, sqlFilename);
160
429
  if (options["all"] || options["unzip"]) {
161
430
  if (!options["silent"]) {
162
- console.log(`Unzipping ${dataset.title}: ${zipFilename}…`);
431
+ console.log(`Unzipping ${dataset.title}: ${zipFilename}...`);
163
432
  }
164
433
  fs.removeSync(sqlFilePath);
165
434
  const zip = new StreamZip({
@@ -168,7 +437,7 @@ async function retrieveDataset(dataDir, dataset) {
168
437
  });
169
438
  await new Promise((resolve, reject) => {
170
439
  zip.on("ready", () => {
171
- zip.extract(null, dataDir, (err, _count) => {
440
+ zip.extract(null, dataDir, (err) => {
172
441
  zip.close();
173
442
  if (err) {
174
443
  reject(err);
@@ -181,27 +450,25 @@ async function retrieveDataset(dataDir, dataset) {
181
450
  });
182
451
  if (dataset.repairZip !== undefined) {
183
452
  if (!options["silent"]) {
184
- console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}…`);
453
+ console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}...`);
185
454
  }
186
455
  dataset.repairZip(dataset, dataDir);
187
456
  }
188
457
  }
189
458
  if ((options["all"] || options["repairEncoding"]) && dataset.repairEncoding) {
190
459
  if (!options["silent"]) {
191
- console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}…`);
460
+ console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}...`);
192
461
  }
193
462
  const repairedSqlFilePath = sqlFilePath + ".repaired";
194
463
  const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
195
464
  encoding: "utf8",
196
465
  });
197
- // Read the file as latin1 (ISO-8859-1/CP1252) and write as UTF-8
198
466
  const lineReader = readline.createInterface({
199
467
  input: fs.createReadStream(sqlFilePath, { encoding: "latin1" }),
200
468
  crlfDelay: Infinity,
201
469
  });
202
470
  for await (const line of lineReader) {
203
- // Optionally repair Windows-1252 control characters
204
- let repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
471
+ const repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
205
472
  repairedSqlWriter.write(repairedLine + "\n");
206
473
  }
207
474
  repairedSqlWriter.end();
@@ -209,103 +476,91 @@ async function retrieveDataset(dataDir, dataset) {
209
476
  }
210
477
  if (options["all"] || options["import"] || options["schema"]) {
211
478
  if (!options["silent"]) {
212
- console.log(`Importing ${dataset.title}: ${sqlFilename}…`);
479
+ console.log(`Merging ${dataset.title}: ${sqlFilename} into ${runtime.target.name}.${senatSchemaName}...`);
213
480
  }
214
- await copyToSenat(dataset, dataDir, options);
215
- // Create indexes programmatically after import
216
- if (dataset.indexes) {
217
- for (const [table, indexes] of Object.entries(dataset.indexes)) {
218
- for (const index of indexes) {
219
- const indexName = index.name;
220
- const columns = index.columns.join(", ");
221
- const schema = dataset.database;
222
- const sql = `CREATE INDEX IF NOT EXISTS ${indexName} ON ${schema}.${table} (${columns});`;
223
- try {
224
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -c "${sql}"`, {
225
- env: process.env,
226
- encoding: "utf-8",
227
- stdio: ["ignore", "ignore", "pipe"],
228
- });
229
- if (!options["silent"]) {
230
- console.log(`Created index: ${indexName} on ${schema}.${table} (${columns})`);
231
- }
232
- }
233
- catch (err) {
234
- console.error(`Failed to create index ${indexName} on ${schema}.${table}:`, err);
235
- }
236
- }
237
- }
481
+ await importIntoStaging(dataset, dataDir, options, runtime);
482
+ finalizeDatasetImport(dataset, dataDir, options, runtime);
483
+ if (isIncrementalImport(options)) {
484
+ applyStagingMetadataToTarget(dataset, dataDir, options, runtime);
238
485
  }
239
486
  }
240
487
  if (options["schema"]) {
241
- let definitionsDir = path.resolve("src", "raw_types_schemats");
488
+ const definitionsDir = path.resolve("src", "raw_types_schemats");
242
489
  assert(fs.statSync(definitionsDir).isDirectory());
243
490
  if (!options["silent"]) {
244
- console.log(`Creating TypeScript definitions from schema '${dataset.database}' in database 'senat'…`);
491
+ console.log(`Creating TypeScript definitions from prefixed ${senatSchemaName} tables ` +
492
+ `for '${dataset.database}' in database '${runtime.target.name}'...`);
245
493
  }
246
- const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/senat`;
247
- let definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
248
- execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.database} -o ${definitionFilePath}`, {
249
- // cwd: dataDir,
494
+ const dbConnectionString = `postgres://${runtime.target.user}:${runtime.target.password}` +
495
+ `@${runtime.target.host}:${runtime.target.port}/${runtime.target.name}`;
496
+ const definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
497
+ const tables = listPrefixedTables(dataset, dataDir, options, runtime);
498
+ const tableOptions = tables.map((tableName) => `-t ${tableName}`).join(" ");
499
+ execSync(`npx schemats generate -c ${dbConnectionString} -s ${senatSchemaName} ${tableOptions} -o ${definitionFilePath}`, {
250
500
  env: process.env,
251
501
  encoding: "utf-8",
252
- // stdio: ["ignore", "ignore", "pipe"],
253
502
  });
254
503
  const definition = fs.readFileSync(definitionFilePath, { encoding: "utf8" });
255
- const definitionRepaired = definition
256
- .replace(/\r\n/g, "\n")
257
- .replace(/AUTO-GENERATED FILE @ \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/, "AUTO-GENERATED FILE");
504
+ const definitionRepaired = normalizeGeneratedDefinition(definition, dataset.database);
258
505
  fs.writeFileSync(definitionFilePath, definitionRepaired);
259
- definitionsDir = path.resolve("src", "raw_types");
260
- definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
261
- execSync(`npx kysely-codegen --url '${dbConnectionString}' --default-schema ${dataset.database} --include-pattern '${dataset.database}.*' --out-file ${definitionFilePath}`, {
262
- env: process.env,
263
- encoding: "utf-8",
264
- // stdio: ["ignore", "ignore", "pipe"],
265
- });
266
506
  }
267
507
  }
508
/**
 * Build the database runtime context from the loaded configuration.
 *
 * Always exposes the target connection; when running an incremental import
 * a distinct staging connection is built from the staging configuration,
 * otherwise the target connection doubles as the staging one.
 */
function buildRuntimeContext() {
    const toConnection = (db) => ({
        host: db.host,
        name: db.name,
        password: db.password,
        port: db.port,
        user: db.user,
    });
    const target = toConnection(config.db);
    if (!isIncrementalImport(options)) {
        // Non-incremental runs import straight into the target database.
        return { staging: target, target };
    }
    return { staging: toConnection(config.stagingDb), target };
}
268
531
/**
 * Top-level driver: validate configuration, prepare the database(s), import
 * every chosen dataset, and bump the schema version if the structure changed.
 *
 * In incremental mode a separate staging database plus a foreign server are
 * prepared first and the foreign server is always cleaned up afterwards,
 * even when an import fails.
 */
async function retrieveOpenData() {
    const dataDir = options["dataDir"];
    assertExistingDirectory(dataDir, "data directory");
    // Guard against a flag value being swallowed as the --sudo user name.
    assert(!options["sudo"] || !options["sudo"].startsWith("-"), "Option --sudo expects a Unix user name, for example: --sudo postgres");
    const runtime = buildRuntimeContext();
    assert(runtime.target.host && runtime.target.port && runtime.target.user && runtime.target.password, "Missing target database configuration: DB_* in .env file");
    if (isIncrementalImport(options)) {
        assert(runtime.staging.host && runtime.staging.port && runtime.staging.user && runtime.staging.password, "Missing staging database configuration: STAGING_DB_* in .env file");
        // Importing into staging must never clobber the target database.
        assert(runtime.target.name !== runtime.staging.name, "Target and staging databases must be different");
    }
    console.time("data extraction time");
    try {
        ensureDatabaseExists(runtime.target, dataDir, options);
        if (isIncrementalImport(options)) {
            ensureStagingDatabase(dataDir, options, runtime);
            ensureForeignStagingServer(dataDir, options, runtime);
        }
        ensureSchemaVersionTable(dataDir, options, runtime);
        // Fingerprint taken before any import so a structural change made by
        // the imports can be detected and versioned afterwards.
        const initialSchemaFingerprint = getSchemaStructureFingerprint(dataDir, options, runtime);
        const enabledDatasets = getEnabledDatasets(options["categories"]);
        const chosenDatasets = getChosenDatasets(enabledDatasets);
        // Datasets are imported sequentially, in order.
        for (const dataset of chosenDatasets) {
            await retrieveDataset(dataDir, dataset, options, runtime);
        }
        bumpSchemaVersionIfNeeded(initialSchemaFingerprint, dataDir, options, runtime);
    }
    finally {
        // Cleanup and timing run even when an import throws.
        if (isIncrementalImport(options)) {
            cleanupForeignStagingServer(dataDir, options, runtime);
        }
        if (!options["silent"]) {
            console.timeEnd("data extraction time");
        }
    }
}
311
566
  retrieveOpenData()