@tricoteuses/senat 2.22.16 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (345):
  1. package/README.md +168 -0
  2. package/lib/aggregates.d.ts +52 -0
  3. package/lib/aggregates.js +930 -0
  4. package/lib/aggregates.mjs +713 -0
  5. package/lib/aggregates.ts +833 -0
  6. package/lib/config.d.ts +10 -0
  7. package/lib/config.js +16 -0
  8. package/lib/config.mjs +16 -0
  9. package/lib/config.ts +26 -0
  10. package/lib/databases.d.ts +2 -0
  11. package/lib/databases.js +26 -0
  12. package/lib/databases.mjs +57 -0
  13. package/lib/databases.ts +71 -0
  14. package/lib/datasets.d.ts +34 -0
  15. package/lib/datasets.js +233 -0
  16. package/lib/datasets.mjs +78 -0
  17. package/lib/datasets.ts +118 -0
  18. package/lib/fields.d.ts +10 -0
  19. package/lib/fields.js +68 -0
  20. package/lib/fields.mjs +22 -0
  21. package/lib/fields.ts +29 -0
  22. package/lib/git.d.ts +26 -0
  23. package/lib/git.js +167 -0
  24. package/lib/index.d.ts +13 -0
  25. package/lib/index.js +1 -0
  26. package/lib/index.mjs +7 -0
  27. package/lib/index.ts +64 -0
  28. package/lib/inserters.d.ts +98 -0
  29. package/lib/inserters.js +500 -0
  30. package/lib/inserters.mjs +360 -0
  31. package/lib/inserters.ts +521 -0
  32. package/lib/legislatures.json +38 -0
  33. package/lib/loaders.d.ts +58 -0
  34. package/lib/loaders.js +286 -0
  35. package/lib/loaders.mjs +158 -0
  36. package/lib/loaders.ts +271 -0
  37. package/lib/model/agenda.d.ts +6 -0
  38. package/lib/model/agenda.js +148 -0
  39. package/lib/model/ameli.d.ts +51 -0
  40. package/lib/model/ameli.js +149 -0
  41. package/lib/model/ameli.mjs +84 -0
  42. package/lib/model/ameli.ts +100 -0
  43. package/lib/model/commission.d.ts +18 -0
  44. package/lib/model/commission.js +269 -0
  45. package/lib/model/debats.d.ts +67 -0
  46. package/lib/model/debats.js +95 -0
  47. package/lib/model/debats.mjs +43 -0
  48. package/lib/model/debats.ts +68 -0
  49. package/lib/model/documents.d.ts +12 -0
  50. package/lib/model/documents.js +151 -0
  51. package/lib/model/dosleg.d.ts +7 -0
  52. package/lib/model/dosleg.js +326 -0
  53. package/lib/model/dosleg.mjs +196 -0
  54. package/lib/model/dosleg.ts +240 -0
  55. package/lib/model/index.d.ts +7 -0
  56. package/lib/model/index.js +7 -0
  57. package/lib/model/index.mjs +5 -0
  58. package/lib/model/index.ts +15 -0
  59. package/lib/model/questions.d.ts +45 -0
  60. package/lib/model/questions.js +89 -0
  61. package/lib/model/questions.mjs +71 -0
  62. package/lib/model/questions.ts +93 -0
  63. package/lib/model/scrutins.d.ts +13 -0
  64. package/lib/model/scrutins.js +114 -0
  65. package/lib/model/seance.d.ts +3 -0
  66. package/lib/model/seance.js +267 -0
  67. package/lib/model/sens.d.ts +146 -0
  68. package/lib/model/sens.js +454 -0
  69. package/lib/model/sens.mjs +415 -0
  70. package/lib/model/sens.ts +516 -0
  71. package/lib/model/texte.d.ts +7 -0
  72. package/lib/model/texte.js +256 -0
  73. package/lib/model/texte.mjs +208 -0
  74. package/lib/model/texte.ts +229 -0
  75. package/lib/model/util.d.ts +9 -0
  76. package/lib/model/util.js +38 -0
  77. package/lib/model/util.mjs +19 -0
  78. package/lib/model/util.ts +32 -0
  79. package/lib/parsers/texte.d.ts +7 -0
  80. package/lib/parsers/texte.js +228 -0
  81. package/lib/raw_types/ameli.d.ts +914 -0
  82. package/lib/raw_types/ameli.js +5 -0
  83. package/lib/raw_types/ameli.mjs +163 -0
  84. package/lib/raw_types/debats.d.ts +207 -0
  85. package/lib/raw_types/debats.js +5 -0
  86. package/lib/raw_types/debats.mjs +58 -0
  87. package/lib/raw_types/dosleg.d.ts +1619 -0
  88. package/lib/raw_types/dosleg.js +5 -0
  89. package/lib/raw_types/dosleg.mjs +438 -0
  90. package/lib/raw_types/questions.d.ts +419 -0
  91. package/lib/raw_types/questions.js +5 -0
  92. package/lib/raw_types/questions.mjs +11 -0
  93. package/lib/raw_types/senat.d.ts +11368 -0
  94. package/lib/raw_types/senat.js +5 -0
  95. package/lib/raw_types/sens.d.ts +8248 -0
  96. package/lib/raw_types/sens.js +5 -0
  97. package/lib/raw_types/sens.mjs +508 -0
  98. package/lib/raw_types_kysely/ameli.d.ts +915 -0
  99. package/lib/raw_types_kysely/ameli.js +7 -0
  100. package/lib/raw_types_kysely/ameli.mjs +5 -0
  101. package/lib/raw_types_kysely/ameli.ts +951 -0
  102. package/lib/raw_types_kysely/debats.d.ts +207 -0
  103. package/lib/raw_types_kysely/debats.js +7 -0
  104. package/lib/raw_types_kysely/debats.mjs +5 -0
  105. package/lib/raw_types_kysely/debats.ts +222 -0
  106. package/lib/raw_types_kysely/dosleg.d.ts +3532 -0
  107. package/lib/raw_types_kysely/dosleg.js +7 -0
  108. package/lib/raw_types_kysely/dosleg.mjs +5 -0
  109. package/lib/raw_types_kysely/dosleg.ts +3621 -0
  110. package/lib/raw_types_kysely/questions.d.ts +414 -0
  111. package/lib/raw_types_kysely/questions.js +7 -0
  112. package/lib/raw_types_kysely/questions.mjs +5 -0
  113. package/lib/raw_types_kysely/questions.ts +426 -0
  114. package/lib/raw_types_kysely/sens.d.ts +4394 -0
  115. package/lib/raw_types_kysely/sens.js +7 -0
  116. package/lib/raw_types_kysely/sens.mjs +5 -0
  117. package/lib/raw_types_kysely/sens.ts +4499 -0
  118. package/lib/raw_types_schemats/ameli.d.ts +539 -0
  119. package/lib/raw_types_schemats/ameli.js +2 -0
  120. package/lib/raw_types_schemats/ameli.mjs +2 -0
  121. package/lib/raw_types_schemats/ameli.ts +601 -0
  122. package/lib/raw_types_schemats/debats.d.ts +127 -0
  123. package/lib/raw_types_schemats/debats.js +2 -0
  124. package/lib/raw_types_schemats/debats.mjs +2 -0
  125. package/lib/raw_types_schemats/debats.ts +145 -0
  126. package/lib/raw_types_schemats/dosleg.d.ts +977 -0
  127. package/lib/raw_types_schemats/dosleg.js +2 -0
  128. package/lib/raw_types_schemats/dosleg.mjs +2 -0
  129. package/lib/raw_types_schemats/dosleg.ts +2193 -0
  130. package/lib/raw_types_schemats/questions.d.ts +235 -0
  131. package/lib/raw_types_schemats/questions.js +2 -0
  132. package/lib/raw_types_schemats/questions.mjs +2 -0
  133. package/lib/raw_types_schemats/questions.ts +249 -0
  134. package/lib/raw_types_schemats/sens.d.ts +6915 -0
  135. package/lib/raw_types_schemats/sens.js +2 -0
  136. package/lib/raw_types_schemats/sens.mjs +2 -0
  137. package/lib/raw_types_schemats/sens.ts +2907 -0
  138. package/lib/scripts/convert_data.d.ts +1 -0
  139. package/lib/scripts/convert_data.js +354 -0
  140. package/lib/scripts/convert_data.mjs +181 -0
  141. package/lib/scripts/convert_data.ts +243 -0
  142. package/lib/scripts/data-download.d.ts +1 -0
  143. package/lib/scripts/data-download.js +12 -0
  144. package/lib/scripts/datautil.d.ts +8 -0
  145. package/lib/scripts/datautil.js +34 -0
  146. package/lib/scripts/datautil.mjs +16 -0
  147. package/lib/scripts/datautil.ts +19 -0
  148. package/lib/scripts/images/transparent_150x192.jpg +0 -0
  149. package/lib/scripts/images/transparent_155x225.jpg +0 -0
  150. package/lib/scripts/parse_textes.d.ts +1 -0
  151. package/lib/scripts/parse_textes.js +44 -0
  152. package/lib/scripts/parse_textes.mjs +46 -0
  153. package/lib/scripts/parse_textes.ts +65 -0
  154. package/lib/scripts/retrieve_agenda.d.ts +1 -0
  155. package/lib/scripts/retrieve_agenda.js +132 -0
  156. package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
  157. package/lib/scripts/retrieve_cr_commission.js +364 -0
  158. package/lib/scripts/retrieve_cr_seance.d.ts +6 -0
  159. package/lib/scripts/retrieve_cr_seance.js +347 -0
  160. package/lib/scripts/retrieve_documents.d.ts +3 -0
  161. package/lib/scripts/retrieve_documents.js +219 -0
  162. package/lib/scripts/retrieve_documents.mjs +249 -0
  163. package/lib/scripts/retrieve_documents.ts +298 -0
  164. package/lib/scripts/retrieve_open_data.d.ts +1 -0
  165. package/lib/scripts/retrieve_open_data.js +315 -0
  166. package/lib/scripts/retrieve_open_data.mjs +217 -0
  167. package/lib/scripts/retrieve_open_data.ts +268 -0
  168. package/lib/scripts/retrieve_senateurs_photos.d.ts +1 -0
  169. package/lib/scripts/retrieve_senateurs_photos.js +147 -0
  170. package/lib/scripts/retrieve_senateurs_photos.mjs +147 -0
  171. package/lib/scripts/retrieve_senateurs_photos.ts +177 -0
  172. package/lib/scripts/retrieve_videos.d.ts +1 -0
  173. package/lib/scripts/retrieve_videos.js +461 -0
  174. package/lib/scripts/shared/cli_helpers.d.ts +95 -0
  175. package/lib/scripts/shared/cli_helpers.js +91 -0
  176. package/lib/scripts/shared/cli_helpers.ts +36 -0
  177. package/lib/scripts/shared/util.d.ts +4 -0
  178. package/lib/scripts/shared/util.js +35 -0
  179. package/lib/scripts/shared/util.ts +33 -0
  180. package/lib/scripts/test_iter_load.d.ts +1 -0
  181. package/lib/scripts/test_iter_load.js +12 -0
  182. package/lib/src/config.d.ts +22 -0
  183. package/lib/src/config.js +17 -7
  184. package/lib/src/conversion_textes.js +5 -1
  185. package/lib/src/databases.d.ts +2 -1
  186. package/lib/src/databases_postgres.d.ts +4 -0
  187. package/lib/src/databases_postgres.js +23 -0
  188. package/lib/src/datasets.d.ts +4 -0
  189. package/lib/src/datasets.js +16 -2
  190. package/lib/src/git.d.ts +1 -0
  191. package/lib/src/git.js +45 -11
  192. package/lib/src/index.d.ts +19 -8
  193. package/lib/src/index.js +6 -1
  194. package/lib/src/loaders.js +10 -4
  195. package/lib/src/model/agenda.js +2 -2
  196. package/lib/src/model/ameli.d.ts +64 -52
  197. package/lib/src/model/ameli.js +147 -145
  198. package/lib/src/model/ameli_postgres.d.ts +67 -0
  199. package/lib/src/model/ameli_postgres.js +150 -0
  200. package/lib/src/model/commission.d.ts +3 -2
  201. package/lib/src/model/commission.js +2 -2
  202. package/lib/src/model/debats.d.ts +38 -66
  203. package/lib/src/model/debats.js +110 -93
  204. package/lib/src/model/documents.d.ts +32 -12
  205. package/lib/src/model/documents.js +171 -130
  206. package/lib/src/model/dosleg.d.ts +142 -5
  207. package/lib/src/model/dosleg.js +298 -156
  208. package/lib/src/model/questions.d.ts +54 -45
  209. package/lib/src/model/questions.js +89 -87
  210. package/lib/src/model/scrutins.d.ts +48 -13
  211. package/lib/src/model/scrutins.js +118 -111
  212. package/lib/src/model/seance.js +3 -3
  213. package/lib/src/model/sens.d.ts +109 -179
  214. package/lib/src/model/sens.js +384 -484
  215. package/lib/src/model/util.d.ts +0 -8
  216. package/lib/src/model/util.js +0 -23
  217. package/lib/src/parsers/texte.js +7 -7
  218. package/lib/src/raw_types/ameli.d.ts +1651 -803
  219. package/lib/src/raw_types/ameli.js +1816 -5
  220. package/lib/src/raw_types/debats.d.ts +353 -180
  221. package/lib/src/raw_types/debats.js +517 -5
  222. package/lib/src/raw_types/dosleg.d.ts +2862 -1527
  223. package/lib/src/raw_types/dosleg.js +4354 -5
  224. package/lib/src/raw_types/questions.d.ts +671 -395
  225. package/lib/src/raw_types/questions.js +1303 -5
  226. package/lib/src/raw_types/sens.d.ts +7743 -8148
  227. package/lib/src/raw_types/sens.js +10429 -5
  228. package/lib/src/raw_types_schemats/ameli.d.ts +4 -2
  229. package/lib/src/raw_types_schemats/debats.d.ts +2 -2
  230. package/lib/src/raw_types_schemats/dosleg.d.ts +2 -2
  231. package/lib/src/raw_types_schemats/questions.d.ts +2 -2
  232. package/lib/src/raw_types_schemats/sens.d.ts +10 -4216
  233. package/lib/src/scripts/convert_data.js +7 -6
  234. package/lib/src/scripts/convert_xml_to_html.js +2 -2
  235. package/lib/src/scripts/data-download.js +3 -2
  236. package/lib/src/scripts/retrieve_agenda.js +21 -9
  237. package/lib/src/scripts/retrieve_cr_commission.js +17 -17
  238. package/lib/src/scripts/retrieve_cr_seance.d.ts +14 -1
  239. package/lib/src/scripts/retrieve_cr_seance.js +10 -11
  240. package/lib/src/scripts/retrieve_documents.d.ts +11 -2
  241. package/lib/src/scripts/retrieve_documents.js +25 -14
  242. package/lib/src/scripts/retrieve_open_data.js +514 -153
  243. package/lib/src/scripts/retrieve_senateurs_photos.js +25 -11
  244. package/lib/src/scripts/retrieve_videos.js +12 -11
  245. package/lib/src/scripts/shared/cli_helpers.d.ts +1 -6
  246. package/lib/src/scripts/shared/cli_helpers.js +9 -8
  247. package/lib/src/scripts/shared/incremental_import_sql.d.ts +2 -0
  248. package/lib/src/scripts/shared/incremental_import_sql.js +894 -0
  249. package/lib/src/scripts/shared/prefixed_tables.d.ts +10 -0
  250. package/lib/src/scripts/shared/prefixed_tables.js +36 -0
  251. package/lib/src/scripts/shared/schema_version.d.ts +3 -0
  252. package/lib/src/scripts/shared/schema_version.js +97 -0
  253. package/lib/src/scripts/shared/staging_import.d.ts +3 -0
  254. package/lib/src/scripts/shared/staging_import.js +80 -0
  255. package/lib/src/scripts/shared/staging_metadata_sql.d.ts +1 -0
  256. package/lib/src/scripts/shared/staging_metadata_sql.js +221 -0
  257. package/lib/src/scripts/validate_prefixed_tables.d.ts +1 -0
  258. package/lib/src/scripts/validate_prefixed_tables.js +101 -0
  259. package/lib/src/types/ameli.d.ts +4 -4
  260. package/lib/src/types/debats.d.ts +2 -2
  261. package/lib/src/types/dosleg.d.ts +39 -39
  262. package/lib/src/types/questions.d.ts +2 -2
  263. package/lib/src/types/sens.d.ts +0 -2
  264. package/lib/src/types/texte.d.ts +1 -1
  265. package/lib/src/utils/cr_spliting.d.ts +9 -6
  266. package/lib/src/utils/cr_spliting.js +6 -101
  267. package/lib/src/utils/reunion_odj_building.d.ts +7 -3
  268. package/lib/src/utils/reunion_parsing.d.ts +2 -1
  269. package/lib/src/utils/reunion_parsing.js +2 -2
  270. package/lib/src/videos/match.js +8 -5
  271. package/lib/src/videos/pipeline.d.ts +6 -2
  272. package/lib/src/videos/pipeline.js +21 -8
  273. package/lib/src/videos/search.js +6 -2
  274. package/lib/strings.d.ts +1 -0
  275. package/lib/strings.js +18 -0
  276. package/lib/strings.mjs +18 -0
  277. package/lib/strings.ts +26 -0
  278. package/lib/tests/incrementalImportSql.test.d.ts +1 -0
  279. package/lib/tests/incrementalImportSql.test.js +155 -0
  280. package/lib/tests/prefixedTables.test.d.ts +1 -0
  281. package/lib/tests/prefixedTables.test.js +22 -0
  282. package/lib/tests/schemaVersion.test.d.ts +1 -0
  283. package/lib/tests/schemaVersion.test.js +23 -0
  284. package/lib/tests/validatePrefixedTables.test.d.ts +1 -0
  285. package/lib/tests/validatePrefixedTables.test.js +14 -0
  286. package/lib/types/agenda.d.ts +44 -0
  287. package/lib/types/agenda.js +1 -0
  288. package/lib/types/ameli.d.ts +5 -0
  289. package/lib/types/ameli.js +1 -0
  290. package/lib/types/ameli.mjs +13 -0
  291. package/lib/types/ameli.ts +21 -0
  292. package/lib/types/compte_rendu.d.ts +83 -0
  293. package/lib/types/compte_rendu.js +1 -0
  294. package/lib/types/debats.d.ts +2 -0
  295. package/lib/types/debats.js +1 -0
  296. package/lib/types/debats.mjs +2 -0
  297. package/lib/types/debats.ts +6 -0
  298. package/lib/types/dosleg.d.ts +70 -0
  299. package/lib/types/dosleg.js +1 -0
  300. package/lib/types/dosleg.mjs +151 -0
  301. package/lib/types/dosleg.ts +284 -0
  302. package/lib/types/questions.d.ts +2 -0
  303. package/lib/types/questions.js +1 -0
  304. package/lib/types/questions.mjs +1 -0
  305. package/lib/types/questions.ts +3 -0
  306. package/lib/types/sens.d.ts +10 -0
  307. package/lib/types/sens.js +1 -0
  308. package/lib/types/sens.mjs +1 -0
  309. package/lib/types/sens.ts +12 -0
  310. package/lib/types/sessions.d.ts +5 -0
  311. package/lib/types/sessions.js +84 -0
  312. package/lib/types/sessions.mjs +43 -0
  313. package/lib/types/sessions.ts +42 -0
  314. package/lib/types/texte.d.ts +74 -0
  315. package/lib/types/texte.js +16 -0
  316. package/lib/types/texte.mjs +16 -0
  317. package/lib/types/texte.ts +76 -0
  318. package/lib/typings/windows-1252.d.js +2 -0
  319. package/lib/typings/windows-1252.d.mjs +2 -0
  320. package/lib/typings/windows-1252.d.ts +11 -0
  321. package/lib/utils/cr_spliting.d.ts +28 -0
  322. package/lib/utils/cr_spliting.js +265 -0
  323. package/lib/utils/date.d.ts +10 -0
  324. package/lib/utils/date.js +100 -0
  325. package/lib/utils/nvs-timecode.d.ts +7 -0
  326. package/lib/utils/nvs-timecode.js +79 -0
  327. package/lib/utils/reunion_grouping.d.ts +9 -0
  328. package/lib/utils/reunion_grouping.js +361 -0
  329. package/lib/utils/reunion_odj_building.d.ts +5 -0
  330. package/lib/utils/reunion_odj_building.js +154 -0
  331. package/lib/utils/reunion_parsing.d.ts +23 -0
  332. package/lib/utils/reunion_parsing.js +209 -0
  333. package/lib/utils/scoring.d.ts +14 -0
  334. package/lib/utils/scoring.js +147 -0
  335. package/lib/utils/string_cleaning.d.ts +7 -0
  336. package/lib/utils/string_cleaning.js +57 -0
  337. package/lib/validators/config.d.ts +9 -0
  338. package/lib/validators/config.js +10 -0
  339. package/lib/validators/config.mjs +54 -0
  340. package/lib/validators/config.ts +79 -0
  341. package/lib/validators/senat.d.ts +0 -0
  342. package/lib/validators/senat.js +28 -0
  343. package/lib/validators/senat.mjs +24 -0
  344. package/lib/validators/senat.ts +26 -0
  345. package/package.json +11 -11
@@ -1,16 +1,23 @@
1
1
  import assert from "assert";
2
- import { execSync } from "child_process";
2
+ import { execFileSync } from "child_process";
3
3
  import commandLineArgs from "command-line-args";
4
4
  import fs from "fs-extra";
5
+ import { formatWithPrettier, makePgTsGenerator, markAsGenerated, processDatabase } from "kanel";
6
+ import { makeGenerateZodSchemas } from "kanel-zod";
5
7
  import path from "path";
6
8
  import StreamZip from "node-stream-zip";
7
9
  import readline from "readline";
8
- import * as windows1252 from "windows-1252";
9
- import { pipeline } from "stream";
10
+ import { pipeline, Readable } from "stream";
10
11
  import { promisify } from "util";
12
+ import * as windows1252 from "windows-1252";
11
13
  import config from "../config";
12
14
  import { getChosenDatasets, getEnabledDatasets } from "../datasets";
13
- import { commonOptions } from "./shared/cli_helpers";
15
+ import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
16
+ import { buildIncrementalDatasetImportSql, buildNormalizeStagingSchemaSql } from "./shared/incremental_import_sql";
17
+ import { buildGeneratedTableManifest, getGeneratedDefinitionPath, getGeneratedTableManifestPath, prefixedName, rawTypesDir, senatSchemaName, stagingSchemaName, stripDatasetPrefix, } from "./shared/prefixed_tables";
18
+ import { buildEnsureSchemaVersionTableSql, buildIncrementSchemaVersionSql, buildSchemaStructureFingerprintQuery, } from "./shared/schema_version";
19
+ import { buildExportStagingMetadataStatementsQuery } from "./shared/staging_metadata_sql";
20
+ import { isCopyFromStdinLine, rewriteLineForStagingImport } from "./shared/staging_import";
14
21
  const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
15
22
  const optionsDefinitions = [
16
23
  ...commonOptions,
@@ -22,7 +29,7 @@ const optionsDefinitions = [
22
29
  },
23
30
  {
24
31
  alias: "c",
25
- help: "create TypeScript interfaces from databases schemas into src/raw_types_* directories",
32
+ help: "create TypeScript interfaces and Zod schemas from database tables into src/raw_types",
26
33
  name: "schema",
27
34
  type: Boolean,
28
35
  },
@@ -38,15 +45,20 @@ const optionsDefinitions = [
38
45
  name: "fetch",
39
46
  type: Boolean,
40
47
  },
48
+ {
49
+ help: "use separate staging database and postgres_fdw for incremental merge into target schema",
50
+ name: "incremental",
51
+ type: Boolean,
52
+ },
41
53
  {
42
54
  alias: "i",
43
- help: "import SQL dumps into a freshly (re-)created database",
55
+ help: "import SQL dumps into PostgreSQL",
44
56
  name: "import",
45
57
  type: Boolean,
46
58
  },
47
59
  {
48
60
  alias: "S",
49
- help: "sudo psql commands with given user",
61
+ help: "sudo psql commands with given user (example: --sudo postgres)",
50
62
  name: "sudo",
51
63
  type: String,
52
64
  },
@@ -59,99 +71,473 @@ const optionsDefinitions = [
59
71
  ];
60
72
// Parsed command-line options for this script (definitions above).
const options = commandLineArgs(optionsDefinitions);
// Promisified stream.pipeline, awaited when streaming downloads to disk.
const streamPipeline = promisify(pipeline);
// Name of the postgres_fdw foreign server created on the target database to
// read tables from the staging database during incremental imports.
const stagingServerName = "staging_server";
75
/** True when the CLI was invoked with the --incremental flag. */
function isIncrementalImport(options) {
    const flag = options["incremental"];
    return flag === true;
}
78
/**
 * Build the environment for a psql child process: the current process
 * environment plus the standard PG* connection variables derived from
 * `connection` ({ name, host, password, port, user }).
 */
function connectionEnv(connection) {
    const { name, host, password, port, user } = connection;
    return Object.assign({}, process.env, {
        PGDATABASE: name,
        PGHOST: host,
        PGPASSWORD: password,
        PGPORT: String(port),
        PGUSER: user,
    });
}
88
/** Double every single quote so `value` can sit inside a SQL string literal. */
function escapeSqlLiteral(value) {
    return value.split("'").join("''");
}
91
/** Resolve after `delayMs` milliseconds (promise-based setTimeout). */
function sleep(delayMs) {
    return new Promise((done) => {
        setTimeout(done, delayMs);
    });
}
94
/**
 * Concatenate the stderr and stdout captured on an execFileSync error, so
 * callers can pattern-match PostgreSQL messages wherever psql wrote them.
 * Missing streams become empty strings; the two are joined by a newline.
 */
function getExecSyncErrorOutput(error) {
    const { stderr, stdout } = error;
    return `${stderr ?? ""}\n${stdout ?? ""}`;
}
98
/** Transient lock/serialization failures that are safe to retry after a delay. */
function isRetryablePostgresError(error) {
    const retryableMessages = [
        "deadlock detected",
        "could not obtain lock",
        "canceling statement due to lock timeout",
        "could not serialize access",
    ];
    const output = getExecSyncErrorOutput(error);
    return retryableMessages.some((message) => output.includes(message));
}
105
/**
 * Errors meaning the staging database could not be dropped/recreated (still
 * in use, or insufficient privileges) but may safely be reused as-is.
 */
function canReuseExistingStagingDatabase(error) {
    const reusableMessages = [
        "permission denied to terminate process",
        "database is being accessed by other users",
        "cannot drop the currently open database",
    ];
    const output = getExecSyncErrorOutput(error);
    return reusableMessages.some((message) => output.includes(message));
}
111
/** True when psql reported that the named FDW server does not exist. */
function isMissingForeignServerError(error, serverName) {
    const output = getExecSyncErrorOutput(error);
    return output.includes(`server "${serverName}" does not exist`);
}
114
/**
 * Run `operation`, retrying with exponential backoff when it fails with a
 * transient PostgreSQL lock error (see isRetryablePostgresError).
 *
 * @param operation - callable, synchronous or async; its (awaited) result is returned
 * @param options - CLI options; only "silent" is read, to gate the warning log
 * @param retryOptions - { attempts, delayMs, label }
 * @returns the operation's result
 * @throws the last error when it is not retryable or attempts are exhausted
 */
async function runWithRetry(operation, options, retryOptions) {
    let attempt = 1;
    let delayMs = retryOptions.delayMs;
    while (true) {
        try {
            // Await here so a rejected promise from an async operation is
            // caught and retried; `return operation()` would hand the pending
            // rejection straight back to the caller without any retry.
            return await operation();
        }
        catch (error) {
            if (!isRetryablePostgresError(error) || attempt >= retryOptions.attempts) {
                throw error;
            }
            if (!options["silent"]) {
                console.warn(`${retryOptions.label} hit a transient PostgreSQL lock error ` +
                    `on attempt ${attempt}/${retryOptions.attempts}; retrying in ${delayMs}ms...`);
            }
            await sleep(delayMs);
            attempt += 1;
            delayMs *= 2; // exponential backoff between attempts
        }
    }
}
135
/**
 * Assemble the executable and argument vector for a psql call against
 * `connection`, appending `baseArgs` after the connection flags. When the
 * --sudo option is set, the whole call is wrapped in `sudo -u <user> psql`.
 *
 * @returns {{ command: string, args: string[] }}
 */
function buildPsqlInvocation(baseArgs, connection, options) {
    const psqlArgs = [
        "--quiet",
        "-h", connection.host,
        "-p", String(connection.port),
        "-U", connection.user,
        "-d", connection.name,
        ...baseArgs,
    ];
    const sudoUser = options["sudo"];
    if (sudoUser) {
        return {
            command: "sudo",
            args: ["-u", sudoUser, "psql", ...psqlArgs],
        };
    }
    return { command: "psql", args: psqlArgs };
}
156
/**
 * Execute a SQL file through `psql -f` against `connection`, with `dataDir`
 * as the working directory. With stopOnError (the default) psql is run with
 * ON_ERROR_STOP=1 so the first SQL error aborts the whole file.
 */
function runPsqlFile(sqlFilePath, dataDir, options, connection, stopOnError = true) {
    const errorFlags = stopOnError ? ["-v", "ON_ERROR_STOP=1"] : [];
    const invocation = buildPsqlInvocation([...errorFlags, "-f", sqlFilePath], connection, options);
    execFileSync(invocation.command, invocation.args, {
        cwd: dataDir,
        encoding: "utf-8",
        env: connectionEnv(connection),
        // Capture output so thrown errors carry psql's stdout/stderr text.
        stdio: ["ignore", "pipe", "pipe"],
    });
}
165
/**
 * Execute a single SQL command string via `psql -c` against `connection`.
 * Output is captured (not inherited) so callers can inspect psql's messages
 * through the thrown error's stdout/stderr on failure.
 * (The redundant `const psqlCommand = command;` alias was removed.)
 */
function runPsqlCommand(command, dataDir, options, connection, stopOnError = true) {
    const { command: binary, args } = buildPsqlInvocation([...(stopOnError ? ["-v", "ON_ERROR_STOP=1"] : []), "-c", command], connection, options);
    execFileSync(binary, args, {
        cwd: dataDir,
        env: connectionEnv(connection),
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "pipe"],
    });
}
175
/**
 * Run a SQL query via `psql -At -c` (unaligned, tuples-only output) and
 * return its raw stdout, suitable for line-by-line parsing.
 * (The redundant `const psqlCommand = command;` alias was removed.)
 *
 * @returns {string} psql's stdout
 */
function runPsqlQuery(command, dataDir, options, connection, stopOnError = true) {
    const { command: binary, args } = buildPsqlInvocation([...(stopOnError ? ["-v", "ON_ERROR_STOP=1"] : []), "-At", "-c", command], connection, options);
    return execFileSync(binary, args, {
        cwd: dataDir,
        env: connectionEnv(connection),
        encoding: "utf-8",
        stdio: ["ignore", "pipe", "pipe"],
    });
}
185
// Guard against merging an empty staging import into the target schema:
// count the tables actually present in the dataset's staging schema and
// abort (assert) when none were imported.
function ensureStagingSchemaHasTables(dataset, dataDir, options, connection) {
    const stagingSchema = stagingSchemaName(dataset.database);
    // psql -At output is a single number on one line; parse with explicit radix.
    const tableCount = Number.parseInt(runPsqlQuery(`SELECT count(*) FROM pg_tables WHERE schemaname = '${escapeSqlLiteral(stagingSchema)}'`, dataDir, options, connection).trim(), 10);
    assert(tableCount > 0, `Staging schema ${stagingSchema} is empty after importing ${dataset.database}. ` +
        `Aborting incremental merge to protect ${senatSchemaName}.`);
}
191
/**
 * (Re-)create the staging database: terminate lingering connections, then
 * drop and recreate it. When the current role lacks the privileges to do so
 * (see canReuseExistingStagingDatabase), fall back to reusing the existing
 * database, creating it only if it is missing.
 */
function ensureStagingDatabase(dataDir, options, runtime) {
    // DROP/CREATE DATABASE cannot run against the database being dropped, so
    // connect to a "maintenance" database instead.
    const maintenanceDb = process.env["PGDATABASE"] || "postgres";
    const maintenanceConnection = {
        ...runtime.staging,
        name: maintenanceDb,
    };
    try {
        // Use the shared escapeSqlLiteral helper instead of an ad-hoc
        // .replace(/'/g, "''"), consistent with every other SQL-literal
        // interpolation in this file.
        runPsqlCommand(`SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${escapeSqlLiteral(runtime.staging.name)}' AND pid <> pg_backend_pid();`, dataDir, options, maintenanceConnection, false);
        runPsqlCommand(`DROP DATABASE IF EXISTS ${runtime.staging.name}`, dataDir, options, maintenanceConnection, false);
        runPsqlCommand(`CREATE DATABASE ${runtime.staging.name} WITH OWNER ${runtime.staging.user}`, dataDir, options, maintenanceConnection);
    }
    catch (error) {
        if (!canReuseExistingStagingDatabase(error)) {
            throw error;
        }
        if (!options["silent"]) {
            console.warn(`Could not recreate staging database ${runtime.staging.name}; reusing the existing database instead.`);
        }
        ensureDatabaseExists(runtime.staging, dataDir, options);
    }
}
212
/**
 * Create database `connection.name` (owned by `connection.user`) when it
 * does not already exist. The existence check and CREATE DATABASE both run
 * against a maintenance database, since we cannot connect to a missing one.
 */
function ensureDatabaseExists(connection, dataDir, options) {
    const maintenanceConnection = {
        ...connection,
        name: process.env["PGDATABASE"] || "postgres",
    };
    const existsQuery = `SELECT 1 FROM pg_database WHERE datname = '${escapeSqlLiteral(connection.name)}'`;
    const alreadyExists = runPsqlQuery(existsQuery, dataDir, options, maintenanceConnection).trim() === "1";
    if (alreadyExists) {
        return;
    }
    runPsqlCommand(`CREATE DATABASE ${connection.name} WITH OWNER ${connection.user}`, dataDir, options, maintenanceConnection);
}
224
// Set up postgres_fdw on the target database so it can read the staging
// database's tables remotely: create the extension, recreate the foreign
// server definition, and map the current user to the staging credentials.
function ensureForeignStagingServer(dataDir, options, runtime) {
    runPsqlCommand("CREATE EXTENSION IF NOT EXISTS postgres_fdw", dataDir, options, runtime.target);
    // Drop first (best effort, stopOnError=false) so repeated runs are idempotent.
    runPsqlCommand(`DROP SERVER IF EXISTS ${stagingServerName} CASCADE`, dataDir, options, runtime.target, false);
    runPsqlCommand([
        `CREATE SERVER ${stagingServerName}`,
        "FOREIGN DATA WRAPPER postgres_fdw",
        [
            "OPTIONS (",
            `host '${escapeSqlLiteral(runtime.staging.host)}', `,
            `dbname '${escapeSqlLiteral(runtime.staging.name)}', `,
            `port '${escapeSqlLiteral(String(runtime.staging.port))}'`,
            ")",
        ].join(""),
    ].join(" "), dataDir, options, runtime.target);
    // The user mapping stores the staging credentials that FDW queries use
    // when the target database connects to the staging database.
    runPsqlCommand([
        `CREATE USER MAPPING FOR CURRENT_USER SERVER ${stagingServerName}`,
        [
            "OPTIONS (",
            `user '${escapeSqlLiteral(runtime.staging.user)}', `,
            `password '${escapeSqlLiteral(runtime.staging.password)}'`,
            ")",
        ].join(""),
    ].join(" "), dataDir, options, runtime.target);
}
248
/** Best-effort removal of the FDW server definition from the target database. */
function cleanupForeignStagingServer(dataDir, options, runtime) {
    const dropServerSql = `DROP SERVER IF EXISTS ${stagingServerName} CASCADE`;
    runPsqlCommand(dropServerSql, dataDir, options, runtime.target, false);
}
251
// Expose the staging database's schema for `dataset` inside the target
// database via IMPORT FOREIGN SCHEMA, recreating the local schema first.
// If the foreign server definition has vanished (e.g. dropped concurrently),
// recreate it once and retry the import; any other error propagates.
function mountForeignStagingSchema(dataset, dataDir, options, runtime) {
    const stagingSchema = stagingSchemaName(dataset.database);
    // Same name on both sides: foreign schema <stagingSchema> is imported
    // into a local schema also called <stagingSchema>.
    const importForeignSchemaCommand = `IMPORT FOREIGN SCHEMA ${stagingSchema} ` + `FROM SERVER ${stagingServerName} INTO ${stagingSchema}`;
    runPsqlCommand(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE`, dataDir, options, runtime.target, false);
    runPsqlCommand(`CREATE SCHEMA ${stagingSchema}`, dataDir, options, runtime.target);
    try {
        runPsqlCommand(importForeignSchemaCommand, dataDir, options, runtime.target);
    }
    catch (error) {
        if (!isMissingForeignServerError(error, stagingServerName)) {
            throw error;
        }
        if (!options["silent"]) {
            console.warn(`Foreign server ${stagingServerName} disappeared before schema import; recreating it.`);
        }
        ensureForeignStagingServer(dataDir, options, runtime);
        runPsqlCommand(importForeignSchemaCommand, dataDir, options, runtime.target);
    }
}
270
// Create the <schema>.version bookkeeping table when missing (SQL built by
// buildEnsureSchemaVersionTableSql).
function ensureSchemaVersionTable(dataDir, options, runtime) {
    runPsqlCommand(buildEnsureSchemaVersionTableSql(senatSchemaName), dataDir, options, runtime.target);
}
273
// Compute a fingerprint of the target schema's structure (query built by
// buildSchemaStructureFingerprintQuery); compared before/after an import to
// decide whether the schema version should be bumped.
function getSchemaStructureFingerprint(dataDir, options, runtime) {
    return runPsqlQuery(buildSchemaStructureFingerprintQuery(senatSchemaName), dataDir, options, runtime.target).trim();
}
276
/** Read the current integer version number from <schema>.version. */
function getSchemaVersionNumber(dataDir, options, runtime) {
    const rawVersion = runPsqlQuery(`SELECT number FROM ${senatSchemaName}.version`, dataDir, options, runtime.target);
    return Number.parseInt(rawVersion.trim(), 10);
}
280
/**
 * Increment <schema>.version when the schema's structural fingerprint has
 * changed since `previousFingerprint`, then (unless --silent) report the
 * resulting version number.
 */
function bumpSchemaVersionIfNeeded(previousFingerprint, dataDir, options, runtime) {
    const currentFingerprint = getSchemaStructureFingerprint(dataDir, options, runtime);
    const structureChanged = currentFingerprint !== previousFingerprint;
    if (structureChanged) {
        runPsqlCommand(buildIncrementSchemaVersionSql(senatSchemaName), dataDir, options, runtime.target);
    }
    if (options["silent"]) {
        return;
    }
    const versionNumber = getSchemaVersionNumber(dataDir, options, runtime);
    if (structureChanged) {
        console.log(`Incremented ${senatSchemaName}.version to ${versionNumber} after schema structure change.`);
    }
    else {
        console.log(`Current ${senatSchemaName}.version: ${versionNumber}.`);
    }
}
295
// Write the dataset's merge/post-import SQL to <db>_post_import.sql in
// dataDir, mount the staging schema over FDW when running incrementally,
// run the SQL against the target database, then drop the staging schema.
function finalizeDatasetImport(dataset, dataDir, options, runtime) {
    const postImportFilePath = path.join(dataDir, `${dataset.database}_post_import.sql`);
    const postImportSql = buildIncrementalDatasetImportSql(dataset.database, runtime.target.user, dataset.mergeKeys, dataset.rowMultisetMergeTables);
    const stagingSchema = stagingSchemaName(dataset.database);
    fs.writeFileSync(postImportFilePath, postImportSql, { encoding: "utf8" });
    if (isIncrementalImport(options)) {
        // Incremental runs read staging tables through the FDW mount; full
        // imports already have the staging schema locally.
        mountForeignStagingSchema(dataset, dataDir, options, runtime);
    }
    runPsqlFile(postImportFilePath, dataDir, options, runtime.target);
    // Best-effort cleanup (stopOnError=false): the schema may already be gone.
    runPsqlCommand(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE`, dataDir, options, runtime.target, false);
}
306
/**
 * Replay metadata statements exported from the staging database on the
 * target database. Statements arrive hex-encoded, one per line, so
 * arbitrary SQL survives psql's unaligned output; decode each line and run
 * it in order.
 */
function applyStagingMetadataToTarget(dataset, dataDir, options, runtime) {
    const stagingSchema = stagingSchemaName(dataset.database);
    const exportQuery = buildExportStagingMetadataStatementsQuery(stagingSchema, senatSchemaName);
    const rawOutput = runPsqlQuery(exportQuery, dataDir, options, runtime.staging);
    const encodedStatements = rawOutput
        .split("\n")
        .map((line) => line.trim())
        .filter((line) => line.length > 0);
    for (const encodedStatement of encodedStatements) {
        const decodedStatement = Buffer.from(encodedStatement, "hex").toString("utf8");
        runPsqlCommand(decodedStatement, dataDir, options, runtime.target);
    }
}
317
/**
 * List the table names in `schemaName`, sorted alphabetically.
 * The schema name is now escaped with escapeSqlLiteral before being
 * interpolated into the query, matching ensureStagingSchemaHasTables
 * (previously it was interpolated raw).
 *
 * @returns {string[]} trimmed, non-empty table names
 */
function listTablesInSchema(schemaName, dataDir, options, connection) {
    const query = ["SELECT tablename", "FROM pg_tables", `WHERE schemaname = '${escapeSqlLiteral(schemaName)}'`, "ORDER BY tablename"].join("\n");
    const output = runPsqlQuery(query, dataDir, options, connection);
    return output
        .split("\n")
        .map((tableName) => tableName.trim())
        .filter((tableName) => tableName.length > 0);
}
325
// Create the dataset's configured ("managed") indexes on the staging
// database's tables before the merge. Tables not present in staging are
// skipped with a warning; an individual index failure is logged and does
// not abort the run.
function createManagedIndexesInStaging(dataset, dataDir, options, runtime) {
    if (!dataset.indexes) {
        return;
    }
    const stagingSchema = stagingSchemaName(dataset.database);
    const importedTables = new Set(listTablesInSchema(stagingSchema, dataDir, options, runtime.staging));
    // NOTE(review): the schema is created *after* its tables are listed; if
    // it did not exist yet, importedTables is empty and every index is
    // skipped. Presumably the schema always exists by this point — confirm.
    runPsqlCommand(`CREATE SCHEMA IF NOT EXISTS ${stagingSchema};`, dataDir, options, runtime.staging);
    for (const [table, indexes] of Object.entries(dataset.indexes)) {
        if (!importedTables.has(table)) {
            if (!options["silent"]) {
                console.warn(`Skipping managed indexes for missing table ${stagingSchema}.${table}`);
            }
            continue;
        }
        for (const index of indexes) {
            // Prefix the index name with the dataset's database name so it
            // stays unique across datasets sharing the target schema.
            const indexName = prefixedName(dataset.database, `${table}_${index.name}`);
            const columns = index.columns.join(", ");
            const sql = `CREATE INDEX IF NOT EXISTS ${indexName} ON ${stagingSchema}.${table} (${columns});`;
            try {
                runPsqlCommand(sql, dataDir, options, runtime.staging);
            }
            catch (err) {
                console.error(`Failed to create managed index ${indexName} on ${stagingSchema}.${table}:`, err);
                continue;
            }
            if (!options["silent"]) {
                console.log(`Prepared managed index ${indexName} on ${stagingSchema}.${table}`);
            }
        }
    }
}
356
+ function listPrefixedTables(dataset, dataDir, options, runtime) {
357
+ const prefix = `${dataset.database}_`;
358
+ const query = [
359
+ "SELECT tablename",
360
+ "FROM pg_tables",
361
+ `WHERE schemaname = '${senatSchemaName}'`,
362
+ ` AND tablename LIKE '${prefix.replace(/_/g, "\\_")}%' ESCAPE '\\'`,
363
+ "ORDER BY tablename",
364
+ ].join("\n");
365
+ const output = runPsqlQuery(query, dataDir, options, runtime.target);
366
+ return output
367
+ .split("\n")
368
+ .map((tableName) => tableName.trim())
369
+ .filter((tableName) => tableName.length > 0);
370
+ }
371
+ function toPascalCase(text) {
372
+ return text.replace(/(^|_)([a-z0-9])/gi, (_match, _separator, letter) => letter.toUpperCase());
373
+ }
374
+ function toIdentifierName(tableName, columnName) {
375
+ return `${toPascalCase(tableName)}${toPascalCase(columnName)}`;
376
+ }
377
+ function trimComment(comment) {
378
+ const trimmed = comment?.trim();
379
+ return trimmed ? trimmed : undefined;
380
+ }
381
+ async function generateRawTypes(dataset, runtime, prefixedTables) {
382
+ await fs.ensureDir(rawTypesDir);
383
+ const definitionFilePath = getGeneratedDefinitionPath(dataset.database);
384
+ const manifestFilePath = getGeneratedTableManifestPath(dataset.database);
385
+ const datasetPrefix = `${dataset.database}_`;
386
+ const datasetOutputPath = path.join(rawTypesDir, dataset.database);
387
+ const generateZodSchemas = makeGenerateZodSchemas({
388
+ castToSchema: true,
389
+ getZodIdentifierMetadata: (column, details) => ({
390
+ name: `${toIdentifierName(stripDatasetPrefix(details.name, dataset.database), column.name)}Schema`,
391
+ }),
392
+ getZodSchemaMetadata: (details, generateFor) => {
393
+ const baseName = toPascalCase(stripDatasetPrefix(details.name, dataset.database));
394
+ const suffix = generateFor === "selector" || generateFor === undefined ? "" : toPascalCase(generateFor);
395
+ return {
396
+ name: `${baseName}${suffix}Schema`,
397
+ path: datasetOutputPath,
398
+ };
399
+ },
400
+ });
401
+ await fs.remove(definitionFilePath);
402
+ await fs.remove(manifestFilePath);
403
+ await processDatabase({
404
+ connection: {
405
+ database: runtime.target.name,
406
+ host: runtime.target.host,
407
+ password: runtime.target.password,
408
+ port: runtime.target.port,
409
+ user: runtime.target.user,
410
+ },
411
+ filter: (pgType) => pgType.schemaName === senatSchemaName && pgType.name.startsWith(datasetPrefix),
412
+ generators: [
413
+ makePgTsGenerator({
414
+ filter: (pgType) => pgType.schemaName === senatSchemaName && pgType.name.startsWith(datasetPrefix),
415
+ generateIdentifierType: (column, details, builtinType) => {
416
+ const tableName = stripDatasetPrefix(details.name, dataset.database);
417
+ return {
418
+ ...builtinType,
419
+ comment: undefined,
420
+ name: toIdentifierName(tableName, column.name),
421
+ typeDefinition: builtinType.typeDefinition.map((typeDefinition) => typeDefinition.replace(/ & \{ __flavor\?: '[^']+' \}/g, "").replace(/ & \{ __brand: '[^']+' \}/g, "")),
422
+ };
423
+ },
424
+ getMetadata: (details, generateFor, builtinMetadata) => {
425
+ const baseName = toPascalCase(stripDatasetPrefix(details.name, dataset.database));
426
+ const suffix = generateFor === "selector" || generateFor === undefined ? "" : toPascalCase(generateFor);
427
+ const tableComment = trimComment(details.comment);
428
+ return {
429
+ ...builtinMetadata,
430
+ comment: tableComment ? [tableComment] : undefined,
431
+ exportAs: "named",
432
+ name: `${baseName}${suffix}`,
433
+ path: datasetOutputPath,
434
+ };
435
+ },
436
+ getPropertyMetadata: (property, _details, generateFor, builtinMetadata) => {
437
+ const comment = trimComment(property.comment);
438
+ const defaultComment = generateFor === "initializer" && property.defaultValue !== null && property.defaultValue !== undefined
439
+ ? `Default value: ${property.defaultValue}`
440
+ : undefined;
441
+ const comments = [comment, defaultComment].filter((value) => value !== undefined);
442
+ return {
443
+ ...builtinMetadata,
444
+ comment: comments.length > 0 ? comments : undefined,
445
+ };
446
+ },
447
+ preRenderHooks: [generateZodSchemas],
448
+ }),
449
+ ],
450
+ outputPath: rawTypesDir,
451
+ postRenderHooks: [markAsGenerated, formatWithPrettier],
452
+ schemaNames: [senatSchemaName],
453
+ typescriptConfig: {
454
+ enumStyle: "literal-union",
455
+ tsModuleFormat: "esm",
456
+ },
457
+ });
458
+ const generatedDefinition = await fs.readFile(definitionFilePath, { encoding: "utf8" });
459
+ await fs.writeFile(definitionFilePath, generatedDefinition.replace(/\r\n/g, "\n"));
460
+ await fs.writeFile(manifestFilePath, buildGeneratedTableManifest(dataset.database, prefixedTables));
461
+ }
62
462
  async function downloadFile(url, dest) {
63
463
  const response = await fetch(url);
64
464
  if (!response.ok) {
65
465
  throw new Error(`Download failed ${response.status} ${response.statusText} for ${url}`);
66
466
  }
67
- await streamPipeline(response.body, fs.createWriteStream(dest));
467
+ assert(response.body, `Empty response body for ${url}`);
468
+ await streamPipeline(Readable.fromWeb(response.body), fs.createWriteStream(dest));
68
469
  }
69
- /**
70
- * Copy a dataset database to the main Senat database (overwriting its contents).
71
- */
72
- async function copyToSenat(dataset, dataDir, options) {
470
+ async function importIntoStaging(dataset, dataDir, options, runtime) {
73
471
  if (!options["silent"]) {
74
- console.log(`Copying ${dataset.database} to Senat database...`);
472
+ console.log(`Importing ${dataset.database} into staging database ${runtime.staging.name}...`);
75
473
  }
76
474
  const sqlFilePath = path.join(dataDir, `${dataset.database}.sql`);
77
475
  const schemaDumpFile = path.join(dataDir, `${dataset.database}_schema_dump.sql`);
78
- // Write the header and then stream the rest of the SQL file
476
+ const normalizeSqlFile = path.join(dataDir, `${dataset.database}_normalize_staging.sql`);
477
+ const stagingSchema = stagingSchemaName(dataset.database);
79
478
  const schemaSqlWriter = fs.createWriteStream(schemaDumpFile, { encoding: "utf8" });
80
- // Add CREATE SCHEMA statement at the top
81
- schemaSqlWriter.write(`DROP SCHEMA IF EXISTS ${dataset.database} CASCADE;\n`);
82
- schemaSqlWriter.write(`CREATE SCHEMA IF NOT EXISTS ${dataset.database};\n`);
83
- schemaSqlWriter.write(`GRANT USAGE ON SCHEMA ${dataset.database} TO ${config.db.user};\n`);
84
- schemaSqlWriter.write(`GRANT SELECT ON ALL TABLES IN SCHEMA ${dataset.database} TO ${config.db.user};\n`);
85
- schemaSqlWriter.write(`ALTER DEFAULT PRIVILEGES IN SCHEMA ${dataset.database} GRANT SELECT ON TABLES TO ${config.db.user};\n`);
479
+ schemaSqlWriter.write(`DROP SCHEMA IF EXISTS ${stagingSchema} CASCADE;\n`);
480
+ schemaSqlWriter.write(`CREATE SCHEMA IF NOT EXISTS ${stagingSchema};\n`);
86
481
  const lineReader = readline.createInterface({
87
482
  input: fs.createReadStream(sqlFilePath, { encoding: "utf8" }),
88
483
  crlfDelay: Infinity,
89
484
  });
485
+ let inCopyData = false;
90
486
  for await (const line of lineReader) {
91
- let newLine = line;
92
- // Replace 'public' schema outside single-quoted strings
93
- function replacePublicOutsideStrings(line, schema) {
94
- const parts = line.split(/(')/);
95
- let inString = false;
96
- for (let i = 0; i < parts.length; i++) {
97
- if (parts[i] === "'") {
98
- inString = !inString;
99
- }
100
- else if (!inString) {
101
- // Only replace outside of strings, including before comma
102
- parts[i] = parts[i].replace(/\bpublic\b(?=(\s*\.|\s*[,;]|\s|$))/g, schema);
103
- }
487
+ let newLine = rewriteLineForStagingImport(line, dataset, stagingSchema, inCopyData);
488
+ if (!inCopyData) {
489
+ newLine = newLine.replace(/SET client_encoding = 'LATIN1';/i, "SET client_encoding = 'UTF8';");
490
+ if (isCopyFromStdinLine(newLine)) {
491
+ inCopyData = true;
104
492
  }
105
- return parts.join("");
106
493
  }
107
- newLine = replacePublicOutsideStrings(line, dataset.database);
108
- // Replace SET client_encoding to UTF8
109
- newLine = newLine.replace(/SET client_encoding = 'LATIN1';/i, "SET client_encoding = 'UTF8';");
494
+ else if (line === "\\.") {
495
+ inCopyData = false;
496
+ }
110
497
  schemaSqlWriter.write(newLine + "\n");
111
498
  }
112
499
  schemaSqlWriter.end();
113
500
  await new Promise((resolve, reject) => {
114
501
  schemaSqlWriter.on("finish", () => {
115
502
  try {
116
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -f ${schemaDumpFile}`, {
117
- env: process.env,
118
- encoding: "utf-8",
119
- stdio: ["ignore", "pipe", "pipe"],
120
- });
503
+ runPsqlFile(schemaDumpFile, dataDir, options, runtime.staging, false);
504
+ createManagedIndexesInStaging(dataset, dataDir, options, runtime);
505
+ fs.writeFileSync(normalizeSqlFile, buildNormalizeStagingSchemaSql(dataset.database), { encoding: "utf8" });
506
+ runWithRetry(() => runPsqlFile(normalizeSqlFile, dataDir, options, runtime.staging), options, {
507
+ attempts: 4,
508
+ delayMs: 500,
509
+ label: `Staging normalization for ${dataset.database}`,
510
+ })
511
+ .then(() => {
512
+ ensureStagingSchemaHasTables(dataset, dataDir, options, runtime.staging);
513
+ resolve();
514
+ })
515
+ .catch(reject);
121
516
  }
122
517
  catch (error) {
518
+ const execError = error;
123
519
  if (!options["silent"]) {
124
- console.error(`Failed to import ${dataset.database} schema:`);
125
- if (error.stderr) {
126
- console.error(error.stderr);
520
+ console.error(`Failed to import ${dataset.database} data into staging:`);
521
+ if (execError.stderr) {
522
+ console.error(execError.stderr);
127
523
  }
128
- if (error.stdout) {
129
- console.error(error.stdout);
524
+ if (execError.stdout) {
525
+ console.error(execError.stdout);
130
526
  }
131
527
  }
528
+ reject(error);
132
529
  }
133
- resolve();
134
530
  });
135
531
  schemaSqlWriter.on("error", reject);
136
532
  });
137
533
  }
138
- async function retrieveDataset(dataDir, dataset) {
534
+ async function retrieveDataset(dataDir, dataset, options, runtime) {
139
535
  const zipFilename = dataset.url.substring(dataset.url.lastIndexOf("/") + 1);
140
536
  const zipFilePath = path.join(dataDir, zipFilename);
141
537
  if (options["all"] || options["fetch"]) {
142
- // Fetch & save ZIP file.
143
538
  if (!options["silent"]) {
144
- console.log(`Loading ${dataset.title}: ${zipFilename}…`);
539
+ console.log(`Loading ${dataset.title}: ${zipFilename}...`);
145
540
  }
146
- // Fetch fails with OpenSSL error: dh key too small.
147
- // (so does "curl").
148
- // const response = await fetch(dataset.url)
149
- // if (!response.ok) {
150
- // console.error(response.status, response.statusText)
151
- // console.error(await response.text())
152
- // throw new Error(`Fetch failed: ${dataset.url}`)
153
- // }
154
- // await pipeline(response.body!, fs.createWriteStream(zipFilePath))
155
541
  fs.removeSync(zipFilePath);
156
542
  await downloadFile(dataset.url, zipFilePath);
157
543
  }
@@ -159,7 +545,7 @@ async function retrieveDataset(dataDir, dataset) {
159
545
  const sqlFilePath = path.join(dataDir, sqlFilename);
160
546
  if (options["all"] || options["unzip"]) {
161
547
  if (!options["silent"]) {
162
- console.log(`Unzipping ${dataset.title}: ${zipFilename}…`);
548
+ console.log(`Unzipping ${dataset.title}: ${zipFilename}...`);
163
549
  }
164
550
  fs.removeSync(sqlFilePath);
165
551
  const zip = new StreamZip({
@@ -168,7 +554,7 @@ async function retrieveDataset(dataDir, dataset) {
168
554
  });
169
555
  await new Promise((resolve, reject) => {
170
556
  zip.on("ready", () => {
171
- zip.extract(null, dataDir, (err, _count) => {
557
+ zip.extract(null, dataDir, (err) => {
172
558
  zip.close();
173
559
  if (err) {
174
560
  reject(err);
@@ -181,131 +567,106 @@ async function retrieveDataset(dataDir, dataset) {
181
567
  });
182
568
  if (dataset.repairZip !== undefined) {
183
569
  if (!options["silent"]) {
184
- console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}…`);
570
+ console.log(`Repairing Zip path ${dataset.title}: ${sqlFilename}...`);
185
571
  }
186
572
  dataset.repairZip(dataset, dataDir);
187
573
  }
188
574
  }
189
575
  if ((options["all"] || options["repairEncoding"]) && dataset.repairEncoding) {
190
576
  if (!options["silent"]) {
191
- console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}…`);
577
+ console.log(`Repairing Windows CP1252 encoding in ${dataset.title}: ${sqlFilename}...`);
192
578
  }
193
579
  const repairedSqlFilePath = sqlFilePath + ".repaired";
194
580
  const repairedSqlWriter = fs.createWriteStream(repairedSqlFilePath, {
195
581
  encoding: "utf8",
196
582
  });
197
- // Read the file as latin1 (ISO-8859-1/CP1252) and write as UTF-8
198
583
  const lineReader = readline.createInterface({
199
584
  input: fs.createReadStream(sqlFilePath, { encoding: "latin1" }),
200
585
  crlfDelay: Infinity,
201
586
  });
202
587
  for await (const line of lineReader) {
203
- // Optionally repair Windows-1252 control characters
204
- let repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
588
+ const repairedLine = line.replace(badWindows1252CharacterRegex, (match) => windows1252.decode(match, { mode: "fatal" }));
205
589
  repairedSqlWriter.write(repairedLine + "\n");
206
590
  }
207
591
  repairedSqlWriter.end();
208
592
  await fs.move(repairedSqlFilePath, sqlFilePath, { overwrite: true });
209
593
  }
210
- if (options["all"] || options["import"] || options["schema"]) {
594
+ if (options["all"] || options["import"]) {
211
595
  if (!options["silent"]) {
212
- console.log(`Importing ${dataset.title}: ${sqlFilename}…`);
596
+ console.log(`Merging ${dataset.title}: ${sqlFilename} into ${runtime.target.name}.${senatSchemaName}...`);
213
597
  }
214
- await copyToSenat(dataset, dataDir, options);
215
- // Create indexes programmatically after import
216
- if (dataset.indexes) {
217
- for (const [table, indexes] of Object.entries(dataset.indexes)) {
218
- for (const index of indexes) {
219
- const indexName = index.name;
220
- const columns = index.columns.join(", ");
221
- const schema = dataset.database;
222
- const sql = `CREATE INDEX IF NOT EXISTS ${indexName} ON ${schema}.${table} (${columns});`;
223
- try {
224
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -d senat -c "${sql}"`, {
225
- env: process.env,
226
- encoding: "utf-8",
227
- stdio: ["ignore", "ignore", "pipe"],
228
- });
229
- if (!options["silent"]) {
230
- console.log(`Created index: ${indexName} on ${schema}.${table} (${columns})`);
231
- }
232
- }
233
- catch (err) {
234
- console.error(`Failed to create index ${indexName} on ${schema}.${table}:`, err);
235
- }
236
- }
237
- }
598
+ await importIntoStaging(dataset, dataDir, options, runtime);
599
+ finalizeDatasetImport(dataset, dataDir, options, runtime);
600
+ if (isIncrementalImport(options)) {
601
+ applyStagingMetadataToTarget(dataset, dataDir, options, runtime);
238
602
  }
239
603
  }
240
604
  if (options["schema"]) {
241
- let definitionsDir = path.resolve("src", "raw_types_schemats");
242
- assert(fs.statSync(definitionsDir).isDirectory());
605
+ await fs.ensureDir(rawTypesDir);
243
606
  if (!options["silent"]) {
244
- console.log(`Creating TypeScript definitions from schema '${dataset.database}' in database 'senat'…`);
607
+ console.log(`Creating TypeScript definitions from prefixed ${senatSchemaName} tables ` +
608
+ `for '${dataset.database}' in database '${runtime.target.name}'...`);
245
609
  }
246
- const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/senat`;
247
- let definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
248
- execSync(`npx schemats generate -c ${dbConnectionString} -s ${dataset.database} -o ${definitionFilePath}`, {
249
- // cwd: dataDir,
250
- env: process.env,
251
- encoding: "utf-8",
252
- // stdio: ["ignore", "ignore", "pipe"],
253
- });
254
- const definition = fs.readFileSync(definitionFilePath, { encoding: "utf8" });
255
- const definitionRepaired = definition
256
- .replace(/\r\n/g, "\n")
257
- .replace(/AUTO-GENERATED FILE @ \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}/, "AUTO-GENERATED FILE");
258
- fs.writeFileSync(definitionFilePath, definitionRepaired);
259
- definitionsDir = path.resolve("src", "raw_types");
260
- definitionFilePath = path.join(definitionsDir, `${dataset.database}.ts`);
261
- execSync(`npx kysely-codegen --url '${dbConnectionString}' --default-schema ${dataset.database} --include-pattern '${dataset.database}.*' --out-file ${definitionFilePath}`, {
262
- env: process.env,
263
- encoding: "utf-8",
264
- // stdio: ["ignore", "ignore", "pipe"],
265
- });
610
+ const prefixedTables = listPrefixedTables(dataset, dataDir, options, runtime);
611
+ await generateRawTypes(dataset, runtime, prefixedTables);
266
612
  }
267
613
  }
614
+ function buildRuntimeContext() {
615
+ const target = {
616
+ host: config.db.host,
617
+ name: config.db.name,
618
+ password: config.db.password,
619
+ port: config.db.port,
620
+ user: config.db.user,
621
+ };
622
+ if (!isIncrementalImport(options)) {
623
+ return {
624
+ staging: target,
625
+ target,
626
+ };
627
+ }
628
+ const staging = {
629
+ host: config.stagingDb.host,
630
+ name: config.stagingDb.name,
631
+ password: config.stagingDb.password,
632
+ port: config.stagingDb.port,
633
+ user: config.stagingDb.user,
634
+ };
635
+ return { staging, target };
636
+ }
268
637
  async function retrieveOpenData() {
269
638
  const dataDir = options["dataDir"];
270
- assert(dataDir, "Missing argument: data directory");
271
- process.env = {
272
- ...process.env,
273
- PGHOST: process.env["PGHOST"] || config.db.host,
274
- PGPORT: process.env["PGPORT"] || String(config.db.port),
275
- PGDATABASE: process.env["PGDATABASE"] || config.db.name,
276
- PGUSER: process.env["PGUSER"] || config.db.user,
277
- PGPASSWORD: process.env["PGPASSWORD"] || config.db.password,
278
- };
279
- assert(process.env["PGHOST"] && process.env["PGPORT"] && process.env["PGUSER"] && process.env["PGPASSWORD"], "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
280
- console.time("data extraction time");
281
- // Create role 'opendata' if it does not exist
282
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "CREATE ROLE opendata" || true`, {
283
- cwd: dataDir,
284
- env: process.env,
285
- encoding: "utf-8",
286
- });
287
- execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "CREATE DATABASE senat WITH OWNER opendata" || true`, {
288
- cwd: dataDir,
289
- env: process.env,
290
- encoding: "utf-8",
291
- });
292
- const enabledDatasets = getEnabledDatasets(options["categories"]);
293
- const chosenDatasets = getChosenDatasets(enabledDatasets);
294
- for (const dataset of chosenDatasets) {
295
- await retrieveDataset(dataDir, dataset);
639
+ assertExistingDirectory(dataDir, "data directory");
640
+ assert(!options["sudo"] || !options["sudo"].startsWith("-"), "Option --sudo expects a Unix user name, for example: --sudo postgres");
641
+ const runtime = buildRuntimeContext();
642
+ assert(runtime.target.host && runtime.target.port && runtime.target.user && runtime.target.password, "Missing target database configuration: DB_* in .env file");
643
+ if (isIncrementalImport(options)) {
644
+ assert(runtime.staging.host && runtime.staging.port && runtime.staging.user && runtime.staging.password, "Missing staging database configuration: STAGING_DB_* in .env file");
645
+ assert(runtime.target.name !== runtime.staging.name, "Target and staging databases must be different");
296
646
  }
297
- if (options["schema"]) {
298
- const dbConnectionString = `postgres://${process.env["PGUSER"]}:${process.env["PGPASSWORD"]}@${process.env["PGHOST"]}:${process.env["PGPORT"]}/senat`;
299
- const definitionsDir = path.resolve("src", "raw_types");
300
- const definitionFilePath = path.join(definitionsDir, "senat.ts");
301
- execSync(`npx kysely-codegen --url '${dbConnectionString}' --out-file ${definitionFilePath}`, {
302
- env: process.env,
303
- encoding: "utf-8",
304
- // stdio: ["ignore", "ignore", "pipe"],
305
- });
647
+ console.time("data extraction time");
648
+ try {
649
+ ensureDatabaseExists(runtime.target, dataDir, options);
650
+ if (isIncrementalImport(options)) {
651
+ ensureStagingDatabase(dataDir, options, runtime);
652
+ ensureForeignStagingServer(dataDir, options, runtime);
653
+ }
654
+ ensureSchemaVersionTable(dataDir, options, runtime);
655
+ const initialSchemaFingerprint = getSchemaStructureFingerprint(dataDir, options, runtime);
656
+ const enabledDatasets = getEnabledDatasets(options["categories"]);
657
+ const chosenDatasets = getChosenDatasets(enabledDatasets);
658
+ for (const dataset of chosenDatasets) {
659
+ await retrieveDataset(dataDir, dataset, options, runtime);
660
+ }
661
+ bumpSchemaVersionIfNeeded(initialSchemaFingerprint, dataDir, options, runtime);
306
662
  }
307
- if (!options["silent"]) {
308
- console.timeEnd("data extraction time");
663
+ finally {
664
+ if (isIncrementalImport(options)) {
665
+ cleanupForeignStagingServer(dataDir, options, runtime);
666
+ }
667
+ if (!options["silent"]) {
668
+ console.timeEnd("data extraction time");
669
+ }
309
670
  }
310
671
  }
311
672
  retrieveOpenData()