@tricoteuses/senat 2.22.16 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +168 -0
- package/lib/aggregates.d.ts +52 -0
- package/lib/aggregates.js +930 -0
- package/lib/aggregates.mjs +713 -0
- package/lib/aggregates.ts +833 -0
- package/lib/config.d.ts +10 -0
- package/lib/config.js +16 -0
- package/lib/config.mjs +16 -0
- package/lib/config.ts +26 -0
- package/lib/databases.d.ts +2 -0
- package/lib/databases.js +26 -0
- package/lib/databases.mjs +57 -0
- package/lib/databases.ts +71 -0
- package/lib/datasets.d.ts +34 -0
- package/lib/datasets.js +233 -0
- package/lib/datasets.mjs +78 -0
- package/lib/datasets.ts +118 -0
- package/lib/fields.d.ts +10 -0
- package/lib/fields.js +68 -0
- package/lib/fields.mjs +22 -0
- package/lib/fields.ts +29 -0
- package/lib/git.d.ts +26 -0
- package/lib/git.js +167 -0
- package/lib/index.d.ts +13 -0
- package/lib/index.js +1 -0
- package/lib/index.mjs +7 -0
- package/lib/index.ts +64 -0
- package/lib/inserters.d.ts +98 -0
- package/lib/inserters.js +500 -0
- package/lib/inserters.mjs +360 -0
- package/lib/inserters.ts +521 -0
- package/lib/legislatures.json +38 -0
- package/lib/loaders.d.ts +58 -0
- package/lib/loaders.js +286 -0
- package/lib/loaders.mjs +158 -0
- package/lib/loaders.ts +271 -0
- package/lib/model/agenda.d.ts +6 -0
- package/lib/model/agenda.js +148 -0
- package/lib/model/ameli.d.ts +51 -0
- package/lib/model/ameli.js +149 -0
- package/lib/model/ameli.mjs +84 -0
- package/lib/model/ameli.ts +100 -0
- package/lib/model/commission.d.ts +18 -0
- package/lib/model/commission.js +269 -0
- package/lib/model/debats.d.ts +67 -0
- package/lib/model/debats.js +95 -0
- package/lib/model/debats.mjs +43 -0
- package/lib/model/debats.ts +68 -0
- package/lib/model/documents.d.ts +12 -0
- package/lib/model/documents.js +151 -0
- package/lib/model/dosleg.d.ts +7 -0
- package/lib/model/dosleg.js +326 -0
- package/lib/model/dosleg.mjs +196 -0
- package/lib/model/dosleg.ts +240 -0
- package/lib/model/index.d.ts +7 -0
- package/lib/model/index.js +7 -0
- package/lib/model/index.mjs +5 -0
- package/lib/model/index.ts +15 -0
- package/lib/model/questions.d.ts +45 -0
- package/lib/model/questions.js +89 -0
- package/lib/model/questions.mjs +71 -0
- package/lib/model/questions.ts +93 -0
- package/lib/model/scrutins.d.ts +13 -0
- package/lib/model/scrutins.js +114 -0
- package/lib/model/seance.d.ts +3 -0
- package/lib/model/seance.js +267 -0
- package/lib/model/sens.d.ts +146 -0
- package/lib/model/sens.js +454 -0
- package/lib/model/sens.mjs +415 -0
- package/lib/model/sens.ts +516 -0
- package/lib/model/texte.d.ts +7 -0
- package/lib/model/texte.js +256 -0
- package/lib/model/texte.mjs +208 -0
- package/lib/model/texte.ts +229 -0
- package/lib/model/util.d.ts +9 -0
- package/lib/model/util.js +38 -0
- package/lib/model/util.mjs +19 -0
- package/lib/model/util.ts +32 -0
- package/lib/parsers/texte.d.ts +7 -0
- package/lib/parsers/texte.js +228 -0
- package/lib/raw_types/ameli.d.ts +914 -0
- package/lib/raw_types/ameli.js +5 -0
- package/lib/raw_types/ameli.mjs +163 -0
- package/lib/raw_types/debats.d.ts +207 -0
- package/lib/raw_types/debats.js +5 -0
- package/lib/raw_types/debats.mjs +58 -0
- package/lib/raw_types/dosleg.d.ts +1619 -0
- package/lib/raw_types/dosleg.js +5 -0
- package/lib/raw_types/dosleg.mjs +438 -0
- package/lib/raw_types/questions.d.ts +419 -0
- package/lib/raw_types/questions.js +5 -0
- package/lib/raw_types/questions.mjs +11 -0
- package/lib/raw_types/senat.d.ts +11368 -0
- package/lib/raw_types/senat.js +5 -0
- package/lib/raw_types/sens.d.ts +8248 -0
- package/lib/raw_types/sens.js +5 -0
- package/lib/raw_types/sens.mjs +508 -0
- package/lib/raw_types_kysely/ameli.d.ts +915 -0
- package/lib/raw_types_kysely/ameli.js +7 -0
- package/lib/raw_types_kysely/ameli.mjs +5 -0
- package/lib/raw_types_kysely/ameli.ts +951 -0
- package/lib/raw_types_kysely/debats.d.ts +207 -0
- package/lib/raw_types_kysely/debats.js +7 -0
- package/lib/raw_types_kysely/debats.mjs +5 -0
- package/lib/raw_types_kysely/debats.ts +222 -0
- package/lib/raw_types_kysely/dosleg.d.ts +3532 -0
- package/lib/raw_types_kysely/dosleg.js +7 -0
- package/lib/raw_types_kysely/dosleg.mjs +5 -0
- package/lib/raw_types_kysely/dosleg.ts +3621 -0
- package/lib/raw_types_kysely/questions.d.ts +414 -0
- package/lib/raw_types_kysely/questions.js +7 -0
- package/lib/raw_types_kysely/questions.mjs +5 -0
- package/lib/raw_types_kysely/questions.ts +426 -0
- package/lib/raw_types_kysely/sens.d.ts +4394 -0
- package/lib/raw_types_kysely/sens.js +7 -0
- package/lib/raw_types_kysely/sens.mjs +5 -0
- package/lib/raw_types_kysely/sens.ts +4499 -0
- package/lib/raw_types_schemats/ameli.d.ts +539 -0
- package/lib/raw_types_schemats/ameli.js +2 -0
- package/lib/raw_types_schemats/ameli.mjs +2 -0
- package/lib/raw_types_schemats/ameli.ts +601 -0
- package/lib/raw_types_schemats/debats.d.ts +127 -0
- package/lib/raw_types_schemats/debats.js +2 -0
- package/lib/raw_types_schemats/debats.mjs +2 -0
- package/lib/raw_types_schemats/debats.ts +145 -0
- package/lib/raw_types_schemats/dosleg.d.ts +977 -0
- package/lib/raw_types_schemats/dosleg.js +2 -0
- package/lib/raw_types_schemats/dosleg.mjs +2 -0
- package/lib/raw_types_schemats/dosleg.ts +2193 -0
- package/lib/raw_types_schemats/questions.d.ts +235 -0
- package/lib/raw_types_schemats/questions.js +2 -0
- package/lib/raw_types_schemats/questions.mjs +2 -0
- package/lib/raw_types_schemats/questions.ts +249 -0
- package/lib/raw_types_schemats/sens.d.ts +6915 -0
- package/lib/raw_types_schemats/sens.js +2 -0
- package/lib/raw_types_schemats/sens.mjs +2 -0
- package/lib/raw_types_schemats/sens.ts +2907 -0
- package/lib/scripts/convert_data.d.ts +1 -0
- package/lib/scripts/convert_data.js +354 -0
- package/lib/scripts/convert_data.mjs +181 -0
- package/lib/scripts/convert_data.ts +243 -0
- package/lib/scripts/data-download.d.ts +1 -0
- package/lib/scripts/data-download.js +12 -0
- package/lib/scripts/datautil.d.ts +8 -0
- package/lib/scripts/datautil.js +34 -0
- package/lib/scripts/datautil.mjs +16 -0
- package/lib/scripts/datautil.ts +19 -0
- package/lib/scripts/images/transparent_150x192.jpg +0 -0
- package/lib/scripts/images/transparent_155x225.jpg +0 -0
- package/lib/scripts/parse_textes.d.ts +1 -0
- package/lib/scripts/parse_textes.js +44 -0
- package/lib/scripts/parse_textes.mjs +46 -0
- package/lib/scripts/parse_textes.ts +65 -0
- package/lib/scripts/retrieve_agenda.d.ts +1 -0
- package/lib/scripts/retrieve_agenda.js +132 -0
- package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
- package/lib/scripts/retrieve_cr_commission.js +364 -0
- package/lib/scripts/retrieve_cr_seance.d.ts +6 -0
- package/lib/scripts/retrieve_cr_seance.js +347 -0
- package/lib/scripts/retrieve_documents.d.ts +3 -0
- package/lib/scripts/retrieve_documents.js +219 -0
- package/lib/scripts/retrieve_documents.mjs +249 -0
- package/lib/scripts/retrieve_documents.ts +298 -0
- package/lib/scripts/retrieve_open_data.d.ts +1 -0
- package/lib/scripts/retrieve_open_data.js +315 -0
- package/lib/scripts/retrieve_open_data.mjs +217 -0
- package/lib/scripts/retrieve_open_data.ts +268 -0
- package/lib/scripts/retrieve_senateurs_photos.d.ts +1 -0
- package/lib/scripts/retrieve_senateurs_photos.js +147 -0
- package/lib/scripts/retrieve_senateurs_photos.mjs +147 -0
- package/lib/scripts/retrieve_senateurs_photos.ts +177 -0
- package/lib/scripts/retrieve_videos.d.ts +1 -0
- package/lib/scripts/retrieve_videos.js +461 -0
- package/lib/scripts/shared/cli_helpers.d.ts +95 -0
- package/lib/scripts/shared/cli_helpers.js +91 -0
- package/lib/scripts/shared/cli_helpers.ts +36 -0
- package/lib/scripts/shared/util.d.ts +4 -0
- package/lib/scripts/shared/util.js +35 -0
- package/lib/scripts/shared/util.ts +33 -0
- package/lib/scripts/test_iter_load.d.ts +1 -0
- package/lib/scripts/test_iter_load.js +12 -0
- package/lib/src/config.d.ts +22 -0
- package/lib/src/config.js +17 -7
- package/lib/src/conversion_textes.js +5 -1
- package/lib/src/databases.d.ts +2 -1
- package/lib/src/databases_postgres.d.ts +4 -0
- package/lib/src/databases_postgres.js +23 -0
- package/lib/src/datasets.d.ts +4 -0
- package/lib/src/datasets.js +16 -2
- package/lib/src/git.d.ts +1 -0
- package/lib/src/git.js +45 -11
- package/lib/src/index.d.ts +19 -8
- package/lib/src/index.js +6 -1
- package/lib/src/loaders.js +10 -4
- package/lib/src/model/agenda.js +2 -2
- package/lib/src/model/ameli.d.ts +64 -52
- package/lib/src/model/ameli.js +147 -145
- package/lib/src/model/ameli_postgres.d.ts +67 -0
- package/lib/src/model/ameli_postgres.js +150 -0
- package/lib/src/model/commission.d.ts +3 -2
- package/lib/src/model/commission.js +2 -2
- package/lib/src/model/debats.d.ts +38 -66
- package/lib/src/model/debats.js +110 -93
- package/lib/src/model/documents.d.ts +32 -12
- package/lib/src/model/documents.js +171 -130
- package/lib/src/model/dosleg.d.ts +142 -5
- package/lib/src/model/dosleg.js +298 -156
- package/lib/src/model/questions.d.ts +54 -45
- package/lib/src/model/questions.js +89 -87
- package/lib/src/model/scrutins.d.ts +48 -13
- package/lib/src/model/scrutins.js +118 -111
- package/lib/src/model/seance.js +3 -3
- package/lib/src/model/sens.d.ts +109 -179
- package/lib/src/model/sens.js +384 -484
- package/lib/src/model/util.d.ts +0 -8
- package/lib/src/model/util.js +0 -23
- package/lib/src/parsers/texte.js +7 -7
- package/lib/src/raw_types/ameli.d.ts +1651 -803
- package/lib/src/raw_types/ameli.js +1816 -5
- package/lib/src/raw_types/debats.d.ts +353 -180
- package/lib/src/raw_types/debats.js +517 -5
- package/lib/src/raw_types/dosleg.d.ts +2862 -1527
- package/lib/src/raw_types/dosleg.js +4354 -5
- package/lib/src/raw_types/questions.d.ts +671 -395
- package/lib/src/raw_types/questions.js +1303 -5
- package/lib/src/raw_types/sens.d.ts +7743 -8148
- package/lib/src/raw_types/sens.js +10429 -5
- package/lib/src/raw_types_schemats/ameli.d.ts +4 -2
- package/lib/src/raw_types_schemats/debats.d.ts +2 -2
- package/lib/src/raw_types_schemats/dosleg.d.ts +2 -2
- package/lib/src/raw_types_schemats/questions.d.ts +2 -2
- package/lib/src/raw_types_schemats/sens.d.ts +10 -4216
- package/lib/src/scripts/convert_data.js +7 -6
- package/lib/src/scripts/convert_xml_to_html.js +2 -2
- package/lib/src/scripts/data-download.js +3 -2
- package/lib/src/scripts/retrieve_agenda.js +21 -9
- package/lib/src/scripts/retrieve_cr_commission.js +17 -17
- package/lib/src/scripts/retrieve_cr_seance.d.ts +14 -1
- package/lib/src/scripts/retrieve_cr_seance.js +10 -11
- package/lib/src/scripts/retrieve_documents.d.ts +11 -2
- package/lib/src/scripts/retrieve_documents.js +25 -14
- package/lib/src/scripts/retrieve_open_data.js +514 -153
- package/lib/src/scripts/retrieve_senateurs_photos.js +25 -11
- package/lib/src/scripts/retrieve_videos.js +12 -11
- package/lib/src/scripts/shared/cli_helpers.d.ts +1 -6
- package/lib/src/scripts/shared/cli_helpers.js +9 -8
- package/lib/src/scripts/shared/incremental_import_sql.d.ts +2 -0
- package/lib/src/scripts/shared/incremental_import_sql.js +894 -0
- package/lib/src/scripts/shared/prefixed_tables.d.ts +10 -0
- package/lib/src/scripts/shared/prefixed_tables.js +36 -0
- package/lib/src/scripts/shared/schema_version.d.ts +3 -0
- package/lib/src/scripts/shared/schema_version.js +97 -0
- package/lib/src/scripts/shared/staging_import.d.ts +3 -0
- package/lib/src/scripts/shared/staging_import.js +80 -0
- package/lib/src/scripts/shared/staging_metadata_sql.d.ts +1 -0
- package/lib/src/scripts/shared/staging_metadata_sql.js +221 -0
- package/lib/src/scripts/validate_prefixed_tables.d.ts +1 -0
- package/lib/src/scripts/validate_prefixed_tables.js +101 -0
- package/lib/src/types/ameli.d.ts +4 -4
- package/lib/src/types/debats.d.ts +2 -2
- package/lib/src/types/dosleg.d.ts +39 -39
- package/lib/src/types/questions.d.ts +2 -2
- package/lib/src/types/sens.d.ts +0 -2
- package/lib/src/types/texte.d.ts +1 -1
- package/lib/src/utils/cr_spliting.d.ts +9 -6
- package/lib/src/utils/cr_spliting.js +6 -101
- package/lib/src/utils/reunion_odj_building.d.ts +7 -3
- package/lib/src/utils/reunion_parsing.d.ts +2 -1
- package/lib/src/utils/reunion_parsing.js +2 -2
- package/lib/src/videos/match.js +8 -5
- package/lib/src/videos/pipeline.d.ts +6 -2
- package/lib/src/videos/pipeline.js +21 -8
- package/lib/src/videos/search.js +6 -2
- package/lib/strings.d.ts +1 -0
- package/lib/strings.js +18 -0
- package/lib/strings.mjs +18 -0
- package/lib/strings.ts +26 -0
- package/lib/tests/incrementalImportSql.test.d.ts +1 -0
- package/lib/tests/incrementalImportSql.test.js +155 -0
- package/lib/tests/prefixedTables.test.d.ts +1 -0
- package/lib/tests/prefixedTables.test.js +22 -0
- package/lib/tests/schemaVersion.test.d.ts +1 -0
- package/lib/tests/schemaVersion.test.js +23 -0
- package/lib/tests/validatePrefixedTables.test.d.ts +1 -0
- package/lib/tests/validatePrefixedTables.test.js +14 -0
- package/lib/types/agenda.d.ts +44 -0
- package/lib/types/agenda.js +1 -0
- package/lib/types/ameli.d.ts +5 -0
- package/lib/types/ameli.js +1 -0
- package/lib/types/ameli.mjs +13 -0
- package/lib/types/ameli.ts +21 -0
- package/lib/types/compte_rendu.d.ts +83 -0
- package/lib/types/compte_rendu.js +1 -0
- package/lib/types/debats.d.ts +2 -0
- package/lib/types/debats.js +1 -0
- package/lib/types/debats.mjs +2 -0
- package/lib/types/debats.ts +6 -0
- package/lib/types/dosleg.d.ts +70 -0
- package/lib/types/dosleg.js +1 -0
- package/lib/types/dosleg.mjs +151 -0
- package/lib/types/dosleg.ts +284 -0
- package/lib/types/questions.d.ts +2 -0
- package/lib/types/questions.js +1 -0
- package/lib/types/questions.mjs +1 -0
- package/lib/types/questions.ts +3 -0
- package/lib/types/sens.d.ts +10 -0
- package/lib/types/sens.js +1 -0
- package/lib/types/sens.mjs +1 -0
- package/lib/types/sens.ts +12 -0
- package/lib/types/sessions.d.ts +5 -0
- package/lib/types/sessions.js +84 -0
- package/lib/types/sessions.mjs +43 -0
- package/lib/types/sessions.ts +42 -0
- package/lib/types/texte.d.ts +74 -0
- package/lib/types/texte.js +16 -0
- package/lib/types/texte.mjs +16 -0
- package/lib/types/texte.ts +76 -0
- package/lib/typings/windows-1252.d.js +2 -0
- package/lib/typings/windows-1252.d.mjs +2 -0
- package/lib/typings/windows-1252.d.ts +11 -0
- package/lib/utils/cr_spliting.d.ts +28 -0
- package/lib/utils/cr_spliting.js +265 -0
- package/lib/utils/date.d.ts +10 -0
- package/lib/utils/date.js +100 -0
- package/lib/utils/nvs-timecode.d.ts +7 -0
- package/lib/utils/nvs-timecode.js +79 -0
- package/lib/utils/reunion_grouping.d.ts +9 -0
- package/lib/utils/reunion_grouping.js +361 -0
- package/lib/utils/reunion_odj_building.d.ts +5 -0
- package/lib/utils/reunion_odj_building.js +154 -0
- package/lib/utils/reunion_parsing.d.ts +23 -0
- package/lib/utils/reunion_parsing.js +209 -0
- package/lib/utils/scoring.d.ts +14 -0
- package/lib/utils/scoring.js +147 -0
- package/lib/utils/string_cleaning.d.ts +7 -0
- package/lib/utils/string_cleaning.js +57 -0
- package/lib/validators/config.d.ts +9 -0
- package/lib/validators/config.js +10 -0
- package/lib/validators/config.mjs +54 -0
- package/lib/validators/config.ts +79 -0
- package/lib/validators/senat.d.ts +0 -0
- package/lib/validators/senat.js +28 -0
- package/lib/validators/senat.mjs +24 -0
- package/lib/validators/senat.ts +26 -0
- package/package.json +11 -11
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import assert from "assert";
|
|
2
1
|
import commandLineArgs from "command-line-args";
|
|
3
2
|
import fs from "fs-extra";
|
|
4
3
|
import path from "path";
|
|
@@ -11,7 +10,7 @@ import { processRapport, processTexte } from "./retrieve_documents";
|
|
|
11
10
|
import { buildActesLegislatifs } from "../model/dosleg";
|
|
12
11
|
import { UNDEFINED_SESSION } from "../types/sessions";
|
|
13
12
|
import { getSessionFromDate, getSessionFromSignet } from "./datautil";
|
|
14
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
13
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
15
14
|
import { ensureAndClearDir } from "./shared/util";
|
|
16
15
|
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
17
16
|
const optionsDefinitions = [...commonOptions];
|
|
@@ -29,8 +28,7 @@ function commitAndPushGit(datasetDir, options) {
|
|
|
29
28
|
}
|
|
30
29
|
}
|
|
31
30
|
async function convertData() {
|
|
32
|
-
const dataDir = options["dataDir"];
|
|
33
|
-
assert(dataDir, "Missing argument: data directory");
|
|
31
|
+
const dataDir = assertExistingDirectory(options["dataDir"], "data directory");
|
|
34
32
|
const enabledDatasets = getEnabledDatasets(options["categories"]);
|
|
35
33
|
console.time("data transformation time");
|
|
36
34
|
if (enabledDatasets & EnabledDatasets.Ameli) {
|
|
@@ -129,6 +127,9 @@ async function convertDatasetDebats(dataDir, options) {
|
|
|
129
127
|
if (options["verbose"]) {
|
|
130
128
|
console.log(`Converting ${debat.id} file…`);
|
|
131
129
|
}
|
|
130
|
+
if (!debat.date_seance) {
|
|
131
|
+
continue;
|
|
132
|
+
}
|
|
132
133
|
const session = getSessionFromDate(debat.date_seance);
|
|
133
134
|
if (options["fromSession"] && session < options["fromSession"]) {
|
|
134
135
|
continue;
|
|
@@ -238,7 +239,7 @@ async function convertTextes(dataDir, options) {
|
|
|
238
239
|
const hasExposeDesMotifs = texte["origine"] === "déposé au Sénat" || texte["origine"] === "transmis au Sénat";
|
|
239
240
|
const metadata = {
|
|
240
241
|
name: texteName,
|
|
241
|
-
session
|
|
242
|
+
session,
|
|
242
243
|
date: texte["date"],
|
|
243
244
|
url_expose_des_motifs: hasExposeDesMotifs
|
|
244
245
|
? new URL(`${texteName}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL)
|
|
@@ -286,7 +287,7 @@ async function convertRapports(dataDir, options) {
|
|
|
286
287
|
});
|
|
287
288
|
const metadata = {
|
|
288
289
|
name: rapportName,
|
|
289
|
-
session
|
|
290
|
+
session,
|
|
290
291
|
date: rapport["date"],
|
|
291
292
|
url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
|
|
292
293
|
url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
|
|
@@ -12,8 +12,8 @@ async function main() {
|
|
|
12
12
|
try {
|
|
13
13
|
options = commandLineArgs(optionDefinitions, { stopAtFirstUnknown: true });
|
|
14
14
|
}
|
|
15
|
-
catch (
|
|
16
|
-
console.error(`Error: ${
|
|
15
|
+
catch (error) {
|
|
16
|
+
console.error(`Error: ${error.message}`);
|
|
17
17
|
process.exit(1);
|
|
18
18
|
}
|
|
19
19
|
// Handle positional arguments if not using flags
|
|
@@ -5,8 +5,9 @@ try {
|
|
|
5
5
|
execSync(`tsx src/scripts/convert_data.ts ${args}`, { stdio: "inherit" });
|
|
6
6
|
}
|
|
7
7
|
catch (error) {
|
|
8
|
-
|
|
8
|
+
const execError = error;
|
|
9
|
+
if (execError.status !== 10) {
|
|
9
10
|
console.error("Error during data retrieval:", error);
|
|
10
|
-
process.exit(
|
|
11
|
+
process.exit(execError.status || 1);
|
|
11
12
|
}
|
|
12
13
|
}
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
import assert from "assert";
|
|
2
1
|
import commandLineArgs from "command-line-args";
|
|
3
2
|
import fs from "fs-extra";
|
|
4
3
|
import { DateTime } from "luxon";
|
|
5
4
|
import path from "path";
|
|
5
|
+
import * as git from "../git";
|
|
6
6
|
import { AGENDA_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
7
7
|
import { parseAgendaFromFile } from "../model/agenda";
|
|
8
|
-
import { getSessionsFromStart } from "../types/sessions";
|
|
8
|
+
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
9
9
|
import { ID_DATE_FORMAT } from "./datautil";
|
|
10
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
10
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
11
11
|
import { fetchWithRetry } from "./shared/util";
|
|
12
12
|
import { buildReunionsByBucket } from "../utils/reunion_parsing";
|
|
13
13
|
import { buildSenatDossierIndex } from "../utils/reunion_odj_building";
|
|
@@ -19,6 +19,7 @@ const optionsDefinitions = [
|
|
|
19
19
|
type: Boolean,
|
|
20
20
|
},
|
|
21
21
|
];
|
|
22
|
+
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
22
23
|
const options = commandLineArgs(optionsDefinitions);
|
|
23
24
|
const SENAT_GLOBAL_AGENDA_URL_ROOT = "https://www.senat.fr/aglae/Global";
|
|
24
25
|
const EVENT_DATE_FORMAT = "ddMMyyyy";
|
|
@@ -27,9 +28,18 @@ class AgendaError extends Error {
|
|
|
27
28
|
super(`An error occurred while retrieving Agenda ${agendaName}: ${message}`);
|
|
28
29
|
}
|
|
29
30
|
}
|
|
31
|
+
function commitAndPushGit(datasetDir, options) {
|
|
32
|
+
if (options.commit) {
|
|
33
|
+
const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options.remote);
|
|
34
|
+
if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
|
|
35
|
+
exitCode = errorCode;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
30
39
|
async function retrieveAgendas(options, sessions) {
|
|
31
40
|
console.log(`[AGENDA] Retrieving agendas for sessions ${sessions.join(", ")}`);
|
|
32
|
-
const
|
|
41
|
+
const dataDir = assertExistingDirectory(options["dataDir"], "data directory");
|
|
42
|
+
const agendaRootDir = path.join(dataDir, AGENDA_FOLDER);
|
|
33
43
|
fs.ensureDirSync(agendaRootDir);
|
|
34
44
|
const originalAgendaDir = path.join(agendaRootDir, DATA_ORIGINAL_FOLDER);
|
|
35
45
|
fs.ensureDirSync(originalAgendaDir);
|
|
@@ -51,7 +61,8 @@ async function retrieveAgendas(options, sessions) {
|
|
|
51
61
|
fs.emptyDirSync(transformedAgendaSessionDir);
|
|
52
62
|
}
|
|
53
63
|
const fifteenDaysFromNow = new Date();
|
|
54
|
-
|
|
64
|
+
// Don't download agendas more than 15 days in the future.
|
|
65
|
+
fifteenDaysFromNow.setDate(fifteenDaysFromNow.getDate() + 15);
|
|
55
66
|
for (const date = new Date(session, 9, 1); date <= new Date(session + 1, 8, 30) && date <= fifteenDaysFromNow; date.setDate(date.getDate() + 1)) {
|
|
56
67
|
const agendaName = DateTime.fromJSDate(date).toFormat(EVENT_DATE_FORMAT);
|
|
57
68
|
const agendaFileName = DateTime.fromJSDate(date).toFormat(ID_DATE_FORMAT);
|
|
@@ -117,15 +128,16 @@ async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPa
|
|
|
117
128
|
}
|
|
118
129
|
}
|
|
119
130
|
async function main() {
|
|
120
|
-
const dataDir = options["dataDir"];
|
|
121
|
-
|
|
122
|
-
const sessions = getSessionsFromStart(options["fromSession"]);
|
|
131
|
+
const dataDir = assertExistingDirectory(options["dataDir"], "data directory");
|
|
132
|
+
const sessions = getSessionsFromStart((options["fromSession"] ?? UNDEFINED_SESSION));
|
|
123
133
|
console.time("agenda processing time");
|
|
124
134
|
await retrieveAgendas(options, sessions);
|
|
135
|
+
const agendaDir = path.join(dataDir, AGENDA_FOLDER);
|
|
136
|
+
commitAndPushGit(agendaDir, options);
|
|
125
137
|
console.timeEnd("agenda processing time");
|
|
126
138
|
}
|
|
127
139
|
main()
|
|
128
|
-
.then(() => process.exit(
|
|
140
|
+
.then(() => process.exit(exitCode))
|
|
129
141
|
.catch((error) => {
|
|
130
142
|
console.log(error);
|
|
131
143
|
process.exit(1);
|
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import fs, { ensureDir } from "fs-extra";
|
|
2
|
-
import assert from "assert";
|
|
3
2
|
import path from "path";
|
|
4
3
|
import * as cheerio from "cheerio";
|
|
5
4
|
import { COMMISSION_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
6
5
|
import { loadAgendaForDate, parseCommissionMetadataFromHtml, linkCRtoCommissionGroup } from "../utils/cr_spliting";
|
|
7
6
|
import { cleanTitle, extractDayH3Sections, parseCommissionCRSectionFromDom } from "../model/commission";
|
|
8
7
|
import commandLineArgs from "command-line-args";
|
|
9
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
8
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
10
9
|
import { sessionStartYearFromDate } from "../model/seance";
|
|
11
|
-
import { getSessionsFromStart } from "../types/sessions";
|
|
10
|
+
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
12
11
|
import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
|
|
13
12
|
import { jaccard, jaccardTokenSim } from "../utils/scoring";
|
|
14
13
|
import * as git from "../git.js";
|
|
@@ -28,6 +27,7 @@ const optionsDefinitions = [
|
|
|
28
27
|
},
|
|
29
28
|
];
|
|
30
29
|
const options = commandLineArgs(optionsDefinitions);
|
|
30
|
+
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
31
31
|
const COMMISSION_HUBS = {
|
|
32
32
|
"Commission des affaires étrangères": [
|
|
33
33
|
"https://www.senat.fr/compte-rendu-commissions/affaires-etrangeres.html",
|
|
@@ -71,7 +71,7 @@ async function harvestWeeklyLinksFromHub(hubUrl) {
|
|
|
71
71
|
const out = [];
|
|
72
72
|
$("a[href]").each((_, a) => {
|
|
73
73
|
const href = ($(a).attr("href") || "").trim();
|
|
74
|
-
const m = href.match(/\/compte-rendu-commissions\/(\d{8})\/([a-z0-9
|
|
74
|
+
const m = href.match(/\/compte-rendu-commissions\/(\d{8})\/([a-z0-9-]+)\.html$/i);
|
|
75
75
|
if (m) {
|
|
76
76
|
const url = href.startsWith("http") ? href : new URL(href, hubUrl).toString();
|
|
77
77
|
out.push(url);
|
|
@@ -86,7 +86,7 @@ async function discoverCommissionWeeklyPages(fromSession) {
|
|
|
86
86
|
try {
|
|
87
87
|
const links = await harvestWeeklyLinksFromHub(hubUrl);
|
|
88
88
|
for (const url of links) {
|
|
89
|
-
const m = url.match(/\/compte-rendu-commissions\/(\d{8})\/([a-z0-9
|
|
89
|
+
const m = url.match(/\/compte-rendu-commissions\/(\d{8})\/([a-z0-9-]+)\.html$/i);
|
|
90
90
|
if (!m)
|
|
91
91
|
continue;
|
|
92
92
|
const yyyymmdd = m[1];
|
|
@@ -106,7 +106,6 @@ async function discoverCommissionWeeklyPages(fromSession) {
|
|
|
106
106
|
return results.sort((a, b) => a.yyyymmdd.localeCompare(b.yyyymmdd));
|
|
107
107
|
}
|
|
108
108
|
function commitAndPushGit(datasetDir, options) {
|
|
109
|
-
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
110
109
|
if (options.commit) {
|
|
111
110
|
const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options.remote);
|
|
112
111
|
if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
|
|
@@ -149,8 +148,8 @@ function toTokens(s) {
|
|
|
149
148
|
.filter((t) => t.length >= 3 && !["commission", "des", "de", "du", "d", "la", "le", "les", "et"].includes(t)));
|
|
150
149
|
}
|
|
151
150
|
function reunionOrganeCandidates(h) {
|
|
152
|
-
const
|
|
153
|
-
const out = [
|
|
151
|
+
const extended = h;
|
|
152
|
+
const out = [extended.organeSlug, extended.organeKey, extended.organe, h.titre].filter(Boolean);
|
|
154
153
|
return Array.from(new Set(out.map(normOrgane)));
|
|
155
154
|
}
|
|
156
155
|
function organeSimilarity(h, commissionKey) {
|
|
@@ -182,7 +181,7 @@ function titleSimilarity(reunion, sectionTitle) {
|
|
|
182
181
|
return Math.max(sTit, sObj);
|
|
183
182
|
}
|
|
184
183
|
async function retrieveCommissionCRs(options = {}) {
|
|
185
|
-
const dataDir = options["dataDir"];
|
|
184
|
+
const dataDir = assertExistingDirectory(options["dataDir"], "data directory");
|
|
186
185
|
const fromSession = Number(options["fromSession"]);
|
|
187
186
|
const concurrency = Number(options["concurrency"] ?? 6);
|
|
188
187
|
const politenessMs = Number(options["politenessMs"] ?? 150);
|
|
@@ -201,7 +200,7 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
201
200
|
const session = sessionStartYearFromDate(d);
|
|
202
201
|
const dir = path.join(originalRoot, String(session), commissionKey);
|
|
203
202
|
fs.ensureDirSync(dir);
|
|
204
|
-
const slug = url.replace(/^.*\/(\d{8})\/([
|
|
203
|
+
const slug = url.replace(/^.*\/(\d{8})\/([^/]+)\.html$/i, "$2");
|
|
205
204
|
const outPath = path.join(dir, `${yyyymmdd}.${slug}.html`);
|
|
206
205
|
return { url, outPath, yyyymmdd, commissionKey };
|
|
207
206
|
});
|
|
@@ -229,8 +228,9 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
229
228
|
}
|
|
230
229
|
}
|
|
231
230
|
}
|
|
232
|
-
catch (
|
|
233
|
-
|
|
231
|
+
catch (error) {
|
|
232
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
233
|
+
console.error(`[COM-CR][err] ${url} → ${message}`);
|
|
234
234
|
}
|
|
235
235
|
finally {
|
|
236
236
|
completed++;
|
|
@@ -241,7 +241,7 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
241
241
|
});
|
|
242
242
|
await Promise.all(workers);
|
|
243
243
|
console.log(`[COM-CR] done: saved=${saved} | skipped=${skipped} | 404=${notFound} | total=${completed}`);
|
|
244
|
-
const sessions = getSessionsFromStart(options["fromSession"]);
|
|
244
|
+
const sessions = getSessionsFromStart((options["fromSession"] ?? UNDEFINED_SESSION));
|
|
245
245
|
const comRoot = path.join(dataDir, COMMISSION_FOLDER);
|
|
246
246
|
const transformedRoot = path.join(comRoot, DATA_TRANSFORMED_FOLDER);
|
|
247
247
|
if (options["keepDir"])
|
|
@@ -354,7 +354,8 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
354
354
|
}
|
|
355
355
|
}
|
|
356
356
|
if (!options["silent"]) {
|
|
357
|
-
console.log(`[COM-CR][SESSION ${session}][${commissionKey}] Processed ${totalFiles} CR files
|
|
357
|
+
console.log(`[COM-CR][SESSION ${session}][${commissionKey}] Processed ${totalFiles} CR files`);
|
|
358
|
+
console.log(`[COM-CR][SESSION ${session}][${commissionKey}] Linked to agenda: ${linkedFiles}`);
|
|
358
359
|
}
|
|
359
360
|
}
|
|
360
361
|
}
|
|
@@ -362,14 +363,13 @@ async function retrieveCommissionCRs(options = {}) {
|
|
|
362
363
|
commitAndPushGit(debatsDir, options);
|
|
363
364
|
}
|
|
364
365
|
async function main() {
|
|
365
|
-
const dataDir = options["dataDir"];
|
|
366
|
-
assert(dataDir, "Missing argument: data directory");
|
|
366
|
+
const dataDir = assertExistingDirectory(options["dataDir"], "data directory");
|
|
367
367
|
console.time("CRI processing time");
|
|
368
368
|
await retrieveCommissionCRs(options);
|
|
369
369
|
console.timeEnd("CRI processing time");
|
|
370
370
|
}
|
|
371
371
|
main()
|
|
372
|
-
.then(() => process.exit(
|
|
372
|
+
.then(() => process.exit(exitCode))
|
|
373
373
|
.catch((error) => {
|
|
374
374
|
console.error(error);
|
|
375
375
|
process.exit(1);
|
|
@@ -3,4 +3,17 @@
|
|
|
3
3
|
* - downloads the ZIP of comptes-rendus des débats (CRI) from data.senat.fr
|
|
4
4
|
* - extracts XML files, distributes them by session/year
|
|
5
5
|
*/
|
|
6
|
-
|
|
6
|
+
import { Session } from "../types/sessions";
|
|
7
|
+
import { CommandLineOptions } from "command-line-args";
|
|
8
|
+
type RetrieveCriOptions = CommandLineOptions & {
|
|
9
|
+
commit?: boolean;
|
|
10
|
+
dataDir?: string;
|
|
11
|
+
fromSession?: Session;
|
|
12
|
+
keepDir?: boolean;
|
|
13
|
+
"only-recent"?: number;
|
|
14
|
+
parseDebats?: boolean;
|
|
15
|
+
remote?: string[];
|
|
16
|
+
silent?: boolean;
|
|
17
|
+
};
|
|
18
|
+
export declare function retrieveCriXmlDump(dataDir: string, options?: RetrieveCriOptions): Promise<void>;
|
|
19
|
+
export {};
|
|
@@ -3,17 +3,16 @@
|
|
|
3
3
|
* - downloads the ZIP of comptes-rendus des débats (CRI) from data.senat.fr
|
|
4
4
|
* - extracts XML files, distributes them by session/year
|
|
5
5
|
*/
|
|
6
|
-
import assert from "assert";
|
|
7
6
|
import commandLineArgs from "command-line-args";
|
|
8
7
|
import fs, { ensureDirSync } from "fs-extra";
|
|
9
8
|
import path from "path";
|
|
10
9
|
import StreamZip from "node-stream-zip";
|
|
11
10
|
import * as cheerio from "cheerio";
|
|
12
11
|
import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
13
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
12
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
14
13
|
import { parseCompteRenduIntervalFromFile, sessionStartYearFromDate } from "../model/seance";
|
|
15
14
|
import { extractSommaireBlocks, makeReunionUid } from "../utils/reunion_parsing";
|
|
16
|
-
import { getSessionsFromStart } from "../types/sessions";
|
|
15
|
+
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
17
16
|
import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
|
|
18
17
|
import { isNoiseBlock, scoreSommaireBlockForEvent } from "../utils/scoring";
|
|
19
18
|
import { parseYYYYMMDD } from "../utils/date";
|
|
@@ -28,6 +27,7 @@ const optionsDefinitions = [
|
|
|
28
27
|
];
|
|
29
28
|
const options = commandLineArgs(optionsDefinitions);
|
|
30
29
|
const CRI_ZIP_URL = "https://data.senat.fr/data/debats/cri.zip";
|
|
30
|
+
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
31
31
|
class CompteRenduError extends Error {
|
|
32
32
|
constructor(message, url) {
|
|
33
33
|
super(`An error occurred while retrieving ${url}: ${message}`);
|
|
@@ -94,7 +94,7 @@ export async function retrieveCriXmlDump(dataDir, options = {}) {
|
|
|
94
94
|
else {
|
|
95
95
|
fs.ensureDirSync(transformedRoot);
|
|
96
96
|
}
|
|
97
|
-
const sessions = getSessionsFromStart(options["fromSession"]);
|
|
97
|
+
const sessions = getSessionsFromStart((options["fromSession"] ?? UNDEFINED_SESSION));
|
|
98
98
|
// 1) Download ZIP global + distribut by session
|
|
99
99
|
const zipPath = path.join(dataDir, "cri.zip");
|
|
100
100
|
console.log("[CRI] Downloading global CRI zip…");
|
|
@@ -209,9 +209,8 @@ export async function retrieveCriXmlDump(dataDir, options = {}) {
|
|
|
209
209
|
commitAndPushGit(debatsDir, options);
|
|
210
210
|
}
|
|
211
211
|
function commitAndPushGit(datasetDir, options) {
|
|
212
|
-
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
213
212
|
if (options.commit) {
|
|
214
|
-
const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options
|
|
213
|
+
const errorCode = git.commitAndPush(datasetDir, "Nouvelle moisson", options["remote"]);
|
|
215
214
|
if ((exitCode === 10 && errorCode !== 10) || (exitCode === 0 && errorCode !== 0 && errorCode !== 10)) {
|
|
216
215
|
exitCode = errorCode;
|
|
217
216
|
}
|
|
@@ -237,7 +236,6 @@ async function linkCriEventIntoAgenda(dataDir, yyyymmdd, agendaEventId, crUid, c
|
|
|
237
236
|
console.warn(`[CR] Missing reunion file for SP event=${agendaEventId}: ${agendaPath}`);
|
|
238
237
|
return;
|
|
239
238
|
}
|
|
240
|
-
;
|
|
241
239
|
agenda.compteRenduRefUid = crUid;
|
|
242
240
|
await fs.writeJSON(agendaPath, agenda, { spaces: 2 });
|
|
243
241
|
console.log(`[CR] Linked CR ${crUid} → ${path.basename(agendaPath)} (event=${agendaEventId})`);
|
|
@@ -327,7 +325,9 @@ async function loadAgendaSpEventsForDate(dataDir, yyyymmdd, session) {
|
|
|
327
325
|
if (e && e.type === "Séance publique")
|
|
328
326
|
events.push(e);
|
|
329
327
|
}
|
|
330
|
-
catch {
|
|
328
|
+
catch {
|
|
329
|
+
continue;
|
|
330
|
+
}
|
|
331
331
|
}
|
|
332
332
|
return events;
|
|
333
333
|
}
|
|
@@ -345,14 +345,13 @@ function resolveTargetIndex($, idx, targetId) {
|
|
|
345
345
|
return i == null ? null : i;
|
|
346
346
|
}
|
|
347
347
|
async function main() {
|
|
348
|
-
const dataDir = options["dataDir"];
|
|
349
|
-
assert(dataDir, "Missing argument: data directory");
|
|
348
|
+
const dataDir = assertExistingDirectory(options["dataDir"], "data directory");
|
|
350
349
|
console.time("CRI processing time");
|
|
351
350
|
await retrieveCriXmlDump(dataDir, options);
|
|
352
351
|
console.timeEnd("CRI processing time");
|
|
353
352
|
}
|
|
354
353
|
main()
|
|
355
|
-
.then(() => process.exit(
|
|
354
|
+
.then(() => process.exit(exitCode))
|
|
356
355
|
.catch((error) => {
|
|
357
356
|
console.error(error);
|
|
358
357
|
process.exit(1);
|
|
@@ -1,3 +1,12 @@
|
|
|
1
1
|
import { DocumentMetadata } from "../types/texte";
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
type DownloadOptions = {
|
|
3
|
+
force?: boolean;
|
|
4
|
+
formats?: string[];
|
|
5
|
+
"only-recent"?: number;
|
|
6
|
+
parseDocuments?: boolean;
|
|
7
|
+
silent?: boolean;
|
|
8
|
+
verbose?: boolean;
|
|
9
|
+
};
|
|
10
|
+
export declare function processTexte(texteMetadata: DocumentMetadata, originalTextesDir: string, transformedTextesDir: string, enrichedTextesDir: string, options: DownloadOptions): Promise<void>;
|
|
11
|
+
export declare function processRapport(rapportMetadata: DocumentMetadata, originalRapportsDir: string, options: DownloadOptions): Promise<void>;
|
|
12
|
+
export {};
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import assert from "assert";
|
|
2
1
|
import commandLineArgs from "command-line-args";
|
|
3
2
|
import fs from "fs-extra";
|
|
4
3
|
import { DateTime } from "luxon";
|
|
@@ -8,7 +7,7 @@ import * as git from "../git";
|
|
|
8
7
|
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, ENRICHED_TEXTE_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
|
|
9
8
|
import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte";
|
|
10
9
|
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
11
|
-
import { commonOptions } from "./shared/cli_helpers";
|
|
10
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
12
11
|
import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util";
|
|
13
12
|
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
14
13
|
const optionsDefinitions = [
|
|
@@ -54,8 +53,8 @@ function shouldDownload(filePath, docDate, options) {
|
|
|
54
53
|
return true;
|
|
55
54
|
if (!fs.existsSync(filePath))
|
|
56
55
|
return true;
|
|
57
|
-
if (options
|
|
58
|
-
return isDocumentRecent(docDate, options
|
|
56
|
+
if (options["only-recent"] !== undefined) {
|
|
57
|
+
return isDocumentRecent(docDate, options["only-recent"]);
|
|
59
58
|
}
|
|
60
59
|
return false;
|
|
61
60
|
}
|
|
@@ -124,7 +123,7 @@ export async function processTexte(texteMetadata, originalTextesDir, transformed
|
|
|
124
123
|
{ type: "pdf", url: texteMetadata.url_pdf, isParseTarget: false },
|
|
125
124
|
];
|
|
126
125
|
for (const format of formats) {
|
|
127
|
-
if (!isOptionEmptyOrHasValue(options.formats, format.type))
|
|
126
|
+
if (!isOptionEmptyOrHasValue((options.formats ?? []).join(","), format.type))
|
|
128
127
|
continue;
|
|
129
128
|
if (format.url === undefined || format.url.toString().includes("#"))
|
|
130
129
|
continue;
|
|
@@ -165,7 +164,7 @@ export async function processRapport(rapportMetadata, originalRapportsDir, optio
|
|
|
165
164
|
{ type: "pdf", url: rapportMetadata.url_pdf },
|
|
166
165
|
];
|
|
167
166
|
for (const format of formats) {
|
|
168
|
-
if (!isOptionEmptyOrHasValue(options["
|
|
167
|
+
if (!isOptionEmptyOrHasValue((options.formats ?? []).join(","), format.type))
|
|
169
168
|
continue;
|
|
170
169
|
const destPath = path.join(rapportDir, `${rapportMetadata.name}.${format.type}`);
|
|
171
170
|
await processDocument(format.url.toString(), destPath, rapportMetadata.date, options);
|
|
@@ -176,7 +175,7 @@ async function processTextes(dataDir, sessions) {
|
|
|
176
175
|
const originalTextesDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);
|
|
177
176
|
const transformedTextesDir = path.join(textesDir, DATA_TRANSFORMED_FOLDER);
|
|
178
177
|
const enrichedTextesDir = path.join(dataDir, ENRICHED_TEXTE_FOLDER);
|
|
179
|
-
if (options
|
|
178
|
+
if (options.parseDocuments && options["only-recent"] === undefined) {
|
|
180
179
|
ensureAndClearDir(transformedTextesDir);
|
|
181
180
|
ensureAndClearDir(enrichedTextesDir);
|
|
182
181
|
}
|
|
@@ -184,7 +183,7 @@ async function processTextes(dataDir, sessions) {
|
|
|
184
183
|
force: options["force"],
|
|
185
184
|
silent: options["silent"],
|
|
186
185
|
verbose: options["verbose"],
|
|
187
|
-
|
|
186
|
+
"only-recent": options["only-recent"],
|
|
188
187
|
formats: options["formats"],
|
|
189
188
|
parseDocuments: options["parseDocuments"],
|
|
190
189
|
};
|
|
@@ -203,7 +202,7 @@ async function processRapports(dataDir, sessions) {
|
|
|
203
202
|
force: options["force"],
|
|
204
203
|
silent: options["silent"],
|
|
205
204
|
verbose: options["verbose"],
|
|
206
|
-
|
|
205
|
+
"only-recent": options["only-recent"],
|
|
207
206
|
formats: options["formats"],
|
|
208
207
|
};
|
|
209
208
|
for (const session of sessions) {
|
|
@@ -245,14 +244,26 @@ async function parseDocument(session, transformedTextesDir, textePath, texteName
|
|
|
245
244
|
return parsedTexte;
|
|
246
245
|
}
|
|
247
246
|
async function main() {
|
|
248
|
-
const dataDir = options["dataDir"];
|
|
249
|
-
|
|
250
|
-
const
|
|
247
|
+
const dataDir = assertExistingDirectory(options["dataDir"], "data directory");
|
|
248
|
+
const sessions = getSessionsFromStart((options.fromSession ?? UNDEFINED_SESSION));
|
|
249
|
+
const textesDir = path.join(dataDir, TEXTE_FOLDER);
|
|
250
|
+
const enrichedTextesDir = path.join(dataDir, ENRICHED_TEXTE_FOLDER);
|
|
251
|
+
const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
|
|
252
|
+
const syncRemote = options["remote"]?.[0];
|
|
251
253
|
console.time("documents processing time");
|
|
252
|
-
if (
|
|
254
|
+
if (syncRemote) {
|
|
255
|
+
if (isOptionEmptyOrHasValue(options.types?.join(",") ?? "", "textes")) {
|
|
256
|
+
git.pull(textesDir, syncRemote);
|
|
257
|
+
git.pull(enrichedTextesDir, syncRemote);
|
|
258
|
+
}
|
|
259
|
+
if (isOptionEmptyOrHasValue(options.types?.join(",") ?? "", "rapports")) {
|
|
260
|
+
git.pull(rapportsDir, syncRemote);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
if (isOptionEmptyOrHasValue(options.types?.join(",") ?? "", "textes")) {
|
|
253
264
|
await processTextes(dataDir, sessions);
|
|
254
265
|
}
|
|
255
|
-
if (isOptionEmptyOrHasValue(options
|
|
266
|
+
if (isOptionEmptyOrHasValue(options.types?.join(",") ?? "", "rapports")) {
|
|
256
267
|
await processRapports(dataDir, sessions);
|
|
257
268
|
}
|
|
258
269
|
if (!options["silent"]) {
|