@tricoteuses/senat 1.3.2 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/lib/config.js +4 -6
  2. package/lib/databases.js +34 -75
  3. package/lib/datasets.js +20 -28
  4. package/lib/index.d.ts +5 -0
  5. package/lib/index.js +4 -43
  6. package/lib/loaders.js +56 -74
  7. package/lib/model/ameli.js +8 -11
  8. package/lib/model/dosleg.js +47 -52
  9. package/lib/model/index.js +4 -13
  10. package/lib/model/questions.js +15 -18
  11. package/lib/model/sens.d.ts +1 -1
  12. package/lib/model/sens.js +65 -71
  13. package/lib/model/texte.js +17 -25
  14. package/lib/model/util.js +13 -21
  15. package/lib/raw_types/ameli.js +1 -2
  16. package/lib/raw_types/debats.js +1 -2
  17. package/lib/raw_types/dosleg.js +1 -2
  18. package/lib/raw_types/questions.js +1 -2
  19. package/lib/raw_types/sens.js +1 -2
  20. package/lib/raw_types_schemats/ameli.js +1 -2
  21. package/lib/raw_types_schemats/debats.js +1 -2
  22. package/lib/raw_types_schemats/dosleg.js +1 -2
  23. package/lib/raw_types_schemats/questions.js +1 -2
  24. package/lib/raw_types_schemats/sens.js +1 -2
  25. package/lib/scripts/convert_data.js +78 -83
  26. package/lib/scripts/datautil.js +9 -13
  27. package/lib/scripts/parse_textes.js +23 -28
  28. package/lib/scripts/retrieve_documents.js +56 -61
  29. package/lib/scripts/retrieve_open_data.js +44 -49
  30. package/lib/scripts/retrieve_senateurs_photos.js +31 -36
  31. package/lib/scripts/shared/cli_helpers.js +9 -12
  32. package/lib/scripts/shared/util.js +7 -15
  33. package/lib/strings.js +4 -10
  34. package/lib/types/ameli.js +5 -8
  35. package/lib/types/debats.js +2 -5
  36. package/lib/types/dosleg.js +28 -31
  37. package/lib/types/questions.js +1 -2
  38. package/lib/types/sens.js +1 -2
  39. package/lib/types/sessions.js +2 -5
  40. package/lib/types/texte.js +2 -5
  41. package/lib/validators/config.js +4 -7
  42. package/package.json +4 -4
@@ -1,132 +1,127 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- const assert_1 = __importDefault(require("assert"));
7
- const command_line_args_1 = __importDefault(require("command-line-args"));
8
- const fs_extra_1 = __importDefault(require("fs-extra"));
9
- const path_1 = __importDefault(require("path"));
10
- const datasets_1 = require("../datasets");
11
- const loaders_1 = require("../loaders");
12
- const model_1 = require("../model");
13
- const dosleg_1 = require("../model/dosleg");
14
- const datautil_1 = require("./datautil");
15
- const cli_helpers_1 = require("./shared/cli_helpers");
16
- const util_1 = require("./shared/util");
17
- const optionsDefinitions = [...cli_helpers_1.commonOptions];
18
- const options = (0, command_line_args_1.default)(optionsDefinitions);
1
+ import assert from "assert";
2
+ import commandLineArgs from "command-line-args";
3
+ import fs from "fs-extra";
4
+ import path from "path";
5
+ import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
6
+ import { DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER, } from "../loaders";
7
+ import { findAllAmendements, findAllCirconscriptions, findAllLois, findAllOrganismes, findAllQuestions, findAllSens, } from "../model";
8
+ import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg";
9
+ import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION, } from "./datautil";
10
+ import { commonOptions } from "./shared/cli_helpers";
11
+ import { ensureAndClearDir } from "./shared/util";
12
+ const optionsDefinitions = [...commonOptions];
13
+ const options = commandLineArgs(optionsDefinitions);
19
14
  const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
20
15
  const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
21
16
  const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
22
17
  const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
23
18
  async function convertData() {
24
- const enabledDatasets = (0, datasets_1.getEnabledDatasets)(options["categories"]);
19
+ const enabledDatasets = getEnabledDatasets(options["categories"]);
25
20
  const dataDir = options["dataDir"];
26
- (0, assert_1.default)(dataDir, "Missing argument: data directory");
21
+ assert(dataDir, "Missing argument: data directory");
27
22
  console.time("data transformation time");
28
- if (enabledDatasets & datasets_1.EnabledDatasets.Ameli) {
29
- const dataset = datasets_1.datasets.ameli;
23
+ if (enabledDatasets & EnabledDatasets.Ameli) {
24
+ const dataset = datasets.ameli;
30
25
  if (!options["silent"]) {
31
26
  console.log(`Converting database ${dataset.database} data into files…`);
32
27
  }
33
- const ameliReorganizedRootDir = path_1.default.join(dataDir, dataset.database);
34
- (0, util_1.ensureAndClearDir)(ameliReorganizedRootDir);
35
- for await (const amendement of (0, model_1.findAllAmendements)()) {
28
+ const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
29
+ ensureAndClearDir(ameliReorganizedRootDir);
30
+ for await (const amendement of findAllAmendements()) {
36
31
  if (options["verbose"]) {
37
32
  console.log(`Converting ${amendement.numero} file…`);
38
33
  }
39
- const session = String(amendement.session) || datautil_1.UNDEFINED_SESSION;
34
+ const session = String(amendement.session) || UNDEFINED_SESSION;
40
35
  const signetDossierLegislatif = amendement.signet_dossier_legislatif ||
41
36
  `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase();
42
- const ameliReorganizedDir = path_1.default.join(ameliReorganizedRootDir, session, signetDossierLegislatif);
43
- fs_extra_1.default.ensureDirSync(ameliReorganizedDir);
37
+ const ameliReorganizedDir = path.join(ameliReorganizedRootDir, session, signetDossierLegislatif);
38
+ fs.ensureDirSync(ameliReorganizedDir);
44
39
  const amendementFileName = `${amendement.numero}.json`;
45
- fs_extra_1.default.writeJSONSync(path_1.default.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 });
40
+ fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 });
46
41
  }
47
42
  }
48
- if (enabledDatasets & datasets_1.EnabledDatasets.DosLeg) {
49
- const dataset = datasets_1.datasets.dosleg;
43
+ if (enabledDatasets & EnabledDatasets.DosLeg) {
44
+ const dataset = datasets.dosleg;
50
45
  if (!options["silent"]) {
51
46
  console.log(`Converting database ${dataset.database} data into files…`);
52
47
  }
53
- const doslegReorganizedRootDir = path_1.default.join(dataDir, dataset.database);
54
- const dossiersReorganizedDir = path_1.default.join(doslegReorganizedRootDir, loaders_1.DOSLEG_DOSSIERS_FOLDER);
55
- (0, util_1.ensureAndClearDir)(doslegReorganizedRootDir);
56
- (0, util_1.ensureAndClearDir)(dossiersReorganizedDir);
57
- for await (const loi of (0, model_1.findAllLois)()) {
48
+ const doslegReorganizedRootDir = path.join(dataDir, dataset.database);
49
+ const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER);
50
+ ensureAndClearDir(doslegReorganizedRootDir);
51
+ ensureAndClearDir(dossiersReorganizedDir);
52
+ for await (const loi of findAllLois()) {
58
53
  if (options["verbose"]) {
59
54
  console.log(`Converting ${loi.signet} file…`);
60
55
  }
61
- let loiReorganizedDir = path_1.default.join(dossiersReorganizedDir, datautil_1.UNDEFINED_SESSION);
62
- const signetParts = datautil_1.SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
56
+ let loiReorganizedDir = path.join(dossiersReorganizedDir, UNDEFINED_SESSION);
57
+ const signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
63
58
  if (signetParts && "session" in signetParts) {
64
59
  const { session } = signetParts;
65
- const formattedSession = (0, datautil_1.formatToFourDigitSession)(session);
66
- loiReorganizedDir = path_1.default.join(dossiersReorganizedDir, formattedSession);
60
+ const formattedSession = formatToFourDigitSession(session);
61
+ loiReorganizedDir = path.join(dossiersReorganizedDir, formattedSession);
67
62
  }
68
- fs_extra_1.default.ensureDirSync(loiReorganizedDir);
63
+ fs.ensureDirSync(loiReorganizedDir);
69
64
  const loiFileName = `${loi.signet}.json`;
70
- fs_extra_1.default.writeJSONSync(path_1.default.join(loiReorganizedDir, loiFileName), loi, {
65
+ fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, {
71
66
  spaces: 2,
72
67
  });
73
68
  }
74
69
  await convertTexteUrls(dataDir);
75
70
  await convertRapportUrls(dataDir);
76
71
  }
77
- if (enabledDatasets & datasets_1.EnabledDatasets.Questions) {
78
- const dataset = datasets_1.datasets.questions;
72
+ if (enabledDatasets & EnabledDatasets.Questions) {
73
+ const dataset = datasets.questions;
79
74
  if (!options["silent"]) {
80
75
  console.log(`Converting database ${dataset.database} data into files…`);
81
76
  }
82
- const questionsReorganizedRootDir = path_1.default.join(dataDir, dataset.database);
83
- (0, util_1.ensureAndClearDir)(questionsReorganizedRootDir);
84
- for await (const question of (0, model_1.findAllQuestions)()) {
77
+ const questionsReorganizedRootDir = path.join(dataDir, dataset.database);
78
+ ensureAndClearDir(questionsReorganizedRootDir);
79
+ for await (const question of findAllQuestions()) {
85
80
  if (options["verbose"]) {
86
81
  console.log(`Converting ${question.reference} file…`);
87
82
  }
88
83
  const legislature = question.legislature ? question.legislature : 0;
89
- const questionReorganizedDir = path_1.default.join(questionsReorganizedRootDir, String(legislature));
90
- fs_extra_1.default.ensureDirSync(questionReorganizedDir);
84
+ const questionReorganizedDir = path.join(questionsReorganizedRootDir, String(legislature));
85
+ fs.ensureDirSync(questionReorganizedDir);
91
86
  const questionFileName = `${question.reference}.json`;
92
- fs_extra_1.default.writeJSONSync(path_1.default.join(questionReorganizedDir, questionFileName), question, { spaces: 2 });
87
+ fs.writeJSONSync(path.join(questionReorganizedDir, questionFileName), question, { spaces: 2 });
93
88
  }
94
89
  }
95
- if (enabledDatasets & datasets_1.EnabledDatasets.Sens) {
96
- const dataset = datasets_1.datasets.sens;
90
+ if (enabledDatasets & EnabledDatasets.Sens) {
91
+ const dataset = datasets.sens;
97
92
  if (!options["silent"]) {
98
93
  console.log(`Converting database ${dataset.database} data into files…`);
99
94
  }
100
- const sensReorganizedRootDir = path_1.default.join(dataDir, dataset.database);
101
- const senateursReorganizedDir = path_1.default.join(sensReorganizedRootDir, loaders_1.SENS_SENATEURS_FOLDER);
102
- const circonscriptionsReorganizedDir = path_1.default.join(sensReorganizedRootDir, loaders_1.SENS_CIRCONSCRIPTIONS_FOLDER);
103
- const organismesReorganizedDir = path_1.default.join(sensReorganizedRootDir, loaders_1.SENS_ORGANISMES_FOLDER);
104
- (0, util_1.ensureAndClearDir)(sensReorganizedRootDir);
105
- (0, util_1.ensureAndClearDir)(senateursReorganizedDir);
106
- (0, util_1.ensureAndClearDir)(circonscriptionsReorganizedDir);
107
- (0, util_1.ensureAndClearDir)(organismesReorganizedDir);
108
- for await (const sen of (0, model_1.findAllSens)()) {
95
+ const sensReorganizedRootDir = path.join(dataDir, dataset.database);
96
+ const senateursReorganizedDir = path.join(sensReorganizedRootDir, SENS_SENATEURS_FOLDER);
97
+ const circonscriptionsReorganizedDir = path.join(sensReorganizedRootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
98
+ const organismesReorganizedDir = path.join(sensReorganizedRootDir, SENS_ORGANISMES_FOLDER);
99
+ ensureAndClearDir(sensReorganizedRootDir);
100
+ ensureAndClearDir(senateursReorganizedDir);
101
+ ensureAndClearDir(circonscriptionsReorganizedDir);
102
+ ensureAndClearDir(organismesReorganizedDir);
103
+ for await (const sen of findAllSens()) {
109
104
  if (options["verbose"]) {
110
105
  console.log(`Converting ${sen.matricule} file…`);
111
106
  }
112
107
  const senFileName = `${sen.matricule}.json`;
113
- fs_extra_1.default.writeJSONSync(path_1.default.join(senateursReorganizedDir, senFileName), sen, {
108
+ fs.writeJSONSync(path.join(senateursReorganizedDir, senFileName), sen, {
114
109
  spaces: 2,
115
110
  });
116
111
  }
117
- for await (const circonscription of (0, model_1.findAllCirconscriptions)()) {
112
+ for await (const circonscription of findAllCirconscriptions()) {
118
113
  if (options["verbose"]) {
119
114
  console.log(`Converting ${circonscription.identifiant} file…`);
120
115
  }
121
116
  const circonscriptionFileName = `${circonscription.identifiant}.json`;
122
- fs_extra_1.default.writeJSONSync(path_1.default.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, { spaces: 2 });
117
+ fs.writeJSONSync(path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, { spaces: 2 });
123
118
  }
124
- for await (const organisme of (0, model_1.findAllOrganismes)()) {
119
+ for await (const organisme of findAllOrganismes()) {
125
120
  if (options["verbose"]) {
126
121
  console.log(`Converting ${organisme.code} file…`);
127
122
  }
128
123
  const organismeFileName = `${organisme.code}.json`;
129
- fs_extra_1.default.writeJSONSync(path_1.default.join(organismesReorganizedDir, organismeFileName), organisme, { spaces: 2 });
124
+ fs.writeJSONSync(path.join(organismesReorganizedDir, organismeFileName), organisme, { spaces: 2 });
130
125
  }
131
126
  }
132
127
  if (!options["silent"]) {
@@ -134,13 +129,13 @@ async function convertData() {
134
129
  }
135
130
  }
136
131
  async function convertTexteUrls(dataDir) {
137
- const textesDir = path_1.default.join(dataDir, loaders_1.TEXTE_FOLDER);
138
- fs_extra_1.default.ensureDirSync(textesDir);
139
- const originalTextesDir = path_1.default.join(textesDir, loaders_1.TEXTE_ORIGINAL_FOLDER);
140
- for await (const texte of (0, dosleg_1.findSenatTexteUrls)(options["sessions"])) {
141
- const texteName = path_1.default.parse(texte.url).name;
142
- const texteDir = path_1.default.join(originalTextesDir, `${texte.session ?? datautil_1.UNDEFINED_SESSION}`, texteName);
143
- fs_extra_1.default.ensureDirSync(texteDir);
132
+ const textesDir = path.join(dataDir, TEXTE_FOLDER);
133
+ fs.ensureDirSync(textesDir);
134
+ const originalTextesDir = path.join(textesDir, TEXTE_ORIGINAL_FOLDER);
135
+ for await (const texte of findSenatTexteUrls(options["sessions"])) {
136
+ const texteName = path.parse(texte.url).name;
137
+ const texteDir = path.join(originalTextesDir, `${texte.session ?? UNDEFINED_SESSION}`, texteName);
138
+ fs.ensureDirSync(texteDir);
144
139
  const metadata = {
145
140
  name: texteName,
146
141
  session: texte.session,
@@ -151,26 +146,26 @@ async function convertTexteUrls(dataDir) {
151
146
  url_html: new URL(`${texteName}.html`, SENAT_TEXTE_BASE_URL),
152
147
  url_pdf: new URL(`${texteName}.pdf`, SENAT_TEXTE_BASE_URL),
153
148
  };
154
- fs_extra_1.default.writeJSONSync(path_1.default.join(texteDir, loaders_1.DOCUMENT_METADATA_FILE), metadata, {
149
+ fs.writeJSONSync(path.join(texteDir, DOCUMENT_METADATA_FILE), metadata, {
155
150
  spaces: 2,
156
151
  });
157
152
  }
158
153
  }
159
154
  async function convertRapportUrls(dataDir) {
160
- const rapportsDir = path_1.default.join(dataDir, loaders_1.RAPPORT_FOLDER);
161
- fs_extra_1.default.ensureDirSync(rapportsDir);
162
- for await (const rapport of (0, dosleg_1.findSenatRapportUrls)(options["sessions"])) {
163
- const parsedRapportUrl = path_1.default.parse(rapport.url);
155
+ const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
156
+ fs.ensureDirSync(rapportsDir);
157
+ for await (const rapport of findSenatRapportUrls(options["sessions"])) {
158
+ const parsedRapportUrl = path.parse(rapport.url);
164
159
  const rapportName = parsedRapportUrl.name;
165
- const rapportDir = path_1.default.join(rapportsDir, `${rapport.session ?? datautil_1.UNDEFINED_SESSION}`, rapportName);
166
- fs_extra_1.default.ensureDirSync(rapportDir);
160
+ const rapportDir = path.join(rapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName);
161
+ fs.ensureDirSync(rapportDir);
167
162
  const rapportHtmlUrlBase = `${rapportName}_mono.html`;
168
- const rapportHtmlUrl = path_1.default.format({
163
+ const rapportHtmlUrl = path.format({
169
164
  dir: parsedRapportUrl.dir,
170
165
  base: rapportHtmlUrlBase,
171
166
  });
172
167
  const rapportPdfUrlBase = `${rapportName}1.pdf`;
173
- const rapportPdfUrl = path_1.default.format({
168
+ const rapportPdfUrl = path.format({
174
169
  dir: parsedRapportUrl.dir,
175
170
  base: rapportPdfUrlBase,
176
171
  });
@@ -180,7 +175,7 @@ async function convertRapportUrls(dataDir) {
180
175
  url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
181
176
  url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
182
177
  };
183
- fs_extra_1.default.writeJSONSync(path_1.default.join(rapportDir, loaders_1.DOCUMENT_METADATA_FILE), metadata, {
178
+ fs.writeJSONSync(path.join(rapportDir, DOCUMENT_METADATA_FILE), metadata, {
184
179
  spaces: 2,
185
180
  });
186
181
  }
@@ -1,21 +1,17 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.UNDEFINED_SESSION = exports.AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP = exports.AKN_IDENTIFICATION_STRUCTURE_REGEXP = exports.SIGNET_STRUCTURE_REGEXP = void 0;
4
- exports.formatToFourDigitSession = formatToFourDigitSession;
5
- const luxon_1 = require("luxon");
6
- luxon_1.Settings.twoDigitCutoffYear = 50;
7
- exports.SIGNET_STRUCTURE_REGEXP = /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/;
8
- exports.AKN_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{4}-\d{4})\/?(?<numTexte>\d+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;
9
- exports.AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{2,4})\/?(?<numTexte>[a-zA-Z0-9]+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;
10
- exports.UNDEFINED_SESSION = "0";
11
- function formatToFourDigitSession(session) {
1
+ import { DateTime, Settings } from "luxon";
2
+ Settings.twoDigitCutoffYear = 50;
3
+ export const SIGNET_STRUCTURE_REGEXP = /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/;
4
+ export const AKN_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{4}-\d{4})\/?(?<numTexte>\d+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;
5
+ export const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{2,4})\/?(?<numTexte>[a-zA-Z0-9]+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;
6
+ export const UNDEFINED_SESSION = "0";
7
+ export function formatToFourDigitSession(session) {
12
8
  if (session.length >= 2) {
13
9
  const sessionFirstTwoDigits = session.substring(0, 2);
14
10
  const sessionLastTwoDigits = session.substring(session.length - 2);
15
11
  const twoDigitSession = parseInt(sessionFirstTwoDigits) === parseInt(sessionLastTwoDigits) - 1
16
12
  ? sessionFirstTwoDigits
17
13
  : sessionLastTwoDigits;
18
- return luxon_1.DateTime.fromFormat(String(twoDigitSession), "yy").toFormat("yyyy");
14
+ return DateTime.fromFormat(String(twoDigitSession), "yy").toFormat("yyyy");
19
15
  }
20
- return exports.UNDEFINED_SESSION;
16
+ return UNDEFINED_SESSION;
21
17
  }
@@ -1,44 +1,39 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- const assert_1 = __importDefault(require("assert"));
7
- const command_line_args_1 = __importDefault(require("command-line-args"));
8
- const fs_extra_1 = __importDefault(require("fs-extra"));
9
- const path_1 = __importDefault(require("path"));
10
- const loaders_1 = require("../loaders");
11
- const texte_1 = require("../model/texte");
12
- const cli_helpers_1 = require("./shared/cli_helpers");
13
- const util_1 = require("./shared/util");
14
- const optionsDefinitions = [...cli_helpers_1.commonOptions];
15
- const options = (0, command_line_args_1.default)(optionsDefinitions);
1
+ import assert from "assert";
2
+ import commandLineArgs from "command-line-args";
3
+ import fs from "fs-extra";
4
+ import path from "path";
5
+ import { iterFilePaths, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER, TEXTE_TRANSFORMED_FOLDER, } from "../loaders";
6
+ import { parseExposeDesMotifsFromFile, parseTexteFromFile, } from "../model/texte";
7
+ import { commonOptions } from "./shared/cli_helpers";
8
+ import { ensureAndClearDir } from "./shared/util";
9
+ const optionsDefinitions = [...commonOptions];
10
+ const options = commandLineArgs(optionsDefinitions);
16
11
  async function main() {
17
12
  const dataDir = options["dataDir"];
18
- (0, assert_1.default)(dataDir, "Missing argument: data directory");
19
- const transformedTextesDir = path_1.default.join(options["dataDir"], loaders_1.TEXTE_FOLDER, loaders_1.TEXTE_TRANSFORMED_FOLDER);
20
- (0, util_1.ensureAndClearDir)(transformedTextesDir);
21
- for (const filePath of (0, loaders_1.iterFilePaths)(path_1.default.join(dataDir, loaders_1.TEXTE_FOLDER, loaders_1.TEXTE_ORIGINAL_FOLDER))) {
22
- const parsedFilePath = path_1.default.parse(filePath);
13
+ assert(dataDir, "Missing argument: data directory");
14
+ const transformedTextesDir = path.join(options["dataDir"], TEXTE_FOLDER, TEXTE_TRANSFORMED_FOLDER);
15
+ ensureAndClearDir(transformedTextesDir);
16
+ for (const filePath of iterFilePaths(path.join(dataDir, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER))) {
17
+ const parsedFilePath = path.parse(filePath);
23
18
  if (parsedFilePath.ext !== ".xml") {
24
19
  continue;
25
20
  }
26
- const texteDirFromOriginal = parsedFilePath.dir.substring(filePath.indexOf(loaders_1.TEXTE_ORIGINAL_FOLDER) + loaders_1.TEXTE_ORIGINAL_FOLDER.length);
27
- const transformedTexteDir = path_1.default.join(transformedTextesDir, texteDirFromOriginal);
28
- fs_extra_1.default.ensureDirSync(transformedTexteDir);
21
+ const texteDirFromOriginal = parsedFilePath.dir.substring(filePath.indexOf(TEXTE_ORIGINAL_FOLDER) + TEXTE_ORIGINAL_FOLDER.length);
22
+ const transformedTexteDir = path.join(transformedTextesDir, texteDirFromOriginal);
23
+ fs.ensureDirSync(transformedTexteDir);
29
24
  if (!options["silent"]) {
30
25
  console.log(`Parsing texte ${parsedFilePath.name}.xml…`);
31
26
  }
32
- const parsedTexte = await (0, texte_1.parseTexteFromFile)(filePath);
27
+ const parsedTexte = await parseTexteFromFile(filePath);
33
28
  const exposeDesMotifsFileName = `${parsedFilePath.name}-expose`;
34
- const exposeDesMotifsFilePath = path_1.default.join(parsedFilePath.dir, `${exposeDesMotifsFileName}.html`);
35
- if (parsedTexte && fs_extra_1.default.existsSync(exposeDesMotifsFilePath)) {
29
+ const exposeDesMotifsFilePath = path.join(parsedFilePath.dir, `${exposeDesMotifsFileName}.html`);
30
+ if (parsedTexte && fs.existsSync(exposeDesMotifsFilePath)) {
36
31
  if (!options["silent"]) {
37
32
  console.log(`Parsing exposé des motifs ${exposeDesMotifsFileName}.html…`);
38
33
  }
39
- parsedTexte.exposeDesMotifs = await (0, texte_1.parseExposeDesMotifsFromFile)(exposeDesMotifsFilePath);
34
+ parsedTexte.exposeDesMotifs = await parseExposeDesMotifsFromFile(exposeDesMotifsFilePath);
40
35
  }
41
- fs_extra_1.default.writeJSONSync(path_1.default.join(transformedTexteDir, `${parsedFilePath.name}.json`), parsedTexte, { spaces: 2 });
36
+ fs.writeJSONSync(path.join(transformedTexteDir, `${parsedFilePath.name}.json`), parsedTexte, { spaces: 2 });
42
37
  }
43
38
  }
44
39
  main()
@@ -1,19 +1,14 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- const assert_1 = __importDefault(require("assert"));
7
- const command_line_args_1 = __importDefault(require("command-line-args"));
8
- const fs_extra_1 = __importDefault(require("fs-extra"));
9
- const path_1 = __importDefault(require("path"));
10
- const loaders_1 = require("../loaders");
11
- const texte_1 = require("../model/texte");
12
- const datautil_1 = require("./datautil");
13
- const cli_helpers_1 = require("./shared/cli_helpers");
14
- const util_1 = require("./shared/util");
1
+ import assert from "assert";
2
+ import commandLineArgs from "command-line-args";
3
+ import fs from "fs-extra";
4
+ import path from "path";
5
+ import { iterLoadSenatDossiersLegislatifsRapportUrls, iterLoadSenatDossiersLegislatifsTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER, TEXTE_TRANSFORMED_FOLDER, } from "../loaders";
6
+ import { parseExposeDesMotifs, parseTexte, parseTexteFromFile, } from "../model/texte";
7
+ import { UNDEFINED_SESSION } from "./datautil";
8
+ import { commonOptions } from "./shared/cli_helpers";
9
+ import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue, } from "./shared/util";
15
10
  const optionsDefinitions = [
16
- ...cli_helpers_1.commonOptions,
11
+ ...commonOptions,
17
12
  {
18
13
  help: "sessions of textes to retrieve; leave empty for all",
19
14
  multiple: true,
@@ -44,14 +39,14 @@ const optionsDefinitions = [
44
39
  type: Boolean,
45
40
  },
46
41
  ];
47
- const options = (0, command_line_args_1.default)(optionsDefinitions);
42
+ const options = commandLineArgs(optionsDefinitions);
48
43
  const textDecoder = new TextDecoder("utf8");
49
44
  async function retrieveDocument(documentUrl) {
50
45
  if (!options["silent"]) {
51
46
  console.log(`Retrieving document ${documentUrl}…`);
52
47
  }
53
48
  try {
54
- const response = await (0, util_1.fetchWithRetry)(documentUrl);
49
+ const response = await fetchWithRetry(documentUrl);
55
50
  if (!response.ok) {
56
51
  if (response.status === 404) {
57
52
  console.warn(`Texte ${documentUrl} not found`);
@@ -69,36 +64,36 @@ async function retrieveDocument(documentUrl) {
69
64
  }
70
65
  }
71
66
  async function retrieveTextes(dataDir) {
72
- const textesDir = path_1.default.join(dataDir, loaders_1.TEXTE_FOLDER);
73
- fs_extra_1.default.ensureDirSync(textesDir);
74
- const originalTextesDir = path_1.default.join(textesDir, loaders_1.TEXTE_ORIGINAL_FOLDER);
75
- const transformedTextesDir = path_1.default.join(textesDir, loaders_1.TEXTE_TRANSFORMED_FOLDER);
67
+ const textesDir = path.join(dataDir, TEXTE_FOLDER);
68
+ fs.ensureDirSync(textesDir);
69
+ const originalTextesDir = path.join(textesDir, TEXTE_ORIGINAL_FOLDER);
70
+ const transformedTextesDir = path.join(textesDir, TEXTE_TRANSFORMED_FOLDER);
76
71
  if (options["parseDocuments"]) {
77
- (0, util_1.ensureAndClearDir)(transformedTextesDir);
72
+ ensureAndClearDir(transformedTextesDir);
78
73
  }
79
74
  let retrievedTextesCount = 0;
80
75
  const texteUrlsNotFoundOrError = [];
81
76
  const texteUrlsParseError = [];
82
77
  for (const session of options["sessions"]) {
83
- for (const { item: texteMetadata, } of (0, loaders_1.iterLoadSenatDossiersLegislatifsTexteUrls)(dataDir, session)) {
84
- const texteDir = path_1.default.join(originalTextesDir, `${texteMetadata.session ?? datautil_1.UNDEFINED_SESSION}`, texteMetadata.name);
85
- fs_extra_1.default.ensureDirSync(texteDir);
78
+ for (const { item: texteMetadata, } of iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session)) {
79
+ const texteDir = path.join(originalTextesDir, `${texteMetadata.session ?? UNDEFINED_SESSION}`, texteMetadata.name);
80
+ fs.ensureDirSync(texteDir);
86
81
  let exposeDesMotifsContent = null;
87
82
  if (texteMetadata.url_expose_des_motifs) {
88
83
  if (!options["silent"]) {
89
84
  console.log("Retrieving exposé des motifs…");
90
85
  }
91
- const exposeDesMotifsPath = path_1.default.join(texteDir, `${texteMetadata.name}-expose.html`);
86
+ const exposeDesMotifsPath = path.join(texteDir, `${texteMetadata.name}-expose.html`);
92
87
  exposeDesMotifsContent = await retrieveDocument(texteMetadata.url_expose_des_motifs.toString());
93
88
  if (!exposeDesMotifsContent) {
94
89
  continue;
95
90
  }
96
- fs_extra_1.default.writeFileSync(exposeDesMotifsPath, Buffer.from(exposeDesMotifsContent));
91
+ fs.writeFileSync(exposeDesMotifsPath, Buffer.from(exposeDesMotifsContent));
97
92
  }
98
- if ((0, util_1.isOptionEmptyOrHasValue)(options["formats"], "xml")) {
99
- const textePath = path_1.default.join(texteDir, `${texteMetadata.name}.xml`);
93
+ if (isOptionEmptyOrHasValue(options["formats"], "xml")) {
94
+ const textePath = path.join(texteDir, `${texteMetadata.name}.xml`);
100
95
  let texteBuffer = null;
101
- if (!options["force"] && fs_extra_1.default.existsSync(textePath)) {
96
+ if (!options["force"] && fs.existsSync(textePath)) {
102
97
  if (!options["silent"]) {
103
98
  console.info(`Already retrieved texte ${textePath}…`);
104
99
  }
@@ -109,7 +104,7 @@ async function retrieveTextes(dataDir) {
109
104
  texteUrlsNotFoundOrError.push(texteMetadata.url_xml);
110
105
  continue;
111
106
  }
112
- fs_extra_1.default.writeFileSync(textePath, Buffer.from(texteBuffer));
107
+ fs.writeFileSync(textePath, Buffer.from(texteBuffer));
113
108
  retrievedTextesCount++;
114
109
  }
115
110
  if (options["parseDocuments"]) {
@@ -119,10 +114,10 @@ async function retrieveTextes(dataDir) {
119
114
  let parsedTexte = null;
120
115
  if (texteBuffer) {
121
116
  const texteXml = textDecoder.decode(texteBuffer);
122
- parsedTexte = (0, texte_1.parseTexte)(texteXml);
117
+ parsedTexte = parseTexte(texteXml);
123
118
  }
124
119
  else {
125
- parsedTexte = await (0, texte_1.parseTexteFromFile)(textePath);
120
+ parsedTexte = await parseTexteFromFile(textePath);
126
121
  }
127
122
  if (!parsedTexte) {
128
123
  texteUrlsParseError.push(texteMetadata.url_xml);
@@ -134,16 +129,16 @@ async function retrieveTextes(dataDir) {
134
129
  }
135
130
  const exposeDesMotifsHtml = textDecoder.decode(exposeDesMotifsContent);
136
131
  parsedTexte.exposeDesMotifs =
137
- (0, texte_1.parseExposeDesMotifs)(exposeDesMotifsHtml);
132
+ parseExposeDesMotifs(exposeDesMotifsHtml);
138
133
  }
139
- const transformedTexteDir = path_1.default.join(transformedTextesDir, `${texteMetadata.session ?? datautil_1.UNDEFINED_SESSION}`, texteMetadata.name);
140
- fs_extra_1.default.ensureDirSync(transformedTexteDir);
141
- fs_extra_1.default.writeJSONSync(path_1.default.join(transformedTexteDir, `${texteMetadata.name}.json`), parsedTexte, { spaces: 2 });
134
+ const transformedTexteDir = path.join(transformedTextesDir, `${texteMetadata.session ?? UNDEFINED_SESSION}`, texteMetadata.name);
135
+ fs.ensureDirSync(transformedTexteDir);
136
+ fs.writeJSONSync(path.join(transformedTexteDir, `${texteMetadata.name}.json`), parsedTexte, { spaces: 2 });
142
137
  }
143
138
  }
144
- if ((0, util_1.isOptionEmptyOrHasValue)(options["formats"], "html")) {
145
- const textePath = path_1.default.join(texteDir, `${texteMetadata.name}.html`);
146
- if (!options["force"] && fs_extra_1.default.existsSync(textePath)) {
139
+ if (isOptionEmptyOrHasValue(options["formats"], "html")) {
140
+ const textePath = path.join(texteDir, `${texteMetadata.name}.html`);
141
+ if (!options["force"] && fs.existsSync(textePath)) {
147
142
  if (!options["silent"]) {
148
143
  console.info(`Already retrieved texte ${textePath}…`);
149
144
  }
@@ -154,13 +149,13 @@ async function retrieveTextes(dataDir) {
154
149
  texteUrlsNotFoundOrError.push(texteMetadata.url_html);
155
150
  continue;
156
151
  }
157
- fs_extra_1.default.writeFileSync(textePath, Buffer.from(texteBuffer));
152
+ fs.writeFileSync(textePath, Buffer.from(texteBuffer));
158
153
  retrievedTextesCount++;
159
154
  }
160
155
  }
161
- if ((0, util_1.isOptionEmptyOrHasValue)(options["formats"], "pdf")) {
162
- const textePath = path_1.default.join(texteDir, `${texteMetadata.name}.pdf`);
163
- if (!options["force"] && fs_extra_1.default.existsSync(textePath)) {
156
+ if (isOptionEmptyOrHasValue(options["formats"], "pdf")) {
157
+ const textePath = path.join(texteDir, `${texteMetadata.name}.pdf`);
158
+ if (!options["force"] && fs.existsSync(textePath)) {
164
159
  if (!options["silent"]) {
165
160
  console.info(`Already retrieved texte ${textePath}…`);
166
161
  }
@@ -171,7 +166,7 @@ async function retrieveTextes(dataDir) {
171
166
  texteUrlsNotFoundOrError.push(texteMetadata.url_pdf);
172
167
  continue;
173
168
  }
174
- fs_extra_1.default.writeFileSync(textePath, Buffer.from(texteBuffer));
169
+ fs.writeFileSync(textePath, Buffer.from(texteBuffer));
175
170
  retrievedTextesCount++;
176
171
  }
177
172
  }
@@ -186,17 +181,17 @@ async function retrieveTextes(dataDir) {
186
181
  }
187
182
  }
188
183
  async function retrieveRapports(dataDir) {
189
- const rapportsDir = path_1.default.join(dataDir, loaders_1.RAPPORT_FOLDER);
190
- fs_extra_1.default.ensureDirSync(rapportsDir);
184
+ const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
185
+ fs.ensureDirSync(rapportsDir);
191
186
  let retrievedRapportsCount = 0;
192
187
  const rapportUrlsNotFoundOrError = [];
193
188
  for (const session of options["sessions"]) {
194
- for (const { item: rapportMetadata, } of (0, loaders_1.iterLoadSenatDossiersLegislatifsRapportUrls)(dataDir, session)) {
195
- const rapportDir = path_1.default.join(rapportsDir, `${rapportMetadata.session ?? datautil_1.UNDEFINED_SESSION}`, rapportMetadata.name);
196
- fs_extra_1.default.ensureDirSync(rapportDir);
197
- if ((0, util_1.isOptionEmptyOrHasValue)(options["formats"], "html")) {
198
- const rapportPath = path_1.default.join(rapportDir, `${rapportMetadata.name}.html`);
199
- if (!options["force"] && fs_extra_1.default.existsSync(rapportPath)) {
189
+ for (const { item: rapportMetadata, } of iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session)) {
190
+ const rapportDir = path.join(rapportsDir, `${rapportMetadata.session ?? UNDEFINED_SESSION}`, rapportMetadata.name);
191
+ fs.ensureDirSync(rapportDir);
192
+ if (isOptionEmptyOrHasValue(options["formats"], "html")) {
193
+ const rapportPath = path.join(rapportDir, `${rapportMetadata.name}.html`);
194
+ if (!options["force"] && fs.existsSync(rapportPath)) {
200
195
  if (!options["silent"]) {
201
196
  console.info(`Already retrieved rapport ${rapportPath}…`);
202
197
  }
@@ -207,12 +202,12 @@ async function retrieveRapports(dataDir) {
207
202
  rapportUrlsNotFoundOrError.push(rapportMetadata.url_html);
208
203
  continue;
209
204
  }
210
- fs_extra_1.default.writeFileSync(rapportPath, Buffer.from(rapportBuffer));
205
+ fs.writeFileSync(rapportPath, Buffer.from(rapportBuffer));
211
206
  retrievedRapportsCount++;
212
207
  }
213
- if ((0, util_1.isOptionEmptyOrHasValue)(options["formats"], "pdf")) {
214
- const rapportPath = path_1.default.join(rapportDir, `${rapportMetadata.name}.pdf`);
215
- if (!options["force"] && fs_extra_1.default.existsSync(rapportPath)) {
208
+ if (isOptionEmptyOrHasValue(options["formats"], "pdf")) {
209
+ const rapportPath = path.join(rapportDir, `${rapportMetadata.name}.pdf`);
210
+ if (!options["force"] && fs.existsSync(rapportPath)) {
216
211
  if (!options["silent"]) {
217
212
  console.info(`Already retrieved rapport ${rapportPath}…`);
218
213
  }
@@ -223,7 +218,7 @@ async function retrieveRapports(dataDir) {
223
218
  rapportUrlsNotFoundOrError.push(rapportMetadata.url_pdf);
224
219
  continue;
225
220
  }
226
- fs_extra_1.default.writeFileSync(rapportPath, Buffer.from(rapportBuffer));
221
+ fs.writeFileSync(rapportPath, Buffer.from(rapportBuffer));
227
222
  retrievedRapportsCount++;
228
223
  }
229
224
  }
@@ -235,12 +230,12 @@ async function retrieveRapports(dataDir) {
235
230
  }
236
231
  async function main() {
237
232
  const dataDir = options["dataDir"];
238
- (0, assert_1.default)(dataDir, "Missing argument: data directory");
233
+ assert(dataDir, "Missing argument: data directory");
239
234
  console.time("documents processing time");
240
- if ((0, util_1.isOptionEmptyOrHasValue)(options["types"], "textes")) {
235
+ if (isOptionEmptyOrHasValue(options["types"], "textes")) {
241
236
  await retrieveTextes(dataDir);
242
237
  }
243
- if ((0, util_1.isOptionEmptyOrHasValue)(options["types"], "rapports")) {
238
+ if (isOptionEmptyOrHasValue(options["types"], "rapports")) {
244
239
  await retrieveRapports(dataDir);
245
240
  }
246
241
  if (!options["silent"]) {