@tricoteuses/senat 1.3.5 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -43,19 +43,22 @@ npm run data:download ../senat-data -- [--categories All]
43
43
 
44
44
  Data from other sources is also available:
45
45
  ```bash
46
- # Retrieval of sénateurs' pictures from Sénat's website
47
- npm run data:retrieve_senateurs_photos ../senat-data
48
-
49
46
  # Retrieval of textes and rapports from Sénat's website
50
47
  # Available options for optional `formats` parameter : xml, html, pdf
51
48
  # Available options for optional `types` parameter : textes, rapports
52
- npm run data:retrieve_documents ../senat-data -- --sessions 2024 2025 [--formats xml pdf] [--types textes]
49
+ npm run data:retrieve_documents ../senat-data -- --fromSession 2023 [--formats xml pdf] [--types textes]
53
50
 
54
51
  # Retrieval & parsing (textes in xml format only for now)
55
- npm run data:retrieve_documents ../senat-data -- --sessions 2024 2025 --parseDocuments
52
+ npm run data:retrieve_documents ../senat-data -- --fromSession 2023 --parseDocuments
56
53
 
57
54
  # Parsing only
58
55
  npm run data:parse_textes_lois ../senat-data
56
+
57
+ # Retrieval of agenda from Sénat's website
58
+ npm run data:retrieve_agenda ../senat-data -- --fromSession 2023
59
+
60
+ # Retrieval of sénateurs' pictures from Sénat's website
61
+ npm run data:retrieve_senateurs_photos ../senat-data
59
62
  ```
60
63
 
61
64
  ## Data download using Docker
@@ -67,21 +70,19 @@ docker volume create senat-data # Create a volume to download the data
67
70
  docker run --name tricoteuses-senat -v senat-data:/app/senat-data -d registry.en-root.org/tricoteuses/tricoteuses-senat:latest
68
71
  ```
69
72
 
70
- Use the environment variable `CATEGORIES` and `SESSIONS` if needed.
73
+ Use the environment variables `CATEGORIES` and `FROM_SESSION` if needed.
71
74
 
72
75
  ## Using the data
73
76
 
74
77
  Once the data is downloaded, you can use loaders to retrieve it.
75
78
  To use loaders in your project, you can install the _@tricoteuses/senat_ package, and import the iterator functions that you need.
76
79
 
77
- _Only iterator for Questions, Sénateurs and Circonscriptions available for now_
78
-
79
80
  ```bash
80
81
  npm install @tricoteuses/senat
81
82
  ```
82
83
 
83
84
  ```js
84
- import { iterLoadSenatQuestions } from "@tricoteuses/senat/lib/loaders"
85
+ import { iterLoadSenatQuestions } from "@tricoteuses/senat/loaders"
85
86
 
86
87
  // Pass data directory and legislature as arguments
87
88
  for (const { item: question } of iterLoadSenatQuestions("../senat-data", 17)) {
package/lib/datasets.d.ts CHANGED
@@ -21,8 +21,9 @@ export declare enum EnabledDatasets {
21
21
  Questions = 8,
22
22
  Sens = 16,
23
23
  PhotosSenateurs = 32,
24
- All = 63
24
+ Agenda = 64,
25
+ All = 127
25
26
  }
26
27
  export declare const datasets: Datasets;
27
- export declare function getEnabledDatasets(categories: any): EnabledDatasets;
28
- export declare function getChosenFromEnabledDatasets(categories: any): Dataset[];
28
+ export declare function getEnabledDatasets(categories: string[]): EnabledDatasets;
29
+ export declare function getChosenDatasets(enabledDatasets: EnabledDatasets): Dataset[];
package/lib/datasets.js CHANGED
@@ -10,7 +10,8 @@ export var EnabledDatasets;
10
10
  EnabledDatasets[EnabledDatasets["Questions"] = 8] = "Questions";
11
11
  EnabledDatasets[EnabledDatasets["Sens"] = 16] = "Sens";
12
12
  EnabledDatasets[EnabledDatasets["PhotosSenateurs"] = 32] = "PhotosSenateurs";
13
- EnabledDatasets[EnabledDatasets["All"] = 63] = "All";
13
+ EnabledDatasets[EnabledDatasets["Agenda"] = 64] = "Agenda";
14
+ EnabledDatasets[EnabledDatasets["All"] = 127] = "All";
14
15
  })(EnabledDatasets || (EnabledDatasets = {}));
15
16
  export const datasets = {
16
17
  ameli: {
@@ -68,8 +69,7 @@ export function getEnabledDatasets(categories) {
68
69
  return enabledDatasets | (enabledDataset || EnabledDatasets.None);
69
70
  }, EnabledDatasets.None);
70
71
  }
71
- export function getChosenFromEnabledDatasets(categories) {
72
- const enabledDatasets = getEnabledDatasets(categories);
72
+ export function getChosenDatasets(enabledDatasets) {
73
73
  return [
74
74
  enabledDatasets & EnabledDatasets.Ameli ? datasets.ameli : null,
75
75
  enabledDatasets & EnabledDatasets.Debats ? datasets.debats : null,
package/lib/index.d.ts CHANGED
@@ -2,12 +2,9 @@ export type { AmendementResult, } from "./model/ameli";
2
2
  export type { DossierLegislatifResult, } from "./model/dosleg";
3
3
  export type { QuestionResult, } from "./model/questions";
4
4
  export type { CirconscriptionResult, OrganismeResult, SenateurResult, } from "./model/sens";
5
- export { sesFieldsToParseInt, sesFieldsToTrim, subFieldsToParseInt, subFieldsToTrim, txtAmeliFieldsToTrim, } from "./types/ameli";
6
5
  export type { Ses, Sub, TxtAmeli } from "./types/ameli";
7
- export { debatsFieldsToTrim, lecassdebFieldsToTrim } from "./types/debats";
8
6
  export type { Debat, LecAssDeb } from "./types/debats";
9
- export { assFieldsToTrim, audFieldsToTrim, auteurFieldsToTrim, dateSeanceFieldsToTrim, deccocFieldsToTrim, denrapFieldsToTrim, docattFieldsToParseInt, docattFieldsToTrim, ecrFieldsToTrim, etaloiFieldsToTrim, lecassFieldsToTrim, lecassrapFieldsToTrim, lectureFieldsToTrim, loiFieldsToTrim, orgFieldsToTrim, oritxtFieldsToTrim, quaFieldsToTrim, rapFieldsToParseInt, rapFieldsToTrim, raporgFieldsToTrim, scrFieldsToTrim, texteFieldsToParseInt, texteFieldsToTrim, typattFieldsToTrim, typlecFieldsToTrim, typloiFieldsToTrim, typtxtFieldsToTrim, typurlFieldsToTrim, } from "./types/dosleg";
10
7
  export type { Ass, Aud, Auteur, DateSeance, DecCoc, DenRap, DocAtt, Ecr, EtaLoi, LecAss, LecAssRap, Lecture, Loi, Org, OriTxt, Qua, Rap, RapOrg, Scr, Texte, TypAtt, TypLec, TypLoi, TypTxt, TypUrl, } from "./types/dosleg";
11
8
  export type { Photo, Sen } from "./types/sens";
12
- export { Session } from "./types/sessions";
9
+ export type { Session } from "./types/sessions";
13
10
  export type { Alinea, Article, Division, DivisionTag, FlatTexte } from "./types/texte";
package/lib/index.js CHANGED
@@ -1,4 +1 @@
1
- export { sesFieldsToParseInt, sesFieldsToTrim, subFieldsToParseInt, subFieldsToTrim, txtAmeliFieldsToTrim, } from "./types/ameli";
2
- export { debatsFieldsToTrim, lecassdebFieldsToTrim } from "./types/debats";
3
- export { assFieldsToTrim, audFieldsToTrim, auteurFieldsToTrim, dateSeanceFieldsToTrim, deccocFieldsToTrim, denrapFieldsToTrim, docattFieldsToParseInt, docattFieldsToTrim, ecrFieldsToTrim, etaloiFieldsToTrim, lecassFieldsToTrim, lecassrapFieldsToTrim, lectureFieldsToTrim, loiFieldsToTrim, orgFieldsToTrim, oritxtFieldsToTrim, quaFieldsToTrim, rapFieldsToParseInt, rapFieldsToTrim, raporgFieldsToTrim, scrFieldsToTrim, texteFieldsToParseInt, texteFieldsToTrim, typattFieldsToTrim, typlecFieldsToTrim, typloiFieldsToTrim, typtxtFieldsToTrim, typurlFieldsToTrim, } from "./types/dosleg";
4
- export { Session } from "./types/sessions";
1
+ export {};
package/lib/loaders.d.ts CHANGED
@@ -4,15 +4,16 @@ import { QuestionResult } from "./model/questions";
4
4
  import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
5
5
  import { FlatTexte } from "./types/texte";
6
6
  export { EnabledDatasets } from "./datasets";
7
+ export declare const AGENDA_FOLDER = "agenda";
7
8
  export declare const DOSLEG_DOSSIERS_FOLDER = "dossiers";
9
+ export declare const RAPPORT_FOLDER = "rap";
8
10
  export declare const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
9
11
  export declare const SENS_ORGANISMES_FOLDER = "organismes";
10
12
  export declare const SENS_SENATEURS_FOLDER = "senateurs";
11
13
  export declare const TEXTE_FOLDER = "leg";
12
- export declare const TEXTE_ORIGINAL_FOLDER = "original";
13
- export declare const TEXTE_TRANSFORMED_FOLDER = "transformed";
14
+ export declare const DATA_ORIGINAL_FOLDER = "original";
15
+ export declare const DATA_TRANSFORMED_FOLDER = "transformed";
14
16
  export declare const DOCUMENT_METADATA_FILE = "metadata.json";
15
- export declare const RAPPORT_FOLDER = "rap";
16
17
  type IterItem<T> = {
17
18
  item: T;
18
19
  filePathFromDataset?: string;
package/lib/loaders.js CHANGED
@@ -2,17 +2,18 @@ import fs from "fs";
2
2
  import path from "path";
3
3
  import legislatures from "./legislatures.json";
4
4
  import { datasets } from "./datasets";
5
- import { UNDEFINED_SESSION } from "./scripts/datautil";
5
+ import { UNDEFINED_SESSION } from "./types/sessions";
6
6
  export { EnabledDatasets } from "./datasets";
7
+ export const AGENDA_FOLDER = "agenda";
7
8
  export const DOSLEG_DOSSIERS_FOLDER = "dossiers";
9
+ export const RAPPORT_FOLDER = "rap";
8
10
  export const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
9
11
  export const SENS_ORGANISMES_FOLDER = "organismes";
10
12
  export const SENS_SENATEURS_FOLDER = "senateurs";
11
13
  export const TEXTE_FOLDER = "leg";
12
- export const TEXTE_ORIGINAL_FOLDER = "original";
13
- export const TEXTE_TRANSFORMED_FOLDER = "transformed";
14
+ export const DATA_ORIGINAL_FOLDER = "original";
15
+ export const DATA_TRANSFORMED_FOLDER = "transformed";
14
16
  export const DOCUMENT_METADATA_FILE = "metadata.json";
15
- export const RAPPORT_FOLDER = "rap";
16
17
  export function* iterFilePaths(dirPath) {
17
18
  if (dirPath && fs.existsSync(dirPath)) {
18
19
  const files = fs.readdirSync(dirPath, {
@@ -59,7 +60,7 @@ export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}
59
60
  }
60
61
  }
61
62
  export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
62
- let itemsDir = path.join(dataDir, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER);
63
+ let itemsDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
63
64
  if (session) {
64
65
  itemsDir = path.join(itemsDir, session.toString());
65
66
  }
@@ -121,7 +122,7 @@ export function* iterLoadSenatDossiersLegislatifsTextes(dataDir, session, option
121
122
  export function loadSenatTexteContent(dataDir, textePathFromDataset) {
122
123
  const parsedTextePath = path.parse(textePathFromDataset);
123
124
  const jsonTexteName = `${parsedTextePath.name}.json`;
124
- const fullTextePath = path.join(dataDir, TEXTE_FOLDER, TEXTE_TRANSFORMED_FOLDER, parsedTextePath.dir, jsonTexteName);
125
+ const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, parsedTextePath.dir, jsonTexteName);
125
126
  if (!fs.existsSync(fullTextePath)) {
126
127
  return { item: null };
127
128
  }
@@ -0,0 +1,2 @@
1
+ import { AgendaEvent } from "../types/agenda";
2
+ export declare function parseAgendaFromFile(htmlFilePath: string): Promise<AgendaEvent[] | null>;
@@ -0,0 +1,85 @@
1
+ import { JSDOM } from "jsdom";
2
+ import { DateTime } from "luxon";
3
+ function getEventType(eventClasses) {
4
+ const typeClass = [...eventClasses]
5
+ .find(className => className.startsWith("evt-"))
6
+ || null;
7
+ switch (typeClass) {
8
+ case "evt-seance":
9
+ return "Séance publique";
10
+ case "evt-instanz":
11
+ return "Commissions";
12
+ case "evt-cemi":
13
+ return "Mission de contrôle";
14
+ case "evt-deleg":
15
+ return "Offices et délégations";
16
+ case "evt-bureau":
17
+ return "Instances décisionnelles";
18
+ }
19
+ return null;
20
+ }
21
+ function getUrlDossierSenat(lienElements) {
22
+ const urlElement = [...lienElements]
23
+ .find(lienElement => lienElement.textContent?.includes("dossier législatif"));
24
+ return urlElement ? urlElement.getAttribute("href") : null;
25
+ }
26
+ function getHeuresDebutFin(timeStr) {
27
+ const normalizedHeureDebut = timeStr
28
+ ?.replace(/^À l'issue de l'espace réservé .* et au plus tard\s/i, "") // Must be first
29
+ ?.replace(/^(?:le )?matin/i, "10h00")
30
+ ?.replace(/^(?:l')?après-midi/i, "16h00")
31
+ ?.replace(/^(?:le )?soir/i, "20h00")
32
+ ?.replace(/^(?:la )?nuit/i, "22h00")
33
+ ?.replace(/^à\s/ig, "")
34
+ ?.replace(/heures/ig, "h00")
35
+ ?.replace(/\set.*/i, "")
36
+ ?.replace(/,.*/, "")
37
+ ?.replace(/\s\(hors hémicycle\)/i, "")
38
+ ?.replace(/\s*h\s*/ig, "h");
39
+ console.dir(`${timeStr};${normalizedHeureDebut}`);
40
+ const heureDebut = timeStr
41
+ ? DateTime.fromFormat(timeStr, "H'h'mm").toISOTime()
42
+ : null;
43
+ return {
44
+ heureDebut,
45
+ heureFin: null
46
+ };
47
+ }
48
+ function transformAgenda(document) {
49
+ const agendaEvents = [];
50
+ const eventElements = document.querySelectorAll(".evt");
51
+ for (const eventElement of eventElements) {
52
+ const type = getEventType(eventElement.classList);
53
+ const timeOriginal = eventElement.querySelector(".time")?.textContent || null;
54
+ const { heureDebut, heureFin } = getHeuresDebutFin(timeOriginal);
55
+ const titre = eventElement.querySelector(".titre")?.textContent || null;
56
+ const organe = eventElement.querySelector(".organe")?.textContent || null;
57
+ const objet = eventElement.querySelector(".objet")?.textContent || null;
58
+ const lieu = eventElement.querySelector(".lieu")?.textContent || null;
59
+ const url_dossier_senat = getUrlDossierSenat(eventElement.querySelectorAll(".lien a"));
60
+ const url_video = eventElement.querySelector(".video a")?.getAttribute("href") || null;
61
+ agendaEvents.push({
62
+ type,
63
+ heureDebut,
64
+ heureFin,
65
+ timeOriginal,
66
+ titre,
67
+ organe,
68
+ objet,
69
+ lieu,
70
+ url_dossier_senat,
71
+ url_video,
72
+ });
73
+ }
74
+ return agendaEvents;
75
+ }
76
+ export async function parseAgendaFromFile(htmlFilePath) {
77
+ try {
78
+ const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window;
79
+ return transformAgenda(document);
80
+ }
81
+ catch (error) {
82
+ console.error(`Could not parse texte with error ${error}`);
83
+ }
84
+ return null;
85
+ }
@@ -49,7 +49,7 @@ declare const findAllQuery: import("kysely").SelectQueryBuilder<{
49
49
  date_depot: string;
50
50
  dispositif: string | null;
51
51
  objet: string | null;
52
- etat_id: number;
52
+ etat: string;
53
53
  avis_commission: string | null;
54
54
  avis_gouvernement: string | null;
55
55
  sort: string | null;
@@ -86,7 +86,7 @@ export declare function findAll(): AsyncIterableIterator<{
86
86
  date_depot: string;
87
87
  dispositif: string | null;
88
88
  objet: string | null;
89
- etat_id: number;
89
+ etat: string;
90
90
  avis_commission: string | null;
91
91
  avis_gouvernement: string | null;
92
92
  sort: string | null;
@@ -43,7 +43,7 @@ const findAllQuery = dbAmeli
43
43
  .then(val("Motion"))
44
44
  .when("amd.typ", "=", "S")
45
45
  .then(val("Sous-amendement"))
46
- .else("'")
46
+ .else("")
47
47
  .end()
48
48
  .as("nature"),
49
49
  "amd.id as id",
@@ -56,15 +56,23 @@ const findAllQuery = dbAmeli
56
56
  "typsub.lib as subdivision_type",
57
57
  "amd.alinea as alinea",
58
58
  toDateString(ref("amd.datdep")).as("date_depot"),
59
+ "amd.dis as dispositif",
60
+ "amd.obj as objet",
59
61
  eb
60
62
  .case()
63
+ .when("amd.etaid", "=", 7)
64
+ .then(val("Diffusé"))
61
65
  .when("amd.etaid", "=", 8)
62
- .then(val("Cet amendement a été retiré avant séance."))
63
- .else(ref("amd.dis"))
66
+ .then(val("Retiré avant réunion ou séance"))
67
+ .when("amd.etaid", "=", 9)
68
+ .then(val("Examiné en commission ou séance"))
69
+ .when("amd.etaid", "=", 10)
70
+ .then(val("Irrecevable"))
71
+ .when("amd.etaid", "=", 11)
72
+ .then(val("Irrecevable"))
73
+ .else("")
64
74
  .end()
65
- .as("dispositif"),
66
- "amd.obj as objet",
67
- "amd.etaid as etat_id",
75
+ .as("etat"),
68
76
  "avicom.lib as avis_commission",
69
77
  "avigvt.lib as avis_gouvernement",
70
78
  eb.fn.coalesce("sor.lib", "irr.libirr").as("sort"),
@@ -214,12 +214,12 @@ export declare function findAll(): AsyncIterableIterator<{
214
214
  numero_JO: string | null;
215
215
  url_JO: string | null;
216
216
  }>;
217
- export declare function findSenatTexteUrls(sessions?: string[]): AsyncIterableIterator<{
217
+ export declare function findSenatTexteUrls(sessions?: number[]): AsyncIterableIterator<{
218
218
  session: string | null | undefined;
219
219
  url: string;
220
220
  hasExposeDesMotifs: boolean;
221
221
  }>;
222
- export declare function findSenatRapportUrls(sessions?: string[]): AsyncIterableIterator<{
222
+ export declare function findSenatRapportUrls(sessions?: number[]): AsyncIterableIterator<{
223
223
  url: string;
224
224
  session: string | null | undefined;
225
225
  }>;
@@ -170,11 +170,12 @@ export function findAll() {
170
170
  return findAllQuery.stream();
171
171
  }
172
172
  export function findSenatTexteUrls(sessions = []) {
173
+ const sessionsStr = sessions.map(session => String(session));
173
174
  return dbDosleg
174
175
  .selectFrom("texte")
175
176
  .where("texurl", "is not", null)
176
177
  .where("typurl", "=", "I")
177
- .$if(sessions.length > 0, (qb) => qb.where("sesann", "in", sessions))
178
+ .$if(sessions.length > 0, (qb) => qb.where("sesann", "in", sessionsStr))
178
179
  .select(({ eb, ref }) => [
179
180
  "sesann as session",
180
181
  rtrim(ref("texurl")).as("url"),
@@ -190,11 +191,12 @@ export function findSenatTexteUrls(sessions = []) {
190
191
  .stream();
191
192
  }
192
193
  export function findSenatRapportUrls(sessions = []) {
194
+ const sessionsStr = sessions.map(session => String(session));
193
195
  return dbDosleg
194
196
  .selectFrom("rap")
195
197
  .where("rapurl", "is not", null)
196
198
  .where("typurl", "=", "I")
197
- .$if(sessions.length > 0, (qb) => qb.where("sesann", "in", sessions))
199
+ .$if(sessions.length > 0, (qb) => qb.where("sesann", "in", sessionsStr))
198
200
  .select(({ ref }) => [rtrim(ref("rapurl")).as("url"), "sesann as session"])
199
201
  .$narrowType()
200
202
  .stream();
@@ -3,10 +3,11 @@ import commandLineArgs from "command-line-args";
3
3
  import fs from "fs-extra";
4
4
  import path from "path";
5
5
  import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
6
- import { DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER, } from "../loaders";
6
+ import { DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER, } from "../loaders";
7
7
  import { findAllAmendements, findAllCirconscriptions, findAllLois, findAllOrganismes, findAllQuestions, findAllSens, } from "../model";
8
8
  import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg";
9
- import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION, } from "./datautil";
9
+ import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
10
+ import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, } from "./datautil";
10
11
  import { commonOptions } from "./shared/cli_helpers";
11
12
  import { ensureAndClearDir } from "./shared/util";
12
13
  const optionsDefinitions = [...commonOptions];
@@ -16,123 +17,99 @@ const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
16
17
  const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
17
18
  const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
18
19
  async function convertData() {
19
- const enabledDatasets = getEnabledDatasets(options["categories"]);
20
20
  const dataDir = options["dataDir"];
21
21
  assert(dataDir, "Missing argument: data directory");
22
+ const enabledDatasets = getEnabledDatasets(options["categories"]);
23
+ const sessions = getSessionsFromStart(options["fromSession"]);
22
24
  console.time("data transformation time");
23
25
  if (enabledDatasets & EnabledDatasets.Ameli) {
24
- const dataset = datasets.ameli;
25
- if (!options["silent"]) {
26
- console.log(`Converting database ${dataset.database} data into files…`);
27
- }
28
- const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
29
- ensureAndClearDir(ameliReorganizedRootDir);
30
- for await (const amendement of findAllAmendements()) {
31
- if (options["verbose"]) {
32
- console.log(`Converting ${amendement.numero} file…`);
33
- }
34
- const session = String(amendement.session) || UNDEFINED_SESSION;
35
- const signetDossierLegislatif = amendement.signet_dossier_legislatif ||
36
- `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase();
37
- const ameliReorganizedDir = path.join(ameliReorganizedRootDir, session, signetDossierLegislatif);
38
- fs.ensureDirSync(ameliReorganizedDir);
39
- const amendementFileName = `${amendement.numero}.json`;
40
- fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 });
41
- }
26
+ await convertDatasetAmeli(dataDir);
42
27
  }
43
28
  if (enabledDatasets & EnabledDatasets.DosLeg) {
44
- const dataset = datasets.dosleg;
45
- if (!options["silent"]) {
46
- console.log(`Converting database ${dataset.database} data into files…`);
47
- }
48
- const doslegReorganizedRootDir = path.join(dataDir, dataset.database);
49
- const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER);
50
- ensureAndClearDir(doslegReorganizedRootDir);
51
- ensureAndClearDir(dossiersReorganizedDir);
52
- for await (const loi of findAllLois()) {
53
- if (options["verbose"]) {
54
- console.log(`Converting ${loi.signet} file…`);
55
- }
56
- let loiReorganizedDir = path.join(dossiersReorganizedDir, UNDEFINED_SESSION);
57
- const signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
58
- if (signetParts && "session" in signetParts) {
59
- const { session } = signetParts;
60
- const formattedSession = formatToFourDigitSession(session);
61
- loiReorganizedDir = path.join(dossiersReorganizedDir, formattedSession);
62
- }
63
- fs.ensureDirSync(loiReorganizedDir);
64
- const loiFileName = `${loi.signet}.json`;
65
- fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, {
66
- spaces: 2,
67
- });
68
- }
69
- await convertTexteUrls(dataDir);
70
- await convertRapportUrls(dataDir);
29
+ await convertDatasetDosLeg(dataDir, sessions);
71
30
  }
72
31
  if (enabledDatasets & EnabledDatasets.Questions) {
73
- const dataset = datasets.questions;
74
- if (!options["silent"]) {
75
- console.log(`Converting database ${dataset.database} data into files…`);
76
- }
77
- const questionsReorganizedRootDir = path.join(dataDir, dataset.database);
78
- ensureAndClearDir(questionsReorganizedRootDir);
79
- for await (const question of findAllQuestions()) {
80
- if (options["verbose"]) {
81
- console.log(`Converting ${question.reference} file…`);
82
- }
83
- const legislature = question.legislature ? question.legislature : 0;
84
- const questionReorganizedDir = path.join(questionsReorganizedRootDir, String(legislature));
85
- fs.ensureDirSync(questionReorganizedDir);
86
- const questionFileName = `${question.reference}.json`;
87
- fs.writeJSONSync(path.join(questionReorganizedDir, questionFileName), question, { spaces: 2 });
88
- }
32
+ await convertDatasetQuestions(dataDir);
89
33
  }
90
34
  if (enabledDatasets & EnabledDatasets.Sens) {
91
- const dataset = datasets.sens;
92
- if (!options["silent"]) {
93
- console.log(`Converting database ${dataset.database} data into files…`);
94
- }
95
- const sensReorganizedRootDir = path.join(dataDir, dataset.database);
96
- const senateursReorganizedDir = path.join(sensReorganizedRootDir, SENS_SENATEURS_FOLDER);
97
- const circonscriptionsReorganizedDir = path.join(sensReorganizedRootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
98
- const organismesReorganizedDir = path.join(sensReorganizedRootDir, SENS_ORGANISMES_FOLDER);
99
- ensureAndClearDir(sensReorganizedRootDir);
100
- ensureAndClearDir(senateursReorganizedDir);
101
- ensureAndClearDir(circonscriptionsReorganizedDir);
102
- ensureAndClearDir(organismesReorganizedDir);
103
- for await (const sen of findAllSens()) {
104
- if (options["verbose"]) {
105
- console.log(`Converting ${sen.matricule} file…`);
106
- }
107
- const senFileName = `${sen.matricule}.json`;
108
- fs.writeJSONSync(path.join(senateursReorganizedDir, senFileName), sen, {
109
- spaces: 2,
110
- });
35
+ await convertDatasetSens(dataDir);
36
+ }
37
+ if (!options["silent"]) {
38
+ console.timeEnd("data transformation time");
39
+ }
40
+ }
41
+ async function convertDatasetAmeli(dataDir) {
42
+ const dataset = datasets.ameli;
43
+ if (!options["silent"]) {
44
+ console.log(`Converting database ${dataset.database} data into files…`);
45
+ }
46
+ const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
47
+ ensureAndClearDir(ameliReorganizedRootDir);
48
+ for await (const amendement of findAllAmendements()) {
49
+ if (options["verbose"]) {
50
+ console.log(`Converting ${amendement.numero} file…`);
111
51
  }
112
- for await (const circonscription of findAllCirconscriptions()) {
113
- if (options["verbose"]) {
114
- console.log(`Converting ${circonscription.identifiant} file…`);
115
- }
116
- const circonscriptionFileName = `${circonscription.identifiant}.json`;
117
- fs.writeJSONSync(path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, { spaces: 2 });
52
+ const session = String(amendement.session) || UNDEFINED_SESSION;
53
+ const signetDossierLegislatif = amendement.signet_dossier_legislatif ||
54
+ `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase();
55
+ const ameliReorganizedDir = path.join(ameliReorganizedRootDir, String(session), signetDossierLegislatif);
56
+ fs.ensureDirSync(ameliReorganizedDir);
57
+ const amendementFileName = `${amendement.numero}.json`;
58
+ fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 });
59
+ }
60
+ }
61
+ async function convertDatasetDosLeg(dataDir, sessions) {
62
+ const dataset = datasets.dosleg;
63
+ if (!options["silent"]) {
64
+ console.log(`Converting database ${dataset.database} data into files…`);
65
+ }
66
+ const doslegReorganizedRootDir = path.join(dataDir, dataset.database);
67
+ const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER);
68
+ ensureAndClearDir(doslegReorganizedRootDir);
69
+ ensureAndClearDir(dossiersReorganizedDir);
70
+ for await (const loi of findAllLois()) {
71
+ if (options["verbose"]) {
72
+ console.log(`Converting ${loi.signet} file…`);
118
73
  }
119
- for await (const organisme of findAllOrganismes()) {
120
- if (options["verbose"]) {
121
- console.log(`Converting ${organisme.code} file…`);
122
- }
123
- const organismeFileName = `${organisme.code}.json`;
124
- fs.writeJSONSync(path.join(organismesReorganizedDir, organismeFileName), organisme, { spaces: 2 });
74
+ let loiReorganizedDir = path.join(dossiersReorganizedDir, String(UNDEFINED_SESSION));
75
+ const signetParts = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
76
+ if (signetParts && "session" in signetParts) {
77
+ const { session } = signetParts;
78
+ const formattedSession = formatToFourDigitSession(session);
79
+ loiReorganizedDir = path.join(dossiersReorganizedDir, String(formattedSession));
125
80
  }
81
+ fs.ensureDirSync(loiReorganizedDir);
82
+ const loiFileName = `${loi.signet}.json`;
83
+ fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, {
84
+ spaces: 2,
85
+ });
126
86
  }
87
+ await convertTexteUrls(dataDir, sessions);
88
+ await convertRapportUrls(dataDir, sessions);
89
+ }
90
+ async function convertDatasetQuestions(dataDir) {
91
+ const dataset = datasets.questions;
127
92
  if (!options["silent"]) {
128
- console.timeEnd("data transformation time");
93
+ console.log(`Converting database ${dataset.database} data into files…`);
94
+ }
95
+ const questionsReorganizedRootDir = path.join(dataDir, dataset.database);
96
+ ensureAndClearDir(questionsReorganizedRootDir);
97
+ for await (const question of findAllQuestions()) {
98
+ if (options["verbose"]) {
99
+ console.log(`Converting ${question.reference} file…`);
100
+ }
101
+ const legislature = question.legislature ? question.legislature : 0;
102
+ const questionReorganizedDir = path.join(questionsReorganizedRootDir, String(legislature));
103
+ fs.ensureDirSync(questionReorganizedDir);
104
+ const questionFileName = `${question.reference}.json`;
105
+ fs.writeJSONSync(path.join(questionReorganizedDir, questionFileName), question, { spaces: 2 });
129
106
  }
130
107
  }
131
- async function convertTexteUrls(dataDir) {
108
+ async function convertTexteUrls(dataDir, sessions) {
132
109
  const textesDir = path.join(dataDir, TEXTE_FOLDER);
133
110
  fs.ensureDirSync(textesDir);
134
- const originalTextesDir = path.join(textesDir, TEXTE_ORIGINAL_FOLDER);
135
- for await (const texte of findSenatTexteUrls(options["sessions"])) {
111
+ const originalTextesDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);
112
+ for await (const texte of findSenatTexteUrls(sessions)) {
136
113
  const texteName = path.parse(texte.url).name;
137
114
  const texteDir = path.join(originalTextesDir, `${texte.session ?? UNDEFINED_SESSION}`, texteName);
138
115
  fs.ensureDirSync(texteDir);
@@ -151,10 +128,10 @@ async function convertTexteUrls(dataDir) {
151
128
  });
152
129
  }
153
130
  }
154
- async function convertRapportUrls(dataDir) {
131
+ async function convertRapportUrls(dataDir, sessions) {
155
132
  const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
156
133
  fs.ensureDirSync(rapportsDir);
157
- for await (const rapport of findSenatRapportUrls(options["sessions"])) {
134
+ for await (const rapport of findSenatRapportUrls(sessions)) {
158
135
  const parsedRapportUrl = path.parse(rapport.url);
159
136
  const rapportName = parsedRapportUrl.name;
160
137
  const rapportDir = path.join(rapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName);
@@ -180,6 +157,43 @@ async function convertRapportUrls(dataDir) {
180
157
  });
181
158
  }
182
159
  }
160
+ async function convertDatasetSens(dataDir) {
161
+ const dataset = datasets.sens;
162
+ if (!options["silent"]) {
163
+ console.log(`Converting database ${dataset.database} data into files…`);
164
+ }
165
+ const sensReorganizedRootDir = path.join(dataDir, dataset.database);
166
+ const senateursReorganizedDir = path.join(sensReorganizedRootDir, SENS_SENATEURS_FOLDER);
167
+ const circonscriptionsReorganizedDir = path.join(sensReorganizedRootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
168
+ const organismesReorganizedDir = path.join(sensReorganizedRootDir, SENS_ORGANISMES_FOLDER);
169
+ ensureAndClearDir(sensReorganizedRootDir);
170
+ ensureAndClearDir(senateursReorganizedDir);
171
+ ensureAndClearDir(circonscriptionsReorganizedDir);
172
+ ensureAndClearDir(organismesReorganizedDir);
173
+ for await (const sen of findAllSens()) {
174
+ if (options["verbose"]) {
175
+ console.log(`Converting ${sen.matricule} file…`);
176
+ }
177
+ const senFileName = `${sen.matricule}.json`;
178
+ fs.writeJSONSync(path.join(senateursReorganizedDir, senFileName), sen, {
179
+ spaces: 2,
180
+ });
181
+ }
182
+ for await (const circonscription of findAllCirconscriptions()) {
183
+ if (options["verbose"]) {
184
+ console.log(`Converting ${circonscription.identifiant} file…`);
185
+ }
186
+ const circonscriptionFileName = `${circonscription.identifiant}.json`;
187
+ fs.writeJSONSync(path.join(circonscriptionsReorganizedDir, circonscriptionFileName), circonscription, { spaces: 2 });
188
+ }
189
+ for await (const organisme of findAllOrganismes()) {
190
+ if (options["verbose"]) {
191
+ console.log(`Converting ${organisme.code} file…`);
192
+ }
193
+ const organismeFileName = `${organisme.code}.json`;
194
+ fs.writeJSONSync(path.join(organismesReorganizedDir, organismeFileName), organisme, { spaces: 2 });
195
+ }
196
+ }
183
197
  convertData()
184
198
  .then(() => process.exit(0))
185
199
  .catch((error) => {
@@ -1,5 +1,4 @@
1
1
  export declare const SIGNET_STRUCTURE_REGEXP: RegExp;
2
2
  export declare const AKN_IDENTIFICATION_STRUCTURE_REGEXP: RegExp;
3
3
  export declare const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP: RegExp;
4
- export declare const UNDEFINED_SESSION = "0";
5
- export declare function formatToFourDigitSession(session: string): string;
4
+ export declare function formatToFourDigitSession(session: string): string | 0;