@tricoteuses/senat 2.22.4 → 2.22.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/aggregates.d.ts +52 -0
- package/lib/aggregates.js +930 -0
- package/lib/aggregates.mjs +713 -0
- package/lib/aggregates.ts +833 -0
- package/lib/config.d.ts +10 -0
- package/lib/config.js +16 -0
- package/lib/config.mjs +16 -0
- package/lib/config.ts +26 -0
- package/lib/databases.d.ts +2 -0
- package/lib/databases.js +26 -0
- package/lib/databases.mjs +57 -0
- package/lib/databases.ts +71 -0
- package/lib/datasets.d.ts +34 -0
- package/lib/datasets.js +233 -0
- package/lib/datasets.mjs +78 -0
- package/lib/datasets.ts +118 -0
- package/lib/fields.d.ts +10 -0
- package/lib/fields.js +68 -0
- package/lib/fields.mjs +22 -0
- package/lib/fields.ts +29 -0
- package/lib/git.d.ts +26 -0
- package/lib/git.js +167 -0
- package/lib/index.d.ts +13 -0
- package/lib/index.js +1 -0
- package/lib/index.mjs +7 -0
- package/lib/index.ts +64 -0
- package/lib/inserters.d.ts +98 -0
- package/lib/inserters.js +500 -0
- package/lib/inserters.mjs +360 -0
- package/lib/inserters.ts +521 -0
- package/lib/legislatures.json +38 -0
- package/lib/loaders.d.ts +58 -0
- package/lib/loaders.js +286 -0
- package/lib/loaders.mjs +158 -0
- package/lib/loaders.ts +271 -0
- package/lib/model/agenda.d.ts +6 -0
- package/lib/model/agenda.js +148 -0
- package/lib/model/ameli.d.ts +51 -0
- package/lib/model/ameli.js +149 -0
- package/lib/model/ameli.mjs +84 -0
- package/lib/model/ameli.ts +100 -0
- package/lib/model/commission.d.ts +18 -0
- package/lib/model/commission.js +269 -0
- package/lib/model/debats.d.ts +67 -0
- package/lib/model/debats.js +95 -0
- package/lib/model/debats.mjs +43 -0
- package/lib/model/debats.ts +68 -0
- package/lib/model/documents.d.ts +12 -0
- package/lib/model/documents.js +151 -0
- package/lib/model/dosleg.d.ts +7 -0
- package/lib/model/dosleg.js +326 -0
- package/lib/model/dosleg.mjs +196 -0
- package/lib/model/dosleg.ts +240 -0
- package/lib/model/index.d.ts +7 -0
- package/lib/model/index.js +7 -0
- package/lib/model/index.mjs +5 -0
- package/lib/model/index.ts +15 -0
- package/lib/model/questions.d.ts +45 -0
- package/lib/model/questions.js +89 -0
- package/lib/model/questions.mjs +71 -0
- package/lib/model/questions.ts +93 -0
- package/lib/model/scrutins.d.ts +13 -0
- package/lib/model/scrutins.js +114 -0
- package/lib/model/seance.d.ts +3 -0
- package/lib/model/seance.js +267 -0
- package/lib/model/sens.d.ts +146 -0
- package/lib/model/sens.js +454 -0
- package/lib/model/sens.mjs +415 -0
- package/lib/model/sens.ts +516 -0
- package/lib/model/texte.d.ts +7 -0
- package/lib/model/texte.js +256 -0
- package/lib/model/texte.mjs +208 -0
- package/lib/model/texte.ts +229 -0
- package/lib/model/util.d.ts +9 -0
- package/lib/model/util.js +38 -0
- package/lib/model/util.mjs +19 -0
- package/lib/model/util.ts +32 -0
- package/lib/parsers/texte.d.ts +7 -0
- package/lib/parsers/texte.js +228 -0
- package/lib/raw_types/ameli.d.ts +914 -0
- package/lib/raw_types/ameli.js +5 -0
- package/lib/raw_types/ameli.mjs +163 -0
- package/lib/raw_types/debats.d.ts +207 -0
- package/lib/raw_types/debats.js +5 -0
- package/lib/raw_types/debats.mjs +58 -0
- package/lib/raw_types/dosleg.d.ts +1619 -0
- package/lib/raw_types/dosleg.js +5 -0
- package/lib/raw_types/dosleg.mjs +438 -0
- package/lib/raw_types/questions.d.ts +419 -0
- package/lib/raw_types/questions.js +5 -0
- package/lib/raw_types/questions.mjs +11 -0
- package/lib/raw_types/senat.d.ts +11368 -0
- package/lib/raw_types/senat.js +5 -0
- package/lib/raw_types/sens.d.ts +8248 -0
- package/lib/raw_types/sens.js +5 -0
- package/lib/raw_types/sens.mjs +508 -0
- package/lib/raw_types_kysely/ameli.d.ts +915 -0
- package/lib/raw_types_kysely/ameli.js +7 -0
- package/lib/raw_types_kysely/ameli.mjs +5 -0
- package/lib/raw_types_kysely/ameli.ts +951 -0
- package/lib/raw_types_kysely/debats.d.ts +207 -0
- package/lib/raw_types_kysely/debats.js +7 -0
- package/lib/raw_types_kysely/debats.mjs +5 -0
- package/lib/raw_types_kysely/debats.ts +222 -0
- package/lib/raw_types_kysely/dosleg.d.ts +3532 -0
- package/lib/raw_types_kysely/dosleg.js +7 -0
- package/lib/raw_types_kysely/dosleg.mjs +5 -0
- package/lib/raw_types_kysely/dosleg.ts +3621 -0
- package/lib/raw_types_kysely/questions.d.ts +414 -0
- package/lib/raw_types_kysely/questions.js +7 -0
- package/lib/raw_types_kysely/questions.mjs +5 -0
- package/lib/raw_types_kysely/questions.ts +426 -0
- package/lib/raw_types_kysely/sens.d.ts +4394 -0
- package/lib/raw_types_kysely/sens.js +7 -0
- package/lib/raw_types_kysely/sens.mjs +5 -0
- package/lib/raw_types_kysely/sens.ts +4499 -0
- package/lib/raw_types_schemats/ameli.d.ts +539 -0
- package/lib/raw_types_schemats/ameli.js +2 -0
- package/lib/raw_types_schemats/ameli.mjs +2 -0
- package/lib/raw_types_schemats/ameli.ts +601 -0
- package/lib/raw_types_schemats/debats.d.ts +127 -0
- package/lib/raw_types_schemats/debats.js +2 -0
- package/lib/raw_types_schemats/debats.mjs +2 -0
- package/lib/raw_types_schemats/debats.ts +145 -0
- package/lib/raw_types_schemats/dosleg.d.ts +977 -0
- package/lib/raw_types_schemats/dosleg.js +2 -0
- package/lib/raw_types_schemats/dosleg.mjs +2 -0
- package/lib/raw_types_schemats/dosleg.ts +2193 -0
- package/lib/raw_types_schemats/questions.d.ts +235 -0
- package/lib/raw_types_schemats/questions.js +2 -0
- package/lib/raw_types_schemats/questions.mjs +2 -0
- package/lib/raw_types_schemats/questions.ts +249 -0
- package/lib/raw_types_schemats/sens.d.ts +6915 -0
- package/lib/raw_types_schemats/sens.js +2 -0
- package/lib/raw_types_schemats/sens.mjs +2 -0
- package/lib/raw_types_schemats/sens.ts +2907 -0
- package/lib/scripts/convert_data.d.ts +1 -0
- package/lib/scripts/convert_data.js +354 -0
- package/lib/scripts/convert_data.mjs +181 -0
- package/lib/scripts/convert_data.ts +243 -0
- package/lib/scripts/data-download.d.ts +1 -0
- package/lib/scripts/data-download.js +12 -0
- package/lib/scripts/datautil.d.ts +8 -0
- package/lib/scripts/datautil.js +34 -0
- package/lib/scripts/datautil.mjs +16 -0
- package/lib/scripts/datautil.ts +19 -0
- package/lib/scripts/images/transparent_150x192.jpg +0 -0
- package/lib/scripts/images/transparent_155x225.jpg +0 -0
- package/lib/scripts/parse_textes.d.ts +1 -0
- package/lib/scripts/parse_textes.js +44 -0
- package/lib/scripts/parse_textes.mjs +46 -0
- package/lib/scripts/parse_textes.ts +65 -0
- package/lib/scripts/retrieve_agenda.d.ts +1 -0
- package/lib/scripts/retrieve_agenda.js +132 -0
- package/lib/scripts/retrieve_cr_commission.d.ts +1 -0
- package/lib/scripts/retrieve_cr_commission.js +364 -0
- package/lib/scripts/retrieve_cr_seance.d.ts +6 -0
- package/lib/scripts/retrieve_cr_seance.js +347 -0
- package/lib/scripts/retrieve_documents.d.ts +3 -0
- package/lib/scripts/retrieve_documents.js +219 -0
- package/lib/scripts/retrieve_documents.mjs +249 -0
- package/lib/scripts/retrieve_documents.ts +298 -0
- package/lib/scripts/retrieve_open_data.d.ts +1 -0
- package/lib/scripts/retrieve_open_data.js +315 -0
- package/lib/scripts/retrieve_open_data.mjs +217 -0
- package/lib/scripts/retrieve_open_data.ts +268 -0
- package/lib/scripts/retrieve_senateurs_photos.d.ts +1 -0
- package/lib/scripts/retrieve_senateurs_photos.js +147 -0
- package/lib/scripts/retrieve_senateurs_photos.mjs +147 -0
- package/lib/scripts/retrieve_senateurs_photos.ts +177 -0
- package/lib/scripts/retrieve_videos.d.ts +1 -0
- package/lib/scripts/retrieve_videos.js +461 -0
- package/lib/scripts/shared/cli_helpers.d.ts +95 -0
- package/lib/scripts/shared/cli_helpers.js +91 -0
- package/lib/scripts/shared/cli_helpers.ts +36 -0
- package/lib/scripts/shared/util.d.ts +4 -0
- package/lib/scripts/shared/util.js +35 -0
- package/lib/scripts/shared/util.ts +33 -0
- package/lib/scripts/test_iter_load.d.ts +1 -0
- package/lib/scripts/test_iter_load.js +12 -0
- package/lib/src/conversion_textes.js +10 -1
- package/lib/src/index.d.ts +1 -1
- package/lib/src/index.js +1 -1
- package/lib/src/loaders.d.ts +1 -0
- package/lib/src/loaders.js +1 -0
- package/lib/src/parsers/texte.js +2 -2
- package/lib/src/scripts/convert_data.js +11 -10
- package/lib/src/scripts/retrieve_documents.d.ts +1 -1
- package/lib/src/scripts/retrieve_documents.js +25 -10
- package/lib/src/scripts/retrieve_open_data.js +1 -1
- package/lib/src/types/sessions.d.ts +5 -4
- package/lib/src/types/sessions.js +10 -75
- package/lib/src/types/texte.d.ts +2 -0
- package/lib/strings.d.ts +1 -0
- package/lib/strings.js +18 -0
- package/lib/strings.mjs +18 -0
- package/lib/strings.ts +26 -0
- package/lib/types/agenda.d.ts +44 -0
- package/lib/types/agenda.js +1 -0
- package/lib/types/ameli.d.ts +5 -0
- package/lib/types/ameli.js +1 -0
- package/lib/types/ameli.mjs +13 -0
- package/lib/types/ameli.ts +21 -0
- package/lib/types/compte_rendu.d.ts +83 -0
- package/lib/types/compte_rendu.js +1 -0
- package/lib/types/debats.d.ts +2 -0
- package/lib/types/debats.js +1 -0
- package/lib/types/debats.mjs +2 -0
- package/lib/types/debats.ts +6 -0
- package/lib/types/dosleg.d.ts +70 -0
- package/lib/types/dosleg.js +1 -0
- package/lib/types/dosleg.mjs +151 -0
- package/lib/types/dosleg.ts +284 -0
- package/lib/types/questions.d.ts +2 -0
- package/lib/types/questions.js +1 -0
- package/lib/types/questions.mjs +1 -0
- package/lib/types/questions.ts +3 -0
- package/lib/types/sens.d.ts +10 -0
- package/lib/types/sens.js +1 -0
- package/lib/types/sens.mjs +1 -0
- package/lib/types/sens.ts +12 -0
- package/lib/types/sessions.d.ts +5 -0
- package/lib/types/sessions.js +84 -0
- package/lib/types/sessions.mjs +43 -0
- package/lib/types/sessions.ts +42 -0
- package/lib/types/texte.d.ts +74 -0
- package/lib/types/texte.js +16 -0
- package/lib/types/texte.mjs +16 -0
- package/lib/types/texte.ts +76 -0
- package/lib/typings/windows-1252.d.js +2 -0
- package/lib/typings/windows-1252.d.mjs +2 -0
- package/lib/typings/windows-1252.d.ts +11 -0
- package/lib/utils/cr_spliting.d.ts +28 -0
- package/lib/utils/cr_spliting.js +265 -0
- package/lib/utils/date.d.ts +10 -0
- package/lib/utils/date.js +100 -0
- package/lib/utils/nvs-timecode.d.ts +7 -0
- package/lib/utils/nvs-timecode.js +79 -0
- package/lib/utils/reunion_grouping.d.ts +9 -0
- package/lib/utils/reunion_grouping.js +361 -0
- package/lib/utils/reunion_odj_building.d.ts +5 -0
- package/lib/utils/reunion_odj_building.js +154 -0
- package/lib/utils/reunion_parsing.d.ts +23 -0
- package/lib/utils/reunion_parsing.js +209 -0
- package/lib/utils/scoring.d.ts +14 -0
- package/lib/utils/scoring.js +147 -0
- package/lib/utils/string_cleaning.d.ts +7 -0
- package/lib/utils/string_cleaning.js +57 -0
- package/lib/validators/config.d.ts +9 -0
- package/lib/validators/config.js +10 -0
- package/lib/validators/config.mjs +54 -0
- package/lib/validators/config.ts +79 -0
- package/lib/validators/senat.d.ts +0 -0
- package/lib/validators/senat.js +28 -0
- package/lib/validators/senat.mjs +24 -0
- package/lib/validators/senat.ts +26 -0
- package/package.json +5 -5
package/lib/loaders.js
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import fsex from "fs-extra";
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import * as git from "./git";
|
|
5
|
+
import { datasets } from "./datasets";
|
|
6
|
+
import { UNDEFINED_SESSION } from "./types/sessions";
|
|
7
|
+
export { EnabledDatasets } from "./datasets";
|
|
8
|
+
export const AGENDA_FOLDER = "agenda";
|
|
9
|
+
export const COMPTES_RENDUS_FOLDER = "seances";
|
|
10
|
+
export const COMMISSION_FOLDER = "commissions";
|
|
11
|
+
export const DOSLEG_DOSSIERS_FOLDER = "dossiers";
|
|
12
|
+
export const SCRUTINS_FOLDER = "scrutins";
|
|
13
|
+
export const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
|
|
14
|
+
export const SENS_ORGANISMES_FOLDER = "organismes";
|
|
15
|
+
export const SENS_SENATEURS_FOLDER = "senateurs";
|
|
16
|
+
export const TEXTE_FOLDER = "leg";
|
|
17
|
+
export const RAPPORT_FOLDER = "rap";
|
|
18
|
+
export const DATA_ORIGINAL_FOLDER = "original";
|
|
19
|
+
export const DATA_TRANSFORMED_FOLDER = "transformed";
|
|
20
|
+
export const DOCUMENT_METADATA_FILE = "metadata.json";
|
|
21
|
+
/**
 * Recursively yields the path of every regular file found under `dirPath`.
 *
 * Nothing is yielded when `dirPath` is falsy or does not exist.
 *
 * @param {string|undefined} dirPath - Directory to scan.
 * @yields {string} Path of each file, built by joining the entry's parent directory and name.
 */
export function* iterFilePaths(dirPath) {
  if (!dirPath || !fs.existsSync(dirPath)) {
    return;
  }
  const entries = fs.readdirSync(dirPath, {
    withFileTypes: true,
    recursive: true,
  });
  for (const entry of entries) {
    if (!entry.isFile()) {
      continue;
    }
    // `Dirent.parentPath` is missing on Node releases that already support the
    // `recursive` option but predate that property; those expose the same
    // directory as `Dirent.path`, so fall back to it instead of throwing.
    const parentDir = entry.parentPath ?? entry.path;
    yield path.join(parentDir, entry.name);
  }
}
|
|
34
|
+
/**
 * Lazily loads every `.json` item stored under one dataset directory.
 *
 * The scanned directory is `<dataDir>/<dataName>[/<subDir>][/<legislatureOrSession>]`.
 * When `sinceCommit` is given, only files reported by `git.getChangedFilesSinceCommit`
 * with the "AMR" filter are loaded, and files reported with the "D" filter are yielded
 * last as placeholder items of the form `{ uid: <file name without extension> }`.
 * Files whose content cannot be read or parsed are skipped with a console warning.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string} dataName - Dataset directory name inside `dataDir`.
 * @param {string|number|undefined} legislatureOrSession - Optional last path segment; echoed back as `legislature`.
 * @param {string|undefined} subDir - Optional sub-directory inside the dataset directory.
 * @param {{log?: boolean, sinceCommit?: string}} [options] - `log` prints progress; `sinceCommit` enables git filtering.
 * @yields {{item: object, filePathFromDataset: string, legislature: *, gitStatus?: *}}
 *   `filePathFromDataset` is the path relative to `<dataDir>/<dataName>`, prefixed with the path separator.
 */
function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, { log = false, sinceCommit } = {}) {
  const datasetDir = path.join(dataDir, dataName);
  let itemsDir = datasetDir;
  if (subDir) {
    itemsDir = path.join(itemsDir, subDir);
  }
  if (legislatureOrSession) {
    itemsDir = path.join(itemsDir, String(legislatureOrSession));
  }
  // Get changed files if sinceCommit is specified (excluding deleted files)
  const changedFiles = sinceCommit
    ? git.getChangedFilesSinceCommit(itemsDir, sinceCommit, {
        diffFilter: "AMR", // Added, Modified, Renamed
      })
    : null;
  if (log && sinceCommit) {
    console.log(`Filtering files changed since commit ${sinceCommit} in ${itemsDir}`);
    console.log(`Found ${changedFiles?.size || 0} changed files (AMR)`);
  }
  for (const filePath of iterFilePaths(itemsDir)) {
    if (!filePath.endsWith(".json")) {
      continue;
    }
    const relativePath = path.relative(datasetDir, filePath);
    const gitStatus = changedFiles?.get(relativePath);
    // Filter by changed files if sinceCommit is specified
    if (changedFiles && !gitStatus) {
      continue;
    }
    if (log) {
      console.log(`Loading file: ${filePath}…${gitStatus ? ` (${gitStatus})` : ""}`);
    }
    let item;
    try {
      const itemJson = fs.readFileSync(filePath, { encoding: "utf8" });
      item = JSON.parse(itemJson);
    } catch (err) {
      console.warn(`[iterLoadSenatItems] skipped invalid JSON: ${filePath} (${err.message})`);
      continue;
    }
    yield {
      item,
      // Built from the dataset directory itself rather than from the first textual
      // occurrence of `dataName` in the path, which is wrong whenever `dataDir`
      // happens to contain that name (e.g. "rap" inside "/home/raphael/data").
      filePathFromDataset: `${path.sep}${relativePath}`,
      legislature: legislatureOrSession,
      ...(gitStatus && { gitStatus }),
    };
  }
  // Yield deleted files at the end if sinceCommit is specified
  if (sinceCommit) {
    const deletedFiles = git.getChangedFilesSinceCommit(itemsDir, sinceCommit, {
      diffFilter: "D", // Deleted
    });
    if (log) {
      console.log(`Found ${deletedFiles.size || 0} deleted files (D)`);
    }
    // NOTE(review): the lookup above keys the map by paths relative to the dataset
    // directory, while this branch joins the keys onto `itemsDir`; confirm which base
    // `getChangedFilesSinceCommit` uses when `subDir` or `legislatureOrSession` is set.
    for (const [relativePath, status] of deletedFiles.entries()) {
      const deletedFilePath = path.join(itemsDir, relativePath);
      if (log) {
        console.log(`Deleted file: ${deletedFilePath}`);
      }
      // The file no longer exists, so the placeholder uid is its name without extension.
      const fileExtension = path.extname(relativePath) || ".json";
      const filename = path.basename(relativePath, fileExtension);
      yield {
        item: { uid: filename },
        filePathFromDataset: `${path.sep}${path.relative(datasetDir, deletedFilePath)}`,
        legislature: legislatureOrSession,
        gitStatus: status,
      };
    }
  }
}
|
|
110
|
+
/**
 * Iterates over the amendment items stored in the `ameli` dataset directory.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatAmendements(dataDir, session, options = {}) {
  const { database } = datasets.ameli;
  yield* iterLoadSenatItems(dataDir, database, session, undefined, options);
}
|
|
115
|
+
/**
 * Iterates over the debate items stored in the `debats` dataset directory.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatDebats(dataDir, session, options = {}) {
  const { database } = datasets.debats;
  yield* iterLoadSenatItems(dataDir, database, session, undefined, options);
}
|
|
120
|
+
/**
 * Yields the transcripts stored as JSON files in
 * `<dataDir>/<subFolder>/<DATA_TRANSFORMED_FOLDER>/<session>`, in sorted file-name order.
 *
 * Files that cannot be read or parsed, and files whose content has no `uid`, are
 * reported with `console.warn` and skipped. Nothing is yielded if the directory is absent.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number} session - Session used as the last path segment.
 * @param {string} subFolder - Folder name placed directly under `dataDir`.
 * @yields {{compteRendu: object, session: *}}
 */
function* iterLoadSenatComptesRendusGeneric(dataDir, session, subFolder) {
  const sessionDir = path.join(dataDir, subFolder, DATA_TRANSFORMED_FOLDER, String(session));
  if (!fs.existsSync(sessionDir)) {
    return;
  }
  const jsonFileNames = fs
    .readdirSync(sessionDir)
    .filter((name) => name.endsWith(".json"))
    .sort();
  for (const fileName of jsonFileNames) {
    try {
      const compteRendu = JSON.parse(fs.readFileSync(path.join(sessionDir, fileName), "utf-8"));
      if (compteRendu?.uid) {
        yield { compteRendu, session };
      } else {
        console.warn(`[SN] CR without uid → ${fileName}`);
      }
    } catch (err) {
      console.warn(`[SN] error reading CR → ${fileName}`, err);
    }
  }
}
|
|
142
|
+
/**
 * Iterates over the transcripts stored under the `COMPTES_RENDUS_FOLDER` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number} session - Session whose transcripts are loaded.
 * @yields {{compteRendu: object, session: *}}
 */
export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
  const seanceTranscripts = iterLoadSenatComptesRendusGeneric(dataDir, session, COMPTES_RENDUS_FOLDER);
  yield* seanceTranscripts;
}
|
|
145
|
+
/**
 * Iterates over the transcripts stored under the `COMMISSION_FOLDER` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number} session - Session whose transcripts are loaded.
 * @yields {{compteRendu: object, session: *}}
 */
export function* iterLoadSenatComptesRendusCommissions(dataDir, session) {
  const commissionTranscripts = iterLoadSenatComptesRendusGeneric(dataDir, session, COMMISSION_FOLDER);
  yield* commissionTranscripts;
}
|
|
148
|
+
/**
 * Iterates over the items stored in the `DOSLEG_DOSSIERS_FOLDER` sub-directory of the
 * `dosleg` dataset directory.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}) {
  const { database } = datasets.dosleg;
  yield* iterLoadSenatItems(dataDir, database, session, DOSLEG_DOSSIERS_FOLDER, options);
}
|
|
153
|
+
/**
 * Yields the parsed content of every `DOCUMENT_METADATA_FILE` found under
 * `<dataDir>/<RAPPORT_FOLDER>/<DATA_ORIGINAL_FOLDER>[/<session>]`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @yields {{item: object}} The parsed metadata file.
 */
export function* iterLoadSenatRapportUrls(dataDir, session) {
  const originalDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
  const itemsDir = session ? path.join(originalDir, session.toString()) : originalDir;
  for (const filePath of iterFilePaths(itemsDir)) {
    if (path.basename(filePath) !== DOCUMENT_METADATA_FILE) {
      continue;
    }
    const item = JSON.parse(fs.readFileSync(filePath, { encoding: "utf8" }));
    yield { item };
  }
}
|
|
169
|
+
/**
 * Yields the parsed content of every `DOCUMENT_METADATA_FILE` found under
 * `<dataDir>/<TEXTE_FOLDER>/<DATA_ORIGINAL_FOLDER>[/<session>]`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @yields {{item: object}} The parsed metadata file.
 */
export function* iterLoadSenatTexteUrls(dataDir, session) {
  const originalDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
  const itemsDir = session ? path.join(originalDir, session.toString()) : originalDir;
  for (const filePath of iterFilePaths(itemsDir)) {
    if (path.basename(filePath) !== DOCUMENT_METADATA_FILE) {
      continue;
    }
    const item = JSON.parse(fs.readFileSync(filePath, { encoding: "utf8" }));
    yield { item };
  }
}
|
|
185
|
+
/**
 * Yields the documents of one kind attached to the "Sénat" readings of each dossier
 * returned by `iterLoadSenatDossiersLegislatifs`.
 *
 * Each document is copied and given a `signet_dossier` property taken from the dossier's
 * `signet` (a same-named property on the document wins). When the document has a `url`,
 * the yielded entry also carries `filePathFromDataset`, built as
 * `<session or UNDEFINED_SESSION>/<name>/<name>.pdf` from the URL's base name.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {string} documentType - Key of the document array on each Sénat reading (e.g. "textes").
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatDossiersLegislatifs`.
 * @yields {{item: object, filePathFromDataset?: string}}
 */
export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
  for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
    // With `options.sinceCommit`, deleted dossiers are yielded as `{ uid }` placeholders
    // that have no `lectures`; treat every missing array as empty instead of throwing.
    for (const lecture of dossierLegislatif["lectures"] ?? []) {
      const lecturesSenat = (lecture.lectures_assemblee ?? []).filter(
        (lectureAssemblee) => lectureAssemblee.assemblee === "Sénat",
      );
      for (const lectureSenat of lecturesSenat) {
        for (const document of lectureSenat[documentType] ?? []) {
          const enrichedDocument = {
            signet_dossier: dossierLegislatif["signet"],
            ...document,
          };
          const documentItem = {
            item: enrichedDocument,
          };
          if (document.url) {
            const documentName = path.parse(document.url).name;
            documentItem.filePathFromDataset = path.join(
              `${document.session ?? UNDEFINED_SESSION}`,
              documentName,
              `${documentName}.pdf`,
            );
          }
          yield documentItem;
        }
      }
    }
  }
}
|
|
208
|
+
/**
 * Iterates over the items stored under `<dataDir>/<RAPPORT_FOLDER>/original[/<session>]`,
 * keeping only the entries whose item has a truthy `id`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatRapports(dataDir, session, options = {}) {
  const rapportItems = iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options);
  for (const rapportItem of rapportItems) {
    // NOTE(review): deleted-file placeholders only carry `uid`, so they are dropped here too.
    if (!rapportItem.item?.["id"]) {
      continue;
    }
    yield rapportItem;
  }
}
|
|
215
|
+
/**
 * Iterates over the "textes" documents returned by `iterLoadSenatDocuments`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatDocuments`.
 * @yields {{item: object, filePathFromDataset?: string}}
 */
export function* iterLoadSenatTextes(dataDir, session, options = {}) {
  yield* iterLoadSenatDocuments(dataDir, session, "textes", options);
}
|
|
220
|
+
/**
 * Loads the JSON file that sits at the same relative location as `textePathFromDataset`
 * (same directory and base name, `.json` extension) under
 * `<dataDir>/<TEXTE_FOLDER>/<DATA_TRANSFORMED_FOLDER>`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string} textePathFromDataset - Relative path of the text document.
 * @returns {{item: object|null}} The parsed JSON, or `{ item: null }` when the file does not exist.
 */
export function loadSenatTexteContent(dataDir, textePathFromDataset) {
  const { dir: texteDir, name: texteName } = path.parse(textePathFromDataset);
  const jsonPath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, texteDir, `${texteName}.json`);
  const item = fs.existsSync(jsonPath)
    ? JSON.parse(fs.readFileSync(jsonPath, { encoding: "utf8" }))
    : null;
  return { item };
}
|
|
230
|
+
/**
 * Loads `<dataDir>/<COMPTES_RENDUS_FOLDER>/<DATA_TRANSFORMED_FOLDER>/<session>/<debatId>.json`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number} session - Session directory name.
 * @param {string} debatId - File name without the `.json` extension.
 * @returns {{item: object|null}} The parsed JSON, or `{ item: null }` when the file does not exist.
 */
export function loadSenatCompteRenduContent(dataDir, session, debatId) {
  const sessionDir = path.join(dataDir, COMPTES_RENDUS_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
  const jsonPath = path.join(sessionDir, `${debatId}.json`);
  const item = fs.existsSync(jsonPath)
    ? JSON.parse(fs.readFileSync(jsonPath, { encoding: "utf8" }))
    : null;
  return { item };
}
|
|
238
|
+
/**
 * Yields the agenda entries stored as `RUSN*.json` files in
 * `<dataDir>/<AGENDA_FOLDER>/<DATA_TRANSFORMED_FOLDER>[/<session>]`, in sorted file-name order.
 *
 * Entries are skipped when the file cannot be read or parsed (a warning is logged),
 * when the content is not an object, or when `uid`, `date` or `titre` is missing.
 * A non-array `events` property is replaced by an empty array before yielding.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @yields {{item: object}}
 */
export function* iterLoadSenatAgendas(dataDir, session) {
  const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? ""));
  if (!fs.existsSync(baseDir)) {
    return;
  }
  const files = fs
    .readdirSync(baseDir)
    .filter((f) => f.startsWith("RUSN") && f.endsWith(".json"))
    .sort();
  for (const fileName of files) {
    const filePath = path.join(baseDir, fileName);
    let raw;
    try {
      raw = fsex.readJSONSync(filePath);
    } catch (err) {
      // Report the skipped file instead of dropping it silently, as the other loaders do.
      console.warn(`[iterLoadSenatAgendas] skipped unreadable or invalid JSON: ${filePath} (${err?.message ?? err})`);
      continue;
    }
    if (!raw || typeof raw !== "object") {
      continue;
    }
    const gr = raw;
    if (!gr.uid || !gr.date || !gr.titre) {
      continue;
    }
    if (!Array.isArray(gr.events)) {
      gr.events = [];
    }
    yield { item: gr };
  }
}
|
|
262
|
+
/**
 * Iterates over the items stored in the `SENS_CIRCONSCRIPTIONS_FOLDER` sub-directory of
 * the `sens` dataset directory.
 *
 * @param {string} dataDir - Root data directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatCirconscriptions(dataDir, options = {}) {
  const { database } = datasets.sens;
  yield* iterLoadSenatItems(dataDir, database, undefined, SENS_CIRCONSCRIPTIONS_FOLDER, options);
}
|
|
267
|
+
/**
 * Iterates over the items stored in the `SENS_ORGANISMES_FOLDER` sub-directory of the
 * `sens` dataset directory.
 *
 * @param {string} dataDir - Root data directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatOrganismes(dataDir, options = {}) {
  const { database } = datasets.sens;
  yield* iterLoadSenatItems(dataDir, database, undefined, SENS_ORGANISMES_FOLDER, options);
}
|
|
272
|
+
/**
 * Iterates over the items stored in the `SENS_SENATEURS_FOLDER` sub-directory of the
 * `sens` dataset directory.
 *
 * @param {string} dataDir - Root data directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatSenateurs(dataDir, options = {}) {
  const { database } = datasets.sens;
  yield* iterLoadSenatItems(dataDir, database, undefined, SENS_SENATEURS_FOLDER, options);
}
|
|
277
|
+
/**
 * Iterates over the items stored in the `questions` dataset directory.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} legislature - Optional legislature sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatQuestions(dataDir, legislature, options = {}) {
  const { database } = datasets.questions;
  yield* iterLoadSenatItems(dataDir, database, legislature, undefined, options);
}
|
|
282
|
+
/**
 * Iterates over the items stored in the "scrutins" directory of `dataDir`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatScrutins(dataDir, session, options = {}) {
  yield* iterLoadSenatItems(dataDir, "scrutins", session, undefined, options);
}
|
package/lib/loaders.mjs
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import fs from "fs";
|
|
2
|
+
import path from "path";
|
|
3
|
+
import legislatures from "./legislatures.json";
|
|
4
|
+
import { datasets } from "./datasets";
|
|
5
|
+
import { UNDEFINED_SESSION } from "./scripts/datautil";
|
|
6
|
+
export { EnabledDatasets } from "./datasets";
|
|
7
|
+
export const DOSLEG_DOSSIERS_FOLDER = "dossiers";
|
|
8
|
+
export const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
|
|
9
|
+
export const SENS_ORGANISMES_FOLDER = "organismes";
|
|
10
|
+
export const SENS_SENATEURS_FOLDER = "senateurs";
|
|
11
|
+
export const TEXTE_FOLDER = "leg";
|
|
12
|
+
export const TEXTE_ORIGINAL_FOLDER = "original";
|
|
13
|
+
export const TEXTE_TRANSFORMED_FOLDER = "transformed";
|
|
14
|
+
export const DOCUMENT_METADATA_FILE = "metadata.json";
|
|
15
|
+
export const RAPPORT_FOLDER = "rap";
|
|
16
|
+
/**
 * Recursively yields the path of every regular file found under `dirPath`.
 *
 * Nothing is yielded when `dirPath` is falsy or does not exist.
 *
 * @param {string|undefined} dirPath - Directory to scan.
 * @yields {string} Path of each file, built by joining the entry's parent directory and name.
 */
export function* iterFilePaths(dirPath) {
  if (!dirPath || !fs.existsSync(dirPath)) {
    return;
  }
  const entries = fs.readdirSync(dirPath, { withFileTypes: true, recursive: true });
  for (const entry of entries) {
    if (!entry.isFile()) {
      continue;
    }
    // `Dirent.parentPath` is missing on Node releases that already support the
    // `recursive` option but predate that property; those expose the same
    // directory as `Dirent.path`, so fall back to it instead of throwing.
    const parentDir = entry.parentPath ?? entry.path;
    yield path.join(parentDir, entry.name);
  }
}
|
|
26
|
+
/**
 * Lazily loads and parses every file found under
 * `<dataDir>/<dataset.database>[/<subDir>][/<legislatureOrSession>]`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {{database: string}} dataset - Dataset descriptor; `database` is the directory name inside `dataDir`.
 * @param {string|number|undefined} legislatureOrSession - Optional last path segment; echoed back as `legislature`.
 * @param {string|undefined} subDir - Optional sub-directory inside the dataset directory.
 * @param {{log?: boolean}} [options] - When `log` is true, each loaded path is printed.
 * @yields {{item: object, filePathFromDataset: string, legislature: *}}
 *   `filePathFromDataset` is the path relative to the dataset directory, prefixed with the path separator.
 * @throws {SyntaxError} When a file does not contain valid JSON.
 */
function* iterLoadSenatItems(dataDir, dataset, legislatureOrSession, subDir, { log = false } = {}) {
  const datasetDir = path.join(dataDir, dataset.database);
  let itemsDir = datasetDir;
  if (subDir) {
    itemsDir = path.join(itemsDir, subDir);
  }
  if (legislatureOrSession) {
    itemsDir = path.join(itemsDir, String(legislatureOrSession));
  }
  for (const filePath of iterFilePaths(itemsDir)) {
    if (log) {
      console.log(`Loading file: ${filePath}…`);
    }
    const itemJson = fs.readFileSync(filePath, { encoding: "utf8" });
    const item = JSON.parse(itemJson);
    // Built from the dataset directory itself rather than from the first textual
    // occurrence of the dataset name in the path, which is wrong whenever `dataDir`
    // happens to contain that name.
    const filePathFromDataset = `${path.sep}${path.relative(datasetDir, filePath)}`;
    yield {
      item,
      filePathFromDataset,
      legislature: legislatureOrSession,
    };
  }
}
|
|
48
|
+
/**
 * Iterates over the amendment items of the `ameli` dataset.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatAmendements(dataDir, session, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.ameli, session, undefined, options);
}
|
|
53
|
+
/**
 * Iterates over the items stored in the `DOSLEG_DOSSIERS_FOLDER` sub-directory of the
 * `dosleg` dataset.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.dosleg, session, DOSLEG_DOSSIERS_FOLDER, options);
}
|
|
58
|
+
/**
 * Yields the parsed content of every `DOCUMENT_METADATA_FILE` found under
 * `<dataDir>/<TEXTE_FOLDER>/<TEXTE_ORIGINAL_FOLDER>[/<session>]`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @yields {{item: object}} The parsed metadata file.
 */
export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
  const originalDir = path.join(dataDir, TEXTE_FOLDER, TEXTE_ORIGINAL_FOLDER);
  const itemsDir = session ? path.join(originalDir, session.toString()) : originalDir;
  for (const filePath of iterFilePaths(itemsDir)) {
    if (path.basename(filePath) !== DOCUMENT_METADATA_FILE) {
      continue;
    }
    const item = JSON.parse(fs.readFileSync(filePath, { encoding: "utf8" }));
    yield { item };
  }
}
|
|
74
|
+
/**
 * Yields the parsed content of every `DOCUMENT_METADATA_FILE` found under
 * `<dataDir>/<RAPPORT_FOLDER>[/<session>]`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @yields {{item: object}} The parsed metadata file.
 */
export function* iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session) {
  const rapportDir = path.join(dataDir, RAPPORT_FOLDER);
  const itemsDir = session ? path.join(rapportDir, session.toString()) : rapportDir;
  for (const filePath of iterFilePaths(itemsDir)) {
    if (path.basename(filePath) !== DOCUMENT_METADATA_FILE) {
      continue;
    }
    const item = JSON.parse(fs.readFileSync(filePath, { encoding: "utf8" }));
    yield { item };
  }
}
|
|
90
|
+
/**
 * Yields every text attached to the "Sénat" readings of each dossier returned by
 * `iterLoadSenatDossiersLegislatifs`.
 *
 * Each text is copied and given context properties taken from its dossier and reading
 * (same-named properties on the text win). When the text has a `url`, the yielded entry
 * also carries `filePathFromDataset`, built as `<session or UNDEFINED_SESSION>/<name>/<name>.pdf`
 * from the URL's base name.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} session - Optional session sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatDossiersLegislatifs`.
 * @yields {{item: object, filePathFromDataset?: string}}
 */
export function* iterLoadSenatDossiersLegislatifsTextes(dataDir, session, options = {}) {
  const isSenat = (lectureAssemblee) => lectureAssemblee.assemblee === "Sénat";
  for (const { item: dossier } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
    for (const lecture of dossier.lectures) {
      for (const lectureSenat of lecture.lectures_assemblee.filter(isSenat)) {
        for (const texte of lectureSenat.textes) {
          const texteItem = {
            item: {
              signet_dossier: dossier.signet,
              url_dossier_senat: dossier.url,
              url_dossier_assemblee_nationale: dossier.url_dossier_assemblee_nationale,
              type_lecture: lecture.type_lecture,
              libelle_lecture: lecture.libelle,
              libelle_organisme: lectureSenat.libelle_organisme,
              ...texte,
            },
          };
          if (texte.url) {
            const { name: texteName } = path.parse(texte.url);
            const sessionDir = `${texte.session ?? UNDEFINED_SESSION}`;
            texteItem.filePathFromDataset = path.join(sessionDir, texteName, `${texteName}.pdf`);
          }
          yield texteItem;
        }
      }
    }
  }
}
|
|
120
|
+
/**
 * Loads the JSON file that sits at the same relative location as `textePathFromDataset`
 * (same directory and base name, `.json` extension) under
 * `<dataDir>/<TEXTE_FOLDER>/<TEXTE_TRANSFORMED_FOLDER>`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string} textePathFromDataset - Relative path of the text document.
 * @returns {{item: object|null}} The parsed JSON, or `{ item: null }` when the file does not exist.
 */
export function loadSenatTexteContent(dataDir, textePathFromDataset) {
  const { dir: texteDir, name: texteName } = path.parse(textePathFromDataset);
  const jsonPath = path.join(dataDir, TEXTE_FOLDER, TEXTE_TRANSFORMED_FOLDER, texteDir, `${texteName}.json`);
  const item = fs.existsSync(jsonPath)
    ? JSON.parse(fs.readFileSync(jsonPath, { encoding: "utf8" }))
    : null;
  return { item };
}
|
|
130
|
+
/**
 * Iterates over the items stored in the `SENS_ORGANISMES_FOLDER` sub-directory of the
 * `sens` dataset.
 *
 * @param {string} dataDir - Root data directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatOrganismes(dataDir, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.sens, undefined, SENS_ORGANISMES_FOLDER, options);
}
|
|
135
|
+
/**
 * Iterates over the senator items of the `sens` dataset, skipping those whose first
 * listed mandate (`mandats_senateur[0]`) ended before the start date of `legislature`.
 *
 * The start date is the `date_debut` of the entry in `legislatures` whose `numero`
 * equals `legislature`. When no entry matches, the start date is an invalid `Date`,
 * every comparison against it is false, and no senator is filtered out.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number} legislature - Legislature number matched against `legislatures[].numero`.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatSenateurs(dataDir, legislature, options = {}) {
  const legislatureInfo = legislatures.find((info) => info.numero === legislature);
  const legislatureStart = new Date(legislatureInfo?.date_debut);
  const senateurItems = iterLoadSenatItems(dataDir, datasets.sens, undefined, SENS_SENATEURS_FOLDER, options);
  for (const senateurItem of senateurItems) {
    // NOTE(review): only the first mandate is inspected; presumably the list is ordered
    // most recent first — confirm against the data producer.
    const firstMandateEnd = senateurItem.item.mandats_senateur[0]?.date_fin;
    const mandateEndDate = firstMandateEnd ? new Date(firstMandateEnd) : null;
    const endedBeforeLegislature = mandateEndDate !== null && mandateEndDate < legislatureStart;
    if (!endedBeforeLegislature) {
      yield senateurItem;
    }
  }
}
|
|
149
|
+
/**
 * Iterates over the items stored in the `SENS_CIRCONSCRIPTIONS_FOLDER` sub-directory of
 * the `sens` dataset.
 *
 * @param {string} dataDir - Root data directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatCirconscriptions(dataDir, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.sens, undefined, SENS_CIRCONSCRIPTIONS_FOLDER, options);
}
|
|
154
|
+
/**
 * Iterates over the items of the `questions` dataset.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string|number|undefined} legislature - Optional legislature sub-directory.
 * @param {object} [options] - Forwarded unchanged to `iterLoadSenatItems`.
 * @yields {object} Entries produced by `iterLoadSenatItems`.
 */
export function* iterLoadSenatQuestions(dataDir, legislature, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.questions, legislature, undefined, options);
}
|