@tricoteuses/senat 2.22.13 → 2.22.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/src/loaders.d.ts +2 -8
- package/lib/src/loaders.js +7 -25
- package/lib/tests/test_iter_load.test.js +17 -0
- package/package.json +2 -2
- package/lib/config.d.ts +0 -21
- package/lib/config.js +0 -27
- package/lib/databases.d.ts +0 -2
- package/lib/databases.js +0 -26
- package/lib/datasets.d.ts +0 -34
- package/lib/datasets.js +0 -233
- package/lib/git.d.ts +0 -26
- package/lib/git.js +0 -167
- package/lib/index.d.ts +0 -13
- package/lib/index.js +0 -1
- package/lib/loaders.d.ts +0 -58
- package/lib/loaders.js +0 -286
- package/lib/model/agenda.d.ts +0 -6
- package/lib/model/agenda.js +0 -148
- package/lib/model/ameli.d.ts +0 -51
- package/lib/model/ameli.js +0 -147
- package/lib/model/commission.d.ts +0 -18
- package/lib/model/commission.js +0 -269
- package/lib/model/debats.d.ts +0 -67
- package/lib/model/debats.js +0 -95
- package/lib/model/documents.d.ts +0 -12
- package/lib/model/documents.js +0 -138
- package/lib/model/dosleg.d.ts +0 -7
- package/lib/model/dosleg.js +0 -326
- package/lib/model/index.d.ts +0 -7
- package/lib/model/index.js +0 -7
- package/lib/model/questions.d.ts +0 -45
- package/lib/model/questions.js +0 -89
- package/lib/model/scrutins.d.ts +0 -13
- package/lib/model/scrutins.js +0 -114
- package/lib/model/seance.d.ts +0 -3
- package/lib/model/seance.js +0 -267
- package/lib/model/sens.d.ts +0 -146
- package/lib/model/sens.js +0 -454
- package/lib/model/texte.d.ts +0 -7
- package/lib/model/texte.js +0 -228
- package/lib/model/util.d.ts +0 -9
- package/lib/model/util.js +0 -38
- package/lib/parsers/texte.d.ts +0 -7
- package/lib/parsers/texte.js +0 -228
- package/lib/raw_types/ameli.d.ts +0 -914
- package/lib/raw_types/ameli.js +0 -5
- package/lib/raw_types/debats.d.ts +0 -207
- package/lib/raw_types/debats.js +0 -5
- package/lib/raw_types/dosleg.d.ts +0 -1619
- package/lib/raw_types/dosleg.js +0 -5
- package/lib/raw_types/questions.d.ts +0 -423
- package/lib/raw_types/questions.js +0 -5
- package/lib/raw_types/senat.d.ts +0 -11372
- package/lib/raw_types/senat.js +0 -5
- package/lib/raw_types/sens.d.ts +0 -8248
- package/lib/raw_types/sens.js +0 -5
- package/lib/raw_types_schemats/ameli.d.ts +0 -539
- package/lib/raw_types_schemats/ameli.js +0 -2
- package/lib/raw_types_schemats/debats.d.ts +0 -127
- package/lib/raw_types_schemats/debats.js +0 -2
- package/lib/raw_types_schemats/dosleg.d.ts +0 -977
- package/lib/raw_types_schemats/dosleg.js +0 -2
- package/lib/raw_types_schemats/questions.d.ts +0 -237
- package/lib/raw_types_schemats/questions.js +0 -2
- package/lib/raw_types_schemats/sens.d.ts +0 -6915
- package/lib/raw_types_schemats/sens.js +0 -2
- package/lib/scripts/convert_data.js +0 -354
- package/lib/scripts/data-download.d.ts +0 -1
- package/lib/scripts/data-download.js +0 -12
- package/lib/scripts/datautil.d.ts +0 -8
- package/lib/scripts/datautil.js +0 -34
- package/lib/scripts/parse_textes.d.ts +0 -1
- package/lib/scripts/parse_textes.js +0 -44
- package/lib/scripts/retrieve_agenda.d.ts +0 -1
- package/lib/scripts/retrieve_agenda.js +0 -132
- package/lib/scripts/retrieve_cr_commission.d.ts +0 -1
- package/lib/scripts/retrieve_cr_commission.js +0 -364
- package/lib/scripts/retrieve_cr_seance.d.ts +0 -6
- package/lib/scripts/retrieve_cr_seance.js +0 -347
- package/lib/scripts/retrieve_documents.d.ts +0 -3
- package/lib/scripts/retrieve_documents.js +0 -219
- package/lib/scripts/retrieve_open_data.d.ts +0 -1
- package/lib/scripts/retrieve_open_data.js +0 -316
- package/lib/scripts/retrieve_senateurs_photos.d.ts +0 -1
- package/lib/scripts/retrieve_senateurs_photos.js +0 -147
- package/lib/scripts/retrieve_videos.d.ts +0 -1
- package/lib/scripts/retrieve_videos.js +0 -461
- package/lib/scripts/shared/cli_helpers.d.ts +0 -95
- package/lib/scripts/shared/cli_helpers.js +0 -91
- package/lib/scripts/shared/util.d.ts +0 -4
- package/lib/scripts/shared/util.js +0 -35
- package/lib/scripts/test_iter_load.d.ts +0 -1
- package/lib/scripts/test_iter_load.js +0 -12
- package/lib/src/utils/nvs-timecode.d.ts +0 -17
- package/lib/src/utils/nvs-timecode.js +0 -79
- package/lib/src/utils/weights_scoring_config.d.ts +0 -2
- package/lib/src/utils/weights_scoring_config.js +0 -15
- package/lib/strings.d.ts +0 -1
- package/lib/strings.js +0 -18
- package/lib/types/agenda.d.ts +0 -44
- package/lib/types/agenda.js +0 -1
- package/lib/types/ameli.d.ts +0 -5
- package/lib/types/ameli.js +0 -1
- package/lib/types/compte_rendu.d.ts +0 -83
- package/lib/types/compte_rendu.js +0 -1
- package/lib/types/debats.d.ts +0 -2
- package/lib/types/debats.js +0 -1
- package/lib/types/dosleg.d.ts +0 -70
- package/lib/types/dosleg.js +0 -1
- package/lib/types/questions.d.ts +0 -2
- package/lib/types/questions.js +0 -1
- package/lib/types/sens.d.ts +0 -10
- package/lib/types/sens.js +0 -1
- package/lib/types/sessions.d.ts +0 -5
- package/lib/types/sessions.js +0 -84
- package/lib/types/texte.d.ts +0 -74
- package/lib/types/texte.js +0 -16
- package/lib/utils/cr_spliting.d.ts +0 -28
- package/lib/utils/cr_spliting.js +0 -265
- package/lib/utils/date.d.ts +0 -10
- package/lib/utils/date.js +0 -100
- package/lib/utils/nvs-timecode.d.ts +0 -7
- package/lib/utils/nvs-timecode.js +0 -79
- package/lib/utils/reunion_grouping.d.ts +0 -11
- package/lib/utils/reunion_grouping.js +0 -337
- package/lib/utils/reunion_odj_building.d.ts +0 -5
- package/lib/utils/reunion_odj_building.js +0 -154
- package/lib/utils/reunion_parsing.d.ts +0 -23
- package/lib/utils/reunion_parsing.js +0 -209
- package/lib/utils/scoring.d.ts +0 -14
- package/lib/utils/scoring.js +0 -147
- package/lib/utils/string_cleaning.d.ts +0 -7
- package/lib/utils/string_cleaning.js +0 -57
- package/lib/validators/config.d.ts +0 -9
- package/lib/validators/config.js +0 -10
- /package/lib/{scripts/convert_data.d.ts → tests/test_iter_load.test.d.ts} +0 -0
package/lib/loaders.d.ts
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import { AmendementResult } from "./model/ameli";
|
|
2
|
-
import { DebatResult } from "./model/debats";
|
|
3
|
-
import { DossierLegislatifResult } from "./model/dosleg";
|
|
4
|
-
import { QuestionResult } from "./model/questions";
|
|
5
|
-
import { ScrutinResult } from "./model/scrutins";
|
|
6
|
-
import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
|
|
7
|
-
import { Reunion } from "./types/agenda";
|
|
8
|
-
import { FlatTexte, DocumentMetadata } from "./types/texte";
|
|
9
|
-
import { CompteRendu } from "./types/compte_rendu";
|
|
10
|
-
import { DocumentResult } from "./model/documents";
|
|
11
|
-
export { EnabledDatasets } from "./datasets";
|
|
12
|
-
export type { DocumentResult } from "./model/documents";
|
|
13
|
-
export declare const AGENDA_FOLDER = "agenda";
|
|
14
|
-
export declare const COMPTES_RENDUS_FOLDER = "seances";
|
|
15
|
-
export declare const COMMISSION_FOLDER = "commissions";
|
|
16
|
-
export declare const DOSLEG_DOSSIERS_FOLDER = "dossiers";
|
|
17
|
-
export declare const SCRUTINS_FOLDER = "scrutins";
|
|
18
|
-
export declare const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
|
|
19
|
-
export declare const SENS_ORGANISMES_FOLDER = "organismes";
|
|
20
|
-
export declare const SENS_SENATEURS_FOLDER = "senateurs";
|
|
21
|
-
export declare const TEXTE_FOLDER = "leg";
|
|
22
|
-
export declare const RAPPORT_FOLDER = "rap";
|
|
23
|
-
export declare const DATA_ORIGINAL_FOLDER = "original";
|
|
24
|
-
export declare const DATA_TRANSFORMED_FOLDER = "transformed";
|
|
25
|
-
export declare const DOCUMENT_METADATA_FILE = "metadata.json";
|
|
26
|
-
export type IterItem<T> = {
|
|
27
|
-
item: T;
|
|
28
|
-
filePathFromDataset?: string;
|
|
29
|
-
legislature?: number;
|
|
30
|
-
gitStatus?: "A" | "M" | "D" | "R" | "C" | "T" | "U";
|
|
31
|
-
};
|
|
32
|
-
export declare function iterFilePaths(dirPath: string): Generator<string>;
|
|
33
|
-
export declare function iterLoadSenatAmendements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AmendementResult>>;
|
|
34
|
-
export declare function iterLoadSenatDebats(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DebatResult>>;
|
|
35
|
-
export declare function iterLoadSenatComptesRendusSeances(dataDir: string, session: number): Generator<{
|
|
36
|
-
compteRendu: CompteRendu;
|
|
37
|
-
session: number;
|
|
38
|
-
}>;
|
|
39
|
-
export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, session: number): Generator<{
|
|
40
|
-
compteRendu: CompteRendu;
|
|
41
|
-
session: number;
|
|
42
|
-
}>;
|
|
43
|
-
export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
|
|
44
|
-
export declare function iterLoadSenatRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
45
|
-
export declare function iterLoadSenatTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
46
|
-
export declare function iterLoadSenatDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
|
|
47
|
-
export declare function iterLoadSenatRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
48
|
-
export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
49
|
-
export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
|
|
50
|
-
export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
|
|
51
|
-
item: CompteRendu | null;
|
|
52
|
-
};
|
|
53
|
-
export declare function iterLoadSenatAgendas(dataDir: string, session: number | undefined): Generator<IterItem<Reunion>>;
|
|
54
|
-
export declare function iterLoadSenatCirconscriptions(dataDir: string, options?: {}): Generator<IterItem<CirconscriptionResult>>;
|
|
55
|
-
export declare function iterLoadSenatOrganismes(dataDir: string, options?: {}): Generator<IterItem<OrganismeResult>>;
|
|
56
|
-
export declare function iterLoadSenatSenateurs(dataDir: string, options?: {}): Generator<IterItem<SenateurResult>>;
|
|
57
|
-
export declare function iterLoadSenatQuestions(dataDir: string, legislature: number, options?: {}): Generator<IterItem<QuestionResult>>;
|
|
58
|
-
export declare function iterLoadSenatScrutins(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<ScrutinResult>>;
|
package/lib/loaders.js
DELETED
|
@@ -1,286 +0,0 @@
|
|
|
1
|
-
import fsex from "fs-extra";
|
|
2
|
-
import fs from "fs";
|
|
3
|
-
import path from "path";
|
|
4
|
-
import * as git from "./git";
|
|
5
|
-
import { datasets } from "./datasets";
|
|
6
|
-
import { UNDEFINED_SESSION } from "./types/sessions";
|
|
7
|
-
export { EnabledDatasets } from "./datasets";
|
|
8
|
-
export const AGENDA_FOLDER = "agenda";
|
|
9
|
-
export const COMPTES_RENDUS_FOLDER = "seances";
|
|
10
|
-
export const COMMISSION_FOLDER = "commissions";
|
|
11
|
-
export const DOSLEG_DOSSIERS_FOLDER = "dossiers";
|
|
12
|
-
export const SCRUTINS_FOLDER = "scrutins";
|
|
13
|
-
export const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
|
|
14
|
-
export const SENS_ORGANISMES_FOLDER = "organismes";
|
|
15
|
-
export const SENS_SENATEURS_FOLDER = "senateurs";
|
|
16
|
-
export const TEXTE_FOLDER = "leg";
|
|
17
|
-
export const RAPPORT_FOLDER = "rap";
|
|
18
|
-
export const DATA_ORIGINAL_FOLDER = "original";
|
|
19
|
-
export const DATA_TRANSFORMED_FOLDER = "transformed";
|
|
20
|
-
export const DOCUMENT_METADATA_FILE = "metadata.json";
|
|
21
|
-
export function* iterFilePaths(dirPath) {
|
|
22
|
-
if (dirPath && fs.existsSync(dirPath)) {
|
|
23
|
-
const files = fs.readdirSync(dirPath, {
|
|
24
|
-
withFileTypes: true,
|
|
25
|
-
recursive: true,
|
|
26
|
-
});
|
|
27
|
-
for (const file of files) {
|
|
28
|
-
if (file.isFile()) {
|
|
29
|
-
yield path.join(file.parentPath, file.name);
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, { log = false, sinceCommit } = {}) {
|
|
35
|
-
let itemsDir = path.join(dataDir, dataName);
|
|
36
|
-
if (subDir) {
|
|
37
|
-
itemsDir = path.join(itemsDir, subDir);
|
|
38
|
-
}
|
|
39
|
-
if (legislatureOrSession) {
|
|
40
|
-
itemsDir = path.join(itemsDir, String(legislatureOrSession));
|
|
41
|
-
}
|
|
42
|
-
// Get changed files if sinceCommit is specified (excluding deleted files)
|
|
43
|
-
const changedFiles = sinceCommit
|
|
44
|
-
? git.getChangedFilesSinceCommit(itemsDir, sinceCommit, {
|
|
45
|
-
diffFilter: "AMR", // Added, Modified, Renamed
|
|
46
|
-
})
|
|
47
|
-
: null;
|
|
48
|
-
if (log && sinceCommit) {
|
|
49
|
-
console.log(`Filtering files changed since commit ${sinceCommit} in ${itemsDir}`);
|
|
50
|
-
console.log(`Found ${changedFiles?.size || 0} changed files (AMR)`);
|
|
51
|
-
}
|
|
52
|
-
for (const filePath of iterFilePaths(itemsDir)) {
|
|
53
|
-
if (!filePath.endsWith(".json")) {
|
|
54
|
-
continue;
|
|
55
|
-
}
|
|
56
|
-
const relativePath = path.relative(path.join(dataDir, dataName), filePath);
|
|
57
|
-
const gitStatus = changedFiles?.get(relativePath);
|
|
58
|
-
// Filter by changed files if sinceCommit is specified
|
|
59
|
-
if (changedFiles && !gitStatus) {
|
|
60
|
-
// Skip files not in the change set
|
|
61
|
-
continue;
|
|
62
|
-
}
|
|
63
|
-
if (log) {
|
|
64
|
-
console.log(`Loading file: ${filePath}…${gitStatus ? ` (${gitStatus})` : ""}`);
|
|
65
|
-
}
|
|
66
|
-
let item;
|
|
67
|
-
try {
|
|
68
|
-
const itemJson = fs.readFileSync(filePath, { encoding: "utf8" });
|
|
69
|
-
item = JSON.parse(itemJson);
|
|
70
|
-
}
|
|
71
|
-
catch (err) {
|
|
72
|
-
console.warn(`[iterLoadSenatItems] skipped invalid JSON: ${filePath} (${err.message})`);
|
|
73
|
-
continue;
|
|
74
|
-
}
|
|
75
|
-
const filePathFromDataset = filePath.substring(filePath.indexOf(dataName) + dataName.length);
|
|
76
|
-
yield {
|
|
77
|
-
item,
|
|
78
|
-
filePathFromDataset,
|
|
79
|
-
legislature: legislatureOrSession,
|
|
80
|
-
...(gitStatus && { gitStatus }), // Include gitStatus
|
|
81
|
-
};
|
|
82
|
-
}
|
|
83
|
-
// Yield deleted files at the end if sinceCommit is specified
|
|
84
|
-
if (sinceCommit) {
|
|
85
|
-
const deletedFiles = git.getChangedFilesSinceCommit(itemsDir, sinceCommit, {
|
|
86
|
-
diffFilter: "D", // Deleted
|
|
87
|
-
});
|
|
88
|
-
if (log) {
|
|
89
|
-
console.log(`Found ${deletedFiles.size || 0} deleted files (D)`);
|
|
90
|
-
}
|
|
91
|
-
for (const [relativePath, status] of deletedFiles.entries()) {
|
|
92
|
-
const deletedFilePath = path.join(itemsDir, relativePath);
|
|
93
|
-
if (log) {
|
|
94
|
-
console.log(`Deleted file: ${deletedFilePath}`);
|
|
95
|
-
}
|
|
96
|
-
// Extract UID from filename (remove extension) for the placeholder item
|
|
97
|
-
const fileExtension = path.extname(relativePath) || ".json"; // Assuming files use an extension like .json
|
|
98
|
-
const filename = path.basename(relativePath, fileExtension);
|
|
99
|
-
const fakeItem = { uid: filename }; // Placeholder item using uid constraint
|
|
100
|
-
const filePathFromDataset = deletedFilePath.substring(deletedFilePath.indexOf(dataName) + dataName.length);
|
|
101
|
-
yield {
|
|
102
|
-
item: fakeItem,
|
|
103
|
-
filePathFromDataset,
|
|
104
|
-
legislature: legislatureOrSession,
|
|
105
|
-
gitStatus: status,
|
|
106
|
-
};
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
}
|
|
110
|
-
export function* iterLoadSenatAmendements(dataDir, session, options = {}) {
|
|
111
|
-
for (const amendementItem of iterLoadSenatItems(dataDir, datasets.ameli.database, session, undefined, options)) {
|
|
112
|
-
yield amendementItem;
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
export function* iterLoadSenatDebats(dataDir, session, options = {}) {
|
|
116
|
-
for (const debatItem of iterLoadSenatItems(dataDir, datasets.debats.database, session, undefined, options)) {
|
|
117
|
-
yield debatItem;
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
function* iterLoadSenatComptesRendusGeneric(dataDir, session, subFolder) {
|
|
121
|
-
const basePath = path.join(dataDir, subFolder, DATA_TRANSFORMED_FOLDER, String(session));
|
|
122
|
-
if (!fs.existsSync(basePath)) {
|
|
123
|
-
return;
|
|
124
|
-
}
|
|
125
|
-
const files = (fs.readdirSync(basePath) || []).filter((f) => f.endsWith(".json")).sort();
|
|
126
|
-
for (const fileName of files) {
|
|
127
|
-
const filePath = path.join(basePath, fileName);
|
|
128
|
-
try {
|
|
129
|
-
const fileContent = fs.readFileSync(filePath, "utf-8");
|
|
130
|
-
const compteRendu = JSON.parse(fileContent);
|
|
131
|
-
if (!compteRendu?.uid) {
|
|
132
|
-
console.warn(`[SN] CR without uid → ${fileName}`);
|
|
133
|
-
continue;
|
|
134
|
-
}
|
|
135
|
-
yield { compteRendu, session };
|
|
136
|
-
}
|
|
137
|
-
catch (err) {
|
|
138
|
-
console.warn(`[SN] error reading CR → ${fileName}`, err);
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
|
|
143
|
-
yield* iterLoadSenatComptesRendusGeneric(dataDir, session, COMPTES_RENDUS_FOLDER);
|
|
144
|
-
}
|
|
145
|
-
export function* iterLoadSenatComptesRendusCommissions(dataDir, session) {
|
|
146
|
-
yield* iterLoadSenatComptesRendusGeneric(dataDir, session, COMMISSION_FOLDER);
|
|
147
|
-
}
|
|
148
|
-
export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}) {
|
|
149
|
-
for (const dossierLegislatifItem of iterLoadSenatItems(dataDir, datasets.dosleg.database, session, DOSLEG_DOSSIERS_FOLDER, options)) {
|
|
150
|
-
yield dossierLegislatifItem;
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
export function* iterLoadSenatRapportUrls(dataDir, session) {
|
|
154
|
-
let itemsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
155
|
-
if (session) {
|
|
156
|
-
itemsDir = path.join(itemsDir, session.toString());
|
|
157
|
-
}
|
|
158
|
-
for (const filePath of iterFilePaths(itemsDir)) {
|
|
159
|
-
const parsedFilePath = path.parse(filePath);
|
|
160
|
-
if (parsedFilePath.base === DOCUMENT_METADATA_FILE) {
|
|
161
|
-
const itemJson = fs.readFileSync(filePath, { encoding: "utf8" });
|
|
162
|
-
const item = JSON.parse(itemJson);
|
|
163
|
-
yield {
|
|
164
|
-
item,
|
|
165
|
-
};
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
export function* iterLoadSenatTexteUrls(dataDir, session) {
|
|
170
|
-
let itemsDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
171
|
-
if (session) {
|
|
172
|
-
itemsDir = path.join(itemsDir, session.toString());
|
|
173
|
-
}
|
|
174
|
-
for (const filePath of iterFilePaths(itemsDir)) {
|
|
175
|
-
const parsedFilePath = path.parse(filePath);
|
|
176
|
-
if (parsedFilePath.base === DOCUMENT_METADATA_FILE) {
|
|
177
|
-
const itemJson = fs.readFileSync(filePath, { encoding: "utf8" });
|
|
178
|
-
const item = JSON.parse(itemJson);
|
|
179
|
-
yield {
|
|
180
|
-
item,
|
|
181
|
-
};
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
|
|
186
|
-
for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
|
|
187
|
-
for (const lecture of dossierLegislatif["lectures"]) {
|
|
188
|
-
const lecturesSenat = lecture.lectures_assemblee.filter((lectureAssemblee) => lectureAssemblee.assemblee === "Sénat");
|
|
189
|
-
for (const lectureSenat of lecturesSenat) {
|
|
190
|
-
for (const document of lectureSenat[documentType]) {
|
|
191
|
-
const enrichedDocument = {
|
|
192
|
-
signet_dossier: dossierLegislatif["signet"],
|
|
193
|
-
...document,
|
|
194
|
-
};
|
|
195
|
-
const documentItem = {
|
|
196
|
-
item: enrichedDocument,
|
|
197
|
-
};
|
|
198
|
-
if (document.url) {
|
|
199
|
-
const documentName = path.parse(document.url).name;
|
|
200
|
-
documentItem.filePathFromDataset = path.join(`${document.session ?? UNDEFINED_SESSION}`, documentName, `${documentName}.pdf`);
|
|
201
|
-
}
|
|
202
|
-
yield documentItem;
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
}
|
|
208
|
-
export function* iterLoadSenatRapports(dataDir, session, options = {}) {
|
|
209
|
-
for (const iterItem of iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options)) {
|
|
210
|
-
if (iterItem.item?.["id"]) {
|
|
211
|
-
yield iterItem;
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
export function* iterLoadSenatTextes(dataDir, session, options = {}) {
|
|
216
|
-
for (const iterItem of iterLoadSenatDocuments(dataDir, session, "textes", options)) {
|
|
217
|
-
yield iterItem;
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
export function loadSenatTexteContent(dataDir, textePathFromDataset) {
|
|
221
|
-
const parsedTextePath = path.parse(textePathFromDataset);
|
|
222
|
-
const jsonTexteName = `${parsedTextePath.name}.json`;
|
|
223
|
-
const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, parsedTextePath.dir, jsonTexteName);
|
|
224
|
-
if (!fs.existsSync(fullTextePath)) {
|
|
225
|
-
return { item: null };
|
|
226
|
-
}
|
|
227
|
-
const texteJson = fs.readFileSync(fullTextePath, { encoding: "utf8" });
|
|
228
|
-
return { item: JSON.parse(texteJson) };
|
|
229
|
-
}
|
|
230
|
-
export function loadSenatCompteRenduContent(dataDir, session, debatId) {
|
|
231
|
-
const fullPath = path.join(dataDir, COMPTES_RENDUS_FOLDER, DATA_TRANSFORMED_FOLDER, String(session), `${debatId}.json`);
|
|
232
|
-
if (!fs.existsSync(fullPath)) {
|
|
233
|
-
return { item: null };
|
|
234
|
-
}
|
|
235
|
-
const json = fs.readFileSync(fullPath, { encoding: "utf8" });
|
|
236
|
-
return { item: JSON.parse(json) };
|
|
237
|
-
}
|
|
238
|
-
export function* iterLoadSenatAgendas(dataDir, session) {
|
|
239
|
-
const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? ""));
|
|
240
|
-
if (!fs.existsSync(baseDir))
|
|
241
|
-
return;
|
|
242
|
-
const files = (fs.readdirSync(baseDir) || []).filter((f) => f.startsWith("RUSN") && f.endsWith(".json")).sort();
|
|
243
|
-
for (const fileName of files) {
|
|
244
|
-
const filePath = path.join(baseDir, fileName);
|
|
245
|
-
let raw;
|
|
246
|
-
try {
|
|
247
|
-
raw = fsex.readJSONSync(filePath);
|
|
248
|
-
}
|
|
249
|
-
catch {
|
|
250
|
-
continue; // JSON invalide
|
|
251
|
-
}
|
|
252
|
-
if (!raw || typeof raw !== "object")
|
|
253
|
-
continue;
|
|
254
|
-
const gr = raw;
|
|
255
|
-
if (!gr.uid || !gr.date || !gr.titre)
|
|
256
|
-
continue;
|
|
257
|
-
if (!Array.isArray(gr.events))
|
|
258
|
-
gr.events = [];
|
|
259
|
-
yield { item: gr };
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
export function* iterLoadSenatCirconscriptions(dataDir, options = {}) {
|
|
263
|
-
for (const circonscriptionItem of iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_CIRCONSCRIPTIONS_FOLDER, options)) {
|
|
264
|
-
yield circonscriptionItem;
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
export function* iterLoadSenatOrganismes(dataDir, options = {}) {
|
|
268
|
-
for (const organismeItem of iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_ORGANISMES_FOLDER, options)) {
|
|
269
|
-
yield organismeItem;
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
export function* iterLoadSenatSenateurs(dataDir, options = {}) {
|
|
273
|
-
for (const senateurItem of iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_SENATEURS_FOLDER, options)) {
|
|
274
|
-
yield senateurItem;
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
export function* iterLoadSenatQuestions(dataDir, legislature, options = {}) {
|
|
278
|
-
for (const questionItem of iterLoadSenatItems(dataDir, datasets.questions.database, legislature, undefined, options)) {
|
|
279
|
-
yield questionItem;
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
export function* iterLoadSenatScrutins(dataDir, session, options = {}) {
|
|
283
|
-
for (const scrutinItem of iterLoadSenatItems(dataDir, "scrutins", session, undefined, options)) {
|
|
284
|
-
yield scrutinItem;
|
|
285
|
-
}
|
|
286
|
-
}
|
package/lib/model/agenda.d.ts
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
import { AgendaEvent } from "../types/agenda";
|
|
2
|
-
export declare function getStartAndEndTimes(timeStr: string | null | undefined, dateISO: string): {
|
|
3
|
-
startTime: string | null;
|
|
4
|
-
endTime: string | null;
|
|
5
|
-
};
|
|
6
|
-
export declare function parseAgendaFromFile(htmlFilePath: string): Promise<AgendaEvent[] | null>;
|
package/lib/model/agenda.js
DELETED
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
import { JSDOM } from "jsdom";
|
|
2
|
-
import { DateTime } from "luxon";
|
|
3
|
-
import path from "path";
|
|
4
|
-
import { ID_DATE_FORMAT, STANDARD_DATE_FORMAT } from "../scripts/datautil";
|
|
5
|
-
const FR_TZ = "Europe/Paris";
|
|
6
|
-
function eventIsSeance(eventElement) {
|
|
7
|
-
return eventElement.classList.contains("evt-seance");
|
|
8
|
-
}
|
|
9
|
-
function getEventType(eventClasses) {
|
|
10
|
-
const typeClass = [...eventClasses].find((className) => className.startsWith("evt-")) || null;
|
|
11
|
-
switch (typeClass) {
|
|
12
|
-
case "evt-seance":
|
|
13
|
-
return "Séance publique";
|
|
14
|
-
case "evt-instanz":
|
|
15
|
-
return "Commissions";
|
|
16
|
-
case "evt-cemi":
|
|
17
|
-
return "Mission de contrôle";
|
|
18
|
-
case "evt-deleg":
|
|
19
|
-
return "Offices et délégations";
|
|
20
|
-
case "evt-bureau":
|
|
21
|
-
return "Instances décisionnelles";
|
|
22
|
-
}
|
|
23
|
-
return null;
|
|
24
|
-
}
|
|
25
|
-
function getUrlDossierSenat(lienElements) {
|
|
26
|
-
const urlElement = [...lienElements].find((lienElement) => lienElement.textContent?.includes("dossier législatif"));
|
|
27
|
-
return urlElement ? urlElement.getAttribute("href") : null;
|
|
28
|
-
}
|
|
29
|
-
function getQuantieme(eventElement, seancesElements) {
|
|
30
|
-
const seanceIndex = seancesElements.indexOf(eventElement);
|
|
31
|
-
if (seancesElements.length === 1 && seanceIndex === 0) {
|
|
32
|
-
return "Unique";
|
|
33
|
-
}
|
|
34
|
-
else {
|
|
35
|
-
switch (seanceIndex) {
|
|
36
|
-
case 0:
|
|
37
|
-
return "Première";
|
|
38
|
-
case 1:
|
|
39
|
-
return "Deuxième";
|
|
40
|
-
case 2:
|
|
41
|
-
return "Troisième";
|
|
42
|
-
case 3:
|
|
43
|
-
return "Quatrième";
|
|
44
|
-
case 4:
|
|
45
|
-
return "Cinquième";
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
return "Non défini";
|
|
49
|
-
}
|
|
50
|
-
/**
|
|
51
|
-
* Normalize time string to become a simple start time ("H'h'mm") or a duration ("'de 'H'h'mm' à 'H'h'mm").
|
|
52
|
-
*/
|
|
53
|
-
function normalizeTime(timeStr) {
|
|
54
|
-
return timeStr
|
|
55
|
-
?.replace(/^À l'issue de l'espace réservé .* et au plus tard\s/i, "") // Must be processed first
|
|
56
|
-
?.replace(/^(?:le )?matin/i, "10h00") // We chose "matin" to mean 10h00
|
|
57
|
-
?.replace(/^(?:l')?après-midi/i, "16h00") // We chose "après-midi" to mean 16h00
|
|
58
|
-
?.replace(/^(?:le )?soir/i, "20h00") // We chose "soir" to mean 20h00
|
|
59
|
-
?.replace(/^(?:la )?nuit/i, "22h00") // We chose "nuit" to mean 22h00
|
|
60
|
-
?.replace(/^à\s/gi, "")
|
|
61
|
-
?.replace(/heures/gi, "h00")
|
|
62
|
-
?.replace(/\set.*/i, "")
|
|
63
|
-
?.replace(/,.*/, "")
|
|
64
|
-
?.replace(/\s\(hors hémicycle\)/i, "")
|
|
65
|
-
?.replace(/\s*h\s*/gi, "h");
|
|
66
|
-
}
|
|
67
|
-
export function getStartAndEndTimes(timeStr, dateISO) {
|
|
68
|
-
const normalizedTime = normalizeTime(timeStr);
|
|
69
|
-
if (!normalizedTime) {
|
|
70
|
-
return { startTime: null, endTime: null };
|
|
71
|
-
}
|
|
72
|
-
const rangeMatch = normalizedTime.match(/^de (?<start>\d{1,2}h\d{2}) à (?<end>\d{1,2}h\d{2})$/i);
|
|
73
|
-
const toUtcTimeOnly = (value) => {
|
|
74
|
-
if (!value)
|
|
75
|
-
return null;
|
|
76
|
-
const time = DateTime.fromFormat(value, "H'h'mm", { zone: FR_TZ });
|
|
77
|
-
if (!time.isValid)
|
|
78
|
-
return null;
|
|
79
|
-
const local = DateTime.fromISO(dateISO, { zone: FR_TZ }).set({
|
|
80
|
-
hour: time.hour,
|
|
81
|
-
minute: time.minute,
|
|
82
|
-
second: 0,
|
|
83
|
-
millisecond: 0,
|
|
84
|
-
});
|
|
85
|
-
if (!local.isValid)
|
|
86
|
-
return null;
|
|
87
|
-
return local.toUTC().toFormat("HH:mm:ss.SSS'Z'");
|
|
88
|
-
};
|
|
89
|
-
if (rangeMatch?.groups) {
|
|
90
|
-
const { start, end } = rangeMatch.groups;
|
|
91
|
-
return {
|
|
92
|
-
startTime: toUtcTimeOnly(start),
|
|
93
|
-
endTime: toUtcTimeOnly(end),
|
|
94
|
-
};
|
|
95
|
-
}
|
|
96
|
-
return {
|
|
97
|
-
startTime: toUtcTimeOnly(normalizedTime),
|
|
98
|
-
endTime: null,
|
|
99
|
-
};
|
|
100
|
-
}
|
|
101
|
-
function transformAgenda(document, fileName) {
|
|
102
|
-
const agendaEvents = [];
|
|
103
|
-
const eventElements = document.querySelectorAll(".evt");
|
|
104
|
-
const seanceElements = Array.from(eventElements).filter((eventElement) => eventIsSeance(eventElement));
|
|
105
|
-
for (const eventElement of eventElements) {
|
|
106
|
-
const id = eventElement.previousElementSibling?.getAttribute("name") || null;
|
|
107
|
-
if (!id) {
|
|
108
|
-
continue;
|
|
109
|
-
}
|
|
110
|
-
const type = getEventType(eventElement.classList);
|
|
111
|
-
const date = DateTime.fromFormat(fileName, ID_DATE_FORMAT).toFormat(STANDARD_DATE_FORMAT);
|
|
112
|
-
const timeOriginal = eventElement.querySelector(".time")?.textContent || null;
|
|
113
|
-
const { startTime, endTime } = getStartAndEndTimes(timeOriginal, date);
|
|
114
|
-
const titre = eventElement.querySelector(".titre")?.textContent?.trim() || "";
|
|
115
|
-
const organe = eventElement.querySelector(".organe")?.textContent?.trim() || null;
|
|
116
|
-
const objet = eventElement.querySelector(".objet")?.textContent?.trim()?.replace(/^- /, "") || null;
|
|
117
|
-
const lieu = eventElement.querySelector(".lieu")?.textContent || null;
|
|
118
|
-
const videoElement = eventElement.querySelector(".video");
|
|
119
|
-
const urlDossierSenat = getUrlDossierSenat(eventElement.querySelectorAll(".lien a"));
|
|
120
|
-
agendaEvents.push({
|
|
121
|
-
id,
|
|
122
|
-
type,
|
|
123
|
-
date,
|
|
124
|
-
startTime,
|
|
125
|
-
endTime,
|
|
126
|
-
timeOriginal,
|
|
127
|
-
titre,
|
|
128
|
-
organe,
|
|
129
|
-
objet,
|
|
130
|
-
lieu,
|
|
131
|
-
captationVideo: videoElement !== null,
|
|
132
|
-
urlDossierSenat: urlDossierSenat,
|
|
133
|
-
quantieme: eventIsSeance(eventElement) ? getQuantieme(eventElement, seanceElements) : null,
|
|
134
|
-
});
|
|
135
|
-
}
|
|
136
|
-
return agendaEvents;
|
|
137
|
-
}
|
|
138
|
-
export async function parseAgendaFromFile(htmlFilePath) {
|
|
139
|
-
try {
|
|
140
|
-
const { document } = (await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" })).window;
|
|
141
|
-
const fileName = path.parse(htmlFilePath).name;
|
|
142
|
-
return transformAgenda(document, fileName);
|
|
143
|
-
}
|
|
144
|
-
catch (error) {
|
|
145
|
-
console.error(`Could not parse texte with error ${error}`);
|
|
146
|
-
}
|
|
147
|
-
return null;
|
|
148
|
-
}
|
package/lib/model/ameli.d.ts
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import { InferResult } from "kysely";
|
|
2
|
-
export type AmendementResult = InferResult<typeof findAllAmendementsQuery>[0];
|
|
3
|
-
declare const findAllAmendementsQuery: import("kysely").SelectQueryBuilder<{
|
|
4
|
-
[x: string]: any;
|
|
5
|
-
[x: number]: any;
|
|
6
|
-
[x: symbol]: any;
|
|
7
|
-
}, "ameli.amd" | "ameli.sub" | "ameli.typsub" | "ameli.typrect" | "ameli.txt_ameli" | "ameli.etatxt" | "ameli.ses" | "ameli.typses" | "ameli.nat" | "ameli.lec_ameli" | "dosleg.texte" | "dosleg.lecass" | "ameli.mot" | "ameli.avicom" | "ameli.avigvt" | "ameli.sor" | "ameli.irr" | "ameli.grppol_ameli" | "ameli.com_ameli" | "ameli.cab", {
|
|
8
|
-
[x: string]: any;
|
|
9
|
-
nature: string;
|
|
10
|
-
date_depot: string;
|
|
11
|
-
etat: string;
|
|
12
|
-
sort: any;
|
|
13
|
-
url: string;
|
|
14
|
-
auteur_est_gouvernement: boolean;
|
|
15
|
-
scrutin_num: any;
|
|
16
|
-
auteurs: {
|
|
17
|
-
prenom: any;
|
|
18
|
-
homonyme: any;
|
|
19
|
-
nom: any;
|
|
20
|
-
qualite: any;
|
|
21
|
-
rang: any;
|
|
22
|
-
matricule: any;
|
|
23
|
-
groupe_politique_id: any;
|
|
24
|
-
group_politique_code: any;
|
|
25
|
-
groupe_politique_libelle_court: any;
|
|
26
|
-
groupe_politique_libelle: any;
|
|
27
|
-
}[];
|
|
28
|
-
}>;
|
|
29
|
-
export declare function findAllAmendements(fromSession?: number): AsyncIterableIterator<{
|
|
30
|
-
[x: string]: any;
|
|
31
|
-
nature: string;
|
|
32
|
-
date_depot: string;
|
|
33
|
-
etat: string;
|
|
34
|
-
sort: any;
|
|
35
|
-
url: string;
|
|
36
|
-
auteur_est_gouvernement: boolean;
|
|
37
|
-
scrutin_num: any;
|
|
38
|
-
auteurs: {
|
|
39
|
-
prenom: any;
|
|
40
|
-
homonyme: any;
|
|
41
|
-
nom: any;
|
|
42
|
-
qualite: any;
|
|
43
|
-
rang: any;
|
|
44
|
-
matricule: any;
|
|
45
|
-
groupe_politique_id: any;
|
|
46
|
-
group_politique_code: any;
|
|
47
|
-
groupe_politique_libelle_court: any;
|
|
48
|
-
groupe_politique_libelle: any;
|
|
49
|
-
}[];
|
|
50
|
-
}>;
|
|
51
|
-
export {};
|