@tricoteuses/senat 2.5.8 → 2.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/lib/databases.d.ts +2 -11
- package/lib/databases.js +5 -29
- package/lib/index.d.ts +1 -0
- package/lib/loaders.d.ts +4 -1
- package/lib/loaders.js +7 -11
- package/lib/model/agenda.js +2 -1
- package/lib/model/ameli.d.ts +4 -6
- package/lib/model/ameli.js +2 -2
- package/lib/model/compte_rendu.d.ts +3 -0
- package/lib/model/compte_rendu.js +32 -0
- package/lib/model/debats.d.ts +81 -0
- package/lib/model/debats.js +81 -1
- package/lib/model/dosleg.d.ts +27 -14
- package/lib/model/dosleg.js +20 -2
- package/lib/model/index.d.ts +2 -1
- package/lib/model/index.js +2 -1
- package/lib/model/questions.d.ts +6 -8
- package/lib/model/questions.js +0 -2
- package/lib/model/sens.d.ts +11 -13
- package/lib/model/sens.js +7 -10
- package/lib/model/texte.js +20 -8
- package/lib/model/util.d.ts +1 -1
- package/lib/model/util.js +3 -2
- package/lib/scripts/convert_data.js +50 -20
- package/lib/scripts/datautil.d.ts +6 -2
- package/lib/scripts/datautil.js +22 -5
- package/lib/scripts/retrieve_agenda.js +46 -29
- package/lib/scripts/retrieve_comptes_rendus.d.ts +1 -0
- package/lib/scripts/retrieve_comptes_rendus.js +106 -0
- package/lib/scripts/retrieve_documents.js +74 -67
- package/lib/types/compte_rendu.d.ts +11 -0
- package/lib/types/compte_rendu.js +1 -0
- package/package.json +2 -2
package/lib/model/questions.d.ts
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import { InferResult } from "kysely";
|
|
2
|
+
export type QuestionResult = InferResult<typeof findAllQuery>[0];
|
|
2
3
|
declare const findAllQuery: import("kysely").SelectQueryBuilder<{
|
|
3
4
|
the: import("../raw_types/questions").The;
|
|
4
|
-
|
|
5
|
+
tam_questions: import("../raw_types/questions").TamQuestions;
|
|
5
6
|
etatquestion: import("kysely").Nullable<import("../raw_types/questions").Etatquestion>;
|
|
6
7
|
legquestion: import("../raw_types/questions").Legquestion;
|
|
7
8
|
naturequestion: import("kysely").Nullable<import("../raw_types/questions").Naturequestion>;
|
|
8
9
|
sortquestion: import("kysely").Nullable<import("../raw_types/questions").Sortquestion>;
|
|
9
10
|
tam_ministeres: import("kysely").Nullable<import("../raw_types/questions").TamMinisteres>;
|
|
10
|
-
|
|
11
|
-
}, "
|
|
11
|
+
tam_reponses: import("../raw_types/questions").TamReponses;
|
|
12
|
+
}, "tam_questions" | "etatquestion" | "naturequestion" | "sortquestion" | "tam_ministeres", {
|
|
12
13
|
date_publication_JO: string;
|
|
13
|
-
date_reponse_JO: string;
|
|
14
14
|
republique: string | null;
|
|
15
15
|
legislature: string | null;
|
|
16
16
|
sort: string | null;
|
|
@@ -29,13 +29,13 @@ declare const findAllQuery: import("kysely").SelectQueryBuilder<{
|
|
|
29
29
|
ministere_depot_date_debut: string;
|
|
30
30
|
date_transmission: string;
|
|
31
31
|
ministere_transmission: string | null;
|
|
32
|
+
date_reponse_JO: string;
|
|
32
33
|
ministere_reponse: string | null;
|
|
33
34
|
date_cloture: string;
|
|
34
35
|
reference_question_rappelee: string | null;
|
|
35
36
|
texte: string | null;
|
|
36
37
|
texte_erratum: string | null;
|
|
37
38
|
rubrique: string | null;
|
|
38
|
-
} & {
|
|
39
39
|
themes: {
|
|
40
40
|
libelle: string;
|
|
41
41
|
}[];
|
|
@@ -46,10 +46,8 @@ declare const findAllQuery: import("kysely").SelectQueryBuilder<{
|
|
|
46
46
|
texte_erratum: string | null;
|
|
47
47
|
}[];
|
|
48
48
|
}>;
|
|
49
|
-
export type QuestionResult = InferResult<typeof findAllQuery>[0];
|
|
50
49
|
export declare function findAll(): AsyncIterableIterator<{
|
|
51
50
|
date_publication_JO: string;
|
|
52
|
-
date_reponse_JO: string;
|
|
53
51
|
republique: string | null;
|
|
54
52
|
legislature: string | null;
|
|
55
53
|
sort: string | null;
|
|
@@ -68,13 +66,13 @@ export declare function findAll(): AsyncIterableIterator<{
|
|
|
68
66
|
ministere_depot_date_debut: string;
|
|
69
67
|
date_transmission: string;
|
|
70
68
|
ministere_transmission: string | null;
|
|
69
|
+
date_reponse_JO: string;
|
|
71
70
|
ministere_reponse: string | null;
|
|
72
71
|
date_cloture: string;
|
|
73
72
|
reference_question_rappelee: string | null;
|
|
74
73
|
texte: string | null;
|
|
75
74
|
texte_erratum: string | null;
|
|
76
75
|
rubrique: string | null;
|
|
77
|
-
} & {
|
|
78
76
|
themes: {
|
|
79
77
|
libelle: string;
|
|
80
78
|
}[];
|
package/lib/model/questions.js
CHANGED
|
@@ -63,8 +63,6 @@ const findAllQuery = dbQuestions
|
|
|
63
63
|
"tam_questions.txtque as texte",
|
|
64
64
|
"tam_questions.txtque as texte_erratum",
|
|
65
65
|
"tam_questions.rubrique as rubrique",
|
|
66
|
-
])
|
|
67
|
-
.select(({ ref }) => [
|
|
68
66
|
themes(ref("tam_questions.id")).as("themes"),
|
|
69
67
|
reponses(ref("tam_questions.id")).as("reponses"),
|
|
70
68
|
]);
|
package/lib/model/sens.d.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
import { InferResult } from "kysely";
|
|
2
|
+
export type SenateurResult = InferResult<typeof findAllQuery>[0];
|
|
3
|
+
export type CirconscriptionResult = InferResult<typeof findAllCirconscriptionsQuery>[0];
|
|
4
|
+
export type OrganismeResult = InferResult<typeof findAllOrganismesQuery>[0];
|
|
2
5
|
declare const findAllQuery: import("kysely").SelectQueryBuilder<{
|
|
3
6
|
qua: import("../raw_types/sens").Qua;
|
|
4
7
|
bur: import("../raw_types/sens").Bur;
|
|
@@ -121,15 +124,14 @@ declare const findAllQuery: import("kysely").SelectQueryBuilder<{
|
|
|
121
124
|
description_profession: string | null;
|
|
122
125
|
siege: string | null;
|
|
123
126
|
url_hatvp: string | null;
|
|
124
|
-
} & {
|
|
125
127
|
urls: {
|
|
126
128
|
code_url: string;
|
|
127
129
|
url: string;
|
|
128
130
|
}[];
|
|
129
131
|
mandats_senateur: {
|
|
132
|
+
code_circonscription: string;
|
|
130
133
|
date_debut: string;
|
|
131
134
|
date_fin: string;
|
|
132
|
-
code_circonscription: string;
|
|
133
135
|
etat: string | null;
|
|
134
136
|
etat_debut: string | null;
|
|
135
137
|
etat_fin: string | null;
|
|
@@ -282,13 +284,13 @@ declare const findAllCirconscriptionsQuery: import("kysely").SelectQueryBuilder<
|
|
|
282
284
|
typvoi: import("../raw_types/sens").Typvoi;
|
|
283
285
|
zongeo: import("../raw_types/sens").Zongeo;
|
|
284
286
|
}, "dpt" | "reg", {
|
|
285
|
-
code: string;
|
|
286
|
-
libelle_departement: string;
|
|
287
|
-
libelle_region: string | null;
|
|
288
287
|
date_debut: string;
|
|
289
288
|
date_fin: string;
|
|
290
289
|
identifiant: string;
|
|
290
|
+
code: string;
|
|
291
|
+
libelle_departement: string;
|
|
291
292
|
article: string | null;
|
|
293
|
+
libelle_region: string | null;
|
|
292
294
|
etat: string | null;
|
|
293
295
|
nombre_senateurs: string | null;
|
|
294
296
|
url: string | null;
|
|
@@ -415,9 +417,6 @@ declare const findAllOrganismesQuery: import("kysely").SelectQueryBuilder<{
|
|
|
415
417
|
type_libelle: string;
|
|
416
418
|
etat: string | null;
|
|
417
419
|
}>;
|
|
418
|
-
export type SenateurResult = InferResult<typeof findAllQuery>[0];
|
|
419
|
-
export type CirconscriptionResult = InferResult<typeof findAllCirconscriptionsQuery>[0];
|
|
420
|
-
export type OrganismeResult = InferResult<typeof findAllOrganismesQuery>[0];
|
|
421
420
|
export declare function findAll(): AsyncIterableIterator<{
|
|
422
421
|
matricule: string;
|
|
423
422
|
qualite: string;
|
|
@@ -441,15 +440,14 @@ export declare function findAll(): AsyncIterableIterator<{
|
|
|
441
440
|
description_profession: string | null;
|
|
442
441
|
siege: string | null;
|
|
443
442
|
url_hatvp: string | null;
|
|
444
|
-
} & {
|
|
445
443
|
urls: {
|
|
446
444
|
code_url: string;
|
|
447
445
|
url: string;
|
|
448
446
|
}[];
|
|
449
447
|
mandats_senateur: {
|
|
448
|
+
code_circonscription: string;
|
|
450
449
|
date_debut: string;
|
|
451
450
|
date_fin: string;
|
|
452
|
-
code_circonscription: string;
|
|
453
451
|
etat: string | null;
|
|
454
452
|
etat_debut: string | null;
|
|
455
453
|
etat_fin: string | null;
|
|
@@ -503,13 +501,13 @@ export declare function findAll(): AsyncIterableIterator<{
|
|
|
503
501
|
}[];
|
|
504
502
|
}>;
|
|
505
503
|
export declare function findAllCirconscriptions(): AsyncIterableIterator<{
|
|
506
|
-
code: string;
|
|
507
|
-
libelle_departement: string;
|
|
508
|
-
libelle_region: string | null;
|
|
509
504
|
date_debut: string;
|
|
510
505
|
date_fin: string;
|
|
511
506
|
identifiant: string;
|
|
507
|
+
code: string;
|
|
508
|
+
libelle_departement: string;
|
|
512
509
|
article: string | null;
|
|
510
|
+
libelle_region: string | null;
|
|
513
511
|
etat: string | null;
|
|
514
512
|
nombre_senateurs: string | null;
|
|
515
513
|
url: string | null;
|
package/lib/model/sens.js
CHANGED
|
@@ -16,7 +16,9 @@ function textes(actId) {
|
|
|
16
16
|
return jsonArrayFrom(dbSens
|
|
17
17
|
.selectFrom("activite_loi")
|
|
18
18
|
.where("actid", "=", actId)
|
|
19
|
-
.select(({ ref }) => [
|
|
19
|
+
.select(({ ref }) => [
|
|
20
|
+
rtrim(ref("loicod")).as("loicod"),
|
|
21
|
+
]));
|
|
20
22
|
}
|
|
21
23
|
function delegations(actId) {
|
|
22
24
|
return jsonArrayFrom(dbSens
|
|
@@ -157,8 +159,6 @@ function mandatsMembreCommission(senMat) {
|
|
|
157
159
|
toDateString(ref("memcom.memcomdatdeb")).as("date_debut"),
|
|
158
160
|
toDateString(ref("memcom.memcomdatfin")).as("date_fin"),
|
|
159
161
|
"memcom.temvalcod as etat",
|
|
160
|
-
])
|
|
161
|
-
.select(({ ref }) => [
|
|
162
162
|
fonctionsMembreCommission(ref("memcom.memcomid")).as("fonctions"),
|
|
163
163
|
])
|
|
164
164
|
.orderBy("memcom.memcomdatdeb desc"));
|
|
@@ -190,8 +190,6 @@ function mandatsMembreDelegation(senMat) {
|
|
|
190
190
|
toDateString(ref("memdelega.memdelegadatdeb")).as("date_debut"),
|
|
191
191
|
toDateString(ref("memdelega.memdelegadatfin")).as("date_fin"),
|
|
192
192
|
"memdelega.temvalcod as etat",
|
|
193
|
-
])
|
|
194
|
-
.select(({ ref }) => [
|
|
195
193
|
fonctionsMembreDelegation(ref("memdelega.memdelegaid")).as("fonctions"),
|
|
196
194
|
])
|
|
197
195
|
.orderBy("memdelega.memdelegadatdeb desc"));
|
|
@@ -231,8 +229,6 @@ function mandatsMembreGroupePolitique(senMat) {
|
|
|
231
229
|
toDateString(ref("memgrppol.memgrppoldatdeb")).as("date_debut"),
|
|
232
230
|
toDateString(ref("memgrppol.memgrppoldatfin")).as("date_fin"),
|
|
233
231
|
"memgrppol.temvalcod as etat",
|
|
234
|
-
])
|
|
235
|
-
.select(({ ref }) => [
|
|
236
232
|
fonctionsMembreGroupePolitique(ref("memgrppol.memgrppolid")).as("fonctions"),
|
|
237
233
|
])
|
|
238
234
|
.orderBy("memgrppol.memgrppoldatdeb desc"));
|
|
@@ -293,7 +289,10 @@ function urls(senMat) {
|
|
|
293
289
|
return jsonArrayFrom(dbSens
|
|
294
290
|
.selectFrom("senurl")
|
|
295
291
|
.where("senurl.senmat", "=", senMat)
|
|
296
|
-
.select([
|
|
292
|
+
.select([
|
|
293
|
+
"senurl.typurlcod as code_url",
|
|
294
|
+
"senurl.senurlurl as url",
|
|
295
|
+
])
|
|
297
296
|
.orderBy("senurl.senurlnumtri asc"));
|
|
298
297
|
}
|
|
299
298
|
const findAllQuery = dbSens
|
|
@@ -328,8 +327,6 @@ const findAllQuery = dbSens
|
|
|
328
327
|
"sen.sendespro as description_profession",
|
|
329
328
|
"sen.sennumsie as siege",
|
|
330
329
|
"sen.sendaiurl as url_hatvp",
|
|
331
|
-
])
|
|
332
|
-
.select(({ ref }) => [
|
|
333
330
|
urls(ref("sen.senmat")).as("urls"),
|
|
334
331
|
mandatsEluSenateur(ref("sen.senmat")).as("mandats_senateur"),
|
|
335
332
|
mandatsMembreCommission(ref("sen.senmat")).as("commissions"),
|
package/lib/model/texte.js
CHANGED
|
@@ -195,10 +195,10 @@ export function transformExposeDesMotifs(document) {
|
|
|
195
195
|
continue;
|
|
196
196
|
}
|
|
197
197
|
else {
|
|
198
|
-
|
|
198
|
+
secondParagraph.remove();
|
|
199
199
|
}
|
|
200
200
|
}
|
|
201
|
-
|
|
201
|
+
firstParagraph.remove();
|
|
202
202
|
return {
|
|
203
203
|
text: sectionElement.textContent?.trim() ?? null,
|
|
204
204
|
html: sectionElement.innerHTML?.trim() ?? null,
|
|
@@ -231,14 +231,26 @@ export async function parseTexteFromFile(xmlFilePath) {
|
|
|
231
231
|
return null;
|
|
232
232
|
}
|
|
233
233
|
/**
 * Parses an "exposé des motifs" from an in-memory HTML string.
 *
 * The HTML is loaded into a JSDOM document, which is then handed to
 * `transformExposeDesMotifs`.
 *
 * @param exposeDesMotifsHtml HTML markup to parse.
 * @returns Whatever `transformExposeDesMotifs` returns, or `null` when
 *          building the DOM or transforming it throws (the error is logged).
 */
export function parseExposeDesMotifs(exposeDesMotifsHtml) {
    let parsed = null;
    try {
        const dom = new JSDOM(exposeDesMotifsHtml, { contentType: "text/html" });
        // NOTE(review): the JSDOM window is never closed here; confirm whether
        // `dom.window.close()` is needed to release it once the result is built.
        parsed = transformExposeDesMotifs(dom.window.document);
    }
    catch (error) {
        // Best effort: a failure is reported and mapped to a null result.
        console.error(`Could not parse exposé des motifs with error ${error}`);
    }
    return parsed;
}
|
|
239
245
|
// Prevent from memory leak
|
|
240
246
|
// https://github.com/jsdom/jsdom/issues/2583#issuecomment-559520814
|
|
241
247
|
/**
 * Parses an "exposé des motifs" from an HTML file on disk.
 *
 * The file is loaded with `JSDOM.fromFile` and the resulting document is
 * handed to `transformExposeDesMotifs`.
 *
 * @param htmlFilePath Path of the HTML file to load.
 * @returns A promise resolving to whatever `transformExposeDesMotifs`
 *          returns, or to `null` when loading or transforming throws
 *          (the error is logged).
 */
export async function parseExposeDesMotifsFromFile(htmlFilePath) {
    let parsed = null;
    try {
        const dom = await JSDOM.fromFile(htmlFilePath, { contentType: "text/html" });
        // NOTE(review): the JSDOM window is never closed here; confirm whether
        // `dom.window.close()` is needed to release it once the result is built.
        parsed = transformExposeDesMotifs(dom.window.document);
    }
    catch (error) {
        // Best effort: a failure is reported and mapped to a null result.
        console.error(`Could not parse exposé des motifs with error ${error}`);
    }
    return parsed;
}
|
package/lib/model/util.d.ts
CHANGED
|
@@ -5,4 +5,4 @@ export declare function nullIf(expr: Expression<number | string | null | undefin
|
|
|
5
5
|
export declare function removeSubstring(expr: Expression<string | null | undefined>, pattern: Expression<string>): import("kysely").RawBuilder<string>;
|
|
6
6
|
export declare function replace(expr: Expression<string | null | undefined>, pattern: Expression<string>, replacement: Expression<string>): import("kysely").RawBuilder<string>;
|
|
7
7
|
export declare function rtrim(expr: Expression<string | null | undefined>): import("kysely").RawBuilder<string>;
|
|
8
|
-
export declare function toDateString(expr: Expression<Date | null | undefined>): import("kysely").RawBuilder<string>;
|
|
8
|
+
export declare function toDateString(expr: Expression<Date | null | undefined>, format?: Expression<string>): import("kysely").RawBuilder<string>;
|
package/lib/model/util.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { sql } from "kysely";
|
|
2
|
+
import { STANDARD_DATE_FORMAT } from "../scripts/datautil";
|
|
2
3
|
/**
 * Builds a SQL fragment that joins the given expressions with the `||`
 * operator.
 *
 * @param exprs Expressions to join, in order.
 * @returns The joined raw SQL builder, passed through `$castTo()`.
 */
export function concat(...exprs) {
    const separator = sql `||`;
    const joined = sql.join(exprs, separator);
    return joined.$castTo();
}
|
|
@@ -17,6 +18,6 @@ export function replace(expr, pattern, replacement) {
|
|
|
17
18
|
/**
 * Wraps an expression in a SQL `rtrim(...)` call.
 *
 * @param expr Expression to trim.
 * @returns Raw SQL builder for `rtrim(expr)`.
 */
export function rtrim(expr) {
    const trimmed = sql `rtrim(${expr})`;
    return trimmed;
}
|
|
20
|
-
/**
 * Wraps a date expression in a SQL `to_char(expr, format)` call.
 *
 * @param expr Date expression to format.
 * @param format Format expression passed as the second `to_char` argument;
 *               defaults to `STANDARD_DATE_FORMAT` bound through `sql.val`.
 * @returns Raw SQL builder for the formatted date.
 */
export function toDateString(expr, format = sql.val(STANDARD_DATE_FORMAT)) {
    const formatted = sql `to_char(${expr}, ${format})`;
    return formatted;
}
|
|
@@ -3,11 +3,10 @@ import commandLineArgs from "command-line-args";
|
|
|
3
3
|
import fs from "fs-extra";
|
|
4
4
|
import path from "path";
|
|
5
5
|
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
|
|
6
|
-
import { DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER,
|
|
7
|
-
import { findAllAmendements, findAllCirconscriptions, findAllLois, findAllOrganismes, findAllQuestions, findAllSens, } from "../model";
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, } from "./datautil";
|
|
6
|
+
import { DATA_ORIGINAL_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
|
|
7
|
+
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllLois, findAllOrganismes, findAllQuestions, findAllSens, findAuteur, findSenatRapportUrls, findSenatTexteUrls, } from "../model";
|
|
8
|
+
import { UNDEFINED_SESSION } from "../types/sessions";
|
|
9
|
+
import { getSessionFromDate, getSessionFromSignet } from "./datautil";
|
|
11
10
|
import { commonOptions } from "./shared/cli_helpers";
|
|
12
11
|
import { ensureAndClearDir } from "./shared/util";
|
|
13
12
|
const optionsDefinitions = [...commonOptions];
|
|
@@ -20,13 +19,15 @@ async function convertData() {
|
|
|
20
19
|
const dataDir = options["dataDir"];
|
|
21
20
|
assert(dataDir, "Missing argument: data directory");
|
|
22
21
|
const enabledDatasets = getEnabledDatasets(options["categories"]);
|
|
23
|
-
const sessions = getSessionsFromStart(options["fromSession"]);
|
|
24
22
|
console.time("data transformation time");
|
|
25
23
|
if (enabledDatasets & EnabledDatasets.Ameli) {
|
|
26
24
|
await convertDatasetAmeli(dataDir);
|
|
27
25
|
}
|
|
26
|
+
if (enabledDatasets & EnabledDatasets.Debats) {
|
|
27
|
+
await convertDatasetDebats(dataDir);
|
|
28
|
+
}
|
|
28
29
|
if (enabledDatasets & EnabledDatasets.DosLeg) {
|
|
29
|
-
await convertDatasetDosLeg(dataDir
|
|
30
|
+
await convertDatasetDosLeg(dataDir);
|
|
30
31
|
}
|
|
31
32
|
if (enabledDatasets & EnabledDatasets.Questions) {
|
|
32
33
|
await convertDatasetQuestions(dataDir);
|
|
@@ -58,7 +59,40 @@ async function convertDatasetAmeli(dataDir) {
|
|
|
58
59
|
fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 });
|
|
59
60
|
}
|
|
60
61
|
}
|
|
61
|
-
async function
|
|
62
|
+
/**
 * Exports every debate returned by `findAllDebats()` as one JSON file.
 *
 * Files are written to `<dataDir>/<debats database name>/<session>/<id>.json`,
 * where the session is derived from the debate's `date_seance`. The root
 * output directory is passed to `ensureAndClearDir` before anything is written.
 *
 * @param dataDir Root data directory.
 */
async function convertDatasetDebats(dataDir) {
    const { database } = datasets.debats;
    if (!options["silent"]) {
        console.log(`Converting database ${database} data into files…`);
    }
    const outputRootDir = path.join(dataDir, database);
    ensureAndClearDir(outputRootDir);
    for await (const rawDebat of findAllDebats()) {
        if (options["verbose"]) {
            console.log(`Converting ${rawDebat.id} file…`);
        }
        // Resolve authors first: the session folder is computed from the enriched record.
        const debat = await enrichDebat(rawDebat);
        const sessionDir = path.join(outputRootDir, String(getSessionFromDate(debat.date_seance)));
        fs.ensureDirSync(sessionDir);
        fs.writeJSONSync(path.join(sessionDir, `${debat.id}.json`), debat, { spaces: 2 });
    }
}
|
|
81
|
+
/**
 * Attaches an `auteur` to every intervention of a debate.
 *
 * Each intervention in `sections`, then in `sections_divers`, gets
 * `auteur = await findAuteur(intervention.auteur_code)`; lookups run one
 * after another, in document order.
 *
 * The returned object is a shallow copy of `debat`: the section and
 * intervention objects are shared with the input, so the input's
 * interventions are updated in place as well.
 *
 * @param debat Debate record to enrich.
 * @returns The shallow copy of `debat` with authors resolved.
 */
async function enrichDebat(debat) {
    const enrichedDebat = { ...debat };
    // Both section lists receive the same treatment; each list is only read
    // when its turn comes, exactly as with two separate loops.
    for (const sectionsKey of ["sections", "sections_divers"]) {
        for (const { interventions } of enrichedDebat[sectionsKey]) {
            for (const intervention of interventions) {
                intervention.auteur = await findAuteur(intervention.auteur_code);
            }
        }
    }
    return enrichedDebat;
}
|
|
95
|
+
async function convertDatasetDosLeg(dataDir) {
|
|
62
96
|
const dataset = datasets.dosleg;
|
|
63
97
|
if (!options["silent"]) {
|
|
64
98
|
console.log(`Converting database ${dataset.database} data into files…`);
|
|
@@ -72,20 +106,16 @@ async function convertDatasetDosLeg(dataDir, sessions) {
|
|
|
72
106
|
console.log(`Converting ${loi.signet} file…`);
|
|
73
107
|
}
|
|
74
108
|
let loiReorganizedDir = path.join(dossiersReorganizedDir, String(UNDEFINED_SESSION));
|
|
75
|
-
const
|
|
76
|
-
|
|
77
|
-
const { session } = signetParts;
|
|
78
|
-
const formattedSession = formatToFourDigitSession(session);
|
|
79
|
-
loiReorganizedDir = path.join(dossiersReorganizedDir, String(formattedSession));
|
|
80
|
-
}
|
|
109
|
+
const session = getSessionFromSignet(loi.signet) || UNDEFINED_SESSION;
|
|
110
|
+
loiReorganizedDir = path.join(dossiersReorganizedDir, String(session));
|
|
81
111
|
fs.ensureDirSync(loiReorganizedDir);
|
|
82
112
|
const loiFileName = `${loi.signet}.json`;
|
|
83
113
|
fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, {
|
|
84
114
|
spaces: 2,
|
|
85
115
|
});
|
|
86
116
|
}
|
|
87
|
-
await convertTexteUrls(dataDir
|
|
88
|
-
await convertRapportUrls(dataDir
|
|
117
|
+
await convertTexteUrls(dataDir);
|
|
118
|
+
await convertRapportUrls(dataDir);
|
|
89
119
|
}
|
|
90
120
|
async function convertDatasetQuestions(dataDir) {
|
|
91
121
|
const dataset = datasets.questions;
|
|
@@ -105,11 +135,11 @@ async function convertDatasetQuestions(dataDir) {
|
|
|
105
135
|
fs.writeJSONSync(path.join(questionReorganizedDir, questionFileName), question, { spaces: 2 });
|
|
106
136
|
}
|
|
107
137
|
}
|
|
108
|
-
async function convertTexteUrls(dataDir
|
|
138
|
+
async function convertTexteUrls(dataDir) {
|
|
109
139
|
const textesDir = path.join(dataDir, TEXTE_FOLDER);
|
|
110
140
|
fs.ensureDirSync(textesDir);
|
|
111
141
|
const originalTextesDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);
|
|
112
|
-
for await (const texte of findSenatTexteUrls(
|
|
142
|
+
for await (const texte of findSenatTexteUrls()) {
|
|
113
143
|
const texteName = path.parse(texte.url).name;
|
|
114
144
|
const texteDir = path.join(originalTextesDir, `${texte.session ?? UNDEFINED_SESSION}`, texteName);
|
|
115
145
|
fs.ensureDirSync(texteDir);
|
|
@@ -128,11 +158,11 @@ async function convertTexteUrls(dataDir, sessions) {
|
|
|
128
158
|
});
|
|
129
159
|
}
|
|
130
160
|
}
|
|
131
|
-
async function convertRapportUrls(dataDir
|
|
161
|
+
async function convertRapportUrls(dataDir) {
|
|
132
162
|
const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
|
|
133
163
|
fs.ensureDirSync(rapportsDir);
|
|
134
164
|
const originalTextesDir = path.join(rapportsDir, DATA_ORIGINAL_FOLDER);
|
|
135
|
-
for await (const rapport of findSenatRapportUrls(
|
|
165
|
+
for await (const rapport of findSenatRapportUrls()) {
|
|
136
166
|
const parsedRapportUrl = path.parse(rapport.url);
|
|
137
167
|
const rapportName = parsedRapportUrl.name;
|
|
138
168
|
const rapportDir = path.join(originalTextesDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName);
|
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
|
|
1
|
+
import { Session } from "../types/sessions";
|
|
2
|
+
export declare const STANDARD_DATE_FORMAT = "yyyy-MM-dd";
|
|
3
|
+
export declare const ID_DATE_FORMAT = "yyyyMMdd";
|
|
2
4
|
export declare const AKN_IDENTIFICATION_STRUCTURE_REGEXP: RegExp;
|
|
3
5
|
export declare const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP: RegExp;
|
|
4
|
-
export declare
|
|
6
|
+
export declare const SIGNET_STRUCTURE_REGEXP: RegExp;
|
|
7
|
+
export declare function getSessionFromSignet(signet: string): Session | null;
|
|
8
|
+
export declare function getSessionFromDate(date: string, format?: string): Session;
|
package/lib/scripts/datautil.js
CHANGED
|
@@ -1,17 +1,34 @@
|
|
|
1
1
|
import { DateTime, Settings } from "luxon";
|
|
2
|
-
import { UNDEFINED_SESSION } from "../types/sessions";
|
|
3
2
|
Settings.twoDigitCutoffYear = 50;
|
|
4
|
-
export const
|
|
3
|
+
export const STANDARD_DATE_FORMAT = "yyyy-MM-dd";
|
|
4
|
+
export const ID_DATE_FORMAT = "yyyyMMdd";
|
|
5
5
|
export const AKN_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{4}-\d{4})\/?(?<numTexte>\d+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;
|
|
6
6
|
export const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP = /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{2,4})\/?(?<numTexte>[a-zA-Z0-9]+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;
|
|
7
|
-
export
|
|
7
|
+
export const SIGNET_STRUCTURE_REGEXP = /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/;
|
|
8
|
+
/**
 * Extracts the session from a signet.
 *
 * The signet is matched against `SIGNET_STRUCTURE_REGEXP`; when it matches,
 * the captured `session` group is converted by `formatToFourDigitSession`.
 *
 * @param signet Signet string to inspect.
 * @returns The value returned by `formatToFourDigitSession`, or `null` when
 *          the signet does not match the expected structure.
 */
export function getSessionFromSignet(signet) {
    const groups = SIGNET_STRUCTURE_REGEXP.exec(signet)?.groups;
    if (groups === undefined || !("session" in groups)) {
        return null;
    }
    return formatToFourDigitSession(groups.session);
}
|
|
16
|
+
/**
 * Converts a session written with two to four digits into a four-digit year.
 *
 * When the first two digits and the last two digits form a pair of
 * consecutive years (e.g. "2324" for 2023-2024), the first pair is the
 * session's starting year; otherwise the last two digits are used (so a
 * plain two-digit session such as "23" maps to itself). The pair check wraps
 * around the century, so "9900" is read as 99 followed by 00.
 *
 * The two-digit year is expanded by Luxon's "yy" parsing, which follows
 * `Settings.twoDigitCutoffYear`.
 *
 * @param session Digits of the session as found in a signet.
 * @returns The four-digit starting year as a number, or `null` when
 *          `session` has fewer than two characters.
 */
function formatToFourDigitSession(session) {
    if (session.length < 2) {
        return null;
    }
    const firstTwoDigits = session.substring(0, 2);
    const lastTwoDigits = session.substring(session.length - 2);
    // Modulo 100 so that the 99 -> 00 transition also counts as consecutive.
    const isConsecutivePair = (parseInt(firstTwoDigits, 10) + 1) % 100 === parseInt(lastTwoDigits, 10);
    const twoDigitSession = isConsecutivePair ? firstTwoDigits : lastTwoDigits;
    return parseInt(DateTime.fromFormat(twoDigitSession, "yy").toFormat("yyyy"), 10);
}
|
|
27
|
+
/**
 * Returns the session a date belongs to, identified by its starting year.
 *
 * A session runs from 1 October of its starting year through 30 September
 * of the following year (inclusive), so any date before October belongs to
 * the session that started the previous calendar year.
 *
 * @param date Date string to classify.
 * @param format Luxon format used to parse `date`; defaults to
 *               `STANDARD_DATE_FORMAT`.
 * @returns The starting year of the session. NOTE(review): an unparsable
 *          date is not rejected here; with Luxon's invalid DateTime the
 *          result is presumably `NaN` — confirm callers can cope.
 */
export function getSessionFromDate(date, format = STANDARD_DATE_FORMAT) {
    const parsedDate = DateTime.fromFormat(date, format);
    // Luxon months are 1-based: October is 10. Comparing on the month keeps
    // 30 September (at any time of day) in the session that is ending.
    const firstMonthOfSession = 10;
    if (parsedDate.month < firstMonthOfSession) {
        return parsedDate.year - 1;
    }
    return parsedDate.year;
}
|
|
@@ -6,6 +6,7 @@ import path from "path";
|
|
|
6
6
|
import { AGENDA_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
7
7
|
import { parseAgendaFromFile } from "../model/agenda";
|
|
8
8
|
import { getSessionsFromStart } from "../types/sessions";
|
|
9
|
+
import { ID_DATE_FORMAT } from "./datautil";
|
|
9
10
|
import { commonOptions } from "./shared/cli_helpers";
|
|
10
11
|
import { ensureAndClearDir } from "./shared/util";
|
|
11
12
|
const optionsDefinitions = [
|
|
@@ -18,6 +19,12 @@ const optionsDefinitions = [
|
|
|
18
19
|
];
|
|
19
20
|
const options = commandLineArgs(optionsDefinitions);
|
|
20
21
|
const SENAT_GLOBAL_AGENDA_URL_ROOT = "https://www.senat.fr/aglae/Global";
|
|
22
|
+
const EVENT_DATE_FORMAT = "ddMMyyyy";
|
|
23
|
+
/**
 * Error raised when an agenda cannot be retrieved.
 *
 * The message embeds the agenda name and the underlying failure detail.
 */
class AgendaError extends Error {
    /**
     * @param message Failure detail appended to the error message.
     * @param agendaName Name of the agenda that was being retrieved.
     */
    constructor(message, agendaName) {
        super(`An error occurred while retrieving Agenda ${agendaName}: ${message}`);
        // Without this the error is reported under the generic "Error" name.
        this.name = "AgendaError";
    }
}
|
|
21
28
|
async function retrieveAgendas(dataDir, sessions) {
|
|
22
29
|
const agendaRootDir = path.join(dataDir, AGENDA_FOLDER);
|
|
23
30
|
ensureAndClearDir(agendaRootDir);
|
|
@@ -28,9 +35,6 @@ async function retrieveAgendas(dataDir, sessions) {
|
|
|
28
35
|
fs.ensureDirSync(transformedAgendaDir);
|
|
29
36
|
}
|
|
30
37
|
for (const session of sessions) {
|
|
31
|
-
if (!options["silent"]) {
|
|
32
|
-
console.log(`Retrieving Agenda for session ${session}…`);
|
|
33
|
-
}
|
|
34
38
|
const originalAgendaSessionDir = path.join(originalAgendaDir, `${session}`);
|
|
35
39
|
fs.ensureDirSync(originalAgendaSessionDir);
|
|
36
40
|
const transformedAgendaSessionDir = path.join(transformedAgendaDir, `${session}`);
|
|
@@ -39,40 +43,53 @@ async function retrieveAgendas(dataDir, sessions) {
|
|
|
39
43
|
}
|
|
40
44
|
const fifteenDaysFromNow = new Date();
|
|
41
45
|
fifteenDaysFromNow.setDate(fifteenDaysFromNow.getDate() + 15);
|
|
42
|
-
for (const date = new Date(session,
|
|
43
|
-
const agendaName = DateTime.fromJSDate(date).toFormat(
|
|
44
|
-
const agendaFileName = DateTime.fromJSDate(date).toFormat(
|
|
45
|
-
const agendaPath = path.join(originalAgendaSessionDir, agendaFileName);
|
|
46
|
+
for (const date = new Date(session, 9, 1); date <= new Date(session + 1, 8, 30) && date <= fifteenDaysFromNow; date.setDate(date.getDate() + 1)) {
|
|
47
|
+
const agendaName = DateTime.fromJSDate(date).toFormat(EVENT_DATE_FORMAT);
|
|
48
|
+
const agendaFileName = DateTime.fromJSDate(date).toFormat(ID_DATE_FORMAT);
|
|
49
|
+
const agendaPath = path.join(originalAgendaSessionDir, `${agendaFileName}.html`);
|
|
46
50
|
try {
|
|
47
|
-
|
|
48
|
-
if (
|
|
49
|
-
|
|
50
|
-
console.warn(`Agenda ${agendaName} not found`);
|
|
51
|
-
}
|
|
52
|
-
else {
|
|
53
|
-
console.error(`An error occurred while retrieving Agenda ${agendaName}: ${response.status}`);
|
|
54
|
-
}
|
|
55
|
-
return;
|
|
51
|
+
await downloadAgenda(agendaName, agendaPath);
|
|
52
|
+
if (options["parseAgenda"]) {
|
|
53
|
+
await parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath);
|
|
56
54
|
}
|
|
57
|
-
const agendaContent = await response.arrayBuffer();
|
|
58
|
-
if (!agendaContent) {
|
|
59
|
-
return;
|
|
60
|
-
}
|
|
61
|
-
fs.writeFileSync(agendaPath, Buffer.from(agendaContent));
|
|
62
55
|
}
|
|
63
56
|
catch (error) {
|
|
64
|
-
console.error(error
|
|
65
|
-
}
|
|
66
|
-
if (options["parseAgenda"]) {
|
|
67
|
-
const parsedAgendaEvents = await parseAgendaFromFile(agendaPath);
|
|
68
|
-
if (!parsedAgendaEvents || parsedAgendaEvents.length === 0) {
|
|
69
|
-
continue;
|
|
70
|
-
}
|
|
71
|
-
fs.writeJSONSync(path.join(transformedAgendaSessionDir, `${agendaFileName}.json`), parsedAgendaEvents, { spaces: 2 });
|
|
57
|
+
console.error(error);
|
|
72
58
|
}
|
|
73
59
|
}
|
|
74
60
|
}
|
|
75
61
|
}
|
|
62
|
+
/**
 * Downloads one agenda page and stores it at `agendaPath`.
 *
 * The URL is `<SENAT_GLOBAL_AGENDA_URL_ROOT>/agl<agendaName>.html`.
 * Nothing is written when the server answers 404 (a warning is logged) or
 * when the response body is empty.
 *
 * @param agendaName Agenda identifier used in the URL.
 * @param agendaPath Destination file path.
 * @throws AgendaError When the server answers with a non-OK status other
 *         than 404.
 */
async function downloadAgenda(agendaName, agendaPath) {
    const agendaUrl = `${SENAT_GLOBAL_AGENDA_URL_ROOT}/agl${agendaName}.html`;
    if (!options["silent"]) {
        console.log(`Downloading Agenda ${agendaUrl}…`);
    }
    const response = await fetch(agendaUrl);
    if (response.status === 404) {
        // A missing agenda is reported but is not treated as a failure.
        console.warn(`Agenda ${agendaUrl} not found`);
        return;
    }
    if (!response.ok) {
        throw new AgendaError(String(response.status), agendaName);
    }
    const agendaContent = await response.arrayBuffer();
    // An ArrayBuffer is always truthy, so emptiness must be tested on its size.
    if (agendaContent.byteLength === 0) {
        return;
    }
    fs.writeFileSync(agendaPath, Buffer.from(agendaContent));
}
|
|
83
|
+
/**
 * Parses a downloaded agenda file and writes its events as JSON.
 *
 * The output is `<transformedAgendaSessionDir>/<agendaFileName>.json`.
 * Nothing is written when the source file does not exist or when parsing
 * yields no events.
 *
 * @param transformedAgendaSessionDir Directory receiving the JSON file.
 * @param agendaFileName Base name (without extension) of the JSON file.
 * @param agendaPath Path of the downloaded agenda file to parse.
 */
async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath) {
    // The download step writes no file when the agenda was not found, so there
    // may be nothing to parse for this date.
    if (!fs.existsSync(agendaPath)) {
        return;
    }
    if (!options["silent"]) {
        console.log(`Parsing Agenda ${agendaPath}…`);
    }
    const parsedAgendaEvents = await parseAgendaFromFile(agendaPath);
    if (!parsedAgendaEvents || parsedAgendaEvents.length === 0) {
        return;
    }
    fs.writeJSONSync(path.join(transformedAgendaSessionDir, `${agendaFileName}.json`), parsedAgendaEvents, { spaces: 2 });
}
|
|
76
93
|
async function main() {
|
|
77
94
|
const dataDir = options["dataDir"];
|
|
78
95
|
assert(dataDir, "Missing argument: data directory");
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|