@tricoteuses/senat 2.20.29 → 2.20.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/loaders.d.ts +8 -46
- package/lib/loaders.js +8 -17
- package/lib/model/documents.d.ts +12 -2
- package/lib/model/documents.js +144 -26
- package/lib/model/dosleg.js +2 -118
- package/lib/model/index.d.ts +1 -1
- package/lib/model/index.js +1 -1
- package/lib/scripts/convert_data.js +31 -17
- package/lib/scripts/retrieve_documents.d.ts +2 -1
- package/lib/scripts/retrieve_documents.js +5 -3
- package/lib/scripts/test_iter_load.js +3 -9
- package/lib/types/texte.d.ts +9 -0
- package/package.json +1 -1
package/lib/loaders.d.ts
CHANGED
|
@@ -5,9 +5,11 @@ import { QuestionResult } from "./model/questions";
|
|
|
5
5
|
import { ScrutinResult } from "./model/scrutins";
|
|
6
6
|
import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
|
|
7
7
|
import { Reunion } from "./types/agenda";
|
|
8
|
-
import { FlatTexte } from "./types/texte";
|
|
8
|
+
import { FlatTexte, DocumentMetadata } from "./types/texte";
|
|
9
9
|
import { CompteRendu } from "./types/compte_rendu";
|
|
10
|
+
import { DocumentResult } from "./model/documents";
|
|
10
11
|
export { EnabledDatasets } from "./datasets";
|
|
12
|
+
export type { DocumentResult } from "./model/documents";
|
|
11
13
|
export declare const AGENDA_FOLDER = "agenda";
|
|
12
14
|
export declare const COMPTES_RENDUS_FOLDER = "seances";
|
|
13
15
|
export declare const COMMISSION_FOLDER = "commissions";
|
|
@@ -27,46 +29,6 @@ export type IterItem<T> = {
|
|
|
27
29
|
legislature?: number;
|
|
28
30
|
gitStatus?: "A" | "M" | "D" | "R" | "C" | "T" | "U";
|
|
29
31
|
};
|
|
30
|
-
export interface TexteMetadata {
|
|
31
|
-
name: string;
|
|
32
|
-
session: number | null | undefined;
|
|
33
|
-
date?: string | null;
|
|
34
|
-
url_expose_des_motifs?: URL;
|
|
35
|
-
url_xml: URL;
|
|
36
|
-
url_html: URL;
|
|
37
|
-
url_pdf: URL;
|
|
38
|
-
}
|
|
39
|
-
export interface RapportMetadata {
|
|
40
|
-
name: string;
|
|
41
|
-
session: number | null | undefined;
|
|
42
|
-
date?: string | null;
|
|
43
|
-
url_html: URL;
|
|
44
|
-
url_pdf: URL;
|
|
45
|
-
}
|
|
46
|
-
export interface DossierLegislatifDocumentResult {
|
|
47
|
-
signet_dossier: string;
|
|
48
|
-
url_dossier_senat: string;
|
|
49
|
-
url_dossier_assemblee_nationale: string | null;
|
|
50
|
-
type_lecture: string;
|
|
51
|
-
libelle_lecture: string;
|
|
52
|
-
libelle_organisme: string | null;
|
|
53
|
-
code_organisme: string | null;
|
|
54
|
-
numero: number | null;
|
|
55
|
-
id: string | null;
|
|
56
|
-
url: string;
|
|
57
|
-
origine?: string | null | undefined;
|
|
58
|
-
type: string;
|
|
59
|
-
date: string;
|
|
60
|
-
session: number | null;
|
|
61
|
-
auteurs: {
|
|
62
|
-
prenom: string | null;
|
|
63
|
-
nom_usuel: string;
|
|
64
|
-
matricule: string | null;
|
|
65
|
-
}[];
|
|
66
|
-
organismes?: {
|
|
67
|
-
libelle: string;
|
|
68
|
-
}[] | undefined;
|
|
69
|
-
}
|
|
70
32
|
export declare function iterFilePaths(dirPath: string): Generator<string>;
|
|
71
33
|
export declare function iterLoadSenatAmendements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AmendementResult>>;
|
|
72
34
|
export declare function iterLoadSenatDebats(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DebatResult>>;
|
|
@@ -79,11 +41,11 @@ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, s
|
|
|
79
41
|
session: number;
|
|
80
42
|
}>;
|
|
81
43
|
export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
|
|
82
|
-
export declare function
|
|
83
|
-
export declare function
|
|
84
|
-
export declare function
|
|
85
|
-
export declare function
|
|
86
|
-
export declare function
|
|
44
|
+
export declare function iterLoadSenatRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
45
|
+
export declare function iterLoadSenatTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
46
|
+
export declare function iterLoadSenatDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
|
|
47
|
+
export declare function iterLoadSenatRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
48
|
+
export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
87
49
|
export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
|
|
88
50
|
export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
|
|
89
51
|
item: CompteRendu | null;
|
package/lib/loaders.js
CHANGED
|
@@ -147,7 +147,7 @@ export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}
|
|
|
147
147
|
yield dossierLegislatifItem;
|
|
148
148
|
}
|
|
149
149
|
}
|
|
150
|
-
export function*
|
|
150
|
+
export function* iterLoadSenatRapportUrls(dataDir, session) {
|
|
151
151
|
let itemsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
152
152
|
if (session) {
|
|
153
153
|
itemsDir = path.join(itemsDir, session.toString());
|
|
@@ -163,7 +163,7 @@ export function* iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session) {
|
|
|
163
163
|
}
|
|
164
164
|
}
|
|
165
165
|
}
|
|
166
|
-
export function*
|
|
166
|
+
export function* iterLoadSenatTexteUrls(dataDir, session) {
|
|
167
167
|
let itemsDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
168
168
|
if (session) {
|
|
169
169
|
itemsDir = path.join(itemsDir, session.toString());
|
|
@@ -179,23 +179,14 @@ export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
|
|
|
179
179
|
}
|
|
180
180
|
}
|
|
181
181
|
}
|
|
182
|
-
export function*
|
|
182
|
+
export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
|
|
183
183
|
for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
|
|
184
184
|
for (const lecture of dossierLegislatif["lectures"]) {
|
|
185
185
|
const lecturesSenat = lecture.lectures_assemblee.filter((lectureAssemblee) => lectureAssemblee.assemblee === "Sénat");
|
|
186
186
|
for (const lectureSenat of lecturesSenat) {
|
|
187
187
|
for (const document of lectureSenat[documentType]) {
|
|
188
|
-
const enrichedDocument = {
|
|
189
|
-
signet_dossier: dossierLegislatif["signet"],
|
|
190
|
-
url_dossier_senat: dossierLegislatif["url"],
|
|
191
|
-
url_dossier_assemblee_nationale: dossierLegislatif["url_dossier_assemblee_nationale"],
|
|
192
|
-
type_lecture: lecture.type_lecture,
|
|
193
|
-
libelle_lecture: lecture.libelle,
|
|
194
|
-
libelle_organisme: lectureSenat.libelle_organisme,
|
|
195
|
-
...document,
|
|
196
|
-
};
|
|
197
188
|
const documentItem = {
|
|
198
|
-
item:
|
|
189
|
+
item: document,
|
|
199
190
|
};
|
|
200
191
|
if (document.url) {
|
|
201
192
|
const documentName = path.parse(document.url).name;
|
|
@@ -207,13 +198,13 @@ export function* iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, doc
|
|
|
207
198
|
}
|
|
208
199
|
}
|
|
209
200
|
}
|
|
210
|
-
export function*
|
|
211
|
-
for (const iterItem of
|
|
201
|
+
export function* iterLoadSenatRapports(dataDir, session, options = {}) {
|
|
202
|
+
for (const iterItem of iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options)) {
|
|
212
203
|
yield iterItem;
|
|
213
204
|
}
|
|
214
205
|
}
|
|
215
|
-
export function*
|
|
216
|
-
for (const iterItem of
|
|
206
|
+
export function* iterLoadSenatTextes(dataDir, session, options = {}) {
|
|
207
|
+
for (const iterItem of iterLoadSenatDocuments(dataDir, session, "textes", options)) {
|
|
217
208
|
yield iterItem;
|
|
218
209
|
}
|
|
219
210
|
}
|
package/lib/model/documents.d.ts
CHANGED
|
@@ -1,2 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
export declare function
|
|
1
|
+
import { Expression, InferResult, SelectQueryBuilder } from "kysely";
|
|
2
|
+
export declare function rapports(lectureAssembleeId: Expression<string>): import("kysely").RawBuilder<{
|
|
3
|
+
[x: string]: any;
|
|
4
|
+
}[]>;
|
|
5
|
+
declare const queryTextes: SelectQueryBuilder<any, any, any>;
|
|
6
|
+
export declare function textes(lectureAssembleeId: Expression<string>): import("kysely").RawBuilder<{
|
|
7
|
+
[x: string]: any;
|
|
8
|
+
}[]>;
|
|
9
|
+
export declare function findAllTextes(): AsyncIterableIterator<DocumentResult>;
|
|
10
|
+
export declare function findAllRapports(): AsyncIterableIterator<DocumentResult>;
|
|
11
|
+
export type DocumentResult = InferResult<typeof queryTextes>[0];
|
|
12
|
+
export {};
|
package/lib/model/documents.js
CHANGED
|
@@ -1,33 +1,151 @@
|
|
|
1
|
+
import { sql } from "kysely";
|
|
1
2
|
import { dbSenat } from "../databases";
|
|
2
|
-
import { rtrim, toDateString } from "./util";
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
import { concat, rtrim, toDateString } from "./util";
|
|
4
|
+
import { jsonArrayFrom } from "kysely/helpers/postgres";
|
|
5
|
+
function orderOrdreOrigineTexte(expr) {
|
|
6
|
+
return sql `array_position(array['0','2','1'], ${expr})`;
|
|
7
|
+
}
|
|
8
|
+
function auteursRapport(rapportId) {
|
|
9
|
+
return jsonArrayFrom(dbSenat
|
|
5
10
|
.withSchema("dosleg")
|
|
6
|
-
.selectFrom("
|
|
7
|
-
.
|
|
8
|
-
.
|
|
9
|
-
|
|
10
|
-
.select(
|
|
11
|
-
"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
.selectFrom("dosleg.auteur")
|
|
12
|
+
.leftJoin("dosleg.ecr", "dosleg.ecr.autcod", "dosleg.auteur.autcod")
|
|
13
|
+
.leftJoin("dosleg.rolsig", "dosleg.rolsig.signataire", "dosleg.ecr.signataire")
|
|
14
|
+
.where("dosleg.ecr.rapcod", "=", rapportId)
|
|
15
|
+
.select([
|
|
16
|
+
"dosleg.auteur.prenom as prenom",
|
|
17
|
+
"dosleg.auteur.nomuse as nom_usuel",
|
|
18
|
+
"dosleg.auteur.autmat as matricule",
|
|
19
|
+
"dosleg.ecr.ecrnumtri as ordre",
|
|
20
|
+
"dosleg.rolsig.rolsiglib as role",
|
|
21
|
+
"dosleg.ecr.ecrqua as qualite",
|
|
15
22
|
])
|
|
16
|
-
|
|
17
|
-
|
|
23
|
+
.orderBy("dosleg.ecr.ecrnumtri", "asc"));
|
|
24
|
+
}
|
|
25
|
+
function documentsAttaches(rapportId) {
|
|
26
|
+
return jsonArrayFrom(dbSenat
|
|
27
|
+
.withSchema("dosleg")
|
|
28
|
+
.selectFrom("docatt")
|
|
29
|
+
.leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
|
|
30
|
+
.where("docatt.rapcod", "=", rapportId)
|
|
31
|
+
.select([
|
|
32
|
+
"docatt.docatturl as url",
|
|
33
|
+
"typatt.typattlib as type_document"
|
|
34
|
+
]));
|
|
35
|
+
}
|
|
36
|
+
function selectRapportAttributes({ eb, ref, val }) {
|
|
37
|
+
return [
|
|
38
|
+
"rap.rapnum as numero",
|
|
39
|
+
"raporg.orgcod as code_organisme",
|
|
40
|
+
eb
|
|
41
|
+
.case()
|
|
42
|
+
.when("rap.rapurl", "is not", null)
|
|
43
|
+
.then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
44
|
+
.else(null)
|
|
45
|
+
.end()
|
|
46
|
+
.as("id"),
|
|
47
|
+
eb
|
|
48
|
+
.case()
|
|
49
|
+
.when("rap.typurl", "=", "I")
|
|
50
|
+
.then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
|
|
51
|
+
.else(rtrim(ref("rap.rapurl")))
|
|
52
|
+
.end()
|
|
53
|
+
.as("url"),
|
|
54
|
+
rtrim(ref("denrap.libdenrap")).as("type"),
|
|
55
|
+
rtrim(rtrim(ref("rap.raptil"))).as("titre"),
|
|
56
|
+
rtrim(rtrim(ref("rap.rapsoustit"))).as("sous_titre"),
|
|
57
|
+
toDateString(ref("rap.date_depot")).as("date"),
|
|
58
|
+
"rap.sesann as session",
|
|
59
|
+
auteursRapport(ref("rap.rapcod")).as("auteurs"),
|
|
60
|
+
documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
|
|
61
|
+
];
|
|
62
|
+
}
|
|
63
|
+
const baseQueryRapports = dbSenat
|
|
64
|
+
.withSchema("dosleg")
|
|
65
|
+
.selectFrom("rap")
|
|
66
|
+
.leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
|
|
67
|
+
.leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
|
|
68
|
+
.leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod");
|
|
69
|
+
const queryRapports = baseQueryRapports
|
|
70
|
+
.leftJoin("lecass", "lecass.lecassidt", "lecassrap.lecassidt")
|
|
71
|
+
.leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
|
|
72
|
+
.leftJoin("loi", "loi.loicod", "lecture.loicod")
|
|
73
|
+
.select((args) => [
|
|
74
|
+
"loi.signet as signet_dossier",
|
|
75
|
+
...selectRapportAttributes(args),
|
|
76
|
+
]);
|
|
77
|
+
export function rapports(lectureAssembleeId) {
|
|
78
|
+
return jsonArrayFrom(baseQueryRapports
|
|
79
|
+
.select(selectRapportAttributes)
|
|
80
|
+
.where("lecassrap.lecassidt", "=", lectureAssembleeId));
|
|
18
81
|
}
|
|
19
|
-
|
|
20
|
-
return dbSenat
|
|
82
|
+
function auteursTexte(texteId) {
|
|
83
|
+
return jsonArrayFrom(dbSenat
|
|
21
84
|
.withSchema("dosleg")
|
|
22
|
-
.selectFrom("
|
|
23
|
-
.
|
|
24
|
-
.
|
|
25
|
-
|
|
26
|
-
.select(
|
|
27
|
-
"
|
|
28
|
-
|
|
29
|
-
|
|
85
|
+
.selectFrom("auteur")
|
|
86
|
+
.leftJoin("ecr", "ecr.autcod", "auteur.autcod")
|
|
87
|
+
.leftJoin("rolsig", "rolsig.signataire", "ecr.signataire")
|
|
88
|
+
.where("ecr.texcod", "=", texteId)
|
|
89
|
+
.select([
|
|
90
|
+
"auteur.prenom as prenom",
|
|
91
|
+
"auteur.nomuse as nom_usuel",
|
|
92
|
+
"auteur.autmat as matricule",
|
|
93
|
+
"ecr.ecrnumtri as ordre",
|
|
94
|
+
"rolsig.rolsiglib as role",
|
|
95
|
+
"ecr.ecrqua as qualite",
|
|
30
96
|
])
|
|
31
|
-
|
|
32
|
-
|
|
97
|
+
.orderBy("ecr.ecrnumtri", "asc"));
|
|
98
|
+
}
|
|
99
|
+
function selectTexteAttributes({ eb, ref, val }) {
|
|
100
|
+
return [
|
|
101
|
+
"texte.texnum as numero",
|
|
102
|
+
"texte.orgcod as code_organisme",
|
|
103
|
+
eb
|
|
104
|
+
.case()
|
|
105
|
+
.when("texte.texurl", "is not", null)
|
|
106
|
+
.then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
107
|
+
.else(null)
|
|
108
|
+
.end()
|
|
109
|
+
.as("id"),
|
|
110
|
+
eb
|
|
111
|
+
.case()
|
|
112
|
+
.when("texte.typurl", "=", "I")
|
|
113
|
+
.then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
|
|
114
|
+
.else(rtrim(ref("texte.texurl")))
|
|
115
|
+
.end()
|
|
116
|
+
.as("url"),
|
|
117
|
+
rtrim(ref("oritxt.oritxtlib")).as("origine"),
|
|
118
|
+
"oritxt.oriordre as ordre_origine",
|
|
119
|
+
"oritxt.oritxtado as code_adoption",
|
|
120
|
+
"oritxt.oritxtmod as modification",
|
|
121
|
+
rtrim(ref("typtxt.typtxtlib")).as("type"),
|
|
122
|
+
toDateString(ref("texte.txtoritxtdat")).as("date"),
|
|
123
|
+
"texte.sesann as session",
|
|
124
|
+
auteursTexte(ref("texte.texcod")).as("auteurs"),
|
|
125
|
+
];
|
|
126
|
+
}
|
|
127
|
+
const baseQueryTextes = dbSenat
|
|
128
|
+
.withSchema("dosleg")
|
|
129
|
+
.selectFrom("texte")
|
|
130
|
+
.leftJoin("oritxt", "oritxt.oritxtcod", "texte.oritxtcod")
|
|
131
|
+
.leftJoin("typtxt", "typtxt.typtxtcod", "texte.typtxtcod")
|
|
132
|
+
.orderBy(({ ref }) => orderOrdreOrigineTexte(ref("oritxt.oriordre")));
|
|
133
|
+
const queryTextes = baseQueryTextes
|
|
134
|
+
.leftJoin("lecass", "lecass.lecassidt", "texte.lecassidt")
|
|
135
|
+
.leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
|
|
136
|
+
.leftJoin("loi", "loi.loicod", "lecture.loicod")
|
|
137
|
+
.select((args) => [
|
|
138
|
+
"loi.signet as signet_dossier",
|
|
139
|
+
...selectTexteAttributes(args),
|
|
140
|
+
]);
|
|
141
|
+
export function textes(lectureAssembleeId) {
|
|
142
|
+
return jsonArrayFrom(baseQueryTextes
|
|
143
|
+
.select(selectTexteAttributes)
|
|
144
|
+
.where("texte.lecassidt", "=", lectureAssembleeId));
|
|
145
|
+
}
|
|
146
|
+
export function findAllTextes() {
|
|
147
|
+
return queryTextes.stream();
|
|
148
|
+
}
|
|
149
|
+
export function findAllRapports() {
|
|
150
|
+
return queryRapports.stream();
|
|
33
151
|
}
|
package/lib/model/dosleg.js
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
import { sql } from "kysely";
|
|
2
2
|
import { jsonArrayFrom } from "kysely/helpers/postgres";
|
|
3
3
|
import { dbSenat } from "../databases";
|
|
4
|
-
import { concat,
|
|
5
|
-
|
|
6
|
-
return sql `array_position(array['0','2','1'], ${expr})`;
|
|
7
|
-
}
|
|
4
|
+
import { concat, rtrim, toDateString } from "./util";
|
|
5
|
+
import { textes, rapports } from "./documents";
|
|
8
6
|
function datesSeances(lectureAssembleeId) {
|
|
9
7
|
return jsonArrayFrom(dbSenat
|
|
10
8
|
.withSchema("dosleg")
|
|
@@ -12,120 +10,6 @@ function datesSeances(lectureAssembleeId) {
|
|
|
12
10
|
.where("dosleg.date_seance.lecidt", "=", lectureAssembleeId)
|
|
13
11
|
.select(({ ref }) => [toDateString(ref("dosleg.date_seance.date_s")).as("date")]));
|
|
14
12
|
}
|
|
15
|
-
function auteursRapport(rapportId) {
|
|
16
|
-
return jsonArrayFrom(dbSenat
|
|
17
|
-
.withSchema("dosleg")
|
|
18
|
-
.selectFrom("dosleg.auteur")
|
|
19
|
-
.leftJoin("dosleg.ecr", "dosleg.ecr.autcod", "dosleg.auteur.autcod")
|
|
20
|
-
.leftJoin("dosleg.rolsig", "dosleg.rolsig.signataire", "dosleg.ecr.signataire")
|
|
21
|
-
.where("dosleg.ecr.rapcod", "=", rapportId)
|
|
22
|
-
.select([
|
|
23
|
-
"dosleg.auteur.prenom as prenom",
|
|
24
|
-
"dosleg.auteur.nomuse as nom_usuel",
|
|
25
|
-
"dosleg.auteur.autmat as matricule",
|
|
26
|
-
"dosleg.ecr.ecrnumtri as ordre",
|
|
27
|
-
"dosleg.rolsig.rolsiglib as role",
|
|
28
|
-
"dosleg.ecr.ecrqua as qualite",
|
|
29
|
-
])
|
|
30
|
-
.orderBy("dosleg.ecr.ecrnumtri", "asc"));
|
|
31
|
-
}
|
|
32
|
-
function documentsAttaches(rapportId) {
|
|
33
|
-
return jsonArrayFrom(dbSenat
|
|
34
|
-
.withSchema("dosleg")
|
|
35
|
-
.selectFrom("docatt")
|
|
36
|
-
.leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
|
|
37
|
-
.where("docatt.rapcod", "=", rapportId)
|
|
38
|
-
.select([
|
|
39
|
-
"docatt.docatturl as url",
|
|
40
|
-
"typatt.typattlib as type_document"
|
|
41
|
-
]));
|
|
42
|
-
}
|
|
43
|
-
function rapports(lectureAssembleeId) {
|
|
44
|
-
return jsonArrayFrom(dbSenat
|
|
45
|
-
.withSchema("dosleg")
|
|
46
|
-
.selectFrom("rap")
|
|
47
|
-
.leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod")
|
|
48
|
-
.leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
|
|
49
|
-
.leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
|
|
50
|
-
.where("lecassrap.lecassidt", "=", lectureAssembleeId)
|
|
51
|
-
.select(({ eb, ref, val }) => [
|
|
52
|
-
"rap.rapnum as numero",
|
|
53
|
-
"raporg.orgcod as code_organisme",
|
|
54
|
-
eb
|
|
55
|
-
.case()
|
|
56
|
-
.when("rap.typurl", "=", "I")
|
|
57
|
-
.then(removeSubstring(ref("rap.rapurl"), val(".html")))
|
|
58
|
-
.else(null)
|
|
59
|
-
.end()
|
|
60
|
-
.as("id"),
|
|
61
|
-
eb
|
|
62
|
-
.case()
|
|
63
|
-
.when("rap.typurl", "=", "I")
|
|
64
|
-
.then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
|
|
65
|
-
.else(rtrim(ref("rap.rapurl")))
|
|
66
|
-
.end()
|
|
67
|
-
.as("url"),
|
|
68
|
-
rtrim(ref("denrap.libdenrap")).as("type"),
|
|
69
|
-
rtrim(ref("rap.raptil")).as("titre"),
|
|
70
|
-
rtrim(ref("rap.rapsoustit")).as("sous_titre"),
|
|
71
|
-
toDateString(ref("rap.date_depot")).as("date"),
|
|
72
|
-
"sesann as session",
|
|
73
|
-
auteursRapport(ref("rap.rapcod")).as("auteurs"),
|
|
74
|
-
documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
|
|
75
|
-
]));
|
|
76
|
-
}
|
|
77
|
-
function auteursTexte(texteId) {
|
|
78
|
-
return jsonArrayFrom(dbSenat
|
|
79
|
-
.withSchema("dosleg")
|
|
80
|
-
.selectFrom("auteur")
|
|
81
|
-
.leftJoin("ecr", "ecr.autcod", "auteur.autcod")
|
|
82
|
-
.leftJoin("rolsig", "rolsig.signataire", "ecr.signataire")
|
|
83
|
-
.where("ecr.texcod", "=", texteId)
|
|
84
|
-
.select([
|
|
85
|
-
"auteur.prenom as prenom",
|
|
86
|
-
"auteur.nomuse as nom_usuel",
|
|
87
|
-
"auteur.autmat as matricule",
|
|
88
|
-
"ecr.ecrnumtri as ordre",
|
|
89
|
-
"rolsig.rolsiglib as role",
|
|
90
|
-
"ecr.ecrqua as qualite",
|
|
91
|
-
])
|
|
92
|
-
.orderBy("ecr.ecrnumtri", "asc"));
|
|
93
|
-
}
|
|
94
|
-
function textes(lectureAssembleeId) {
|
|
95
|
-
return jsonArrayFrom(dbSenat
|
|
96
|
-
.withSchema("dosleg")
|
|
97
|
-
.selectFrom("texte")
|
|
98
|
-
.leftJoin("oritxt", "oritxt.oritxtcod", "texte.oritxtcod")
|
|
99
|
-
.leftJoin("typtxt", "typtxt.typtxtcod", "texte.typtxtcod")
|
|
100
|
-
.where("texte.lecassidt", "=", lectureAssembleeId)
|
|
101
|
-
.select(({ eb, ref, val }) => [
|
|
102
|
-
"texte.texnum as numero",
|
|
103
|
-
"texte.orgcod as code_organisme",
|
|
104
|
-
eb
|
|
105
|
-
.case()
|
|
106
|
-
.when("texte.typurl", "=", "I")
|
|
107
|
-
.then(removeSubstring(ref("texte.texurl"), val(".html")))
|
|
108
|
-
.else(null)
|
|
109
|
-
.end()
|
|
110
|
-
.as("id"),
|
|
111
|
-
eb
|
|
112
|
-
.case()
|
|
113
|
-
.when("texte.typurl", "=", "I")
|
|
114
|
-
.then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
|
|
115
|
-
.else(rtrim(ref("texte.texurl")))
|
|
116
|
-
.end()
|
|
117
|
-
.as("url"),
|
|
118
|
-
rtrim(ref("oritxt.oritxtlib")).as("origine"),
|
|
119
|
-
"oritxt.oriordre as ordre_origine",
|
|
120
|
-
"oritxt.oritxtado as code_adoption",
|
|
121
|
-
"oritxt.oritxtmod as modification",
|
|
122
|
-
rtrim(ref("typtxt.typtxtlib")).as("type"),
|
|
123
|
-
toDateString(ref("texte.txtoritxtdat")).as("date"),
|
|
124
|
-
"sesann as session",
|
|
125
|
-
auteursTexte(ref("texte.texcod")).as("auteurs"),
|
|
126
|
-
])
|
|
127
|
-
.orderBy(({ ref }) => orderOrdreOrigineTexte(ref("oritxt.oriordre"))));
|
|
128
|
-
}
|
|
129
13
|
function lecturesAssemblee(lectureId) {
|
|
130
14
|
return jsonArrayFrom(dbSenat
|
|
131
15
|
.withSchema("dosleg")
|
package/lib/model/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export { findAllAmendements } from "./ameli";
|
|
2
2
|
export { findAll as findAllDebats } from "./debats";
|
|
3
3
|
export { findAllDossiers } from "./dosleg";
|
|
4
|
-
export {
|
|
4
|
+
export { findAllTextes, findAllRapports } from "./documents";
|
|
5
5
|
export { findAllScrutins } from "./scrutins";
|
|
6
6
|
export { findAll as findAllQuestions } from "./questions";
|
|
7
7
|
export { findAll as findAllSens, findAllCirconscriptions, findAllOrganismes } from "./sens";
|
package/lib/model/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export { findAllAmendements } from "./ameli";
|
|
2
2
|
export { findAll as findAllDebats } from "./debats";
|
|
3
3
|
export { findAllDossiers } from "./dosleg";
|
|
4
|
-
export {
|
|
4
|
+
export { findAllTextes, findAllRapports } from "./documents";
|
|
5
5
|
export { findAllScrutins } from "./scrutins";
|
|
6
6
|
export { findAll as findAllQuestions } from "./questions";
|
|
7
7
|
export { findAll as findAllSens, findAllCirconscriptions, findAllOrganismes } from "./sens";
|
package/lib/scripts/convert_data.js
CHANGED
|
@@ -5,8 +5,8 @@ import path from "path";
|
|
|
5
5
|
import pLimit from "p-limit";
|
|
6
6
|
import * as git from "../git";
|
|
7
7
|
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
|
|
8
|
-
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER
|
|
9
|
-
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens,
|
|
8
|
+
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER } from "../loaders";
|
|
9
|
+
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAllTextes, findAllRapports, } from "../model";
|
|
10
10
|
import { processRapport, processTexte } from "./retrieve_documents";
|
|
11
11
|
import { buildActesLegislatifs } from "../model/dosleg";
|
|
12
12
|
import { UNDEFINED_SESSION } from "../types/sessions";
|
|
@@ -169,8 +169,8 @@ async function convertDatasetDosLeg(dataDir, options) {
|
|
|
169
169
|
const dossierFile = `${dossier["signet"]}.json`;
|
|
170
170
|
await fs.outputJSON(path.join(dossierReorganizedDir, dossierFile), dossierWithActes, { spaces: 2 });
|
|
171
171
|
}
|
|
172
|
-
await
|
|
173
|
-
await
|
|
172
|
+
await convertTextes(dataDir, options);
|
|
173
|
+
await convertRapports(dataDir, options);
|
|
174
174
|
}
|
|
175
175
|
async function convertDatasetScrutins(dataDir, options) {
|
|
176
176
|
const dataset = datasets.dosleg;
|
|
@@ -219,30 +219,38 @@ async function convertDatasetQuestions(dataDir, options) {
|
|
|
219
219
|
}
|
|
220
220
|
await Promise.all(tasks);
|
|
221
221
|
}
|
|
222
|
-
async function
|
|
222
|
+
async function convertTextes(dataDir, options) {
|
|
223
223
|
const originalTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
224
224
|
const transformedTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER);
|
|
225
225
|
if (!options["silent"]) {
|
|
226
226
|
console.log(`Converting database textes data into files…`);
|
|
227
227
|
}
|
|
228
|
-
for await (const texte of
|
|
229
|
-
const session = texte
|
|
228
|
+
for await (const texte of findAllTextes()) {
|
|
229
|
+
const session = texte["session"] ?? UNDEFINED_SESSION;
|
|
230
230
|
if (options["fromSession"] && session < options["fromSession"]) {
|
|
231
231
|
continue;
|
|
232
232
|
}
|
|
233
|
-
|
|
233
|
+
if (!texte["url"]) {
|
|
234
|
+
continue;
|
|
235
|
+
}
|
|
236
|
+
const texteName = path.parse(texte["url"]).name;
|
|
234
237
|
const texteDir = path.join(originalTextesDir, `${session}`, texteName);
|
|
238
|
+
// oritxtcod = 1 corresponds to "Texte de loi déposé au Sénat"
|
|
239
|
+
const hasExposeDesMotifs = texte["origine"] === 'Sénat' && texte["ordre_origine"] === '1';
|
|
235
240
|
const metadata = {
|
|
236
241
|
name: texteName,
|
|
237
|
-
session: texte
|
|
238
|
-
date: texte
|
|
239
|
-
url_expose_des_motifs:
|
|
242
|
+
session: texte["session"],
|
|
243
|
+
date: texte["date"],
|
|
244
|
+
url_expose_des_motifs: hasExposeDesMotifs
|
|
240
245
|
? new URL(`${texteName}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL)
|
|
241
246
|
: undefined,
|
|
242
247
|
url_xml: new URL(`${texteName}.akn.xml`, SENAT_TEXTE_XML_BASE_URL),
|
|
243
248
|
url_html: new URL(`${texteName}.html`, SENAT_TEXTE_BASE_URL),
|
|
244
249
|
url_pdf: new URL(`${texteName}.pdf`, SENAT_TEXTE_BASE_URL),
|
|
245
250
|
};
|
|
251
|
+
fs.outputJSONSync(path.join(texteDir, `${texteName}.json`), texte, {
|
|
252
|
+
spaces: 2,
|
|
253
|
+
});
|
|
246
254
|
fs.outputJSONSync(path.join(texteDir, DOCUMENT_METADATA_FILE), metadata, {
|
|
247
255
|
spaces: 2,
|
|
248
256
|
});
|
|
@@ -251,17 +259,20 @@ async function convertTexteUrls(dataDir, options) {
|
|
|
251
259
|
}
|
|
252
260
|
}
|
|
253
261
|
}
|
|
254
|
-
async function
|
|
262
|
+
async function convertRapports(dataDir, options) {
|
|
255
263
|
const originalRapportsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
256
264
|
if (!options["silent"]) {
|
|
257
265
|
console.log(`Converting database rapports data into files…`);
|
|
258
266
|
}
|
|
259
|
-
for await (const rapport of
|
|
260
|
-
const session = rapport
|
|
267
|
+
for await (const rapport of findAllRapports()) {
|
|
268
|
+
const session = rapport["session"] ?? UNDEFINED_SESSION;
|
|
261
269
|
if (options["fromSession"] && session < options["fromSession"]) {
|
|
262
270
|
continue;
|
|
263
271
|
}
|
|
264
|
-
|
|
272
|
+
if (!rapport["url"]) {
|
|
273
|
+
continue;
|
|
274
|
+
}
|
|
275
|
+
const parsedRapportUrl = path.parse(rapport["url"]);
|
|
265
276
|
const rapportName = parsedRapportUrl.name;
|
|
266
277
|
const rapportDir = path.join(originalRapportsDir, `${session}`, rapportName);
|
|
267
278
|
const rapportHtmlUrlBase = `${rapportName}_mono.html`;
|
|
@@ -276,11 +287,14 @@ async function convertRapportUrls(dataDir, options) {
|
|
|
276
287
|
});
|
|
277
288
|
const metadata = {
|
|
278
289
|
name: rapportName,
|
|
279
|
-
session: rapport
|
|
280
|
-
date: rapport
|
|
290
|
+
session: rapport["session"],
|
|
291
|
+
date: rapport["date"],
|
|
281
292
|
url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
|
|
282
293
|
url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
|
|
283
294
|
};
|
|
295
|
+
fs.outputJSONSync(path.join(rapportDir, `${rapportName}.json`), rapport, {
|
|
296
|
+
spaces: 2,
|
|
297
|
+
});
|
|
284
298
|
fs.outputJSONSync(path.join(rapportDir, DOCUMENT_METADATA_FILE), metadata, {
|
|
285
299
|
spaces: 2,
|
|
286
300
|
});
|
package/lib/scripts/retrieve_documents.d.ts
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
1
|
-
|
|
1
|
+
import { DocumentMetadata } from "../types/texte";
|
|
2
|
+
export declare function processTexte(texteMetadata: DocumentMetadata, originalTextesDir: string, transformedTextesDir: string, options: any): Promise<void>;
|
|
2
3
|
export declare function processRapport(rapportMetadata: any, originalRapportsDir: string, options: any): Promise<void>;
|
package/lib/scripts/retrieve_documents.js
CHANGED
|
@@ -3,7 +3,7 @@ import commandLineArgs from "command-line-args";
|
|
|
3
3
|
import fs from "fs-extra";
|
|
4
4
|
import { DateTime } from "luxon";
|
|
5
5
|
import path from "path";
|
|
6
|
-
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER,
|
|
6
|
+
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
|
|
7
7
|
import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte";
|
|
8
8
|
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
9
9
|
import { commonOptions } from "./shared/cli_helpers";
|
|
@@ -111,6 +111,8 @@ export async function processTexte(texteMetadata, originalTextesDir, transformed
|
|
|
111
111
|
for (const format of formats) {
|
|
112
112
|
if (!isOptionEmptyOrHasValue(options.formats, format.type))
|
|
113
113
|
continue;
|
|
114
|
+
if (!format.url)
|
|
115
|
+
continue;
|
|
114
116
|
const destPath = path.join(texteDir, `${texteMetadata.name}.${format.type}`);
|
|
115
117
|
const result = await processDocument(format.url.toString(), destPath, texteMetadata.date, options);
|
|
116
118
|
// Specific logic: Parsing (Only applies to XML)
|
|
@@ -147,7 +149,7 @@ async function retrieveTextes(dataDir, sessions) {
|
|
|
147
149
|
parseDocuments: options["parseDocuments"],
|
|
148
150
|
};
|
|
149
151
|
for (const session of sessions) {
|
|
150
|
-
for (const { item: texteMetadata } of
|
|
152
|
+
for (const { item: texteMetadata } of iterLoadSenatTexteUrls(dataDir, session)) {
|
|
151
153
|
await processTexte(texteMetadata, originalTextesDir, transformedTextesDir, dlOptions);
|
|
152
154
|
}
|
|
153
155
|
}
|
|
@@ -162,7 +164,7 @@ async function retrieveRapports(dataDir, sessions) {
|
|
|
162
164
|
formats: options["formats"],
|
|
163
165
|
};
|
|
164
166
|
for (const session of sessions) {
|
|
165
|
-
for (const { item: rapportMetadata } of
|
|
167
|
+
for (const { item: rapportMetadata } of iterLoadSenatRapportUrls(dataDir, session)) {
|
|
166
168
|
await processRapport(rapportMetadata, originalRapportsDir, dlOptions);
|
|
167
169
|
}
|
|
168
170
|
}
|
package/lib/scripts/test_iter_load.js
CHANGED
|
@@ -1,18 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { iterLoadSenatRapports } from "../loaders";
|
|
2
2
|
import commandLineArgs from "command-line-args";
|
|
3
3
|
import { dataDirDefaultOption } from "./shared/cli_helpers";
|
|
4
4
|
const optionsDefinitions = [dataDirDefaultOption];
|
|
5
5
|
const options = commandLineArgs(optionsDefinitions);
|
|
6
6
|
const session = 2024;
|
|
7
7
|
const sinceCommit = undefined;
|
|
8
|
-
for (const { item:
|
|
9
|
-
log: true,
|
|
8
|
+
for (const { item: rapport } of iterLoadSenatRapports(options["dataDir"], session, {
|
|
10
9
|
sinceCommit: sinceCommit,
|
|
11
10
|
})) {
|
|
12
|
-
console.log(
|
|
13
|
-
}
|
|
14
|
-
for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(options["dataDir"], session, {
|
|
15
|
-
sinceCommit: sinceCommit,
|
|
16
|
-
})) {
|
|
17
|
-
console.log(dossierLegislatif["numero"]);
|
|
11
|
+
console.log(rapport);
|
|
18
12
|
}
|
package/lib/types/texte.d.ts
CHANGED
|
@@ -13,6 +13,15 @@ export declare enum DivisionType {
|
|
|
13
13
|
division = 12
|
|
14
14
|
}
|
|
15
15
|
export type DivisionTag = keyof typeof DivisionType;
|
|
16
|
+
export interface DocumentMetadata {
|
|
17
|
+
name: string;
|
|
18
|
+
session: number | null | undefined;
|
|
19
|
+
date?: string | null;
|
|
20
|
+
url_expose_des_motifs?: URL;
|
|
21
|
+
url_xml?: URL;
|
|
22
|
+
url_html: URL;
|
|
23
|
+
url_pdf: URL;
|
|
24
|
+
}
|
|
16
25
|
export interface FlatTexte {
|
|
17
26
|
titre: string | null;
|
|
18
27
|
titreCourt: string | null;
|