@tricoteuses/senat 2.20.30 → 2.20.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/loaders.d.ts +5 -5
- package/lib/loaders.js +8 -17
- package/lib/model/documents.js +78 -55
- package/lib/scripts/retrieve_documents.js +3 -3
- package/lib/scripts/test_iter_load.js +3 -5
- package/package.json +1 -1
package/lib/loaders.d.ts
CHANGED
|
@@ -41,11 +41,11 @@ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, s
|
|
|
41
41
|
session: number;
|
|
42
42
|
}>;
|
|
43
43
|
export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
|
|
44
|
-
export declare function
|
|
45
|
-
export declare function
|
|
46
|
-
export declare function
|
|
47
|
-
export declare function
|
|
48
|
-
export declare function
|
|
44
|
+
export declare function iterLoadSenatRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
45
|
+
export declare function iterLoadSenatTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
46
|
+
export declare function iterLoadSenatDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
|
|
47
|
+
export declare function iterLoadSenatRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
48
|
+
export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
49
49
|
export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
|
|
50
50
|
export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
|
|
51
51
|
item: CompteRendu | null;
|
package/lib/loaders.js
CHANGED
|
@@ -147,7 +147,7 @@ export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}
|
|
|
147
147
|
yield dossierLegislatifItem;
|
|
148
148
|
}
|
|
149
149
|
}
|
|
150
|
-
export function*
|
|
150
|
+
export function* iterLoadSenatRapportUrls(dataDir, session) {
|
|
151
151
|
let itemsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
152
152
|
if (session) {
|
|
153
153
|
itemsDir = path.join(itemsDir, session.toString());
|
|
@@ -163,7 +163,7 @@ export function* iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session) {
|
|
|
163
163
|
}
|
|
164
164
|
}
|
|
165
165
|
}
|
|
166
|
-
export function*
|
|
166
|
+
export function* iterLoadSenatTexteUrls(dataDir, session) {
|
|
167
167
|
let itemsDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
168
168
|
if (session) {
|
|
169
169
|
itemsDir = path.join(itemsDir, session.toString());
|
|
@@ -179,23 +179,14 @@ export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
|
|
|
179
179
|
}
|
|
180
180
|
}
|
|
181
181
|
}
|
|
182
|
-
export function*
|
|
182
|
+
export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
|
|
183
183
|
for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
|
|
184
184
|
for (const lecture of dossierLegislatif["lectures"]) {
|
|
185
185
|
const lecturesSenat = lecture.lectures_assemblee.filter((lectureAssemblee) => lectureAssemblee.assemblee === "Sénat");
|
|
186
186
|
for (const lectureSenat of lecturesSenat) {
|
|
187
187
|
for (const document of lectureSenat[documentType]) {
|
|
188
|
-
const enrichedDocument = {
|
|
189
|
-
signet_dossier: dossierLegislatif["signet"],
|
|
190
|
-
url_dossier_senat: dossierLegislatif["url"],
|
|
191
|
-
url_dossier_assemblee_nationale: dossierLegislatif["url_dossier_assemblee_nationale"],
|
|
192
|
-
type_lecture: lecture.type_lecture,
|
|
193
|
-
libelle_lecture: lecture.libelle,
|
|
194
|
-
libelle_organisme: lectureSenat.libelle_organisme,
|
|
195
|
-
...document,
|
|
196
|
-
};
|
|
197
188
|
const documentItem = {
|
|
198
|
-
item:
|
|
189
|
+
item: document,
|
|
199
190
|
};
|
|
200
191
|
if (document.url) {
|
|
201
192
|
const documentName = path.parse(document.url).name;
|
|
@@ -207,13 +198,13 @@ export function* iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, doc
|
|
|
207
198
|
}
|
|
208
199
|
}
|
|
209
200
|
}
|
|
210
|
-
export function*
|
|
211
|
-
for (const iterItem of
|
|
201
|
+
export function* iterLoadSenatRapports(dataDir, session, options = {}) {
|
|
202
|
+
for (const iterItem of iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options)) {
|
|
212
203
|
yield iterItem;
|
|
213
204
|
}
|
|
214
205
|
}
|
|
215
|
-
export function*
|
|
216
|
-
for (const iterItem of
|
|
206
|
+
export function* iterLoadSenatTextes(dataDir, session, options = {}) {
|
|
207
|
+
for (const iterItem of iterLoadSenatDocuments(dataDir, session, "textes", options)) {
|
|
217
208
|
yield iterItem;
|
|
218
209
|
}
|
|
219
210
|
}
|
package/lib/model/documents.js
CHANGED
|
@@ -33,39 +33,50 @@ function documentsAttaches(rapportId) {
|
|
|
33
33
|
"typatt.typattlib as type_document"
|
|
34
34
|
]));
|
|
35
35
|
}
|
|
36
|
-
|
|
36
|
+
function selectRapportAttributes({ eb, ref, val }) {
|
|
37
|
+
return [
|
|
38
|
+
"rap.rapnum as numero",
|
|
39
|
+
"raporg.orgcod as code_organisme",
|
|
40
|
+
eb
|
|
41
|
+
.case()
|
|
42
|
+
.when("rap.rapurl", "is not", null)
|
|
43
|
+
.then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
44
|
+
.else(null)
|
|
45
|
+
.end()
|
|
46
|
+
.as("id"),
|
|
47
|
+
eb
|
|
48
|
+
.case()
|
|
49
|
+
.when("rap.typurl", "=", "I")
|
|
50
|
+
.then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
|
|
51
|
+
.else(rtrim(ref("rap.rapurl")))
|
|
52
|
+
.end()
|
|
53
|
+
.as("url"),
|
|
54
|
+
rtrim(ref("denrap.libdenrap")).as("type"),
|
|
55
|
+
rtrim(rtrim(ref("rap.raptil"))).as("titre"),
|
|
56
|
+
rtrim(rtrim(ref("rap.rapsoustit"))).as("sous_titre"),
|
|
57
|
+
toDateString(ref("rap.date_depot")).as("date"),
|
|
58
|
+
"rap.sesann as session",
|
|
59
|
+
auteursRapport(ref("rap.rapcod")).as("auteurs"),
|
|
60
|
+
documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
|
|
61
|
+
];
|
|
62
|
+
}
|
|
63
|
+
const baseQueryRapports = dbSenat
|
|
37
64
|
.withSchema("dosleg")
|
|
38
65
|
.selectFrom("rap")
|
|
39
66
|
.leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
|
|
40
67
|
.leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
|
|
41
|
-
.
|
|
42
|
-
|
|
43
|
-
"
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
.end()
|
|
50
|
-
.as("id"),
|
|
51
|
-
eb
|
|
52
|
-
.case()
|
|
53
|
-
.when("rap.typurl", "=", "I")
|
|
54
|
-
.then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
|
|
55
|
-
.else(rtrim(ref("rap.rapurl")))
|
|
56
|
-
.end()
|
|
57
|
-
.as("url"),
|
|
58
|
-
rtrim(ref("denrap.libdenrap")).as("type"),
|
|
59
|
-
rtrim(ref("rap.raptil")).as("titre"),
|
|
60
|
-
rtrim(ref("rap.rapsoustit")).as("sous_titre"),
|
|
61
|
-
toDateString(ref("rap.date_depot")).as("date"),
|
|
62
|
-
"sesann as session",
|
|
63
|
-
auteursRapport(ref("rap.rapcod")).as("auteurs"),
|
|
64
|
-
documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
|
|
68
|
+
.leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod");
|
|
69
|
+
const queryRapports = baseQueryRapports
|
|
70
|
+
.leftJoin("lecass", "lecass.lecassidt", "lecassrap.lecassidt")
|
|
71
|
+
.leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
|
|
72
|
+
.leftJoin("loi", "loi.loicod", "lecture.loicod")
|
|
73
|
+
.select((args) => [
|
|
74
|
+
"loi.signet as signet_dossier",
|
|
75
|
+
...selectRapportAttributes(args),
|
|
65
76
|
]);
|
|
66
77
|
export function rapports(lectureAssembleeId) {
|
|
67
|
-
return jsonArrayFrom(
|
|
68
|
-
.
|
|
78
|
+
return jsonArrayFrom(baseQueryRapports
|
|
79
|
+
.select(selectRapportAttributes)
|
|
69
80
|
.where("lecassrap.lecassidt", "=", lectureAssembleeId));
|
|
70
81
|
}
|
|
71
82
|
function auteursTexte(texteId) {
|
|
@@ -85,40 +96,52 @@ function auteursTexte(texteId) {
|
|
|
85
96
|
])
|
|
86
97
|
.orderBy("ecr.ecrnumtri", "asc"));
|
|
87
98
|
}
|
|
88
|
-
|
|
99
|
+
function selectTexteAttributes({ eb, ref, val }) {
|
|
100
|
+
return [
|
|
101
|
+
"texte.texnum as numero",
|
|
102
|
+
"texte.orgcod as code_organisme",
|
|
103
|
+
eb
|
|
104
|
+
.case()
|
|
105
|
+
.when("texte.texurl", "is not", null)
|
|
106
|
+
.then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
107
|
+
.else(null)
|
|
108
|
+
.end()
|
|
109
|
+
.as("id"),
|
|
110
|
+
eb
|
|
111
|
+
.case()
|
|
112
|
+
.when("texte.typurl", "=", "I")
|
|
113
|
+
.then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
|
|
114
|
+
.else(rtrim(ref("texte.texurl")))
|
|
115
|
+
.end()
|
|
116
|
+
.as("url"),
|
|
117
|
+
rtrim(ref("oritxt.oritxtlib")).as("origine"),
|
|
118
|
+
"oritxt.oriordre as ordre_origine",
|
|
119
|
+
"oritxt.oritxtado as code_adoption",
|
|
120
|
+
"oritxt.oritxtmod as modification",
|
|
121
|
+
rtrim(ref("typtxt.typtxtlib")).as("type"),
|
|
122
|
+
toDateString(ref("texte.txtoritxtdat")).as("date"),
|
|
123
|
+
"texte.sesann as session",
|
|
124
|
+
auteursTexte(ref("texte.texcod")).as("auteurs"),
|
|
125
|
+
];
|
|
126
|
+
}
|
|
127
|
+
const baseQueryTextes = dbSenat
|
|
89
128
|
.withSchema("dosleg")
|
|
90
129
|
.selectFrom("texte")
|
|
91
130
|
.leftJoin("oritxt", "oritxt.oritxtcod", "texte.oritxtcod")
|
|
92
131
|
.leftJoin("typtxt", "typtxt.typtxtcod", "texte.typtxtcod")
|
|
93
|
-
.select(({ eb, ref, val }) => [
|
|
94
|
-
"texte.texnum as numero",
|
|
95
|
-
"texte.orgcod as code_organisme",
|
|
96
|
-
eb
|
|
97
|
-
.case()
|
|
98
|
-
.when("texte.texurl", "is not", null)
|
|
99
|
-
.then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
100
|
-
.else(null)
|
|
101
|
-
.end()
|
|
102
|
-
.as("id"),
|
|
103
|
-
eb
|
|
104
|
-
.case()
|
|
105
|
-
.when("texte.typurl", "=", "I")
|
|
106
|
-
.then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
|
|
107
|
-
.else(rtrim(ref("texte.texurl")))
|
|
108
|
-
.end()
|
|
109
|
-
.as("url"),
|
|
110
|
-
rtrim(ref("oritxt.oritxtlib")).as("origine"),
|
|
111
|
-
"oritxt.oriordre as ordre_origine",
|
|
112
|
-
"oritxt.oritxtado as code_adoption",
|
|
113
|
-
"oritxt.oritxtmod as modification",
|
|
114
|
-
rtrim(ref("typtxt.typtxtlib")).as("type"),
|
|
115
|
-
toDateString(ref("texte.txtoritxtdat")).as("date"),
|
|
116
|
-
"sesann as session",
|
|
117
|
-
auteursTexte(ref("texte.texcod")).as("auteurs"),
|
|
118
|
-
])
|
|
119
132
|
.orderBy(({ ref }) => orderOrdreOrigineTexte(ref("oritxt.oriordre")));
|
|
133
|
+
const queryTextes = baseQueryTextes
|
|
134
|
+
.leftJoin("lecass", "lecass.lecassidt", "texte.lecassidt")
|
|
135
|
+
.leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
|
|
136
|
+
.leftJoin("loi", "loi.loicod", "lecture.loicod")
|
|
137
|
+
.select((args) => [
|
|
138
|
+
"loi.signet as signet_dossier",
|
|
139
|
+
...selectTexteAttributes(args),
|
|
140
|
+
]);
|
|
120
141
|
export function textes(lectureAssembleeId) {
|
|
121
|
-
return jsonArrayFrom(
|
|
142
|
+
return jsonArrayFrom(baseQueryTextes
|
|
143
|
+
.select(selectTexteAttributes)
|
|
144
|
+
.where("texte.lecassidt", "=", lectureAssembleeId));
|
|
122
145
|
}
|
|
123
146
|
export function findAllTextes() {
|
|
124
147
|
return queryTextes.stream();
|
package/lib/scripts/retrieve_documents.js
CHANGED
|
@@ -3,7 +3,7 @@ import commandLineArgs from "command-line-args";
|
|
|
3
3
|
import fs from "fs-extra";
|
|
4
4
|
import { DateTime } from "luxon";
|
|
5
5
|
import path from "path";
|
|
6
|
-
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER,
|
|
6
|
+
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
|
|
7
7
|
import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte";
|
|
8
8
|
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
9
9
|
import { commonOptions } from "./shared/cli_helpers";
|
|
@@ -149,7 +149,7 @@ async function retrieveTextes(dataDir, sessions) {
|
|
|
149
149
|
parseDocuments: options["parseDocuments"],
|
|
150
150
|
};
|
|
151
151
|
for (const session of sessions) {
|
|
152
|
-
for (const { item: texteMetadata } of
|
|
152
|
+
for (const { item: texteMetadata } of iterLoadSenatTexteUrls(dataDir, session)) {
|
|
153
153
|
await processTexte(texteMetadata, originalTextesDir, transformedTextesDir, dlOptions);
|
|
154
154
|
}
|
|
155
155
|
}
|
|
@@ -164,7 +164,7 @@ async function retrieveRapports(dataDir, sessions) {
|
|
|
164
164
|
formats: options["formats"],
|
|
165
165
|
};
|
|
166
166
|
for (const session of sessions) {
|
|
167
|
-
for (const { item: rapportMetadata } of
|
|
167
|
+
for (const { item: rapportMetadata } of iterLoadSenatRapportUrls(dataDir, session)) {
|
|
168
168
|
await processRapport(rapportMetadata, originalRapportsDir, dlOptions);
|
|
169
169
|
}
|
|
170
170
|
}
|
package/lib/scripts/test_iter_load.js
CHANGED
|
@@ -1,14 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { iterLoadSenatRapports } from "../loaders";
|
|
2
2
|
import commandLineArgs from "command-line-args";
|
|
3
3
|
import { dataDirDefaultOption } from "./shared/cli_helpers";
|
|
4
4
|
const optionsDefinitions = [dataDirDefaultOption];
|
|
5
5
|
const options = commandLineArgs(optionsDefinitions);
|
|
6
6
|
const session = 2024;
|
|
7
7
|
const sinceCommit = undefined;
|
|
8
|
-
for (const { item:
|
|
8
|
+
for (const { item: rapport } of iterLoadSenatRapports(options["dataDir"], session, {
|
|
9
9
|
sinceCommit: sinceCommit,
|
|
10
10
|
})) {
|
|
11
|
-
|
|
12
|
-
console.log(dossierLegislatif["id"]);
|
|
13
|
-
}
|
|
11
|
+
console.log(rapport);
|
|
14
12
|
}
|