@tricoteuses/senat 2.20.30 → 2.20.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/loaders.d.ts +5 -5
- package/lib/loaders.js +14 -18
- package/lib/model/documents.js +78 -55
- package/lib/scripts/retrieve_documents.js +3 -3
- package/lib/scripts/test_iter_load.js +3 -5
- package/package.json +1 -1
package/lib/loaders.d.ts
CHANGED
|
@@ -41,11 +41,11 @@ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, s
|
|
|
41
41
|
session: number;
|
|
42
42
|
}>;
|
|
43
43
|
export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
|
|
44
|
-
export declare function
|
|
45
|
-
export declare function
|
|
46
|
-
export declare function
|
|
47
|
-
export declare function
|
|
48
|
-
export declare function
|
|
44
|
+
export declare function iterLoadSenatRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
45
|
+
export declare function iterLoadSenatTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
46
|
+
export declare function iterLoadSenatDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
|
|
47
|
+
export declare function iterLoadSenatRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
48
|
+
export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
49
49
|
export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
|
|
50
50
|
export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
|
|
51
51
|
item: CompteRendu | null;
|
package/lib/loaders.js
CHANGED
|
@@ -50,6 +50,9 @@ function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, {
|
|
|
50
50
|
console.log(`Found ${changedFiles?.size || 0} changed files (AMR)`);
|
|
51
51
|
}
|
|
52
52
|
for (const filePath of iterFilePaths(itemsDir)) {
|
|
53
|
+
if (!filePath.endsWith(".json")) {
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
53
56
|
const relativePath = path.relative(path.join(dataDir, dataName), filePath);
|
|
54
57
|
const gitStatus = changedFiles?.get(relativePath);
|
|
55
58
|
// Filter by changed files if sinceCommit is specified
|
|
@@ -147,7 +150,7 @@ export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}
|
|
|
147
150
|
yield dossierLegislatifItem;
|
|
148
151
|
}
|
|
149
152
|
}
|
|
150
|
-
export function*
|
|
153
|
+
export function* iterLoadSenatRapportUrls(dataDir, session) {
|
|
151
154
|
let itemsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
152
155
|
if (session) {
|
|
153
156
|
itemsDir = path.join(itemsDir, session.toString());
|
|
@@ -163,7 +166,7 @@ export function* iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session) {
|
|
|
163
166
|
}
|
|
164
167
|
}
|
|
165
168
|
}
|
|
166
|
-
export function*
|
|
169
|
+
export function* iterLoadSenatTexteUrls(dataDir, session) {
|
|
167
170
|
let itemsDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
168
171
|
if (session) {
|
|
169
172
|
itemsDir = path.join(itemsDir, session.toString());
|
|
@@ -179,23 +182,14 @@ export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
|
|
|
179
182
|
}
|
|
180
183
|
}
|
|
181
184
|
}
|
|
182
|
-
export function*
|
|
185
|
+
export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
|
|
183
186
|
for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
|
|
184
187
|
for (const lecture of dossierLegislatif["lectures"]) {
|
|
185
188
|
const lecturesSenat = lecture.lectures_assemblee.filter((lectureAssemblee) => lectureAssemblee.assemblee === "Sénat");
|
|
186
189
|
for (const lectureSenat of lecturesSenat) {
|
|
187
190
|
for (const document of lectureSenat[documentType]) {
|
|
188
|
-
const enrichedDocument = {
|
|
189
|
-
signet_dossier: dossierLegislatif["signet"],
|
|
190
|
-
url_dossier_senat: dossierLegislatif["url"],
|
|
191
|
-
url_dossier_assemblee_nationale: dossierLegislatif["url_dossier_assemblee_nationale"],
|
|
192
|
-
type_lecture: lecture.type_lecture,
|
|
193
|
-
libelle_lecture: lecture.libelle,
|
|
194
|
-
libelle_organisme: lectureSenat.libelle_organisme,
|
|
195
|
-
...document,
|
|
196
|
-
};
|
|
197
191
|
const documentItem = {
|
|
198
|
-
item:
|
|
192
|
+
item: document,
|
|
199
193
|
};
|
|
200
194
|
if (document.url) {
|
|
201
195
|
const documentName = path.parse(document.url).name;
|
|
@@ -207,13 +201,15 @@ export function* iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, doc
|
|
|
207
201
|
}
|
|
208
202
|
}
|
|
209
203
|
}
|
|
210
|
-
export function*
|
|
211
|
-
for (const iterItem of
|
|
212
|
-
|
|
204
|
+
export function* iterLoadSenatRapports(dataDir, session, options = {}) {
|
|
205
|
+
for (const iterItem of iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options)) {
|
|
206
|
+
if (iterItem.item?.["id"]) {
|
|
207
|
+
yield iterItem;
|
|
208
|
+
}
|
|
213
209
|
}
|
|
214
210
|
}
|
|
215
|
-
export function*
|
|
216
|
-
for (const iterItem of
|
|
211
|
+
export function* iterLoadSenatTextes(dataDir, session, options = {}) {
|
|
212
|
+
for (const iterItem of iterLoadSenatDocuments(dataDir, session, "textes", options)) {
|
|
217
213
|
yield iterItem;
|
|
218
214
|
}
|
|
219
215
|
}
|
package/lib/model/documents.js
CHANGED
|
@@ -33,39 +33,50 @@ function documentsAttaches(rapportId) {
|
|
|
33
33
|
"typatt.typattlib as type_document"
|
|
34
34
|
]));
|
|
35
35
|
}
|
|
36
|
-
|
|
36
|
+
function selectRapportAttributes({ eb, ref, val }) {
|
|
37
|
+
return [
|
|
38
|
+
"rap.rapnum as numero",
|
|
39
|
+
"raporg.orgcod as code_organisme",
|
|
40
|
+
eb
|
|
41
|
+
.case()
|
|
42
|
+
.when("rap.rapurl", "is not", null)
|
|
43
|
+
.then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
44
|
+
.else(null)
|
|
45
|
+
.end()
|
|
46
|
+
.as("id"),
|
|
47
|
+
eb
|
|
48
|
+
.case()
|
|
49
|
+
.when("rap.typurl", "=", "I")
|
|
50
|
+
.then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
|
|
51
|
+
.else(rtrim(ref("rap.rapurl")))
|
|
52
|
+
.end()
|
|
53
|
+
.as("url"),
|
|
54
|
+
rtrim(ref("denrap.libdenrap")).as("type"),
|
|
55
|
+
rtrim(rtrim(ref("rap.raptil"))).as("titre"),
|
|
56
|
+
rtrim(rtrim(ref("rap.rapsoustit"))).as("sous_titre"),
|
|
57
|
+
toDateString(ref("rap.date_depot")).as("date"),
|
|
58
|
+
"rap.sesann as session",
|
|
59
|
+
auteursRapport(ref("rap.rapcod")).as("auteurs"),
|
|
60
|
+
documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
|
|
61
|
+
];
|
|
62
|
+
}
|
|
63
|
+
const baseQueryRapports = dbSenat
|
|
37
64
|
.withSchema("dosleg")
|
|
38
65
|
.selectFrom("rap")
|
|
39
66
|
.leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
|
|
40
67
|
.leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
|
|
41
|
-
.
|
|
42
|
-
|
|
43
|
-
"
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
.end()
|
|
50
|
-
.as("id"),
|
|
51
|
-
eb
|
|
52
|
-
.case()
|
|
53
|
-
.when("rap.typurl", "=", "I")
|
|
54
|
-
.then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
|
|
55
|
-
.else(rtrim(ref("rap.rapurl")))
|
|
56
|
-
.end()
|
|
57
|
-
.as("url"),
|
|
58
|
-
rtrim(ref("denrap.libdenrap")).as("type"),
|
|
59
|
-
rtrim(ref("rap.raptil")).as("titre"),
|
|
60
|
-
rtrim(ref("rap.rapsoustit")).as("sous_titre"),
|
|
61
|
-
toDateString(ref("rap.date_depot")).as("date"),
|
|
62
|
-
"sesann as session",
|
|
63
|
-
auteursRapport(ref("rap.rapcod")).as("auteurs"),
|
|
64
|
-
documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
|
|
68
|
+
.leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod");
|
|
69
|
+
const queryRapports = baseQueryRapports
|
|
70
|
+
.leftJoin("lecass", "lecass.lecassidt", "lecassrap.lecassidt")
|
|
71
|
+
.leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
|
|
72
|
+
.leftJoin("loi", "loi.loicod", "lecture.loicod")
|
|
73
|
+
.select((args) => [
|
|
74
|
+
"loi.signet as signet_dossier",
|
|
75
|
+
...selectRapportAttributes(args),
|
|
65
76
|
]);
|
|
66
77
|
export function rapports(lectureAssembleeId) {
|
|
67
|
-
return jsonArrayFrom(
|
|
68
|
-
.
|
|
78
|
+
return jsonArrayFrom(baseQueryRapports
|
|
79
|
+
.select(selectRapportAttributes)
|
|
69
80
|
.where("lecassrap.lecassidt", "=", lectureAssembleeId));
|
|
70
81
|
}
|
|
71
82
|
function auteursTexte(texteId) {
|
|
@@ -85,40 +96,52 @@ function auteursTexte(texteId) {
|
|
|
85
96
|
])
|
|
86
97
|
.orderBy("ecr.ecrnumtri", "asc"));
|
|
87
98
|
}
|
|
88
|
-
|
|
99
|
+
function selectTexteAttributes({ eb, ref, val }) {
|
|
100
|
+
return [
|
|
101
|
+
"texte.texnum as numero",
|
|
102
|
+
"texte.orgcod as code_organisme",
|
|
103
|
+
eb
|
|
104
|
+
.case()
|
|
105
|
+
.when("texte.texurl", "is not", null)
|
|
106
|
+
.then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
107
|
+
.else(null)
|
|
108
|
+
.end()
|
|
109
|
+
.as("id"),
|
|
110
|
+
eb
|
|
111
|
+
.case()
|
|
112
|
+
.when("texte.typurl", "=", "I")
|
|
113
|
+
.then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
|
|
114
|
+
.else(rtrim(ref("texte.texurl")))
|
|
115
|
+
.end()
|
|
116
|
+
.as("url"),
|
|
117
|
+
rtrim(ref("oritxt.oritxtlib")).as("origine"),
|
|
118
|
+
"oritxt.oriordre as ordre_origine",
|
|
119
|
+
"oritxt.oritxtado as code_adoption",
|
|
120
|
+
"oritxt.oritxtmod as modification",
|
|
121
|
+
rtrim(ref("typtxt.typtxtlib")).as("type"),
|
|
122
|
+
toDateString(ref("texte.txtoritxtdat")).as("date"),
|
|
123
|
+
"texte.sesann as session",
|
|
124
|
+
auteursTexte(ref("texte.texcod")).as("auteurs"),
|
|
125
|
+
];
|
|
126
|
+
}
|
|
127
|
+
const baseQueryTextes = dbSenat
|
|
89
128
|
.withSchema("dosleg")
|
|
90
129
|
.selectFrom("texte")
|
|
91
130
|
.leftJoin("oritxt", "oritxt.oritxtcod", "texte.oritxtcod")
|
|
92
131
|
.leftJoin("typtxt", "typtxt.typtxtcod", "texte.typtxtcod")
|
|
93
|
-
.select(({ eb, ref, val }) => [
|
|
94
|
-
"texte.texnum as numero",
|
|
95
|
-
"texte.orgcod as code_organisme",
|
|
96
|
-
eb
|
|
97
|
-
.case()
|
|
98
|
-
.when("texte.texurl", "is not", null)
|
|
99
|
-
.then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
100
|
-
.else(null)
|
|
101
|
-
.end()
|
|
102
|
-
.as("id"),
|
|
103
|
-
eb
|
|
104
|
-
.case()
|
|
105
|
-
.when("texte.typurl", "=", "I")
|
|
106
|
-
.then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
|
|
107
|
-
.else(rtrim(ref("texte.texurl")))
|
|
108
|
-
.end()
|
|
109
|
-
.as("url"),
|
|
110
|
-
rtrim(ref("oritxt.oritxtlib")).as("origine"),
|
|
111
|
-
"oritxt.oriordre as ordre_origine",
|
|
112
|
-
"oritxt.oritxtado as code_adoption",
|
|
113
|
-
"oritxt.oritxtmod as modification",
|
|
114
|
-
rtrim(ref("typtxt.typtxtlib")).as("type"),
|
|
115
|
-
toDateString(ref("texte.txtoritxtdat")).as("date"),
|
|
116
|
-
"sesann as session",
|
|
117
|
-
auteursTexte(ref("texte.texcod")).as("auteurs"),
|
|
118
|
-
])
|
|
119
132
|
.orderBy(({ ref }) => orderOrdreOrigineTexte(ref("oritxt.oriordre")));
|
|
133
|
+
const queryTextes = baseQueryTextes
|
|
134
|
+
.leftJoin("lecass", "lecass.lecassidt", "texte.lecassidt")
|
|
135
|
+
.leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
|
|
136
|
+
.leftJoin("loi", "loi.loicod", "lecture.loicod")
|
|
137
|
+
.select((args) => [
|
|
138
|
+
"loi.signet as signet_dossier",
|
|
139
|
+
...selectTexteAttributes(args),
|
|
140
|
+
]);
|
|
120
141
|
export function textes(lectureAssembleeId) {
|
|
121
|
-
return jsonArrayFrom(
|
|
142
|
+
return jsonArrayFrom(baseQueryTextes
|
|
143
|
+
.select(selectTexteAttributes)
|
|
144
|
+
.where("texte.lecassidt", "=", lectureAssembleeId));
|
|
122
145
|
}
|
|
123
146
|
export function findAllTextes() {
|
|
124
147
|
return queryTextes.stream();
|
|
package/lib/scripts/retrieve_documents.js
CHANGED
|
@@ -3,7 +3,7 @@ import commandLineArgs from "command-line-args";
|
|
|
3
3
|
import fs from "fs-extra";
|
|
4
4
|
import { DateTime } from "luxon";
|
|
5
5
|
import path from "path";
|
|
6
|
-
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER,
|
|
6
|
+
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
|
|
7
7
|
import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte";
|
|
8
8
|
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
9
9
|
import { commonOptions } from "./shared/cli_helpers";
|
|
@@ -149,7 +149,7 @@ async function retrieveTextes(dataDir, sessions) {
|
|
|
149
149
|
parseDocuments: options["parseDocuments"],
|
|
150
150
|
};
|
|
151
151
|
for (const session of sessions) {
|
|
152
|
-
for (const { item: texteMetadata } of
|
|
152
|
+
for (const { item: texteMetadata } of iterLoadSenatTexteUrls(dataDir, session)) {
|
|
153
153
|
await processTexte(texteMetadata, originalTextesDir, transformedTextesDir, dlOptions);
|
|
154
154
|
}
|
|
155
155
|
}
|
|
@@ -164,7 +164,7 @@ async function retrieveRapports(dataDir, sessions) {
|
|
|
164
164
|
formats: options["formats"],
|
|
165
165
|
};
|
|
166
166
|
for (const session of sessions) {
|
|
167
|
-
for (const { item: rapportMetadata } of
|
|
167
|
+
for (const { item: rapportMetadata } of iterLoadSenatRapportUrls(dataDir, session)) {
|
|
168
168
|
await processRapport(rapportMetadata, originalRapportsDir, dlOptions);
|
|
169
169
|
}
|
|
170
170
|
}
|
|
package/lib/scripts/test_iter_load.js
CHANGED
|
@@ -1,14 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { iterLoadSenatRapports } from "../loaders";
|
|
2
2
|
import commandLineArgs from "command-line-args";
|
|
3
3
|
import { dataDirDefaultOption } from "./shared/cli_helpers";
|
|
4
4
|
const optionsDefinitions = [dataDirDefaultOption];
|
|
5
5
|
const options = commandLineArgs(optionsDefinitions);
|
|
6
6
|
const session = 2024;
|
|
7
7
|
const sinceCommit = undefined;
|
|
8
|
-
for (const { item:
|
|
8
|
+
for (const { item: rapport } of iterLoadSenatRapports(options["dataDir"], session, {
|
|
9
9
|
sinceCommit: sinceCommit,
|
|
10
10
|
})) {
|
|
11
|
-
|
|
12
|
-
console.log(dossierLegislatif["id"]);
|
|
13
|
-
}
|
|
11
|
+
console.log(rapport);
|
|
14
12
|
}
|