@tricoteuses/senat 2.20.28 → 2.20.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/loaders.d.ts +8 -46
- package/lib/model/documents.d.ts +12 -2
- package/lib/model/documents.js +121 -26
- package/lib/model/dosleg.js +2 -118
- package/lib/model/index.d.ts +1 -1
- package/lib/model/index.js +1 -1
- package/lib/scripts/convert_data.js +31 -17
- package/lib/scripts/retrieve_documents.d.ts +2 -1
- package/lib/scripts/retrieve_documents.js +2 -0
- package/lib/scripts/test_iter_load.js +5 -9
- package/lib/types/texte.d.ts +9 -0
- package/lib/utils/reunion_odj_building.js +29 -33
- package/package.json +1 -1
package/lib/loaders.d.ts
CHANGED
|
@@ -5,9 +5,11 @@ import { QuestionResult } from "./model/questions";
|
|
|
5
5
|
import { ScrutinResult } from "./model/scrutins";
|
|
6
6
|
import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
|
|
7
7
|
import { Reunion } from "./types/agenda";
|
|
8
|
-
import { FlatTexte } from "./types/texte";
|
|
8
|
+
import { FlatTexte, DocumentMetadata } from "./types/texte";
|
|
9
9
|
import { CompteRendu } from "./types/compte_rendu";
|
|
10
|
+
import { DocumentResult } from "./model/documents";
|
|
10
11
|
export { EnabledDatasets } from "./datasets";
|
|
12
|
+
export type { DocumentResult } from "./model/documents";
|
|
11
13
|
export declare const AGENDA_FOLDER = "agenda";
|
|
12
14
|
export declare const COMPTES_RENDUS_FOLDER = "seances";
|
|
13
15
|
export declare const COMMISSION_FOLDER = "commissions";
|
|
@@ -27,46 +29,6 @@ export type IterItem<T> = {
|
|
|
27
29
|
legislature?: number;
|
|
28
30
|
gitStatus?: "A" | "M" | "D" | "R" | "C" | "T" | "U";
|
|
29
31
|
};
|
|
30
|
-
export interface TexteMetadata {
|
|
31
|
-
name: string;
|
|
32
|
-
session: number | null | undefined;
|
|
33
|
-
date?: string | null;
|
|
34
|
-
url_expose_des_motifs?: URL;
|
|
35
|
-
url_xml: URL;
|
|
36
|
-
url_html: URL;
|
|
37
|
-
url_pdf: URL;
|
|
38
|
-
}
|
|
39
|
-
export interface RapportMetadata {
|
|
40
|
-
name: string;
|
|
41
|
-
session: number | null | undefined;
|
|
42
|
-
date?: string | null;
|
|
43
|
-
url_html: URL;
|
|
44
|
-
url_pdf: URL;
|
|
45
|
-
}
|
|
46
|
-
export interface DossierLegislatifDocumentResult {
|
|
47
|
-
signet_dossier: string;
|
|
48
|
-
url_dossier_senat: string;
|
|
49
|
-
url_dossier_assemblee_nationale: string | null;
|
|
50
|
-
type_lecture: string;
|
|
51
|
-
libelle_lecture: string;
|
|
52
|
-
libelle_organisme: string | null;
|
|
53
|
-
code_organisme: string | null;
|
|
54
|
-
numero: number | null;
|
|
55
|
-
id: string | null;
|
|
56
|
-
url: string;
|
|
57
|
-
origine?: string | null | undefined;
|
|
58
|
-
type: string;
|
|
59
|
-
date: string;
|
|
60
|
-
session: number | null;
|
|
61
|
-
auteurs: {
|
|
62
|
-
prenom: string | null;
|
|
63
|
-
nom_usuel: string;
|
|
64
|
-
matricule: string | null;
|
|
65
|
-
}[];
|
|
66
|
-
organismes?: {
|
|
67
|
-
libelle: string;
|
|
68
|
-
}[] | undefined;
|
|
69
|
-
}
|
|
70
32
|
export declare function iterFilePaths(dirPath: string): Generator<string>;
|
|
71
33
|
export declare function iterLoadSenatAmendements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AmendementResult>>;
|
|
72
34
|
export declare function iterLoadSenatDebats(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DebatResult>>;
|
|
@@ -79,11 +41,11 @@ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, s
|
|
|
79
41
|
session: number;
|
|
80
42
|
}>;
|
|
81
43
|
export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
|
|
82
|
-
export declare function iterLoadSenatDossiersLegislatifsRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<
|
|
83
|
-
export declare function iterLoadSenatDossiersLegislatifsTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<
|
|
84
|
-
export declare function iterLoadSenatDossiersLegislatifsDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<
|
|
85
|
-
export declare function iterLoadSenatDossiersLegislatifsRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<
|
|
86
|
-
export declare function iterLoadSenatDossiersLegislatifsTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<
|
|
44
|
+
export declare function iterLoadSenatDossiersLegislatifsRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
45
|
+
export declare function iterLoadSenatDossiersLegislatifsTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
|
|
46
|
+
export declare function iterLoadSenatDossiersLegislatifsDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
|
|
47
|
+
export declare function iterLoadSenatDossiersLegislatifsRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
48
|
+
export declare function iterLoadSenatDossiersLegislatifsTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
|
|
87
49
|
export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
|
|
88
50
|
export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
|
|
89
51
|
item: CompteRendu | null;
|
package/lib/model/documents.d.ts
CHANGED
|
@@ -1,2 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
export declare function
|
|
1
|
+
import { Expression, InferResult, SelectQueryBuilder } from "kysely";
|
|
2
|
+
export declare function rapports(lectureAssembleeId: Expression<string>): import("kysely").RawBuilder<{
|
|
3
|
+
[x: string]: any;
|
|
4
|
+
}[]>;
|
|
5
|
+
declare const queryTextes: SelectQueryBuilder<any, any, any>;
|
|
6
|
+
export declare function textes(lectureAssembleeId: Expression<string>): import("kysely").RawBuilder<{
|
|
7
|
+
[x: string]: any;
|
|
8
|
+
}[]>;
|
|
9
|
+
export declare function findAllTextes(): AsyncIterableIterator<DocumentResult>;
|
|
10
|
+
export declare function findAllRapports(): AsyncIterableIterator<DocumentResult>;
|
|
11
|
+
export type DocumentResult = InferResult<typeof queryTextes>[0];
|
|
12
|
+
export {};
|
package/lib/model/documents.js
CHANGED
|
@@ -1,33 +1,128 @@
|
|
|
1
|
+
import { sql } from "kysely";
|
|
1
2
|
import { dbSenat } from "../databases";
|
|
2
|
-
import { rtrim, toDateString } from "./util";
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
import { concat, rtrim, toDateString } from "./util";
|
|
4
|
+
import { jsonArrayFrom } from "kysely/helpers/postgres";
|
|
5
|
+
function orderOrdreOrigineTexte(expr) {
|
|
6
|
+
return sql `array_position(array['0','2','1'], ${expr})`;
|
|
7
|
+
}
|
|
8
|
+
function auteursRapport(rapportId) {
|
|
9
|
+
return jsonArrayFrom(dbSenat
|
|
5
10
|
.withSchema("dosleg")
|
|
6
|
-
.selectFrom("
|
|
7
|
-
.
|
|
8
|
-
.
|
|
9
|
-
|
|
10
|
-
.select(
|
|
11
|
-
"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
.selectFrom("dosleg.auteur")
|
|
12
|
+
.leftJoin("dosleg.ecr", "dosleg.ecr.autcod", "dosleg.auteur.autcod")
|
|
13
|
+
.leftJoin("dosleg.rolsig", "dosleg.rolsig.signataire", "dosleg.ecr.signataire")
|
|
14
|
+
.where("dosleg.ecr.rapcod", "=", rapportId)
|
|
15
|
+
.select([
|
|
16
|
+
"dosleg.auteur.prenom as prenom",
|
|
17
|
+
"dosleg.auteur.nomuse as nom_usuel",
|
|
18
|
+
"dosleg.auteur.autmat as matricule",
|
|
19
|
+
"dosleg.ecr.ecrnumtri as ordre",
|
|
20
|
+
"dosleg.rolsig.rolsiglib as role",
|
|
21
|
+
"dosleg.ecr.ecrqua as qualite",
|
|
15
22
|
])
|
|
16
|
-
|
|
17
|
-
|
|
23
|
+
.orderBy("dosleg.ecr.ecrnumtri", "asc"));
|
|
24
|
+
}
|
|
25
|
+
function documentsAttaches(rapportId) {
|
|
26
|
+
return jsonArrayFrom(dbSenat
|
|
27
|
+
.withSchema("dosleg")
|
|
28
|
+
.selectFrom("docatt")
|
|
29
|
+
.leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
|
|
30
|
+
.where("docatt.rapcod", "=", rapportId)
|
|
31
|
+
.select([
|
|
32
|
+
"docatt.docatturl as url",
|
|
33
|
+
"typatt.typattlib as type_document"
|
|
34
|
+
]));
|
|
35
|
+
}
|
|
36
|
+
const queryRapports = dbSenat
|
|
37
|
+
.withSchema("dosleg")
|
|
38
|
+
.selectFrom("rap")
|
|
39
|
+
.leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
|
|
40
|
+
.leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
|
|
41
|
+
.select(({ eb, ref, val }) => [
|
|
42
|
+
"rap.rapnum as numero",
|
|
43
|
+
"raporg.orgcod as code_organisme",
|
|
44
|
+
eb
|
|
45
|
+
.case()
|
|
46
|
+
.when("rap.rapurl", "is not", null)
|
|
47
|
+
.then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
48
|
+
.else(null)
|
|
49
|
+
.end()
|
|
50
|
+
.as("id"),
|
|
51
|
+
eb
|
|
52
|
+
.case()
|
|
53
|
+
.when("rap.typurl", "=", "I")
|
|
54
|
+
.then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
|
|
55
|
+
.else(rtrim(ref("rap.rapurl")))
|
|
56
|
+
.end()
|
|
57
|
+
.as("url"),
|
|
58
|
+
rtrim(ref("denrap.libdenrap")).as("type"),
|
|
59
|
+
rtrim(ref("rap.raptil")).as("titre"),
|
|
60
|
+
rtrim(ref("rap.rapsoustit")).as("sous_titre"),
|
|
61
|
+
toDateString(ref("rap.date_depot")).as("date"),
|
|
62
|
+
"sesann as session",
|
|
63
|
+
auteursRapport(ref("rap.rapcod")).as("auteurs"),
|
|
64
|
+
documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
|
|
65
|
+
]);
|
|
66
|
+
export function rapports(lectureAssembleeId) {
|
|
67
|
+
return jsonArrayFrom(queryRapports
|
|
68
|
+
.leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod")
|
|
69
|
+
.where("lecassrap.lecassidt", "=", lectureAssembleeId));
|
|
18
70
|
}
|
|
19
|
-
|
|
20
|
-
return dbSenat
|
|
71
|
+
function auteursTexte(texteId) {
|
|
72
|
+
return jsonArrayFrom(dbSenat
|
|
21
73
|
.withSchema("dosleg")
|
|
22
|
-
.selectFrom("
|
|
23
|
-
.
|
|
24
|
-
.
|
|
25
|
-
|
|
26
|
-
.select(
|
|
27
|
-
"
|
|
28
|
-
|
|
29
|
-
|
|
74
|
+
.selectFrom("auteur")
|
|
75
|
+
.leftJoin("ecr", "ecr.autcod", "auteur.autcod")
|
|
76
|
+
.leftJoin("rolsig", "rolsig.signataire", "ecr.signataire")
|
|
77
|
+
.where("ecr.texcod", "=", texteId)
|
|
78
|
+
.select([
|
|
79
|
+
"auteur.prenom as prenom",
|
|
80
|
+
"auteur.nomuse as nom_usuel",
|
|
81
|
+
"auteur.autmat as matricule",
|
|
82
|
+
"ecr.ecrnumtri as ordre",
|
|
83
|
+
"rolsig.rolsiglib as role",
|
|
84
|
+
"ecr.ecrqua as qualite",
|
|
30
85
|
])
|
|
31
|
-
|
|
32
|
-
|
|
86
|
+
.orderBy("ecr.ecrnumtri", "asc"));
|
|
87
|
+
}
|
|
88
|
+
const queryTextes = dbSenat
|
|
89
|
+
.withSchema("dosleg")
|
|
90
|
+
.selectFrom("texte")
|
|
91
|
+
.leftJoin("oritxt", "oritxt.oritxtcod", "texte.oritxtcod")
|
|
92
|
+
.leftJoin("typtxt", "typtxt.typtxtcod", "texte.typtxtcod")
|
|
93
|
+
.select(({ eb, ref, val }) => [
|
|
94
|
+
"texte.texnum as numero",
|
|
95
|
+
"texte.orgcod as code_organisme",
|
|
96
|
+
eb
|
|
97
|
+
.case()
|
|
98
|
+
.when("texte.texurl", "is not", null)
|
|
99
|
+
.then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
|
|
100
|
+
.else(null)
|
|
101
|
+
.end()
|
|
102
|
+
.as("id"),
|
|
103
|
+
eb
|
|
104
|
+
.case()
|
|
105
|
+
.when("texte.typurl", "=", "I")
|
|
106
|
+
.then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
|
|
107
|
+
.else(rtrim(ref("texte.texurl")))
|
|
108
|
+
.end()
|
|
109
|
+
.as("url"),
|
|
110
|
+
rtrim(ref("oritxt.oritxtlib")).as("origine"),
|
|
111
|
+
"oritxt.oriordre as ordre_origine",
|
|
112
|
+
"oritxt.oritxtado as code_adoption",
|
|
113
|
+
"oritxt.oritxtmod as modification",
|
|
114
|
+
rtrim(ref("typtxt.typtxtlib")).as("type"),
|
|
115
|
+
toDateString(ref("texte.txtoritxtdat")).as("date"),
|
|
116
|
+
"sesann as session",
|
|
117
|
+
auteursTexte(ref("texte.texcod")).as("auteurs"),
|
|
118
|
+
])
|
|
119
|
+
.orderBy(({ ref }) => orderOrdreOrigineTexte(ref("oritxt.oriordre")));
|
|
120
|
+
export function textes(lectureAssembleeId) {
|
|
121
|
+
return jsonArrayFrom(queryTextes.where("texte.lecassidt", "=", lectureAssembleeId));
|
|
122
|
+
}
|
|
123
|
+
export function findAllTextes() {
|
|
124
|
+
return queryTextes.stream();
|
|
125
|
+
}
|
|
126
|
+
export function findAllRapports() {
|
|
127
|
+
return queryRapports.stream();
|
|
33
128
|
}
|
package/lib/model/dosleg.js
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
import { sql } from "kysely";
|
|
2
2
|
import { jsonArrayFrom } from "kysely/helpers/postgres";
|
|
3
3
|
import { dbSenat } from "../databases";
|
|
4
|
-
import { concat,
|
|
5
|
-
|
|
6
|
-
return sql `array_position(array['0','2','1'], ${expr})`;
|
|
7
|
-
}
|
|
4
|
+
import { concat, rtrim, toDateString } from "./util";
|
|
5
|
+
import { textes, rapports } from "./documents";
|
|
8
6
|
function datesSeances(lectureAssembleeId) {
|
|
9
7
|
return jsonArrayFrom(dbSenat
|
|
10
8
|
.withSchema("dosleg")
|
|
@@ -12,120 +10,6 @@ function datesSeances(lectureAssembleeId) {
|
|
|
12
10
|
.where("dosleg.date_seance.lecidt", "=", lectureAssembleeId)
|
|
13
11
|
.select(({ ref }) => [toDateString(ref("dosleg.date_seance.date_s")).as("date")]));
|
|
14
12
|
}
|
|
15
|
-
function auteursRapport(rapportId) {
|
|
16
|
-
return jsonArrayFrom(dbSenat
|
|
17
|
-
.withSchema("dosleg")
|
|
18
|
-
.selectFrom("dosleg.auteur")
|
|
19
|
-
.leftJoin("dosleg.ecr", "dosleg.ecr.autcod", "dosleg.auteur.autcod")
|
|
20
|
-
.leftJoin("dosleg.rolsig", "dosleg.rolsig.signataire", "dosleg.ecr.signataire")
|
|
21
|
-
.where("dosleg.ecr.rapcod", "=", rapportId)
|
|
22
|
-
.select([
|
|
23
|
-
"dosleg.auteur.prenom as prenom",
|
|
24
|
-
"dosleg.auteur.nomuse as nom_usuel",
|
|
25
|
-
"dosleg.auteur.autmat as matricule",
|
|
26
|
-
"dosleg.ecr.ecrnumtri as ordre",
|
|
27
|
-
"dosleg.rolsig.rolsiglib as role",
|
|
28
|
-
"dosleg.ecr.ecrqua as qualite",
|
|
29
|
-
])
|
|
30
|
-
.orderBy("dosleg.ecr.ecrnumtri", "asc"));
|
|
31
|
-
}
|
|
32
|
-
function documentsAttaches(rapportId) {
|
|
33
|
-
return jsonArrayFrom(dbSenat
|
|
34
|
-
.withSchema("dosleg")
|
|
35
|
-
.selectFrom("docatt")
|
|
36
|
-
.leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
|
|
37
|
-
.where("docatt.rapcod", "=", rapportId)
|
|
38
|
-
.select([
|
|
39
|
-
"docatt.docatturl as url",
|
|
40
|
-
"typatt.typattlib as type_document"
|
|
41
|
-
]));
|
|
42
|
-
}
|
|
43
|
-
function rapports(lectureAssembleeId) {
|
|
44
|
-
return jsonArrayFrom(dbSenat
|
|
45
|
-
.withSchema("dosleg")
|
|
46
|
-
.selectFrom("rap")
|
|
47
|
-
.leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod")
|
|
48
|
-
.leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
|
|
49
|
-
.leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
|
|
50
|
-
.where("lecassrap.lecassidt", "=", lectureAssembleeId)
|
|
51
|
-
.select(({ eb, ref, val }) => [
|
|
52
|
-
"rap.rapnum as numero",
|
|
53
|
-
"raporg.orgcod as code_organisme",
|
|
54
|
-
eb
|
|
55
|
-
.case()
|
|
56
|
-
.when("rap.typurl", "=", "I")
|
|
57
|
-
.then(removeSubstring(ref("rap.rapurl"), val(".html")))
|
|
58
|
-
.else(null)
|
|
59
|
-
.end()
|
|
60
|
-
.as("id"),
|
|
61
|
-
eb
|
|
62
|
-
.case()
|
|
63
|
-
.when("rap.typurl", "=", "I")
|
|
64
|
-
.then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
|
|
65
|
-
.else(rtrim(ref("rap.rapurl")))
|
|
66
|
-
.end()
|
|
67
|
-
.as("url"),
|
|
68
|
-
rtrim(ref("denrap.libdenrap")).as("type"),
|
|
69
|
-
rtrim(ref("rap.raptil")).as("titre"),
|
|
70
|
-
rtrim(ref("rap.rapsoustit")).as("sous_titre"),
|
|
71
|
-
toDateString(ref("rap.date_depot")).as("date"),
|
|
72
|
-
"sesann as session",
|
|
73
|
-
auteursRapport(ref("rap.rapcod")).as("auteurs"),
|
|
74
|
-
documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
|
|
75
|
-
]));
|
|
76
|
-
}
|
|
77
|
-
function auteursTexte(texteId) {
|
|
78
|
-
return jsonArrayFrom(dbSenat
|
|
79
|
-
.withSchema("dosleg")
|
|
80
|
-
.selectFrom("auteur")
|
|
81
|
-
.leftJoin("ecr", "ecr.autcod", "auteur.autcod")
|
|
82
|
-
.leftJoin("rolsig", "rolsig.signataire", "ecr.signataire")
|
|
83
|
-
.where("ecr.texcod", "=", texteId)
|
|
84
|
-
.select([
|
|
85
|
-
"auteur.prenom as prenom",
|
|
86
|
-
"auteur.nomuse as nom_usuel",
|
|
87
|
-
"auteur.autmat as matricule",
|
|
88
|
-
"ecr.ecrnumtri as ordre",
|
|
89
|
-
"rolsig.rolsiglib as role",
|
|
90
|
-
"ecr.ecrqua as qualite",
|
|
91
|
-
])
|
|
92
|
-
.orderBy("ecr.ecrnumtri", "asc"));
|
|
93
|
-
}
|
|
94
|
-
function textes(lectureAssembleeId) {
|
|
95
|
-
return jsonArrayFrom(dbSenat
|
|
96
|
-
.withSchema("dosleg")
|
|
97
|
-
.selectFrom("texte")
|
|
98
|
-
.leftJoin("oritxt", "oritxt.oritxtcod", "texte.oritxtcod")
|
|
99
|
-
.leftJoin("typtxt", "typtxt.typtxtcod", "texte.typtxtcod")
|
|
100
|
-
.where("texte.lecassidt", "=", lectureAssembleeId)
|
|
101
|
-
.select(({ eb, ref, val }) => [
|
|
102
|
-
"texte.texnum as numero",
|
|
103
|
-
"texte.orgcod as code_organisme",
|
|
104
|
-
eb
|
|
105
|
-
.case()
|
|
106
|
-
.when("texte.typurl", "=", "I")
|
|
107
|
-
.then(removeSubstring(ref("texte.texurl"), val(".html")))
|
|
108
|
-
.else(null)
|
|
109
|
-
.end()
|
|
110
|
-
.as("id"),
|
|
111
|
-
eb
|
|
112
|
-
.case()
|
|
113
|
-
.when("texte.typurl", "=", "I")
|
|
114
|
-
.then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
|
|
115
|
-
.else(rtrim(ref("texte.texurl")))
|
|
116
|
-
.end()
|
|
117
|
-
.as("url"),
|
|
118
|
-
rtrim(ref("oritxt.oritxtlib")).as("origine"),
|
|
119
|
-
"oritxt.oriordre as ordre_origine",
|
|
120
|
-
"oritxt.oritxtado as code_adoption",
|
|
121
|
-
"oritxt.oritxtmod as modification",
|
|
122
|
-
rtrim(ref("typtxt.typtxtlib")).as("type"),
|
|
123
|
-
toDateString(ref("texte.txtoritxtdat")).as("date"),
|
|
124
|
-
"sesann as session",
|
|
125
|
-
auteursTexte(ref("texte.texcod")).as("auteurs"),
|
|
126
|
-
])
|
|
127
|
-
.orderBy(({ ref }) => orderOrdreOrigineTexte(ref("oritxt.oriordre"))));
|
|
128
|
-
}
|
|
129
13
|
function lecturesAssemblee(lectureId) {
|
|
130
14
|
return jsonArrayFrom(dbSenat
|
|
131
15
|
.withSchema("dosleg")
|
package/lib/model/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export { findAllAmendements } from "./ameli";
|
|
2
2
|
export { findAll as findAllDebats } from "./debats";
|
|
3
3
|
export { findAllDossiers } from "./dosleg";
|
|
4
|
-
export {
|
|
4
|
+
export { findAllTextes, findAllRapports } from "./documents";
|
|
5
5
|
export { findAllScrutins } from "./scrutins";
|
|
6
6
|
export { findAll as findAllQuestions } from "./questions";
|
|
7
7
|
export { findAll as findAllSens, findAllCirconscriptions, findAllOrganismes } from "./sens";
|
package/lib/model/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export { findAllAmendements } from "./ameli";
|
|
2
2
|
export { findAll as findAllDebats } from "./debats";
|
|
3
3
|
export { findAllDossiers } from "./dosleg";
|
|
4
|
-
export {
|
|
4
|
+
export { findAllTextes, findAllRapports } from "./documents";
|
|
5
5
|
export { findAllScrutins } from "./scrutins";
|
|
6
6
|
export { findAll as findAllQuestions } from "./questions";
|
|
7
7
|
export { findAll as findAllSens, findAllCirconscriptions, findAllOrganismes } from "./sens";
|
package/lib/scripts/convert_data.js
CHANGED
|
@@ -5,8 +5,8 @@ import path from "path";
|
|
|
5
5
|
import pLimit from "p-limit";
|
|
6
6
|
import * as git from "../git";
|
|
7
7
|
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
|
|
8
|
-
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER
|
|
9
|
-
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens,
|
|
8
|
+
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER } from "../loaders";
|
|
9
|
+
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAllTextes, findAllRapports, } from "../model";
|
|
10
10
|
import { processRapport, processTexte } from "./retrieve_documents";
|
|
11
11
|
import { buildActesLegislatifs } from "../model/dosleg";
|
|
12
12
|
import { UNDEFINED_SESSION } from "../types/sessions";
|
|
@@ -169,8 +169,8 @@ async function convertDatasetDosLeg(dataDir, options) {
|
|
|
169
169
|
const dossierFile = `${dossier["signet"]}.json`;
|
|
170
170
|
await fs.outputJSON(path.join(dossierReorganizedDir, dossierFile), dossierWithActes, { spaces: 2 });
|
|
171
171
|
}
|
|
172
|
-
await
|
|
173
|
-
await
|
|
172
|
+
await convertTextes(dataDir, options);
|
|
173
|
+
await convertRapports(dataDir, options);
|
|
174
174
|
}
|
|
175
175
|
async function convertDatasetScrutins(dataDir, options) {
|
|
176
176
|
const dataset = datasets.dosleg;
|
|
@@ -219,30 +219,38 @@ async function convertDatasetQuestions(dataDir, options) {
|
|
|
219
219
|
}
|
|
220
220
|
await Promise.all(tasks);
|
|
221
221
|
}
|
|
222
|
-
async function
|
|
222
|
+
async function convertTextes(dataDir, options) {
|
|
223
223
|
const originalTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
224
224
|
const transformedTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER);
|
|
225
225
|
if (!options["silent"]) {
|
|
226
226
|
console.log(`Converting database textes data into files…`);
|
|
227
227
|
}
|
|
228
|
-
for await (const texte of
|
|
229
|
-
const session = texte
|
|
228
|
+
for await (const texte of findAllTextes()) {
|
|
229
|
+
const session = texte["session"] ?? UNDEFINED_SESSION;
|
|
230
230
|
if (options["fromSession"] && session < options["fromSession"]) {
|
|
231
231
|
continue;
|
|
232
232
|
}
|
|
233
|
-
|
|
233
|
+
if (!texte["url"]) {
|
|
234
|
+
continue;
|
|
235
|
+
}
|
|
236
|
+
const texteName = path.parse(texte["url"]).name;
|
|
234
237
|
const texteDir = path.join(originalTextesDir, `${session}`, texteName);
|
|
238
|
+
// oritxtcod = 1 corresponds to "Texte de loi déposé au Sénat"
|
|
239
|
+
const hasExposeDesMotifs = texte["origine"] === 'Sénat' && texte["ordre_origine"] === '1';
|
|
235
240
|
const metadata = {
|
|
236
241
|
name: texteName,
|
|
237
|
-
session: texte
|
|
238
|
-
date: texte
|
|
239
|
-
url_expose_des_motifs:
|
|
242
|
+
session: texte["session"],
|
|
243
|
+
date: texte["date"],
|
|
244
|
+
url_expose_des_motifs: hasExposeDesMotifs
|
|
240
245
|
? new URL(`${texteName}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL)
|
|
241
246
|
: undefined,
|
|
242
247
|
url_xml: new URL(`${texteName}.akn.xml`, SENAT_TEXTE_XML_BASE_URL),
|
|
243
248
|
url_html: new URL(`${texteName}.html`, SENAT_TEXTE_BASE_URL),
|
|
244
249
|
url_pdf: new URL(`${texteName}.pdf`, SENAT_TEXTE_BASE_URL),
|
|
245
250
|
};
|
|
251
|
+
fs.outputJSONSync(path.join(texteDir, `${texteName}.json`), texte, {
|
|
252
|
+
spaces: 2,
|
|
253
|
+
});
|
|
246
254
|
fs.outputJSONSync(path.join(texteDir, DOCUMENT_METADATA_FILE), metadata, {
|
|
247
255
|
spaces: 2,
|
|
248
256
|
});
|
|
@@ -251,17 +259,20 @@ async function convertTexteUrls(dataDir, options) {
|
|
|
251
259
|
}
|
|
252
260
|
}
|
|
253
261
|
}
|
|
254
|
-
async function
|
|
262
|
+
async function convertRapports(dataDir, options) {
|
|
255
263
|
const originalRapportsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
|
|
256
264
|
if (!options["silent"]) {
|
|
257
265
|
console.log(`Converting database rapports data into files…`);
|
|
258
266
|
}
|
|
259
|
-
for await (const rapport of
|
|
260
|
-
const session = rapport
|
|
267
|
+
for await (const rapport of findAllRapports()) {
|
|
268
|
+
const session = rapport["session"] ?? UNDEFINED_SESSION;
|
|
261
269
|
if (options["fromSession"] && session < options["fromSession"]) {
|
|
262
270
|
continue;
|
|
263
271
|
}
|
|
264
|
-
|
|
272
|
+
if (!rapport["url"]) {
|
|
273
|
+
continue;
|
|
274
|
+
}
|
|
275
|
+
const parsedRapportUrl = path.parse(rapport["url"]);
|
|
265
276
|
const rapportName = parsedRapportUrl.name;
|
|
266
277
|
const rapportDir = path.join(originalRapportsDir, `${session}`, rapportName);
|
|
267
278
|
const rapportHtmlUrlBase = `${rapportName}_mono.html`;
|
|
@@ -276,11 +287,14 @@ async function convertRapportUrls(dataDir, options) {
|
|
|
276
287
|
});
|
|
277
288
|
const metadata = {
|
|
278
289
|
name: rapportName,
|
|
279
|
-
session: rapport
|
|
280
|
-
date: rapport
|
|
290
|
+
session: rapport["session"],
|
|
291
|
+
date: rapport["date"],
|
|
281
292
|
url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
|
|
282
293
|
url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
|
|
283
294
|
};
|
|
295
|
+
fs.outputJSONSync(path.join(rapportDir, `${rapportName}.json`), rapport, {
|
|
296
|
+
spaces: 2,
|
|
297
|
+
});
|
|
284
298
|
fs.outputJSONSync(path.join(rapportDir, DOCUMENT_METADATA_FILE), metadata, {
|
|
285
299
|
spaces: 2,
|
|
286
300
|
});
|
package/lib/scripts/retrieve_documents.d.ts
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
1
|
-
|
|
1
|
+
import { DocumentMetadata } from "../types/texte";
|
|
2
|
+
export declare function processTexte(texteMetadata: DocumentMetadata, originalTextesDir: string, transformedTextesDir: string, options: any): Promise<void>;
|
|
2
3
|
export declare function processRapport(rapportMetadata: any, originalRapportsDir: string, options: any): Promise<void>;
|
package/lib/scripts/retrieve_documents.js
CHANGED
|
@@ -111,6 +111,8 @@ export async function processTexte(texteMetadata, originalTextesDir, transformed
|
|
|
111
111
|
for (const format of formats) {
|
|
112
112
|
if (!isOptionEmptyOrHasValue(options.formats, format.type))
|
|
113
113
|
continue;
|
|
114
|
+
if (!format.url)
|
|
115
|
+
continue;
|
|
114
116
|
const destPath = path.join(texteDir, `${texteMetadata.name}.${format.type}`);
|
|
115
117
|
const result = await processDocument(format.url.toString(), destPath, texteMetadata.date, options);
|
|
116
118
|
// Specific logic: Parsing (Only applies to XML)
|
package/lib/scripts/test_iter_load.js
CHANGED
|
@@ -1,18 +1,14 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { iterLoadSenatDossiersLegislatifsRapports } from "../loaders";
|
|
2
2
|
import commandLineArgs from "command-line-args";
|
|
3
3
|
import { dataDirDefaultOption } from "./shared/cli_helpers";
|
|
4
4
|
const optionsDefinitions = [dataDirDefaultOption];
|
|
5
5
|
const options = commandLineArgs(optionsDefinitions);
|
|
6
6
|
const session = 2024;
|
|
7
7
|
const sinceCommit = undefined;
|
|
8
|
-
for (const { item:
|
|
9
|
-
log: true,
|
|
8
|
+
for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifsRapports(options["dataDir"], session, {
|
|
10
9
|
sinceCommit: sinceCommit,
|
|
11
10
|
})) {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
sinceCommit: sinceCommit,
|
|
16
|
-
})) {
|
|
17
|
-
console.log(dossierLegislatif["numero"]);
|
|
11
|
+
if (!dossierLegislatif["id"]?.includes("r24")) {
|
|
12
|
+
console.log(dossierLegislatif["id"]);
|
|
13
|
+
}
|
|
18
14
|
}
|
package/lib/types/texte.d.ts
CHANGED
|
@@ -13,6 +13,15 @@ export declare enum DivisionType {
|
|
|
13
13
|
division = 12
|
|
14
14
|
}
|
|
15
15
|
export type DivisionTag = keyof typeof DivisionType;
|
|
16
|
+
export interface DocumentMetadata {
|
|
17
|
+
name: string;
|
|
18
|
+
session: number | null | undefined;
|
|
19
|
+
date?: string | null;
|
|
20
|
+
url_expose_des_motifs?: URL;
|
|
21
|
+
url_xml?: URL;
|
|
22
|
+
url_html: URL;
|
|
23
|
+
url_pdf: URL;
|
|
24
|
+
}
|
|
16
25
|
export interface FlatTexte {
|
|
17
26
|
titre: string | null;
|
|
18
27
|
titreCourt: string | null;
|
package/lib/utils/reunion_odj_building.js
CHANGED
|
@@ -73,7 +73,7 @@ function detectLecture(objet) {
|
|
|
73
73
|
return undefined;
|
|
74
74
|
}
|
|
75
75
|
function computeCodeEtape(ev, dossier) {
|
|
76
|
-
// In order to match with
|
|
76
|
+
// In order to match with stage, we need to remove the '-SEANCE' suffix from the codeActe
|
|
77
77
|
const cleanCode = (code) => code.replace(/-SEANCE$/, "");
|
|
78
78
|
const lecture = detectLecture(ev.objet ?? "");
|
|
79
79
|
const organe = ev.organe ?? "";
|
|
@@ -84,7 +84,7 @@ function computeCodeEtape(ev, dossier) {
|
|
|
84
84
|
: "";
|
|
85
85
|
const evDate = ev.date.split("T")[0];
|
|
86
86
|
const flat = buildFlatActes(dossier);
|
|
87
|
-
// 1) Strict matching
|
|
87
|
+
// 1) Strict matching: same date + same nature
|
|
88
88
|
let candidates = flat.filter((a) => {
|
|
89
89
|
if (a.date !== evDate)
|
|
90
90
|
return false;
|
|
@@ -92,67 +92,63 @@ function computeCodeEtape(ev, dossier) {
|
|
|
92
92
|
return false;
|
|
93
93
|
return true;
|
|
94
94
|
});
|
|
95
|
-
// If lecture is
|
|
95
|
+
// If a specific lecture is detected in the agenda event, refine the candidates
|
|
96
96
|
if (lecture !== undefined && candidates.length > 0) {
|
|
97
97
|
const withLecture = candidates.filter((c) => c.ordreLecture === lecture);
|
|
98
98
|
if (withLecture.length > 0) {
|
|
99
99
|
candidates = withLecture;
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
|
-
// Multiple candidates : we take the longest ?
|
|
103
102
|
if (candidates.length > 0) {
|
|
103
|
+
// Multiple candidates: pick the most specific one (longest code string)
|
|
104
104
|
candidates.sort((a, b) => b.codeActe.length - a.codeActe.length);
|
|
105
105
|
return cleanCode(candidates[0].codeActe);
|
|
106
106
|
}
|
|
107
|
-
// 2)
|
|
107
|
+
// 2) Fallback COM: If no exact date match for a commission event,
|
|
108
|
+
// take the latest commission act for this lecture on or before the event date.
|
|
108
109
|
if (nature === "COM") {
|
|
109
110
|
let comActs = flat.filter((a) => a.codeActe.includes("COM") && a.date <= evDate);
|
|
110
111
|
if (lecture !== undefined) {
|
|
111
112
|
const byLecture = comActs.filter((a) => a.ordreLecture === lecture);
|
|
112
|
-
if (byLecture.length > 0)
|
|
113
|
+
if (byLecture.length > 0)
|
|
113
114
|
comActs = byLecture;
|
|
114
|
-
}
|
|
115
115
|
}
|
|
116
116
|
if (comActs.length > 0) {
|
|
117
117
|
comActs.sort((a, b) => b.date.localeCompare(a.date) || b.codeActe.length - a.codeActe.length);
|
|
118
118
|
return cleanCode(comActs[0].codeActe);
|
|
119
119
|
}
|
|
120
120
|
}
|
|
121
|
-
// 3) Fallback lecture
|
|
121
|
+
// 3) Fallback general lecture: if nothing else worked but a lecture is identified,
|
|
122
|
+
// find any act belonging to that lecture (e.g., SN1-DEPOT).
|
|
122
123
|
if (lecture !== undefined) {
|
|
123
|
-
const
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
if (rootCode && typeof rootCode === "string") {
|
|
127
|
-
return cleanCode(rootCode);
|
|
124
|
+
const genericActe = flat.find((a) => a.ordreLecture === lecture);
|
|
125
|
+
if (genericActe) {
|
|
126
|
+
return cleanCode(genericActe.codeActe);
|
|
128
127
|
}
|
|
129
128
|
}
|
|
130
|
-
console.log(
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
structurePremierActe: dossier["actes_legislatifs"]?.[0]
|
|
134
|
-
? JSON.stringify(dossier["actes_legislatifs"][0]).substring(0, 200)
|
|
135
|
-
: "AUCUN_ACTE",
|
|
129
|
+
console.log(`✖ No stage code found for ev=${ev.id} (Date: ${evDate}, Nature: ${nature}, Lecture: ${lecture})`, {
|
|
130
|
+
totalActsInDossier: dossier["actes_legislatifs"]?.length || 0,
|
|
131
|
+
firstActDate: flat[0]?.date,
|
|
136
132
|
});
|
|
137
133
|
return null;
|
|
138
134
|
}
|
|
139
135
|
function buildFlatActes(dossier) {
|
|
140
|
-
const
|
|
136
|
+
const actes = dossier["actes_legislatifs"] ?? [];
|
|
141
137
|
const res = [];
|
|
142
|
-
for (const
|
|
143
|
-
|
|
144
|
-
if (!assemblee.toLowerCase().includes("sénat") && !assemblee.toLowerCase().includes("senat"))
|
|
138
|
+
for (const acte of actes) {
|
|
139
|
+
if (acte["chambre"] !== "SN")
|
|
145
140
|
continue;
|
|
146
|
-
const
|
|
147
|
-
const
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
141
|
+
const codeActe = acte.code_acte;
|
|
142
|
+
const dateActe = acte.date?.split("T")[0];
|
|
143
|
+
if (!codeActe || !dateActe)
|
|
144
|
+
continue;
|
|
145
|
+
const match = codeActe.match(/^(?:SN|AN)(\d+)/);
|
|
146
|
+
const ordreLecture = match ? parseInt(match[1], 10) : undefined;
|
|
147
|
+
res.push({
|
|
148
|
+
codeActe,
|
|
149
|
+
date: dateActe,
|
|
150
|
+
ordreLecture,
|
|
151
|
+
});
|
|
156
152
|
}
|
|
157
153
|
return res;
|
|
158
154
|
}
|