@tricoteuses/senat 2.20.33 → 2.20.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/LICENSE.md +12 -2
  2. package/lib/loaders.d.ts +46 -8
  3. package/lib/loaders.js +14 -14
  4. package/lib/model/documents.d.ts +2 -12
  5. package/lib/model/documents.js +26 -144
  6. package/lib/model/dosleg.js +118 -2
  7. package/lib/model/index.d.ts +1 -1
  8. package/lib/model/index.js +1 -1
  9. package/lib/scripts/convert_data.js +17 -31
  10. package/lib/scripts/retrieve_documents.d.ts +1 -2
  11. package/lib/scripts/retrieve_documents.js +3 -5
  12. package/lib/scripts/test_iter_load.js +9 -3
  13. package/lib/src/config.d.ts +21 -0
  14. package/lib/src/config.js +27 -0
  15. package/lib/src/databases.d.ts +2 -0
  16. package/lib/src/databases.js +26 -0
  17. package/lib/src/datasets.d.ts +34 -0
  18. package/lib/src/datasets.js +233 -0
  19. package/lib/src/git.d.ts +26 -0
  20. package/lib/src/git.js +167 -0
  21. package/lib/src/index.d.ts +13 -0
  22. package/lib/src/index.js +1 -0
  23. package/lib/src/loaders.d.ts +58 -0
  24. package/lib/src/loaders.js +286 -0
  25. package/lib/src/model/agenda.d.ts +6 -0
  26. package/lib/src/model/agenda.js +148 -0
  27. package/lib/src/model/ameli.d.ts +51 -0
  28. package/lib/src/model/ameli.js +147 -0
  29. package/lib/src/model/commission.d.ts +18 -0
  30. package/lib/src/model/commission.js +269 -0
  31. package/lib/src/model/debats.d.ts +67 -0
  32. package/lib/src/model/debats.js +95 -0
  33. package/lib/src/model/documents.d.ts +12 -0
  34. package/lib/src/model/documents.js +138 -0
  35. package/lib/src/model/dosleg.d.ts +7 -0
  36. package/lib/src/model/dosleg.js +326 -0
  37. package/lib/src/model/index.d.ts +7 -0
  38. package/lib/src/model/index.js +7 -0
  39. package/lib/src/model/questions.d.ts +45 -0
  40. package/lib/src/model/questions.js +89 -0
  41. package/lib/src/model/scrutins.d.ts +13 -0
  42. package/lib/src/model/scrutins.js +114 -0
  43. package/lib/src/model/seance.d.ts +3 -0
  44. package/lib/src/model/seance.js +267 -0
  45. package/lib/src/model/sens.d.ts +146 -0
  46. package/lib/src/model/sens.js +454 -0
  47. package/lib/src/model/util.d.ts +9 -0
  48. package/lib/src/model/util.js +38 -0
  49. package/lib/src/parsers/texte.d.ts +7 -0
  50. package/lib/src/parsers/texte.js +228 -0
  51. package/lib/src/raw_types/ameli.d.ts +914 -0
  52. package/lib/src/raw_types/ameli.js +5 -0
  53. package/lib/src/raw_types/debats.d.ts +207 -0
  54. package/lib/src/raw_types/debats.js +5 -0
  55. package/lib/src/raw_types/dosleg.d.ts +1619 -0
  56. package/lib/src/raw_types/dosleg.js +5 -0
  57. package/lib/src/raw_types/questions.d.ts +423 -0
  58. package/lib/src/raw_types/questions.js +5 -0
  59. package/lib/src/raw_types/senat.d.ts +11372 -0
  60. package/lib/src/raw_types/senat.js +5 -0
  61. package/lib/src/raw_types/sens.d.ts +8248 -0
  62. package/lib/src/raw_types/sens.js +5 -0
  63. package/lib/src/raw_types_schemats/ameli.d.ts +539 -0
  64. package/lib/src/raw_types_schemats/ameli.js +2 -0
  65. package/lib/src/raw_types_schemats/debats.d.ts +127 -0
  66. package/lib/src/raw_types_schemats/debats.js +2 -0
  67. package/lib/src/raw_types_schemats/dosleg.d.ts +977 -0
  68. package/lib/src/raw_types_schemats/dosleg.js +2 -0
  69. package/lib/src/raw_types_schemats/questions.d.ts +237 -0
  70. package/lib/src/raw_types_schemats/questions.js +2 -0
  71. package/lib/src/raw_types_schemats/sens.d.ts +6915 -0
  72. package/lib/src/raw_types_schemats/sens.js +2 -0
  73. package/lib/src/scripts/convert_data.d.ts +1 -0
  74. package/lib/src/scripts/convert_data.js +354 -0
  75. package/lib/src/scripts/data-download.d.ts +1 -0
  76. package/lib/src/scripts/data-download.js +12 -0
  77. package/lib/src/scripts/datautil.d.ts +8 -0
  78. package/lib/src/scripts/datautil.js +34 -0
  79. package/lib/src/scripts/retrieve_agenda.d.ts +1 -0
  80. package/lib/src/scripts/retrieve_agenda.js +132 -0
  81. package/lib/src/scripts/retrieve_cr_commission.d.ts +1 -0
  82. package/lib/src/scripts/retrieve_cr_commission.js +364 -0
  83. package/lib/src/scripts/retrieve_cr_seance.d.ts +6 -0
  84. package/lib/src/scripts/retrieve_cr_seance.js +347 -0
  85. package/lib/src/scripts/retrieve_documents.d.ts +3 -0
  86. package/lib/src/scripts/retrieve_documents.js +219 -0
  87. package/lib/src/scripts/retrieve_open_data.d.ts +1 -0
  88. package/lib/src/scripts/retrieve_open_data.js +316 -0
  89. package/lib/src/scripts/retrieve_senateurs_photos.d.ts +1 -0
  90. package/lib/src/scripts/retrieve_senateurs_photos.js +147 -0
  91. package/lib/src/scripts/retrieve_videos.d.ts +33 -0
  92. package/lib/src/scripts/retrieve_videos.js +419 -0
  93. package/lib/src/scripts/shared/cli_helpers.d.ts +95 -0
  94. package/lib/src/scripts/shared/cli_helpers.js +91 -0
  95. package/lib/src/scripts/shared/util.d.ts +4 -0
  96. package/lib/src/scripts/shared/util.js +35 -0
  97. package/lib/src/scripts/test_iter_load.d.ts +1 -0
  98. package/lib/src/scripts/test_iter_load.js +12 -0
  99. package/lib/src/strings.d.ts +1 -0
  100. package/lib/src/strings.js +18 -0
  101. package/lib/src/types/agenda.d.ts +44 -0
  102. package/lib/src/types/agenda.js +1 -0
  103. package/lib/src/types/ameli.d.ts +5 -0
  104. package/lib/src/types/ameli.js +1 -0
  105. package/lib/src/types/compte_rendu.d.ts +83 -0
  106. package/lib/src/types/compte_rendu.js +1 -0
  107. package/lib/src/types/debats.d.ts +2 -0
  108. package/lib/src/types/debats.js +1 -0
  109. package/lib/src/types/dosleg.d.ts +70 -0
  110. package/lib/src/types/dosleg.js +1 -0
  111. package/lib/src/types/questions.d.ts +2 -0
  112. package/lib/src/types/questions.js +1 -0
  113. package/lib/src/types/sens.d.ts +10 -0
  114. package/lib/src/types/sens.js +1 -0
  115. package/lib/src/types/sessions.d.ts +5 -0
  116. package/lib/src/types/sessions.js +84 -0
  117. package/lib/src/types/texte.d.ts +74 -0
  118. package/lib/src/types/texte.js +16 -0
  119. package/lib/src/utils/cr_spliting.d.ts +28 -0
  120. package/lib/src/utils/cr_spliting.js +265 -0
  121. package/lib/src/utils/date.d.ts +10 -0
  122. package/lib/src/utils/date.js +100 -0
  123. package/lib/src/utils/nvs-timecode.d.ts +17 -0
  124. package/lib/src/utils/nvs-timecode.js +79 -0
  125. package/lib/src/utils/reunion_odj_building.d.ts +5 -0
  126. package/lib/src/utils/reunion_odj_building.js +154 -0
  127. package/lib/src/utils/reunion_parsing.d.ts +23 -0
  128. package/lib/src/utils/reunion_parsing.js +210 -0
  129. package/lib/src/utils/scoring.d.ts +77 -0
  130. package/lib/src/utils/scoring.js +293 -0
  131. package/lib/src/utils/string_cleaning.d.ts +7 -0
  132. package/lib/src/utils/string_cleaning.js +57 -0
  133. package/lib/src/utils/weights_scoring_config.d.ts +2 -0
  134. package/lib/src/utils/weights_scoring_config.js +15 -0
  135. package/lib/src/validators/config.d.ts +9 -0
  136. package/lib/src/validators/config.js +10 -0
  137. package/lib/tests/videoMatching.test.d.ts +1 -0
  138. package/lib/tests/videoMatching.test.js +396 -0
  139. package/lib/types/texte.d.ts +0 -9
  140. package/package.json +18 -7
package/LICENSE.md CHANGED
@@ -2,9 +2,19 @@
2
2
 
3
3
  ## _Handle French Sénat's open data_
4
4
 
5
- By: Emmanuel Raviart <mailto:emmanuel@raviart.com>
5
+ By:
6
6
 
7
- Copyright (C) 2019, 2020, 2021 Emmanuel Raviart
7
+ - Henry Boisgibault <mailto:henry.boisgibault@proton.me>
8
+ - Pierre Drege <mailto:dregop@proton.me>
9
+ - Hélène Jonin <mailto:helene.jonin@gmail.com>
10
+ - Paul-Henry Ngounou <mailto:ngpaulhenry@gmail.com>
11
+ - Emmanuel Raviart <mailto:emmanuel@raviart.com>
12
+
13
+ Copyright:
14
+
15
+ - © 2019, 2021, 2022 Emmanuel Raviart
16
+ - © 2024 Logora
17
+ - © 2025, 2026 Logora & Emmanuel Raviart
8
18
 
9
19
  https://git.tricoteuses.fr/logiciels/tricoteuses-senat
10
20
 
package/lib/loaders.d.ts CHANGED
@@ -5,11 +5,9 @@ import { QuestionResult } from "./model/questions";
5
5
  import { ScrutinResult } from "./model/scrutins";
6
6
  import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
7
7
  import { Reunion } from "./types/agenda";
8
- import { FlatTexte, DocumentMetadata } from "./types/texte";
8
+ import { FlatTexte } from "./types/texte";
9
9
  import { CompteRendu } from "./types/compte_rendu";
10
- import { DocumentResult } from "./model/documents";
11
10
  export { EnabledDatasets } from "./datasets";
12
- export type { DocumentResult } from "./model/documents";
13
11
  export declare const AGENDA_FOLDER = "agenda";
14
12
  export declare const COMPTES_RENDUS_FOLDER = "seances";
15
13
  export declare const COMMISSION_FOLDER = "commissions";
@@ -29,6 +27,46 @@ export type IterItem<T> = {
29
27
  legislature?: number;
30
28
  gitStatus?: "A" | "M" | "D" | "R" | "C" | "T" | "U";
31
29
  };
30
+ export interface TexteMetadata {
31
+ name: string;
32
+ session: number | null | undefined;
33
+ date?: string | null;
34
+ url_expose_des_motifs?: URL;
35
+ url_xml: URL;
36
+ url_html: URL;
37
+ url_pdf: URL;
38
+ }
39
+ export interface RapportMetadata {
40
+ name: string;
41
+ session: number | null | undefined;
42
+ date?: string | null;
43
+ url_html: URL;
44
+ url_pdf: URL;
45
+ }
46
+ export interface DossierLegislatifDocumentResult {
47
+ signet_dossier: string;
48
+ url_dossier_senat: string;
49
+ url_dossier_assemblee_nationale: string | null;
50
+ type_lecture: string;
51
+ libelle_lecture: string;
52
+ libelle_organisme: string | null;
53
+ code_organisme: string | null;
54
+ numero: number | null;
55
+ id: string | null;
56
+ url: string;
57
+ origine?: string | null | undefined;
58
+ type: string;
59
+ date: string;
60
+ session: number | null;
61
+ auteurs: {
62
+ prenom: string | null;
63
+ nom_usuel: string;
64
+ matricule: string | null;
65
+ }[];
66
+ organismes?: {
67
+ libelle: string;
68
+ }[] | undefined;
69
+ }
32
70
  export declare function iterFilePaths(dirPath: string): Generator<string>;
33
71
  export declare function iterLoadSenatAmendements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AmendementResult>>;
34
72
  export declare function iterLoadSenatDebats(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DebatResult>>;
@@ -41,11 +79,11 @@ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, s
41
79
  session: number;
42
80
  }>;
43
81
  export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
44
- export declare function iterLoadSenatRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
45
- export declare function iterLoadSenatTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
46
- export declare function iterLoadSenatDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
47
- export declare function iterLoadSenatRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
48
- export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
82
+ export declare function iterLoadSenatDossiersLegislatifsRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<RapportMetadata>>;
83
+ export declare function iterLoadSenatDossiersLegislatifsTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<TexteMetadata>>;
84
+ export declare function iterLoadSenatDossiersLegislatifsDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DossierLegislatifDocumentResult>>;
85
+ export declare function iterLoadSenatDossiersLegislatifsRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifDocumentResult>>;
86
+ export declare function iterLoadSenatDossiersLegislatifsTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifDocumentResult>>;
49
87
  export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
50
88
  export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
51
89
  item: CompteRendu | null;
package/lib/loaders.js CHANGED
@@ -50,9 +50,6 @@ function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, {
50
50
  console.log(`Found ${changedFiles?.size || 0} changed files (AMR)`);
51
51
  }
52
52
  for (const filePath of iterFilePaths(itemsDir)) {
53
- if (!filePath.endsWith(".json")) {
54
- continue;
55
- }
56
53
  const relativePath = path.relative(path.join(dataDir, dataName), filePath);
57
54
  const gitStatus = changedFiles?.get(relativePath);
58
55
  // Filter by changed files if sinceCommit is specified
@@ -150,7 +147,7 @@ export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}
150
147
  yield dossierLegislatifItem;
151
148
  }
152
149
  }
153
- export function* iterLoadSenatRapportUrls(dataDir, session) {
150
+ export function* iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session) {
154
151
  let itemsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
155
152
  if (session) {
156
153
  itemsDir = path.join(itemsDir, session.toString());
@@ -166,7 +163,7 @@ export function* iterLoadSenatRapportUrls(dataDir, session) {
166
163
  }
167
164
  }
168
165
  }
169
- export function* iterLoadSenatTexteUrls(dataDir, session) {
166
+ export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
170
167
  let itemsDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
171
168
  if (session) {
172
169
  itemsDir = path.join(itemsDir, session.toString());
@@ -182,7 +179,7 @@ export function* iterLoadSenatTexteUrls(dataDir, session) {
182
179
  }
183
180
  }
184
181
  }
185
- export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
182
+ export function* iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, documentType, options = {}) {
186
183
  for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
187
184
  for (const lecture of dossierLegislatif["lectures"]) {
188
185
  const lecturesSenat = lecture.lectures_assemblee.filter((lectureAssemblee) => lectureAssemblee.assemblee === "Sénat");
@@ -190,10 +187,15 @@ export function* iterLoadSenatDocuments(dataDir, session, documentType, options
190
187
  for (const document of lectureSenat[documentType]) {
191
188
  const enrichedDocument = {
192
189
  signet_dossier: dossierLegislatif["signet"],
190
+ url_dossier_senat: dossierLegislatif["url"],
191
+ url_dossier_assemblee_nationale: dossierLegislatif["url_dossier_assemblee_nationale"],
192
+ type_lecture: lecture.type_lecture,
193
+ libelle_lecture: lecture.libelle,
194
+ libelle_organisme: lectureSenat.libelle_organisme,
193
195
  ...document,
194
196
  };
195
197
  const documentItem = {
196
- item: enrichedDocument
198
+ item: enrichedDocument,
197
199
  };
198
200
  if (document.url) {
199
201
  const documentName = path.parse(document.url).name;
@@ -205,15 +207,13 @@ export function* iterLoadSenatDocuments(dataDir, session, documentType, options
205
207
  }
206
208
  }
207
209
  }
208
- export function* iterLoadSenatRapports(dataDir, session, options = {}) {
209
- for (const iterItem of iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options)) {
210
- if (iterItem.item?.["id"]) {
211
- yield iterItem;
212
- }
210
+ export function* iterLoadSenatDossiersLegislatifsRapports(dataDir, session, options = {}) {
211
+ for (const iterItem of iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, "rapports", options)) {
212
+ yield iterItem;
213
213
  }
214
214
  }
215
- export function* iterLoadSenatTextes(dataDir, session, options = {}) {
216
- for (const iterItem of iterLoadSenatDocuments(dataDir, session, "textes", options)) {
215
+ export function* iterLoadSenatDossiersLegislatifsTextes(dataDir, session, options = {}) {
216
+ for (const iterItem of iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, "textes", options)) {
217
217
  yield iterItem;
218
218
  }
219
219
  }
@@ -1,12 +1,2 @@
1
- import { Expression, InferResult, SelectQueryBuilder } from "kysely";
2
- export declare function rapports(lectureAssembleeId: Expression<string>): import("kysely").RawBuilder<{
3
- [x: string]: any;
4
- }[]>;
5
- declare const queryTextes: SelectQueryBuilder<any, any, any>;
6
- export declare function textes(lectureAssembleeId: Expression<string>): import("kysely").RawBuilder<{
7
- [x: string]: any;
8
- }[]>;
9
- export declare function findAllTextes(): AsyncIterableIterator<DocumentResult>;
10
- export declare function findAllRapports(): AsyncIterableIterator<DocumentResult>;
11
- export type DocumentResult = InferResult<typeof queryTextes>[0];
12
- export {};
1
+ export declare function findSenatTexteUrls(sessions?: number[]): any;
2
+ export declare function findSenatRapportUrls(sessions?: number[]): any;
@@ -1,151 +1,33 @@
1
- import { sql } from "kysely";
2
1
  import { dbSenat } from "../databases";
3
- import { concat, rtrim, toDateString } from "./util";
4
- import { jsonArrayFrom } from "kysely/helpers/postgres";
5
- function orderOrdreOrigineTexte(expr) {
6
- return sql `array_position(array['0','2','1'], ${expr})`;
7
- }
8
- function auteursRapport(rapportId) {
9
- return jsonArrayFrom(dbSenat
2
+ import { rtrim, toDateString } from "./util";
3
+ export function findSenatTexteUrls(sessions = []) {
4
+ return dbSenat
10
5
  .withSchema("dosleg")
11
- .selectFrom("dosleg.auteur")
12
- .leftJoin("dosleg.ecr", "dosleg.ecr.autcod", "dosleg.auteur.autcod")
13
- .leftJoin("dosleg.rolsig", "dosleg.rolsig.signataire", "dosleg.ecr.signataire")
14
- .where("dosleg.ecr.rapcod", "=", rapportId)
15
- .select([
16
- "dosleg.auteur.prenom as prenom",
17
- "dosleg.auteur.nomuse as nom_usuel",
18
- "dosleg.auteur.autmat as matricule",
19
- "dosleg.ecr.ecrnumtri as ordre",
20
- "dosleg.rolsig.rolsiglib as role",
21
- "dosleg.ecr.ecrqua as qualite",
6
+ .selectFrom("texte")
7
+ .where("texurl", "is not", null)
8
+ .where("typurl", "=", "I")
9
+ .$if(sessions.length > 0, (qb) => qb.where("sesann", "in", sessions))
10
+ .select(({ eb, ref }) => [
11
+ "sesann as session",
12
+ rtrim(ref("texurl")).as("url"),
13
+ toDateString(ref("txtoritxtdat")).as("date"),
14
+ eb.case().when("oritxtcod", "=", "1").then(true).else(false).end().as("hasExposeDesMotifs"),
22
15
  ])
23
- .orderBy("dosleg.ecr.ecrnumtri", "asc"));
24
- }
25
- function documentsAttaches(rapportId) {
26
- return jsonArrayFrom(dbSenat
27
- .withSchema("dosleg")
28
- .selectFrom("docatt")
29
- .leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
30
- .where("docatt.rapcod", "=", rapportId)
31
- .select([
32
- "docatt.docatturl as url",
33
- "typatt.typattlib as type_document"
34
- ]));
35
- }
36
- function selectRapportAttributes({ eb, ref, val }) {
37
- return [
38
- "rap.rapnum as numero",
39
- "raporg.orgcod as code_organisme",
40
- eb
41
- .case()
42
- .when("rap.rapurl", "is not", null)
43
- .then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
44
- .else(null)
45
- .end()
46
- .as("id"),
47
- eb
48
- .case()
49
- .when("rap.typurl", "=", "I")
50
- .then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
51
- .else(rtrim(ref("rap.rapurl")))
52
- .end()
53
- .as("url"),
54
- rtrim(ref("denrap.libdenrap")).as("type"),
55
- rtrim(rtrim(ref("rap.raptil"))).as("titre"),
56
- rtrim(rtrim(ref("rap.rapsoustit"))).as("sous_titre"),
57
- toDateString(ref("rap.date_depot")).as("date"),
58
- "rap.sesann as session",
59
- auteursRapport(ref("rap.rapcod")).as("auteurs"),
60
- documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
61
- ];
62
- }
63
- const baseQueryRapports = dbSenat
64
- .withSchema("dosleg")
65
- .selectFrom("rap")
66
- .leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
67
- .leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
68
- .leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod");
69
- const queryRapports = baseQueryRapports
70
- .leftJoin("lecass", "lecass.lecassidt", "lecassrap.lecassidt")
71
- .leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
72
- .leftJoin("loi", "loi.loicod", "lecture.loicod")
73
- .select((args) => [
74
- "loi.signet as signet_dossier",
75
- ...selectRapportAttributes(args),
76
- ]);
77
- export function rapports(lectureAssembleeId) {
78
- return jsonArrayFrom(baseQueryRapports
79
- .select(selectRapportAttributes)
80
- .where("lecassrap.lecassidt", "=", lectureAssembleeId));
16
+ .$narrowType()
17
+ .stream();
81
18
  }
82
- function auteursTexte(texteId) {
83
- return jsonArrayFrom(dbSenat
19
+ export function findSenatRapportUrls(sessions = []) {
20
+ return dbSenat
84
21
  .withSchema("dosleg")
85
- .selectFrom("auteur")
86
- .leftJoin("ecr", "ecr.autcod", "auteur.autcod")
87
- .leftJoin("rolsig", "rolsig.signataire", "ecr.signataire")
88
- .where("ecr.texcod", "=", texteId)
89
- .select([
90
- "auteur.prenom as prenom",
91
- "auteur.nomuse as nom_usuel",
92
- "auteur.autmat as matricule",
93
- "ecr.ecrnumtri as ordre",
94
- "rolsig.rolsiglib as role",
95
- "ecr.ecrqua as qualite",
22
+ .selectFrom("rap")
23
+ .where("rapurl", "is not", null)
24
+ .where("typurl", "=", "I")
25
+ .$if(sessions.length > 0, (qb) => qb.where("sesann", "in", sessions))
26
+ .select(({ ref }) => [
27
+ "sesann as session",
28
+ rtrim(ref("rapurl")).as("url"),
29
+ toDateString(ref("date_depot")).as("date"),
96
30
  ])
97
- .orderBy("ecr.ecrnumtri", "asc"));
98
- }
99
- function selectTexteAttributes({ eb, ref, val }) {
100
- return [
101
- "texte.texnum as numero",
102
- "texte.orgcod as code_organisme",
103
- eb
104
- .case()
105
- .when("texte.texurl", "is not", null)
106
- .then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
107
- .else(null)
108
- .end()
109
- .as("id"),
110
- eb
111
- .case()
112
- .when("texte.typurl", "=", "I")
113
- .then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
114
- .else(rtrim(ref("texte.texurl")))
115
- .end()
116
- .as("url"),
117
- rtrim(ref("oritxt.oritxtlib")).as("origine"),
118
- "oritxt.oriordre as ordre_origine",
119
- "oritxt.oritxtado as code_adoption",
120
- "oritxt.oritxtmod as modification",
121
- rtrim(ref("typtxt.typtxtlib")).as("type"),
122
- toDateString(ref("texte.txtoritxtdat")).as("date"),
123
- "texte.sesann as session",
124
- auteursTexte(ref("texte.texcod")).as("auteurs"),
125
- ];
126
- }
127
- const baseQueryTextes = dbSenat
128
- .withSchema("dosleg")
129
- .selectFrom("texte")
130
- .leftJoin("oritxt", "oritxt.oritxtcod", "texte.oritxtcod")
131
- .leftJoin("typtxt", "typtxt.typtxtcod", "texte.typtxtcod")
132
- .orderBy(({ ref }) => orderOrdreOrigineTexte(ref("oritxt.oriordre")));
133
- const queryTextes = baseQueryTextes
134
- .leftJoin("lecass", "lecass.lecassidt", "texte.lecassidt")
135
- .leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
136
- .leftJoin("loi", "loi.loicod", "lecture.loicod")
137
- .select((args) => [
138
- "loi.signet as signet_dossier",
139
- ...selectTexteAttributes(args),
140
- ]);
141
- export function textes(lectureAssembleeId) {
142
- return jsonArrayFrom(baseQueryTextes
143
- .select(selectTexteAttributes)
144
- .where("texte.lecassidt", "=", lectureAssembleeId));
145
- }
146
- export function findAllTextes() {
147
- return queryTextes.stream();
148
- }
149
- export function findAllRapports() {
150
- return queryRapports.stream();
31
+ .$narrowType()
32
+ .stream();
151
33
  }
@@ -1,8 +1,10 @@
1
1
  import { sql } from "kysely";
2
2
  import { jsonArrayFrom } from "kysely/helpers/postgres";
3
3
  import { dbSenat } from "../databases";
4
- import { concat, rtrim, toDateString } from "./util";
5
- import { textes, rapports } from "./documents";
4
+ import { concat, removeSubstring, rtrim, toDateString } from "./util";
5
+ function orderOrdreOrigineTexte(expr) {
6
+ return sql `array_position(array['0','2','1'], ${expr})`;
7
+ }
6
8
  function datesSeances(lectureAssembleeId) {
7
9
  return jsonArrayFrom(dbSenat
8
10
  .withSchema("dosleg")
@@ -10,6 +12,120 @@ function datesSeances(lectureAssembleeId) {
10
12
  .where("dosleg.date_seance.lecidt", "=", lectureAssembleeId)
11
13
  .select(({ ref }) => [toDateString(ref("dosleg.date_seance.date_s")).as("date")]));
12
14
  }
15
+ function auteursRapport(rapportId) {
16
+ return jsonArrayFrom(dbSenat
17
+ .withSchema("dosleg")
18
+ .selectFrom("dosleg.auteur")
19
+ .leftJoin("dosleg.ecr", "dosleg.ecr.autcod", "dosleg.auteur.autcod")
20
+ .leftJoin("dosleg.rolsig", "dosleg.rolsig.signataire", "dosleg.ecr.signataire")
21
+ .where("dosleg.ecr.rapcod", "=", rapportId)
22
+ .select([
23
+ "dosleg.auteur.prenom as prenom",
24
+ "dosleg.auteur.nomuse as nom_usuel",
25
+ "dosleg.auteur.autmat as matricule",
26
+ "dosleg.ecr.ecrnumtri as ordre",
27
+ "dosleg.rolsig.rolsiglib as role",
28
+ "dosleg.ecr.ecrqua as qualite",
29
+ ])
30
+ .orderBy("dosleg.ecr.ecrnumtri", "asc"));
31
+ }
32
+ function documentsAttaches(rapportId) {
33
+ return jsonArrayFrom(dbSenat
34
+ .withSchema("dosleg")
35
+ .selectFrom("docatt")
36
+ .leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
37
+ .where("docatt.rapcod", "=", rapportId)
38
+ .select([
39
+ "docatt.docatturl as url",
40
+ "typatt.typattlib as type_document"
41
+ ]));
42
+ }
43
+ function rapports(lectureAssembleeId) {
44
+ return jsonArrayFrom(dbSenat
45
+ .withSchema("dosleg")
46
+ .selectFrom("rap")
47
+ .leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod")
48
+ .leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
49
+ .leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
50
+ .where("lecassrap.lecassidt", "=", lectureAssembleeId)
51
+ .select(({ eb, ref, val }) => [
52
+ "rap.rapnum as numero",
53
+ "raporg.orgcod as code_organisme",
54
+ eb
55
+ .case()
56
+ .when("rap.typurl", "=", "I")
57
+ .then(removeSubstring(ref("rap.rapurl"), val(".html")))
58
+ .else(null)
59
+ .end()
60
+ .as("id"),
61
+ eb
62
+ .case()
63
+ .when("rap.typurl", "=", "I")
64
+ .then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
65
+ .else(rtrim(ref("rap.rapurl")))
66
+ .end()
67
+ .as("url"),
68
+ rtrim(ref("denrap.libdenrap")).as("type"),
69
+ rtrim(ref("rap.raptil")).as("titre"),
70
+ rtrim(ref("rap.rapsoustit")).as("sous_titre"),
71
+ toDateString(ref("rap.date_depot")).as("date"),
72
+ "sesann as session",
73
+ auteursRapport(ref("rap.rapcod")).as("auteurs"),
74
+ documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
75
+ ]));
76
+ }
77
+ function auteursTexte(texteId) {
78
+ return jsonArrayFrom(dbSenat
79
+ .withSchema("dosleg")
80
+ .selectFrom("auteur")
81
+ .leftJoin("ecr", "ecr.autcod", "auteur.autcod")
82
+ .leftJoin("rolsig", "rolsig.signataire", "ecr.signataire")
83
+ .where("ecr.texcod", "=", texteId)
84
+ .select([
85
+ "auteur.prenom as prenom",
86
+ "auteur.nomuse as nom_usuel",
87
+ "auteur.autmat as matricule",
88
+ "ecr.ecrnumtri as ordre",
89
+ "rolsig.rolsiglib as role",
90
+ "ecr.ecrqua as qualite",
91
+ ])
92
+ .orderBy("ecr.ecrnumtri", "asc"));
93
+ }
94
+ function textes(lectureAssembleeId) {
95
+ return jsonArrayFrom(dbSenat
96
+ .withSchema("dosleg")
97
+ .selectFrom("texte")
98
+ .leftJoin("oritxt", "oritxt.oritxtcod", "texte.oritxtcod")
99
+ .leftJoin("typtxt", "typtxt.typtxtcod", "texte.typtxtcod")
100
+ .where("texte.lecassidt", "=", lectureAssembleeId)
101
+ .select(({ eb, ref, val }) => [
102
+ "texte.texnum as numero",
103
+ "texte.orgcod as code_organisme",
104
+ eb
105
+ .case()
106
+ .when("texte.typurl", "=", "I")
107
+ .then(removeSubstring(ref("texte.texurl"), val(".html")))
108
+ .else(null)
109
+ .end()
110
+ .as("id"),
111
+ eb
112
+ .case()
113
+ .when("texte.typurl", "=", "I")
114
+ .then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
115
+ .else(rtrim(ref("texte.texurl")))
116
+ .end()
117
+ .as("url"),
118
+ rtrim(ref("oritxt.oritxtlib")).as("origine"),
119
+ "oritxt.oriordre as ordre_origine",
120
+ "oritxt.oritxtado as code_adoption",
121
+ "oritxt.oritxtmod as modification",
122
+ rtrim(ref("typtxt.typtxtlib")).as("type"),
123
+ toDateString(ref("texte.txtoritxtdat")).as("date"),
124
+ "sesann as session",
125
+ auteursTexte(ref("texte.texcod")).as("auteurs"),
126
+ ])
127
+ .orderBy(({ ref }) => orderOrdreOrigineTexte(ref("oritxt.oriordre"))));
128
+ }
13
129
  function lecturesAssemblee(lectureId) {
14
130
  return jsonArrayFrom(dbSenat
15
131
  .withSchema("dosleg")
@@ -1,7 +1,7 @@
1
1
  export { findAllAmendements } from "./ameli";
2
2
  export { findAll as findAllDebats } from "./debats";
3
3
  export { findAllDossiers } from "./dosleg";
4
- export { findAllTextes, findAllRapports } from "./documents";
4
+ export { findSenatTexteUrls, findSenatRapportUrls } from "./documents";
5
5
  export { findAllScrutins } from "./scrutins";
6
6
  export { findAll as findAllQuestions } from "./questions";
7
7
  export { findAll as findAllSens, findAllCirconscriptions, findAllOrganismes } from "./sens";
@@ -1,7 +1,7 @@
1
1
  export { findAllAmendements } from "./ameli";
2
2
  export { findAll as findAllDebats } from "./debats";
3
3
  export { findAllDossiers } from "./dosleg";
4
- export { findAllTextes, findAllRapports } from "./documents";
4
+ export { findSenatTexteUrls, findSenatRapportUrls } from "./documents";
5
5
  export { findAllScrutins } from "./scrutins";
6
6
  export { findAll as findAllQuestions } from "./questions";
7
7
  export { findAll as findAllSens, findAllCirconscriptions, findAllOrganismes } from "./sens";
@@ -5,8 +5,8 @@ import path from "path";
5
5
  import pLimit from "p-limit";
6
6
  import * as git from "../git";
7
7
  import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
8
- import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER } from "../loaders";
9
- import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAllTextes, findAllRapports, } from "../model";
8
+ import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
9
+ import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findSenatRapportUrls, findSenatTexteUrls, } from "../model";
10
10
  import { processRapport, processTexte } from "./retrieve_documents";
11
11
  import { buildActesLegislatifs } from "../model/dosleg";
12
12
  import { UNDEFINED_SESSION } from "../types/sessions";
@@ -169,8 +169,8 @@ async function convertDatasetDosLeg(dataDir, options) {
169
169
  const dossierFile = `${dossier["signet"]}.json`;
170
170
  await fs.outputJSON(path.join(dossierReorganizedDir, dossierFile), dossierWithActes, { spaces: 2 });
171
171
  }
172
- await convertTextes(dataDir, options);
173
- await convertRapports(dataDir, options);
172
+ await convertTexteUrls(dataDir, options);
173
+ await convertRapportUrls(dataDir, options);
174
174
  }
175
175
  async function convertDatasetScrutins(dataDir, options) {
176
176
  const dataset = datasets.dosleg;
@@ -219,38 +219,30 @@ async function convertDatasetQuestions(dataDir, options) {
219
219
  }
220
220
  await Promise.all(tasks);
221
221
  }
222
- async function convertTextes(dataDir, options) {
222
+ async function convertTexteUrls(dataDir, options) {
223
223
  const originalTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
224
224
  const transformedTextesDir = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER);
225
225
  if (!options["silent"]) {
226
226
  console.log(`Converting database textes data into files…`);
227
227
  }
228
- for await (const texte of findAllTextes()) {
229
- const session = texte["session"] ?? UNDEFINED_SESSION;
228
+ for await (const texte of findSenatTexteUrls()) {
229
+ const session = texte.session ?? UNDEFINED_SESSION;
230
230
  if (options["fromSession"] && session < options["fromSession"]) {
231
231
  continue;
232
232
  }
233
- if (!texte["url"]) {
234
- continue;
235
- }
236
- const texteName = path.parse(texte["url"]).name;
233
+ const texteName = path.parse(texte.url).name;
237
234
  const texteDir = path.join(originalTextesDir, `${session}`, texteName);
238
- // oritxtcod = 1 corresponds to "Texte de loi déposé au Sénat"
239
- const hasExposeDesMotifs = texte["origine"] === 'Sénat' && texte["ordre_origine"] === '1';
240
235
  const metadata = {
241
236
  name: texteName,
242
- session: texte["session"],
243
- date: texte["date"],
244
- url_expose_des_motifs: hasExposeDesMotifs
237
+ session: texte.session,
238
+ date: texte.date,
239
+ url_expose_des_motifs: texte.hasExposeDesMotifs
245
240
  ? new URL(`${texteName}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL)
246
241
  : undefined,
247
242
  url_xml: new URL(`${texteName}.akn.xml`, SENAT_TEXTE_XML_BASE_URL),
248
243
  url_html: new URL(`${texteName}.html`, SENAT_TEXTE_BASE_URL),
249
244
  url_pdf: new URL(`${texteName}.pdf`, SENAT_TEXTE_BASE_URL),
250
245
  };
251
- fs.outputJSONSync(path.join(texteDir, `${texteName}.json`), texte, {
252
- spaces: 2,
253
- });
254
246
  fs.outputJSONSync(path.join(texteDir, DOCUMENT_METADATA_FILE), metadata, {
255
247
  spaces: 2,
256
248
  });
@@ -259,20 +251,17 @@ async function convertTextes(dataDir, options) {
259
251
  }
260
252
  }
261
253
  }
262
- async function convertRapports(dataDir, options) {
254
+ async function convertRapportUrls(dataDir, options) {
263
255
  const originalRapportsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
264
256
  if (!options["silent"]) {
265
257
  console.log(`Converting database rapports data into files…`);
266
258
  }
267
- for await (const rapport of findAllRapports()) {
268
- const session = rapport["session"] ?? UNDEFINED_SESSION;
259
+ for await (const rapport of findSenatRapportUrls()) {
260
+ const session = rapport.session ?? UNDEFINED_SESSION;
269
261
  if (options["fromSession"] && session < options["fromSession"]) {
270
262
  continue;
271
263
  }
272
- if (!rapport["url"]) {
273
- continue;
274
- }
275
- const parsedRapportUrl = path.parse(rapport["url"]);
264
+ const parsedRapportUrl = path.parse(rapport.url);
276
265
  const rapportName = parsedRapportUrl.name;
277
266
  const rapportDir = path.join(originalRapportsDir, `${session}`, rapportName);
278
267
  const rapportHtmlUrlBase = `${rapportName}_mono.html`;
@@ -287,14 +276,11 @@ async function convertRapports(dataDir, options) {
287
276
  });
288
277
  const metadata = {
289
278
  name: rapportName,
290
- session: rapport["session"],
291
- date: rapport["date"],
279
+ session: rapport.session,
280
+ date: rapport.date,
292
281
  url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
293
282
  url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
294
283
  };
295
- fs.outputJSONSync(path.join(rapportDir, `${rapportName}.json`), rapport, {
296
- spaces: 2,
297
- });
298
284
  fs.outputJSONSync(path.join(rapportDir, DOCUMENT_METADATA_FILE), metadata, {
299
285
  spaces: 2,
300
286
  });
@@ -1,3 +1,2 @@
1
- import { DocumentMetadata } from "../types/texte";
2
- export declare function processTexte(texteMetadata: DocumentMetadata, originalTextesDir: string, transformedTextesDir: string, options: any): Promise<void>;
1
+ export declare function processTexte(texteMetadata: any, originalTextesDir: string, transformedTextesDir: string, options: any): Promise<void>;
3
2
  export declare function processRapport(rapportMetadata: any, originalRapportsDir: string, options: any): Promise<void>;