@tricoteuses/senat 2.20.21 → 2.20.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -43,7 +43,6 @@ npm run data:download ../senat-data
43
43
 
44
44
  - `npm run data:download <dir>`: Download, convert data to JSON
45
45
  - `npm run data:retrieve_documents <dir>`: Retrieval of textes and rapports from Sénat's website
46
- - `npm run data:parse_textes_lois <dir>`: Parse textes (requires xml files)
47
46
  - `npm run data:retrieve_agenda <dir>`: Retrieval of agenda from Sénat's website
48
47
  - `npm run data:retrieve_cr_seance <dir>`: Retrieval of comptes-rendus de séance from Sénat's data
49
48
  - `npm run data:retrieve_cr_commission <dir>`: Retrieval of comptes-rendus de commissions from Sénat's website
package/lib/loaders.d.ts CHANGED
@@ -13,11 +13,11 @@ export declare const COMPTES_RENDUS_FOLDER = "seances";
13
13
  export declare const COMMISSION_FOLDER = "commissions";
14
14
  export declare const DOSLEG_DOSSIERS_FOLDER = "dossiers";
15
15
  export declare const SCRUTINS_FOLDER = "scrutins";
16
- export declare const RAPPORT_FOLDER = "rap";
17
16
  export declare const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
18
17
  export declare const SENS_ORGANISMES_FOLDER = "organismes";
19
18
  export declare const SENS_SENATEURS_FOLDER = "senateurs";
20
19
  export declare const TEXTE_FOLDER = "leg";
20
+ export declare const RAPPORT_FOLDER = "rap";
21
21
  export declare const DATA_ORIGINAL_FOLDER = "original";
22
22
  export declare const DATA_TRANSFORMED_FOLDER = "transformed";
23
23
  export declare const DOCUMENT_METADATA_FILE = "metadata.json";
@@ -25,6 +25,7 @@ export type IterItem<T> = {
25
25
  item: T;
26
26
  filePathFromDataset?: string;
27
27
  legislature?: number;
28
+ gitStatus?: "A" | "M" | "D" | "R" | "C" | "T" | "U";
28
29
  };
29
30
  export interface TexteMetadata {
30
31
  name: string;
package/lib/loaders.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import fsex from "fs-extra";
2
2
  import fs from "fs";
3
3
  import path from "path";
4
+ import * as git from "./git";
4
5
  import { datasets } from "./datasets";
5
6
  import { UNDEFINED_SESSION } from "./types/sessions";
6
7
  export { EnabledDatasets } from "./datasets";
@@ -9,11 +10,11 @@ export const COMPTES_RENDUS_FOLDER = "seances";
9
10
  export const COMMISSION_FOLDER = "commissions";
10
11
  export const DOSLEG_DOSSIERS_FOLDER = "dossiers";
11
12
  export const SCRUTINS_FOLDER = "scrutins";
12
- export const RAPPORT_FOLDER = "rap";
13
13
  export const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
14
14
  export const SENS_ORGANISMES_FOLDER = "organismes";
15
15
  export const SENS_SENATEURS_FOLDER = "senateurs";
16
16
  export const TEXTE_FOLDER = "leg";
17
+ export const RAPPORT_FOLDER = "rap";
17
18
  export const DATA_ORIGINAL_FOLDER = "original";
18
19
  export const DATA_TRANSFORMED_FOLDER = "transformed";
19
20
  export const DOCUMENT_METADATA_FILE = "metadata.json";
@@ -30,7 +31,7 @@ export function* iterFilePaths(dirPath) {
30
31
  }
31
32
  }
32
33
  }
33
- function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, { log = false } = {}) {
34
+ function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, { log = false, sinceCommit } = {}) {
34
35
  let itemsDir = path.join(dataDir, dataName);
35
36
  if (subDir) {
36
37
  itemsDir = path.join(itemsDir, subDir);
@@ -38,9 +39,26 @@ function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, {
38
39
  if (legislatureOrSession) {
39
40
  itemsDir = path.join(itemsDir, String(legislatureOrSession));
40
41
  }
42
+ // Get changed files if sinceCommit is specified (excluding deleted files)
43
+ const changedFiles = sinceCommit
44
+ ? git.getChangedFilesSinceCommit(itemsDir, sinceCommit, {
45
+ diffFilter: "AMR", // Added, Modified, Renamed
46
+ })
47
+ : null;
48
+ if (log && sinceCommit) {
49
+ console.log(`Filtering files changed since commit ${sinceCommit} in ${itemsDir}`);
50
+ console.log(`Found ${changedFiles?.size || 0} changed files (AMR)`);
51
+ }
41
52
  for (const filePath of iterFilePaths(itemsDir)) {
53
+ const relativePath = path.relative(path.join(dataDir, dataName), filePath);
54
+ const gitStatus = changedFiles?.get(relativePath);
55
+ // Filter by changed files if sinceCommit is specified
56
+ if (changedFiles && !gitStatus) {
57
+ // Skip files not in the change set
58
+ continue;
59
+ }
42
60
  if (log) {
43
- console.log(`Loading file: ${filePath}…`);
61
+ console.log(`Loading file: ${filePath}…${gitStatus ? ` (${gitStatus})` : ""}`);
44
62
  }
45
63
  let item;
46
64
  try {
@@ -56,8 +74,35 @@ function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, {
56
74
  item,
57
75
  filePathFromDataset,
58
76
  legislature: legislatureOrSession,
77
+ ...(gitStatus && { gitStatus }), // Include gitStatus
59
78
  };
60
79
  }
80
+ // Yield deleted files at the end if sinceCommit is specified
81
+ if (sinceCommit) {
82
+ const deletedFiles = git.getChangedFilesSinceCommit(itemsDir, sinceCommit, {
83
+ diffFilter: "D", // Deleted
84
+ });
85
+ if (log) {
86
+ console.log(`Found ${deletedFiles.size || 0} deleted files (D)`);
87
+ }
88
+ for (const [relativePath, status] of deletedFiles.entries()) {
89
+ const deletedFilePath = path.join(itemsDir, relativePath);
90
+ if (log) {
91
+ console.log(`Deleted file: ${deletedFilePath}`);
92
+ }
93
+ // Extract UID from filename (remove extension) for the placeholder item
94
+ const fileExtension = path.extname(relativePath) || ".json"; // Assuming files use an extension like .json
95
+ const filename = path.basename(relativePath, fileExtension);
96
+ const fakeItem = { uid: filename }; // Placeholder item using uid constraint
97
+ const filePathFromDataset = deletedFilePath.substring(deletedFilePath.indexOf(dataName) + dataName.length);
98
+ yield {
99
+ item: fakeItem,
100
+ filePathFromDataset,
101
+ legislature: legislatureOrSession,
102
+ gitStatus: status,
103
+ };
104
+ }
105
+ }
61
106
  }
62
107
  export function* iterLoadSenatAmendements(dataDir, session, options = {}) {
63
108
  for (const amendementItem of iterLoadSenatItems(dataDir, datasets.ameli.database, session, undefined, options)) {
@@ -1,8 +1,7 @@
1
1
  import { InferResult, SelectQueryBuilder } from "kysely";
2
2
  declare const findAllDossiersQuery: SelectQueryBuilder<any, any, any>;
3
3
  export declare function findAllDossiers(): AsyncIterableIterator<DossierLegislatifResult>;
4
- export declare function createActesLegislatifs(dossier: DossierLegislatifResult): any;
5
4
  export declare function getCodeActeLecture(codeNatureDossier: string, typeLecture: string, assemblee: string): string | null;
6
- export declare function getCodeActeTexte(codeParent: string | null, texteOrigine: string): string | null;
7
5
  export type DossierLegislatifResult = InferResult<typeof findAllDossiersQuery>[0];
6
+ export declare function buildActesLegislatifs(dossier: any): any[];
8
7
  export {};
@@ -29,6 +29,17 @@ function auteursRapport(rapportId) {
29
29
  ])
30
30
  .orderBy("dosleg.ecr.ecrnumtri", "asc"));
31
31
  }
32
+ function documentsAttaches(rapportId) {
33
+ return jsonArrayFrom(dbSenat
34
+ .withSchema("dosleg")
35
+ .selectFrom("docatt")
36
+ .leftJoin("typatt", "docatt.typattcod", "typatt.typattcod")
37
+ .where("docatt.rapcod", "=", rapportId)
38
+ .select([
39
+ "docatt.docatturl as url",
40
+ "typatt.typattlib as type_document"
41
+ ]));
42
+ }
32
43
  function rapports(lectureAssembleeId) {
33
44
  return jsonArrayFrom(dbSenat
34
45
  .withSchema("dosleg")
@@ -55,9 +66,12 @@ function rapports(lectureAssembleeId) {
55
66
  .end()
56
67
  .as("url"),
57
68
  rtrim(ref("denrap.libdenrap")).as("type"),
69
+ rtrim(ref("rap.raptil")).as("titre"),
70
+ rtrim(ref("rap.rapsoustit")).as("sous_titre"),
58
71
  toDateString(ref("rap.date_depot")).as("date"),
59
72
  "sesann as session",
60
73
  auteursRapport(ref("rap.rapcod")).as("auteurs"),
74
+ documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
61
75
  ]));
62
76
  }
63
77
  function auteursTexte(texteId) {
@@ -221,90 +235,6 @@ const findAllDossiersQuery = dbSenat
221
235
  export function findAllDossiers() {
222
236
  return findAllDossiersQuery.stream();
223
237
  }
224
- export function createActesLegislatifs(dossier) {
225
- const actesLegislatifs = (dossier["lectures"] || []).flatMap((lecture) => {
226
- const lecturesAssemblee = (lecture["lectures_assemblee"] || []).map((lectureAss) => {
227
- const codeParent = getCodeActeLecture(dossier["code_nature_dossier"], lecture["type_lecture"], lectureAss["assemblee"]);
228
- const textesWithCodeActe = (lectureAss["textes"] || []).map((texte) => ({
229
- code_acte: getCodeActeTexte(codeParent, texte["origine"]),
230
- ...texte,
231
- }));
232
- // Ajout étape -COM-FOND après chaque -DEPOT
233
- let acteLegislatifsLecture = [];
234
- for (let i = 0; i < textesWithCodeActe.length; i++) {
235
- const t = textesWithCodeActe[i];
236
- acteLegislatifsLecture.push(t);
237
- if (t.code_acte?.endsWith("-DEPOT") && t.type === "texte de loi") {
238
- acteLegislatifsLecture.push({
239
- ...t,
240
- code_acte: t.code_acte.replace("-DEPOT", "-COM-FOND"),
241
- });
242
- }
243
- }
244
- if (lectureAss["dates_seances"]?.length > 0) {
245
- acteLegislatifsLecture.push({
246
- session: lectureAss["session"],
247
- type_lecture: lecture["type_lecture"],
248
- libelle_lecture: "Discussion en séance publique",
249
- code_acte: `${codeParent}-DEBATS-SEANCE`,
250
- date: lectureAss["dates_seances"][0]?.["date"],
251
- id: lectureAss["id"],
252
- numero: lectureAss["numero"]
253
- });
254
- }
255
- const { textes, rapports, ...lectureAssWithoutTextes } = lectureAss;
256
- return {
257
- type_lecture: lecture["type_lecture"],
258
- ordre_lecture: lecture["ordre_lecture"],
259
- libelle_lecture: lecture["libelle"],
260
- code_acte: codeParent,
261
- actes_legislatifs: acteLegislatifsLecture,
262
- ...lectureAssWithoutTextes,
263
- };
264
- });
265
- return lecturesAssemblee;
266
- });
267
- if (dossier["date_decision_CoC"]) {
268
- actesLegislatifs.push({
269
- type_lecture: "Conseil constitutionnel",
270
- ordre_lecture: null,
271
- libelle_lecture: "Conseil constitutionnel",
272
- code_acte: "CC",
273
- actes_legislatifs: [
274
- {
275
- code_acte: "CC-SAISIE",
276
- date: dossier["date_saisine_CoC"],
277
- libelle_decision_CoC: dossier["libelle_decision_CoC"],
278
- date_decision_CoC: dossier["date_decision_CoC"],
279
- num_decision_CoC: dossier["num_decision_CoC"],
280
- url_decision_CoC: dossier["url_decision_CoC"],
281
- url_dossier_CoC: dossier["url_dossier_CoC"],
282
- date_saisine_CoC: dossier["date_saisine_CoC"],
283
- condition_saisine_CoC: dossier["condition_saisine_CoC"],
284
- },
285
- ],
286
- });
287
- }
288
- if (dossier["date_publication_JO"]) {
289
- actesLegislatifs.push({
290
- type_lecture: "Promulgation",
291
- ordre_lecture: null,
292
- libelle_lecture: "Promulgation",
293
- code_acte: "PROM",
294
- actes_legislatifs: [
295
- {
296
- code_acte: "PROM-PUB",
297
- date: dossier["date_publication_JO"],
298
- titre_JO: dossier["titre_JO"],
299
- date_publication_JO: dossier["date_publication_JO"],
300
- numero_JO: dossier["numero_JO"],
301
- url_JO: dossier["url_JO"],
302
- },
303
- ],
304
- });
305
- }
306
- return actesLegislatifs;
307
- }
308
238
  export function getCodeActeLecture(codeNatureDossier, typeLecture, assemblee) {
309
239
  const codeAssemblee = assemblee === "Sénat" ? "SN" : assemblee === "Assemblée nationale" ? "AN" : null;
310
240
  if (typeLecture === "Commission mixte paritaire") {
@@ -336,38 +266,177 @@ export function getCodeActeLecture(codeNatureDossier, typeLecture, assemblee) {
336
266
  }
337
267
  return null;
338
268
  }
339
- export function getCodeActeTexte(codeParent, texteOrigine) {
340
- if (codeParent === "CMP") {
341
- if (texteOrigine === "de la commission") {
342
- return "CMP-DEBATS-AN";
343
- }
344
- else if (texteOrigine === "adopté par l'Assemblée Nationale" ||
345
- texteOrigine === "adopté par l'Assemblée nationale") {
346
- return "CMP-DEBATS-SN";
347
- }
348
- }
349
- if (texteOrigine === "transmis au Sénat" ||
350
- texteOrigine === "déposé au Sénat" ||
351
- texteOrigine === "transmis à l'Assemblée nationale" ||
352
- texteOrigine === "déposé à l'Assemblée nationale" ||
353
- texteOrigine === "transmis à l'Assemblée Nationale" ||
354
- texteOrigine === "déposé à l'Assemblée Nationale" ||
355
- texteOrigine === "transmis en application de l'article 47-1, alinéa 2, de la Constitution") {
356
- return `${codeParent}-DEPOT`;
269
+ // Helper pour déterminer le code de phase (SN1, SN2, CMP...)
270
+ function getPhasePrefix(lecture, assemblee) {
271
+ if (assemblee !== "Sénat")
272
+ return null;
273
+ const typeLibelle = (lecture.type_lecture || "").toLowerCase();
274
+ if (typeLibelle.includes("cmp") || typeLibelle.includes("mixte"))
275
+ return "CMP";
276
+ if (typeLibelle.includes("nouvelle lecture"))
277
+ return "SNNLEC";
278
+ if (typeLibelle.includes("d\u00e9finitive"))
279
+ return "SNLDEF";
280
+ if (typeLibelle.includes("unique"))
281
+ return "SNLUNI";
282
+ if (lecture.ordre_lecture) {
283
+ return `SN${lecture.ordre_lecture}`;
357
284
  }
358
- if (texteOrigine === "de la commission" ||
359
- texteOrigine === "de la commission (AN)" ||
360
- texteOrigine === "résultat des travaux de la commission") {
361
- return `${codeParent}-DEBATS-SEANCE`;
285
+ if (typeLibelle.includes("premi\u00e8re"))
286
+ return "SN1";
287
+ if (typeLibelle.includes("deuxi\u00e8me") || typeLibelle.includes("seconde"))
288
+ return "SN2";
289
+ return "SN1";
290
+ }
291
+ export function buildActesLegislatifs(dossier) {
292
+ const actes = [];
293
+ const loiSignet = dossier.signet;
294
+ const lectures = dossier.lectures || [];
295
+ for (const lecture of lectures) {
296
+ const lecturesAssemblee = lecture.lectures_assemblee || [];
297
+ for (const lecAss of lecturesAssemblee) {
298
+ // On ne traite que la partie SÉNAT
299
+ if (lecAss.assemblee !== "Sénat")
300
+ continue;
301
+ const phasePrefix = getPhasePrefix(lecture, lecAss.assemblee);
302
+ if (!phasePrefix)
303
+ continue;
304
+ // Préparation des textes (tri chronologique)
305
+ const textes = lecAss.textes || [];
306
+ const textesTries = [...textes].sort((a, b) => new Date(a.date).getTime() - new Date(b.date).getTime());
307
+ // =================================================================
308
+ // A. DÉPÔT
309
+ // =================================================================
310
+ const depotTexte = textesTries.find((t) => (t.origine || "").toLowerCase().includes("déposé") ||
311
+ (t.origine || "").toLowerCase().includes("transmis") ||
312
+ t.ordre_origine === "0");
313
+ if (depotTexte && depotTexte.date) {
314
+ actes.push({
315
+ code_acte: `${phasePrefix}-DEPOT`,
316
+ date: depotTexte.date,
317
+ libelle: `Dépôt du texte n°${depotTexte.numero}`,
318
+ id: depotTexte.id,
319
+ numero: depotTexte.numero,
320
+ uid: `${loiSignet}-${phasePrefix}-DEPOT`,
321
+ session: lecAss.session,
322
+ chambre: 'SN',
323
+ signet_dossier: loiSignet,
324
+ texte_url: depotTexte.url,
325
+ code_organisme: null
326
+ });
327
+ }
328
+ // =================================================================
329
+ // B. COMMISSION (Rapports)
330
+ // =================================================================
331
+ const rapports = lecAss.rapports || [];
332
+ for (const rap of rapports) {
333
+ if (rap.date) {
334
+ actes.push({
335
+ code_acte: `${phasePrefix}-COM-FOND`,
336
+ date: rap.date,
337
+ libelle: `Rapport n°${rap.numero} de la commission`,
338
+ id: rap.id,
339
+ numero: rap.numero,
340
+ code_organisme: rap.code_organisme,
341
+ adoption: rap.adoption,
342
+ uid: `${loiSignet}-${phasePrefix}-COM`,
343
+ session: lecAss.session,
344
+ chambre: 'SN',
345
+ signet_dossier: loiSignet,
346
+ texte_url: rap.url,
347
+ });
348
+ }
349
+ }
350
+ // =================================================================
351
+ // C. SÉANCE PUBLIQUE
352
+ // =================================================================
353
+ const datesSeances = lecAss.dates_seances || [];
354
+ if (datesSeances.length > 0) {
355
+ // Tri des objets dates
356
+ datesSeances.sort((a, b) => new Date(a.date).getTime() - new Date(b.date).getTime());
357
+ const premiereSeance = datesSeances[0];
358
+ if (premiereSeance && premiereSeance.date) {
359
+ actes.push({
360
+ // Champs pour buildParlementActeLegislatif
361
+ code_acte: `${phasePrefix}-DEBATS-SEANCE`,
362
+ date: premiereSeance.date,
363
+ libelle: `Discussion en séance publique`,
364
+ uid: `${loiSignet}-${phasePrefix}-DEBATS-SEANCE`,
365
+ session: lecAss.session,
366
+ chambre: 'SN',
367
+ signet_dossier: loiSignet,
368
+ code_organisme: null
369
+ });
370
+ }
371
+ }
372
+ // =================================================================
373
+ // D. DÉCISION / VOTE
374
+ // =================================================================
375
+ const texteFinal = [...textesTries].reverse().find((t) => {
376
+ const origine = (t.origine || "").toLowerCase();
377
+ return (origine.includes("adopté") ||
378
+ origine.includes("rejeté") ||
379
+ origine.includes("devenu résolution") ||
380
+ t.code_adoption === "O");
381
+ });
382
+ if (texteFinal && texteFinal.date) {
383
+ const origine = (texteFinal.origine || "").toLowerCase();
384
+ let libelleStatut = "Adopté";
385
+ if (origine.includes("rejeté")) {
386
+ libelleStatut = "Rejeté";
387
+ }
388
+ else if (origine.includes("devenue résolution")) {
389
+ libelleStatut = "Adopté";
390
+ }
391
+ actes.push({
392
+ code_acte: `${phasePrefix}-DEBATS-DEC`,
393
+ date: texteFinal.date,
394
+ libelle: `${libelleStatut === "Adopté" ? "Adoption" : "Rejet"} (Texte n°${texteFinal.numero})`,
395
+ id: texteFinal.id,
396
+ numero: texteFinal.numero,
397
+ adoption: libelleStatut,
398
+ uid: `${loiSignet}-DEC-${texteFinal.numero}`,
399
+ session: lecAss.session,
400
+ chambre: 'SN',
401
+ signet_dossier: loiSignet,
402
+ texte_url: texteFinal.url,
403
+ code_organisme: null
404
+ });
405
+ }
406
+ }
362
407
  }
363
- if (texteOrigine === "retiré par l'auteur") {
364
- return `${codeParent}-RTRINI`;
408
+ // =================================================================
409
+ // E. HORS LECTURE (CC & PROMULGATION)
410
+ // =================================================================
411
+ if (dossier.date_decision_CoC) {
412
+ actes.push({
413
+ code_acte: 'CC',
414
+ date: dossier.date_decision_CoC,
415
+ libelle: `Décision du Conseil constitutionnel`,
416
+ id: dossier.url_decision_CoC,
417
+ uid: `${loiSignet}-CC`,
418
+ chambre: 'AN',
419
+ signet_dossier: loiSignet,
420
+ texte_url: dossier.url_decision_CoC || dossier.url_dossier_CoC,
421
+ });
365
422
  }
366
- if (texteOrigine.includes("adopté") ||
367
- texteOrigine.includes("rejeté") ||
368
- texteOrigine.includes("modifié") ||
369
- texteOrigine === "devenu résolution du Sénat") {
370
- return `${codeParent}-DEBATS-DEC`;
423
+ if (dossier.date_promulgation) {
424
+ actes.push({
425
+ code_acte: 'PROM',
426
+ date: dossier.date_promulgation,
427
+ libelle: `Promulgation de la loi`,
428
+ date_publication_JO: dossier.date_publication_JO,
429
+ numero_JO: dossier.numero_JO,
430
+ url_legifrance: dossier.url_JO,
431
+ id: dossier.url_JO,
432
+ uid: `${loiSignet}-PROM`,
433
+ chambre: 'AN',
434
+ signet_dossier: loiSignet,
435
+ });
371
436
  }
372
- return null;
437
+ return actes.sort((a, b) => {
438
+ const dateA = new Date(a.date).getTime();
439
+ const dateB = new Date(b.date).getTime();
440
+ return dateA - dateB;
441
+ });
373
442
  }
@@ -0,0 +1,7 @@
1
+ import { ExposeDesMotifs, FlatTexte } from "../types/texte";
2
+ export declare function transformTexte(document: Document): FlatTexte | null;
3
+ export declare function transformExposeDesMotifs(document: Document): ExposeDesMotifs | null;
4
+ export declare function parseTexte(texteXml: string): FlatTexte | null;
5
+ export declare function parseTexteFromFile(xmlFilePath: string): Promise<FlatTexte | null>;
6
+ export declare function parseExposeDesMotifs(exposeDesMotifsHtml: string): ExposeDesMotifs | null;
7
+ export declare function parseExposeDesMotifsFromFile(htmlFilePath: string): Promise<ExposeDesMotifs | null>;