npm - @tricoteuses/senat - Versions diffs - 2.9.10 → 2.10.1 - Mend

@tricoteuses/senat 2.9.10 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/LICENSE.md +22 -22
package/README.md +116 -116
package/lib/loaders.d.ts +6 -1
package/lib/loaders.js +54 -0
package/lib/model/agenda.js +0 -2
package/lib/model/compte_rendu.d.ts +9 -2
package/lib/model/compte_rendu.js +223 -211
package/lib/model/util.d.ts +1 -0
package/lib/model/util.js +3 -0
package/lib/scripts/retrieve_agenda.js +25 -6
package/lib/scripts/retrieve_comptes_rendus.d.ts +6 -1
package/lib/scripts/retrieve_comptes_rendus.js +230 -77
package/lib/scripts/retrieve_open_data.js +3 -1
package/lib/scripts/retrieve_videos.js +1 -9
package/lib/types/agenda.d.ts +19 -2
package/lib/types/compte_rendu.d.ts +1 -1
package/lib/utils/cr_spliting.d.ts +7 -0
package/lib/utils/cr_spliting.js +125 -0
package/lib/utils/reunion_grouping.d.ts +6 -0
package/lib/utils/reunion_grouping.js +359 -0
package/lib/validators/senat.d.ts +0 -0
package/lib/validators/senat.js +24 -0
package/package.json +98 -98
package/lib/raw_types/kysely-table-types.d.ts +0 -5
package/lib/raw_types/kysely-table-types.js +0 -1

package/LICENSE.md CHANGED

@@ -1,22 +1,22 @@
-# Tricoteuses-Senat
-## _Handle French Sénat's open data_
-By: Emmanuel Raviart <mailto:emmanuel@raviart.com>
-Copyright (C) 2019, 2020, 2021 Emmanuel Raviart
-https://git.tricoteuses.fr/logiciels/tricoteuses-senat
-> Tricoteuses-Senat is free software; you can redistribute it and/or modify
-> it under the terms of the GNU Affero General Public License as
-> published by the Free Software Foundation, either version 3 of the
-> License, or (at your option) any later version.
->
-> Tricoteuses-Senat is distributed in the hope that it will be useful,
-> but WITHOUT ANY WARRANTY; without even the implied warranty of
-> MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-> GNU Affero General Public License for more details.
->
-> You should have received a copy of the GNU Affero General Public License
-> along with this program. If not, see <http://www.gnu.org/licenses/>.
+# Tricoteuses-Senat
+## _Handle French Sénat's open data_
+By: Emmanuel Raviart <mailto:emmanuel@raviart.com>
+Copyright (C) 2019, 2020, 2021 Emmanuel Raviart
+https://git.tricoteuses.fr/logiciels/tricoteuses-senat
+> Tricoteuses-Senat is free software; you can redistribute it and/or modify
+> it under the terms of the GNU Affero General Public License as
+> published by the Free Software Foundation, either version 3 of the
+> License, or (at your option) any later version.
+>
+> Tricoteuses-Senat is distributed in the hope that it will be useful,
+> but WITHOUT ANY WARRANTY; without even the implied warranty of
+> MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+> GNU Affero General Public License for more details.
+>
+> You should have received a copy of the GNU Affero General Public License
+> along with this program. If not, see <http://www.gnu.org/licenses/>.

package/README.md CHANGED

@@ -1,116 +1,116 @@
-# Tricoteuses-Senat
-## _Retrieve, clean up & handle  French Sénat's open data_
-## Requirements
-- Node >= 22
-## Installation
-```bash
-git clone https://git.tricoteuses.fr/logiciels/tricoteuses-senat
-cd tricoteuses-senat/
-```
-Create a `.env` file to set PostgreSQL database informations and other configuration variables (you can use `example.env` as a template). Then
-```bash
-npm install
-```
-### Database creation (not needed if downloading with Docker image)
-#### Using Docker
-```bash
-docker run --name local-postgres -d -p 5432:5432 -e POSTGRES_PASSWORD=$YOUR_CUSTOM_DB_PASSWORD postgres
-# Default Postgres user is postgres
-# But scripts require an "opendata" role
-docker exec -it local-postgres psql -U postgres -c "CREATE ROLE opendata;"
-```
-## Download data
-Create a folder where the data will be downloaded and run the following command to download the data and convert it into JSON files.
-```bash
-mkdir ../senat-data/
-# Available options for optional `categories` parameter : All,  Ameli, Debats, DosLeg, Questions, Sens
-npm run data:download ../senat-data -- [--categories All]
-```
-Data from other sources is also available :
-```bash
-# Retrieval of textes and rapports from Sénat's website
-# Available options for optional `formats` parameter : xml, html, pdf
-# Available options for optional `types` parameter : textes, rapports
-npm run data:retrieve_documents ../senat-data -- --fromSession 2022 [--formats xml pdf] [--types textes]
-# Retrieval & parsing (textes in xml format only for now)
-npm run data:retrieve_documents ../senat-data -- --fromSession 2022 --parseDocuments
-# Parsing only
-npm run data:parse_textes_lois ../senat-data
-# Retrieval (& parsing) of agenda from Sénat's website
-npm run data:retrieve_agenda ../senat-data -- --fromSession 2022 [--parseAgenda]
-# Retrieval (& parsing) of comptes-rendus des débats from Sénat's website
-npm run data:retrieve_comptes_rendus ../senat-data -- [--parseDebats]
-# Retrieval of sénateurs' pictures from Sénat's website
-npm run data:retrieve_senateurs_photos ../senat-data
-```
-## Data download using Docker
-A Docker image that downloads and converts the data all at once is available. Build it locally or run it from the container registry.
-Use the environment variables `FROM_SESSION` and `CATEGORIES` if needed.
-```bash
-docker run --pull always --name tricoteuses-senat -v ../senat-data:/app/senat-data -d git.tricoteuses.fr/logiciels/tricoteuses-senat:latest
-```
-Use the environment variable `CATEGORIES` and `FROM_SESSION` if needed.
-## Using the data
-Once the data is downloaded, you can use loaders to retrieve it.
-To use loaders in your project, you can install the _@tricoteuses/senat_ package, and import the iterator functions that you need.
-```bash
-npm install @tricoteuses/senat
-```
-```js
-import { iterLoadSenatQuestions } from "@tricoteuses/senat/loaders"
-// Pass data directory and legislature as arguments
-for (const { item: question } of iterLoadSenatQuestions("../senat-data", 17)) {
-  console.log(question.id)
-}
-```
-## Generation of raw types from SQL schema (for contributors only)
-```bash
-npm run data:generate_schemas ../senat-data
-```
-## Publishing
-To publish a new version of this package onto npm, bump the package version and publish.
-```bash
-npm version x.y.z # Bumps version in package.json and creates a new tag x.y.z
-npx tsc
-npm publish
-```
-The Docker image will be automatically built during a CI Workflow if you push the tag to the remote repository.
-```bash
-git push --tags
-```
+# Tricoteuses-Senat
+## _Retrieve, clean up & handle  French Sénat's open data_
+## Requirements
+- Node >= 22
+## Installation
+```bash
+git clone https://git.tricoteuses.fr/logiciels/tricoteuses-senat
+cd tricoteuses-senat/
+```
+Create a `.env` file to set PostgreSQL database informations and other configuration variables (you can use `example.env` as a template). Then
+```bash
+npm install
+```
+### Database creation (not needed if downloading with Docker image)
+#### Using Docker
+```bash
+docker run --name local-postgres -d -p 5432:5432 -e POSTGRES_PASSWORD=$YOUR_CUSTOM_DB_PASSWORD postgres
+# Default Postgres user is postgres
+# But scripts require an "opendata" role
+docker exec -it local-postgres psql -U postgres -c "CREATE ROLE opendata;"
+```
+## Download data
+Create a folder where the data will be downloaded and run the following command to download the data and convert it into JSON files.
+```bash
+mkdir ../senat-data/
+# Available options for optional `categories` parameter : All,  Ameli, Debats, DosLeg, Questions, Sens
+npm run data:download ../senat-data -- [--categories All]
+```
+Data from other sources is also available :
+```bash
+# Retrieval of textes and rapports from Sénat's website
+# Available options for optional `formats` parameter : xml, html, pdf
+# Available options for optional `types` parameter : textes, rapports
+npm run data:retrieve_documents ../senat-data -- --fromSession 2022 [--formats xml pdf] [--types textes]
+# Retrieval & parsing (textes in xml format only for now)
+npm run data:retrieve_documents ../senat-data -- --fromSession 2022 --parseDocuments
+# Parsing only
+npm run data:parse_textes_lois ../senat-data
+# Retrieval (& parsing) of agenda from Sénat's website
+npm run data:retrieve_agenda ../senat-data -- --fromSession 2022 [--parseAgenda]
+# Retrieval (& parsing) of comptes-rendus des débats from Sénat's website
+npm run data:retrieve_comptes_rendus ../senat-data -- [--parseDebats]
+# Retrieval of sénateurs' pictures from Sénat's website
+npm run data:retrieve_senateurs_photos ../senat-data
+```
+## Data download using Docker
+A Docker image that downloads and converts the data all at once is available. Build it locally or run it from the container registry.
+Use the environment variables `FROM_SESSION` and `CATEGORIES` if needed.
+```bash
+docker run --pull always --name tricoteuses-senat -v ../senat-data:/app/senat-data -d git.tricoteuses.fr/logiciels/tricoteuses-senat:latest
+```
+Use the environment variable `CATEGORIES` and `FROM_SESSION` if needed.
+## Using the data
+Once the data is downloaded, you can use loaders to retrieve it.
+To use loaders in your project, you can install the _@tricoteuses/senat_ package, and import the iterator functions that you need.
+```bash
+npm install @tricoteuses/senat
+```
+```js
+import { iterLoadSenatQuestions } from "@tricoteuses/senat/loaders"
+// Pass data directory and legislature as arguments
+for (const { item: question } of iterLoadSenatQuestions("../senat-data", 17)) {
+  console.log(question.id)
+}
+```
+## Generation of raw types from SQL schema (for contributors only)
+```bash
+npm run data:generate_schemas ../senat-data
+```
+## Publishing
+To publish a new version of this package onto npm, bump the package version and publish.
+```bash
+npm version x.y.z # Bumps version in package.json and creates a new tag x.y.z
+npx tsc
+npm publish
+```
+The Docker image will be automatically built during a CI Workflow if you push the tag to the remote repository.
+```bash
+git push --tags
+```

package/lib/loaders.d.ts CHANGED

@@ -3,7 +3,7 @@ import { DebatResult } from "./model/debats";
 import { DossierLegislatifResult } from "./model/dosleg";
 import { QuestionResult } from "./model/questions";
 import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
-import { AgendaEvent } from "./types/agenda";
+import { AgendaEvent, GroupedReunion } from "./types/agenda";
 import { FlatTexte } from "./types/texte";
 import { CompteRendu } from "./types/compte_rendu";
 export { EnabledDatasets } from "./datasets";
@@ -64,6 +64,10 @@ export interface DossierLegislatifDocumentResult {
 export declare function iterFilePaths(dirPath: string): Generator<string>;
 export declare function iterLoadSenatAmendements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AmendementResult>>;
 export declare function iterLoadSenatDebats(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DebatResult>>;
+export declare function iterLoadSenatComptesRendusSeances(dataDir: string, session: number): Generator<{
+    compteRendu: CompteRendu;
+    session: number;
+}>;
 export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
 export declare function iterLoadSenatDossiersLegislatifsRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<RapportMetadata>>;
 export declare function iterLoadSenatDossiersLegislatifsTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<TexteMetadata>>;
@@ -76,6 +80,7 @@ export declare function loadSenatCompteRenduContent(dataDir: string, session: nu
 };
 export declare function iterLoadSenatAgendas(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AgendaEvent[]>>;
 export declare function iterLoadSenatEvenements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AgendaEvent>>;
+export declare function iterLoadSenatAgendasGrouped(dataDir: string, session: number | undefined): Generator<IterItem<GroupedReunion>>;
 export declare function iterLoadSenatCirconscriptions(dataDir: string, options?: {}): Generator<IterItem<CirconscriptionResult>>;
 export declare function iterLoadSenatOrganismes(dataDir: string, options?: {}): Generator<IterItem<OrganismeResult>>;
 export declare function iterLoadSenatSenateurs(dataDir: string, options?: {}): Generator<IterItem<SenateurResult>>;

package/lib/loaders.js CHANGED

@@ -1,3 +1,4 @@
+import fsex from "fs-extra";
 import fs from "fs";
 import path from "path";
 import { datasets } from "./datasets";
@@ -60,6 +61,29 @@ export function* iterLoadSenatDebats(dataDir, session, options = {}) {
         yield debatItem;
     }
 }
+export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
+    const basePath = path.join(dataDir, COMPTES_RENDUS_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
+    if (!fs.existsSync(basePath))
+        return;
+    const files = (fs.readdirSync(basePath) || [])
+        .filter(f => f.endsWith(".json"))
+        .sort();
+    for (const fileName of files) {
+        const filePath = path.join(basePath, fileName);
+        try {
+            const fileContent = fs.readFileSync(filePath, "utf-8");
+            const compteRendu = JSON.parse(fileContent);
+            if (!compteRendu?.uid) {
+                console.warn(`[SN] CR without uid → ${fileName}`);
+                continue;
+            }
+            yield { compteRendu, session };
+        }
+        catch (err) {
+            console.warn(`[SN] error reading CR → ${fileName}`, err);
+        }
+    }
+}
 export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}) {
     for (const dossierLegislatifItem of iterLoadSenatItems(dataDir, datasets.dosleg.database, session, DOSLEG_DOSSIERS_FOLDER, options)) {
         yield dossierLegislatifItem;
@@ -165,6 +189,36 @@ export function* iterLoadSenatEvenements(dataDir, session, options = {}) {
         }
     }
 }
+export function* iterLoadSenatAgendasGrouped(dataDir, session) {
+    const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? ""));
+    if (!fs.existsSync(baseDir))
+        return;
+    const files = (fs.readdirSync(baseDir) || [])
+        .filter((f) => f.startsWith("RUSN") && f.endsWith(".json"))
+        .sort();
+    for (const fileName of files) {
+        const filePath = path.join(baseDir, fileName);
+        let groups;
+        try {
+            groups = fsex.readJSONSync(filePath);
+        }
+        catch {
+            continue;
+        }
+        if (!Array.isArray(groups))
+            continue;
+        for (const g of groups) {
+            if (!g || typeof g !== "object")
+                continue;
+            const gr = g;
+            if (!gr.date || !gr.slot)
+                continue;
+            if (!Array.isArray(gr.reunions))
+                gr.reunions = [];
+            yield { item: gr };
+        }
+    }
+}
 export function* iterLoadSenatCirconscriptions(dataDir, options = {}) {
     for (const circonscriptionItem of iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_CIRCONSCRIPTIONS_FOLDER, options)) {
         yield circonscriptionItem;

package/lib/model/agenda.js CHANGED

@@ -119,8 +119,6 @@ function transformAgenda(document, fileName) {
             captationVideo: videoElement !== null,
             urlDossierSenat: urlDossierSenat,
             quantieme: eventIsSeance(eventElement) ? getQuantieme(eventElement, seanceElements) : null,
-            urlVideo: null,
-            timecodeDebutVideo: null
         });
     }
     return agendaEvents;

package/lib/model/compte_rendu.d.ts CHANGED

@@ -1,2 +1,9 @@
-import { CompteRendu } from "../types/compte_rendu";
-export declare function parseCompteRenduFromFile(htmlFilePath: string): Promise<CompteRendu | null>;
+import { CompteRendu, Sommaire } from "../types/compte_rendu";
+import { TimeSlot } from "../types/agenda";
+export declare function parseCompteRenduSlotFromFile(xmlFilePath: string, wantedSlot: TimeSlot, firstSlotOfDay?: TimeSlot): Promise<CompteRendu | null>;
+export declare function sessionStartYearFromDate(d: Date): number;
+export declare function parseYYYYMMDD(yyyymmdd: string): Date | null;
+export declare function deriveTitreObjetFromSommaire(sommaire: Sommaire | undefined, slot: TimeSlot): {
+    titre: string;
+    objet: string;
+};