@tricoteuses/senat 2.9.10 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE.md CHANGED
@@ -1,22 +1,22 @@
1
- # Tricoteuses-Senat
2
-
3
- ## _Handle French Sénat's open data_
4
-
5
- By: Emmanuel Raviart <mailto:emmanuel@raviart.com>
6
-
7
- Copyright (C) 2019, 2020, 2021 Emmanuel Raviart
8
-
9
- https://git.tricoteuses.fr/logiciels/tricoteuses-senat
10
-
11
- > Tricoteuses-Senat is free software; you can redistribute it and/or modify
12
- > it under the terms of the GNU Affero General Public License as
13
- > published by the Free Software Foundation, either version 3 of the
14
- > License, or (at your option) any later version.
15
- >
16
- > Tricoteuses-Senat is distributed in the hope that it will be useful,
17
- > but WITHOUT ANY WARRANTY; without even the implied warranty of
18
- > MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
- > GNU Affero General Public License for more details.
20
- >
21
- > You should have received a copy of the GNU Affero General Public License
22
- > along with this program. If not, see <http://www.gnu.org/licenses/>.
1
+ # Tricoteuses-Senat
2
+
3
+ ## _Handle French Sénat's open data_
4
+
5
+ By: Emmanuel Raviart <mailto:emmanuel@raviart.com>
6
+
7
+ Copyright (C) 2019, 2020, 2021 Emmanuel Raviart
8
+
9
+ https://git.tricoteuses.fr/logiciels/tricoteuses-senat
10
+
11
+ > Tricoteuses-Senat is free software; you can redistribute it and/or modify
12
+ > it under the terms of the GNU Affero General Public License as
13
+ > published by the Free Software Foundation, either version 3 of the
14
+ > License, or (at your option) any later version.
15
+ >
16
+ > Tricoteuses-Senat is distributed in the hope that it will be useful,
17
+ > but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ > MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19
+ > GNU Affero General Public License for more details.
20
+ >
21
+ > You should have received a copy of the GNU Affero General Public License
22
+ > along with this program. If not, see <http://www.gnu.org/licenses/>.
package/README.md CHANGED
@@ -1,116 +1,116 @@
1
- # Tricoteuses-Senat
2
-
3
- ## _Retrieve, clean up & handle French Sénat's open data_
4
-
5
- ## Requirements
6
-
7
- - Node >= 22
8
-
9
- ## Installation
10
-
11
- ```bash
12
- git clone https://git.tricoteuses.fr/logiciels/tricoteuses-senat
13
- cd tricoteuses-senat/
14
- ```
15
-
16
- Create a `.env` file to set PostgreSQL database informations and other configuration variables (you can use `example.env` as a template). Then
17
-
18
- ```bash
19
- npm install
20
- ```
21
-
22
- ### Database creation (not needed if downloading with Docker image)
23
-
24
- #### Using Docker
25
-
26
- ```bash
27
- docker run --name local-postgres -d -p 5432:5432 -e POSTGRES_PASSWORD=$YOUR_CUSTOM_DB_PASSWORD postgres
28
- # Default Postgres user is postgres
29
- # But scripts require an "opendata" role
30
- docker exec -it local-postgres psql -U postgres -c "CREATE ROLE opendata;"
31
- ```
32
-
33
- ## Download data
34
-
35
- Create a folder where the data will be downloaded and run the following command to download the data and convert it into JSON files.
36
-
37
- ```bash
38
- mkdir ../senat-data/
39
-
40
- # Available options for optional `categories` parameter : All, Ameli, Debats, DosLeg, Questions, Sens
41
- npm run data:download ../senat-data -- [--categories All]
42
- ```
43
-
44
- Data from other sources is also available :
45
- ```bash
46
- # Retrieval of textes and rapports from Sénat's website
47
- # Available options for optional `formats` parameter : xml, html, pdf
48
- # Available options for optional `types` parameter : textes, rapports
49
- npm run data:retrieve_documents ../senat-data -- --fromSession 2022 [--formats xml pdf] [--types textes]
50
-
51
- # Retrieval & parsing (textes in xml format only for now)
52
- npm run data:retrieve_documents ../senat-data -- --fromSession 2022 --parseDocuments
53
-
54
- # Parsing only
55
- npm run data:parse_textes_lois ../senat-data
56
-
57
- # Retrieval (& parsing) of agenda from Sénat's website
58
- npm run data:retrieve_agenda ../senat-data -- --fromSession 2022 [--parseAgenda]
59
-
60
- # Retrieval (& parsing) of comptes-rendus des débats from Sénat's website
61
- npm run data:retrieve_comptes_rendus ../senat-data -- [--parseDebats]
62
-
63
- # Retrieval of sénateurs' pictures from Sénat's website
64
- npm run data:retrieve_senateurs_photos ../senat-data
65
- ```
66
-
67
- ## Data download using Docker
68
-
69
- A Docker image that downloads and converts the data all at once is available. Build it locally or run it from the container registry.
70
- Use the environment variables `FROM_SESSION` and `CATEGORIES` if needed.
71
-
72
- ```bash
73
- docker run --pull always --name tricoteuses-senat -v ../senat-data:/app/senat-data -d git.tricoteuses.fr/logiciels/tricoteuses-senat:latest
74
- ```
75
-
76
- Use the environment variable `CATEGORIES` and `FROM_SESSION` if needed.
77
-
78
- ## Using the data
79
-
80
- Once the data is downloaded, you can use loaders to retrieve it.
81
- To use loaders in your project, you can install the _@tricoteuses/senat_ package, and import the iterator functions that you need.
82
-
83
- ```bash
84
- npm install @tricoteuses/senat
85
- ```
86
-
87
- ```js
88
- import { iterLoadSenatQuestions } from "@tricoteuses/senat/loaders"
89
-
90
- // Pass data directory and legislature as arguments
91
- for (const { item: question } of iterLoadSenatQuestions("../senat-data", 17)) {
92
- console.log(question.id)
93
- }
94
- ```
95
-
96
- ## Generation of raw types from SQL schema (for contributors only)
97
-
98
- ```bash
99
- npm run data:generate_schemas ../senat-data
100
- ```
101
-
102
- ## Publishing
103
-
104
- To publish a new version of this package onto npm, bump the package version and publish.
105
-
106
- ```bash
107
- npm version x.y.z # Bumps version in package.json and creates a new tag x.y.z
108
- npx tsc
109
- npm publish
110
- ```
111
-
112
- The Docker image will be automatically built during a CI Workflow if you push the tag to the remote repository.
113
-
114
- ```bash
115
- git push --tags
116
- ```
1
+ # Tricoteuses-Senat
2
+
3
+ ## _Retrieve, clean up & handle French Sénat's open data_
4
+
5
+ ## Requirements
6
+
7
+ - Node >= 22
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ git clone https://git.tricoteuses.fr/logiciels/tricoteuses-senat
13
+ cd tricoteuses-senat/
14
+ ```
15
+
16
+ Create a `.env` file to set the PostgreSQL database information and other configuration variables (you can use `example.env` as a template). Then install the dependencies:
17
+
18
+ ```bash
19
+ npm install
20
+ ```
21
+
22
+ ### Database creation (not needed if downloading with Docker image)
23
+
24
+ #### Using Docker
25
+
26
+ ```bash
27
+ docker run --name local-postgres -d -p 5432:5432 -e POSTGRES_PASSWORD=$YOUR_CUSTOM_DB_PASSWORD postgres
28
+ # Default Postgres user is postgres
29
+ # But scripts require an "opendata" role
30
+ docker exec -it local-postgres psql -U postgres -c "CREATE ROLE opendata;"
31
+ ```
32
+
33
+ ## Download data
34
+
35
+ Create a folder where the data will be downloaded and run the following command to download the data and convert it into JSON files.
36
+
37
+ ```bash
38
+ mkdir ../senat-data/
39
+
40
+ # Available options for optional `categories` parameter : All, Ameli, Debats, DosLeg, Questions, Sens
41
+ npm run data:download ../senat-data -- [--categories All]
42
+ ```
43
+
44
+ Data from other sources is also available:
45
+ ```bash
46
+ # Retrieval of textes and rapports from Sénat's website
47
+ # Available options for optional `formats` parameter : xml, html, pdf
48
+ # Available options for optional `types` parameter : textes, rapports
49
+ npm run data:retrieve_documents ../senat-data -- --fromSession 2022 [--formats xml pdf] [--types textes]
50
+
51
+ # Retrieval & parsing (textes in xml format only for now)
52
+ npm run data:retrieve_documents ../senat-data -- --fromSession 2022 --parseDocuments
53
+
54
+ # Parsing only
55
+ npm run data:parse_textes_lois ../senat-data
56
+
57
+ # Retrieval (& parsing) of agenda from Sénat's website
58
+ npm run data:retrieve_agenda ../senat-data -- --fromSession 2022 [--parseAgenda]
59
+
60
+ # Retrieval (& parsing) of comptes-rendus des débats from Sénat's website
61
+ npm run data:retrieve_comptes_rendus ../senat-data -- [--parseDebats]
62
+
63
+ # Retrieval of sénateurs' pictures from Sénat's website
64
+ npm run data:retrieve_senateurs_photos ../senat-data
65
+ ```
66
+
67
+ ## Data download using Docker
68
+
69
+ A Docker image that downloads and converts the data all at once is available. Build it locally or run it from the container registry.
70
+ Use the environment variables `FROM_SESSION` and `CATEGORIES` if needed.
71
+
72
+ ```bash
73
+ docker run --pull always --name tricoteuses-senat -v ../senat-data:/app/senat-data -d git.tricoteuses.fr/logiciels/tricoteuses-senat:latest
74
+ ```
75
+
76
+ Use the environment variables `CATEGORIES` and `FROM_SESSION` if needed.
77
+
78
+ ## Using the data
79
+
80
+ Once the data is downloaded, you can use loaders to retrieve it.
81
+ To use loaders in your project, you can install the _@tricoteuses/senat_ package, and import the iterator functions that you need.
82
+
83
+ ```bash
84
+ npm install @tricoteuses/senat
85
+ ```
86
+
87
+ ```js
88
+ import { iterLoadSenatQuestions } from "@tricoteuses/senat/loaders"
89
+
90
+ // Pass data directory and legislature as arguments
91
+ for (const { item: question } of iterLoadSenatQuestions("../senat-data", 17)) {
92
+ console.log(question.id)
93
+ }
94
+ ```
95
+
96
+ ## Generation of raw types from SQL schema (for contributors only)
97
+
98
+ ```bash
99
+ npm run data:generate_schemas ../senat-data
100
+ ```
101
+
102
+ ## Publishing
103
+
104
+ To publish a new version of this package onto npm, bump the package version and publish.
105
+
106
+ ```bash
107
+ npm version x.y.z # Bumps version in package.json and creates a new tag x.y.z
108
+ npx tsc
109
+ npm publish
110
+ ```
111
+
112
+ The Docker image will be automatically built during a CI Workflow if you push the tag to the remote repository.
113
+
114
+ ```bash
115
+ git push --tags
116
+ ```
package/lib/loaders.d.ts CHANGED
@@ -3,7 +3,7 @@ import { DebatResult } from "./model/debats";
3
3
  import { DossierLegislatifResult } from "./model/dosleg";
4
4
  import { QuestionResult } from "./model/questions";
5
5
  import { CirconscriptionResult, OrganismeResult, SenateurResult } from "./model/sens";
6
- import { AgendaEvent } from "./types/agenda";
6
+ import { AgendaEvent, GroupedReunion } from "./types/agenda";
7
7
  import { FlatTexte } from "./types/texte";
8
8
  import { CompteRendu } from "./types/compte_rendu";
9
9
  export { EnabledDatasets } from "./datasets";
@@ -64,6 +64,10 @@ export interface DossierLegislatifDocumentResult {
64
64
  export declare function iterFilePaths(dirPath: string): Generator<string>;
65
65
  export declare function iterLoadSenatAmendements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AmendementResult>>;
66
66
  export declare function iterLoadSenatDebats(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DebatResult>>;
67
+ export declare function iterLoadSenatComptesRendusSeances(dataDir: string, session: number): Generator<{
68
+ compteRendu: CompteRendu;
69
+ session: number;
70
+ }>;
67
71
  export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
68
72
  export declare function iterLoadSenatDossiersLegislatifsRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<RapportMetadata>>;
69
73
  export declare function iterLoadSenatDossiersLegislatifsTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<TexteMetadata>>;
@@ -76,6 +80,7 @@ export declare function loadSenatCompteRenduContent(dataDir: string, session: nu
76
80
  };
77
81
  export declare function iterLoadSenatAgendas(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AgendaEvent[]>>;
78
82
  export declare function iterLoadSenatEvenements(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<AgendaEvent>>;
83
+ export declare function iterLoadSenatAgendasGrouped(dataDir: string, session: number | undefined): Generator<IterItem<GroupedReunion>>;
79
84
  export declare function iterLoadSenatCirconscriptions(dataDir: string, options?: {}): Generator<IterItem<CirconscriptionResult>>;
80
85
  export declare function iterLoadSenatOrganismes(dataDir: string, options?: {}): Generator<IterItem<OrganismeResult>>;
81
86
  export declare function iterLoadSenatSenateurs(dataDir: string, options?: {}): Generator<IterItem<SenateurResult>>;
package/lib/loaders.js CHANGED
@@ -1,3 +1,4 @@
1
+ import fsex from "fs-extra";
1
2
  import fs from "fs";
2
3
  import path from "path";
3
4
  import { datasets } from "./datasets";
@@ -60,6 +61,29 @@ export function* iterLoadSenatDebats(dataDir, session, options = {}) {
60
61
  yield debatItem;
61
62
  }
62
63
  }
64
+ export function* iterLoadSenatComptesRendusSeances(dataDir, session) {
65
+ const basePath = path.join(dataDir, COMPTES_RENDUS_FOLDER, DATA_TRANSFORMED_FOLDER, String(session));
66
+ if (!fs.existsSync(basePath))
67
+ return;
68
+ const files = (fs.readdirSync(basePath) || [])
69
+ .filter(f => f.endsWith(".json"))
70
+ .sort();
71
+ for (const fileName of files) {
72
+ const filePath = path.join(basePath, fileName);
73
+ try {
74
+ const fileContent = fs.readFileSync(filePath, "utf-8");
75
+ const compteRendu = JSON.parse(fileContent);
76
+ if (!compteRendu?.uid) {
77
+ console.warn(`[SN] CR without uid → ${fileName}`);
78
+ continue;
79
+ }
80
+ yield { compteRendu, session };
81
+ }
82
+ catch (err) {
83
+ console.warn(`[SN] error reading CR → ${fileName}`, err);
84
+ }
85
+ }
86
+ }
63
87
  export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}) {
64
88
  for (const dossierLegislatifItem of iterLoadSenatItems(dataDir, datasets.dosleg.database, session, DOSLEG_DOSSIERS_FOLDER, options)) {
65
89
  yield dossierLegislatifItem;
@@ -165,6 +189,36 @@ export function* iterLoadSenatEvenements(dataDir, session, options = {}) {
165
189
  }
166
190
  }
167
191
  }
192
+ export function* iterLoadSenatAgendasGrouped(dataDir, session) {
193
+ const baseDir = path.join(dataDir, AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER, String(session ?? ""));
194
+ if (!fs.existsSync(baseDir))
195
+ return;
196
+ const files = (fs.readdirSync(baseDir) || [])
197
+ .filter((f) => f.startsWith("RUSN") && f.endsWith(".json"))
198
+ .sort();
199
+ for (const fileName of files) {
200
+ const filePath = path.join(baseDir, fileName);
201
+ let groups;
202
+ try {
203
+ groups = fsex.readJSONSync(filePath);
204
+ }
205
+ catch {
206
+ continue;
207
+ }
208
+ if (!Array.isArray(groups))
209
+ continue;
210
+ for (const g of groups) {
211
+ if (!g || typeof g !== "object")
212
+ continue;
213
+ const gr = g;
214
+ if (!gr.date || !gr.slot)
215
+ continue;
216
+ if (!Array.isArray(gr.reunions))
217
+ gr.reunions = [];
218
+ yield { item: gr };
219
+ }
220
+ }
221
+ }
168
222
  export function* iterLoadSenatCirconscriptions(dataDir, options = {}) {
169
223
  for (const circonscriptionItem of iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_CIRCONSCRIPTIONS_FOLDER, options)) {
170
224
  yield circonscriptionItem;
@@ -119,8 +119,6 @@ function transformAgenda(document, fileName) {
119
119
  captationVideo: videoElement !== null,
120
120
  urlDossierSenat: urlDossierSenat,
121
121
  quantieme: eventIsSeance(eventElement) ? getQuantieme(eventElement, seanceElements) : null,
122
- urlVideo: null,
123
- timecodeDebutVideo: null
124
122
  });
125
123
  }
126
124
  return agendaEvents;
@@ -1,2 +1,9 @@
1
- import { CompteRendu } from "../types/compte_rendu";
2
- export declare function parseCompteRenduFromFile(htmlFilePath: string): Promise<CompteRendu | null>;
1
+ import { CompteRendu, Sommaire } from "../types/compte_rendu";
2
+ import { TimeSlot } from "../types/agenda";
3
+ export declare function parseCompteRenduSlotFromFile(xmlFilePath: string, wantedSlot: TimeSlot, firstSlotOfDay?: TimeSlot): Promise<CompteRendu | null>;
4
+ export declare function sessionStartYearFromDate(d: Date): number;
5
+ export declare function parseYYYYMMDD(yyyymmdd: string): Date | null;
6
+ export declare function deriveTitreObjetFromSommaire(sommaire: Sommaire | undefined, slot: TimeSlot): {
7
+ titre: string;
8
+ objet: string;
9
+ };