@tricoteuses/senat 2.19.6 → 2.19.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/datasets.js +40 -116
- package/lib/index.d.ts +5 -5
- package/lib/model/ameli.js +1 -1
- package/lib/model/debats.js +8 -8
- package/lib/model/dosleg.js +31 -22
- package/lib/model/index.d.ts +2 -2
- package/lib/model/index.js +2 -2
- package/lib/model/questions.js +3 -5
- package/lib/model/scrutins.js +13 -33
- package/lib/model/sens.js +43 -68
- package/lib/model/texte.js +15 -43
- package/lib/raw_types/ameli.d.ts +9 -9
- package/lib/raw_types/senat.d.ts +9 -9
- package/lib/raw_types_schemats/ameli.d.ts +2 -2
- package/lib/raw_types_schemats/debats.d.ts +2 -2
- package/lib/raw_types_schemats/dosleg.d.ts +2 -2
- package/lib/raw_types_schemats/questions.d.ts +2 -2
- package/lib/raw_types_schemats/sens.d.ts +2 -2
- package/lib/scripts/convert_data.js +18 -22
- package/lib/scripts/datautil.js +1 -1
- package/lib/scripts/parse_textes.js +2 -2
- package/lib/scripts/retrieve_agenda.js +1 -3
- package/lib/scripts/retrieve_cr_commission.js +1 -3
- package/lib/scripts/retrieve_cr_seance.js +17 -8
- package/lib/scripts/retrieve_open_data.js +5 -7
- package/lib/scripts/retrieve_senateurs_photos.js +1 -4
- package/lib/scripts/test_iter_load.js +4 -1
- package/lib/types/dosleg.d.ts +1 -1
- package/lib/types/sessions.d.ts +1 -1
- package/lib/types/sessions.js +1 -1
- package/package.json +11 -11
|
@@ -49,23 +49,19 @@ async function convertDatasetAmeli(dataDir, options) {
|
|
|
49
49
|
}
|
|
50
50
|
const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
|
|
51
51
|
await fs.ensureDir(ameliReorganizedRootDir);
|
|
52
|
-
const limit = pLimit(10);
|
|
53
|
-
const tasks = [];
|
|
54
52
|
for await (const amendement of findAllAmendements(options["fromSession"])) {
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
}));
|
|
53
|
+
if (options["verbose"]) {
|
|
54
|
+
console.log(`Converting ${amendement["numero"]} file…`);
|
|
55
|
+
}
|
|
56
|
+
const session = String(amendement["session"]) || UNDEFINED_SESSION;
|
|
57
|
+
const signetDossierLegislatif = amendement["signet_dossier_legislatif"] ||
|
|
58
|
+
`${amendement["nature_texte"]}-${amendement["numero_texte"]}`.toLowerCase();
|
|
59
|
+
const ameliReorganizedDir = path.join(ameliReorganizedRootDir, String(session), signetDossierLegislatif);
|
|
60
|
+
await fs.ensureDir(ameliReorganizedDir);
|
|
61
|
+
const amendementFileName = `${amendement["numero"]}.json`;
|
|
62
|
+
const filePath = path.join(ameliReorganizedDir, amendementFileName);
|
|
63
|
+
await fs.writeJSON(filePath, amendement, { spaces: 2 });
|
|
67
64
|
}
|
|
68
|
-
await Promise.all(tasks);
|
|
69
65
|
}
|
|
70
66
|
async function convertDatasetDebats(dataDir, options) {
|
|
71
67
|
const dataset = datasets.debats;
|
|
@@ -83,9 +79,10 @@ async function convertDatasetDebats(dataDir, options) {
|
|
|
83
79
|
continue;
|
|
84
80
|
}
|
|
85
81
|
const debatsReorganizedDir = path.join(debatsReorganizedRootDir, String(session));
|
|
86
|
-
fs.
|
|
82
|
+
await fs.ensureDir(debatsReorganizedDir);
|
|
87
83
|
const debatFileName = `${debat.id}.json`;
|
|
88
|
-
|
|
84
|
+
const filePath = path.join(debatsReorganizedDir, debatFileName);
|
|
85
|
+
await fs.writeJSON(filePath, debat, { spaces: 2 });
|
|
89
86
|
}
|
|
90
87
|
}
|
|
91
88
|
async function convertDatasetDosLeg(dataDir, options) {
|
|
@@ -107,14 +104,13 @@ async function convertDatasetDosLeg(dataDir, options) {
|
|
|
107
104
|
continue;
|
|
108
105
|
}
|
|
109
106
|
loiReorganizedDir = path.join(dossiersReorganizedDir, String(session));
|
|
110
|
-
fs.
|
|
107
|
+
await fs.ensureDir(loiReorganizedDir);
|
|
111
108
|
// Ajout des actes législatifs au dossier
|
|
112
109
|
const actesLegislatifs = createActesLegislatifs(loi);
|
|
113
110
|
const loiWithActes = { ...loi, actes_legislatifs: actesLegislatifs };
|
|
114
|
-
const
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
});
|
|
111
|
+
const dossierFile = `${loi["signet"]}.json`;
|
|
112
|
+
const filePath = path.join(loiReorganizedDir, dossierFile);
|
|
113
|
+
await fs.writeJSON(filePath, loiWithActes, { spaces: 2 });
|
|
118
114
|
}
|
|
119
115
|
await convertTexteUrls(dataDir);
|
|
120
116
|
await convertRapportUrls(dataDir);
|
package/lib/scripts/datautil.js
CHANGED
|
@@ -28,7 +28,7 @@ export function getSessionFromDate(date, format = STANDARD_DATE_FORMAT) {
|
|
|
28
28
|
const parsedDate = DateTime.fromFormat(date, format);
|
|
29
29
|
const endSessionDate = DateTime.fromObject({ year: parsedDate.year, month: 9, day: 30 });
|
|
30
30
|
if (parsedDate < endSessionDate) {
|
|
31
|
-
return parsedDate.year - 1;
|
|
31
|
+
return (parsedDate.year - 1);
|
|
32
32
|
}
|
|
33
33
|
return parsedDate.year;
|
|
34
34
|
}
|
|
@@ -2,8 +2,8 @@ import assert from "assert";
|
|
|
2
2
|
import commandLineArgs from "command-line-args";
|
|
3
3
|
import fs from "fs-extra";
|
|
4
4
|
import path from "path";
|
|
5
|
-
import { iterFilePaths, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER
|
|
6
|
-
import { parseExposeDesMotifsFromFile, parseTexteFromFile
|
|
5
|
+
import { iterFilePaths, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
6
|
+
import { parseExposeDesMotifsFromFile, parseTexteFromFile } from "../model/texte";
|
|
7
7
|
import { commonOptions } from "./shared/cli_helpers";
|
|
8
8
|
import { ensureAndClearDir } from "./shared/util";
|
|
9
9
|
const optionsDefinitions = [...commonOptions];
|
|
@@ -128,9 +128,7 @@ async function main() {
|
|
|
128
128
|
const sessions = getSessionsFromStart(options["fromSession"]);
|
|
129
129
|
console.time("agenda processing time");
|
|
130
130
|
await retrieveAgendas(dataDir, sessions);
|
|
131
|
-
|
|
132
|
-
console.timeEnd("agenda processing time");
|
|
133
|
-
}
|
|
131
|
+
console.timeEnd("agenda processing time");
|
|
134
132
|
}
|
|
135
133
|
main()
|
|
136
134
|
.then(() => process.exit(0))
|
|
@@ -356,9 +356,7 @@ async function main() {
|
|
|
356
356
|
assert(dataDir, "Missing argument: data directory");
|
|
357
357
|
console.time("CRI processing time");
|
|
358
358
|
await retrieveCommissionCRs(options);
|
|
359
|
-
|
|
360
|
-
console.timeEnd("CRI processing time");
|
|
361
|
-
}
|
|
359
|
+
console.timeEnd("CRI processing time");
|
|
362
360
|
}
|
|
363
361
|
main()
|
|
364
362
|
.then(() => process.exit(0))
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import assert from "assert";
|
|
7
7
|
import commandLineArgs from "command-line-args";
|
|
8
|
-
import fs from "fs-extra";
|
|
8
|
+
import fs, { ensureDirSync } from "fs-extra";
|
|
9
9
|
import path from "path";
|
|
10
10
|
import StreamZip from "node-stream-zip";
|
|
11
11
|
import * as cheerio from "cheerio";
|
|
@@ -14,7 +14,7 @@ import { commonOptions } from "./shared/cli_helpers";
|
|
|
14
14
|
import { deriveTitreObjetFromSommaire, parseCompteRenduSlotFromFile, parseYYYYMMDD, sessionStartYearFromDate, } from "../model/seance";
|
|
15
15
|
import { makeGroupUid } from "../utils/reunion_grouping";
|
|
16
16
|
import { getSessionsFromStart } from "../types/sessions";
|
|
17
|
-
import {
|
|
17
|
+
import { fetchWithRetry } from "./shared/util";
|
|
18
18
|
import { computeIntervalsBySlot } from "../utils/cr_spliting";
|
|
19
19
|
const optionsDefinitions = [
|
|
20
20
|
...commonOptions,
|
|
@@ -115,7 +115,7 @@ async function extractAndDistributeXmlBySession(zipPath, originalRoot) {
|
|
|
115
115
|
}
|
|
116
116
|
export async function retrieveCriXmlDump(dataDir, options = {}) {
|
|
117
117
|
const root = path.join(dataDir, COMPTES_RENDUS_FOLDER);
|
|
118
|
-
|
|
118
|
+
ensureDirSync(root);
|
|
119
119
|
const originalRoot = path.join(root, DATA_ORIGINAL_FOLDER);
|
|
120
120
|
fs.ensureDirSync(originalRoot);
|
|
121
121
|
const transformedRoot = path.join(root, DATA_TRANSFORMED_FOLDER);
|
|
@@ -153,11 +153,22 @@ export async function retrieveCriXmlDump(dataDir, options = {}) {
|
|
|
153
153
|
}
|
|
154
154
|
const xmlFiles = (await fs.readdir(originalSessionDir)).filter((f) => /^d\d{8}\.xml$/i.test(f)).sort();
|
|
155
155
|
const transformedSessionDir = path.join(transformedRoot, String(session));
|
|
156
|
-
|
|
157
|
-
|
|
156
|
+
await fs.ensureDir(transformedSessionDir);
|
|
157
|
+
const now = Date.now();
|
|
158
158
|
for (const f of xmlFiles) {
|
|
159
159
|
const yyyymmdd = f.slice(1, 9);
|
|
160
160
|
const xmlPath = path.join(originalSessionDir, f);
|
|
161
|
+
if (options["only-recent"]) {
|
|
162
|
+
const cutoff = now - options["only-recent"] * 24 * 3600 * 1000;
|
|
163
|
+
const seanceTs = Date.parse(yyyymmdd.slice(0, 4) + "-" + yyyymmdd.slice(4, 6) + "-" + yyyymmdd.slice(6, 8));
|
|
164
|
+
if (seanceTs < cutoff) {
|
|
165
|
+
// Check if some file exists starting with CRSSN{yyyymmdd} in transformed dir
|
|
166
|
+
const someFile = (await fs.readdir(transformedSessionDir)).find((fn) => fn.startsWith(`CRSSN${yyyymmdd}`));
|
|
167
|
+
if (someFile) {
|
|
168
|
+
continue;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
161
172
|
// 1) Deduce slot(s) from agenda if it exists
|
|
162
173
|
const agendaInfo = loadAgendaSPSlotsForDate(dataDir, yyyymmdd, session);
|
|
163
174
|
const firstSlotOfDay = pickFirstSlotOfDay(agendaInfo?.slots ?? []);
|
|
@@ -209,9 +220,7 @@ async function main() {
|
|
|
209
220
|
assert(dataDir, "Missing argument: data directory");
|
|
210
221
|
console.time("CRI processing time");
|
|
211
222
|
await retrieveCriXmlDump(dataDir, options);
|
|
212
|
-
|
|
213
|
-
console.timeEnd("CRI processing time");
|
|
214
|
-
}
|
|
223
|
+
console.timeEnd("CRI processing time");
|
|
215
224
|
}
|
|
216
225
|
main()
|
|
217
226
|
.then(() => process.exit(0))
|
|
@@ -5,7 +5,7 @@ import fs from "fs-extra";
|
|
|
5
5
|
import path from "path";
|
|
6
6
|
import StreamZip from "node-stream-zip";
|
|
7
7
|
import readline from "readline";
|
|
8
|
-
import windows1252 from "windows-1252";
|
|
8
|
+
import * as windows1252 from "windows-1252";
|
|
9
9
|
import { pipeline } from "stream";
|
|
10
10
|
import { promisify } from "util";
|
|
11
11
|
import config from "../config";
|
|
@@ -102,7 +102,7 @@ async function copyToSenat(dataset, dataDir, options) {
|
|
|
102
102
|
parts[i] = parts[i].replace(/\bpublic\b(?=(\s*\.|\s*[,;]|\s|$))/g, schema);
|
|
103
103
|
}
|
|
104
104
|
}
|
|
105
|
-
return parts.join(
|
|
105
|
+
return parts.join("");
|
|
106
106
|
}
|
|
107
107
|
newLine = replacePublicOutsideStrings(line, dataset.database);
|
|
108
108
|
// Replace SET client_encoding to UTF8
|
|
@@ -120,7 +120,8 @@ async function copyToSenat(dataset, dataDir, options) {
|
|
|
120
120
|
});
|
|
121
121
|
}
|
|
122
122
|
finally {
|
|
123
|
-
try {
|
|
123
|
+
try {
|
|
124
|
+
}
|
|
124
125
|
catch { }
|
|
125
126
|
}
|
|
126
127
|
resolve();
|
|
@@ -268,10 +269,7 @@ async function retrieveOpenData() {
|
|
|
268
269
|
PGUSER: process.env["PGUSER"] || config.db.user,
|
|
269
270
|
PGPASSWORD: process.env["PGPASSWORD"] || config.db.password,
|
|
270
271
|
};
|
|
271
|
-
assert(process.env["PGHOST"] &&
|
|
272
|
-
process.env["PGPORT"] &&
|
|
273
|
-
process.env["PGUSER"] &&
|
|
274
|
-
process.env["PGPASSWORD"], "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
|
|
272
|
+
assert(process.env["PGHOST"] && process.env["PGPORT"] && process.env["PGUSER"] && process.env["PGPASSWORD"], "Missing database configuration: environment variables PGHOST, PGPORT, PGUSER and PGPASSWORD or TRICOTEUSES_SENAT_DB_* in .env file");
|
|
275
273
|
console.time("data extraction time");
|
|
276
274
|
// Create role 'opendata' if it does not exist
|
|
277
275
|
execSync(`${options["sudo"] ? `sudo -u ${options["sudo"]} ` : ""}psql --quiet -c "CREATE ROLE opendata" || true`, {
|
|
@@ -71,10 +71,7 @@ async function retrievePhotosSenateurs() {
|
|
|
71
71
|
fs.renameSync(photoTempFilePath, photoFilePath);
|
|
72
72
|
}
|
|
73
73
|
catch (error) {
|
|
74
|
-
if (typeof error === "object" &&
|
|
75
|
-
error &&
|
|
76
|
-
"status" in error &&
|
|
77
|
-
error.status === 8) {
|
|
74
|
+
if (typeof error === "object" && error && "status" in error && error.status === 8) {
|
|
78
75
|
console.error(`Unable to load photo for ${sen.senprenomuse} ${sen.sennomuse}`);
|
|
79
76
|
continue;
|
|
80
77
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { iterLoadSenatScrutins } from "../loaders";
|
|
1
|
+
import { iterLoadSenatScrutins, iterLoadSenatAmendements } from "../loaders";
|
|
2
2
|
import commandLineArgs from "command-line-args";
|
|
3
3
|
import { dataDirDefaultOption } from "./shared/cli_helpers";
|
|
4
4
|
const optionsDefinitions = [dataDirDefaultOption];
|
|
@@ -8,3 +8,6 @@ const session = 2024;
|
|
|
8
8
|
for (const { item: scrutin } of iterLoadSenatScrutins(options["dataDir"], session, { noValidation: noValidation })) {
|
|
9
9
|
console.log(scrutin["numero"]);
|
|
10
10
|
}
|
|
11
|
+
for (const { item: amendement } of iterLoadSenatAmendements(options["dataDir"], session, { noValidation: noValidation })) {
|
|
12
|
+
console.log(amendement["numero"]);
|
|
13
|
+
}
|
package/lib/types/dosleg.d.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { txt_ameliFields } from "../raw_types_schemats/ameli";
|
|
|
2
2
|
import { ass as Ass, aud, audFields, auteur, date_seance, date_seanceFields as dateSeanceFields, deccoc as DecCoc, denrap as DenRap, docatt, docattFields, ecr, ecrFields, etaloi as EtaLoi, lecass, lecassFields, lecassrap, lecture, lectureFields, loi, org as Org, oritxt as OriTxt, qua as Qua, rap, raporg as RapOrg, raporgFields, scr as Scr, texte, texteFields, typatt as TypAtt, typlec as TypLec, typloi as TypLoi, typtxt as TypTxt, typurl as TypUrl, typurlFields } from "../raw_types_schemats/dosleg";
|
|
3
3
|
import { Debat } from "./debats";
|
|
4
4
|
import { TxtAmeli } from "./ameli";
|
|
5
|
-
export type { Ass, DecCoc, DenRap, EtaLoi, Org, OriTxt, Qua, RapOrg, Scr, TypAtt, TypLec, TypLoi, TypTxt, TypUrl
|
|
5
|
+
export type { Ass, DecCoc, DenRap, EtaLoi, Org, OriTxt, Qua, RapOrg, Scr, TypAtt, TypLec, TypLoi, TypTxt, TypUrl };
|
|
6
6
|
export interface Aud extends aud {
|
|
7
7
|
org?: Org;
|
|
8
8
|
}
|
package/lib/types/sessions.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export declare const UNDEFINED_SESSION = 0;
|
|
2
2
|
declare const sessions: readonly [0, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026];
|
|
3
|
-
export type Session = typeof sessions[number];
|
|
3
|
+
export type Session = (typeof sessions)[number];
|
|
4
4
|
export declare function getSessionsFromStart(startSession: Session): (0 | 1958 | 1959 | 1960 | 1961 | 1962 | 1963 | 1964 | 1965 | 1966 | 1967 | 1968 | 1969 | 1970 | 1971 | 1972 | 1973 | 1974 | 1975 | 1976 | 1977 | 1978 | 1979 | 1980 | 1981 | 1982 | 1983 | 1984 | 1985 | 1986 | 1987 | 1988 | 1989 | 1990 | 1991 | 1992 | 1993 | 1994 | 1995 | 1996 | 1997 | 1998 | 1999 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 | 2023 | 2024 | 2025 | 2026)[];
|
|
5
5
|
export {};
|
package/lib/types/sessions.js
CHANGED
|
@@ -76,7 +76,7 @@ export function getSessionsFromStart(startSession) {
|
|
|
76
76
|
if (startSession === 0) {
|
|
77
77
|
return Array.from(sessions);
|
|
78
78
|
}
|
|
79
|
-
const sessionIndex = sessions.findIndex(session => startSession === session);
|
|
79
|
+
const sessionIndex = sessions.findIndex((session) => startSession === session);
|
|
80
80
|
if (sessionIndex >= 0) {
|
|
81
81
|
return sessions.slice(sessionIndex);
|
|
82
82
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tricoteuses/senat",
|
|
3
|
-
"version": "2.19.6",
|
|
3
|
+
"version": "2.19.8",
|
|
4
4
|
"description": "Handle French Sénat's open data",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"France",
|
|
@@ -63,28 +63,28 @@
|
|
|
63
63
|
"dependencies": {
|
|
64
64
|
"@biryani/core": "^0.2.1",
|
|
65
65
|
"cheerio": "^1.1.2",
|
|
66
|
-
"command-line-args": "^
|
|
67
|
-
"dotenv": "^
|
|
68
|
-
"fs-extra": "^
|
|
69
|
-
"jsdom": "^
|
|
70
|
-
"kysely": "^0.
|
|
66
|
+
"command-line-args": "^6.0.1",
|
|
67
|
+
"dotenv": "^17.2.3",
|
|
68
|
+
"fs-extra": "^11.3.2",
|
|
69
|
+
"jsdom": "^27.2.0",
|
|
70
|
+
"kysely": "^0.28.8",
|
|
71
71
|
"luxon": "^3.7.2",
|
|
72
72
|
"node-stream-zip": "^1.8.2",
|
|
73
|
+
"p-limit": "^7.2.0",
|
|
73
74
|
"pg": "^8.13.1",
|
|
74
75
|
"pg-cursor": "^2.12.1",
|
|
75
|
-
"p-limit": "^7.2.0",
|
|
76
76
|
"slug": "^11.0.0",
|
|
77
77
|
"tsx": "^4.20.6",
|
|
78
|
-
"windows-1252": "^
|
|
78
|
+
"windows-1252": "^3.0.4"
|
|
79
79
|
},
|
|
80
80
|
"devDependencies": {
|
|
81
81
|
"@typed-code/schemats": "^5.0.1",
|
|
82
82
|
"@types/cheerio": "^1.0.0",
|
|
83
83
|
"@types/command-line-args": "^5.0.0",
|
|
84
|
-
"@types/fs-extra": "^
|
|
85
|
-
"@types/jsdom": "^
|
|
84
|
+
"@types/fs-extra": "^11.0.4",
|
|
85
|
+
"@types/jsdom": "^27.0.0",
|
|
86
86
|
"@types/luxon": "^3.7.1",
|
|
87
|
-
"@types/node": "^
|
|
87
|
+
"@types/node": "^24.10.1",
|
|
88
88
|
"@types/pg": "^8.15.5",
|
|
89
89
|
"@types/pg-cursor": "^2.7.2",
|
|
90
90
|
"@types/slug": "^5.0.9",
|