@tricoteuses/senat 2.8.6 → 2.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/README.md +1 -0
  2. package/lib/model/dosleg.d.ts +0 -29
  3. package/lib/model/questions.d.ts +20 -0
  4. package/lib/model/questions.js +32 -1
  5. package/lib/model/sens.d.ts +670 -82
  6. package/lib/raw_types/dosleg.d.ts +77 -1994
  7. package/lib/raw_types/questions.d.ts +70 -0
  8. package/lib/raw_types/sens.d.ts +5424 -1570
  9. package/lib/raw_types_schemats/ameli.d.ts +12 -2
  10. package/lib/raw_types_schemats/debats.d.ts +2 -2
  11. package/lib/raw_types_schemats/dosleg.d.ts +2 -1054
  12. package/lib/raw_types_schemats/questions.d.ts +4 -2
  13. package/lib/raw_types_schemats/sens.d.ts +4235 -29
  14. package/package.json +9 -10
  15. package/lib/aggregates.d.ts +0 -52
  16. package/lib/aggregates.js +0 -949
  17. package/lib/aggregates.mjs +0 -726
  18. package/lib/aggregates.ts +0 -852
  19. package/lib/config.mjs +0 -16
  20. package/lib/config.ts +0 -26
  21. package/lib/databases.mjs +0 -55
  22. package/lib/databases.ts +0 -68
  23. package/lib/datasets.mjs +0 -78
  24. package/lib/datasets.ts +0 -118
  25. package/lib/fields.d.ts +0 -10
  26. package/lib/fields.js +0 -68
  27. package/lib/fields.mjs +0 -22
  28. package/lib/fields.ts +0 -29
  29. package/lib/index.mjs +0 -7
  30. package/lib/index.ts +0 -64
  31. package/lib/inserters.d.ts +0 -98
  32. package/lib/inserters.js +0 -500
  33. package/lib/inserters.mjs +0 -360
  34. package/lib/inserters.ts +0 -521
  35. package/lib/legislatures.json +0 -38
  36. package/lib/loaders.mjs +0 -97
  37. package/lib/loaders.ts +0 -173
  38. package/lib/model/ameli.mjs +0 -57
  39. package/lib/model/ameli.ts +0 -86
  40. package/lib/model/debats.mjs +0 -43
  41. package/lib/model/debats.ts +0 -68
  42. package/lib/model/dosleg.mjs +0 -163
  43. package/lib/model/dosleg.ts +0 -204
  44. package/lib/model/index.mjs +0 -4
  45. package/lib/model/index.ts +0 -13
  46. package/lib/model/questions.mjs +0 -76
  47. package/lib/model/questions.ts +0 -102
  48. package/lib/model/sens.mjs +0 -339
  49. package/lib/model/sens.ts +0 -432
  50. package/lib/model/texte.mjs +0 -156
  51. package/lib/model/texte.ts +0 -174
  52. package/lib/raw_types_kysely/ameli.d.ts +0 -915
  53. package/lib/raw_types_kysely/ameli.js +0 -7
  54. package/lib/raw_types_kysely/ameli.mjs +0 -5
  55. package/lib/raw_types_kysely/ameli.ts +0 -951
  56. package/lib/raw_types_kysely/debats.d.ts +0 -207
  57. package/lib/raw_types_kysely/debats.js +0 -7
  58. package/lib/raw_types_kysely/debats.mjs +0 -5
  59. package/lib/raw_types_kysely/debats.ts +0 -222
  60. package/lib/raw_types_kysely/dosleg.d.ts +0 -3532
  61. package/lib/raw_types_kysely/dosleg.js +0 -7
  62. package/lib/raw_types_kysely/dosleg.mjs +0 -5
  63. package/lib/raw_types_kysely/dosleg.ts +0 -3621
  64. package/lib/raw_types_kysely/questions.d.ts +0 -414
  65. package/lib/raw_types_kysely/questions.js +0 -7
  66. package/lib/raw_types_kysely/questions.mjs +0 -5
  67. package/lib/raw_types_kysely/questions.ts +0 -426
  68. package/lib/raw_types_kysely/sens.d.ts +0 -4394
  69. package/lib/raw_types_kysely/sens.js +0 -7
  70. package/lib/raw_types_kysely/sens.mjs +0 -5
  71. package/lib/raw_types_kysely/sens.ts +0 -4499
  72. package/lib/raw_types_schemats/ameli.mjs +0 -2
  73. package/lib/raw_types_schemats/ameli.ts +0 -601
  74. package/lib/raw_types_schemats/debats.mjs +0 -2
  75. package/lib/raw_types_schemats/debats.ts +0 -145
  76. package/lib/raw_types_schemats/dosleg.mjs +0 -2
  77. package/lib/raw_types_schemats/dosleg.ts +0 -2193
  78. package/lib/raw_types_schemats/questions.mjs +0 -2
  79. package/lib/raw_types_schemats/questions.ts +0 -249
  80. package/lib/raw_types_schemats/sens.mjs +0 -2
  81. package/lib/raw_types_schemats/sens.ts +0 -2907
  82. package/lib/scripts/convert_data.mjs +0 -95
  83. package/lib/scripts/convert_data.ts +0 -119
  84. package/lib/scripts/datautil.mjs +0 -16
  85. package/lib/scripts/datautil.ts +0 -19
  86. package/lib/scripts/images/transparent_150x192.jpg +0 -0
  87. package/lib/scripts/images/transparent_155x225.jpg +0 -0
  88. package/lib/scripts/parse_textes.mjs +0 -38
  89. package/lib/scripts/parse_textes.ts +0 -52
  90. package/lib/scripts/retrieve_documents.mjs +0 -243
  91. package/lib/scripts/retrieve_documents.ts +0 -279
  92. package/lib/scripts/retrieve_open_data.mjs +0 -214
  93. package/lib/scripts/retrieve_open_data.ts +0 -261
  94. package/lib/scripts/retrieve_senateurs_photos.mjs +0 -147
  95. package/lib/scripts/retrieve_senateurs_photos.ts +0 -177
  96. package/lib/scripts/retrieve_textes.d.ts +0 -1
  97. package/lib/scripts/retrieve_textes.mjs +0 -165
  98. package/lib/scripts/retrieve_textes.ts +0 -79
  99. package/lib/scripts/shared/cli_helpers.ts +0 -36
  100. package/lib/scripts/shared/util.ts +0 -33
  101. package/lib/strings.mjs +0 -18
  102. package/lib/strings.ts +0 -26
  103. package/lib/types/ameli.mjs +0 -13
  104. package/lib/types/ameli.ts +0 -21
  105. package/lib/types/debats.mjs +0 -2
  106. package/lib/types/debats.ts +0 -6
  107. package/lib/types/dosleg.mjs +0 -151
  108. package/lib/types/dosleg.ts +0 -284
  109. package/lib/types/questions.mjs +0 -1
  110. package/lib/types/questions.ts +0 -3
  111. package/lib/types/sens.mjs +0 -1
  112. package/lib/types/sens.ts +0 -12
  113. package/lib/types/sessions.mjs +0 -43
  114. package/lib/types/sessions.ts +0 -42
  115. package/lib/types/texte.mjs +0 -16
  116. package/lib/types/texte.ts +0 -66
  117. package/lib/typings/windows-1252.d.js +0 -2
  118. package/lib/typings/windows-1252.d.mjs +0 -2
  119. package/lib/typings/windows-1252.d.ts +0 -11
  120. package/lib/validators/config.mjs +0 -54
  121. package/lib/validators/config.ts +0 -79
  122. package/lib/validators/senat.d.ts +0 -0
  123. package/lib/validators/senat.js +0 -24
  124. package/lib/validators/senat.mjs +0 -24
  125. package/lib/validators/senat.ts +0 -26
@@ -1,95 +0,0 @@
1
- import assert from "assert";
2
- import commandLineArgs from "command-line-args";
3
- import fs from "fs-extra";
4
- import path from "path";
5
- import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
6
- import { findAllCirconscriptions, findAllLois, findAllQuestions, findAllSens } from "../model";
7
- import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION } from "./datautil";
8
- import { commonOptions } from "./shared/cli_helpers";
9
- import { ensureAndClearDir } from "./shared/util";
10
// This script accepts only the options shared by every command-line script.
const optionsDefinitions = [...commonOptions];
const options = commandLineArgs(optionsDefinitions);

/**
 * Dumps the enabled datasets as one JSON file per record under the data
 * directory given on the command line.
 *
 * - DosLeg: `<dataDir>/<database>/<session>/<signet>.json`
 * - Questions: `<dataDir>/<database>/<legislature>/<reference>.json`
 * - Sens: `<dataDir>/<database>/sens/<matricule>.json` and
 *   `<dataDir>/<database>/circonscriptions/<identifiant>.json`
 *
 * Each dataset root directory is passed to `ensureAndClearDir` before writing.
 */
async function convertData() {
  const enabledDatasets = getEnabledDatasets(options.categories);
  const { dataDir } = options;
  assert(dataDir, "Missing argument: data directory");

  console.time("conversion time");

  // Progress message printed once per dataset unless --silent is set.
  const announce = (database) => {
    if (!options.silent) {
      console.log(`Converting database ${database} data into files…`);
    }
  };
  // Per-record message printed only with --verbose.
  const trace = (recordName) => {
    if (options.verbose) {
      console.log(`Converting ${recordName} file…`);
    }
  };
  // Writes `value` as 2-space indented JSON to `<directory>/<baseName>.json`.
  const writeJson = (directory, baseName, value) => {
    fs.writeJSONSync(path.join(directory, `${baseName}.json`), value, { spaces: 2 });
  };

  if (enabledDatasets & EnabledDatasets.DosLeg) {
    const { database } = datasets.dosleg;
    announce(database);

    const rootDir = path.join(dataDir, database);
    ensureAndClearDir(rootDir);

    for await (const loi of findAllLois()) {
      trace(loi.signet);

      // Records whose signet does not match the expected structure are
      // grouped under the "undefined session" directory.
      const groups = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups;
      const sessionDirName = groups
        ? formatToFourDigitSession(groups.session)
        : UNDEFINED_SESSION;
      const sessionDir = path.join(rootDir, sessionDirName);
      fs.ensureDirSync(sessionDir);

      writeJson(sessionDir, loi.signet, loi);
    }
  }

  if (enabledDatasets & EnabledDatasets.Questions) {
    const { database } = datasets.questions;
    announce(database);

    const rootDir = path.join(dataDir, database);
    ensureAndClearDir(rootDir);

    for await (const question of findAllQuestions()) {
      trace(question.reference);

      // A falsy legislature is filed under directory "0".
      const legislatureDir = path.join(rootDir, String(question.legislature || 0));
      fs.ensureDirSync(legislatureDir);

      writeJson(legislatureDir, question.reference, question);
    }
  }

  if (enabledDatasets & EnabledDatasets.Sens) {
    const { database } = datasets.sens;
    announce(database);

    const rootDir = path.join(dataDir, database);
    const sensDir = path.join(rootDir, "sens");
    const circonscriptionsDir = path.join(rootDir, "circonscriptions");
    for (const directory of [rootDir, sensDir, circonscriptionsDir]) {
      ensureAndClearDir(directory);
    }

    for await (const sen of findAllSens()) {
      trace(sen.matricule);
      writeJson(sensDir, sen.matricule, sen);
    }

    for await (const circonscription of findAllCirconscriptions()) {
      trace(circonscription.identifiant);
      writeJson(circonscriptionsDir, circonscription.identifiant, circonscription);
    }
  }

  if (!options.silent) {
    console.timeEnd("conversion time");
  }
}

// Run the conversion and turn its outcome into the process exit code.
const exitWithFailure = (error) => {
  console.log(error);
  process.exit(1);
};

convertData()
  .then(() => process.exit(0))
  .catch(exitWithFailure);
@@ -1,119 +0,0 @@
1
- import assert from "assert"
2
- import commandLineArgs from "command-line-args"
3
- import fs from "fs-extra"
4
- import path from "path"
5
-
6
- import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets"
7
- import { findAllCirconscriptions, findAllLois, findAllQuestions, findAllSens } from "../model"
8
- import { formatToFourDigitSession, SIGNET_STRUCTURE_REGEXP, UNDEFINED_SESSION } from "./datautil"
9
- import { commonOptions } from "./shared/cli_helpers"
10
- import { ensureAndClearDir } from "./shared/util"
11
-
12
// This script accepts only the options shared by every command-line script.
const optionsDefinitions = [...commonOptions]
const options = commandLineArgs(optionsDefinitions)

/**
 * Dumps the enabled datasets as one JSON file per record under the data
 * directory given on the command line.
 *
 * - DosLeg: `<dataDir>/<database>/<session>/<signet>.json`
 * - Questions: `<dataDir>/<database>/<legislature>/<reference>.json`
 * - Sens: `<dataDir>/<database>/sens/<matricule>.json` and
 *   `<dataDir>/<database>/circonscriptions/<identifiant>.json`
 *
 * Each dataset root directory is passed to `ensureAndClearDir` before writing.
 */
async function convertData() {
  const enabledDatasets = getEnabledDatasets(options.categories)
  const dataDir: string = options.dataDir
  assert(dataDir, "Missing argument: data directory")

  console.time("conversion time")

  // Progress message printed once per dataset unless --silent is set.
  const announce = (database: unknown): void => {
    if (!options.silent) {
      console.log(`Converting database ${database} data into files…`)
    }
  }
  // Per-record message printed only with --verbose.
  const trace = (recordName: unknown): void => {
    if (options.verbose) {
      console.log(`Converting ${recordName} file…`)
    }
  }
  // Writes `value` as 2-space indented JSON to `<directory>/<baseName>.json`.
  const writeJson = (directory: string, baseName: unknown, value: unknown): void => {
    fs.writeJSONSync(path.join(directory, `${baseName}.json`), value, {spaces: 2})
  }

  if (enabledDatasets & EnabledDatasets.DosLeg) {
    const { database } = datasets.dosleg
    announce(database)

    const rootDir = path.join(dataDir, database)
    ensureAndClearDir(rootDir)

    for await (const loi of findAllLois()) {
      trace(loi.signet)

      // Records whose signet does not match the expected structure are
      // grouped under the "undefined session" directory.
      const groups = SIGNET_STRUCTURE_REGEXP.exec(loi.signet)?.groups
      const sessionDirName = groups
        ? formatToFourDigitSession(groups.session)
        : UNDEFINED_SESSION
      const sessionDir = path.join(rootDir, sessionDirName)
      fs.ensureDirSync(sessionDir)

      writeJson(sessionDir, loi.signet, loi)
    }
  }

  if (enabledDatasets & EnabledDatasets.Questions) {
    const { database } = datasets.questions
    announce(database)

    const rootDir = path.join(dataDir, database)
    ensureAndClearDir(rootDir)

    for await (const question of findAllQuestions()) {
      trace(question.reference)

      // A falsy legislature is filed under directory "0".
      const legislatureDir = path.join(rootDir, String(question.legislature || 0))
      fs.ensureDirSync(legislatureDir)

      writeJson(legislatureDir, question.reference, question)
    }
  }

  if (enabledDatasets & EnabledDatasets.Sens) {
    const { database } = datasets.sens
    announce(database)

    const rootDir = path.join(dataDir, database)
    const sensDir = path.join(rootDir, "sens")
    const circonscriptionsDir = path.join(rootDir, "circonscriptions")
    for (const directory of [rootDir, sensDir, circonscriptionsDir]) {
      ensureAndClearDir(directory)
    }

    for await (const sen of findAllSens()) {
      trace(sen.matricule)
      writeJson(sensDir, sen.matricule, sen)
    }

    for await (const circonscription of findAllCirconscriptions()) {
      trace(circonscription.identifiant)
      writeJson(circonscriptionsDir, circonscription.identifiant, circonscription)
    }
  }

  if (!options.silent) {
    console.timeEnd("conversion time")
  }
}

// Run the conversion and turn its outcome into the process exit code.
const exitWithFailure = (error: unknown): void => {
  console.log(error)
  process.exit(1)
}

convertData()
  .then(() => process.exit(0))
  .catch(exitWithFailure)
@@ -1,16 +0,0 @@
1
- import { DateTime, Settings } from "luxon";
2
// Two-digit years above this cutoff are read by luxon as 19xx, the others as 20xx.
Settings.twoDigitCutoffYear = 50;

/**
 * Structure of a signet: lowercase letters (`type`), 2 to 4 digits
 * (`session`), an optional dash, then an optional run of digits (`numTexte`).
 */
export const SIGNET_STRUCTURE_REGEXP =
  /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/;

/**
 * Structure of an `/akn/fr/...` identifier whose `session` is written as
 * `YYYY-YYYY`, followed by a numeric `numTexte` and, after `/fr@`, an optional
 * `version` (one of the RECT variants).
 */
export const AKN_IDENTIFICATION_STRUCTURE_REGEXP =
  /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{4}-\d{4})\/?(?<numTexte>\d+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;

/**
 * Same layout as above, but `session` is 2 to 4 digits and `numTexte` may be
 * alphanumeric.
 */
export const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP =
  /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{2,4})\/?(?<numTexte>[a-zA-Z0-9]+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/;

// Placeholder session used when no session can be determined.
export const UNDEFINED_SESSION = "0";
7
/**
 * Converts the session fragment of a signet (2 to 4 digits) into a four-digit
 * year string.
 *
 * When the first two and the last two digits are consecutive years (for
 * example "1920", and also "9900" across the century boundary), the first
 * year is kept; otherwise the last two digits are used. The two-digit year is
 * then expanded with the same rule luxon applies to the "yy" token: values
 * above `Settings.twoDigitCutoffYear` become 19xx, the others 20xx.
 *
 * @param {string} session - Digits extracted from a signet.
 * @returns {string} The four-digit year, or `UNDEFINED_SESSION` when the
 *   input is shorter than two characters or is not numeric.
 */
export function formatToFourDigitSession(session) {
  if (session.length < 2) {
    return UNDEFINED_SESSION;
  }

  const firstTwoDigits = Number.parseInt(session.substring(0, 2), 10);
  const lastTwoDigits = Number.parseInt(session.substring(session.length - 2), 10);
  if (Number.isNaN(firstTwoDigits) || Number.isNaN(lastTwoDigits)) {
    // Previously this produced the literal string "Invalid DateTime".
    return UNDEFINED_SESSION;
  }

  // Modulo 100 so that "99" followed by "00" counts as consecutive years.
  const isConsecutivePair = (firstTwoDigits + 1) % 100 === lastTwoDigits;
  const twoDigitYear = isConsecutivePair ? firstTwoDigits : lastTwoDigits;

  const century = twoDigitYear > Settings.twoDigitCutoffYear ? 1900 : 2000;
  return String(century + twoDigitYear);
}
@@ -1,19 +0,0 @@
1
- import { DateTime, Settings } from "luxon"
2
-
3
// Two-digit years above this cutoff are read by luxon as 19xx, the others as 20xx.
Settings.twoDigitCutoffYear = 50

/**
 * Structure of a signet: lowercase letters (`type`), 2 to 4 digits
 * (`session`), an optional dash, then an optional run of digits (`numTexte`).
 */
export const SIGNET_STRUCTURE_REGEXP =
  /^(?<type>[a-z]+)(?<session>\d{2,4})-?(?<numTexte>\d+)?/

/**
 * Structure of an `/akn/fr/...` identifier whose `session` is written as
 * `YYYY-YYYY`, followed by a numeric `numTexte` and, after `/fr@`, an optional
 * `version` (one of the RECT variants).
 */
export const AKN_IDENTIFICATION_STRUCTURE_REGEXP =
  /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{4}-\d{4})\/?(?<numTexte>\d+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/

/**
 * Same layout as above, but `session` is 2 to 4 digits and `numTexte` may be
 * alphanumeric.
 */
export const AKN_WORKFLOW_IDENTIFICATION_STRUCTURE_REGEXP =
  /^\/akn\/fr\/(?<type>[a-z]+)\/(?<session>\d{2,4})\/?(?<numTexte>[a-zA-Z0-9]+)\/fr@(?<version>\b(?:RECT|RECT_BIS|RECT_TER|RECT_QUATER|RECT_QUINQUIES)\b)?/

// Placeholder session used when no session can be determined.
export const UNDEFINED_SESSION = "0"
9
-
10
/**
 * Converts the session fragment of a signet (2 to 4 digits) into a four-digit
 * year string.
 *
 * When the first two and the last two digits are consecutive years (for
 * example "1920", and also "9900" across the century boundary), the first
 * year is kept; otherwise the last two digits are used. The two-digit year is
 * then expanded with the same rule luxon applies to the "yy" token: values
 * above `Settings.twoDigitCutoffYear` become 19xx, the others 20xx.
 *
 * @param session Digits extracted from a signet.
 * @returns The four-digit year, or `UNDEFINED_SESSION` when the input is
 *   shorter than two characters or is not numeric.
 */
export function formatToFourDigitSession (session: string): string {
  if (session.length < 2) {
    return UNDEFINED_SESSION
  }

  const firstTwoDigits = Number.parseInt(session.substring(0, 2), 10)
  const lastTwoDigits = Number.parseInt(session.substring(session.length - 2), 10)
  if (Number.isNaN(firstTwoDigits) || Number.isNaN(lastTwoDigits)) {
    // Previously this produced the literal string "Invalid DateTime".
    return UNDEFINED_SESSION
  }

  // Modulo 100 so that "99" followed by "00" counts as consecutive years.
  const isConsecutivePair = (firstTwoDigits + 1) % 100 === lastTwoDigits
  const twoDigitYear = isConsecutivePair ? firstTwoDigits : lastTwoDigits

  const century = twoDigitYear > Settings.twoDigitCutoffYear ? 1900 : 2000
  return String(century + twoDigitYear)
}
@@ -1,38 +0,0 @@
1
- import assert from "assert";
2
- import commandLineArgs from "command-line-args";
3
- import fs from "fs-extra";
4
- import path from "path";
5
- import { iterFilePaths } from "../loaders";
6
- import { parseTexteFromFile } from "../model/texte";
7
- import { commonOptions } from "./shared/cli_helpers";
8
- import { ensureAndClearDir } from "./shared/util";
9
// This script accepts only the options shared by every command-line script.
const optionsDefinitions = [
  ...commonOptions,
];
const options = commandLineArgs(optionsDefinitions);

/**
 * Parses every `.xml` file found under `<dataDir>/leg/original` and writes the
 * result as JSON under `<dataDir>/leg/transformed`, mirroring the directory
 * layout of the original tree. The transformed directory is passed to
 * `ensureAndClearDir` first.
 */
async function main() {
  const dataDir = options.dataDir;
  assert(dataDir, "Missing argument: data directory");

  const originalTextesDir = path.join(dataDir, "leg", "original");
  const transformedTextesDir = path.join(dataDir, "leg", "transformed");
  ensureAndClearDir(transformedTextesDir);

  for (const filePath of iterFilePaths(originalTextesDir)) {
    const parsedFilePath = path.parse(filePath);
    if (parsedFilePath.ext !== ".xml") {
      continue;
    }

    // Compute the sub-directory relative to the original tree. Searching the
    // path for the first "original" substring would pick the wrong position
    // whenever the data directory path itself contains that word.
    const texteDirFromOriginal = path.relative(originalTextesDir, parsedFilePath.dir);
    const transformedTexteDir = path.join(transformedTextesDir, texteDirFromOriginal);
    fs.ensureDirSync(transformedTexteDir);

    if (!options.silent) {
      console.log(`Parsing texte ${parsedFilePath.name}.xml…`);
    }

    const parsedTexte = await parseTexteFromFile(filePath);
    fs.writeJSONSync(
      path.join(transformedTexteDir, `${parsedFilePath.name}.json`),
      parsedTexte,
      { spaces: 2 },
    );
  }
}

main()
  .then(() => process.exit(0))
  .catch((error) => {
    // Failures go to stderr so they are not mixed with progress output.
    console.error(error);
    process.exit(1);
  });
@@ -1,52 +0,0 @@
1
- import assert from "assert"
2
- import commandLineArgs from "command-line-args"
3
- import fs from "fs-extra"
4
- import path from "path"
5
-
6
- import { iterFilePaths } from "../loaders"
7
- import { parseTexteFromFile } from "../model/texte"
8
- import { commonOptions } from "./shared/cli_helpers"
9
- import { ensureAndClearDir } from "./shared/util"
10
-
11
// This script accepts only the options shared by every command-line script.
const optionsDefinitions = [
  ...commonOptions,
]
const options = commandLineArgs(optionsDefinitions)

/**
 * Parses every `.xml` file found under `<dataDir>/leg/original` and writes the
 * result as JSON under `<dataDir>/leg/transformed`, mirroring the directory
 * layout of the original tree. The transformed directory is passed to
 * `ensureAndClearDir` first.
 */
async function main() {
  const dataDir = options.dataDir
  assert(dataDir, "Missing argument: data directory")

  const originalTextesDir = path.join(dataDir, "leg", "original")
  const transformedTextesDir = path.join(dataDir, "leg", "transformed")
  ensureAndClearDir(transformedTextesDir)

  for (const filePath of iterFilePaths(originalTextesDir)) {
    const parsedFilePath = path.parse(filePath)

    if (parsedFilePath.ext !== ".xml") {
      continue
    }

    // Compute the sub-directory relative to the original tree. Searching the
    // path for the first "original" substring would pick the wrong position
    // whenever the data directory path itself contains that word.
    const texteDirFromOriginal = path.relative(originalTextesDir, parsedFilePath.dir)
    const transformedTexteDir = path.join(transformedTextesDir, texteDirFromOriginal)
    fs.ensureDirSync(transformedTexteDir)

    if (!options.silent) {
      console.log(`Parsing texte ${parsedFilePath.name}.xml…`)
    }

    const parsedTexte = await parseTexteFromFile(filePath)
    fs.writeJSONSync(
      path.join(transformedTexteDir, `${parsedFilePath.name}.json`),
      parsedTexte,
      { spaces: 2 }
    )
  }
}

main()
  .then(() => process.exit(0))
  .catch((error) => {
    // Failures go to stderr so they are not mixed with progress output.
    console.error(error)
    process.exit(1)
  })
@@ -1,243 +0,0 @@
1
- import assert from "assert";
2
- import commandLineArgs from "command-line-args";
3
- import fs from "fs-extra";
4
- import path from "path";
5
- import { findSenatRapportUrls, findSenatTexteUrls } from "../model/dosleg";
6
- import { parseTexte, parseTexteFromFile } from "../model/texte";
7
- import { UNDEFINED_SESSION } from "./datautil";
8
- import { commonOptions } from "./shared/cli_helpers";
9
- import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util";
10
// Options specific to this script; they are appended to the shared ones below.
const sessionsOption = {
  name: "sessions",
  type: String,
  multiple: true,
  help: "sessions of textes to retrieve; leave empty for all",
};
const parseDocumentsOption = {
  name: "parseDocuments",
  type: Boolean,
  help: "parse and convert documents into JSON (textes only for now, requires format xml)",
};
const formatsOption = {
  name: "formats",
  alias: "F",
  type: String,
  multiple: true,
  help: "formats of documents to retrieve (xml/html/pdf for textes, html/pdf for rapports); leave empty for all",
};
const typesOption = {
  name: "types",
  type: String,
  multiple: true,
  help: "types of documents to retrieve (textes/rapports); leave empty for all",
};
const forceOption = {
  name: "force",
  type: Boolean,
  help: "force retrieve all documents, even already retrieved ones",
};

const optionsDefinitions = [
  ...commonOptions,
  sessionsOption,
  parseDocumentsOption,
  formatsOption,
  typesOption,
  forceOption,
];
const options = commandLineArgs(optionsDefinitions);

// Base URLs against which document file names are resolved.
const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";

// Used to decode downloaded XML before parsing it.
const textDecoder = new TextDecoder("utf8");
47
/**
 * Downloads a document and returns its raw bytes.
 *
 * Failures are reported on the console and never thrown: the caller receives
 * `null` for a non-OK HTTP status, a network error, or a failure while
 * reading the response body.
 *
 * @param {string} documentUrl - Absolute URL of the document.
 * @returns {Promise<ArrayBuffer|null>} The document bytes, or `null` on failure.
 */
async function retrieveDocument(documentUrl) {
  if (!options.silent) {
    console.log(`Retrieving document ${documentUrl}…`);
  }
  try {
    const response = await fetchWithRetry(documentUrl);
    if (!response.ok) {
      if (response.status === 404) {
        console.warn(`Texte ${documentUrl} not found`);
      }
      else {
        console.error(`An error occurred while retrieving texte ${documentUrl}: ${response.status}`);
      }
      return null;
    }
    // Await here so that a failure while reading the body is handled by the
    // catch below instead of rejecting the promise returned to the caller.
    return await response.arrayBuffer();
  }
  catch (error) {
    // Anything can be thrown, so do not assume an Error instance.
    console.error(error instanceof Error ? error.message : error);
    return null;
  }
}
69
/**
 * Retrieves the textes listed by `findSenatTexteUrls` into
 * `<dataDir>/leg/original/<session>/<texteName>/`, in every requested format
 * (xml, html, pdf). With `--parseDocuments`, each available XML file is also
 * parsed and written as JSON under `<dataDir>/leg/transformed/`.
 *
 * Each format is handled independently: a file that is already on disk, or a
 * download that fails, no longer prevents the other formats of the same texte
 * from being retrieved.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function retrieveTextes(dataDir) {
  const textesDir = path.join(dataDir, "leg");
  fs.ensureDirSync(textesDir);
  const originalTextesDir = path.join(textesDir, "original");
  const transformedTextesDir = path.join(textesDir, "transformed");
  ensureAndClearDir(transformedTextesDir);

  let retrievedTextesCount = 0;
  const texteUrlsNotFoundOrError = [];
  const texteUrlsParseError = [];

  // Downloads one file unless it is already on disk (and --force is not set).
  // `isAvailable` tells whether the file exists on disk afterwards; `buffer`
  // holds the bytes only when they were downloaded during this call.
  const downloadTexteFile = async (absoluteUrl, filePath) => {
    if (!options.force && fs.existsSync(filePath)) {
      if (!options.silent) {
        console.info(`Already retrieved texte ${filePath}…`);
      }
      return { isAvailable: true, buffer: null };
    }
    const buffer = await retrieveDocument(absoluteUrl);
    if (!buffer) {
      texteUrlsNotFoundOrError.push(absoluteUrl);
      return { isAvailable: false, buffer: null };
    }
    fs.writeFileSync(filePath, Buffer.from(buffer));
    retrievedTextesCount++;
    return { isAvailable: true, buffer };
  };

  for await (const texte of findSenatTexteUrls(options.sessions)) {
    const texteName = path.parse(texte.url).name;
    const sessionDirName = `${texte.session ?? UNDEFINED_SESSION}`;
    const texteDir = path.join(originalTextesDir, sessionDirName, texteName);
    fs.ensureDirSync(texteDir);

    if (isOptionEmptyOrHasValue(options.formats, "xml")) {
      const texteXmlUrl = `${texteName}.akn.xml`;
      const texteXmlAbsoluteUrl = new URL(texteXmlUrl, SENAT_TEXTE_XML_BASE_URL).toString();
      const textePath = path.join(texteDir, texteXmlUrl);

      const { isAvailable, buffer } = await downloadTexteFile(texteXmlAbsoluteUrl, textePath);

      if (isAvailable && options.parseDocuments) {
        if (!options.silent) {
          console.log(`Parsing texte ${texteXmlUrl}…`);
        }
        // Parse the freshly downloaded bytes when there are some, otherwise
        // the file already present on disk.
        const parsedTexte = buffer
          ? parseTexte(textDecoder.decode(buffer))
          : await parseTexteFromFile(textePath);
        if (parsedTexte) {
          const transformedTexteDir = path.join(transformedTextesDir, sessionDirName, texteName);
          fs.ensureDirSync(transformedTexteDir);
          fs.writeJSONSync(path.join(transformedTexteDir, `${texteName}.akn.json`), parsedTexte, { spaces: 2 });
        }
        else {
          texteUrlsParseError.push(texteXmlAbsoluteUrl);
        }
      }
    }

    // HTML and PDF are both served from the same base URL.
    for (const format of ["html", "pdf"]) {
      if (!isOptionEmptyOrHasValue(options.formats, format)) {
        continue;
      }
      const texteFileName = `${texteName}.${format}`;
      const texteAbsoluteUrl = new URL(texteFileName, SENAT_TEXTE_BASE_URL).toString();
      await downloadTexteFile(texteAbsoluteUrl, path.join(texteDir, texteFileName));
    }
  }

  if (options.verbose) {
    console.log(`${retrievedTextesCount} textes retrieved`);
    console.log(`${texteUrlsNotFoundOrError.length} textes failed to be retrieved with URLs ${texteUrlsNotFoundOrError.join(", ")}`);
    if (options.parseDocuments) {
      console.log(`${texteUrlsParseError.length} textes failed to be parsed with URLs ${texteUrlsParseError.join(", ")}`);
    }
  }
  // TODO retrieve exposé des motifs (/leg/exposes-des-motifs)
}
168
/**
 * Retrieves the rapports listed by `findSenatRapportUrls` into
 * `<dataDir>/rap/<session>/<rapportName>/`, in every requested format:
 * `<name>_mono.html` for html and `<name>1.pdf` for pdf.
 *
 * Each format is handled independently: an HTML file that is already on disk,
 * or an HTML download that fails, no longer prevents the PDF of the same
 * rapport from being retrieved.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function retrieveRapports(dataDir) {
  const rapportsDir = path.join(dataDir, "rap");
  fs.ensureDirSync(rapportsDir);

  let retrievedRapportsCount = 0;
  const rapportUrlsNotFoundOrError = [];

  // File-name suffix appended to the rapport name for each supported format.
  const fileSuffixByFormat = [
    ["html", "_mono.html"],
    ["pdf", "1.pdf"],
  ];

  for await (const rapport of findSenatRapportUrls(options.sessions)) {
    const parsedRapportUrl = path.parse(rapport.url);
    const rapportName = parsedRapportUrl.name;
    const rapportDir = path.join(rapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName);
    fs.ensureDirSync(rapportDir);

    for (const [format, fileSuffix] of fileSuffixByFormat) {
      if (!isOptionEmptyOrHasValue(options.formats, format)) {
        continue;
      }

      const rapportFileName = `${rapportName}${fileSuffix}`;
      // Keep the directory part of the listed URL and swap only the file name.
      const rapportRelativeUrl = path.format({
        dir: parsedRapportUrl.dir,
        base: rapportFileName,
      });
      const rapportAbsoluteUrl = new URL(rapportRelativeUrl, SENAT_RAPPORT_BASE_URL).toString();
      const rapportPath = path.join(rapportDir, rapportFileName);

      if (!options.force && fs.existsSync(rapportPath)) {
        if (!options.silent) {
          console.info(`Already retrieved rapport ${rapportPath}…`);
        }
        continue;
      }

      const rapportBuffer = await retrieveDocument(rapportAbsoluteUrl);
      if (!rapportBuffer) {
        rapportUrlsNotFoundOrError.push(rapportAbsoluteUrl);
        continue;
      }
      fs.writeFileSync(rapportPath, Buffer.from(rapportBuffer));
      retrievedRapportsCount++;
    }
  }

  if (options.verbose) {
    console.log(`${retrievedRapportsCount} rapports retrieved`);
    console.log(`${rapportUrlsNotFoundOrError.length} rapports failed with URLs ${rapportUrlsNotFoundOrError.join(", ")}`);
  }
}
228
/**
 * Entry point: retrieves each requested document type (textes, then
 * rapports) into the data directory given on the command line.
 */
async function main() {
  const { dataDir } = options;
  assert(dataDir, "Missing argument: data directory");

  const retrieversByType = [
    ["textes", retrieveTextes],
    ["rapports", retrieveRapports],
  ];
  // Sequential on purpose: textes are fully retrieved before rapports start.
  for (const [documentType, retrieve] of retrieversByType) {
    if (isOptionEmptyOrHasValue(options.types, documentType)) {
      await retrieve(dataDir);
    }
  }
}

// Run the script and turn its outcome into the process exit code.
const exitWithFailure = (error) => {
  console.log(error);
  process.exit(1);
};

main()
  .then(() => process.exit(0))
  .catch(exitWithFailure);