@tricoteuses/senat 2.20.30 → 2.20.32

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/lib/loaders.d.ts CHANGED
@@ -41,11 +41,11 @@ export declare function iterLoadSenatComptesRendusCommissions(dataDir: string, s
41
41
  session: number;
42
42
  }>;
43
43
  export declare function iterLoadSenatDossiersLegislatifs(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DossierLegislatifResult>>;
44
- export declare function iterLoadSenatDossiersLegislatifsRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
45
- export declare function iterLoadSenatDossiersLegislatifsTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
46
- export declare function iterLoadSenatDossiersLegislatifsDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
47
- export declare function iterLoadSenatDossiersLegislatifsRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
48
- export declare function iterLoadSenatDossiersLegislatifsTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
44
+ export declare function iterLoadSenatRapportUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
45
+ export declare function iterLoadSenatTexteUrls(dataDir: string, session: number | undefined): Generator<IterItem<DocumentMetadata>>;
46
+ export declare function iterLoadSenatDocuments(dataDir: string, session: number | undefined, documentType: "textes" | "rapports", options?: {}): Generator<IterItem<DocumentResult>>;
47
+ export declare function iterLoadSenatRapports(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
48
+ export declare function iterLoadSenatTextes(dataDir: string, session: number | undefined, options?: {}): Generator<IterItem<DocumentResult>>;
49
49
  export declare function loadSenatTexteContent(dataDir: string, textePathFromDataset: string): IterItem<FlatTexte | null>;
50
50
  export declare function loadSenatCompteRenduContent(dataDir: string, session: number, debatId: string | number): {
51
51
  item: CompteRendu | null;
package/lib/loaders.js CHANGED
@@ -50,6 +50,9 @@ function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, {
50
50
  console.log(`Found ${changedFiles?.size || 0} changed files (AMR)`);
51
51
  }
52
52
  for (const filePath of iterFilePaths(itemsDir)) {
53
+ if (!filePath.endsWith(".json")) {
54
+ continue;
55
+ }
53
56
  const relativePath = path.relative(path.join(dataDir, dataName), filePath);
54
57
  const gitStatus = changedFiles?.get(relativePath);
55
58
  // Filter by changed files if sinceCommit is specified
@@ -147,7 +150,7 @@ export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}
147
150
  yield dossierLegislatifItem;
148
151
  }
149
152
  }
150
- export function* iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session) {
153
+ export function* iterLoadSenatRapportUrls(dataDir, session) {
151
154
  let itemsDir = path.join(dataDir, RAPPORT_FOLDER, DATA_ORIGINAL_FOLDER);
152
155
  if (session) {
153
156
  itemsDir = path.join(itemsDir, session.toString());
@@ -163,7 +166,7 @@ export function* iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session) {
163
166
  }
164
167
  }
165
168
  }
166
- export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
169
+ export function* iterLoadSenatTexteUrls(dataDir, session) {
167
170
  let itemsDir = path.join(dataDir, TEXTE_FOLDER, DATA_ORIGINAL_FOLDER);
168
171
  if (session) {
169
172
  itemsDir = path.join(itemsDir, session.toString());
@@ -179,23 +182,14 @@ export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
179
182
  }
180
183
  }
181
184
  }
182
- export function* iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, documentType, options = {}) {
185
+ export function* iterLoadSenatDocuments(dataDir, session, documentType, options = {}) {
183
186
  for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
184
187
  for (const lecture of dossierLegislatif["lectures"]) {
185
188
  const lecturesSenat = lecture.lectures_assemblee.filter((lectureAssemblee) => lectureAssemblee.assemblee === "Sénat");
186
189
  for (const lectureSenat of lecturesSenat) {
187
190
  for (const document of lectureSenat[documentType]) {
188
- const enrichedDocument = {
189
- signet_dossier: dossierLegislatif["signet"],
190
- url_dossier_senat: dossierLegislatif["url"],
191
- url_dossier_assemblee_nationale: dossierLegislatif["url_dossier_assemblee_nationale"],
192
- type_lecture: lecture.type_lecture,
193
- libelle_lecture: lecture.libelle,
194
- libelle_organisme: lectureSenat.libelle_organisme,
195
- ...document,
196
- };
197
191
  const documentItem = {
198
- item: enrichedDocument,
192
+ item: document,
199
193
  };
200
194
  if (document.url) {
201
195
  const documentName = path.parse(document.url).name;
@@ -207,13 +201,15 @@ export function* iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, doc
207
201
  }
208
202
  }
209
203
  }
210
- export function* iterLoadSenatDossiersLegislatifsRapports(dataDir, session, options = {}) {
211
- for (const iterItem of iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, "rapports", options)) {
212
- yield iterItem;
204
+ export function* iterLoadSenatRapports(dataDir, session, options = {}) {
205
+ for (const iterItem of iterLoadSenatItems(dataDir, RAPPORT_FOLDER, session, "original", options)) {
206
+ if (iterItem.item?.["id"]) {
207
+ yield iterItem;
208
+ }
213
209
  }
214
210
  }
215
- export function* iterLoadSenatDossiersLegislatifsTextes(dataDir, session, options = {}) {
216
- for (const iterItem of iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, "textes", options)) {
211
+ export function* iterLoadSenatTextes(dataDir, session, options = {}) {
212
+ for (const iterItem of iterLoadSenatDocuments(dataDir, session, "textes", options)) {
217
213
  yield iterItem;
218
214
  }
219
215
  }
@@ -33,39 +33,50 @@ function documentsAttaches(rapportId) {
33
33
  "typatt.typattlib as type_document"
34
34
  ]));
35
35
  }
36
- const queryRapports = dbSenat
36
+ function selectRapportAttributes({ eb, ref, val }) {
37
+ return [
38
+ "rap.rapnum as numero",
39
+ "raporg.orgcod as code_organisme",
40
+ eb
41
+ .case()
42
+ .when("rap.rapurl", "is not", null)
43
+ .then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
44
+ .else(null)
45
+ .end()
46
+ .as("id"),
47
+ eb
48
+ .case()
49
+ .when("rap.typurl", "=", "I")
50
+ .then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
51
+ .else(rtrim(ref("rap.rapurl")))
52
+ .end()
53
+ .as("url"),
54
+ rtrim(ref("denrap.libdenrap")).as("type"),
55
+ rtrim(rtrim(ref("rap.raptil"))).as("titre"),
56
+ rtrim(rtrim(ref("rap.rapsoustit"))).as("sous_titre"),
57
+ toDateString(ref("rap.date_depot")).as("date"),
58
+ "rap.sesann as session",
59
+ auteursRapport(ref("rap.rapcod")).as("auteurs"),
60
+ documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
61
+ ];
62
+ }
63
+ const baseQueryRapports = dbSenat
37
64
  .withSchema("dosleg")
38
65
  .selectFrom("rap")
39
66
  .leftJoin("raporg", "raporg.rapcod", "rap.rapcod")
40
67
  .leftJoin("denrap", "denrap.coddenrap", "rap.coddenrap")
41
- .select(({ eb, ref, val }) => [
42
- "rap.rapnum as numero",
43
- "raporg.orgcod as code_organisme",
44
- eb
45
- .case()
46
- .when("rap.rapurl", "is not", null)
47
- .then(sql `regexp_replace(trim(${ref("rap.rapurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
48
- .else(null)
49
- .end()
50
- .as("id"),
51
- eb
52
- .case()
53
- .when("rap.typurl", "=", "I")
54
- .then(concat(val("https://www.senat.fr/rap/"), rtrim(ref("rap.rapurl"))))
55
- .else(rtrim(ref("rap.rapurl")))
56
- .end()
57
- .as("url"),
58
- rtrim(ref("denrap.libdenrap")).as("type"),
59
- rtrim(ref("rap.raptil")).as("titre"),
60
- rtrim(ref("rap.rapsoustit")).as("sous_titre"),
61
- toDateString(ref("rap.date_depot")).as("date"),
62
- "sesann as session",
63
- auteursRapport(ref("rap.rapcod")).as("auteurs"),
64
- documentsAttaches(ref("rap.rapcod")).as("documents_annexes"),
68
+ .leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod");
69
+ const queryRapports = baseQueryRapports
70
+ .leftJoin("lecass", "lecass.lecassidt", "lecassrap.lecassidt")
71
+ .leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
72
+ .leftJoin("loi", "loi.loicod", "lecture.loicod")
73
+ .select((args) => [
74
+ "loi.signet as signet_dossier",
75
+ ...selectRapportAttributes(args),
65
76
  ]);
66
77
  export function rapports(lectureAssembleeId) {
67
- return jsonArrayFrom(queryRapports
68
- .leftJoin("lecassrap", "lecassrap.rapcod", "rap.rapcod")
78
+ return jsonArrayFrom(baseQueryRapports
79
+ .select(selectRapportAttributes)
69
80
  .where("lecassrap.lecassidt", "=", lectureAssembleeId));
70
81
  }
71
82
  function auteursTexte(texteId) {
@@ -85,40 +96,52 @@ function auteursTexte(texteId) {
85
96
  ])
86
97
  .orderBy("ecr.ecrnumtri", "asc"));
87
98
  }
88
- const queryTextes = dbSenat
99
+ function selectTexteAttributes({ eb, ref, val }) {
100
+ return [
101
+ "texte.texnum as numero",
102
+ "texte.orgcod as code_organisme",
103
+ eb
104
+ .case()
105
+ .when("texte.texurl", "is not", null)
106
+ .then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
107
+ .else(null)
108
+ .end()
109
+ .as("id"),
110
+ eb
111
+ .case()
112
+ .when("texte.typurl", "=", "I")
113
+ .then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
114
+ .else(rtrim(ref("texte.texurl")))
115
+ .end()
116
+ .as("url"),
117
+ rtrim(ref("oritxt.oritxtlib")).as("origine"),
118
+ "oritxt.oriordre as ordre_origine",
119
+ "oritxt.oritxtado as code_adoption",
120
+ "oritxt.oritxtmod as modification",
121
+ rtrim(ref("typtxt.typtxtlib")).as("type"),
122
+ toDateString(ref("texte.txtoritxtdat")).as("date"),
123
+ "texte.sesann as session",
124
+ auteursTexte(ref("texte.texcod")).as("auteurs"),
125
+ ];
126
+ }
127
+ const baseQueryTextes = dbSenat
89
128
  .withSchema("dosleg")
90
129
  .selectFrom("texte")
91
130
  .leftJoin("oritxt", "oritxt.oritxtcod", "texte.oritxtcod")
92
131
  .leftJoin("typtxt", "typtxt.typtxtcod", "texte.typtxtcod")
93
- .select(({ eb, ref, val }) => [
94
- "texte.texnum as numero",
95
- "texte.orgcod as code_organisme",
96
- eb
97
- .case()
98
- .when("texte.texurl", "is not", null)
99
- .then(sql `regexp_replace(trim(${ref("texte.texurl")}), '^(.*/)?(.*?)(\\.html)?$', '\\2')`)
100
- .else(null)
101
- .end()
102
- .as("id"),
103
- eb
104
- .case()
105
- .when("texte.typurl", "=", "I")
106
- .then(concat(val("https://www.senat.fr/leg/"), rtrim(ref("texte.texurl"))))
107
- .else(rtrim(ref("texte.texurl")))
108
- .end()
109
- .as("url"),
110
- rtrim(ref("oritxt.oritxtlib")).as("origine"),
111
- "oritxt.oriordre as ordre_origine",
112
- "oritxt.oritxtado as code_adoption",
113
- "oritxt.oritxtmod as modification",
114
- rtrim(ref("typtxt.typtxtlib")).as("type"),
115
- toDateString(ref("texte.txtoritxtdat")).as("date"),
116
- "sesann as session",
117
- auteursTexte(ref("texte.texcod")).as("auteurs"),
118
- ])
119
132
  .orderBy(({ ref }) => orderOrdreOrigineTexte(ref("oritxt.oriordre")));
133
+ const queryTextes = baseQueryTextes
134
+ .leftJoin("lecass", "lecass.lecassidt", "texte.lecassidt")
135
+ .leftJoin("lecture", "lecture.lecidt", "lecass.lecidt")
136
+ .leftJoin("loi", "loi.loicod", "lecture.loicod")
137
+ .select((args) => [
138
+ "loi.signet as signet_dossier",
139
+ ...selectTexteAttributes(args),
140
+ ]);
120
141
  export function textes(lectureAssembleeId) {
121
- return jsonArrayFrom(queryTextes.where("texte.lecassidt", "=", lectureAssembleeId));
142
+ return jsonArrayFrom(baseQueryTextes
143
+ .select(selectTexteAttributes)
144
+ .where("texte.lecassidt", "=", lectureAssembleeId));
122
145
  }
123
146
  export function findAllTextes() {
124
147
  return queryTextes.stream();
@@ -3,7 +3,7 @@ import commandLineArgs from "command-line-args";
3
3
  import fs from "fs-extra";
4
4
  import { DateTime } from "luxon";
5
5
  import path from "path";
6
- import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatDossiersLegislatifsRapportUrls, iterLoadSenatDossiersLegislatifsTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
6
+ import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
7
7
  import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte";
8
8
  import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
9
9
  import { commonOptions } from "./shared/cli_helpers";
@@ -149,7 +149,7 @@ async function retrieveTextes(dataDir, sessions) {
149
149
  parseDocuments: options["parseDocuments"],
150
150
  };
151
151
  for (const session of sessions) {
152
- for (const { item: texteMetadata } of iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session)) {
152
+ for (const { item: texteMetadata } of iterLoadSenatTexteUrls(dataDir, session)) {
153
153
  await processTexte(texteMetadata, originalTextesDir, transformedTextesDir, dlOptions);
154
154
  }
155
155
  }
@@ -164,7 +164,7 @@ async function retrieveRapports(dataDir, sessions) {
164
164
  formats: options["formats"],
165
165
  };
166
166
  for (const session of sessions) {
167
- for (const { item: rapportMetadata } of iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session)) {
167
+ for (const { item: rapportMetadata } of iterLoadSenatRapportUrls(dataDir, session)) {
168
168
  await processRapport(rapportMetadata, originalRapportsDir, dlOptions);
169
169
  }
170
170
  }
@@ -1,14 +1,12 @@
1
- import { iterLoadSenatDossiersLegislatifsRapports } from "../loaders";
1
+ import { iterLoadSenatRapports } from "../loaders";
2
2
  import commandLineArgs from "command-line-args";
3
3
  import { dataDirDefaultOption } from "./shared/cli_helpers";
4
4
  const optionsDefinitions = [dataDirDefaultOption];
5
5
  const options = commandLineArgs(optionsDefinitions);
6
6
  const session = 2024;
7
7
  const sinceCommit = undefined;
8
- for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifsRapports(options["dataDir"], session, {
8
+ for (const { item: rapport } of iterLoadSenatRapports(options["dataDir"], session, {
9
9
  sinceCommit: sinceCommit,
10
10
  })) {
11
- if (!dossierLegislatif["id"]?.includes("r24")) {
12
- console.log(dossierLegislatif["id"]);
13
- }
11
+ console.log(rapport);
14
12
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tricoteuses/senat",
3
- "version": "2.20.30",
3
+ "version": "2.20.32",
4
4
  "description": "Handle French Sénat's open data",
5
5
  "keywords": [
6
6
  "France",