@tricoteuses/senat 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/add-js-extensions-v2.d.ts +1 -0
- package/lib/add-js-extensions-v2.js +23 -0
- package/lib/add-js-extensions.d.ts +1 -0
- package/lib/add-js-extensions.js +17 -0
- package/lib/src/databases_postgres.js +1 -1
- package/lib/src/index.d.ts +24 -24
- package/lib/src/index.js +6 -6
- package/lib/src/loaders.d.ts +12 -12
- package/lib/src/loaders.js +4 -4
- package/lib/src/model/agenda.d.ts +1 -1
- package/lib/src/model/agenda.js +1 -1
- package/lib/src/model/ameli.js +1 -1
- package/lib/src/model/commission.d.ts +2 -2
- package/lib/src/model/commission.js +4 -4
- package/lib/src/model/debats.js +1 -1
- package/lib/src/model/documents.js +1 -1
- package/lib/src/model/dosleg.js +1 -1
- package/lib/src/model/index.d.ts +7 -7
- package/lib/src/model/index.js +7 -7
- package/lib/src/model/questions.js +1 -1
- package/lib/src/model/scrutins.js +1 -1
- package/lib/src/model/seance.d.ts +1 -1
- package/lib/src/model/seance.js +4 -4
- package/lib/src/model/sens.js +1 -1
- package/lib/src/parsers/texte.d.ts +1 -1
- package/lib/src/parsers/texte.js +2 -2
- package/lib/src/raw_types/ameli.js +235 -977
- package/lib/src/raw_types/debats.js +73 -324
- package/lib/src/raw_types/dosleg.js +702 -3051
- package/lib/src/raw_types/questions.js +276 -1086
- package/lib/src/raw_types/sens.js +1547 -7285
- package/lib/src/scripts/convert_data.js +10 -10
- package/lib/src/scripts/convert_xml_to_html.js +1 -1
- package/lib/src/scripts/datautil.d.ts +1 -1
- package/lib/src/scripts/retrieve_agenda.js +9 -9
- package/lib/src/scripts/retrieve_cr_commission.js +8 -8
- package/lib/src/scripts/retrieve_cr_seance.d.ts +1 -1
- package/lib/src/scripts/retrieve_cr_seance.js +8 -8
- package/lib/src/scripts/retrieve_documents.d.ts +1 -1
- package/lib/src/scripts/retrieve_documents.js +7 -7
- package/lib/src/scripts/retrieve_open_data.js +8 -8
- package/lib/src/scripts/retrieve_senateurs_photos.js +4 -4
- package/lib/src/scripts/retrieve_videos.js +9 -9
- package/lib/src/scripts/shared/incremental_import_sql.js +1 -1
- package/lib/src/scripts/shared/staging_import.d.ts +1 -1
- package/lib/src/scripts/validate_prefixed_tables.js +4 -4
- package/lib/src/types/ameli.d.ts +2 -2
- package/lib/src/types/dosleg.d.ts +4 -4
- package/lib/src/types/questions.d.ts +1 -1
- package/lib/src/types/texte.d.ts +1 -1
- package/lib/src/utils/cr_spliting.d.ts +1 -1
- package/lib/src/utils/cr_spliting.js +4 -4
- package/lib/src/utils/nvs-parsing.js +2 -2
- package/lib/src/utils/reunion_odj_building.d.ts +2 -2
- package/lib/src/utils/reunion_odj_building.js +2 -2
- package/lib/src/utils/reunion_parsing.d.ts +2 -2
- package/lib/src/utils/reunion_parsing.js +2 -2
- package/lib/src/utils/scoring.d.ts +3 -3
- package/lib/src/utils/scoring.js +2 -2
- package/lib/src/videos/config.d.ts +1 -1
- package/lib/src/videos/index.d.ts +5 -5
- package/lib/src/videos/index.js +5 -5
- package/lib/src/videos/match.d.ts +2 -2
- package/lib/src/videos/match.js +5 -5
- package/lib/src/videos/pipeline.d.ts +2 -2
- package/lib/src/videos/pipeline.js +4 -4
- package/lib/src/videos/search.d.ts +2 -2
- package/lib/src/videos/search.js +2 -2
- package/lib/tests/incrementalImportSql.test.js +4 -4
- package/lib/tests/prefixedTables.test.js +1 -1
- package/lib/tests/schemaVersion.test.js +1 -1
- package/lib/tests/test_iter_load.test.js +1 -1
- package/lib/tests/validatePrefixedTables.test.js +2 -2
- package/lib/tests/videoMatching.test.js +4 -4
- package/package.json +1 -1
|
@@ -2,16 +2,16 @@ import commandLineArgs from "command-line-args";
|
|
|
2
2
|
import fs from "fs-extra";
|
|
3
3
|
import path from "path";
|
|
4
4
|
import pLimit from "p-limit";
|
|
5
|
-
import * as git from "../git";
|
|
6
|
-
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
|
|
7
|
-
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, ENRICHED_TEXTE_FOLDER, } from "../loaders";
|
|
8
|
-
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAllTextes, findAllRapports, } from "../model";
|
|
9
|
-
import { processRapport, processTexte } from "./retrieve_documents";
|
|
10
|
-
import { buildActesLegislatifs } from "../model/dosleg";
|
|
11
|
-
import { UNDEFINED_SESSION } from "../types/sessions";
|
|
12
|
-
import { getSessionFromDate, getSessionFromSignet } from "./datautil";
|
|
13
|
-
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
14
|
-
import { ensureAndClearDir } from "./shared/util";
|
|
5
|
+
import * as git from "../git.js";
|
|
6
|
+
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets.js";
|
|
7
|
+
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, SCRUTINS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, ENRICHED_TEXTE_FOLDER, } from "../loaders.js";
|
|
8
|
+
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllDossiers, findAllScrutins, findAllOrganismes, findAllQuestions, findAllSens, findAllTextes, findAllRapports, } from "../model/index.js";
|
|
9
|
+
import { processRapport, processTexte } from "./retrieve_documents.js";
|
|
10
|
+
import { buildActesLegislatifs } from "../model/dosleg.js";
|
|
11
|
+
import { UNDEFINED_SESSION } from "../types/sessions.js";
|
|
12
|
+
import { getSessionFromDate, getSessionFromSignet } from "./datautil.js";
|
|
13
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers.js";
|
|
14
|
+
import { ensureAndClearDir } from "./shared/util.js";
|
|
15
15
|
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
16
16
|
const optionsDefinitions = [...commonOptions];
|
|
17
17
|
const options = commandLineArgs(optionsDefinitions);
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import fs from "fs-extra";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import commandLineArgs from "command-line-args";
|
|
4
|
-
import { convertSenatXmlToHtml } from "../conversion_textes";
|
|
4
|
+
import { convertSenatXmlToHtml } from "../conversion_textes.js";
|
|
5
5
|
const optionDefinitions = [
|
|
6
6
|
{ name: "input", alias: "i", type: String, defaultOption: true },
|
|
7
7
|
{ name: "output", alias: "o", type: String },
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Session } from "../types/sessions";
|
|
1
|
+
import { Session } from "../types/sessions.js";
|
|
2
2
|
export declare const STANDARD_DATE_FORMAT = "yyyy-MM-dd";
|
|
3
3
|
export declare const ID_DATE_FORMAT = "yyyyMMdd";
|
|
4
4
|
export declare const AKN_IDENTIFICATION_STRUCTURE_REGEXP: RegExp;
|
|
@@ -2,15 +2,15 @@ import commandLineArgs from "command-line-args";
|
|
|
2
2
|
import fs from "fs-extra";
|
|
3
3
|
import { DateTime } from "luxon";
|
|
4
4
|
import path from "path";
|
|
5
|
-
import * as git from "../git";
|
|
6
|
-
import { AGENDA_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
7
|
-
import { parseAgendaFromFile } from "../model/agenda";
|
|
8
|
-
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
9
|
-
import { ID_DATE_FORMAT } from "./datautil";
|
|
10
|
-
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
11
|
-
import { fetchWithRetry } from "./shared/util";
|
|
12
|
-
import { buildReunionsByBucket } from "../utils/reunion_parsing";
|
|
13
|
-
import { buildSenatDossierIndex } from "../utils/reunion_odj_building";
|
|
5
|
+
import * as git from "../git.js";
|
|
6
|
+
import { AGENDA_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders.js";
|
|
7
|
+
import { parseAgendaFromFile } from "../model/agenda.js";
|
|
8
|
+
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions.js";
|
|
9
|
+
import { ID_DATE_FORMAT } from "./datautil.js";
|
|
10
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers.js";
|
|
11
|
+
import { fetchWithRetry } from "./shared/util.js";
|
|
12
|
+
import { buildReunionsByBucket } from "../utils/reunion_parsing.js";
|
|
13
|
+
import { buildSenatDossierIndex } from "../utils/reunion_odj_building.js";
|
|
14
14
|
const optionsDefinitions = [
|
|
15
15
|
...commonOptions,
|
|
16
16
|
{
|
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import fs, { ensureDir } from "fs-extra";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import * as cheerio from "cheerio";
|
|
4
|
-
import { COMMISSION_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
5
|
-
import { loadAgendaForDate, parseCommissionMetadataFromHtml, linkCRtoCommissionGroup } from "../utils/cr_spliting";
|
|
6
|
-
import { cleanTitle, extractDayH3Sections, parseCommissionCRSectionFromDom } from "../model/commission";
|
|
4
|
+
import { COMMISSION_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders.js";
|
|
5
|
+
import { loadAgendaForDate, parseCommissionMetadataFromHtml, linkCRtoCommissionGroup } from "../utils/cr_spliting.js";
|
|
6
|
+
import { cleanTitle, extractDayH3Sections, parseCommissionCRSectionFromDom } from "../model/commission.js";
|
|
7
7
|
import commandLineArgs from "command-line-args";
|
|
8
|
-
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
9
|
-
import { sessionStartYearFromDate } from "../model/seance";
|
|
10
|
-
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
11
|
-
import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
|
|
12
|
-
import { jaccard, jaccardTokenSim } from "../utils/scoring";
|
|
8
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers.js";
|
|
9
|
+
import { sessionStartYearFromDate } from "../model/seance.js";
|
|
10
|
+
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions.js";
|
|
11
|
+
import { ensureAndClearDir, fetchWithRetry } from "./shared/util.js";
|
|
12
|
+
import { jaccard, jaccardTokenSim } from "../utils/scoring.js";
|
|
13
13
|
import * as git from "../git.js";
|
|
14
14
|
class CommissionCRDownloadError extends Error {
|
|
15
15
|
constructor(message, url) {
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* - downloads the ZIP of comptes-rendus des débats (CRI) from data.senat.fr
|
|
4
4
|
* - extracts XML files, distributes them by session/year
|
|
5
5
|
*/
|
|
6
|
-
import { Session } from "../types/sessions";
|
|
6
|
+
import { Session } from "../types/sessions.js";
|
|
7
7
|
import { CommandLineOptions } from "command-line-args";
|
|
8
8
|
type RetrieveCriOptions = CommandLineOptions & {
|
|
9
9
|
commit?: boolean;
|
|
@@ -8,14 +8,14 @@ import fs, { ensureDirSync } from "fs-extra";
|
|
|
8
8
|
import path from "path";
|
|
9
9
|
import StreamZip from "node-stream-zip";
|
|
10
10
|
import * as cheerio from "cheerio";
|
|
11
|
-
import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
12
|
-
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
13
|
-
import { parseCompteRenduIntervalFromFile, sessionStartYearFromDate } from "../model/seance";
|
|
14
|
-
import { extractSommaireBlocks, makeReunionUid } from "../utils/reunion_parsing";
|
|
15
|
-
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
16
|
-
import { ensureAndClearDir, fetchWithRetry } from "./shared/util";
|
|
17
|
-
import { isNoiseBlock, scoreSommaireBlockForEvent } from "../utils/scoring";
|
|
18
|
-
import { parseYYYYMMDD } from "../utils/date";
|
|
11
|
+
import { AGENDA_FOLDER, COMPTES_RENDUS_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders.js";
|
|
12
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers.js";
|
|
13
|
+
import { parseCompteRenduIntervalFromFile, sessionStartYearFromDate } from "../model/seance.js";
|
|
14
|
+
import { extractSommaireBlocks, makeReunionUid } from "../utils/reunion_parsing.js";
|
|
15
|
+
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions.js";
|
|
16
|
+
import { ensureAndClearDir, fetchWithRetry } from "./shared/util.js";
|
|
17
|
+
import { isNoiseBlock, scoreSommaireBlockForEvent } from "../utils/scoring.js";
|
|
18
|
+
import { parseYYYYMMDD } from "../utils/date.js";
|
|
19
19
|
import * as git from "../git.js";
|
|
20
20
|
const optionsDefinitions = [
|
|
21
21
|
...commonOptions,
|
|
@@ -2,13 +2,13 @@ import commandLineArgs from "command-line-args";
|
|
|
2
2
|
import fs from "fs-extra";
|
|
3
3
|
import { DateTime } from "luxon";
|
|
4
4
|
import path from "path";
|
|
5
|
-
import { convertSenatXmlToHtml } from "../conversion_textes";
|
|
6
|
-
import * as git from "../git";
|
|
7
|
-
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, ENRICHED_TEXTE_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders";
|
|
8
|
-
import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte";
|
|
9
|
-
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
10
|
-
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
11
|
-
import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util";
|
|
5
|
+
import { convertSenatXmlToHtml } from "../conversion_textes.js";
|
|
6
|
+
import * as git from "../git.js";
|
|
7
|
+
import { DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER, ENRICHED_TEXTE_FOLDER, iterLoadSenatRapportUrls, iterLoadSenatTexteUrls, RAPPORT_FOLDER, TEXTE_FOLDER, } from "../loaders.js";
|
|
8
|
+
import { parseExposeDesMotifs, parseTexte, parseTexteFromFile } from "../parsers/texte.js";
|
|
9
|
+
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions.js";
|
|
10
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers.js";
|
|
11
|
+
import { ensureAndClearDir, fetchWithRetry, isOptionEmptyOrHasValue } from "./shared/util.js";
|
|
12
12
|
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
13
13
|
const optionsDefinitions = [
|
|
14
14
|
...commonOptions,
|
|
@@ -10,14 +10,14 @@ import readline from "readline";
|
|
|
10
10
|
import { pipeline, Readable } from "stream";
|
|
11
11
|
import { promisify } from "util";
|
|
12
12
|
import * as windows1252 from "windows-1252";
|
|
13
|
-
import config from "../config";
|
|
14
|
-
import { getChosenDatasets, getEnabledDatasets } from "../datasets";
|
|
15
|
-
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
16
|
-
import { buildIncrementalDatasetImportSql, buildNormalizeStagingSchemaSql } from "./shared/incremental_import_sql";
|
|
17
|
-
import { buildGeneratedTableManifest, getGeneratedDefinitionPath, getGeneratedTableManifestPath, prefixedName, rawTypesDir, senatSchemaName, stagingSchemaName, stripDatasetPrefix, } from "./shared/prefixed_tables";
|
|
18
|
-
import { buildEnsureSchemaVersionTableSql, buildIncrementSchemaVersionSql, buildSchemaStructureFingerprintQuery, } from "./shared/schema_version";
|
|
19
|
-
import { buildExportStagingMetadataStatementsQuery } from "./shared/staging_metadata_sql";
|
|
20
|
-
import { isCopyFromStdinLine, rewriteLineForStagingImport } from "./shared/staging_import";
|
|
13
|
+
import config from "../config.js";
|
|
14
|
+
import { getChosenDatasets, getEnabledDatasets } from "../datasets.js";
|
|
15
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers.js";
|
|
16
|
+
import { buildIncrementalDatasetImportSql, buildNormalizeStagingSchemaSql } from "./shared/incremental_import_sql.js";
|
|
17
|
+
import { buildGeneratedTableManifest, getGeneratedDefinitionPath, getGeneratedTableManifestPath, prefixedName, rawTypesDir, senatSchemaName, stagingSchemaName, stripDatasetPrefix, } from "./shared/prefixed_tables.js";
|
|
18
|
+
import { buildEnsureSchemaVersionTableSql, buildIncrementSchemaVersionSql, buildSchemaStructureFingerprintQuery, } from "./shared/schema_version.js";
|
|
19
|
+
import { buildExportStagingMetadataStatementsQuery } from "./shared/staging_metadata_sql.js";
|
|
20
|
+
import { isCopyFromStdinLine, rewriteLineForStagingImport } from "./shared/staging_import.js";
|
|
21
21
|
const badWindows1252CharacterRegex = /[\u0080-\u009f]/g;
|
|
22
22
|
const optionsDefinitions = [
|
|
23
23
|
...commonOptions,
|
|
@@ -6,10 +6,10 @@ import path from "path";
|
|
|
6
6
|
import { fileURLToPath } from "url";
|
|
7
7
|
// import stream from "stream"
|
|
8
8
|
// import util from "util"
|
|
9
|
-
import * as git from "../git";
|
|
10
|
-
import { findActif as findActifSenateurs } from "../model/sens";
|
|
11
|
-
import { slugify } from "../strings";
|
|
12
|
-
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
9
|
+
import * as git from "../git.js";
|
|
10
|
+
import { findActif as findActifSenateurs } from "../model/sens.js";
|
|
11
|
+
import { slugify } from "../strings.js";
|
|
12
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers.js";
|
|
13
13
|
const optionsDefinitions = [
|
|
14
14
|
...commonOptions,
|
|
15
15
|
{
|
|
@@ -4,16 +4,16 @@ import fs from "fs-extra";
|
|
|
4
4
|
import fsp from "fs/promises";
|
|
5
5
|
import path from "path";
|
|
6
6
|
import * as git from "../git.js";
|
|
7
|
-
import { AGENDA_FOLDER, iterLoadSenatAgendas } from "../loaders";
|
|
8
|
-
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions";
|
|
9
|
-
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers";
|
|
10
|
-
import { getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs } from "../utils/nvs-parsing";
|
|
11
|
-
import { epochToParisDateTime, isAmbiguousTimeOriginal, toTargetEpoch } from "../utils/date";
|
|
7
|
+
import { AGENDA_FOLDER, iterLoadSenatAgendas } from "../loaders.js";
|
|
8
|
+
import { getSessionsFromStart, UNDEFINED_SESSION } from "../types/sessions.js";
|
|
9
|
+
import { assertExistingDirectory, commonOptions } from "./shared/cli_helpers.js";
|
|
10
|
+
import { getAgendaSegmentTimecodes, buildSenatVodMasterM3u8FromNvs } from "../utils/nvs-parsing.js";
|
|
11
|
+
import { epochToParisDateTime, isAmbiguousTimeOriginal, toTargetEpoch } from "../utils/date.js";
|
|
12
12
|
import { pathToFileURL } from "url";
|
|
13
|
-
import { fetchCandidatesForAgenda, fetchText } from "../videos/search";
|
|
14
|
-
import { matchAgendaToVideo } from "../videos/match";
|
|
15
|
-
import { SENAT_DATAS_ROOT, STATS, VIDEOS_ROOT_FOLDER, weights } from "../videos/config";
|
|
16
|
-
import { processBisIfNeeded, processOneReunionMatch, writeIfChanged } from "../videos";
|
|
13
|
+
import { fetchCandidatesForAgenda, fetchText } from "../videos/search.js";
|
|
14
|
+
import { matchAgendaToVideo } from "../videos/match.js";
|
|
15
|
+
import { SENAT_DATAS_ROOT, STATS, VIDEOS_ROOT_FOLDER, weights } from "../videos/config.js";
|
|
16
|
+
import { processBisIfNeeded, processOneReunionMatch, writeIfChanged } from "../videos/index.js";
|
|
17
17
|
const optionsDefinitions = [...commonOptions];
|
|
18
18
|
const options = commandLineArgs(optionsDefinitions);
|
|
19
19
|
let exitCode = 10; // 0: some data changed, 10: no modification
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import type { Dataset } from "../../datasets";
|
|
1
|
+
import type { Dataset } from "../../datasets.js";
|
|
2
2
|
export declare function isCopyFromStdinLine(line: string): boolean;
|
|
3
3
|
export declare function rewriteLineForStagingImport(line: string, dataset: Dataset, stagingSchema: string, inCopyData?: boolean): string;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import commandLineArgs from "command-line-args";
|
|
2
2
|
import fs from "fs-extra";
|
|
3
|
-
import { sql } from "../databases_postgres";
|
|
4
|
-
import { getChosenDatasets, getEnabledDatasets } from "../datasets";
|
|
5
|
-
import { categoriesOption, silentOption } from "./shared/cli_helpers";
|
|
6
|
-
import { extractPrefixedTableNamesFromGeneratedManifest, getGeneratedTableManifestPath, senatSchemaName, } from "./shared/prefixed_tables";
|
|
3
|
+
import { sql } from "../databases_postgres.js";
|
|
4
|
+
import { getChosenDatasets, getEnabledDatasets } from "../datasets.js";
|
|
5
|
+
import { categoriesOption, silentOption } from "./shared/cli_helpers.js";
|
|
6
|
+
import { extractPrefixedTableNamesFromGeneratedManifest, getGeneratedTableManifestPath, senatSchemaName, } from "./shared/prefixed_tables.js";
|
|
7
7
|
const optionsDefinitions = [categoriesOption, silentOption];
|
|
8
8
|
const options = commandLineArgs(optionsDefinitions);
|
|
9
9
|
async function listPrefixedSenatTables(dataset) {
|
package/lib/src/types/ameli.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { TxtAmeli } from "../raw_types/ameli";
|
|
2
|
-
import { Sub } from "../raw_types/ameli";
|
|
1
|
+
import { TxtAmeli } from "../raw_types/ameli.js";
|
|
2
|
+
import { Sub } from "../raw_types/ameli.js";
|
|
3
3
|
export interface TxtAmeliCustom extends TxtAmeli {
|
|
4
4
|
subids?: Sub["id"][];
|
|
5
5
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { Ass, Aud as AudRaw, Auteur as AuteurRaw, DateSeance as DateSeanceRaw, Deccoc, Denrap, Docatt as DocattRaw, Ecr as EcrRaw, Etaloi, Lecass as LecassRaw, Lecassrap as LecassrapRaw, Lecture as LectureRaw, Loi as LoiRaw, Org, Oritxt, Qua, Rap as RapRaw, Raporg, Scr, Texte as TexteRaw, Typatt, Typlec, Typloi, Typtxt, Typurl } from "../raw_types/dosleg";
|
|
2
|
-
import { TxtAmeli } from "../raw_types/ameli";
|
|
3
|
-
import { Debats } from "../raw_types/debats";
|
|
4
|
-
import { TxtAmeliCustom } from "./ameli";
|
|
1
|
+
import { Ass, Aud as AudRaw, Auteur as AuteurRaw, DateSeance as DateSeanceRaw, Deccoc, Denrap, Docatt as DocattRaw, Ecr as EcrRaw, Etaloi, Lecass as LecassRaw, Lecassrap as LecassrapRaw, Lecture as LectureRaw, Loi as LoiRaw, Org, Oritxt, Qua, Rap as RapRaw, Raporg, Scr, Texte as TexteRaw, Typatt, Typlec, Typloi, Typtxt, Typurl } from "../raw_types/dosleg.js";
|
|
2
|
+
import { TxtAmeli } from "../raw_types/ameli.js";
|
|
3
|
+
import { Debats } from "../raw_types/debats.js";
|
|
4
|
+
import { TxtAmeliCustom } from "./ameli.js";
|
|
5
5
|
export type { Deccoc as DecCoc, Denrap as DenRap, Etaloi as EtaLoi, Oritxt as OriTxt, Raporg as RapOrg, Typatt as TypAtt, Typlec as TypLec, Typloi as TypLoi, Typtxt as TypTxt, Typurl as TypUrl, };
|
|
6
6
|
export interface Aud extends AudRaw {
|
|
7
7
|
org?: Org;
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { TamQuestions } from "../raw_types/questions";
|
|
1
|
+
import { TamQuestions } from "../raw_types/questions.js";
|
|
2
2
|
export type { TamQuestions as Question };
|
package/lib/src/types/texte.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import path from "path";
|
|
2
2
|
import * as cheerio from "cheerio";
|
|
3
|
-
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
3
|
+
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders.js";
|
|
4
4
|
import fs from "fs-extra";
|
|
5
|
-
import { sessionStartYearFromDate } from "../model/seance";
|
|
6
|
-
import { frDateToISO, hourShortToStartTime } from "./date";
|
|
7
|
-
import { normalizeSpaces } from "./string_cleaning";
|
|
5
|
+
import { sessionStartYearFromDate } from "../model/seance.js";
|
|
6
|
+
import { frDateToISO, hourShortToStartTime } from "./date.js";
|
|
7
|
+
import { normalizeSpaces } from "./string_cleaning.js";
|
|
8
8
|
function extractWeekStartFromHead($) {
|
|
9
9
|
const og = $('meta[property="og:title"]').attr("content") || $("title").text();
|
|
10
10
|
const m = (og ?? "").toLowerCase().match(/semaine du\s+(\d{1,2}\s+\w+\s+\d{4})/i);
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { XMLParser } from "fast-xml-parser";
|
|
2
|
-
import { dice, normalize } from "./scoring";
|
|
3
|
-
import { decodeHtmlEntities } from "./string_cleaning";
|
|
2
|
+
import { dice, normalize } from "./scoring.js";
|
|
3
|
+
import { decodeHtmlEntities } from "./string_cleaning.js";
|
|
4
4
|
const CHAPTER_MATCH_THRESHOLD = 0.5;
|
|
5
5
|
const xmlParser = new XMLParser({
|
|
6
6
|
ignoreAttributes: false,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import commandLineArgs from "command-line-args";
|
|
2
|
-
import { ActeLegislatif, DossierLegislatifResult } from "../model/dosleg";
|
|
3
|
-
import { AgendaEvent, ReunionOdj } from "../types/agenda";
|
|
2
|
+
import { ActeLegislatif, DossierLegislatifResult } from "../model/dosleg.js";
|
|
3
|
+
import { AgendaEvent, ReunionOdj } from "../types/agenda.js";
|
|
4
4
|
type DossierWithActes = DossierLegislatifResult & {
|
|
5
5
|
actes_legislatifs?: ActeLegislatif[] | null;
|
|
6
6
|
};
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { getSessionsFromStart } from "../types/sessions";
|
|
2
|
-
import { iterLoadSenatDossiersLegislatifs } from "../loaders";
|
|
1
|
+
import { getSessionsFromStart } from "../types/sessions.js";
|
|
2
|
+
import { iterLoadSenatDossiersLegislatifs } from "../loaders.js";
|
|
3
3
|
export function buildOdj(events, dossierBySenatUrl) {
|
|
4
4
|
const byObjet = new Map(); // objet -> set de dossier uids
|
|
5
5
|
let codeEtape = null;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { DateTime } from "luxon";
|
|
2
2
|
import type { AnyNode } from "domhandler";
|
|
3
|
-
import { AgendaEvent, Reunion } from "../types/agenda";
|
|
4
|
-
import { DossierLegislatifResult } from "../model/dosleg";
|
|
3
|
+
import { AgendaEvent, Reunion } from "../types/agenda.js";
|
|
4
|
+
import { DossierLegislatifResult } from "../model/dosleg.js";
|
|
5
5
|
import * as cheerio from "cheerio";
|
|
6
6
|
type KnownType = "SP" | "COM" | "MC" | "OD" | "ID";
|
|
7
7
|
type DossierBySenatUrl = Record<string, DossierLegislatifResult>;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { DateTime } from "luxon";
|
|
2
|
-
import { buildOdj } from "./reunion_odj_building";
|
|
3
|
-
import { norm } from "./string_cleaning";
|
|
2
|
+
import { buildOdj } from "./reunion_odj_building.js";
|
|
3
|
+
import { norm } from "./string_cleaning.js";
|
|
4
4
|
const PARIS = "Europe/Paris";
|
|
5
5
|
const STOPWORDS = new Set([
|
|
6
6
|
"de",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import { AgendaEvent, Reunion } from "../types/agenda";
|
|
2
|
-
import { VideoScoreSignals, VideoScoreWeights } from "../videos/types";
|
|
3
|
-
import { L1Chapter } from "./nvs-parsing";
|
|
1
|
+
import { AgendaEvent, Reunion } from "../types/agenda.js";
|
|
2
|
+
import { VideoScoreSignals, VideoScoreWeights } from "../videos/types.js";
|
|
3
|
+
import { L1Chapter } from "./nvs-parsing.js";
|
|
4
4
|
export declare function jaccard(a: Set<string>, b: Set<string>): number;
|
|
5
5
|
export declare function jaccardTokenSim(a: string, b: string): number;
|
|
6
6
|
export declare function isNoiseBlock(text: string): boolean;
|
package/lib/src/utils/scoring.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { parseISO } from "./reunion_parsing";
|
|
2
|
-
import { normalizeText } from "./string_cleaning";
|
|
1
|
+
import { parseISO } from "./reunion_parsing.js";
|
|
2
|
+
import { normalizeText } from "./string_cleaning.js";
|
|
3
3
|
export function jaccard(a, b) {
|
|
4
4
|
if (!a.size || !b.size)
|
|
5
5
|
return 0;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
export * from "./types";
|
|
2
|
-
export * from "./search";
|
|
3
|
-
export * from "./match";
|
|
4
|
-
export * from "./config";
|
|
5
|
-
export * from "./pipeline";
|
|
1
|
+
export * from "./types.js";
|
|
2
|
+
export * from "./search.js";
|
|
3
|
+
export * from "./match.js";
|
|
4
|
+
export * from "./config.js";
|
|
5
|
+
export * from "./pipeline.js";
|
package/lib/src/videos/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
export * from "./types";
|
|
2
|
-
export * from "./search";
|
|
3
|
-
export * from "./match";
|
|
4
|
-
export * from "./config";
|
|
5
|
-
export * from "./pipeline";
|
|
1
|
+
export * from "./types.js";
|
|
2
|
+
export * from "./search.js";
|
|
3
|
+
export * from "./match.js";
|
|
4
|
+
export * from "./config.js";
|
|
5
|
+
export * from "./pipeline.js";
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { CommandLineOptions } from "command-line-args";
|
|
2
|
-
import { Reunion } from "../types/agenda";
|
|
3
|
-
import { Candidate, MatchResult, MatchWeights } from "./types";
|
|
2
|
+
import { Reunion } from "../types/agenda.js";
|
|
3
|
+
import { Candidate, MatchResult, MatchWeights } from "./types.js";
|
|
4
4
|
export declare function matchOneReunion(args: {
|
|
5
5
|
agenda: Reunion;
|
|
6
6
|
agendaTs: number | null;
|
package/lib/src/videos/match.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { isAmbiguousTimeOriginal } from "../utils/date";
|
|
2
|
-
import { buildSenatVodMasterM3u8FromNvs, getLevel1Chapters, parseDataNvs } from "../utils/nvs-parsing";
|
|
3
|
-
import { dice, getOrgKey, normalize, scoreVideo } from "../utils/scoring";
|
|
4
|
-
import { SENAT_DATAS_ROOT, weights } from "./config";
|
|
5
|
-
import { fetchBuffer } from "./search";
|
|
1
|
+
import { isAmbiguousTimeOriginal } from "../utils/date.js";
|
|
2
|
+
import { buildSenatVodMasterM3u8FromNvs, getLevel1Chapters, parseDataNvs } from "../utils/nvs-parsing.js";
|
|
3
|
+
import { dice, getOrgKey, normalize, scoreVideo } from "../utils/scoring.js";
|
|
4
|
+
import { SENAT_DATAS_ROOT, weights } from "./config.js";
|
|
5
|
+
import { fetchBuffer } from "./search.js";
|
|
6
6
|
export async function matchOneReunion(args) {
|
|
7
7
|
const { agenda, agendaTs, timeAmbigious, candidates, weights, fetchDataNvs, options } = args;
|
|
8
8
|
if (!options["silent"])
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Reunion } from "../types/agenda";
|
|
2
|
-
import { BestMatch, LastForVideo, MatchContext } from "./types";
|
|
1
|
+
import { Reunion } from "../types/agenda.js";
|
|
2
|
+
import { BestMatch, LastForVideo, MatchContext } from "./types.js";
|
|
3
3
|
import { CommandLineOptions } from "command-line-args";
|
|
4
4
|
type VideoPipelineOptions = CommandLineOptions & {
|
|
5
5
|
silent?: boolean;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
|
|
2
|
-
import { epochToParisDateTime } from "../utils/date";
|
|
3
|
-
import { SENAT_DATAS_ROOT } from "./config";
|
|
4
|
-
import { fetchText } from "./search";
|
|
1
|
+
import { AGENDA_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders.js";
|
|
2
|
+
import { epochToParisDateTime } from "../utils/date.js";
|
|
3
|
+
import { SENAT_DATAS_ROOT } from "./config.js";
|
|
4
|
+
import { fetchText } from "./search.js";
|
|
5
5
|
import fs from "fs-extra";
|
|
6
6
|
import fsp from "fs/promises";
|
|
7
7
|
import path from "path";
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { CommandLineOptions } from "command-line-args";
|
|
2
|
-
import { Reunion } from "../types/agenda";
|
|
3
|
-
import { Candidate, SearchParams } from "./types";
|
|
2
|
+
import { Reunion } from "../types/agenda.js";
|
|
3
|
+
import { Candidate, SearchParams } from "./types.js";
|
|
4
4
|
export declare function fetchText(url: string): Promise<string | null>;
|
|
5
5
|
export declare function fetchBuffer(url: string): Promise<Buffer | null>;
|
|
6
6
|
export declare function getAgendaType(agenda: Reunion): string;
|
package/lib/src/videos/search.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { toFRDate } from "../utils/date";
|
|
2
|
-
import { MAX_CANDIDATES, SENAT_VIDEOS_SEARCH_AJAX } from "./config";
|
|
1
|
+
import { toFRDate } from "../utils/date.js";
|
|
2
|
+
import { MAX_CANDIDATES, SENAT_VIDEOS_SEARCH_AJAX } from "./config.js";
|
|
3
3
|
import * as cheerio from "cheerio";
|
|
4
4
|
export async function fetchText(url) {
|
|
5
5
|
const res = await fetch(url);
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
-
import { buildIncrementalDatasetImportSql, buildNormalizeStagingSchemaSql, } from "../src/scripts/shared/incremental_import_sql";
|
|
3
|
-
import { stagingSchemaName } from "../src/scripts/shared/prefixed_tables";
|
|
4
|
-
import { buildExportStagingMetadataStatementsQuery } from "../src/scripts/shared/staging_metadata_sql";
|
|
5
|
-
import { isCopyFromStdinLine, rewriteLineForStagingImport } from "../src/scripts/shared/staging_import";
|
|
2
|
+
import { buildIncrementalDatasetImportSql, buildNormalizeStagingSchemaSql, } from "../src/scripts/shared/incremental_import_sql.js";
|
|
3
|
+
import { stagingSchemaName } from "../src/scripts/shared/prefixed_tables.js";
|
|
4
|
+
import { buildExportStagingMetadataStatementsQuery } from "../src/scripts/shared/staging_metadata_sql.js";
|
|
5
|
+
import { isCopyFromStdinLine, rewriteLineForStagingImport } from "../src/scripts/shared/staging_import.js";
|
|
6
6
|
describe("incremental import SQL", () => {
|
|
7
7
|
it("normalizes staging object names before merge", () => {
|
|
8
8
|
const sql = buildNormalizeStagingSchemaSql("dosleg");
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
-
import { buildGeneratedTableManifest, extractPrefixedTableNamesFromGeneratedManifest, prefixedName, senatSchemaName, stagingSchemaName, stripDatasetPrefix, } from "../src/scripts/shared/prefixed_tables";
|
|
2
|
+
import { buildGeneratedTableManifest, extractPrefixedTableNamesFromGeneratedManifest, prefixedName, senatSchemaName, stagingSchemaName, stripDatasetPrefix, } from "../src/scripts/shared/prefixed_tables.js";
|
|
3
3
|
describe("prefixed table helpers", () => {
|
|
4
4
|
it("builds prefixed table and staging schema names", () => {
|
|
5
5
|
expect(prefixedName("dosleg", "texte")).toBe("dosleg_texte");
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
-
import { buildEnsureSchemaVersionTableSql, buildIncrementSchemaVersionSql, buildSchemaStructureFingerprintQuery, } from "../src/scripts/shared/schema_version";
|
|
2
|
+
import { buildEnsureSchemaVersionTableSql, buildIncrementSchemaVersionSql, buildSchemaStructureFingerprintQuery, } from "../src/scripts/shared/schema_version.js";
|
|
3
3
|
describe("schema version SQL", () => {
|
|
4
4
|
it("creates the senat.version table and seeds version zero", () => {
|
|
5
5
|
const sql = buildEnsureSchemaVersionTableSql("senat");
|
|
@@ -2,7 +2,7 @@ import fs from "fs";
|
|
|
2
2
|
import os from "os";
|
|
3
3
|
import path from "path";
|
|
4
4
|
import { describe, it, expect } from "vitest";
|
|
5
|
-
import { COMPTES_RENDUS_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatComptesRendusSeances } from "../src/loaders";
|
|
5
|
+
import { COMPTES_RENDUS_FOLDER, DATA_TRANSFORMED_FOLDER, iterLoadSenatComptesRendusSeances } from "../src/loaders.js";
|
|
6
6
|
describe("iterLoadSenatComptesRendusSeances", () => {
|
|
7
7
|
it("loads comptes rendus from transformed/session folder", () => {
|
|
8
8
|
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "senat-cr-"));
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { readFileSync } from "fs";
|
|
2
2
|
import { describe, expect, it } from "vitest";
|
|
3
|
-
import { datasets } from "../src/datasets";
|
|
4
|
-
import { extractPrefixedTableNamesFromGeneratedManifest, getGeneratedTableManifestPath, } from "../src/scripts/shared/prefixed_tables";
|
|
3
|
+
import { datasets } from "../src/datasets.js";
|
|
4
|
+
import { extractPrefixedTableNamesFromGeneratedManifest, getGeneratedTableManifestPath, } from "../src/scripts/shared/prefixed_tables.js";
|
|
5
5
|
describe("generated raw types coverage", () => {
|
|
6
6
|
it("maps each generated dataset definition to prefixed senat tables", () => {
|
|
7
7
|
for (const dataset of Object.values(datasets)) {
|
|
@@ -7,10 +7,10 @@
|
|
|
7
7
|
import { describe, it, expect } from "vitest";
|
|
8
8
|
import * as fs from "node:fs/promises";
|
|
9
9
|
import * as path from "node:path";
|
|
10
|
-
import { isAmbiguousTimeOriginal, toFRDate } from "../src/utils/date";
|
|
11
|
-
import { dice, getOrgKey, normalize, scoreVideo } from "../src/utils/scoring";
|
|
12
|
-
import { buildSenatVodMasterM3u8FromNvs, getLevel1Chapters, parseDataNvs } from "../src/utils/nvs-parsing";
|
|
13
|
-
import { extractCandidatesFromSearchHtml, fetchAllSearchPages, fetchBuffer, getAgendaType, SENAT_DATAS_ROOT, } from "../src/videos";
|
|
10
|
+
import { isAmbiguousTimeOriginal, toFRDate } from "../src/utils/date.js";
|
|
11
|
+
import { dice, getOrgKey, normalize, scoreVideo } from "../src/utils/scoring.js";
|
|
12
|
+
import { buildSenatVodMasterM3u8FromNvs, getLevel1Chapters, parseDataNvs } from "../src/utils/nvs-parsing.js";
|
|
13
|
+
import { extractCandidatesFromSearchHtml, fetchAllSearchPages, fetchBuffer, getAgendaType, SENAT_DATAS_ROOT, } from "../src/videos/index.js";
|
|
14
14
|
const LIVE_CACHE_DIR = path.join(process.cwd(), "tests", ".cache", "video-matching-live");
|
|
15
15
|
const FIXTURES_ROOT = path.join(process.cwd(), "tests", "fixtures", "data");
|
|
16
16
|
const GOLD_PATH = path.join(FIXTURES_ROOT, "expected-video-matching.json");
|