@tricoteuses/tisseuse 0.7.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +6 -2
- package/README.md +2 -2
- package/dist/html-B_uf_8Zi.js +12736 -0
- package/dist/index.js +1260 -5179
- package/dist/lib/alineas/alineas_numbers.d.ts +5 -0
- package/dist/lib/extractors/action_directives.d.ts +54 -0
- package/dist/lib/extractors/article_portions.d.ts +59 -0
- package/dist/lib/extractors/definitions.d.ts +3 -0
- package/dist/lib/{text_parsers → extractors}/links.d.ts +35 -20
- package/dist/lib/extractors/references.d.ts +8 -0
- package/dist/lib/{table_of_contents.d.ts → extractors/table_of_contents.d.ts} +10 -2
- package/dist/lib/index.d.ts +18 -6
- package/dist/lib/linkers/html.d.ts +45 -0
- package/dist/lib/linkers/markdown.d.ts +15 -0
- package/dist/lib/markdown.d.ts +1 -0
- package/dist/lib/server/alineas/alineas_images_utils.d.ts +88 -0
- package/dist/lib/server/config.d.ts +1 -1
- package/dist/lib/server/databases/index.d.ts +1 -5
- package/dist/lib/server/html_simplifier.d.ts +34 -0
- package/dist/lib/server/index.d.ts +3 -0
- package/dist/lib/server/linkers/html.d.ts +21 -0
- package/dist/lib/server/tables_of_contents.d.ts +2 -0
- package/dist/lib/text_parsers/actions.d.ts +11 -1
- package/dist/lib/text_parsers/articles.d.ts +15 -15
- package/dist/lib/text_parsers/ast.d.ts +8 -3
- package/dist/lib/text_parsers/citations.d.ts +4 -4
- package/dist/lib/text_parsers/dates.d.ts +10 -10
- package/dist/lib/text_parsers/divisions.d.ts +13 -13
- package/dist/lib/text_parsers/helpers.d.ts +2 -1
- package/dist/lib/text_parsers/numbers.d.ts +16 -15
- package/dist/lib/text_parsers/parsers.d.ts +17 -1
- package/dist/lib/text_parsers/portions.d.ts +25 -19
- package/dist/lib/text_parsers/prepositions.d.ts +7 -6
- package/dist/lib/text_parsers/references.d.ts +17 -15
- package/dist/lib/text_parsers/relative_locations.d.ts +9 -9
- package/dist/lib/text_parsers/search_queries.d.ts +7 -0
- package/dist/lib/text_parsers/search_queries.test.d.ts +1 -0
- package/dist/lib/text_parsers/separators.d.ts +3 -3
- package/dist/lib/text_parsers/texts.d.ts +14 -14
- package/dist/lib/text_parsers/transformers.d.ts +3 -2
- package/dist/lib/text_parsers/typography.d.ts +9 -9
- package/dist/scripts/add_links_to_senat_parsed_documents.d.ts +1 -0
- package/dist/scripts/{link_table_of_contents_to_simplified_html.d.ts → add_positions_to_table_of_contents.d.ts} +1 -13
- package/dist/scripts/enrich_assemblee_documents.d.ts +1 -0
- package/dist/scripts/extract_alineas_images.d.ts +71 -0
- package/dist/scripts/generate_alineas_numbers_review.d.ts +24 -0
- package/dist/scripts/{extract_texts_infos.d.ts → index_typesense.d.ts} +1 -0
- package/dist/scripts/merge_reviewed_alineas_numbers.d.ts +16 -0
- package/dist/scripts/simplify_word_html.d.ts +1 -0
- package/dist/server.js +19498 -74
- package/package.json +25 -17
- package/dist/lib/server/databases/tisseuse.d.ts +0 -1
- package/dist/lib/text_parsers/index.d.ts +0 -8
- package/dist/lib/text_parsers/text_titles_infos.json.d.ts +0 -4
- package/dist/scripts/configure.d.ts +0 -1
- package/dist/scripts/extract_assemblee_dossiers_et_documents_infos.d.ts +0 -6
- package/dist/scripts/test.d.ts +0 -0
- /package/dist/lib/{text_parsers/index.test.d.ts → extractors/action_directives.test.d.ts} +0 -0
- /package/dist/lib/{text_parsers/links.test.d.ts → extractors/article_portions.test.d.ts} +0 -0
- /package/dist/{scripts/add_links_to_assemblee_documents.d.ts → lib/extractors/links.test.d.ts} +0 -0
- /package/dist/{scripts/etude_natures_dates_et_titres_textes.d.ts → lib/extractors/references.test.d.ts} +0 -0
- /package/dist/{scripts/extract_jos_infos.d.ts → lib/server/html_simplifier.test.d.ts} +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#!/usr/bin/env npx ts-node
|
|
2
|
+
/**
|
|
3
|
+
* Script d'extraction des images d'alinéa depuis les documents de l'Assemblée nationale
|
|
4
|
+
*
|
|
5
|
+
* Ce script parcourt tous les fichiers HTML des documents et extrait toutes les
|
|
6
|
+
* informations sur les pastilles d'alinéa dans un fichier JSON intermédiaire.
|
|
7
|
+
*
|
|
8
|
+
* Ce fichier JSON est ensuite utilisé par :
|
|
9
|
+
* - `merge_reviewed_alineas_numbers.ts` : pour fusionner les corrections manuelles
|
|
10
|
+
* - `generate_alineas_numbers_review.ts` : pour générer la page HTML de révision
|
|
11
|
+
*
|
|
12
|
+
* ## Usage
|
|
13
|
+
*
|
|
14
|
+
* ```bash
|
|
15
|
+
* npx tsx src/scripts/extract_alineas_images.ts [chemin_vers_documents] [--output fichier.json]
|
|
16
|
+
* ```
|
|
17
|
+
*
|
|
18
|
+
* Par défaut :
|
|
19
|
+
* - Analyse `/home/eraviart/Projects/tricoteuses/assemblee-data/Documents`
|
|
20
|
+
* - Génère `src/lib/alineas/alineas_images_extracted.json`
|
|
21
|
+
*
|
|
22
|
+
* ## Données extraites
|
|
23
|
+
*
|
|
24
|
+
* Pour chaque image unique (par hash) :
|
|
25
|
+
* - hash MD5, données base64, dimensions
|
|
26
|
+
* - Liste de toutes les occurrences avec contexte (document, article, position, voisins)
|
|
27
|
+
*
|
|
28
|
+
* Statistiques pré-calculées :
|
|
29
|
+
* - Nombre d'occurrences, positions en tête d'article
|
|
30
|
+
* - Transitions (prédécesseurs/successeurs)
|
|
31
|
+
*
|
|
32
|
+
* @module extract_alineas_images
|
|
33
|
+
*/
|
|
34
|
+
/**
|
|
35
|
+
* One occurrence of an image within a document.
|
|
36
|
+
*/
|
|
37
|
+
export interface ImageOccurrence {
|
|
38
|
+
documentPath: string; // presumably the path of the document containing this occurrence — confirm against the extractor
|
|
39
|
+
|
+
articleId: string | null; // null is permitted by the type; presumably when the occurrence is not attached to an article — confirm
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Complete data for one unique image (unique by hash).
|
|
43
|
+
*/
|
|
44
|
+
export interface ImageData {
|
|
45
|
+
hash: string; // the module header describes this as an MD5 hash of the image
|
|
46
|
+
base64Src: string; // base64 image data; NOTE(review): whether a data-URI prefix is included is not shown here — confirm
|
|
47
|
+
dimensions: {
|
|
48
|
+
width: number;
|
|
49
|
+
height: number;
|
|
50
|
+
} | null; // null is permitted by the type; presumably when dimensions could not be determined — confirm
|
|
51
|
+
/** Hash of the image after cropping (removal of white/transparent borders) */
|
|
52
|
+
croppedHash: string;
|
|
53
|
+
/** Base64 data of the cropped image (with the data:image/png;base64, prefix) */
|
|
54
|
+
croppedBase64Src: string;
|
|
55
|
+
occurrences: ImageOccurrence[]; // every recorded occurrence of this image
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Complete structure of the extracted JSON file.
|
|
59
|
+
*/
|
|
60
|
+
export interface ExtractedData {
|
|
61
|
+
metadata: {
|
|
62
|
+
extractionDate: string; // NOTE(review): date format (e.g. ISO 8601) is not visible here — confirm
|
|
63
|
+
documentsPath: string; // presumably the documents directory that was scanned — confirm
|
|
64
|
+
documentsAnalyzed: number;
|
|
65
|
+
documentsWithImages: number;
|
|
66
|
+
articlesAnalyzed: number;
|
|
67
|
+
totalOccurrences: number;
|
|
68
|
+
uniqueHashes: number;
|
|
69
|
+
};
|
|
70
|
+
images: Record<string, ImageData>; // presumably keyed by image hash — confirm against the extractor
|
|
71
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/env npx ts-node
|
|
2
|
+
/**
|
|
3
|
+
* Script de génération d'un fichier HTML de révision des images d'alinéa
|
|
4
|
+
*
|
|
5
|
+
* Ce script lit le fichier JSON généré par `extract_alineas_images.ts` et génère
|
|
6
|
+
* un fichier HTML interactif permettant de visualiser chaque image d'alinéa,
|
|
7
|
+
* son hash MD5, et le numéro d'alinéa détecté pour permettre une correction manuelle.
|
|
8
|
+
*
|
|
9
|
+
* - `extract_alineas_images.ts` : pour extraire les images
|
|
10
|
+
* - `merge_reviewed_alineas_numbers.ts` : pour fusionner les corrections
|
|
11
|
+
*
|
|
12
|
+
* ## Usage
|
|
13
|
+
*
|
|
14
|
+
* ```bash
|
|
15
|
+
* npx tsx src/scripts/generate_alineas_numbers_review.ts [fichier_extrait.json] [--output fichier.html]
|
|
16
|
+
* ```
|
|
17
|
+
*
|
|
18
|
+
* Par défaut :
|
|
19
|
+
* - Lit `src/lib/alineas/alineas_images_extracted.json`
|
|
20
|
+
* - Génère `src/lib/alineas/alinea_review.html`
|
|
21
|
+
*
|
|
22
|
+
* @module generate_alineas_numbers_review
|
|
23
|
+
*/
|
|
24
|
+
export {};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env npx ts-node
|
|
2
|
+
/**
|
|
3
|
+
* Script de fusion des corrections manuelles des images d'alinéa
|
|
4
|
+
*
|
|
5
|
+
* Ce script fusionne les corrections manuelles (`alineas_numbers_reviewed.json`)
|
|
6
|
+
* avec le mapping existant (`alineas_numbers.ts`).
|
|
7
|
+
*
|
|
8
|
+
* ## Usage
|
|
9
|
+
*
|
|
10
|
+
* ```bash
|
|
11
|
+
* npx tsx src/scripts/merge_reviewed_alineas_numbers.ts
|
|
12
|
+
* ```
|
|
13
|
+
*
|
|
14
|
+
* @module merge_reviewed_alineas_numbers
|
|
15
|
+
*/
|
|
16
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|