iev 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +3 -45
- data/CLAUDE.md +11 -1
- data/Gemfile +3 -2
- data/iev.gemspec +1 -1
- data/lib/iev/bibliography_builder.rb +87 -0
- data/lib/iev/exporter.rb +33 -0
- data/lib/iev/figure_builder.rb +186 -0
- data/lib/iev/version.rb +1 -1
- data/lib/iev.rb +2 -0
- metadata +6 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a965bb1653227f1b4ba8ebc27e9f3a67950d359cec2f809b46d4e0edebec18e9
|
|
4
|
+
data.tar.gz: 5414e80f45caf460970d75052dc7bd7ec931d697f267de0ea2df6abd6a2209ba
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 805143019d38cf0ed15e6526b8b727e681e8e3361b25d3c58ea71f6b2b4cf66eb3dda9882bcb8f038317bc4f1bd84554f6abdd14e4575774c2572a08b45c8865
|
|
7
|
+
data.tar.gz: 204a790f8cc565859a3abb9f64b137b235a559bbb411f52fee15989b467c5860ff2fe3073c65495f5c28e1e01a251375c533139cac07eaefb7341b5016669d1a
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-06-
|
|
3
|
+
# on 2026-06-18 12:23:48 UTC using RuboCop version 1.86.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -11,37 +11,7 @@ Gemspec/RequiredRubyVersion:
|
|
|
11
11
|
Exclude:
|
|
12
12
|
- 'iev.gemspec'
|
|
13
13
|
|
|
14
|
-
# Offense count:
|
|
15
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
16
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
17
|
-
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
18
|
-
Layout/ArgumentAlignment:
|
|
19
|
-
Exclude:
|
|
20
|
-
- 'spec/iev/exporter_spec.rb'
|
|
21
|
-
|
|
22
|
-
# Offense count: 1
|
|
23
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
24
|
-
# Configuration parameters: EnforcedStyleAlignWith.
|
|
25
|
-
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
26
|
-
Layout/BlockAlignment:
|
|
27
|
-
Exclude:
|
|
28
|
-
- 'spec/iev/exporter_spec.rb'
|
|
29
|
-
|
|
30
|
-
# Offense count: 1
|
|
31
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
32
|
-
Layout/BlockEndNewline:
|
|
33
|
-
Exclude:
|
|
34
|
-
- 'spec/iev/exporter_spec.rb'
|
|
35
|
-
|
|
36
|
-
# Offense count: 2
|
|
37
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
38
|
-
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
39
|
-
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
40
|
-
Layout/IndentationWidth:
|
|
41
|
-
Exclude:
|
|
42
|
-
- 'spec/iev/exporter_spec.rb'
|
|
43
|
-
|
|
44
|
-
# Offense count: 65
|
|
14
|
+
# Offense count: 64
|
|
45
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
46
16
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
47
17
|
# URISchemes: http, https
|
|
@@ -65,7 +35,7 @@ Lint/ShadowedException:
|
|
|
65
35
|
Exclude:
|
|
66
36
|
- 'lib/iev/source_parser.rb'
|
|
67
37
|
|
|
68
|
-
# Offense count:
|
|
38
|
+
# Offense count: 27
|
|
69
39
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
70
40
|
Metrics/AbcSize:
|
|
71
41
|
Exclude:
|
|
@@ -128,15 +98,3 @@ Naming/MethodParameterName:
|
|
|
128
98
|
Naming/VariableNumber:
|
|
129
99
|
Exclude:
|
|
130
100
|
- 'spec/iev/exporter_spec.rb'
|
|
131
|
-
|
|
132
|
-
# Offense count: 5
|
|
133
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
134
|
-
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
135
|
-
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
136
|
-
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
137
|
-
# FunctionalMethods: let, let!, subject, watch
|
|
138
|
-
# AllowedMethods: lambda, proc, it
|
|
139
|
-
Style/BlockDelimiters:
|
|
140
|
-
Exclude:
|
|
141
|
-
- 'lib/iev/exporter.rb'
|
|
142
|
-
- 'spec/iev/exporter_spec.rb'
|
data/CLAUDE.md
CHANGED
|
@@ -33,7 +33,9 @@ This is a Ruby gem (`iev`) for working with the International Electrotechnical V
|
|
|
33
33
|
- `SupersessionParser` — parses the REPLACES field for deprecated term relationships.
|
|
34
34
|
- `SubjectAreas` — manages the IEV subject area/section hierarchy. Bundled `data/subject_areas.yaml` contains the area/section tree. URI scheme: `area-{code}` and `section-{code}`.
|
|
35
35
|
- `SubjectAreaConcepts` — builds area and section hierarchy concepts. Uses `ConceptReference` with proper `ref_type` per `ConceptReferenceType`: `"domain"` for thematic area classification, `"section"` for structural section membership. Sets `ConceptData#domain` to area title text.
|
|
36
|
-
- `Exporter` — full export pipeline (Excel/SQLite → Glossarist YAML). Assigns domain and section `ConceptReference` objects via `domain_references_for`. Uses `Glossarist::DatasetRegister` model for `register.yaml`. Sets `schema_version: "3"` on all exported concepts.
|
|
36
|
+
- `Exporter` — full export pipeline (Excel/SQLite → Glossarist YAML). Assigns domain and section `ConceptReference` objects via `domain_references_for`. Uses `Glossarist::DatasetRegister` model for `register.yaml`. Sets `schema_version: "3"` on all exported concepts. Pipeline order: build → subject areas → section relations → figure extraction → reference enrichment → save concepts → save figures → save bibliography → save register.
|
|
37
|
+
- `FigureBuilder` — destructive extraction pass that hoists AsciiDoc image macros (emitted from SIMG tags by `Utilities`) into dataset-shared `Glossarist::Figure` entities. Rewrites inline text to `{{fig:id, display}}` mentions and adds `FigureReference` entries to `ManagedConceptData#figures`. One Figure entity per unique image file; captions merge across languages.
|
|
38
|
+
- `BibliographyBuilder` — collects unique `(source, id)` pairs from every concept's sources (localized and managed) into a `Glossarist::BibliographyData`. Entry ids are normalized with the same rules as `Glossarist::Validation::BibliographyIndex` so consumers can resolve anchors.
|
|
37
39
|
- `Converter::MathmlToAsciimath` — converts MathML markup to AsciiMath using Plurimath.
|
|
38
40
|
- `Utilities` — HTML processing: converts IEV cross-references (`<a href=IEV...>`) to `{{URN, term}}` format (ID first, display text last), handles figures, images, bold tags, and newline normalization.
|
|
39
41
|
|
|
@@ -45,6 +47,14 @@ Per the concept model's `ConceptReferenceType`:
|
|
|
45
47
|
|
|
46
48
|
Each concept's `ManagedConceptData#domains` contains both refs. `ConceptData#domain` (a `LocalizedString`) holds the section/area title text. The `ManagedConcept#related` array holds `broader`/`narrower` relationships for the hierarchy tree.
|
|
47
49
|
|
|
50
|
+
### V3 Output Artifacts
|
|
51
|
+
|
|
52
|
+
An export produces these files alongside the concepts/ directory:
|
|
53
|
+
- `register.yaml` — `Glossarist::DatasetRegister` with section tree, languages, owner, URN.
|
|
54
|
+
- `bibliography.yaml` — single `bibliography:` key wrapping an array of `BibliographyEntry` objects. Entry `id` is the normalized anchor that `Glossarist::Validation::BibliographyIndex` will resolve against.
|
|
55
|
+
- `figures/{fig-id}.yaml` — one `Glossarist::Figure` per unique image. Each concept carries a `FigureReference` on `ManagedConceptData#figures` and an inline `{{fig:id, display}}` mention in the text where the figure appeared.
|
|
56
|
+
- References on localized concepts are populated by `Glossarist::ConceptEnricher#inject_references`, which scans text for `{{urn:...}}`, `<<xref>>`, and `image::` patterns.
|
|
57
|
+
|
|
48
58
|
### Configuration
|
|
49
59
|
|
|
50
60
|
`Iev.configure` yields a `Config` object with:
|
data/Gemfile
CHANGED
|
@@ -3,11 +3,12 @@
|
|
|
3
3
|
source "https://rubygems.org"
|
|
4
4
|
|
|
5
5
|
# Use local glossarist-ruby when available for development.
|
|
6
|
-
# Otherwise falls back to released gem (requires >= 2.8.
|
|
6
|
+
# Otherwise falls back to released gem (requires >= 2.8.15 for
|
|
7
|
+
# BibliographyData, Figure/NonVerbRep, and ConceptEnricher support).
|
|
7
8
|
if File.directory?(File.expand_path("../glossarist-ruby", __dir__))
|
|
8
9
|
gem "glossarist", path: "../glossarist-ruby"
|
|
9
10
|
else
|
|
10
|
-
gem "glossarist", ">= 2.8.
|
|
11
|
+
gem "glossarist", ">= 2.8.15"
|
|
11
12
|
end
|
|
12
13
|
|
|
13
14
|
gem "benchmark"
|
data/iev.gemspec
CHANGED
|
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
|
|
|
23
23
|
|
|
24
24
|
spec.add_dependency "creek", "~> 2.6"
|
|
25
25
|
spec.add_dependency "ferrum", "~> 0.15"
|
|
26
|
-
spec.add_dependency "glossarist", ">= 2.8.
|
|
26
|
+
spec.add_dependency "glossarist", ">= 2.8.15"
|
|
27
27
|
spec.add_dependency "lutaml-model", "~> 0.8.0"
|
|
28
28
|
spec.add_dependency "nokogiri", "~> 1.19"
|
|
29
29
|
spec.add_dependency "plurimath"
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Iev
  # Builds a `Glossarist::BibliographyData` from the sources cited across a
  # concept collection.
  #
  # Each unique `(source, id)` pair from a concept's `ConceptSource#origin`
  # becomes one `BibliographyEntry`. The entry's `id` is the normalized
  # anchor that `Glossarist::Validation::BibliographyIndex` uses for
  # resolution — so the same normalization rules are applied here.
  #
  # Uniqueness is decided on the normalized id, not on the raw label: two
  # citations whose labels differ only in case or in space / slash / colon
  # separators normalize to the same anchor and therefore yield one entry.
  module BibliographyBuilder
    module_function

    # Collects every cited source of every concept into a bibliography.
    #
    # @param concepts [Enumerable<Glossarist::ManagedConcept>]
    # @return [Glossarist::BibliographyData]
    def build(concepts)
      Glossarist::BibliographyData.new(entries: collect_entries(concepts))
    end

    # Walks localized sources first, then the concept's own sources, and
    # returns the de-duplicated entries sorted by id.
    #
    # @param concepts [Enumerable] objects responding to `localizations`
    #   and `sources`
    # @return [Array<Glossarist::BibliographyEntry>]
    def collect_entries(concepts)
      seen = {}
      concepts.each do |concept|
        concept.localizations.each { |l10n| collect_from_l10n(l10n, seen) }
        Array(concept.sources).each { |src| add_source_entry(src, seen) }
      end
      seen.values.sort_by(&:id)
    end
    private_class_method :collect_entries

    # Registers every source returned by `l10n.all_sources` (nil-safe).
    def collect_from_l10n(l10n, seen)
      Array(l10n.all_sources).each { |src| add_source_entry(src, seen) }
    end
    private_class_method :collect_from_l10n

    # Adds one entry for +source+ unless an entry with the same normalized
    # id is already present. The first occurrence wins, so its label and
    # link are the ones that end up in the bibliography.
    #
    # Keying by normalized id (rather than by label) prevents emitting
    # several entries that share one id.
    def add_source_entry(source, seen)
      ref = source_origin_ref(source)
      return unless ref

      seen[normalize_anchor(entry_label(ref))] ||= build_entry(ref, source.origin)
    end
    private_class_method :add_source_entry

    # @return [Object, nil] the origin ref of +source+, or nil when the
    #   source, its origin, its ref, or a non-blank ref source is missing
    def source_origin_ref(source)
      ref = source&.origin&.ref
      return unless ref&.source && !ref.source.strip.empty?

      ref
    end
    private_class_method :source_origin_ref

    # Builds the entry for +ref+; +origin+ supplies the optional link.
    def build_entry(ref, origin)
      label = entry_label(ref)
      Glossarist::BibliographyEntry.new(
        id: normalize_anchor(label),
        reference: label,
        link: origin&.link,
        type: type_for(ref.source),
      )
    end
    private_class_method :build_entry

    # Human-readable label: "<source> <id>", or just the source when the
    # ref carries no id.
    def entry_label(ref)
      [ref.source, ref.id].compact.join(" ").strip
    end
    private_class_method :entry_label

    # Mirrors `Glossarist::Validation::BibliographyIndex#normalize_anchor`
    # so the id we write matches what the validator will look up.
    # Spaces, slashes and colons become underscores, runs of underscores
    # collapse to one, and the result is lower-cased.
    def normalize_anchor(anchor)
      anchor.to_s.gsub(%r{[ /:]}, "_").gsub(/__+/, "_").downcase
    end
    private_class_method :normalize_anchor

    # Maps a source name to an entry type by its prefix; anything that is
    # not matched below is classified as "standard".
    def type_for(source)
      case source.to_s
      when /\A(IEV|VIM|JCGM)/ then "vocabulary"
      when /\AITU/ then "recommendation"
      when /\A(BIPM|BBIPM)/ then "brochure"
      else "standard"
      end
    end
    private_class_method :type_for
  end
end
|
data/lib/iev/exporter.rb
CHANGED
|
@@ -56,13 +56,18 @@ module Iev
|
|
|
56
56
|
collection = build_collection(dataset)
|
|
57
57
|
add_subject_area_concepts(collection) if @include_areas
|
|
58
58
|
build_section_narrower_relations(collection) if @include_areas
|
|
59
|
+
figures = FigureBuilder.extract!(collection)
|
|
60
|
+
enrich_references(collection)
|
|
59
61
|
save_collection(collection)
|
|
62
|
+
save_figures(figures)
|
|
63
|
+
save_bibliography(BibliographyBuilder.build(collection))
|
|
60
64
|
save_register
|
|
61
65
|
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
|
|
62
66
|
|
|
63
67
|
@stats = {
|
|
64
68
|
concept_count: collection.count,
|
|
65
69
|
localized_count: localized_count(collection),
|
|
70
|
+
figure_count: figures.length,
|
|
66
71
|
elapsed_seconds: elapsed,
|
|
67
72
|
}
|
|
68
73
|
collection
|
|
@@ -175,6 +180,34 @@ module Iev
|
|
|
175
180
|
collection.save_grouped_concepts_to_files(concepts_dir.to_s)
|
|
176
181
|
end
|
|
177
182
|
|
|
183
|
+
# Writes one YAML file per figure into the "figures" directory beneath the
# expanded output directory, naming each file after the figure's id.
# Does nothing when +figures+ is empty. A summary line is printed only
# when stdout is a terminal.
def save_figures(figures)
  return if figures.empty?

  target_dir = output_dir.expand_path.join("figures")
  FileUtils.mkdir_p(target_dir)
  figures.each do |fig|
    yaml_file = target_dir.join("#{fig.id}.yaml")
    File.write(yaml_file, fig.to_yaml, encoding: "utf-8")
  end
  return unless $stdout.tty?

  puts "Written #{figures.length} figures to figures/"
end
|
|
194
|
+
|
|
195
|
+
# Serializes +bibliography+ to "bibliography.yaml" in the expanded output
# directory, creating the directory first. Skipped entirely when the
# bibliography has no entries. The entry count is reported only when
# stdout is a terminal.
def save_bibliography(bibliography)
  return if bibliography.entries.empty?

  yaml_file = output_dir.expand_path.join("bibliography.yaml")
  FileUtils.mkdir_p(yaml_file.dirname)
  File.write(yaml_file, bibliography.to_yaml, encoding: "utf-8")
  return unless $stdout.tty?

  count = bibliography.entries.length
  puts "Written bibliography.yaml with #{count} entries"
end
|
|
204
|
+
|
|
205
|
+
# Hands the collection's concepts (as an array) to
# `Glossarist::ConceptEnricher#inject_references`; skipped when the
# collection reports no elements.
def enrich_references(collection)
  return if collection.none?

  enricher = Glossarist::ConceptEnricher.new
  enricher.inject_references(collection.to_a)
end
|
|
210
|
+
|
|
178
211
|
def save_register
|
|
179
212
|
areas = SubjectAreas.all
|
|
180
213
|
sections = build_section_tree(areas)
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Iev
  # Hoists IEV figure references into dataset-shared Figure entities.
  #
  # IEV source data carries figures as inline SIMG tags, which Utilities
  # rewrites to AsciiDoc image macros (+image::/assets/images/parts/{area}/
  # FILE[Figure N - caption]+). This builder walks every concept's
  # localizations, finds those image macros, promotes each to a
  # dataset-shared Glossarist::Figure entity, and rewrites the inline text
  # to a V3 figure mention (+{{fig:id, display}}+).
  #
  # The Figure entity is shared across concepts and languages — captions
  # from different localizations merge into the same {lang => text} hash.
  # The structural link from concept to figure is a FigureReference entry
  # on ManagedConceptData#figures.
  #
  # Extraction is destructive: it mutates DetailedDefinition#content and
  # appends FigureReference entries. Returns the unique Figure entities so
  # the exporter can persist them to figures/{id}.yaml.
  module FigureBuilder
    # URL path prefix emitted by Utilities when converting SIMG tags.
    # Kept in sync with Utilities::IMAGE_PATH_PREFIX (without the macro).
    PATH_PREFIX = "/assets/images/parts"
    private_constant :PATH_PREFIX

    # Matches AsciiDoc image macros emitted by Utilities#process_simg_figures.
    # Captures the numeric area directory, the file name, and the bracketed
    # caption text.
    IMAGE_MACRO_REGEX = /
      image::#{Regexp.escape(PATH_PREFIX)}
      \/(?<area>\d+)\/(?<file>[\w.-]+)\[(?<caption>[^\]]*)\]
    /x

    # Captures "Figure N" label and the trailing caption text, separated by
    # an en dash or a hyphen.
    CAPTION_REGEX = /\A(?<label>Figure\s+\d+)\s*[–-]\s*(?<text>.+)\z/m

    module_function

    # Extracts figures from every localization of every concept.
    #
    # @param collection [Glossarist::ManagedConceptCollection]
    # @return [Array<Glossarist::Figure>] unique figures, sorted by id
    def extract!(collection)
      figures_by_id = {}

      collection.each do |concept|
        concept.localizations.each do |l10n|
          process_localization(l10n, concept, figures_by_id)
        end
      end

      figures_by_id.values.sort_by(&:id)
    end

    # Processes each detailed-definition field of one localization.
    # Localizations without data or without a three-letter language code
    # are skipped.
    def process_localization(l10n, concept, figures_by_id)
      lang = l10n.data&.language_code
      return unless lang && lang.length == 3

      Glossarist::ConceptData.detailed_definition_fields.each do |field|
        process_field(l10n, field, lang, concept, figures_by_id)
      end
    end
    private_class_method :process_localization

    # Rewrites the content of every entry in one field and registers the
    # figures found there. A field whose value is nil is treated as empty
    # (Array() guard), matching the nil-safe handling already used for
    # `concept.data.figures` in add_figure_reference.
    def process_field(l10n, field, lang, concept, figures_by_id)
      Array(l10n.data.public_send(field)).each do |dd|
        # Cheap substring check before running the regex.
        next unless dd.content&.include?("image::")

        rewritten, hits = extract_from_text(dd.content, lang)
        next if hits.empty?

        dd.content = rewritten
        hits.each { |hit| register_figure(hit, concept, figures_by_id) }
      end
    end
    private_class_method :process_field

    # @return [Array<(String, Array<Hash>)>] rewritten text and per-match
    #   figure descriptors ({ id:, identifier:, caption:, lang:, image: })
    def extract_from_text(text, lang)
      hits = []
      rewritten = text.gsub(IMAGE_MACRO_REGEX) do
        hit = build_hit(Regexp.last_match, lang)
        hits << hit
        mention_for(hit)
      end
      [rewritten, hits]
    end
    private_class_method :extract_from_text

    # Turns one regex match into a figure descriptor hash.
    def build_hit(match, lang)
      identifier, caption = parse_caption(match[:caption])
      {
        id: figure_id_for(match[:file]),
        identifier: identifier,
        caption: caption,
        lang: lang,
        image: build_image(match[:area], match[:file]),
      }
    end
    private_class_method :build_hit

    # Builds the image record; src is the prefix, area and file joined
    # with slashes.
    def build_image(area, file)
      Glossarist::FigureImage.new(
        src: "#{PATH_PREFIX}/#{area}/#{file}",
        format: format_for(file),
      )
    end
    private_class_method :build_image

    # Splits bracket text into [identifier, caption].
    #
    # @return [Array<(String, nil), (String, nil)>]
    #   - [nil, nil] for blank text
    #   - ["Figure N", caption] when both parts are present
    #   - ["Figure N", nil] when only the label is present
    #   - [nil, text] for anything else
    def parse_caption(bracket)
      stripped = bracket.to_s.strip
      return [nil, nil] if stripped.empty?

      if (m = stripped.match(CAPTION_REGEX))
        # Collapse any whitespace run inside the label to one space.
        label = m[:label].gsub(/\s+/, " ")
        [label, m[:text].strip]
      elsif stripped.match?(/\AFigure\s+\d+\z/)
        [stripped, nil]
      else
        [nil, stripped]
      end
    end
    private_class_method :parse_caption

    # Figure id: "fig-" plus the file name without its last extension.
    def figure_id_for(file)
      "fig-#{file.sub(/\.[^.]+\z/, '')}"
    end
    private_class_method :figure_id_for

    # Lower-cased file extension without the leading dot.
    def format_for(file)
      File.extname(file).delete_prefix(".").downcase
    end
    private_class_method :format_for

    # Inline mention replacing the image macro: "{{fig:id}}" when there is
    # no display text, otherwise "{{fig:id, identifier - caption}}".
    def mention_for(hit)
      parts = [hit[:identifier], hit[:caption]].compact
      return "{{fig:#{hit[:id]}}}" if parts.empty?

      "{{fig:#{hit[:id]}, #{parts.join(' - ')}}}"
    end
    private_class_method :mention_for

    # Add or merge a figure descriptor into the shared index, and ensure the
    # concept carries a FigureReference to it.
    def register_figure(hit, concept, figures_by_id)
      figure = figures_by_id[hit[:id]] ||= build_figure(hit)
      merge_caption!(figure, hit)
      add_image_if_missing(figure, hit[:image])
      add_figure_reference(concept, hit[:id], hit[:identifier])
    end
    private_class_method :register_figure

    # New figure with no images or captions yet; those are merged in by
    # register_figure.
    def build_figure(hit)
      Glossarist::Figure.new(
        id: hit[:id],
        identifier: hit[:identifier],
        images: [],
        caption: {},
      )
    end
    private_class_method :build_figure

    # Records the caption for the hit's language unless one is already set.
    def merge_caption!(figure, hit)
      return unless hit[:caption]

      figure.caption ||= {}
      figure.caption[hit[:lang]] ||= hit[:caption]
    end
    private_class_method :merge_caption!

    # Appends the image unless one with the same src is already attached.
    def add_image_if_missing(figure, image)
      return if figure.images.any? { |i| i.src == image.src }

      figure.images << image
    end
    private_class_method :add_image_if_missing

    # Appends a FigureReference to the concept unless it already references
    # the same figure id.
    def add_figure_reference(concept, figure_id, display)
      refs = Array(concept.data.figures)
      return if refs.any? { |r| r.entity_id == figure_id }

      concept.data.figures = refs + [
        Glossarist::FigureReference.new(entity_id: figure_id, display: display),
      ]
    end
    private_class_method :add_figure_reference
  end
end
|
data/lib/iev/version.rb
CHANGED
data/lib/iev.rb
CHANGED
|
@@ -21,6 +21,7 @@ module Iev
|
|
|
21
21
|
# IEV dataset URN — single source of truth for all concept references.
|
|
22
22
|
IEV_SOURCE = "urn:iec:std:iec:60050"
|
|
23
23
|
|
|
24
|
+
autoload :BibliographyBuilder, "iev/bibliography_builder"
|
|
24
25
|
autoload :Cli, "iev/cli"
|
|
25
26
|
autoload :Config, "iev/config"
|
|
26
27
|
autoload :Converter, "iev/converter"
|
|
@@ -28,6 +29,7 @@ module Iev
|
|
|
28
29
|
autoload :DataSource, "iev/data_source"
|
|
29
30
|
autoload :DbWriter, "iev/db_writer"
|
|
30
31
|
autoload :Exporter, "iev/exporter"
|
|
32
|
+
autoload :FigureBuilder, "iev/figure_builder"
|
|
31
33
|
autoload :IevCode, "iev/iev_code"
|
|
32
34
|
autoload :Iso639Code, "iev/iso_639_code"
|
|
33
35
|
autoload :Profiler, "iev/profiler"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iev
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.
|
|
4
|
+
version: 0.4.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: creek
|
|
@@ -44,14 +44,14 @@ dependencies:
|
|
|
44
44
|
requirements:
|
|
45
45
|
- - ">="
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: 2.8.
|
|
47
|
+
version: 2.8.15
|
|
48
48
|
type: :runtime
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - ">="
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: 2.8.
|
|
54
|
+
version: 2.8.15
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: lutaml-model
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -195,6 +195,7 @@ files:
|
|
|
195
195
|
- exe/iev
|
|
196
196
|
- iev.gemspec
|
|
197
197
|
- lib/iev.rb
|
|
198
|
+
- lib/iev/bibliography_builder.rb
|
|
198
199
|
- lib/iev/cli.rb
|
|
199
200
|
- lib/iev/cli/command.rb
|
|
200
201
|
- lib/iev/cli/command_helper.rb
|
|
@@ -206,6 +207,7 @@ files:
|
|
|
206
207
|
- lib/iev/data_source.rb
|
|
207
208
|
- lib/iev/db_writer.rb
|
|
208
209
|
- lib/iev/exporter.rb
|
|
210
|
+
- lib/iev/figure_builder.rb
|
|
209
211
|
- lib/iev/iev_code.rb
|
|
210
212
|
- lib/iev/iso_639_2.yaml
|
|
211
213
|
- lib/iev/iso_639_code.rb
|