iev 0.4.5 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +98 -21
- data/CLAUDE.md +17 -5
- data/Gemfile +8 -0
- data/README.adoc +10 -3
- data/iev.gemspec +3 -2
- data/lib/iev/cli/command.rb +3 -2
- data/lib/iev/cli/ui.rb +5 -5
- data/lib/iev/config.rb +1 -15
- data/lib/iev/data_source.rb +4 -2
- data/lib/iev/db_writer.rb +1 -0
- data/lib/iev/exporter.rb +106 -21
- data/lib/iev/iso_639_code.rb +2 -1
- data/lib/iev/relaton_db.rb +1 -1
- data/lib/iev/scraper/browser.rb +90 -88
- data/lib/iev/scraper.rb +5 -4
- data/lib/iev/source_parser.rb +9 -10
- data/lib/iev/subject_area_concepts.rb +36 -33
- data/lib/iev/subject_areas.rb +9 -11
- data/lib/iev/term_attrs_parser.rb +1 -1
- data/lib/iev/term_builder.rb +14 -9
- data/lib/iev/utilities.rb +29 -1
- data/lib/iev/version.rb +1 -1
- data/lib/iev.rb +28 -6
- metadata +22 -27
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f5d3397238922342cdb3fb6b5adcf8fa969d2b3d6510742485d7d2a20b1d1a12
|
|
4
|
+
data.tar.gz: 6a82e64e1201a9a2f04cff66d024de1b23643030ce6e37f26ef09bb2ad334b10
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: be3b984bd46c791580a73644e88dccca4b98b708ce0e28c50f72e822abb609573455201b633d0e4d791b9271cdecb20a555b11cc674dda46bde90fcf7eaccb74
|
|
7
|
+
data.tar.gz: e56796a853c58aae578dd5c4fe3fbaf3e18115e1a08e22feb7d5f876d9521dee4d27fb734fb05baa12e297b1dc9ac89b67197cad0b51695b2224c3fdb622f3a5
|
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
|
@@ -1,65 +1,142 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on
|
|
3
|
+
# on 2026-06-13 04:18:23 UTC using RuboCop version 1.86.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
|
8
8
|
|
|
9
|
-
# Offense count:
|
|
9
|
+
# Offense count: 1
|
|
10
|
+
Gemspec/RequiredRubyVersion:
|
|
11
|
+
Exclude:
|
|
12
|
+
- 'iev.gemspec'
|
|
13
|
+
|
|
14
|
+
# Offense count: 2
|
|
10
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
11
|
-
# Configuration parameters:
|
|
16
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
17
|
+
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
18
|
+
Layout/ArgumentAlignment:
|
|
19
|
+
Exclude:
|
|
20
|
+
- 'spec/iev/exporter_spec.rb'
|
|
21
|
+
|
|
22
|
+
# Offense count: 1
|
|
23
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
24
|
+
# Configuration parameters: EnforcedStyleAlignWith.
|
|
25
|
+
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
26
|
+
Layout/BlockAlignment:
|
|
27
|
+
Exclude:
|
|
28
|
+
- 'spec/iev/exporter_spec.rb'
|
|
29
|
+
|
|
30
|
+
# Offense count: 1
|
|
31
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
32
|
+
Layout/BlockEndNewline:
|
|
33
|
+
Exclude:
|
|
34
|
+
- 'spec/iev/exporter_spec.rb'
|
|
35
|
+
|
|
36
|
+
# Offense count: 2
|
|
37
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
38
|
+
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
39
|
+
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
40
|
+
Layout/IndentationWidth:
|
|
41
|
+
Exclude:
|
|
42
|
+
- 'spec/iev/exporter_spec.rb'
|
|
43
|
+
|
|
44
|
+
# Offense count: 65
|
|
45
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
46
|
+
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
12
47
|
# URISchemes: http, https
|
|
13
48
|
Layout/LineLength:
|
|
14
|
-
|
|
15
|
-
- 'iev.gemspec'
|
|
16
|
-
- 'lib/iev/cli/command.rb'
|
|
17
|
-
- 'lib/iev/cli/command_helper.rb'
|
|
18
|
-
- 'lib/iev/db_cache.rb'
|
|
19
|
-
- 'lib/iev/source_parser.rb'
|
|
20
|
-
- 'spec/acceptance/db2yaml_spec.rb'
|
|
21
|
-
- 'spec/acceptance/xlsx2yaml_spec.rb'
|
|
22
|
-
- 'spec/iev_spec.rb'
|
|
49
|
+
Enabled: false
|
|
23
50
|
|
|
24
|
-
# Offense count:
|
|
51
|
+
# Offense count: 19
|
|
25
52
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
26
53
|
Lint/DuplicateBranch:
|
|
27
54
|
Exclude:
|
|
28
55
|
- 'lib/iev/source_parser.rb'
|
|
29
|
-
- 'lib/iev/
|
|
56
|
+
- 'lib/iev/utilities.rb'
|
|
30
57
|
|
|
31
58
|
# Offense count: 1
|
|
32
59
|
Lint/MixedRegexpCaptureTypes:
|
|
33
60
|
Exclude:
|
|
34
61
|
- 'lib/iev/term_builder.rb'
|
|
35
62
|
|
|
36
|
-
# Offense count:
|
|
63
|
+
# Offense count: 2
|
|
64
|
+
Lint/ShadowedException:
|
|
65
|
+
Exclude:
|
|
66
|
+
- 'lib/iev/source_parser.rb'
|
|
67
|
+
|
|
68
|
+
# Offense count: 26
|
|
37
69
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
38
70
|
Metrics/AbcSize:
|
|
39
71
|
Exclude:
|
|
72
|
+
- 'lib/iev/cli/command.rb'
|
|
73
|
+
- 'lib/iev/cli/command_helper.rb'
|
|
40
74
|
- 'lib/iev/converter/mathml_to_asciimath.rb'
|
|
41
75
|
- 'lib/iev/db_writer.rb'
|
|
76
|
+
- 'lib/iev/exporter.rb'
|
|
77
|
+
- 'lib/iev/scraper/page_parser.rb'
|
|
42
78
|
- 'lib/iev/source_parser.rb'
|
|
79
|
+
- 'lib/iev/subject_area_concepts.rb'
|
|
80
|
+
- 'lib/iev/subject_areas.rb'
|
|
43
81
|
- 'lib/iev/term_builder.rb'
|
|
82
|
+
- 'lib/iev/utilities.rb'
|
|
83
|
+
- 'spec/iev/supersession_parser_spec.rb'
|
|
44
84
|
|
|
45
|
-
# Offense count:
|
|
85
|
+
# Offense count: 15
|
|
46
86
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
47
87
|
Metrics/CyclomaticComplexity:
|
|
48
88
|
Exclude:
|
|
49
|
-
- 'lib/iev.rb'
|
|
50
89
|
- 'lib/iev/converter/mathml_to_asciimath.rb'
|
|
90
|
+
- 'lib/iev/exporter.rb'
|
|
91
|
+
- 'lib/iev/scraper/page_parser.rb'
|
|
51
92
|
- 'lib/iev/source_parser.rb'
|
|
93
|
+
- 'lib/iev/subject_areas.rb'
|
|
52
94
|
- 'lib/iev/term_builder.rb'
|
|
95
|
+
- 'lib/iev/utilities.rb'
|
|
53
96
|
|
|
54
|
-
# Offense count:
|
|
97
|
+
# Offense count: 36
|
|
55
98
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
56
99
|
Metrics/MethodLength:
|
|
57
|
-
Max:
|
|
100
|
+
Max: 73
|
|
101
|
+
|
|
102
|
+
# Offense count: 1
|
|
103
|
+
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
|
104
|
+
Metrics/ParameterLists:
|
|
105
|
+
Max: 7
|
|
58
106
|
|
|
59
|
-
# Offense count:
|
|
107
|
+
# Offense count: 8
|
|
60
108
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
61
109
|
Metrics/PerceivedComplexity:
|
|
62
110
|
Exclude:
|
|
63
|
-
- 'lib/iev.rb'
|
|
64
111
|
- 'lib/iev/converter/mathml_to_asciimath.rb'
|
|
112
|
+
- 'lib/iev/exporter.rb'
|
|
113
|
+
- 'lib/iev/scraper/page_parser.rb'
|
|
114
|
+
- 'lib/iev/subject_areas.rb'
|
|
65
115
|
- 'lib/iev/term_builder.rb'
|
|
116
|
+
|
|
117
|
+
# Offense count: 1
|
|
118
|
+
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
119
|
+
# AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
|
|
120
|
+
Naming/MethodParameterName:
|
|
121
|
+
Exclude:
|
|
122
|
+
- 'lib/iev/subject_areas.rb'
|
|
123
|
+
|
|
124
|
+
# Offense count: 2
|
|
125
|
+
# Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
|
|
126
|
+
# SupportedStyles: snake_case, normalcase, non_integer
|
|
127
|
+
# AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
|
|
128
|
+
Naming/VariableNumber:
|
|
129
|
+
Exclude:
|
|
130
|
+
- 'spec/iev/exporter_spec.rb'
|
|
131
|
+
|
|
132
|
+
# Offense count: 5
|
|
133
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
134
|
+
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
135
|
+
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
136
|
+
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
137
|
+
# FunctionalMethods: let, let!, subject, watch
|
|
138
|
+
# AllowedMethods: lambda, proc, it
|
|
139
|
+
Style/BlockDelimiters:
|
|
140
|
+
Exclude:
|
|
141
|
+
- 'lib/iev/exporter.rb'
|
|
142
|
+
- 'spec/iev/exporter_spec.rb'
|
data/CLAUDE.md
CHANGED
|
@@ -27,15 +27,23 @@ This is a Ruby gem (`iev`) for working with the International Electrotechnical V
|
|
|
27
27
|
|
|
28
28
|
### Key Modules
|
|
29
29
|
|
|
30
|
-
- `TermBuilder` — the core converter that turns a spreadsheet row into a `Glossarist::LocalizedConcept`. Handles definition splitting (notes/examples extraction), term designation parsing, and source parsing.
|
|
30
|
+
- `TermBuilder` — the core converter that turns a spreadsheet row into a `Glossarist::LocalizedConcept`. Handles definition splitting (notes/examples extraction), term designation parsing, and source parsing. Sets `ConceptData#domain` to section/area title text (not URI).
|
|
31
31
|
- `SourceParser` — parses the SOURCE column from IEV exports, normalizing references (CEI→IEC, UIT→ITU, etc.) and extracting ref/clause/relationship using extensive regex matching.
|
|
32
|
-
- `TermAttrsParser` — parses the TERMATTRIBUTE field (gender, plurality, part of speech, geographical area,
|
|
32
|
+
- `TermAttrsParser` — parses the TERMATTRIBUTE field (gender, plurality, part of speech, geographical area, usage_info).
|
|
33
33
|
- `SupersessionParser` — parses the REPLACES field for deprecated term relationships.
|
|
34
34
|
- `SubjectAreas` — manages the IEV subject area/section hierarchy. Bundled `data/subject_areas.yaml` contains the area/section tree. URI scheme: `area-{code}` and `section-{code}`.
|
|
35
|
-
- `SubjectAreaConcepts` — builds area and section hierarchy concepts
|
|
36
|
-
- `Exporter` — full export pipeline (Excel/SQLite → Glossarist YAML). Assigns domain `ConceptReference` objects via `domain_references_for`.
|
|
35
|
+
- `SubjectAreaConcepts` — builds area and section hierarchy concepts. Uses `ConceptReference` with proper `ref_type` per `ConceptReferenceType`: `"domain"` for thematic area classification, `"section"` for structural section membership. Sets `ConceptData#domain` to area title text.
|
|
36
|
+
- `Exporter` — full export pipeline (Excel/SQLite → Glossarist YAML). Assigns domain and section `ConceptReference` objects via `domain_references_for`. Uses `Glossarist::DatasetRegister` model for `register.yaml`. Sets `schema_version: "3"` on all exported concepts.
|
|
37
37
|
- `Converter::MathmlToAsciimath` — converts MathML markup to AsciiMath using Plurimath.
|
|
38
|
-
- `Utilities` — HTML processing: converts IEV cross-references (`<a href=IEV...>`) to `{{
|
|
38
|
+
- `Utilities` — HTML processing: converts IEV cross-references (`<a href=IEV...>`) to `{{URN, term}}` format (ID first, display text last), handles figures, images, bold tags, and newline normalization.
|
|
39
|
+
|
|
40
|
+
### Domain/Section Model
|
|
41
|
+
|
|
42
|
+
Per the concept model's `ConceptReferenceType`:
|
|
43
|
+
- `"domain"` — thematic/subject-area classification (area level, e.g. "103")
|
|
44
|
+
- `"section"` — structural section membership (section level, e.g. "103-01")
|
|
45
|
+
|
|
46
|
+
Each concept's `ManagedConceptData#domains` contains both refs. `ConceptData#domain` (a `LocalizedString`) holds the section/area title text. The `ManagedConcept#related` array holds `broader`/`narrower` relationships for the hierarchy tree.
|
|
39
47
|
|
|
40
48
|
### Configuration
|
|
41
49
|
|
|
@@ -47,7 +55,11 @@ This is a Ruby gem (`iev`) for working with the International Electrotechnical V
|
|
|
47
55
|
## Key Conventions
|
|
48
56
|
|
|
49
57
|
- Ruby >= 3.2.0 required
|
|
58
|
+
- All constants live under `Iev::` namespace (e.g. `Iev::IEV_SOURCE`, not top-level `IEV_SOURCE`)
|
|
59
|
+
- `Iev.config` / `Iev.configure` / `Iev.reset_config!` are defined directly in `lib/iev.rb` — they must be available at load time without triggering autoload
|
|
50
60
|
- `plurimath` and `unitsml` are optional runtime dependencies — loaded with `rescue LoadError`, so the `DataSource`/`Db` APIs work without them
|
|
51
61
|
- The IEV Excel export format is specific to IEC-internal use; column structure is documented in README.adoc
|
|
52
62
|
- Language codes: the spreadsheet uses ISO 639-1 (2-char like "en"), internally converted to ISO 639-2/3 (3-char like "eng") via `Iso639Code` and `DataConversions`
|
|
53
63
|
- `DataConversions` is a refinement (`using DataConversions`) that adds `.sanitize` and `.decode_html` methods to String
|
|
64
|
+
- `IevCode` is the single source of truth for IEV code decomposition — always use it instead of manual `split("-")` parsing
|
|
65
|
+
- Schema version 3: all exported concepts use `schema_version: "3"`, which supports `annotations`, V3 concept sources, and structured references
|
data/Gemfile
CHANGED
|
@@ -2,6 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
source "https://rubygems.org"
|
|
4
4
|
|
|
5
|
+
# Use local glossarist-ruby when available for development.
|
|
6
|
+
# Otherwise falls back to released gem (requires >= 2.8.2 for tags support).
|
|
7
|
+
if File.directory?(File.expand_path("../glossarist-ruby", __dir__))
|
|
8
|
+
gem "glossarist", path: "../glossarist-ruby"
|
|
9
|
+
else
|
|
10
|
+
gem "glossarist", ">= 2.8.2"
|
|
11
|
+
end
|
|
12
|
+
|
|
5
13
|
gem "benchmark"
|
|
6
14
|
gem "canon"
|
|
7
15
|
gem "openssl"
|
data/README.adoc
CHANGED
|
@@ -228,7 +228,7 @@ There are these data types inside the term attribute field. Make sure you split
|
|
|
228
228
|
We need to parse out all NOTEs and EXAMPLEs and normalize them.
|
|
229
229
|
|
|
230
230
|
For all `This links to <a href=IEV112-01-01>quantity</a>`, we parse them and replace with:
|
|
231
|
-
`This links to {{
|
|
231
|
+
`This links to {{IEV:112-01-01, quantity}}`.
|
|
232
232
|
|
|
233
233
|
e.g.
|
|
234
234
|
|
|
@@ -300,9 +300,9 @@ notes:
|
|
|
300
300
|
|
|
301
301
|
[source,yaml]
|
|
302
302
|
----
|
|
303
|
-
definition: {{
|
|
303
|
+
definition: {{IEV:112-01-01, quantity}} which keeps the same value under particular circumstances, or which results from theoretical considerations
|
|
304
304
|
examples:
|
|
305
|
-
- {{
|
|
305
|
+
- {{IEV:103-05-26, time constant}}, equilibrium constant for a chemical reaction, {{IEV:112-03-09, fundamental physical constant}}.
|
|
306
306
|
----
|
|
307
307
|
|
|
308
308
|
|
|
@@ -582,6 +582,10 @@ of columns during export:
|
|
|
582
582
|
| Derived from `IEVREF`
|
|
583
583
|
| The IEVREF pattern `AAA-BB-CC` is split. Creates two `ConceptReference` objects with `ref_type: "domain"` and `source: "urn:iec:std:iec:60050"` (IEC URN per IEC URN specification): `area-AAA` and `section-AAA-BB`. For example, `103-01-02` produces `area-103` + `section-103-01`.
|
|
584
584
|
|
|
585
|
+
| `ManagedConceptData#tags`
|
|
586
|
+
| Derived from `IEVREF`
|
|
587
|
+
| Plain string tags for grouping and filtering. Derived from the IEV subject area hierarchy: includes the area title (e.g. `"Mathematics - Functions"`) and section title (e.g. `"General concepts"`).
|
|
588
|
+
|
|
585
589
|
| `LocalizedConcept#classification`
|
|
586
590
|
| `SYNONYM1STATUS`
|
|
587
591
|
| Maps localized classification values: Chinese/Russian/Spanish `"admitido"` to `"admitted"`, various forms of `"preferred"` similarly; other values lowercased as-is.
|
|
@@ -749,6 +753,9 @@ data:
|
|
|
749
753
|
- concept_id: section-103-01
|
|
750
754
|
source: urn:iec:std:iec:60050
|
|
751
755
|
ref_type: domain
|
|
756
|
+
tags:
|
|
757
|
+
- "Mathematics - Functions"
|
|
758
|
+
- "General concepts"
|
|
752
759
|
----
|
|
753
760
|
|
|
754
761
|
The `ref_type: domain` distinguishes domain references from other
|
data/iev.gemspec
CHANGED
|
@@ -22,14 +22,15 @@ Gem::Specification.new do |spec|
|
|
|
22
22
|
spec.required_ruby_version = Gem::Requirement.new(">= 3.2.0")
|
|
23
23
|
|
|
24
24
|
spec.add_dependency "creek", "~> 2.6"
|
|
25
|
-
spec.add_dependency "glossarist", "~> 2.6", ">= 2.6.7"
|
|
26
25
|
spec.add_dependency "ferrum", "~> 0.15"
|
|
26
|
+
spec.add_dependency "glossarist", ">= 2.8.2"
|
|
27
|
+
spec.add_dependency "lutaml-model", "~> 0.8.0"
|
|
27
28
|
spec.add_dependency "nokogiri", "~> 1.19"
|
|
28
29
|
spec.add_dependency "plurimath"
|
|
29
|
-
spec.add_dependency "lutaml-model", "~> 0.8.0"
|
|
30
30
|
spec.add_dependency "relaton", ">= 2.0.0", "< 3"
|
|
31
31
|
spec.add_dependency "sequel", "~> 5.40"
|
|
32
32
|
spec.add_dependency "sqlite3", "~> 1.7"
|
|
33
33
|
spec.add_dependency "thor", "~> 1.0"
|
|
34
34
|
spec.add_dependency "unitsml"
|
|
35
|
+
spec.metadata["rubygems_mfa_required"] = "true"
|
|
35
36
|
end
|
data/lib/iev/cli/command.rb
CHANGED
|
@@ -142,14 +142,15 @@ module Iev
|
|
|
142
142
|
summary
|
|
143
143
|
end
|
|
144
144
|
|
|
145
|
-
desc "subject_areas",
|
|
145
|
+
desc "subject_areas",
|
|
146
|
+
"Fetch IEV subject areas and sections from Electropedia."
|
|
146
147
|
option :output, desc: "Output YAML file (default: stdout)", aliases: :o
|
|
147
148
|
option :refresh, type: :boolean, default: false,
|
|
148
149
|
desc: "Force re-fetch even if cached"
|
|
149
150
|
def subject_areas
|
|
150
151
|
if options[:refresh]
|
|
151
152
|
cache_path = File.join(Iev.config.cache_dir, "subject_areas.yaml")
|
|
152
|
-
FileUtils.rm_f(cache_path)
|
|
153
|
+
FileUtils.rm_f(cache_path)
|
|
153
154
|
end
|
|
154
155
|
|
|
155
156
|
result = Iev::SubjectAreas.fetch
|
data/lib/iev/cli/ui.rb
CHANGED
|
@@ -12,12 +12,12 @@ module Iev
|
|
|
12
12
|
module Ui
|
|
13
13
|
module_function
|
|
14
14
|
|
|
15
|
-
def debug(*
|
|
16
|
-
Helper.cli_out(:debug, *
|
|
15
|
+
def debug(*)
|
|
16
|
+
Helper.cli_out(:debug, *)
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
-
def warn(*
|
|
20
|
-
Helper.cli_out(:warn, *
|
|
19
|
+
def warn(*)
|
|
20
|
+
Helper.cli_out(:warn, *)
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
# Prints progress message which will be replaced on next call.
|
|
@@ -52,7 +52,7 @@ module Iev
|
|
|
52
52
|
|
|
53
53
|
def cli_out(level, *args)
|
|
54
54
|
topic = args[0].is_a?(Symbol) ? args.shift : nil
|
|
55
|
-
message = args.
|
|
55
|
+
message = args.join(" ").chomp
|
|
56
56
|
ui_tag = Thread.current[:iev_ui_tag]
|
|
57
57
|
|
|
58
58
|
return unless should_out?(level, topic)
|
data/lib/iev/config.rb
CHANGED
|
@@ -9,23 +9,9 @@ module Iev
|
|
|
9
9
|
attr_accessor :data_path, :cache_dir, :remote_base_url
|
|
10
10
|
|
|
11
11
|
def initialize
|
|
12
|
-
@data_path = ENV
|
|
12
|
+
@data_path = ENV.fetch("IEV_DATA_PATH", nil)
|
|
13
13
|
@cache_dir = ENV["IEV_CACHE_DIR"] || File.join(Dir.tmpdir, "iev-cache")
|
|
14
14
|
@remote_base_url = DEFAULT_REMOTE_BASE_URL
|
|
15
15
|
end
|
|
16
16
|
end
|
|
17
|
-
|
|
18
|
-
class << self
|
|
19
|
-
def config
|
|
20
|
-
@config ||= Config.new
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
def configure
|
|
24
|
-
yield(config) if block_given?
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
def reset_config!
|
|
28
|
-
@config = nil
|
|
29
|
-
end
|
|
30
|
-
end
|
|
31
17
|
end
|
data/lib/iev/data_source.rb
CHANGED
|
@@ -63,7 +63,8 @@ module Iev
|
|
|
63
63
|
path = File.join(data_path, "concept-#{code}.yaml")
|
|
64
64
|
return nil unless File.exist?(path)
|
|
65
65
|
|
|
66
|
-
YAML.safe_load(File.read(path, encoding: "utf-8"),
|
|
66
|
+
YAML.safe_load(File.read(path, encoding: "utf-8"),
|
|
67
|
+
permitted_classes: [Date, Time])
|
|
67
68
|
end
|
|
68
69
|
|
|
69
70
|
def from_remote(code)
|
|
@@ -101,7 +102,8 @@ module Iev
|
|
|
101
102
|
cache_path = cache_file_path(filename)
|
|
102
103
|
return nil unless File.exist?(cache_path)
|
|
103
104
|
|
|
104
|
-
YAML.safe_load(File.read(cache_path, encoding: "utf-8"),
|
|
105
|
+
YAML.safe_load(File.read(cache_path, encoding: "utf-8"),
|
|
106
|
+
permitted_classes: [Date, Time])
|
|
105
107
|
end
|
|
106
108
|
|
|
107
109
|
def write_cache(filename, data)
|
data/lib/iev/db_writer.rb
CHANGED
data/lib/iev/exporter.rb
CHANGED
|
@@ -57,6 +57,7 @@ module Iev
|
|
|
57
57
|
add_subject_area_concepts(collection) if @include_areas
|
|
58
58
|
build_section_narrower_relations(collection) if @include_areas
|
|
59
59
|
save_collection(collection)
|
|
60
|
+
save_register
|
|
60
61
|
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
|
|
61
62
|
|
|
62
63
|
@stats = {
|
|
@@ -140,11 +141,16 @@ module Iev
|
|
|
140
141
|
term = TermBuilder.build_from(row)
|
|
141
142
|
next unless term
|
|
142
143
|
|
|
144
|
+
# Parse IevCode once per concept — used by all helpers below.
|
|
145
|
+
code = IevCode.new(term.id)
|
|
146
|
+
|
|
143
147
|
concept = concept_index[term.id] ||= begin
|
|
144
148
|
c = Glossarist::ManagedConcept.new(data: { "id" => term.id })
|
|
145
149
|
c.uuid = term.id
|
|
146
|
-
c.
|
|
147
|
-
|
|
150
|
+
c.schema_version = "3"
|
|
151
|
+
c.data.domains = domain_references_for(code)
|
|
152
|
+
c.data.tags = tags_for(code)
|
|
153
|
+
add_section_broader(c, code)
|
|
148
154
|
collection.store(c)
|
|
149
155
|
c
|
|
150
156
|
end
|
|
@@ -169,34 +175,97 @@ module Iev
|
|
|
169
175
|
collection.save_grouped_concepts_to_files(concepts_dir.to_s)
|
|
170
176
|
end
|
|
171
177
|
|
|
172
|
-
def
|
|
173
|
-
|
|
178
|
+
def save_register
|
|
179
|
+
areas = SubjectAreas.all
|
|
180
|
+
sections = build_section_tree(areas)
|
|
181
|
+
|
|
182
|
+
register = Glossarist::DatasetRegister.new(
|
|
183
|
+
schema_type: "glossarist",
|
|
184
|
+
schema_version: "3",
|
|
185
|
+
id: "iev",
|
|
186
|
+
ref: "IEC 60050:2011",
|
|
187
|
+
year: 2011,
|
|
188
|
+
urn: IEV_SOURCE,
|
|
189
|
+
urn_aliases: ["#{IEV_SOURCE}*"],
|
|
190
|
+
status: "current",
|
|
191
|
+
owner: "IEC",
|
|
192
|
+
source_repo: "https://github.com/glossarist/iev-data",
|
|
193
|
+
tags: %w[electrotechnical vocabulary iec],
|
|
194
|
+
languages: %w[eng fra],
|
|
195
|
+
language_order: %w[eng fra],
|
|
196
|
+
ordering: "systematic",
|
|
197
|
+
sections: sections,
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
register_path = output_dir.expand_path.join("register.yaml")
|
|
201
|
+
FileUtils.mkdir_p(register_path.dirname)
|
|
202
|
+
File.write(register_path, register.to_yaml, encoding: "utf-8")
|
|
203
|
+
puts "Written register.yaml with #{sections.length} areas" if $stdout.tty?
|
|
174
204
|
end
|
|
175
205
|
|
|
176
|
-
|
|
206
|
+
def build_section_tree(areas)
|
|
207
|
+
areas.sort_by { |a| a.code.to_i }.map do |area|
|
|
208
|
+
children = area.sections.sort_by do |s|
|
|
209
|
+
s.code.split("-").map(&:to_i)
|
|
210
|
+
end.map do |sec|
|
|
211
|
+
Glossarist::Section.new(
|
|
212
|
+
id: sec.code,
|
|
213
|
+
names: { "eng" => sec.title },
|
|
214
|
+
)
|
|
215
|
+
end
|
|
177
216
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
refs << Glossarist::ConceptReference.new(
|
|
183
|
-
concept_id: code.area_uri,
|
|
184
|
-
source: IEV_SOURCE,
|
|
185
|
-
ref_type: "domain",
|
|
217
|
+
Glossarist::Section.new(
|
|
218
|
+
id: area.code,
|
|
219
|
+
names: { "eng" => area.title },
|
|
220
|
+
children: children.empty? ? nil : children,
|
|
186
221
|
)
|
|
187
222
|
end
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def localized_count(collection)
|
|
226
|
+
collection.sum { |c| c.localized_concepts.count }
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
# Build domain ConceptReferences for a concept.
|
|
230
|
+
#
|
|
231
|
+
# Per the concept model, ConceptReferenceType distinguishes:
|
|
232
|
+
# - "domain" → thematic/subject-area classification (area level)
|
|
233
|
+
# - "section" → structural section membership (section level)
|
|
234
|
+
#
|
|
235
|
+
# Every concept gets both: a "domain" ref to its area and a "section"
|
|
236
|
+
# ref to its section. Concepts with only an area code (no section)
|
|
237
|
+
# get only a "domain" ref.
|
|
238
|
+
#
|
|
239
|
+
# @param code [IevCode] pre-parsed IEV code
|
|
240
|
+
# @return [Array<Glossarist::ConceptReference>]
|
|
241
|
+
def domain_references_for(code)
|
|
242
|
+
refs = []
|
|
243
|
+
|
|
244
|
+
# Domain reference: thematic classification at the area level
|
|
245
|
+
refs << domain_ref(code.area_uri)
|
|
246
|
+
|
|
247
|
+
# Section reference: structural membership in the section
|
|
188
248
|
if code.section_code
|
|
189
|
-
refs <<
|
|
190
|
-
concept_id: code.section_uri,
|
|
191
|
-
source: IEV_SOURCE,
|
|
192
|
-
ref_type: "domain",
|
|
193
|
-
)
|
|
249
|
+
refs << section_ref(code.section_uri)
|
|
194
250
|
end
|
|
251
|
+
|
|
195
252
|
refs
|
|
196
253
|
end
|
|
197
254
|
|
|
198
|
-
|
|
199
|
-
|
|
255
|
+
# @param code [IevCode] pre-parsed IEV code
|
|
256
|
+
# @return [Array<String>]
|
|
257
|
+
def tags_for(code)
|
|
258
|
+
tags = []
|
|
259
|
+
area = SubjectAreas.find_area(code.area_code)
|
|
260
|
+
tags << area.title if area
|
|
261
|
+
section = code.section_code && SubjectAreas.find_section(code.section_code)
|
|
262
|
+
tags << section.title if section
|
|
263
|
+
tags
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
# @param concept [Glossarist::ManagedConcept]
|
|
267
|
+
# @param code [IevCode] pre-parsed IEV code
|
|
268
|
+
def add_section_broader(concept, code)
|
|
200
269
|
return unless code.section_uri
|
|
201
270
|
|
|
202
271
|
concept.related ||= []
|
|
@@ -249,7 +318,9 @@ module Iev
|
|
|
249
318
|
|
|
250
319
|
concept.related ||= []
|
|
251
320
|
related.each do |r|
|
|
252
|
-
next if concept.related.any?
|
|
321
|
+
next if concept.related.any? do |er|
|
|
322
|
+
er.type == r.type && er.ref&.id == r.ref&.id
|
|
323
|
+
end
|
|
253
324
|
|
|
254
325
|
concept.related << r
|
|
255
326
|
end
|
|
@@ -263,5 +334,19 @@ module Iev
|
|
|
263
334
|
status = term.entry_status
|
|
264
335
|
concept.status = status if status && !status.empty?
|
|
265
336
|
end
|
|
337
|
+
|
|
338
|
+
# --- ConceptReference factory helpers ---
|
|
339
|
+
|
|
340
|
+
def domain_ref(concept_id)
|
|
341
|
+
ref = Glossarist::ConceptReference.domain(concept_id)
|
|
342
|
+
ref.source = IEV_SOURCE
|
|
343
|
+
ref
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
def section_ref(concept_id)
|
|
347
|
+
ref = Glossarist::ConceptReference.section(concept_id)
|
|
348
|
+
ref.source = IEV_SOURCE
|
|
349
|
+
ref
|
|
350
|
+
end
|
|
266
351
|
end
|
|
267
352
|
end
|
data/lib/iev/iso_639_code.rb
CHANGED
|
@@ -6,7 +6,8 @@
|
|
|
6
6
|
module Iev
|
|
7
7
|
# @todo This needs to be rewritten.
|
|
8
8
|
class Iso639Code
|
|
9
|
-
COUNTRY_CODES = YAML.
|
|
9
|
+
COUNTRY_CODES = YAML.safe_load_file(File.join(__dir__,
|
|
10
|
+
"iso_639_2.yaml"), permitted_classes: [Symbol]).freeze
|
|
10
11
|
# rubocop:disable Style/MutableConstant
|
|
11
12
|
THREE_CHAR_MEMO = {} # Memoization cache, must be mutable
|
|
12
13
|
# rubocop:enable Style/MutableConstant
|