iev 0.4.4 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +7 -4
- data/.github/workflows/release.yml +2 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +4 -1
- data/.rubocop_todo.yml +98 -21
- data/CLAUDE.md +17 -5
- data/Gemfile +8 -4
- data/README.adoc +395 -10
- data/exe/iev +1 -1
- data/iev.gemspec +3 -2
- data/lib/iev/cli/command.rb +3 -2
- data/lib/iev/cli/command_helper.rb +1 -2
- data/lib/iev/cli/ui.rb +5 -5
- data/lib/iev/config.rb +1 -15
- data/lib/iev/data_source.rb +4 -2
- data/lib/iev/db_writer.rb +1 -0
- data/lib/iev/exporter.rb +182 -10
- data/lib/iev/iev_code.rb +80 -0
- data/lib/iev/iso_639_code.rb +2 -1
- data/lib/iev/relaton_db.rb +1 -1
- data/lib/iev/scraper/browser.rb +90 -88
- data/lib/iev/scraper.rb +5 -4
- data/lib/iev/section.rb +37 -0
- data/lib/iev/source_parser.rb +57 -11
- data/lib/iev/subject_area.rb +46 -0
- data/lib/iev/subject_area_concepts.rb +60 -35
- data/lib/iev/subject_areas.rb +72 -33
- data/lib/iev/supersession_parser.rb +1 -2
- data/lib/iev/term_attrs_parser.rb +1 -1
- data/lib/iev/term_builder.rb +14 -9
- data/lib/iev/utilities.rb +29 -1
- data/lib/iev/version.rb +1 -1
- data/lib/iev.rb +43 -11
- metadata +26 -22
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f5d3397238922342cdb3fb6b5adcf8fa969d2b3d6510742485d7d2a20b1d1a12
|
|
4
|
+
data.tar.gz: 6a82e64e1201a9a2f04cff66d024de1b23643030ce6e37f26ef09bb2ad334b10
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: be3b984bd46c791580a73644e88dccca4b98b708ce0e28c50f72e822abb609573455201b633d0e4d791b9271cdecb20a555b11cc674dda46bde90fcf7eaccb74
|
|
7
|
+
data.tar.gz: e56796a853c58aae578dd5c4fe3fbaf3e18115e1a08e22feb7d5f876d9521dee4d27fb734fb05baa12e297b1dc9ac89b67197cad0b51695b2224c3fdb622f3a5
|
data/.github/workflows/rake.yml
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
|
2
|
+
# See https://github.com/metanorma/cimas
|
|
1
3
|
name: rake
|
|
2
4
|
|
|
3
|
-
permissions:
|
|
4
|
-
contents: write
|
|
5
|
-
|
|
6
5
|
on:
|
|
7
6
|
push:
|
|
8
7
|
branches: [ master, main ]
|
|
9
8
|
tags: [ v* ]
|
|
10
9
|
pull_request:
|
|
11
|
-
|
|
10
|
+
|
|
11
|
+
permissions:
|
|
12
|
+
contents: write
|
|
12
13
|
|
|
13
14
|
jobs:
|
|
14
15
|
rake:
|
|
15
16
|
uses: metanorma/ci/.github/workflows/generic-rake.yml@main
|
|
17
|
+
secrets:
|
|
18
|
+
pat_token: ${{ secrets.GLOSSARIST_CI_PAT_TOKEN }}
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -2,7 +2,10 @@
|
|
|
2
2
|
# See https://github.com/metanorma/cimas
|
|
3
3
|
inherit_from:
|
|
4
4
|
- .rubocop_todo.yml
|
|
5
|
-
- https://raw.githubusercontent.com/riboseinc/oss-guides/
|
|
5
|
+
- https://raw.githubusercontent.com/riboseinc/oss-guides/main/ci/rubocop.yml
|
|
6
6
|
|
|
7
7
|
# local repo-specific modifications
|
|
8
8
|
# ...
|
|
9
|
+
|
|
10
|
+
AllCops:
|
|
11
|
+
TargetRubyVersion: 3.4
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,65 +1,142 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on
|
|
3
|
+
# on 2026-06-13 04:18:23 UTC using RuboCop version 1.86.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
|
8
8
|
|
|
9
|
-
# Offense count:
|
|
9
|
+
# Offense count: 1
|
|
10
|
+
Gemspec/RequiredRubyVersion:
|
|
11
|
+
Exclude:
|
|
12
|
+
- 'iev.gemspec'
|
|
13
|
+
|
|
14
|
+
# Offense count: 2
|
|
10
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
11
|
-
# Configuration parameters:
|
|
16
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
17
|
+
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
18
|
+
Layout/ArgumentAlignment:
|
|
19
|
+
Exclude:
|
|
20
|
+
- 'spec/iev/exporter_spec.rb'
|
|
21
|
+
|
|
22
|
+
# Offense count: 1
|
|
23
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
24
|
+
# Configuration parameters: EnforcedStyleAlignWith.
|
|
25
|
+
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
26
|
+
Layout/BlockAlignment:
|
|
27
|
+
Exclude:
|
|
28
|
+
- 'spec/iev/exporter_spec.rb'
|
|
29
|
+
|
|
30
|
+
# Offense count: 1
|
|
31
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
32
|
+
Layout/BlockEndNewline:
|
|
33
|
+
Exclude:
|
|
34
|
+
- 'spec/iev/exporter_spec.rb'
|
|
35
|
+
|
|
36
|
+
# Offense count: 2
|
|
37
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
38
|
+
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
39
|
+
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
40
|
+
Layout/IndentationWidth:
|
|
41
|
+
Exclude:
|
|
42
|
+
- 'spec/iev/exporter_spec.rb'
|
|
43
|
+
|
|
44
|
+
# Offense count: 65
|
|
45
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
46
|
+
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
12
47
|
# URISchemes: http, https
|
|
13
48
|
Layout/LineLength:
|
|
14
|
-
|
|
15
|
-
- 'iev.gemspec'
|
|
16
|
-
- 'lib/iev/cli/command.rb'
|
|
17
|
-
- 'lib/iev/cli/command_helper.rb'
|
|
18
|
-
- 'lib/iev/db_cache.rb'
|
|
19
|
-
- 'lib/iev/source_parser.rb'
|
|
20
|
-
- 'spec/acceptance/db2yaml_spec.rb'
|
|
21
|
-
- 'spec/acceptance/xlsx2yaml_spec.rb'
|
|
22
|
-
- 'spec/iev_spec.rb'
|
|
49
|
+
Enabled: false
|
|
23
50
|
|
|
24
|
-
# Offense count:
|
|
51
|
+
# Offense count: 19
|
|
25
52
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
26
53
|
Lint/DuplicateBranch:
|
|
27
54
|
Exclude:
|
|
28
55
|
- 'lib/iev/source_parser.rb'
|
|
29
|
-
- 'lib/iev/
|
|
56
|
+
- 'lib/iev/utilities.rb'
|
|
30
57
|
|
|
31
58
|
# Offense count: 1
|
|
32
59
|
Lint/MixedRegexpCaptureTypes:
|
|
33
60
|
Exclude:
|
|
34
61
|
- 'lib/iev/term_builder.rb'
|
|
35
62
|
|
|
36
|
-
# Offense count:
|
|
63
|
+
# Offense count: 2
|
|
64
|
+
Lint/ShadowedException:
|
|
65
|
+
Exclude:
|
|
66
|
+
- 'lib/iev/source_parser.rb'
|
|
67
|
+
|
|
68
|
+
# Offense count: 26
|
|
37
69
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
38
70
|
Metrics/AbcSize:
|
|
39
71
|
Exclude:
|
|
72
|
+
- 'lib/iev/cli/command.rb'
|
|
73
|
+
- 'lib/iev/cli/command_helper.rb'
|
|
40
74
|
- 'lib/iev/converter/mathml_to_asciimath.rb'
|
|
41
75
|
- 'lib/iev/db_writer.rb'
|
|
76
|
+
- 'lib/iev/exporter.rb'
|
|
77
|
+
- 'lib/iev/scraper/page_parser.rb'
|
|
42
78
|
- 'lib/iev/source_parser.rb'
|
|
79
|
+
- 'lib/iev/subject_area_concepts.rb'
|
|
80
|
+
- 'lib/iev/subject_areas.rb'
|
|
43
81
|
- 'lib/iev/term_builder.rb'
|
|
82
|
+
- 'lib/iev/utilities.rb'
|
|
83
|
+
- 'spec/iev/supersession_parser_spec.rb'
|
|
44
84
|
|
|
45
|
-
# Offense count:
|
|
85
|
+
# Offense count: 15
|
|
46
86
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
47
87
|
Metrics/CyclomaticComplexity:
|
|
48
88
|
Exclude:
|
|
49
|
-
- 'lib/iev.rb'
|
|
50
89
|
- 'lib/iev/converter/mathml_to_asciimath.rb'
|
|
90
|
+
- 'lib/iev/exporter.rb'
|
|
91
|
+
- 'lib/iev/scraper/page_parser.rb'
|
|
51
92
|
- 'lib/iev/source_parser.rb'
|
|
93
|
+
- 'lib/iev/subject_areas.rb'
|
|
52
94
|
- 'lib/iev/term_builder.rb'
|
|
95
|
+
- 'lib/iev/utilities.rb'
|
|
53
96
|
|
|
54
|
-
# Offense count:
|
|
97
|
+
# Offense count: 36
|
|
55
98
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
56
99
|
Metrics/MethodLength:
|
|
57
|
-
Max:
|
|
100
|
+
Max: 73
|
|
101
|
+
|
|
102
|
+
# Offense count: 1
|
|
103
|
+
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
|
104
|
+
Metrics/ParameterLists:
|
|
105
|
+
Max: 7
|
|
58
106
|
|
|
59
|
-
# Offense count:
|
|
107
|
+
# Offense count: 8
|
|
60
108
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
61
109
|
Metrics/PerceivedComplexity:
|
|
62
110
|
Exclude:
|
|
63
|
-
- 'lib/iev.rb'
|
|
64
111
|
- 'lib/iev/converter/mathml_to_asciimath.rb'
|
|
112
|
+
- 'lib/iev/exporter.rb'
|
|
113
|
+
- 'lib/iev/scraper/page_parser.rb'
|
|
114
|
+
- 'lib/iev/subject_areas.rb'
|
|
65
115
|
- 'lib/iev/term_builder.rb'
|
|
116
|
+
|
|
117
|
+
# Offense count: 1
|
|
118
|
+
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
119
|
+
# AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
|
|
120
|
+
Naming/MethodParameterName:
|
|
121
|
+
Exclude:
|
|
122
|
+
- 'lib/iev/subject_areas.rb'
|
|
123
|
+
|
|
124
|
+
# Offense count: 2
|
|
125
|
+
# Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
|
|
126
|
+
# SupportedStyles: snake_case, normalcase, non_integer
|
|
127
|
+
# AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
|
|
128
|
+
Naming/VariableNumber:
|
|
129
|
+
Exclude:
|
|
130
|
+
- 'spec/iev/exporter_spec.rb'
|
|
131
|
+
|
|
132
|
+
# Offense count: 5
|
|
133
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
134
|
+
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
135
|
+
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
136
|
+
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
137
|
+
# FunctionalMethods: let, let!, subject, watch
|
|
138
|
+
# AllowedMethods: lambda, proc, it
|
|
139
|
+
Style/BlockDelimiters:
|
|
140
|
+
Exclude:
|
|
141
|
+
- 'lib/iev/exporter.rb'
|
|
142
|
+
- 'spec/iev/exporter_spec.rb'
|
data/CLAUDE.md
CHANGED
|
@@ -27,15 +27,23 @@ This is a Ruby gem (`iev`) for working with the International Electrotechnical V
|
|
|
27
27
|
|
|
28
28
|
### Key Modules
|
|
29
29
|
|
|
30
|
-
- `TermBuilder` — the core converter that turns a spreadsheet row into a `Glossarist::LocalizedConcept`. Handles definition splitting (notes/examples extraction), term designation parsing, and source parsing.
|
|
30
|
+
- `TermBuilder` — the core converter that turns a spreadsheet row into a `Glossarist::LocalizedConcept`. Handles definition splitting (notes/examples extraction), term designation parsing, and source parsing. Sets `ConceptData#domain` to section/area title text (not URI).
|
|
31
31
|
- `SourceParser` — parses the SOURCE column from IEV exports, normalizing references (CEI→IEC, UIT→ITU, etc.) and extracting ref/clause/relationship using extensive regex matching.
|
|
32
|
-
- `TermAttrsParser` — parses the TERMATTRIBUTE field (gender, plurality, part of speech, geographical area,
|
|
32
|
+
- `TermAttrsParser` — parses the TERMATTRIBUTE field (gender, plurality, part of speech, geographical area, usage_info).
|
|
33
33
|
- `SupersessionParser` — parses the REPLACES field for deprecated term relationships.
|
|
34
34
|
- `SubjectAreas` — manages the IEV subject area/section hierarchy. Bundled `data/subject_areas.yaml` contains the area/section tree. URI scheme: `area-{code}` and `section-{code}`.
|
|
35
|
-
- `SubjectAreaConcepts` — builds area and section hierarchy concepts
|
|
36
|
-
- `Exporter` — full export pipeline (Excel/SQLite → Glossarist YAML). Assigns domain `ConceptReference` objects via `domain_references_for`.
|
|
35
|
+
- `SubjectAreaConcepts` — builds area and section hierarchy concepts. Uses `ConceptReference` with proper `ref_type` per `ConceptReferenceType`: `"domain"` for thematic area classification, `"section"` for structural section membership. Sets `ConceptData#domain` to area title text.
|
|
36
|
+
- `Exporter` — full export pipeline (Excel/SQLite → Glossarist YAML). Assigns domain and section `ConceptReference` objects via `domain_references_for`. Uses `Glossarist::DatasetRegister` model for `register.yaml`. Sets `schema_version: "3"` on all exported concepts.
|
|
37
37
|
- `Converter::MathmlToAsciimath` — converts MathML markup to AsciiMath using Plurimath.
|
|
38
|
-
- `Utilities` — HTML processing: converts IEV cross-references (`<a href=IEV...>`) to `{{
|
|
38
|
+
- `Utilities` — HTML processing: converts IEV cross-references (`<a href=IEV...>`) to `{{URN, term}}` format (ID first, display text last), handles figures, images, bold tags, and newline normalization.
|
|
39
|
+
|
|
40
|
+
### Domain/Section Model
|
|
41
|
+
|
|
42
|
+
Per the concept model's `ConceptReferenceType`:
|
|
43
|
+
- `"domain"` — thematic/subject-area classification (area level, e.g. "103")
|
|
44
|
+
- `"section"` — structural section membership (section level, e.g. "103-01")
|
|
45
|
+
|
|
46
|
+
Each concept's `ManagedConceptData#domains` contains both refs. `ConceptData#domain` (a `LocalizedString`) holds the section/area title text. The `ManagedConcept#related` array holds `broader`/`narrower` relationships for the hierarchy tree.
|
|
39
47
|
|
|
40
48
|
### Configuration
|
|
41
49
|
|
|
@@ -47,7 +55,11 @@ This is a Ruby gem (`iev`) for working with the International Electrotechnical V
|
|
|
47
55
|
## Key Conventions
|
|
48
56
|
|
|
49
57
|
- Ruby >= 3.1.0 required
|
|
58
|
+
- All constants live under `Iev::` namespace (e.g. `Iev::IEV_SOURCE`, not top-level `IEV_SOURCE`)
|
|
59
|
+
- `Iev.config` / `Iev.configure` / `Iev.reset_config!` are defined directly in `lib/iev.rb` — they must be available at load time without triggering autoload
|
|
50
60
|
- `plurimath` and `unitsml` are optional runtime dependencies — loaded with `rescue LoadError`, so the `DataSource`/`Db` APIs work without them
|
|
51
61
|
- The IEV Excel export format is specific to IEC-internal use; column structure is documented in README.adoc
|
|
52
62
|
- Language codes: the spreadsheet uses ISO 639-1 (2-char like "en"), internally converted to ISO 639-2/3 (3-char like "eng") via `Iso639Code` and `DataConversions`
|
|
53
63
|
- `DataConversions` is a refinement (`using DataConversions`) that adds `.sanitize` and `.decode_html` methods to String
|
|
64
|
+
- `IevCode` is the single source of truth for IEV code decomposition — always use it instead of manual `split("-")` parsing
|
|
65
|
+
- Schema version 3: all exported concepts use `schema_version: "3"`, which supports `annotations`, V3 concept sources, and structured references
|
data/Gemfile
CHANGED
|
@@ -2,6 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
source "https://rubygems.org"
|
|
4
4
|
|
|
5
|
+
# Use local glossarist-ruby when available for development.
|
|
6
|
+
# Otherwise falls back to released gem (requires >= 2.8.2 for tags support).
|
|
7
|
+
if File.directory?(File.expand_path("../glossarist-ruby", __dir__))
|
|
8
|
+
gem "glossarist", path: "../glossarist-ruby"
|
|
9
|
+
else
|
|
10
|
+
gem "glossarist", ">= 2.8.2"
|
|
11
|
+
end
|
|
12
|
+
|
|
5
13
|
gem "benchmark"
|
|
6
14
|
gem "canon"
|
|
7
15
|
gem "openssl"
|
|
@@ -14,7 +22,3 @@ gem "rubocop-rspec"
|
|
|
14
22
|
gem "simplecov"
|
|
15
23
|
|
|
16
24
|
gemspec
|
|
17
|
-
|
|
18
|
-
# TODO: remove once glossarist 2.7.0 is released with domains migration
|
|
19
|
-
gem "glossarist", git: "https://github.com/glossarist/glossarist-ruby.git",
|
|
20
|
-
branch: "feat/metanorma-parity-designation-model"
|