iev 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +3 -0
- data/Gemfile +3 -18
- data/README.adoc +34 -0
- data/data/subject_areas.yaml +1920 -0
- data/lib/iev/cli/command.rb +24 -0
- data/lib/iev/exporter.rb +19 -0
- data/lib/iev/scraper/browser.rb +102 -0
- data/lib/iev/scraper.rb +5 -105
- data/lib/iev/subject_area_concepts.rb +123 -0
- data/lib/iev/subject_areas.rb +232 -0
- data/lib/iev/term_builder.rb +19 -0
- data/lib/iev/version.rb +1 -1
- data/lib/iev.rb +36 -0
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 54693df7923bcf4dc686eac69daf1f4e3f981db12a0c3f7f672e2c7c656a53b2
|
|
4
|
+
data.tar.gz: 5404339eaad56057bccd9880b324194c73a9d82f8e86224429df4d651b361b67
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 474e483fbab5ad2b3450a617d51a67ea1a6217bdfcfa46d99a469234e5fbffc2da4e89e20d7f5049303212086d0edebcb1712b2334829be2daed5d55816ad2b9
|
|
7
|
+
data.tar.gz: 34595c499fb3855449f92059629fad4da01a5d24b77409e8eab7cd7abf3b6a1eb4b6770e83baca6d0d2df46f22f69d9635eb282d7200e2ac364dd0eef8dea983
|
data/CLAUDE.md
CHANGED
|
@@ -31,6 +31,9 @@ This is a Ruby gem (`iev`) for working with the International Electrotechnical V
|
|
|
31
31
|
- `SourceParser` — parses the SOURCE column from IEV exports, normalizing references (CEI→IEC, UIT→ITU, etc.) and extracting ref/clause/relationship using extensive regex matching.
|
|
32
32
|
- `TermAttrsParser` — parses the TERMATTRIBUTE field (gender, plurality, part of speech, geographical area, abbreviations).
|
|
33
33
|
- `SupersessionParser` — parses the REPLACES field for deprecated term relationships.
|
|
34
|
+
- `SubjectAreas` — manages the IEV subject area/section hierarchy. Bundled `data/subject_areas.yaml` contains the area/section tree. URI scheme: `area-{code}` and `section-{code}`.
|
|
35
|
+
- `SubjectAreaConcepts` — builds area and section hierarchy concepts with `ConceptReference.domain()` objects in `ManagedConceptData#domains`, `broader`/`narrower` relations, and `ConceptData#domain` references.
|
|
36
|
+
- `Exporter` — full export pipeline (Excel/SQLite → Glossarist YAML). Assigns domain `ConceptReference` objects via `domain_references_for`.
|
|
34
37
|
- `Converter::MathmlToAsciimath` — converts MathML markup to AsciiMath using Plurimath.
|
|
35
38
|
- `Utilities` — HTML processing: converts IEV cross-references (`<a href=IEV...>`) to `{{term, IEV:code}}` format, handles figures, images, bold tags, and newline normalization.
|
|
36
39
|
|
data/Gemfile
CHANGED
|
@@ -5,7 +5,6 @@ source "https://rubygems.org"
|
|
|
5
5
|
gem "benchmark"
|
|
6
6
|
gem "canon"
|
|
7
7
|
gem "openssl"
|
|
8
|
-
gem "lutaml-model", github: "lutaml/lutaml-model", ref: "main"
|
|
9
8
|
gem "rake"
|
|
10
9
|
gem "rspec"
|
|
11
10
|
gem "rubocop"
|
|
@@ -16,20 +15,6 @@ gem "simplecov"
|
|
|
16
15
|
|
|
17
16
|
gemspec
|
|
18
17
|
|
|
19
|
-
#
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
# lutaml-integration branches have typed attributes and are compatible with relaton-bib 2.0.0.
|
|
23
|
-
# relaton/relaton#142 provides the meta-gem with lutaml-model ~> 0.8 support.
|
|
24
|
-
# TODO: Remove once relaton gems release versions with lutaml-model 0.8 support.
|
|
25
|
-
gem "relaton", github: "relaton/relaton", branch: "lutaml-integration"
|
|
26
|
-
gem "relaton-bib", github: "relaton/relaton-bib", branch: "fix/lutaml-model-0.8"
|
|
27
|
-
gem "relaton-iso", github: "relaton/relaton-iso", branch: "fix/lutaml-model-0.8"
|
|
28
|
-
gem "relaton-3gpp", github: "relaton/relaton-3gpp", branch: "fix/lutaml-model-0.8"
|
|
29
|
-
gem "relaton-bipm", github: "relaton/relaton-bipm", branch: "fix/lutaml-model-0.8"
|
|
30
|
-
gem "relaton-bsi", github: "relaton/relaton-bsi", branch: "fix/lutaml-model-0.8"
|
|
31
|
-
gem "relaton-calconnect", github: "relaton/relaton-calconnect", branch: "lutaml-integration"
|
|
32
|
-
gem "relaton-ccsds", github: "relaton/relaton-ccsds", branch: "lutaml-integration"
|
|
33
|
-
gem "relaton-cen", github: "relaton/relaton-cen", branch: "lutaml-integration"
|
|
34
|
-
gem "relaton-iec", github: "relaton/relaton-iec", branch: "lutaml-integration"
|
|
35
|
-
gem "relaton-itu", github: "relaton/relaton-itu", branch: "lutaml-integration"
|
|
18
|
+
# TODO: remove once glossarist 2.7.0 is released with domains migration
|
|
19
|
+
gem "glossarist", git: "https://github.com/glossarist/glossarist-ruby.git",
|
|
20
|
+
branch: "feat/metanorma-parity-designation-model"
|
data/README.adoc
CHANGED
|
@@ -363,3 +363,37 @@ authoritative_source:
|
|
|
363
363
|
== Copyright and license
|
|
364
364
|
|
|
365
365
|
Data copyright IEC. All others copyright Ribose.
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
== Data Model
|
|
369
|
+
|
|
370
|
+
=== Concept Domains
|
|
371
|
+
|
|
372
|
+
Exported concepts use `domains` (a collection of `ConceptReference` objects)
|
|
373
|
+
to represent the IEV subject area hierarchy. Each concept's domains include
|
|
374
|
+
references to its area (e.g. `area-103`) and section (e.g. `section-103-01`).
|
|
375
|
+
|
|
376
|
+
[source,yaml]
|
|
377
|
+
----
|
|
378
|
+
data:
|
|
379
|
+
identifier: "103-01-01"
|
|
380
|
+
domains:
|
|
381
|
+
- concept_id: area-103
|
|
382
|
+
ref_type: domain
|
|
383
|
+
- concept_id: section-103-01
|
|
384
|
+
ref_type: domain
|
|
385
|
+
----
|
|
386
|
+
|
|
387
|
+
The `ref_type: domain` value distinguishes domain references from other
|
|
388
|
+
`ConceptReference` types (local, urn, designation).
|
|
389
|
+
|
|
390
|
+
=== Subject Area Hierarchy
|
|
391
|
+
|
|
392
|
+
The `SubjectAreaConcepts` module creates area and section concepts that
|
|
393
|
+
form a two-level hierarchy:
|
|
394
|
+
|
|
395
|
+
* **Area concepts** (e.g. `area-103`) — have a domain reference to themselves,
|
|
396
|
+
and `narrower` relations to their sections
|
|
397
|
+
* **Section concepts** (e.g. `section-103-01`) — have domain references to
|
|
398
|
+
both parent area and themselves, a `broader` relation to the parent area,
|
|
399
|
+
and `ConceptData#domain` pointing to the parent area
|