iev 0.4.3 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +7 -4
- data/.github/workflows/release.yml +2 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +4 -2
- data/CLAUDE.md +3 -0
- data/Gemfile +0 -19
- data/README.adoc +412 -0
- data/data/subject_areas.yaml +1920 -0
- data/exe/iev +1 -1
- data/iev.gemspec +1 -1
- data/lib/iev/cli/command.rb +24 -0
- data/lib/iev/cli/command_helper.rb +1 -2
- data/lib/iev/exporter.rb +108 -2
- data/lib/iev/iev_code.rb +80 -0
- data/lib/iev/iso_639_code.rb +1 -1
- data/lib/iev/scraper/browser.rb +102 -0
- data/lib/iev/scraper.rb +5 -105
- data/lib/iev/section.rb +37 -0
- data/lib/iev/source_parser.rb +48 -1
- data/lib/iev/subject_area.rb +46 -0
- data/lib/iev/subject_area_concepts.rb +145 -0
- data/lib/iev/subject_areas.rb +273 -0
- data/lib/iev/supersession_parser.rb +1 -2
- data/lib/iev/term_builder.rb +19 -0
- data/lib/iev/version.rb +1 -1
- data/lib/iev.rb +46 -0
- metadata +17 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a539da108086dd1fb862d1d99d6763a3d32015a921ef580018ac9671660b0b68
|
|
4
|
+
data.tar.gz: af0e1813cae179dd69da3a9e8fde6e8d275e34d59949bd43a0642a171988f311
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c93e2479cccfbe56b37b1d1cd604a3b327258abd8c50f9b99e2091abe7e4e714aa8e44c49f9e886d11483ec80e8e2325ce78f0598e8b34fdeb4fe7442b0e0d32
|
|
7
|
+
data.tar.gz: 28c5084f7a75106feee144201b75b642217674438f14a48c216a1069ed1b5d0abe822d137afb6671303bc1a407689ec236c4b2ff5abe2c0626e3a34f1ec35b34
|
data/.github/workflows/rake.yml
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
|
2
|
+
# See https://github.com/metanorma/cimas
|
|
1
3
|
name: rake
|
|
2
4
|
|
|
3
|
-
permissions:
|
|
4
|
-
contents: write
|
|
5
|
-
|
|
6
5
|
on:
|
|
7
6
|
push:
|
|
8
7
|
branches: [ master, main ]
|
|
9
8
|
tags: [ v* ]
|
|
10
9
|
pull_request:
|
|
11
|
-
|
|
10
|
+
|
|
11
|
+
permissions:
|
|
12
|
+
contents: write
|
|
12
13
|
|
|
13
14
|
jobs:
|
|
14
15
|
rake:
|
|
15
16
|
uses: metanorma/ci/.github/workflows/generic-rake.yml@main
|
|
17
|
+
secrets:
|
|
18
|
+
pat_token: ${{ secrets.GLOSSARIST_CI_PAT_TOKEN }}
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
# Auto-generated by Cimas: Do not edit it manually!
|
|
2
2
|
# See https://github.com/metanorma/cimas
|
|
3
3
|
inherit_from:
|
|
4
|
-
- .
|
|
5
|
-
- https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
|
|
4
|
+
- https://raw.githubusercontent.com/riboseinc/oss-guides/main/ci/rubocop.yml
|
|
6
5
|
|
|
7
6
|
# local repo-specific modifications
|
|
8
7
|
# ...
|
|
8
|
+
|
|
9
|
+
AllCops:
|
|
10
|
+
TargetRubyVersion: 3.4
|
data/CLAUDE.md
CHANGED
|
@@ -31,6 +31,9 @@ This is a Ruby gem (`iev`) for working with the International Electrotechnical V
|
|
|
31
31
|
- `SourceParser` — parses the SOURCE column from IEV exports, normalizing references (CEI→IEC, UIT→ITU, etc.) and extracting ref/clause/relationship using extensive regex matching.
|
|
32
32
|
- `TermAttrsParser` — parses the TERMATTRIBUTE field (gender, plurality, part of speech, geographical area, abbreviations).
|
|
33
33
|
- `SupersessionParser` — parses the REPLACES field for deprecated term relationships.
|
|
34
|
+
- `SubjectAreas` — manages the IEV subject area/section hierarchy. Bundled `data/subject_areas.yaml` contains the area/section tree. URI scheme: `area-{code}` and `section-{code}`.
|
|
35
|
+
- `SubjectAreaConcepts` — builds area and section hierarchy concepts with `ConceptReference.domain()` objects in `ManagedConceptData#domains`, `broader`/`narrower` relations, and `ConceptData#domain` references.
|
|
36
|
+
- `Exporter` — full export pipeline (Excel/SQLite → Glossarist YAML). Assigns domain `ConceptReference` objects via `domain_references_for`.
|
|
34
37
|
- `Converter::MathmlToAsciimath` — converts MathML markup to AsciiMath using Plurimath.
|
|
35
38
|
- `Utilities` — HTML processing: converts IEV cross-references (`<a href=IEV...>`) to `{{term, IEV:code}}` format, handles figures, images, bold tags, and newline normalization.
|
|
36
39
|
|
data/Gemfile
CHANGED
|
@@ -5,7 +5,6 @@ source "https://rubygems.org"
|
|
|
5
5
|
gem "benchmark"
|
|
6
6
|
gem "canon"
|
|
7
7
|
gem "openssl"
|
|
8
|
-
gem "lutaml-model", github: "lutaml/lutaml-model", ref: "main"
|
|
9
8
|
gem "rake"
|
|
10
9
|
gem "rspec"
|
|
11
10
|
gem "rubocop"
|
|
@@ -15,21 +14,3 @@ gem "rubocop-rspec"
|
|
|
15
14
|
gem "simplecov"
|
|
16
15
|
|
|
17
16
|
gemspec
|
|
18
|
-
|
|
19
|
-
# Override relaton gems with lutaml-model 0.8 compatible versions.
|
|
20
|
-
# Released 2.0.0 sub-gems have untyped lutaml-model attributes that fail with 0.8+.
|
|
21
|
-
# relaton-bib ~> 2.0.0 is required by sub-gems; fix/lutaml-model-0.8 provides 2.0.0 + lutaml-model ~> 0.8.
|
|
22
|
-
# lutaml-integration branches have typed attributes and are compatible with relaton-bib 2.0.0.
|
|
23
|
-
# relaton/relaton#142 provides the meta-gem with lutaml-model ~> 0.8 support.
|
|
24
|
-
# TODO: Remove once relaton gems release versions with lutaml-model 0.8 support.
|
|
25
|
-
gem "relaton", github: "relaton/relaton", branch: "lutaml-integration"
|
|
26
|
-
gem "relaton-bib", github: "relaton/relaton-bib", branch: "fix/lutaml-model-0.8"
|
|
27
|
-
gem "relaton-iso", github: "relaton/relaton-iso", branch: "fix/lutaml-model-0.8"
|
|
28
|
-
gem "relaton-3gpp", github: "relaton/relaton-3gpp", branch: "fix/lutaml-model-0.8"
|
|
29
|
-
gem "relaton-bipm", github: "relaton/relaton-bipm", branch: "fix/lutaml-model-0.8"
|
|
30
|
-
gem "relaton-bsi", github: "relaton/relaton-bsi", branch: "fix/lutaml-model-0.8"
|
|
31
|
-
gem "relaton-calconnect", github: "relaton/relaton-calconnect", branch: "lutaml-integration"
|
|
32
|
-
gem "relaton-ccsds", github: "relaton/relaton-ccsds", branch: "lutaml-integration"
|
|
33
|
-
gem "relaton-cen", github: "relaton/relaton-cen", branch: "lutaml-integration"
|
|
34
|
-
gem "relaton-iec", github: "relaton/relaton-iec", branch: "lutaml-integration"
|
|
35
|
-
gem "relaton-itu", github: "relaton/relaton-itu", branch: "lutaml-integration"
|
data/README.adoc
CHANGED
|
@@ -360,6 +360,418 @@ authoritative_source:
|
|
|
360
360
|
----
|
|
361
361
|
|
|
362
362
|
|
|
363
|
+
== Excel-to-Glossarist Column Mapping
|
|
364
|
+
|
|
365
|
+
This section provides a complete mapping from every IEV Excel export column
|
|
366
|
+
to the corresponding Glossarist concept model field. The IEV Excel export has
|
|
367
|
+
19 columns (see <<_structure_of_the_iev_excel_export>>). Each row represents
|
|
368
|
+
one *localized term entry* (one language variant of one concept).
|
|
369
|
+
|
|
370
|
+
=== Glossarist Model Layers
|
|
371
|
+
|
|
372
|
+
The Glossarist model organizes concept data into two layers:
|
|
373
|
+
|
|
374
|
+
* *ManagedConcept* — the concept entry itself (identity, domain classification,
|
|
375
|
+
cross-concept relationships, lifecycle)
|
|
376
|
+
* *LocalizedConcept* — a language-specific variant of a concept (designations,
|
|
377
|
+
definition, notes, examples, sources)
|
|
378
|
+
|
|
379
|
+
One IEV Excel row produces one `LocalizedConcept`, which is attached to its
|
|
380
|
+
`ManagedConcept` (identified by `IEVREF`).
|
|
381
|
+
|
|
382
|
+
=== Column-by-Column Mapping
|
|
383
|
+
|
|
384
|
+
The table below maps each of the 19 Excel columns to the Glossarist model.
|
|
385
|
+
|
|
386
|
+
[cols="15,25,15,45",options="header"]
|
|
387
|
+
|===
|
|
388
|
+
| Excel Column | Glossarist Path | Data Type | Notes
|
|
389
|
+
|
|
390
|
+
| `IEVREF`
|
|
391
|
+
| `ManagedConceptData#id`
|
|
392
|
+
| `String`
|
|
393
|
+
| The concept identifier (e.g. `103-01-02`). Also set as `LocalizedConcept#id` and `ConceptData#id`. Used to group multiple language rows into one `ManagedConcept`. The IEVREF pattern `AAA-BB-CC` is also used to derive domain references (see <<_derived-fields>>).
|
|
394
|
+
|
|
395
|
+
| `LANGUAGE`
|
|
396
|
+
| `ConceptData#language_code`
|
|
397
|
+
| `String` (ISO 639-2/3)
|
|
398
|
+
| Two-character code (e.g. `en`, `fr`) converted to three-character ISO 639 code (e.g. `eng`, `fra`) via `Iev::Iso639Code`. This determines which language slot the localized concept fills.
|
|
399
|
+
|
|
400
|
+
| `TERM`
|
|
401
|
+
| `Designation::Expression#designation`
|
|
402
|
+
| `String`
|
|
403
|
+
| Primary term designation. Creates a `Designation::Expression` with `normative_status: "preferred"`. If the value is `.....` (5 dots, meaning "not available"), it is replaced with `"NA"`. The term text undergoes MathML-to-AsciiMath conversion and cross-reference expansion.
|
|
404
|
+
|
|
405
|
+
| `TERMATTRIBUTE`
|
|
406
|
+
| (multiple designation fields)
|
|
407
|
+
| Composite string
|
|
408
|
+
| Parsed by `TermAttrsParser` into multiple designation attributes. See <<_termattribute-breakdown>> for the full sub-mapping.
|
|
409
|
+
|
|
410
|
+
| `SYNONYM1`
|
|
411
|
+
| `Designation::Expression#designation`
|
|
412
|
+
| `String`
|
|
413
|
+
| Additional designation. Creates a `Designation::Expression`. Some synonyms contain multiple entries separated by `<p>`, `<b>`, `<br>` tags — each is split into a separate designation. `normative_status` comes from `SYNONYM1STATUS`.
|
|
414
|
+
|
|
415
|
+
| `SYNONYM1ATTRIBUTE`
|
|
416
|
+
| (multiple designation fields)
|
|
417
|
+
| Composite string
|
|
418
|
+
| Same parsing as `TERMATTRIBUTE`, applied to the `SYNONYM1` designation. See <<_termattribute-breakdown>>.
|
|
419
|
+
|
|
420
|
+
| `SYNONYM1STATUS`
|
|
421
|
+
| `Designation::Expression#normative_status`
|
|
422
|
+
| `String` or nil
|
|
423
|
+
| Maps to the synonym's normative status. The value is lowercased. Known localized values are mapped: e.g. `"obsoleto"` to `"deprecated"`, Cyrillic variants similarly. When nil, the synonym has no explicit status. Also used to derive `LocalizedConcept#classification` (see <<_derived-fields>>).
|
|
424
|
+
|
|
425
|
+
| `SYNONYM2`
|
|
426
|
+
| `Designation::Expression#designation`
|
|
427
|
+
| `String`
|
|
428
|
+
| Same pattern as `SYNONYM1`.
|
|
429
|
+
|
|
430
|
+
| `SYNONYM2ATTRIBUTE`
|
|
431
|
+
| (multiple designation fields)
|
|
432
|
+
| Composite string
|
|
433
|
+
| Same as `SYNONYM1ATTRIBUTE`.
|
|
434
|
+
|
|
435
|
+
| `SYNONYM2STATUS`
|
|
436
|
+
| `Designation::Expression#normative_status`
|
|
437
|
+
| `String` or nil
|
|
438
|
+
| Same as `SYNONYM1STATUS`.
|
|
439
|
+
|
|
440
|
+
| `SYNONYM3`
|
|
441
|
+
| `Designation::Expression#designation`
|
|
442
|
+
| `String`
|
|
443
|
+
| Same pattern as `SYNONYM1`.
|
|
444
|
+
|
|
445
|
+
| `SYNONYM3ATTRIBUTE`
|
|
446
|
+
| (multiple designation fields)
|
|
447
|
+
| Composite string
|
|
448
|
+
| Same as `SYNONYM1ATTRIBUTE`.
|
|
449
|
+
|
|
450
|
+
| `SYNONYM3STATUS`
|
|
451
|
+
| `Designation::Expression#normative_status`
|
|
452
|
+
| `String` or nil
|
|
453
|
+
| Same as `SYNONYM1STATUS`.
|
|
454
|
+
|
|
455
|
+
| `SYMBOLE`
|
|
456
|
+
| `Designation::Symbol#designation`
|
|
457
|
+
| `String`
|
|
458
|
+
| International math symbol. Creates a `Designation::Symbol` with `international: true`. If this column is empty, no symbol designation is created.
|
|
459
|
+
|
|
460
|
+
| `DEFINITION`
|
|
461
|
+
| `ConceptData#definition`, `ConceptData#examples`, `ConceptData#notes`
|
|
462
|
+
| HTML string
|
|
463
|
+
| The unified definition text is split by `TermBuilder#split_definition`, which uses a regex to detect the markers EXAMPLE, EXEMPLE, Note N to entry, Note N à l'article, and NOTE. Each part becomes a `DetailedDefinition` object in the corresponding collection. The content undergoes MathML-to-AsciiMath conversion and cross-reference expansion.
|
|
464
|
+
|
|
465
|
+
| `SOURCE`
|
|
466
|
+
| `ConceptData#sources` (via `ConceptSource`)
|
|
467
|
+
| HTML string
|
|
468
|
+
| Parsed by `SourceParser` into one or more `ConceptSource` objects, each with `type: "authoritative"`. The source string is split after normalization. Each source has: `status` (identical/modified/similar/related/not_equal), `origin` (a `Citation` with `ref`, `locality`, `link`, `original`), and optionally `modification` text. See <<_source-parsing>>.
|
|
469
|
+
|
|
470
|
+
| `PUBLICATIONDATE`
|
|
471
|
+
| `ConceptData#dates` (via `ConceptDate`)
|
|
472
|
+
| `String` (YYYY-MM or YYYY-MM-DD)
|
|
473
|
+
| Converted to a full ISO 8601 datetime. Creates two `ConceptDate` entries: `{type: "accepted", date: ...}` and `{type: "amended", date: ...}`. Also sets `ConceptData#review_date` and `ConceptData#review_decision_date` to the same value.
|
|
474
|
+
|
|
475
|
+
| `STATUS`
|
|
476
|
+
| `LocalizedConcept#entry_status`
|
|
477
|
+
| `String`
|
|
478
|
+
| Only `Standard` is known; it maps to `"valid"`. The value is lowercased before matching.
|
|
479
|
+
|
|
480
|
+
| `REPLACES`
|
|
481
|
+
| `ConceptData#related` (via `RelatedConcept`)
|
|
482
|
+
| `String`
|
|
483
|
+
| Parsed by `SupersessionParser`. Expected format: `IEVREF:VERSION` (e.g. `881-01-23:1983-01`). Creates a `RelatedConcept` with `type: "supersedes"` and a `Citation` containing `{source: "IEV", id: "...", version: "..."}`.
|
|
484
|
+
|
|
485
|
+
|===
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
[[_termattribute-breakdown]]
|
|
489
|
+
=== TERMATTRIBUTE Sub-Field Mapping
|
|
490
|
+
|
|
491
|
+
The `TERMATTRIBUTE` column is a composite string parsed by `TermAttrsParser`.
|
|
492
|
+
It may contain multiple attributes separated by semicolons. The parser extracts
|
|
493
|
+
them in order: gender, plurality, geographical area, part of speech, usage
|
|
494
|
+
info, prefix.
|
|
495
|
+
|
|
496
|
+
[cols="15,30,55",options="header"]
|
|
497
|
+
|===
|
|
498
|
+
| Parsed Value | Glossarist Path | Notes
|
|
499
|
+
|
|
500
|
+
| `m`, `f`, `n`
|
|
501
|
+
| `GrammarInfo#gender` (via `Designation::Expression#grammar_info`)
|
|
502
|
+
| Grammatical gender. May appear inside brackets: `(m)`, `[f]`.
|
|
503
|
+
|
|
504
|
+
| `pl`
|
|
505
|
+
| `GrammarInfo#number` (via `Designation::Expression#grammar_info`)
|
|
506
|
+
| Plurality. `pl` maps to `"plural"`. If gender was found but not `pl`, defaults to `"singular"`.
|
|
507
|
+
|
|
508
|
+
| `adj`, `noun`, `verb`
|
|
509
|
+
| `GrammarInfo#part_of_speech`
|
|
510
|
+
| Part of speech. Localized variants are mapped: German `Adjektiv` to `adj`, Japanese and Korean variants similarly.
|
|
511
|
+
|
|
512
|
+
| Angle bracket text (ASCII or full-width)
|
|
513
|
+
| `Designation::Expression#usage_info`
|
|
514
|
+
| Usage info / domain indicator extracted from angle brackets. Full-width brackets used in some CJK terms.
|
|
515
|
+
|
|
516
|
+
| Prefix keywords in multiple languages
|
|
517
|
+
| `Designation::Expression#prefix`
|
|
518
|
+
| Marks the designation as a prefix. Keywords include German, French, Japanese, Korean, Chinese, Portuguese variants.
|
|
519
|
+
|
|
520
|
+
| Two-letter uppercase (e.g. `CA`, `US`)
|
|
521
|
+
| `Designation::Base#geographical_area`
|
|
522
|
+
| ISO 3166-1 alpha-2 country code.
|
|
523
|
+
|
|
524
|
+
|===
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
[[_source-parsing]]
|
|
528
|
+
=== SOURCE Column Parsing
|
|
529
|
+
|
|
530
|
+
The `SOURCE` column is the most complex field. It is parsed by `SourceParser`
|
|
531
|
+
into one or more `ConceptSource` objects.
|
|
532
|
+
|
|
533
|
+
==== Relationship Status Detection
|
|
534
|
+
|
|
535
|
+
The parser detects the source relationship type from textual markers:
|
|
536
|
+
|
|
537
|
+
[cols="20,20,60",options="header"]
|
|
538
|
+
|===
|
|
539
|
+
| Marker | Status | Notes
|
|
540
|
+
|
|
541
|
+
| Not-equal sign
|
|
542
|
+
| `not_equal`
|
|
543
|
+
| Definition differs from source.
|
|
544
|
+
|
|
545
|
+
| Approximately-equal sign
|
|
546
|
+
| `similar`
|
|
547
|
+
| Definition is similar to source.
|
|
548
|
+
|
|
549
|
+
| `see`, `voir`
|
|
550
|
+
| `related`
|
|
551
|
+
| Cross-reference to another definition.
|
|
552
|
+
|
|
553
|
+
| `MOD`, `modified`, `modifié`
|
|
554
|
+
| `modified`
|
|
555
|
+
| Definition modified from source. Modification text is captured in `ConceptSource#modification`.
|
|
556
|
+
|
|
557
|
+
| (default)
|
|
558
|
+
| `identical`
|
|
559
|
+
| No special marker found.
|
|
560
|
+
|
|
561
|
+
|===
|
|
562
|
+
|
|
563
|
+
==== Source Reference Extraction
|
|
564
|
+
|
|
565
|
+
The parser normalizes and extracts the source reference (e.g. `IEC 60050-121`),
|
|
566
|
+
the clause locality (e.g. `151-12-05`), and optionally resolves a URL via
|
|
567
|
+
Relaton. Reference normalization handles many localized forms: `CEI` to `IEC`,
|
|
568
|
+
`UIT` to `ITU`, `VEI` to `IEV`, etc.
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
[[_derived-fields]]
|
|
572
|
+
=== Derived Fields (Not Directly From Excel Columns)
|
|
573
|
+
|
|
574
|
+
Some Glossarist model fields are *derived* from IEVREF or from combinations
|
|
575
|
+
of columns during export:
|
|
576
|
+
|
|
577
|
+
[cols="25,25,50",options="header"]
|
|
578
|
+
|===
|
|
579
|
+
| Glossarist Path | Source | Notes
|
|
580
|
+
|
|
581
|
+
| `ManagedConceptData#domains`
|
|
582
|
+
| Derived from `IEVREF`
|
|
583
|
+
| The IEVREF pattern `AAA-BB-CC` is split. Creates two `ConceptReference` objects with `ref_type: "domain"` and `source: "urn:iec:std:iec:60050"` (IEC URN per IEC URN specification): `area-AAA` and `section-AAA-BB`. For example, `103-01-02` produces `area-103` + `section-103-01`.
|
|
584
|
+
|
|
585
|
+
| `LocalizedConcept#classification`
|
|
586
|
+
| `SYNONYM1STATUS`
|
|
587
|
+
| Maps localized classification values to their canonical form: the Chinese, Russian, and Spanish (`"admitido"`) variants map to `"admitted"`, and the various localized forms of `"preferred"` are mapped similarly; any other value is lowercased and used as-is.
|
|
588
|
+
|
|
589
|
+
| `ConceptData#domain`
|
|
590
|
+
| Derived from `IEVREF`
|
|
591
|
+
| The section-level domain URI (e.g. `section-103-01`), resolved from the `SubjectAreas` data. Falls back to area-level if section not found.
|
|
592
|
+
|
|
593
|
+
| `ConceptData#review_decision_event`
|
|
594
|
+
| Hard-coded
|
|
595
|
+
| Always set to `"published"`.
|
|
596
|
+
|
|
597
|
+
| `ConceptDate {type: "amended"}`
|
|
598
|
+
| `PUBLICATIONDATE`
|
|
599
|
+
| A second date entry with type `"amended"` is created alongside the `"accepted"` date, using the same publication date value.
|
|
600
|
+
|
|
601
|
+
| `ManagedConcept#related`
|
|
602
|
+
| Derived from `IEVREF`
|
|
603
|
+
| Hierarchy relations using `broader`/`narrower`. Regular IEV concepts have `broader → section-AAA-BB`. Section concepts have `broader → area-AAA` (from SubjectAreaConcepts) and `narrower → child concepts` (from Exporter). Area concepts have `narrower → section-AAA-BB`. Each `RelatedConcept` has both `content` (string) and `ref` (Citation with source `"IEV"` and `id`) set, so the glossarist RDF transform emits `skos:broader`/`skos:narrower` triples.
|
|
604
|
+
|
|
605
|
+
|===
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
=== Glossarist Model Fields NOT Populated From IEV Excel
|
|
609
|
+
|
|
610
|
+
The following Glossarist model fields exist in the data model but are *not*
|
|
611
|
+
populated from any IEV Excel column. They remain at their defaults:
|
|
612
|
+
|
|
613
|
+
[cols="25,60,15",options="header"]
|
|
614
|
+
|===
|
|
615
|
+
| Glossarist Field | Description | Default
|
|
616
|
+
|
|
617
|
+
| `ManagedConceptData#uri`
|
|
618
|
+
| External URI for the concept
|
|
619
|
+
| nil
|
|
620
|
+
|
|
621
|
+
| `ManagedConceptData#sources`
|
|
622
|
+
| Managed-concept-level sources (distinct from localized sources)
|
|
623
|
+
| empty
|
|
624
|
+
|
|
625
|
+
| `ManagedConcept#dates`
|
|
626
|
+
| Managed-concept-level dates (distinct from localized dates)
|
|
627
|
+
| empty
|
|
628
|
+
|
|
629
|
+
| `ManagedConcept#status`
|
|
630
|
+
| Concept lifecycle status (draft/valid/retired etc.)
|
|
631
|
+
| nil
|
|
632
|
+
|
|
633
|
+
| `ConceptData#release`
|
|
634
|
+
| Release version tag
|
|
635
|
+
| nil
|
|
636
|
+
|
|
637
|
+
| `ConceptData#lineage_source_similarity`
|
|
638
|
+
| Lineage source similarity percentage
|
|
639
|
+
| nil
|
|
640
|
+
|
|
641
|
+
| `ConceptData#script`
|
|
642
|
+
| ISO 15924 script code
|
|
643
|
+
| nil
|
|
644
|
+
|
|
645
|
+
| `ConceptData#system`
|
|
646
|
+
| ISO 24229 conversion system code
|
|
647
|
+
| nil
|
|
648
|
+
|
|
649
|
+
| `ConceptData#references`
|
|
650
|
+
| ConceptReference collection on localized concept
|
|
651
|
+
| empty
|
|
652
|
+
|
|
653
|
+
| `ConceptData#entry_status`
|
|
654
|
+
| Entry status on ConceptData (duplicate of LocalizedConcept#entry_status)
|
|
655
|
+
| nil
|
|
656
|
+
|
|
657
|
+
| `Concept#non_verb_rep`
|
|
658
|
+
| Non-verbal representations (images, tables, formulas)
|
|
659
|
+
| empty
|
|
660
|
+
|
|
661
|
+
| `Designation::Base#language`
|
|
662
|
+
| Per-designation language override
|
|
663
|
+
| nil
|
|
664
|
+
|
|
665
|
+
| `Designation::Base#script`
|
|
666
|
+
| Per-designation ISO 15924 script
|
|
667
|
+
| nil
|
|
668
|
+
|
|
669
|
+
| `Designation::Base#system`
|
|
670
|
+
| Per-designation ISO 24229 system
|
|
671
|
+
| nil
|
|
672
|
+
|
|
673
|
+
| `Designation::Base#international`
|
|
674
|
+
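| International validity flag. The only exception to "not populated": it is set to `true` for designations created from `SYMBOLE`; for all other designations it stays at the default.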
|
|
675
|
+
| false
|
|
676
|
+
|
|
677
|
+
| `Designation::Base#absent`
|
|
678
|
+
| Explicitly absent designation flag
|
|
679
|
+
| false
|
|
680
|
+
|
|
681
|
+
| `Designation::Base#pronunciation`
|
|
682
|
+
| Pronunciation entries (IPA, romanization, etc.)
|
|
683
|
+
| empty
|
|
684
|
+
|
|
685
|
+
| `Designation::Base#sources`
|
|
686
|
+
| Per-designation bibliographic sources
|
|
687
|
+
| empty
|
|
688
|
+
|
|
689
|
+
| `Designation::Base#term_type`
|
|
690
|
+
| ISO 12620 term type classification (24 values)
|
|
691
|
+
| nil
|
|
692
|
+
|
|
693
|
+
| `Designation::Base#related`
|
|
694
|
+
| Designation-level relationships (abbreviated_form_for, short_form_for)
|
|
695
|
+
| empty
|
|
696
|
+
|
|
697
|
+
| `Designation::Expression#field_of_application`
|
|
698
|
+
| Subject field / specific use
|
|
699
|
+
| nil
|
|
700
|
+
|
|
701
|
+
| `Designation::Abbreviation#acronym`
|
|
702
|
+
| Acronym type flag
|
|
703
|
+
| false
|
|
704
|
+
|
|
705
|
+
| `Designation::Abbreviation#initialism`
|
|
706
|
+
| Initialism type flag
|
|
707
|
+
| false
|
|
708
|
+
|
|
709
|
+
| `Designation::Abbreviation#truncation`
|
|
710
|
+
| Truncation type flag
|
|
711
|
+
| false
|
|
712
|
+
|
|
713
|
+
| `Designation::LetterSymbol`
|
|
714
|
+
| Letter symbol designation type (subclass of Symbol with `text`)
|
|
715
|
+
| (not used)
|
|
716
|
+
|
|
717
|
+
| `Designation::GraphicalSymbol`
|
|
718
|
+
| Graphical symbol designation type (subclass of Symbol with `text`, `image`)
|
|
719
|
+
| (not used)
|
|
720
|
+
|
|
721
|
+
| `LocalizedConcept#review_type`
|
|
722
|
+
| Review type
|
|
723
|
+
| nil
|
|
724
|
+
|
|
725
|
+
|===
|
|
726
|
+
|
|
727
|
+
|
|
363
728
|
== Copyright and license
|
|
364
729
|
|
|
365
730
|
Data copyright IEC. All others copyright Ribose.
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
== Data Model
|
|
734
|
+
|
|
735
|
+
=== Concept Domains
|
|
736
|
+
|
|
737
|
+
Exported concepts use `domains` (a collection of `ConceptReference` objects)
|
|
738
|
+
to represent the IEV subject area hierarchy. Each concept's domains include
|
|
739
|
+
references to its area (e.g. `area-103`) and section (e.g. `section-103-01`).
|
|
740
|
+
|
|
741
|
+
[source,yaml]
|
|
742
|
+
----
|
|
743
|
+
data:
|
|
744
|
+
identifier: "103-01-01"
|
|
745
|
+
domains:
|
|
746
|
+
- concept_id: area-103
|
|
747
|
+
source: urn:iec:std:iec:60050
|
|
748
|
+
ref_type: domain
|
|
749
|
+
- concept_id: section-103-01
|
|
750
|
+
source: urn:iec:std:iec:60050
|
|
751
|
+
ref_type: domain
|
|
752
|
+
----
|
|
753
|
+
|
|
754
|
+
The `ref_type: domain` distinguishes domain references from other
|
|
755
|
+
`ConceptReference` types (local, urn, designation).
|
|
756
|
+
|
|
757
|
+
=== Subject Area Hierarchy
|
|
758
|
+
|
|
759
|
+
The `SubjectAreaConcepts` module creates area and section concepts that
|
|
760
|
+
form a two-level hierarchy with symmetric `broader`/`narrower` linkages
|
|
761
|
+
at the `ManagedConcept#related` level:
|
|
762
|
+
|
|
763
|
+
* **Area concepts** (e.g. `area-103`) — domain reference to themselves,
|
|
764
|
+
`narrower` relations to their sections
|
|
765
|
+
* **Section concepts** (e.g. `section-103-01`) — domain references to
|
|
766
|
+
both parent area and themselves, `broader` relation to parent area,
|
|
767
|
+
`narrower` relations to child IEV concepts (added by `Exporter`)
|
|
768
|
+
* **Regular IEV concepts** (e.g. `103-01-02`) — `broader` relation to
|
|
769
|
+
their section concept (added by `Exporter`)
|
|
770
|
+
|
|
771
|
+
All hierarchy `RelatedConcept` entries set both `content` (string, for
|
|
772
|
+
YAML serialization) and `ref` (`Citation` with `source: "IEV"` and `id`,
|
|
773
|
+
for RDF transformation via glossarist's gloss ontology).
|
|
774
|
+
|
|
775
|
+
Separately, `domains` (classification via `ConceptReference.domain(...)`)
|
|
776
|
+
and `ConceptData#domain` (per-localization string) remain for
|
|
777
|
+
classification/filtering — distinct from hierarchy.
|