iev 0.4.4 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +7 -4
- data/.github/workflows/release.yml +2 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +4 -2
- data/Gemfile +0 -4
- data/README.adoc +385 -7
- data/exe/iev +1 -1
- data/iev.gemspec +1 -1
- data/lib/iev/cli/command_helper.rb +1 -2
- data/lib/iev/exporter.rb +95 -8
- data/lib/iev/iev_code.rb +80 -0
- data/lib/iev/iso_639_code.rb +1 -1
- data/lib/iev/section.rb +37 -0
- data/lib/iev/source_parser.rb +48 -1
- data/lib/iev/subject_area.rb +46 -0
- data/lib/iev/subject_area_concepts.rb +45 -23
- data/lib/iev/subject_areas.rb +65 -24
- data/lib/iev/supersession_parser.rb +1 -2
- data/lib/iev/version.rb +1 -1
- data/lib/iev.rb +15 -5
- metadata +13 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a539da108086dd1fb862d1d99d6763a3d32015a921ef580018ac9671660b0b68
|
|
4
|
+
data.tar.gz: af0e1813cae179dd69da3a9e8fde6e8d275e34d59949bd43a0642a171988f311
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c93e2479cccfbe56b37b1d1cd604a3b327258abd8c50f9b99e2091abe7e4e714aa8e44c49f9e886d11483ec80e8e2325ce78f0598e8b34fdeb4fe7442b0e0d32
|
|
7
|
+
data.tar.gz: 28c5084f7a75106feee144201b75b642217674438f14a48c216a1069ed1b5d0abe822d137afb6671303bc1a407689ec236c4b2ff5abe2c0626e3a34f1ec35b34
|
data/.github/workflows/rake.yml
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
|
2
|
+
# See https://github.com/metanorma/cimas
|
|
1
3
|
name: rake
|
|
2
4
|
|
|
3
|
-
permissions:
|
|
4
|
-
contents: write
|
|
5
|
-
|
|
6
5
|
on:
|
|
7
6
|
push:
|
|
8
7
|
branches: [ master, main ]
|
|
9
8
|
tags: [ v* ]
|
|
10
9
|
pull_request:
|
|
11
|
-
|
|
10
|
+
|
|
11
|
+
permissions:
|
|
12
|
+
contents: write
|
|
12
13
|
|
|
13
14
|
jobs:
|
|
14
15
|
rake:
|
|
15
16
|
uses: metanorma/ci/.github/workflows/generic-rake.yml@main
|
|
17
|
+
secrets:
|
|
18
|
+
pat_token: ${{ secrets.GLOSSARIST_CI_PAT_TOKEN }}
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
# Auto-generated by Cimas: Do not edit it manually!
|
|
2
2
|
# See https://github.com/metanorma/cimas
|
|
3
3
|
inherit_from:
|
|
4
|
-
- .
|
|
5
|
-
- https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
|
|
4
|
+
- https://raw.githubusercontent.com/riboseinc/oss-guides/main/ci/rubocop.yml
|
|
6
5
|
|
|
7
6
|
# local repo-specific modifications
|
|
8
7
|
# ...
|
|
8
|
+
|
|
9
|
+
AllCops:
|
|
10
|
+
TargetRubyVersion: 3.4
|
data/Gemfile
CHANGED
data/README.adoc
CHANGED
|
@@ -360,6 +360,371 @@ authoritative_source:
|
|
|
360
360
|
----
|
|
361
361
|
|
|
362
362
|
|
|
363
|
+
== Excel-to-Glossarist Column Mapping
|
|
364
|
+
|
|
365
|
+
This section provides a complete mapping from every IEV Excel export column
|
|
366
|
+
to the corresponding Glossarist concept model field. The IEV Excel export has
|
|
367
|
+
19 columns (see <<_structure_of_the_iev_excel_export>>). Each row represents
|
|
368
|
+
one *localized term entry* (one language variant of one concept).
|
|
369
|
+
|
|
370
|
+
=== Glossarist Model Layers
|
|
371
|
+
|
|
372
|
+
The Glossarist model organizes concept data into two layers:
|
|
373
|
+
|
|
374
|
+
* *ManagedConcept* — the concept entry itself (identity, domain classification,
|
|
375
|
+
cross-concept relationships, lifecycle)
|
|
376
|
+
* *LocalizedConcept* — a language-specific variant of a concept (designations,
|
|
377
|
+
definition, notes, examples, sources)
|
|
378
|
+
|
|
379
|
+
One IEV Excel row produces one `LocalizedConcept`, which is attached to its
|
|
380
|
+
`ManagedConcept` (identified by `IEVREF`).
|
|
381
|
+
|
|
382
|
+
=== Column-by-Column Mapping
|
|
383
|
+
|
|
384
|
+
The table below maps each of the 19 Excel columns to the Glossarist model.
|
|
385
|
+
|
|
386
|
+
[cols="15h,25h,15h,45h",options="header"]
|
|
387
|
+
|===
|
|
388
|
+
| Excel Column | Glossarist Path | Data Type | Notes
|
|
389
|
+
|
|
390
|
+
| `IEVREF`
|
|
391
|
+
| `ManagedConceptData#id`
|
|
392
|
+
| `String`
|
|
393
|
+
| The concept identifier (e.g. `103-01-02`). Also set as `LocalizedConcept#id` and `ConceptData#id`. Used to group multiple language rows into one `ManagedConcept`. The IEVREF pattern `AAA-BB-CC` is also used to derive domain references (see <<_derived-fields>>).
|
|
394
|
+
|
|
395
|
+
| `LANGUAGE`
|
|
396
|
+
| `ConceptData#language_code`
|
|
397
|
+
| `String` (ISO 639-2/3)
|
|
398
|
+
| Two-character code (e.g. `en`, `fr`) converted to three-character ISO 639 code (e.g. `eng`, `fra`) via `Iev::Iso639Code`. This determines which language slot the localized concept fills.
|
|
399
|
+
|
|
400
|
+
| `TERM`
|
|
401
|
+
| `Designation::Expression#designation`
|
|
402
|
+
| `String`
|
|
403
|
+
| Primary term designation. Creates a `Designation::Expression` with `normative_status: "preferred"`. If the value is `.....` (5 dots, meaning "not available"), it is replaced with `"NA"`. The term text undergoes MathML-to-AsciiMath conversion and cross-reference expansion.
|
|
404
|
+
|
|
405
|
+
| `TERMATTRIBUTE`
|
|
406
|
+
| (multiple designation fields)
|
|
407
|
+
| Composite string
|
|
408
|
+
| Parsed by `TermAttrsParser` into multiple designation attributes. See <<_termattribute-breakdown>> for the full sub-mapping.
|
|
409
|
+
|
|
410
|
+
| `SYNONYM1`
|
|
411
|
+
| `Designation::Expression#designation`
|
|
412
|
+
| `String`
|
|
413
|
+
| Additional designation. Creates a `Designation::Expression`. Some synonyms contain multiple entries separated by `<p>`, `<b>`, `<br>` tags — each is split into a separate designation. `normative_status` comes from `SYNONYM1STATUS`.
|
|
414
|
+
|
|
415
|
+
| `SYNONYM1ATTRIBUTE`
|
|
416
|
+
| (multiple designation fields)
|
|
417
|
+
| Composite string
|
|
418
|
+
| Same parsing as `TERMATTRIBUTE`, applied to the `SYNONYM1` designation. See <<_termattribute-breakdown>>.
|
|
419
|
+
|
|
420
|
+
| `SYNONYM1STATUS`
|
|
421
|
+
| `Designation::Expression#normative_status`
|
|
422
|
+
| `String` or nil
|
|
423
|
+
| Maps to the synonym's normative status. The value is lowercased. Known localized values are mapped: e.g. `"obsoleto"` to `"deprecated"`, Cyrillic variants similarly. When nil, the synonym has no explicit status. Also used to derive `LocalizedConcept#classification` (see <<_derived-fields>>).
|
|
424
|
+
|
|
425
|
+
| `SYNONYM2`
|
|
426
|
+
| `Designation::Expression#designation`
|
|
427
|
+
| `String`
|
|
428
|
+
| Same pattern as `SYNONYM1`.
|
|
429
|
+
|
|
430
|
+
| `SYNONYM2ATTRIBUTE`
|
|
431
|
+
| (multiple designation fields)
|
|
432
|
+
| Composite string
|
|
433
|
+
| Same as `SYNONYM1ATTRIBUTE`.
|
|
434
|
+
|
|
435
|
+
| `SYNONYM2STATUS`
|
|
436
|
+
| `Designation::Expression#normative_status`
|
|
437
|
+
| `String` or nil
|
|
438
|
+
| Same as `SYNONYM1STATUS`.
|
|
439
|
+
|
|
440
|
+
| `SYNONYM3`
|
|
441
|
+
| `Designation::Expression#designation`
|
|
442
|
+
| `String`
|
|
443
|
+
| Same pattern as `SYNONYM1`.
|
|
444
|
+
|
|
445
|
+
| `SYNONYM3ATTRIBUTE`
|
|
446
|
+
| (multiple designation fields)
|
|
447
|
+
| Composite string
|
|
448
|
+
| Same as `SYNONYM1ATTRIBUTE`.
|
|
449
|
+
|
|
450
|
+
| `SYNONYM3STATUS`
|
|
451
|
+
| `Designation::Expression#normative_status`
|
|
452
|
+
| `String` or nil
|
|
453
|
+
| Same as `SYNONYM1STATUS`.
|
|
454
|
+
|
|
455
|
+
| `SYMBOLE`
|
|
456
|
+
| `Designation::Symbol#designation`
|
|
457
|
+
| `String`
|
|
458
|
+
| International math symbol. Creates a `Designation::Symbol` with `international: true`. If this column is empty, no symbol designation is created.
|
|
459
|
+
|
|
460
|
+
| `DEFINITION`
|
|
461
|
+
| `ConceptData#definition`, `ConceptData#examples`, `ConceptData#notes`
|
|
462
|
+
| HTML string
|
|
463
|
+
| The unified definition text is split by `TermBuilder#split_definition` which uses regex to detect EXAMPLE, EXEMPLE, Note N to entry, Note N a l'article, NOTE markers. Each part becomes a `DetailedDefinition` object in the corresponding collection. The content undergoes MathML-to-AsciiMath conversion and cross-reference expansion.
|
|
464
|
+
|
|
465
|
+
| `SOURCE`
|
|
466
|
+
| `ConceptData#sources` (via `ConceptSource`)
|
|
467
|
+
| HTML string
|
|
468
|
+
| Parsed by `SourceParser` into one or more `ConceptSource` objects, each with `type: "authoritative"`. The source string is split after normalization. Each source has: `status` (identical/modified/similar/related/not_equal), `origin` (a `Citation` with `ref`, `locality`, `link`, `original`), and optionally `modification` text. See <<_source-parsing>>.
|
|
469
|
+
|
|
470
|
+
| `PUBLICATIONDATE`
|
|
471
|
+
| `ConceptData#dates` (via `ConceptDate`)
|
|
472
|
+
| `String` (YYYY-MM or YYYY-MM-DD)
|
|
473
|
+
| Converted to a full ISO 8601 datetime. Creates two `ConceptDate` entries: `{type: "accepted", date: ...}` and `{type: "amended", date: ...}`. Also sets `ConceptData#review_date` and `ConceptData#review_decision_date` to the same value.
|
|
474
|
+
|
|
475
|
+
| `STATUS`
|
|
476
|
+
| `LocalizedConcept#entry_status`
|
|
477
|
+
| `String`
|
|
478
|
+
| Only `Standard` is a known value. The cell is lowercased before matching, and `standard` maps to `"valid"`.
|
|
479
|
+
|
|
480
|
+
| `REPLACES`
|
|
481
|
+
| `ConceptData#related` (via `RelatedConcept`)
|
|
482
|
+
| `String`
|
|
483
|
+
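| Parsed by `SupersessionParser`. Expected format: `IEVREF:VERSION` (e.g. `881-01-23:1983-01`). Creates a `RelatedConcept` with `type: "supersedes"` and a `Citation` containing `{source: "IEV", id: "...", version: "..."}`. During export, the exporter's `promote_supersession` step moves these relations to `ManagedConcept#related` (skipping entries already present with the same type and ref id) and then clears `ConceptData#related`, because supersession is language-independent.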
|
|
484
|
+
|
|
485
|
+
|===
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
[[_termattribute-breakdown]]
|
|
489
|
+
=== TERMATTRIBUTE Sub-Field Mapping
|
|
490
|
+
|
|
491
|
+
The `TERMATTRIBUTE` column is a composite string parsed by `TermAttrsParser`.
|
|
492
|
+
It may contain multiple attributes separated by semicolons. The parser extracts
|
|
493
|
+
them in order: gender, plurality, geographical area, part of speech, usage
|
|
494
|
+
info, prefix.
|
|
495
|
+
|
|
496
|
+
[cols="15h,30h,55h",options="header"]
|
|
497
|
+
|===
|
|
498
|
+
| Parsed Value | Glossarist Path | Notes
|
|
499
|
+
|
|
500
|
+
| `m`, `f`, `n`
|
|
501
|
+
| `GrammarInfo#gender` (via `Designation::Expression#grammar_info`)
|
|
502
|
+
| Grammatical gender. May appear inside brackets: `(m)`, `[f]`.
|
|
503
|
+
|
|
504
|
+
| `pl`
|
|
505
|
+
| `GrammarInfo#number` (via `Designation::Expression#grammar_info`)
|
|
506
|
+
| Plurality. `pl` maps to `"plural"`. If gender was found but not `pl`, defaults to `"singular"`.
|
|
507
|
+
|
|
508
|
+
| `adj`, `noun`, `verb`
|
|
509
|
+
| `GrammarInfo#part_of_speech`
|
|
510
|
+
| Part of speech. Localized variants are mapped: German `Adjektiv` to `adj`, Japanese and Korean variants similarly.
|
|
511
|
+
|
|
512
|
+
| Angle bracket text (ASCII or full-width)
|
|
513
|
+
| `Designation::Expression#usage_info`
|
|
514
|
+
| Usage info / domain indicator extracted from angle brackets. Full-width brackets used in some CJK terms.
|
|
515
|
+
|
|
516
|
+
| Prefix keywords in multiple languages
|
|
517
|
+
| `Designation::Expression#prefix`
|
|
518
|
+
| Marks the designation as a prefix. Keywords include German, French, Japanese, Korean, Chinese, Portuguese variants.
|
|
519
|
+
|
|
520
|
+
| Two-letter uppercase (e.g. `CA`, `US`)
|
|
521
|
+
| `Designation::Base#geographical_area`
|
|
522
|
+
| ISO 3166-1 alpha-2 country code.
|
|
523
|
+
|
|
524
|
+
|===
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
[[_source-parsing]]
|
|
528
|
+
=== SOURCE Column Parsing
|
|
529
|
+
|
|
530
|
+
The `SOURCE` column is the most complex field. It is parsed by `SourceParser`
|
|
531
|
+
into one or more `ConceptSource` objects.
|
|
532
|
+
|
|
533
|
+
==== Relationship Status Detection
|
|
534
|
+
|
|
535
|
+
The parser detects the source relationship type from textual markers:
|
|
536
|
+
|
|
537
|
+
[cols="20h,20h,60h",options="header"]
|
|
538
|
+
|===
|
|
539
|
+
| Marker | Status | Notes
|
|
540
|
+
|
|
541
|
+
| Not-equal sign
|
|
542
|
+
| `not_equal`
|
|
543
|
+
| Definition differs from source.
|
|
544
|
+
|
|
545
|
+
| Approximately-equal sign
|
|
546
|
+
| `similar`
|
|
547
|
+
| Definition is similar to source.
|
|
548
|
+
|
|
549
|
+
| `see`, `voir`
|
|
550
|
+
| `related`
|
|
551
|
+
| Cross-reference to another definition.
|
|
552
|
+
|
|
553
|
+
| `MOD`, `modified`, `modifié`
|
|
554
|
+
| `modified`
|
|
555
|
+
| Definition modified from source. Modification text is captured in `ConceptSource#modification`.
|
|
556
|
+
|
|
557
|
+
| (default)
|
|
558
|
+
| `identical`
|
|
559
|
+
| No special marker found.
|
|
560
|
+
|
|
561
|
+
|===
|
|
562
|
+
|
|
563
|
+
==== Source Reference Extraction
|
|
564
|
+
|
|
565
|
+
The parser normalizes and extracts the source reference (e.g. `IEC 60050-121`),
|
|
566
|
+
the clause locality (e.g. `151-12-05`), and optionally resolves a URL via
|
|
567
|
+
Relaton. Reference normalization handles many localized forms: `CEI` to `IEC`,
|
|
568
|
+
`UIT` to `ITU`, `VEI` to `IEV`, etc.
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
[[_derived-fields]]
|
|
572
|
+
=== Derived Fields (Not Directly From Excel Columns)
|
|
573
|
+
|
|
574
|
+
Some Glossarist model fields are *derived* from IEVREF or from combinations
|
|
575
|
+
of columns during export:
|
|
576
|
+
|
|
577
|
+
[cols="25h,25h,50h",options="header"]
|
|
578
|
+
|===
|
|
579
|
+
| Glossarist Path | Source | Notes
|
|
580
|
+
|
|
581
|
+
| `ManagedConceptData#domains`
|
|
582
|
+
| Derived from `IEVREF`
|
|
583
|
+
| The IEVREF pattern `AAA-BB-CC` is split. Creates two `ConceptReference` objects with `ref_type: "domain"` and `source: "urn:iec:std:iec:60050"` (an IEC URN, per the IEC URN specification): `area-AAA` and `section-AAA-BB`. For example, `103-01-02` produces `area-103` + `section-103-01`.
|
|
584
|
+
|
|
585
|
+
| `LocalizedConcept#classification`
|
|
586
|
+
| `SYNONYM1STATUS`
|
|
587
|
+
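| Maps localized classification values to canonical ones: localized forms of "admitted" (e.g. Spanish `"admitido"`, plus the Chinese and Russian equivalents) become `"admitted"`, and localized forms of "preferred" become `"preferred"`. Any other value is lowercased and used as-is.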
|
|
588
|
+
|
|
589
|
+
| `ConceptData#domain`
|
|
590
|
+
| Derived from `IEVREF`
|
|
591
|
+
| The section-level domain URI (e.g. `section-103-01`), resolved from the `SubjectAreas` data. Falls back to area-level if section not found.
|
|
592
|
+
|
|
593
|
+
| `ConceptData#review_decision_event`
|
|
594
|
+
| Hard-coded
|
|
595
|
+
| Always set to `"published"`.
|
|
596
|
+
|
|
597
|
+
| `ConceptDate {type: "amended"}`
|
|
598
|
+
| `PUBLICATIONDATE`
|
|
599
|
+
| A second date entry with type `"amended"` is created alongside the `"accepted"` date, using the same publication date value.
|
|
600
|
+
|
|
601
|
+
| `ManagedConcept#related`
|
|
602
|
+
| Derived from `IEVREF`
|
|
603
|
+
| Hierarchy relations using `broader`/`narrower`. Regular IEV concepts have `broader → section-AAA-BB`. Section concepts have `broader → area-AAA` (from SubjectAreaConcepts) and `narrower → child concepts` (from Exporter). Area concepts have `narrower → section-AAA-BB`. Each `RelatedConcept` has both `content` (string) and `ref` (Citation with source `"IEV"` and `id`) set, so the glossarist RDF transform emits `skos:broader`/`skos:narrower` triples.
|
|
604
|
+
|
|
605
|
+
|===
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
=== Glossarist Model Fields NOT Populated From IEV Excel
|
|
609
|
+
|
|
610
|
+
The following Glossarist model fields exist in the data model but are *not*
|
|
611
|
+
populated from any IEV Excel column. They remain at their defaults:
|
|
612
|
+
|
|
613
|
+
[cols="25h,60h,15h",options="header"]
|
|
614
|
+
|===
|
|
615
|
+
| Glossarist Field | Description | Default
|
|
616
|
+
|
|
617
|
+
| `ManagedConceptData#uri`
|
|
618
|
+
| External URI for the concept
|
|
619
|
+
| nil
|
|
620
|
+
|
|
621
|
+
| `ManagedConceptData#sources`
|
|
622
|
+
| Managed-concept-level sources (distinct from localized sources)
|
|
623
|
+
| empty
|
|
624
|
+
|
|
625
|
+
| `ManagedConcept#dates`
|
|
626
|
+
| Managed-concept-level dates (distinct from localized dates)
|
|
627
|
+
| empty
|
|
628
|
+
|
|
629
|
+
| `ManagedConcept#status`
|
|
630
|
+
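| Concept lifecycle status (draft/valid/retired etc.). Not read from a dedicated Excel column; however, the exporter's `set_managed_status` step copies the first non-empty `LocalizedConcept#entry_status` (derived from `STATUS`) into this field, so it stays at the default only when no localization has an entry status.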
|
|
631
|
+
| nil
|
|
632
|
+
|
|
633
|
+
| `ConceptData#release`
|
|
634
|
+
| Release version tag
|
|
635
|
+
| nil
|
|
636
|
+
|
|
637
|
+
| `ConceptData#lineage_source_similarity`
|
|
638
|
+
| Lineage source similarity percentage
|
|
639
|
+
| nil
|
|
640
|
+
|
|
641
|
+
| `ConceptData#script`
|
|
642
|
+
| ISO 15924 script code
|
|
643
|
+
| nil
|
|
644
|
+
|
|
645
|
+
| `ConceptData#system`
|
|
646
|
+
| ISO 24229 conversion system code
|
|
647
|
+
| nil
|
|
648
|
+
|
|
649
|
+
| `ConceptData#references`
|
|
650
|
+
| ConceptReference collection on localized concept
|
|
651
|
+
| empty
|
|
652
|
+
|
|
653
|
+
| `ConceptData#entry_status`
|
|
654
|
+
| Entry status on ConceptData (duplicate of LocalizedConcept#entry_status)
|
|
655
|
+
| nil
|
|
656
|
+
|
|
657
|
+
| `Concept#non_verb_rep`
|
|
658
|
+
| Non-verbal representations (images, tables, formulas)
|
|
659
|
+
| empty
|
|
660
|
+
|
|
661
|
+
| `Designation::Base#language`
|
|
662
|
+
| Per-designation language override
|
|
663
|
+
| nil
|
|
664
|
+
|
|
665
|
+
| `Designation::Base#script`
|
|
666
|
+
| Per-designation ISO 15924 script
|
|
667
|
+
| nil
|
|
668
|
+
|
|
669
|
+
| `Designation::Base#system`
|
|
670
|
+
| Per-designation ISO 24229 system
|
|
671
|
+
| nil
|
|
672
|
+
|
|
673
|
+
| `Designation::Base#international`
|
|
674
|
+
| International validity flag (set `true` only for SYMBOLE)
|
|
675
|
+
| false
|
|
676
|
+
|
|
677
|
+
| `Designation::Base#absent`
|
|
678
|
+
| Explicitly absent designation flag
|
|
679
|
+
| false
|
|
680
|
+
|
|
681
|
+
| `Designation::Base#pronunciation`
|
|
682
|
+
| Pronunciation entries (IPA, romanization, etc.)
|
|
683
|
+
| empty
|
|
684
|
+
|
|
685
|
+
| `Designation::Base#sources`
|
|
686
|
+
| Per-designation bibliographic sources
|
|
687
|
+
| empty
|
|
688
|
+
|
|
689
|
+
| `Designation::Base#term_type`
|
|
690
|
+
| ISO 12620 term type classification (24 values)
|
|
691
|
+
| nil
|
|
692
|
+
|
|
693
|
+
| `Designation::Base#related`
|
|
694
|
+
| Designation-level relationships (abbreviated_form_for, short_form_for)
|
|
695
|
+
| empty
|
|
696
|
+
|
|
697
|
+
| `Designation::Expression#field_of_application`
|
|
698
|
+
| Subject field / specific use
|
|
699
|
+
| nil
|
|
700
|
+
|
|
701
|
+
| `Designation::Abbreviation#acronym`
|
|
702
|
+
| Acronym type flag
|
|
703
|
+
| false
|
|
704
|
+
|
|
705
|
+
| `Designation::Abbreviation#initialism`
|
|
706
|
+
| Initialism type flag
|
|
707
|
+
| false
|
|
708
|
+
|
|
709
|
+
| `Designation::Abbreviation#truncation`
|
|
710
|
+
| Truncation type flag
|
|
711
|
+
| false
|
|
712
|
+
|
|
713
|
+
| `Designation::LetterSymbol`
|
|
714
|
+
| Letter symbol designation type (subclass of Symbol with `text`)
|
|
715
|
+
| (not used)
|
|
716
|
+
|
|
717
|
+
| `Designation::GraphicalSymbol`
|
|
718
|
+
| Graphical symbol designation type (subclass of Symbol with `text`, `image`)
|
|
719
|
+
| (not used)
|
|
720
|
+
|
|
721
|
+
| `LocalizedConcept#review_type`
|
|
722
|
+
| Review type
|
|
723
|
+
| nil
|
|
724
|
+
|
|
725
|
+
|===
|
|
726
|
+
|
|
727
|
+
|
|
363
728
|
== Copyright and license
|
|
364
729
|
|
|
365
730
|
Data copyright IEC. All others copyright Ribose.
|
|
@@ -379,8 +744,10 @@ data:
|
|
|
379
744
|
identifier: "103-01-01"
|
|
380
745
|
domains:
|
|
381
746
|
- concept_id: area-103
|
|
747
|
+
source: urn:iec:std:iec:60050
|
|
382
748
|
ref_type: domain
|
|
383
749
|
- concept_id: section-103-01
|
|
750
|
+
source: urn:iec:std:iec:60050
|
|
384
751
|
ref_type: domain
|
|
385
752
|
----
|
|
386
753
|
|
|
@@ -390,10 +757,21 @@ The `ref_type: domain` distinguishes domain references from other
|
|
|
390
757
|
=== Subject Area Hierarchy
|
|
391
758
|
|
|
392
759
|
The `SubjectAreaConcepts` module creates area and section concepts that
|
|
393
|
-
form a two-level hierarchy
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
and `
|
|
760
|
+
form a two-level hierarchy with symmetric `broader`/`narrower` linkages
|
|
761
|
+
at the `ManagedConcept#related` level:
|
|
762
|
+
|
|
763
|
+
* **Area concepts** (e.g. `area-103`) — domain reference to themselves,
|
|
764
|
+
`narrower` relations to their sections
|
|
765
|
+
* **Section concepts** (e.g. `section-103-01`) — domain references to
|
|
766
|
+
both parent area and themselves, `broader` relation to parent area,
|
|
767
|
+
`narrower` relations to child IEV concepts (added by `Exporter`)
|
|
768
|
+
* **Regular IEV concepts** (e.g. `103-01-02`) — `broader` relation to
|
|
769
|
+
their section concept (added by `Exporter`)
|
|
770
|
+
|
|
771
|
+
All hierarchy `RelatedConcept` entries set both `content` (string, for
|
|
772
|
+
YAML serialization) and `ref` (`Citation` with `source: "IEV"` and `id`,
|
|
773
|
+
for RDF transformation via glossarist's gloss ontology).
|
|
774
|
+
|
|
775
|
+
Separately, `domains` (classification via `ConceptReference.domain(...)`)
|
|
776
|
+
and `ConceptData#domain` (per-localization string) remain for
|
|
777
|
+
classification/filtering — distinct from hierarchy.
|
data/exe/iev
CHANGED
data/iev.gemspec
CHANGED
|
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
|
|
|
22
22
|
spec.required_ruby_version = Gem::Requirement.new(">= 3.2.0")
|
|
23
23
|
|
|
24
24
|
spec.add_dependency "creek", "~> 2.6"
|
|
25
|
-
spec.add_dependency "glossarist", ">= 2.
|
|
25
|
+
spec.add_dependency "glossarist", "~> 2.6", ">= 2.6.7"
|
|
26
26
|
spec.add_dependency "ferrum", "~> 0.15"
|
|
27
27
|
spec.add_dependency "nokogiri", "~> 1.19"
|
|
28
28
|
spec.add_dependency "plurimath"
|
data/lib/iev/cli/command_helper.rb
CHANGED
|
@@ -111,8 +111,7 @@ module Iev
|
|
|
111
111
|
|
|
112
112
|
definition = entry["definition"]
|
|
113
113
|
if definition
|
|
114
|
-
|
|
115
|
-
cd.definition = [Glossarist::DetailedDefinition.new(content: content)]
|
|
114
|
+
cd.definition = [Glossarist::DetailedDefinition.new(content: definition)]
|
|
116
115
|
end
|
|
117
116
|
|
|
118
117
|
l10n = Glossarist::LocalizedConcept.new
|
data/lib/iev/exporter.rb
CHANGED
|
@@ -55,6 +55,7 @@ module Iev
|
|
|
55
55
|
dataset = load_dataset
|
|
56
56
|
collection = build_collection(dataset)
|
|
57
57
|
add_subject_area_concepts(collection) if @include_areas
|
|
58
|
+
build_section_narrower_relations(collection) if @include_areas
|
|
58
59
|
save_collection(collection)
|
|
59
60
|
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
|
|
60
61
|
|
|
@@ -85,7 +86,7 @@ module Iev
|
|
|
85
86
|
|
|
86
87
|
exts = (XLSX_EXTENSIONS + SQLITE_EXTENSIONS).join(", ")
|
|
87
88
|
raise ArgumentError,
|
|
88
|
-
|
|
89
|
+
"Unsupported format: #{input_path.extname}. Supported: #{exts}"
|
|
89
90
|
end
|
|
90
91
|
|
|
91
92
|
def input_format
|
|
@@ -141,11 +142,16 @@ module Iev
|
|
|
141
142
|
|
|
142
143
|
concept = concept_index[term.id] ||= begin
|
|
143
144
|
c = Glossarist::ManagedConcept.new(data: { "id" => term.id })
|
|
145
|
+
c.uuid = term.id
|
|
144
146
|
c.data.domains = domain_references_for(term.id)
|
|
147
|
+
add_section_broader(c, term.id)
|
|
145
148
|
collection.store(c)
|
|
146
149
|
c
|
|
147
150
|
end
|
|
148
151
|
concept.add_l10n(term)
|
|
152
|
+
|
|
153
|
+
promote_supersession(concept, term)
|
|
154
|
+
set_managed_status(concept, term)
|
|
149
155
|
end
|
|
150
156
|
|
|
151
157
|
collection
|
|
@@ -160,21 +166,102 @@ module Iev
|
|
|
160
166
|
def save_collection(collection)
|
|
161
167
|
concepts_dir = output_dir.expand_path.join("concepts")
|
|
162
168
|
FileUtils.mkdir_p(concepts_dir)
|
|
163
|
-
collection.
|
|
169
|
+
collection.save_grouped_concepts_to_files(concepts_dir.to_s)
|
|
164
170
|
end
|
|
165
171
|
|
|
166
172
|
def localized_count(collection)
|
|
167
173
|
collection.sum { |c| c.localized_concepts.count }
|
|
168
174
|
end
|
|
169
175
|
|
|
176
|
+
IEV_SOURCE = "urn:iec:std:iec:60050"
|
|
177
|
+
|
|
170
178
|
def domain_references_for(ievref)
|
|
171
|
-
|
|
172
|
-
|
|
179
|
+
code = IevCode.new(ievref.to_s)
|
|
180
|
+
refs = []
|
|
181
|
+
if code.area_code
|
|
182
|
+
refs << Glossarist::ConceptReference.new(
|
|
183
|
+
concept_id: code.area_uri,
|
|
184
|
+
source: IEV_SOURCE,
|
|
185
|
+
ref_type: "domain",
|
|
186
|
+
)
|
|
187
|
+
end
|
|
188
|
+
if code.section_code
|
|
189
|
+
refs << Glossarist::ConceptReference.new(
|
|
190
|
+
concept_id: code.section_uri,
|
|
191
|
+
source: IEV_SOURCE,
|
|
192
|
+
ref_type: "domain",
|
|
193
|
+
)
|
|
194
|
+
end
|
|
195
|
+
refs
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
def add_section_broader(concept, ievref)
|
|
199
|
+
code = IevCode.new(ievref.to_s)
|
|
200
|
+
return unless code.section_uri
|
|
201
|
+
|
|
202
|
+
concept.related ||= []
|
|
203
|
+
return if concept.related.any? do |r|
|
|
204
|
+
r.type == "broader" && r.ref&.id == code.section_uri
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
concept.related << Glossarist::RelatedConcept.new(
|
|
208
|
+
type: "broader",
|
|
209
|
+
content: code.section_uri,
|
|
210
|
+
ref: Glossarist::ConceptRef.new(source: "IEV", id: code.section_uri),
|
|
211
|
+
)
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def build_section_narrower_relations(collection)
|
|
215
|
+
mc_index = collection.each_with_object({}) do |c, h|
|
|
216
|
+
h[c.data&.id] = c if c.data&.id
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
section_children = {}
|
|
220
|
+
mc_index.each_key do |concept_id|
|
|
221
|
+
code = IevCode.new(concept_id)
|
|
222
|
+
next unless code.section_uri
|
|
223
|
+
|
|
224
|
+
(section_children[code.section_uri] ||= []) << concept_id
|
|
225
|
+
end
|
|
226
|
+
|
|
227
|
+
section_children.each do |section_uri, child_ids|
|
|
228
|
+
section_mc = mc_index[section_uri]
|
|
229
|
+
next unless section_mc
|
|
230
|
+
|
|
231
|
+
narrower = child_ids.sort.map do |child_id|
|
|
232
|
+
Glossarist::RelatedConcept.new(
|
|
233
|
+
type: "narrower",
|
|
234
|
+
content: child_id,
|
|
235
|
+
ref: Glossarist::ConceptRef.new(source: "IEV", id: child_id),
|
|
236
|
+
)
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
section_mc.related ||= []
|
|
240
|
+
section_mc.related.concat(narrower)
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Promote supersedes relations from localized ConceptData to managed level.
|
|
245
|
+
# Supersession is language-independent (REPLACES column is per-concept).
|
|
246
|
+
def promote_supersession(concept, term)
|
|
247
|
+
related = term.data&.related
|
|
248
|
+
return unless related&.any?
|
|
249
|
+
|
|
250
|
+
concept.related ||= []
|
|
251
|
+
related.each do |r|
|
|
252
|
+
next if concept.related.any? { |er| er.type == r.type && er.ref&.id == r.ref&.id }
|
|
253
|
+
|
|
254
|
+
concept.related << r
|
|
255
|
+
end
|
|
256
|
+
term.data.related = nil
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
# Derive managed concept status from the localization's entry_status.
|
|
260
|
+
def set_managed_status(concept, term)
|
|
261
|
+
return if concept.status
|
|
173
262
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
SubjectAreas.section_uri(parts[0..1].join("-")),
|
|
177
|
-
].map { |id| Glossarist::ConceptReference.domain(id) }
|
|
263
|
+
status = term.entry_status
|
|
264
|
+
concept.status = status if status && !status.empty?
|
|
178
265
|
end
|
|
179
266
|
end
|
|
180
267
|
end
|
data/lib/iev/iev_code.rb
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Iev
|
|
4
|
+
# Immutable value object that decomposes an IEV concept code
|
|
5
|
+
# into its structural parts: area code, section code, and number.
|
|
6
|
+
#
|
|
7
|
+
# The IEV code format is AAA-BB-CC where:
|
|
8
|
+
# AAA = area code (e.g. "103")
|
|
9
|
+
# BB = section sub-code (e.g. "01")
|
|
10
|
+
# CC = concept number (e.g. "02")
|
|
11
|
+
#
|
|
12
|
+
# @example Full concept code
|
|
13
|
+
# code = Iev::IevCode.new("103-01-02")
|
|
14
|
+
# code.area_code #=> "103"
|
|
15
|
+
# code.section_code #=> "103-01"
|
|
16
|
+
# code.number #=> "02"
|
|
17
|
+
# code.area_uri #=> "area-103"
|
|
18
|
+
# code.section_uri #=> "section-103-01"
|
|
19
|
+
#
|
|
20
|
+
# @example Section code (no concept number)
|
|
21
|
+
# code = Iev::IevCode.new("103-01")
|
|
22
|
+
# code.area_code #=> "103"
|
|
23
|
+
# code.section_code #=> "103-01"
|
|
24
|
+
# code.number #=> nil
|
|
25
|
+
# code.section_uri #=> "section-103-01"
|
|
26
|
+
#
|
|
27
|
+
class IevCode
|
|
28
|
+
include Comparable
|
|
29
|
+
|
|
30
|
+
attr_reader :raw, :area_code, :section_code, :number
|
|
31
|
+
|
|
32
|
+
# @param code [#to_s] IEV reference, e.g. "103-01-02"
|
|
33
|
+
def initialize(code)
|
|
34
|
+
@raw = code.to_s
|
|
35
|
+
parts = @raw.split("-")
|
|
36
|
+
@area_code = parts[0]
|
|
37
|
+
@section_code = parts.length >= 2 ? "#{parts[0]}-#{parts[1]}" : nil
|
|
38
|
+
@number = parts.length >= 3 ? parts[2] : nil
|
|
39
|
+
freeze
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def area_uri
|
|
43
|
+
"area-#{area_code}"
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def section_uri
|
|
47
|
+
"section-#{section_code}" if section_code
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def to_s
|
|
51
|
+
@raw
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def to_str
|
|
55
|
+
@raw
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def ==(other)
|
|
59
|
+
other.is_a?(self.class) && raw == other.raw
|
|
60
|
+
end
|
|
61
|
+
alias_method :eql?, :==
|
|
62
|
+
|
|
63
|
+
def hash
|
|
64
|
+
raw.hash
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def <=>(other)
|
|
68
|
+
to_s <=> other.to_s
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Safe constructor that returns nil for codes that don't parse.
|
|
72
|
+
# @param code [#to_s]
|
|
73
|
+
# @return [IevCode, nil]
|
|
74
|
+
def self.parse(code)
|
|
75
|
+
new(code)
|
|
76
|
+
rescue ArgumentError
|
|
77
|
+
nil
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
data/lib/iev/iso_639_code.rb
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
module Iev
|
|
7
7
|
# @todo This needs to be rewritten.
|
|
8
8
|
class Iso639Code
|
|
9
|
-
COUNTRY_CODES = YAML.
|
|
9
|
+
COUNTRY_CODES = YAML.safe_load(IO.read(File.join(__dir__, "iso_639_2.yaml")), permitted_classes: [Symbol]).freeze
|
|
10
10
|
# rubocop:disable Style/MutableConstant
|
|
11
11
|
THREE_CHAR_MEMO = {} # Memoization cache, must be mutable
|
|
12
12
|
# rubocop:enable Style/MutableConstant
|
data/lib/iev/section.rb
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Iev
|
|
4
|
+
# Immutable value object representing an IEV section (e.g. "103-01").
|
|
5
|
+
#
|
|
6
|
+
# A section belongs to exactly one area, identified by +area_code+.
|
|
7
|
+
class Section
|
|
8
|
+
attr_reader :code, :title, :area_code
|
|
9
|
+
|
|
10
|
+
# @param code [#to_s] section code, e.g. "103-01"
|
|
11
|
+
# @param title [#to_s] section title, e.g. "General concepts on functions"
|
|
12
|
+
# @param area_code [#to_s] parent area code, e.g. "103"
|
|
13
|
+
def initialize(code:, title:, area_code:)
|
|
14
|
+
@code = code.to_s
|
|
15
|
+
@title = title.to_s
|
|
16
|
+
@area_code = area_code.to_s
|
|
17
|
+
freeze
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def uri
|
|
21
|
+
"section-#{code}"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def to_h
|
|
25
|
+
{ "code" => code, "title" => title }
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def ==(other)
|
|
29
|
+
other.is_a?(self.class) && code == other.code
|
|
30
|
+
end
|
|
31
|
+
alias_method :eql?, :==
|
|
32
|
+
|
|
33
|
+
def hash
|
|
34
|
+
code.hash
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
data/lib/iev/source_parser.rb
CHANGED
|
@@ -79,10 +79,11 @@ module Iev
|
|
|
79
79
|
relationship = extract_source_relationship(raw_ref)
|
|
80
80
|
clean_ref = normalize_ref_string(raw_ref)
|
|
81
81
|
source_ref = extract_source_ref(clean_ref)
|
|
82
|
+
ref_source, ref_id = split_ref(source_ref)
|
|
82
83
|
clause = extract_source_clause(clean_ref)
|
|
83
84
|
|
|
84
85
|
origin = Glossarist::Citation.new(
|
|
85
|
-
ref:
|
|
86
|
+
ref: Glossarist::Citation::Ref.new(source: ref_source, id: ref_id),
|
|
86
87
|
locality: build_locality(clause),
|
|
87
88
|
link: obtain_source_link(source_ref),
|
|
88
89
|
original: Iev::Converter.mathml_to_asciimath(
|
|
@@ -351,6 +352,52 @@ module Iev
|
|
|
351
352
|
)
|
|
352
353
|
end
|
|
353
354
|
|
|
355
|
+
# Splits a normalized bibliographic reference into [source, id] for
|
|
356
|
+
# structured Citation::Ref construction. The full string is still
|
|
357
|
+
# passed to Relaton for link resolution — only the Citation::Ref
|
|
358
|
+
# model receives the split form.
|
|
359
|
+
#
|
|
360
|
+
# "IEC 62302:2007" → ["IEC", "62302:2007"]
|
|
361
|
+
# "ISO/IEC 2382:2015" → ["ISO/IEC", "2382:2015"]
|
|
362
|
+
# "ISO/TS 14812:2022" → ["ISO/TS", "14812:2022"]
|
|
363
|
+
# "IEC CISPR 16-1:2003" → ["IEC CISPR", "16-1:2003"]
|
|
364
|
+
# "ITU-T Recommendation F.791 (11/2015)" → ["ITU-T Recommendation", "F.791 (11/2015)"]
|
|
365
|
+
# "IEV" → ["IEV", nil]
|
|
366
|
+
def split_ref(full_ref)
|
|
367
|
+
case full_ref
|
|
368
|
+
when /\A(ISO\/IEC\/IEEE)\s+(.+)/
|
|
369
|
+
[$1, $2]
|
|
370
|
+
when /\A(ISO\/IEC\s+Guide)\s+(.+)/
|
|
371
|
+
[$1, $2]
|
|
372
|
+
when /\A(ISO\/IEC)\s+(.+)/
|
|
373
|
+
[$1, $2]
|
|
374
|
+
when /\A(IEC\/IEEE)\s+(.+)/
|
|
375
|
+
[$1, $2]
|
|
376
|
+
when %r{\A((?:ISO|IEC)/(?:PAS|TR|TS))\s+(.+)}
|
|
377
|
+
[$1, $2]
|
|
378
|
+
when /\A(IEC\s+CISPR)\s+(.+)/
|
|
379
|
+
[$1, $2]
|
|
380
|
+
when /\A(ITU-T\s+Recommendation)\s+(.+)/
|
|
381
|
+
[$1, $2]
|
|
382
|
+
when /\A(ITU-R\s+Recommendation)\s+(.+)/
|
|
383
|
+
[$1, $2]
|
|
384
|
+
when /\A(ITU-R)\s+(.+)/
|
|
385
|
+
[$1, $2]
|
|
386
|
+
when /\A((?:ISO|IEC)\s+Guide)\s+(.+)/
|
|
387
|
+
[$1, $2]
|
|
388
|
+
when /\A(ISO|IEC|IAEA)\s+(.+)/
|
|
389
|
+
[$1, $2]
|
|
390
|
+
when /\AIEV\z/
|
|
391
|
+
["IEV", nil]
|
|
392
|
+
when /\A(JCGM)\s+(VIM)\z/
|
|
393
|
+
[$1, $2]
|
|
394
|
+
when /\ABBIPM/
|
|
395
|
+
["BIPM", "SI Brochure"]
|
|
396
|
+
else
|
|
397
|
+
[full_ref, nil]
|
|
398
|
+
end
|
|
399
|
+
end
|
|
400
|
+
|
|
354
401
|
# Uses Relaton to obtain link for given source ref.
|
|
355
402
|
def obtain_source_link(ref)
|
|
356
403
|
return nil unless self.class.relaton_enabled
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Iev
|
|
4
|
+
# Immutable value object representing an IEV subject area (e.g. "102").
|
|
5
|
+
#
|
|
6
|
+
# A subject area is the aggregate root for its sections.
|
|
7
|
+
# Navigation: area → sections (direct), section → area (via registry).
|
|
8
|
+
class SubjectArea
|
|
9
|
+
attr_reader :code, :title, :sections
|
|
10
|
+
|
|
11
|
+
# @param code [#to_s] area code, e.g. "103"
|
|
12
|
+
# @param title [#to_s] area title, e.g. "Mathematics - Functions"
|
|
13
|
+
# @param sections [Array<Iev::Section>] child sections
|
|
14
|
+
def initialize(code:, title:, sections: [])
|
|
15
|
+
@code = code.to_s
|
|
16
|
+
@title = title.to_s
|
|
17
|
+
@sections = sections
|
|
18
|
+
freeze
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def uri
|
|
22
|
+
"area-#{code}"
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def section(section_code)
|
|
26
|
+
sections.find { |s| s.code == section_code.to_s }
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def to_h
|
|
30
|
+
{
|
|
31
|
+
"code" => code,
|
|
32
|
+
"title" => title,
|
|
33
|
+
"sections" => sections.map(&:to_h),
|
|
34
|
+
}
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def ==(other)
|
|
38
|
+
other.is_a?(self.class) && code == other.code
|
|
39
|
+
end
|
|
40
|
+
alias_method :eql?, :==
|
|
41
|
+
|
|
42
|
+
def hash
|
|
43
|
+
code.hash
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -4,15 +4,24 @@ module Iev
|
|
|
4
4
|
# Creates ManagedConcept entries for the IEV subject area hierarchy.
|
|
5
5
|
#
|
|
6
6
|
# The hierarchy has two levels:
|
|
7
|
-
# - Area (e.g., "102" = "Mathematics - General concepts
|
|
7
|
+
# - Area (e.g., "102" = "Mathematics - General concepts")
|
|
8
8
|
# - Section (e.g., "102-01" = "Sets and operations")
|
|
9
9
|
#
|
|
10
|
-
# Linking:
|
|
11
|
-
# - Each
|
|
12
|
-
# - Each
|
|
13
|
-
# - Each section
|
|
14
|
-
# - Each
|
|
10
|
+
# Linking (all at ManagedConcept#related level):
|
|
11
|
+
# - Each area has "narrower" relations to its sections
|
|
12
|
+
# - Each section has "broader" relation to parent area
|
|
13
|
+
# - Each section gets "narrower" to child concepts (added by Exporter)
|
|
14
|
+
# - Each regular IEV concept gets "broader" to its section
|
|
15
|
+
# (added by Exporter)
|
|
16
|
+
#
|
|
17
|
+
# Classification (separate from hierarchy):
|
|
18
|
+
# - Each concept's ManagedConceptData#domains includes area and
|
|
19
|
+
# section ConceptReferences
|
|
20
|
+
# - Each concept's ConceptData#domain references its section URI
|
|
21
|
+
# - Each section concept's ConceptData#domain references parent area
|
|
15
22
|
module SubjectAreaConcepts
|
|
23
|
+
IEV_SOURCE = "urn:iec:std:iec:60050"
|
|
24
|
+
|
|
16
25
|
class << self
|
|
17
26
|
# Build all area and section concepts and add them to the collection.
|
|
18
27
|
#
|
|
@@ -23,7 +32,7 @@ module Iev
|
|
|
23
32
|
area_mc = build_area_concept(area)
|
|
24
33
|
collection.store(area_mc)
|
|
25
34
|
|
|
26
|
-
|
|
35
|
+
area.sections.each do |section|
|
|
27
36
|
section_mc = build_section_concept(section, area)
|
|
28
37
|
collection.store(section_mc)
|
|
29
38
|
end
|
|
@@ -32,42 +41,53 @@ module Iev
|
|
|
32
41
|
|
|
33
42
|
private
|
|
34
43
|
|
|
44
|
+
def domain_ref(concept_id)
|
|
45
|
+
Glossarist::ConceptReference.new(
|
|
46
|
+
concept_id: concept_id,
|
|
47
|
+
source: IEV_SOURCE,
|
|
48
|
+
ref_type: "domain",
|
|
49
|
+
)
|
|
50
|
+
end
|
|
51
|
+
|
|
35
52
|
def build_area_concept(area)
|
|
36
|
-
id =
|
|
53
|
+
id = area.uri
|
|
37
54
|
|
|
38
55
|
mc = Glossarist::ManagedConcept.new(
|
|
39
56
|
data: Glossarist::ManagedConceptData.new(
|
|
40
57
|
id: id,
|
|
41
|
-
domains: [
|
|
58
|
+
domains: [domain_ref(id)],
|
|
42
59
|
),
|
|
43
60
|
)
|
|
61
|
+
mc.uuid = id
|
|
44
62
|
|
|
45
|
-
mc.add_localization(build_localization(id, area
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
mc.related = narrower unless narrower.empty?
|
|
63
|
+
mc.add_localization(build_localization(id, area.title, "eng"))
|
|
64
|
+
mc.related = area.sections.map { |s| build_narrower_relation(s.uri) }
|
|
65
|
+
mc.related = nil if mc.related.empty?
|
|
49
66
|
|
|
50
67
|
mc
|
|
51
68
|
end
|
|
52
69
|
|
|
53
70
|
def build_section_concept(section, area)
|
|
54
|
-
id =
|
|
71
|
+
id = section.uri
|
|
55
72
|
|
|
56
73
|
mc = Glossarist::ManagedConcept.new(
|
|
57
74
|
data: Glossarist::ManagedConceptData.new(
|
|
58
75
|
id: id,
|
|
59
76
|
domains: [
|
|
60
|
-
|
|
61
|
-
|
|
77
|
+
domain_ref(area.uri),
|
|
78
|
+
domain_ref(id),
|
|
62
79
|
],
|
|
63
80
|
),
|
|
64
81
|
)
|
|
82
|
+
mc.uuid = id
|
|
65
83
|
|
|
66
|
-
cd = build_concept_data(id, section
|
|
67
|
-
cd.domain =
|
|
68
|
-
cd.related = [build_broader_ref(area["code"])]
|
|
84
|
+
cd = build_concept_data(id, section.title, "eng")
|
|
85
|
+
cd.domain = area.uri
|
|
69
86
|
|
|
70
87
|
mc.add_localization(build_localization_from_data(id, cd))
|
|
88
|
+
|
|
89
|
+
mc.related = [build_broader_relation(area.uri)]
|
|
90
|
+
|
|
71
91
|
mc
|
|
72
92
|
end
|
|
73
93
|
|
|
@@ -105,17 +125,19 @@ module Iev
|
|
|
105
125
|
l10n
|
|
106
126
|
end
|
|
107
127
|
|
|
108
|
-
def
|
|
128
|
+
def build_broader_relation(target_uri)
|
|
109
129
|
Glossarist::RelatedConcept.new(
|
|
110
130
|
type: "broader",
|
|
111
|
-
content:
|
|
131
|
+
content: target_uri,
|
|
132
|
+
ref: Glossarist::ConceptRef.new(source: "IEV", id: target_uri),
|
|
112
133
|
)
|
|
113
134
|
end
|
|
114
135
|
|
|
115
|
-
def
|
|
136
|
+
def build_narrower_relation(target_uri)
|
|
116
137
|
Glossarist::RelatedConcept.new(
|
|
117
138
|
type: "narrower",
|
|
118
|
-
content:
|
|
139
|
+
content: target_uri,
|
|
140
|
+
ref: Glossarist::ConceptRef.new(source: "IEV", id: target_uri),
|
|
119
141
|
)
|
|
120
142
|
end
|
|
121
143
|
end
|
data/lib/iev/subject_areas.rb
CHANGED
|
@@ -39,49 +39,59 @@ module Iev
|
|
|
39
39
|
"section-#{code}"
|
|
40
40
|
end
|
|
41
41
|
|
|
42
|
-
# --- Query API (
|
|
42
|
+
# --- Query API (returns typed objects) ---
|
|
43
43
|
|
|
44
44
|
# Return all subject areas with their sections.
|
|
45
|
-
# @return [Array<
|
|
45
|
+
# @return [Array<SubjectArea>]
|
|
46
46
|
def all
|
|
47
|
-
|
|
47
|
+
@typed_areas ||= raw_data["areas"].map { |h| build_area(h) }
|
|
48
48
|
end
|
|
49
49
|
|
|
50
|
-
# Find a single subject area by its numeric code.
|
|
50
|
+
# Find a single subject area by its numeric code. O(1) indexed.
|
|
51
51
|
# @param code [String, Integer] e.g. "102" or 102
|
|
52
|
-
# @return [
|
|
52
|
+
# @return [SubjectArea, nil]
|
|
53
53
|
def find_area(code)
|
|
54
|
-
|
|
54
|
+
area_index[code.to_s]
|
|
55
55
|
end
|
|
56
56
|
|
|
57
57
|
# Return all sections for a given area code.
|
|
58
58
|
# @param code [String, Integer] area code, e.g. "102"
|
|
59
|
-
# @return [Array<
|
|
59
|
+
# @return [Array<Section>]
|
|
60
60
|
def sections_for(code)
|
|
61
|
-
|
|
62
|
-
area ? area["sections"] : []
|
|
61
|
+
find_area(code)&.sections || []
|
|
63
62
|
end
|
|
64
63
|
|
|
65
|
-
# Find a single section by its section code.
|
|
64
|
+
# Find a single section by its section code. O(1) indexed.
|
|
66
65
|
# @param section_code [String] e.g. "102-01"
|
|
67
|
-
# @return [
|
|
66
|
+
# @return [Section, nil]
|
|
68
67
|
def find_section(section_code)
|
|
69
|
-
|
|
70
|
-
all.each do |area|
|
|
71
|
-
found = area["sections"]&.find { |s| s["code"] == sc }
|
|
72
|
-
return found if found
|
|
73
|
-
end
|
|
74
|
-
nil
|
|
68
|
+
section_index[section_code.to_s]
|
|
75
69
|
end
|
|
76
70
|
|
|
77
71
|
# Return the parent area for a given section code.
|
|
78
72
|
# @param section_code [String] e.g. "102-01"
|
|
79
|
-
# @return [
|
|
73
|
+
# @return [SubjectArea, nil]
|
|
80
74
|
def area_for_section(section_code)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
75
|
+
sec = find_section(section_code)
|
|
76
|
+
sec ? find_area(sec.area_code) : nil
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# --- Navigation from IEV reference ---
|
|
80
|
+
|
|
81
|
+
# Find the subject area for any IEV reference.
|
|
82
|
+
# @param ievref [String] e.g. "103-01-02"
|
|
83
|
+
# @return [SubjectArea, nil]
|
|
84
|
+
def area_for(ievref)
|
|
85
|
+
code = IevCode.new(ievref)
|
|
86
|
+
find_area(code.area_code)
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
# Find the section for any IEV reference.
|
|
90
|
+
# @param ievref [String] e.g. "103-01-02"
|
|
91
|
+
# @return [Section, nil]
|
|
92
|
+
def section_for(ievref)
|
|
93
|
+
code = IevCode.new(ievref)
|
|
94
|
+
code.section_code ? find_section(code.section_code) : nil
|
|
85
95
|
end
|
|
86
96
|
|
|
87
97
|
# --- Fetching (network, writes to bundled data file) ---
|
|
@@ -164,10 +174,31 @@ module Iev
|
|
|
164
174
|
sections.uniq { |s| s["code"] }
|
|
165
175
|
end
|
|
166
176
|
|
|
177
|
+
# Clear cached typed objects (useful after fetch updates raw data).
|
|
178
|
+
def reload!
|
|
179
|
+
@typed_areas = nil
|
|
180
|
+
@area_index = nil
|
|
181
|
+
@section_index = nil
|
|
182
|
+
@raw_data = nil
|
|
183
|
+
end
|
|
184
|
+
|
|
167
185
|
private
|
|
168
186
|
|
|
169
|
-
def
|
|
170
|
-
|
|
187
|
+
def build_area(hash)
|
|
188
|
+
area_code = hash["code"]
|
|
189
|
+
sections = (hash["sections"] || []).map do |s|
|
|
190
|
+
Section.new(code: s["code"], title: s["title"], area_code: area_code)
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
SubjectArea.new(
|
|
194
|
+
code: area_code,
|
|
195
|
+
title: hash["title"],
|
|
196
|
+
sections: sections,
|
|
197
|
+
)
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
def raw_data
|
|
201
|
+
@raw_data ||= begin
|
|
171
202
|
path = File.exist?(DATA_FILE) ? DATA_FILE : nil
|
|
172
203
|
if path
|
|
173
204
|
YAML.safe_load(File.read(path, encoding: "utf-8")) || { "areas" => [] }
|
|
@@ -177,6 +208,16 @@ module Iev
|
|
|
177
208
|
end
|
|
178
209
|
end
|
|
179
210
|
|
|
211
|
+
def area_index
|
|
212
|
+
@area_index ||= all.each_with_object({}) { |a, h| h[a.code] = a }
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def section_index
|
|
216
|
+
@section_index ||= all.each_with_object({}) do |area, h|
|
|
217
|
+
area.sections.each { |s| h[s.code] = s }
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
|
|
180
221
|
def complete?(data)
|
|
181
222
|
areas = data["areas"]
|
|
182
223
|
return false unless areas&.length&.>= 99
|
|
@@ -54,10 +54,9 @@ module Iev
|
|
|
54
54
|
def relation_from_match(match_data)
|
|
55
55
|
Glossarist::RelatedConcept.new(
|
|
56
56
|
type: "supersedes",
|
|
57
|
-
ref: Glossarist::
|
|
57
|
+
ref: Glossarist::ConceptRef.new(
|
|
58
58
|
source: "IEV",
|
|
59
59
|
id: match_data[:ref],
|
|
60
|
-
version: match_data[:version],
|
|
61
60
|
),
|
|
62
61
|
)
|
|
63
62
|
end
|
data/lib/iev/version.rb
CHANGED
data/lib/iev.rb
CHANGED
|
@@ -29,11 +29,14 @@ module Iev
|
|
|
29
29
|
autoload :DataSource, "iev/data_source"
|
|
30
30
|
autoload :DbWriter, "iev/db_writer"
|
|
31
31
|
autoload :Exporter, "iev/exporter"
|
|
32
|
+
autoload :IevCode, "iev/iev_code"
|
|
32
33
|
autoload :Iso639Code, "iev/iso_639_code"
|
|
33
34
|
autoload :Profiler, "iev/profiler"
|
|
34
35
|
autoload :RelatonDb, "iev/relaton_db"
|
|
35
36
|
autoload :Scraper, "iev/scraper"
|
|
37
|
+
autoload :Section, "iev/section"
|
|
36
38
|
autoload :SourceParser, "iev/source_parser"
|
|
39
|
+
autoload :SubjectArea, "iev/subject_area"
|
|
37
40
|
autoload :SubjectAreas, "iev/subject_areas"
|
|
38
41
|
autoload :SubjectAreaConcepts, "iev/subject_area_concepts"
|
|
39
42
|
autoload :SupersessionParser, "iev/supersession_parser"
|
|
@@ -84,36 +87,43 @@ module Iev
|
|
|
84
87
|
end
|
|
85
88
|
|
|
86
89
|
# Return all IEV subject areas with their sections (from bundled data).
|
|
87
|
-
# @return [Array<
|
|
90
|
+
# @return [Array<SubjectArea>]
|
|
88
91
|
def self.subject_areas
|
|
89
92
|
SubjectAreas.all
|
|
90
93
|
end
|
|
91
94
|
|
|
92
95
|
# Find a subject area by code.
|
|
93
96
|
# @param code [String, Integer] e.g. "102"
|
|
94
|
-
# @return [
|
|
97
|
+
# @return [SubjectArea, nil]
|
|
95
98
|
def self.find_subject_area(code)
|
|
96
99
|
SubjectAreas.find_area(code)
|
|
97
100
|
end
|
|
98
101
|
|
|
99
102
|
# Find a section by its section code.
|
|
100
103
|
# @param section_code [String] e.g. "102-01"
|
|
101
|
-
# @return [
|
|
104
|
+
# @return [Section, nil]
|
|
102
105
|
def self.find_section(section_code)
|
|
103
106
|
SubjectAreas.find_section(section_code)
|
|
104
107
|
end
|
|
105
108
|
|
|
106
109
|
# Return sections for a given area code.
|
|
107
110
|
# @param code [String, Integer] e.g. "102"
|
|
108
|
-
# @return [Array<
|
|
111
|
+
# @return [Array<Section>]
|
|
109
112
|
def self.sections_for(code)
|
|
110
113
|
SubjectAreas.sections_for(code)
|
|
111
114
|
end
|
|
112
115
|
|
|
113
116
|
# Return the parent subject area for a given section code.
|
|
114
117
|
# @param section_code [String] e.g. "102-01"
|
|
115
|
-
# @return [
|
|
118
|
+
# @return [SubjectArea, nil]
|
|
116
119
|
def self.area_for_section(section_code)
|
|
117
120
|
SubjectAreas.area_for_section(section_code)
|
|
118
121
|
end
|
|
122
|
+
|
|
123
|
+
# Parse an IEV code into its structural components.
|
|
124
|
+
# @param code [String] e.g. "103-01-02"
|
|
125
|
+
# @return [IevCode, nil] nil if the code is blank
|
|
126
|
+
def self.parse_code(code)
|
|
127
|
+
IevCode.parse(code)
|
|
128
|
+
end
|
|
119
129
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iev
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.
|
|
4
|
+
version: 0.4.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-25 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: creek
|
|
@@ -28,16 +28,22 @@ dependencies:
|
|
|
28
28
|
name: glossarist
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '2.6'
|
|
31
34
|
- - ">="
|
|
32
35
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: 2.
|
|
36
|
+
version: 2.6.7
|
|
34
37
|
type: :runtime
|
|
35
38
|
prerelease: false
|
|
36
39
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
40
|
requirements:
|
|
41
|
+
- - "~>"
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
version: '2.6'
|
|
38
44
|
- - ">="
|
|
39
45
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: 2.
|
|
46
|
+
version: 2.6.7
|
|
41
47
|
- !ruby/object:Gem::Dependency
|
|
42
48
|
name: ferrum
|
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -206,6 +212,7 @@ files:
|
|
|
206
212
|
- lib/iev/data_source.rb
|
|
207
213
|
- lib/iev/db_writer.rb
|
|
208
214
|
- lib/iev/exporter.rb
|
|
215
|
+
- lib/iev/iev_code.rb
|
|
209
216
|
- lib/iev/iso_639_2.yaml
|
|
210
217
|
- lib/iev/iso_639_code.rb
|
|
211
218
|
- lib/iev/profiler.rb
|
|
@@ -213,7 +220,9 @@ files:
|
|
|
213
220
|
- lib/iev/scraper.rb
|
|
214
221
|
- lib/iev/scraper/browser.rb
|
|
215
222
|
- lib/iev/scraper/page_parser.rb
|
|
223
|
+
- lib/iev/section.rb
|
|
216
224
|
- lib/iev/source_parser.rb
|
|
225
|
+
- lib/iev/subject_area.rb
|
|
217
226
|
- lib/iev/subject_area_concepts.rb
|
|
218
227
|
- lib/iev/subject_areas.rb
|
|
219
228
|
- lib/iev/supersession_parser.rb
|