glossarist 2.6.5 → 2.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +1 -4
- data/.rubocop_todo.yml +53 -2
- data/CLAUDE.md +27 -2
- data/README.adoc +532 -56
- data/config.yml +68 -1
- data/glossarist.gemspec +2 -0
- data/lib/glossarist/citation.rb +26 -123
- data/lib/glossarist/cli/compare_command.rb +106 -0
- data/lib/glossarist/cli/export_command.rb +11 -14
- data/lib/glossarist/cli/validate_command.rb +111 -20
- data/lib/glossarist/cli.rb +18 -0
- data/lib/glossarist/collections/bibliography_collection.rb +4 -2
- data/lib/glossarist/collections/localization_collection.rb +2 -0
- data/lib/glossarist/comparison_result.rb +35 -0
- data/lib/glossarist/concept.rb +1 -1
- data/lib/glossarist/concept_collector.rb +44 -0
- data/lib/glossarist/concept_comparator.rb +72 -0
- data/lib/glossarist/concept_data.rb +20 -0
- data/lib/glossarist/concept_diff.rb +15 -0
- data/lib/glossarist/concept_document.rb +11 -0
- data/lib/glossarist/concept_manager.rb +19 -5
- data/lib/glossarist/concept_ref.rb +13 -0
- data/lib/glossarist/concept_reference.rb +12 -19
- data/lib/glossarist/concept_validator.rb +6 -1
- data/lib/glossarist/context_configuration.rb +90 -0
- data/lib/glossarist/dataset_validator.rb +8 -4
- data/lib/glossarist/designation/abbreviation.rb +0 -2
- data/lib/glossarist/designation/base.rb +21 -1
- data/lib/glossarist/designation/expression.rb +3 -0
- data/lib/glossarist/designation/letter_symbol.rb +0 -4
- data/lib/glossarist/designation/prefix.rb +17 -0
- data/lib/glossarist/designation/suffix.rb +17 -0
- data/lib/glossarist/designation/symbol.rb +0 -2
- data/lib/glossarist/gcr_metadata.rb +7 -14
- data/lib/glossarist/gcr_package.rb +35 -23
- data/lib/glossarist/gcr_validator.rb +38 -17
- data/lib/glossarist/glossary_definition.rb +5 -0
- data/lib/glossarist/localized_concept.rb +8 -0
- data/lib/glossarist/managed_concept.rb +39 -6
- data/lib/glossarist/managed_concept_data.rb +22 -2
- data/lib/glossarist/non_verb_rep.rb +21 -6
- data/lib/glossarist/pronunciation.rb +32 -0
- data/lib/glossarist/rdf/ext/jsonld_transform_ext.rb +208 -0
- data/lib/glossarist/rdf/ext/mapping_ext.rb +37 -0
- data/lib/glossarist/rdf/ext/mapping_rule_ext.rb +27 -0
- data/lib/glossarist/rdf/ext/member_rule_ext.rb +34 -0
- data/lib/glossarist/rdf/ext/turtle_transform_ext.rb +222 -0
- data/lib/glossarist/rdf/ext.rb +39 -0
- data/lib/glossarist/rdf/gloss_citation.rb +36 -0
- data/lib/glossarist/rdf/gloss_concept.rb +58 -0
- data/lib/glossarist/rdf/gloss_concept_date.rb +24 -0
- data/lib/glossarist/rdf/gloss_concept_reference.rb +29 -0
- data/lib/glossarist/rdf/gloss_concept_source.rb +37 -0
- data/lib/glossarist/rdf/gloss_designation.rb +146 -0
- data/lib/glossarist/rdf/gloss_detailed_definition.rb +24 -0
- data/lib/glossarist/rdf/gloss_grammar_info.rb +57 -0
- data/lib/glossarist/rdf/gloss_locality.rb +25 -0
- data/lib/glossarist/rdf/gloss_localized_concept.rb +67 -0
- data/lib/glossarist/rdf/gloss_non_verbal_rep.rb +31 -0
- data/lib/glossarist/rdf/gloss_pronunciation.rb +32 -0
- data/lib/glossarist/rdf/gloss_reference.rb +55 -0
- data/lib/glossarist/rdf/namespaces/glossarist_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/iso_thes_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/owl_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/prov_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/rdf_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/skosxl_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces.rb +8 -2
- data/lib/glossarist/rdf/relationships.rb +19 -0
- data/lib/glossarist/rdf/v3/configuration.rb +15 -0
- data/lib/glossarist/rdf/v3.rb +79 -0
- data/lib/glossarist/rdf.rb +22 -2
- data/lib/glossarist/reference_extractor.rb +15 -24
- data/lib/glossarist/reference_resolver.rb +3 -3
- data/lib/glossarist/related_concept.rb +2 -10
- data/lib/glossarist/schema_migration.rb +39 -0
- data/lib/glossarist/sts/term_mapper.rb +2 -2
- data/lib/glossarist/transforms/concept_to_gloss_transform.rb +355 -0
- data/lib/glossarist/transforms.rb +2 -2
- data/lib/glossarist/urn_resolver.rb +13 -1
- data/lib/glossarist/v1/concept.rb +18 -11
- data/lib/glossarist/v2/citation.rb +36 -0
- data/lib/glossarist/v2/concept_data.rb +46 -0
- data/lib/glossarist/v2/concept_document.rb +18 -0
- data/lib/glossarist/v2/concept_ref.rb +8 -0
- data/lib/glossarist/v2/concept_source.rb +16 -0
- data/lib/glossarist/v2/configuration.rb +13 -0
- data/lib/glossarist/v2/detailed_definition.rb +14 -0
- data/lib/glossarist/v2/localized_concept.rb +9 -0
- data/lib/glossarist/v2/managed_concept.rb +25 -0
- data/lib/glossarist/v2/managed_concept_data.rb +49 -0
- data/lib/glossarist/v2/related_concept.rb +15 -0
- data/lib/glossarist/v2.rb +28 -0
- data/lib/glossarist/v3/bibliography_entry.rb +19 -0
- data/lib/glossarist/v3/bibliography_file.rb +27 -0
- data/lib/glossarist/v3/citation.rb +30 -0
- data/lib/glossarist/v3/concept_data.rb +46 -0
- data/lib/glossarist/v3/concept_document.rb +18 -0
- data/lib/glossarist/v3/concept_ref.rb +8 -0
- data/lib/glossarist/v3/concept_source.rb +16 -0
- data/lib/glossarist/v3/configuration.rb +13 -0
- data/lib/glossarist/v3/detailed_definition.rb +14 -0
- data/lib/glossarist/v3/image_entry.rb +21 -0
- data/lib/glossarist/v3/image_file.rb +31 -0
- data/lib/glossarist/v3/localized_concept.rb +9 -0
- data/lib/glossarist/v3/managed_concept.rb +26 -0
- data/lib/glossarist/v3/managed_concept_data.rb +34 -0
- data/lib/glossarist/v3/related_concept.rb +15 -0
- data/lib/glossarist/v3.rb +36 -0
- data/lib/glossarist/validation/asset_index.rb +4 -3
- data/lib/glossarist/validation/bibliography_index.rb +61 -30
- data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +2 -15
- data/lib/glossarist/validation/rules/authoritative_source_rule.rb +2 -15
- data/lib/glossarist/validation/rules/base.rb +5 -0
- data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +2 -3
- data/lib/glossarist/validation/rules/citation_completeness_rule.rb +5 -27
- data/lib/glossarist/validation/rules/dataset_context.rb +8 -3
- data/lib/glossarist/validation/rules/date_validity_rule.rb +1 -1
- data/lib/glossarist/validation/rules/designation_status_rule.rb +0 -1
- data/lib/glossarist/validation/rules/designation_type_rule.rb +1 -5
- data/lib/glossarist/validation/rules/domain_ref_rule.rb +37 -0
- data/lib/glossarist/validation/rules/domain_target_rule.rb +56 -0
- data/lib/glossarist/validation/rules/gcr_context.rb +12 -13
- data/lib/glossarist/validation/rules/image_reference_rule.rb +2 -17
- data/lib/glossarist/validation/rules/locality_completeness_rule.rb +58 -0
- data/lib/glossarist/validation/rules/localization_consistency_rule.rb +72 -0
- data/lib/glossarist/validation/rules/localization_presence_rule.rb +1 -1
- data/lib/glossarist/validation/rules/model_validity_rule.rb +71 -0
- data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +1 -13
- data/lib/glossarist/validation/rules/orphaned_images_rule.rb +16 -11
- data/lib/glossarist/validation/rules/ref_shape_rule.rb +68 -0
- data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +1 -3
- data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +1 -3
- data/lib/glossarist/validation/rules/related_concept_target_rule.rb +64 -0
- data/lib/glossarist/validation/rules/schema_version_rule.rb +41 -0
- data/lib/glossarist/validation/rules/source_type_rule.rb +1 -15
- data/lib/glossarist/validation/rules/source_urn_format_rule.rb +65 -0
- data/lib/glossarist/validation/rules/uuid_format_rule.rb +33 -0
- data/lib/glossarist/validation/rules.rb +10 -43
- data/lib/glossarist/validation/validation_issue.rb +14 -11
- data/lib/glossarist/validation_result.rb +12 -22
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +10 -0
- data/memory/project-status.md +43 -0
- data/scripts/migrate_dataset.rb +180 -0
- data/scripts/migrate_isotc204_to_v3.rb +134 -0
- data/scripts/migrate_isotc211_to_v3.rb +153 -0
- data/scripts/migrate_osgeo_to_v3.rb +155 -0
- data/scripts/upgrade_dataset_to_v3.rb +47 -0
- metadata +112 -6
- data/TODO.integration/01-gcr-package-cli.md +0 -180
- data/lib/glossarist/rdf/skos_concept.rb +0 -43
- data/lib/glossarist/rdf/skos_vocabulary.rb +0 -25
- data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -131
|
@@ -1,55 +1,45 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
|
-
class ValidationResult
|
|
5
|
-
|
|
4
|
+
class ValidationResult < Lutaml::Model::Serializable
|
|
5
|
+
attribute :issues, Validation::ValidationIssue, collection: true,
|
|
6
|
+
initialize_empty: true
|
|
6
7
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
errors.each { |e| add_error(e) }
|
|
10
|
-
warnings.each { |w| add_warning(w) }
|
|
11
|
-
issues.each { |i| add_issue(i) }
|
|
8
|
+
key_value do
|
|
9
|
+
map :issues, to: :issues
|
|
12
10
|
end
|
|
13
11
|
|
|
14
12
|
def valid?
|
|
15
|
-
|
|
13
|
+
issues.none?(&:error?)
|
|
16
14
|
end
|
|
17
15
|
|
|
18
16
|
def errors
|
|
19
|
-
|
|
17
|
+
issues.select(&:error?).map(&:to_s)
|
|
20
18
|
end
|
|
21
19
|
|
|
22
20
|
def warnings
|
|
23
|
-
|
|
21
|
+
issues.select(&:warning?).map(&:to_s)
|
|
24
22
|
end
|
|
25
23
|
|
|
26
24
|
def add_error(message)
|
|
27
|
-
|
|
25
|
+
issues << Validation::ValidationIssue.new(
|
|
28
26
|
severity: "error", message: message,
|
|
29
27
|
)
|
|
30
28
|
end
|
|
31
29
|
|
|
32
30
|
def add_warning(message)
|
|
33
|
-
|
|
31
|
+
issues << Validation::ValidationIssue.new(
|
|
34
32
|
severity: "warning", message: message,
|
|
35
33
|
)
|
|
36
34
|
end
|
|
37
35
|
|
|
38
36
|
def add_issue(issue)
|
|
39
|
-
|
|
37
|
+
issues << issue
|
|
40
38
|
end
|
|
41
39
|
|
|
42
40
|
def merge(other)
|
|
43
|
-
other.issues.each { |i|
|
|
41
|
+
other.issues.each { |i| issues << i }
|
|
44
42
|
self
|
|
45
43
|
end
|
|
46
|
-
|
|
47
|
-
def to_h
|
|
48
|
-
{
|
|
49
|
-
"valid" => valid?,
|
|
50
|
-
"errors" => errors,
|
|
51
|
-
"warnings" => warnings,
|
|
52
|
-
}
|
|
53
|
-
end
|
|
54
44
|
end
|
|
55
45
|
end
|
data/lib/glossarist/version.rb
CHANGED
data/lib/glossarist.rb
CHANGED
|
@@ -18,6 +18,7 @@ module Glossarist
|
|
|
18
18
|
autoload :Collections, "glossarist/collections"
|
|
19
19
|
autoload :Concept, "glossarist/concept"
|
|
20
20
|
autoload :ConceptData, "glossarist/concept_data"
|
|
21
|
+
autoload :ConceptRef, "glossarist/concept_ref"
|
|
21
22
|
autoload :ConceptReference, "glossarist/concept_reference"
|
|
22
23
|
autoload :ReferenceExtractor, "glossarist/reference_extractor"
|
|
23
24
|
autoload :ReferenceResolver, "glossarist/reference_resolver"
|
|
@@ -28,6 +29,9 @@ module Glossarist
|
|
|
28
29
|
autoload :ConceptSource, "glossarist/concept_source"
|
|
29
30
|
autoload :ConceptValidator, "glossarist/concept_validator"
|
|
30
31
|
autoload :ConceptCollector, "glossarist/concept_collector"
|
|
32
|
+
autoload :ConceptComparator, "glossarist/concept_comparator"
|
|
33
|
+
autoload :ComparisonResult, "glossarist/comparison_result"
|
|
34
|
+
autoload :ConceptDiff, "glossarist/concept_diff"
|
|
31
35
|
autoload :ConceptDocument, "glossarist/concept_document"
|
|
32
36
|
autoload :ConceptEnricher, "glossarist/concept_enricher"
|
|
33
37
|
autoload :Config, "glossarist/config"
|
|
@@ -52,6 +56,7 @@ module Glossarist
|
|
|
52
56
|
autoload :ManagedConceptCollection, "glossarist/managed_concept_collection"
|
|
53
57
|
autoload :ManagedConceptData, "glossarist/managed_concept_data"
|
|
54
58
|
autoload :NonVerbRep, "glossarist/non_verb_rep"
|
|
59
|
+
autoload :Pronunciation, "glossarist/pronunciation"
|
|
55
60
|
autoload :RelatedConcept, "glossarist/related_concept"
|
|
56
61
|
autoload :Rdf, "glossarist/rdf"
|
|
57
62
|
autoload :Sts, "glossarist/sts"
|
|
@@ -63,6 +68,8 @@ module Glossarist
|
|
|
63
68
|
autoload :RegisterData, "glossarist/register_data"
|
|
64
69
|
autoload :ValidationResult, "glossarist/validation_result"
|
|
65
70
|
autoload :V1, "glossarist/v1"
|
|
71
|
+
autoload :V2, "glossarist/v2"
|
|
72
|
+
autoload :V3, "glossarist/v3"
|
|
66
73
|
end
|
|
67
74
|
|
|
68
75
|
require_relative "glossarist/version"
|
|
@@ -73,6 +80,9 @@ module Glossarist
|
|
|
73
80
|
LANG_CODES = %w[eng ara deu fra spa ita jpn kor pol por srp swe zho rus fin
|
|
74
81
|
dan nld msa nob nno].freeze
|
|
75
82
|
|
|
83
|
+
SCHEMA_VERSION = "3"
|
|
84
|
+
V3_SCHEMA_VERSION = "3"
|
|
85
|
+
|
|
76
86
|
def self.configure
|
|
77
87
|
config = Glossarist::Config.instance
|
|
78
88
|
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: V2/V3 Namespace Architecture
|
|
3
|
+
description: lutaml-model mapping inheritance, model register, V2/V3 namespace design
|
|
4
|
+
type: project
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## lutaml-model mapping inheritance
|
|
8
|
+
|
|
9
|
+
lutaml-model `key_value` mappings ARE inherited by subclasses — a subclass's `key_value` block merges on top of the parent's mappings, it does NOT replace them. A subclass cannot "unmap" a parent mapping by omitting it.
|
|
10
|
+
|
|
11
|
+
**Why:** Discovered during V2/V3 namespace implementation. V2::ManagedConcept originally tried to define its own `key_value` block without `related`/`schema_version`/`sources`, but the parent ManagedConcept's mappings for those fields were still active.
|
|
12
|
+
|
|
13
|
+
**How to apply:** For V2/V3 versioning, only V2::ManagedConceptData needs its own `key_value` (to add `related` mapping inside data). V2::ManagedConcept only overrides the `data` attribute to point to V2::ManagedConceptData — it does NOT need its own `key_value`. V2 is only for deserialization; serialization always uses v3 format via inherited base class mappings.
|
|
14
|
+
|
|
15
|
+
## Model register (lutaml-model GlobalContext)
|
|
16
|
+
|
|
17
|
+
Follows the plurimath/mml pattern. Each version has a Configuration module with a unique CONTEXT_ID that extends Glossarist::ContextConfiguration. Models are registered via `Configuration.register_model(ClassName, id: :symbol)`. Type resolution uses `Configuration.resolve_model(:symbol)` which delegates to `Lutaml::Model::GlobalContext.resolve_type`.
|
|
18
|
+
|
|
19
|
+
**Why:** Enables context-based type resolution instead of hardcoded case/when. Each version's registry is isolated — V2::Configuration resolves `:managed_concept` to V2::ManagedConcept, V3::Configuration resolves to V3::ManagedConcept.
|
|
20
|
+
|
|
21
|
+
**How to apply:** `ConceptDocument.for_version(version)` looks up the version's Configuration from a VERSION_CONFIGURATION hash and calls `resolve_model(:concept_document)`. Adding a new version requires only a new Configuration module and register_model calls.
|
|
22
|
+
|
|
23
|
+
## V2 → V3 model-driven migration
|
|
24
|
+
|
|
25
|
+
V2→V3 migration is fully model-driven: V2::ConceptDocument deserializes v2 YAML (data.related → model), then `SchemaMigration.migrate_concept` promotes `data.related` to `concept.related` and sets schema_version to "3". No hash-based transformation needed — `Steps::V2ToV3` was deleted.
|
|
26
|
+
|
|
27
|
+
## RDF / JSON-LD / Turtle: version-agnostic
|
|
28
|
+
|
|
29
|
+
RDF view classes (GlossConcept, GlossLocalizedConcept, etc.) are NOT versioned. They operate on the domain model (ManagedConcept), not on YAML serialization format. schema_version is a YAML metadata field with no SKOS/gloss ontology equivalent. ConceptToGlossTransform takes a domain-model ManagedConcept and produces view-model instances — it is format-agnostic.
|
|
30
|
+
|
|
31
|
+
**Why:** Whether a concept was loaded from v2 or v3 YAML, the RDF output is identical. The domain model normalizes away format differences.
|
|
32
|
+
|
|
33
|
+
**How to apply:** No RDF model changes needed for v2/v3. If a future v4 changes the domain model semantics (not just serialization), RDF view classes would need updating.
|
|
34
|
+
|
|
35
|
+
## README documentation (completed 2026-05-20)
|
|
36
|
+
|
|
37
|
+
The README.adoc now includes a comprehensive "Schema Versioning (v2 / v3)" section covering:
|
|
38
|
+
- V2 vs V3 format differences with YAML examples
|
|
39
|
+
- Namespace architecture diagram
|
|
40
|
+
- Model register (GlobalContext) usage
|
|
41
|
+
- Loading & migration flow diagram
|
|
42
|
+
- Usage examples for v2, v3, migration, and RDF export
|
|
43
|
+
- "Adding a new schema version" guide (Open/Closed Principle)
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Migrate glossarist datasets to current schema format.
|
|
4
|
+
#
|
|
5
|
+
# Usage:
|
|
6
|
+
# bundle exec ruby scripts/migrate_dataset.rb SOURCE_DIR OUTPUT_DIR [--add-iev-domains]
|
|
7
|
+
#
|
|
8
|
+
# --add-iev-domains: Add domain ConceptReference objects for IEV-style identifiers
|
|
9
|
+
# (e.g. "426-24-74" → area-426, section-426-24)
|
|
10
|
+
|
|
11
|
+
require "glossarist"
|
|
12
|
+
require "fileutils"
|
|
13
|
+
|
|
14
|
+
source_dir = ARGV[0]
|
|
15
|
+
output_dir = ARGV[1]
|
|
16
|
+
add_iev_domains = ARGV.include?("--add-iev-domains")
|
|
17
|
+
|
|
18
|
+
unless source_dir && output_dir
|
|
19
|
+
abort "Usage: bundle exec ruby scripts/migrate_dataset.rb SOURCE_DIR OUTPUT_DIR [--add-iev-domains]"
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
source_dir = File.expand_path(source_dir)
|
|
23
|
+
output_dir = File.expand_path(output_dir)
|
|
24
|
+
|
|
25
|
+
unless File.directory?(source_dir)
|
|
26
|
+
abort "Error: #{source_dir} is not a directory"
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# Creates placeholder hierarchy concepts for every "area-…" / "section-…"
# identifier that is referenced as a domain somewhere in +collection+ but has
# no concept of its own yet.
#
# Area concepts receive "narrower" relations to the sections whose ID starts
# with "section-<area code>-"; section concepts receive a "broader" relation
# to their area. Each new concept gets an "eng" localization built by
# build_domain_localization and is stored back into +collection+.
#
# @param collection the concept collection to scan and extend (mutated)
def add_subject_area_concepts(collection)
  area_ids = {}
  section_ids = {}

  # Pass 1: collect the distinct area/section IDs used as domain references.
  collection.each do |concept|
    next unless concept.data.domains

    concept.data.domains.each do |ref|
      next unless ref.is_a?(Glossarist::ConceptReference) && ref.concept_id

      domain_id = ref.concept_id
      bucket =
        if domain_id.start_with?("area-")
          area_ids
        elsif domain_id.start_with?("section-")
          section_ids
        end
      bucket[domain_id] = true if bucket
    end
  end

  # Snapshot of the IDs present before any placeholder is added; concepts
  # stored below are intentionally not added to this set.
  known_ids = collection.map { |c| c.data.id }.to_set

  # Pass 2: one concept per missing area.
  area_ids.each_key do |area_id|
    next if known_ids.include?(area_id)

    area_concept = Glossarist::ManagedConcept.new(
      data: Glossarist::ManagedConceptData.new(
        id: area_id,
        domains: [Glossarist::ConceptReference.domain(area_id)],
      ),
    )

    area_code = area_id.sub("area-", "")
    child_sections = section_ids.keys.select do |section_id|
      section_id.start_with?("section-#{area_code}-")
    end
    area_concept.related = child_sections.map do |section_id|
      Glossarist::RelatedConcept.new(type: "narrower", content: section_id)
    end

    area_concept.add_l10n(build_domain_localization(area_id, area_code, "eng"))
    collection.store(area_concept)
  end

  # Pass 3: one concept per missing section, linked up to its area.
  section_ids.each_key do |section_id|
    next if known_ids.include?(section_id)

    code_parts = section_id.sub("section-", "").split("-")
    parent_area_id = "area-#{code_parts[0]}"

    section_concept = Glossarist::ManagedConcept.new(
      data: Glossarist::ManagedConceptData.new(
        id: section_id,
        domains: [
          Glossarist::ConceptReference.domain(parent_area_id),
          Glossarist::ConceptReference.domain(section_id),
        ],
      ),
    )
    section_concept.related = [
      Glossarist::RelatedConcept.new(type: "broader", content: parent_area_id),
    ]

    # Label is "<area>-<section>" when both parts exist, else just the area part.
    section_code =
      if code_parts.length > 1
        code_parts[0..1].join("-")
      else
        code_parts[0]
      end

    localized = build_domain_localization(section_id, section_code, "eng")
    localized.data.domain = parent_area_id
    section_concept.add_l10n(localized)

    collection.store(section_concept)
  end
end
|
|
96
|
+
|
|
97
|
+
# Builds a minimal localized concept for a generated domain (area/section)
# concept: a single preferred expression designation and entry status "valid".
#
# @param id the concept identifier; also used as the designation text
# @param label a short code for the domain
#   NOTE(review): +label+ is accepted but never read here — the designation is
#   built from +id+. Confirm whether the label was meant to be the term text.
# @param lang_code language code assigned to the localization (e.g. "eng")
# @return [Glossarist::LocalizedConcept]
def build_domain_localization(id, label, lang_code)
  preferred_term = Glossarist::Designation::Expression.new(
    type: "expression",
    designation: id,
    normative_status: "preferred",
  )
  concept_data = Glossarist::ConceptData.new(
    id: id,
    language_code: lang_code,
    terms: [preferred_term],
  )

  Glossarist::LocalizedConcept.new.tap do |localized|
    localized.data = concept_data
    localized.entry_status = "valid"
  end
end
|
|
115
|
+
|
|
116
|
+
# Detect format: managed (concept/ + localized_concept/) vs grouped (*.yaml)
|
|
117
|
+
concept_subdir = File.join(source_dir, "concept")
|
|
118
|
+
is_managed_format = File.directory?(concept_subdir)
|
|
119
|
+
|
|
120
|
+
puts "Loading concepts from #{source_dir} (#{is_managed_format ? 'managed' : 'grouped'} format)..."
|
|
121
|
+
|
|
122
|
+
collection = Glossarist::ManagedConceptCollection.new
|
|
123
|
+
collection.load_from_files(source_dir)
|
|
124
|
+
|
|
125
|
+
puts "Loaded #{collection.count} concepts"
|
|
126
|
+
|
|
127
|
+
# Add IEV domain references if requested
|
|
128
|
+
if add_iev_domains
|
|
129
|
+
puts "Adding IEV domain references..."
|
|
130
|
+
|
|
131
|
+
collection.each do |concept|
|
|
132
|
+
next if concept.data.domains && !concept.data.domains.empty?
|
|
133
|
+
|
|
134
|
+
identifier = concept.data.id.to_s
|
|
135
|
+
next if identifier.empty? || identifier.start_with?("area-", "section-")
|
|
136
|
+
parts = identifier.split("-")
|
|
137
|
+
next unless parts.length >= 2
|
|
138
|
+
|
|
139
|
+
area_uri = "area-#{parts[0]}"
|
|
140
|
+
section_uri = "section-#{parts[0]}-#{parts[1]}"
|
|
141
|
+
|
|
142
|
+
concept.data.domains = [
|
|
143
|
+
Glossarist::ConceptReference.domain(area_uri),
|
|
144
|
+
Glossarist::ConceptReference.domain(section_uri),
|
|
145
|
+
]
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
puts "Adding subject area hierarchy concepts..."
|
|
149
|
+
add_subject_area_concepts(collection)
|
|
150
|
+
|
|
151
|
+
puts "Domains added. Total concepts: #{collection.count}"
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
# Save output
|
|
155
|
+
puts "Saving to #{output_dir}..."
|
|
156
|
+
|
|
157
|
+
if is_managed_format
|
|
158
|
+
concepts_out = File.join(output_dir, "concepts")
|
|
159
|
+
FileUtils.mkdir_p(concepts_out)
|
|
160
|
+
collection.save_to_files(concepts_out)
|
|
161
|
+
else
|
|
162
|
+
concepts_out = File.join(output_dir)
|
|
163
|
+
FileUtils.mkdir_p(concepts_out)
|
|
164
|
+
collection.save_grouped_concepts_to_files(concepts_out)
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
# Copy register.yaml if present
#
# The register file is looked up in the parent of the source directory and
# copied into the parent of the output directory, unless a register.yaml
# already exists there. (The previous version computed an unused
# `register_dst_dir`, re-tested the source file's existence, and used a
# ternary whose two branches were identical; the destination is the same.)
register_src = File.join(File.dirname(source_dir), "register.yaml")
if File.exist?(register_src) && !File.exist?(File.join(output_dir, "..", "register.yaml"))
  register_dst = File.join(File.dirname(output_dir), "register.yaml")
  FileUtils.mkdir_p(File.dirname(register_dst))
  FileUtils.cp(register_src, register_dst) unless register_src == register_dst
end
|
|
179
|
+
|
|
180
|
+
puts "Done. #{collection.count} concepts migrated."
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Migration script for isotc204-glossary: adds v3 fields to existing concepts.
|
|
5
|
+
#
|
|
6
|
+
# Adds:
|
|
7
|
+
# - status: "valid" at managed concept level
|
|
8
|
+
# - date_accepted at managed concept level
|
|
9
|
+
# - domains (section ConceptReference) at managed concept data level
|
|
10
|
+
# - related (broader to section) at managed concept level
|
|
11
|
+
# - Creates section hierarchy concepts with narrower relations
|
|
12
|
+
#
|
|
13
|
+
# Usage:
|
|
14
|
+
# cd /Users/mulgogi/src/glossarist/glossarist-ruby
|
|
15
|
+
# bundle exec ruby scripts/migrate_isotc204_to_v3.rb
|
|
16
|
+
#
|
|
17
|
+
# Safe to run multiple times (idempotent).
|
|
18
|
+
|
|
19
|
+
require "glossarist"
|
|
20
|
+
|
|
21
|
+
# Dataset directory to migrate in place. Defaults to the original author's
# local checkout; pass a different path as the first command-line argument
# to run the migration elsewhere.
DIR = ARGV.fetch(0, "/Users/mulgogi/src/geolexica/isotc204-glossary/concepts")
|
|
22
|
+
|
|
23
|
+
ISO_TS_14812_SECTIONS = {
|
|
24
|
+
"3.1" => "General concepts",
|
|
25
|
+
"3.2" => "Transport information and control",
|
|
26
|
+
"3.3" => "ITS station",
|
|
27
|
+
"3.4" => "Communications",
|
|
28
|
+
"3.5" => "ITS services",
|
|
29
|
+
"3.6" => "Geospatial",
|
|
30
|
+
"3.7" => "Driving automation",
|
|
31
|
+
}.freeze
|
|
32
|
+
|
|
33
|
+
ISO_TS_14812_DATE = "2022-01-01T00:00:00+00:00"
|
|
34
|
+
ISO_TS_14812_SOURCE = "urn:iso:std:iso:ts:14812"
|
|
35
|
+
|
|
36
|
+
collection = Glossarist::ManagedConceptCollection.new
|
|
37
|
+
collection.load_from_files(DIR)
|
|
38
|
+
|
|
39
|
+
puts "Loaded #{collection.count} concepts"
|
|
40
|
+
|
|
41
|
+
# Track which concepts belong to which section for narrower relations
|
|
42
|
+
section_children = Hash.new { |h, k| h[k] = [] }
|
|
43
|
+
|
|
44
|
+
# Enrich every term concept with v3 fields: status, date_accepted, a domain
# reference to its section, and a "broader" relation to that section. Also
# records each concept under its section so the section concepts created
# afterwards can list their "narrower" children.
collection.each do |concept|
  identifier = concept.data.id.to_s

  # This script saves back into the directory it loads from, so on a re-run
  # the generated "section-…" concepts are part of the loaded collection.
  # Their IDs contain no ".", so the derivation below would turn
  # "section-3-1" into the bogus parent "section-section-3-1". Skip them to
  # keep the migration idempotent.
  next if identifier.start_with?("section-")

  # "3.1.4" → section code "3.1" → section URI "section-3-1"
  section_code = identifier.split(".")[0..1].join(".")
  section_uri = "section-#{section_code.gsub('.', '-')}"
  section_children[section_uri] << identifier

  # Set status
  concept.status = "valid" unless concept.status

  # Set date_accepted
  unless concept.date_accepted
    concept.date_accepted = Glossarist::ConceptDate.new(
      type: "accepted",
      date: ISO_TS_14812_DATE,
    )
  end

  # Add domain ConceptReference (only if not already present)
  concept.data.domains ||= []
  unless concept.data.domains.any? { |d| d.concept_id == section_uri }
    concept.data.domains << Glossarist::ConceptReference.new(
      concept_id: section_uri,
      source: ISO_TS_14812_SOURCE,
      ref_type: "domain",
    )
  end

  # Add broader relation to section (only if not already present)
  concept.related ||= []
  unless concept.related.any? { |r| r.type == "broader" && r.ref&.id == section_uri }
    concept.related << Glossarist::RelatedConcept.new(
      type: "broader",
      content: section_uri,
      ref: Glossarist::Citation.new(source: "ISO/TS 14812", id: section_uri),
    )
  end
end
|
|
81
|
+
|
|
82
|
+
puts "Updated #{collection.count} concepts with status, date_accepted, domains, related"
|
|
83
|
+
|
|
84
|
+
# Create section hierarchy concepts
|
|
85
|
+
ISO_TS_14812_SECTIONS.each do |code, title|
|
|
86
|
+
section_uri = "section-#{code.gsub('.', '-')}"
|
|
87
|
+
|
|
88
|
+
mc = Glossarist::ManagedConcept.new(
|
|
89
|
+
data: Glossarist::ManagedConceptData.new(
|
|
90
|
+
id: section_uri,
|
|
91
|
+
domains: [Glossarist::ConceptReference.new(
|
|
92
|
+
concept_id: section_uri,
|
|
93
|
+
source: ISO_TS_14812_SOURCE,
|
|
94
|
+
ref_type: "domain",
|
|
95
|
+
)],
|
|
96
|
+
),
|
|
97
|
+
)
|
|
98
|
+
mc.status = "valid"
|
|
99
|
+
mc.date_accepted = Glossarist::ConceptDate.new(
|
|
100
|
+
type: "accepted",
|
|
101
|
+
date: ISO_TS_14812_DATE,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
l10n = Glossarist::LocalizedConcept.new
|
|
105
|
+
l10n.data = Glossarist::ConceptData.new(
|
|
106
|
+
id: section_uri,
|
|
107
|
+
language_code: "eng",
|
|
108
|
+
terms: [Glossarist::Designation::Expression.new(
|
|
109
|
+
type: "expression",
|
|
110
|
+
designation: title,
|
|
111
|
+
normative_status: "preferred",
|
|
112
|
+
)],
|
|
113
|
+
)
|
|
114
|
+
l10n.entry_status = "valid"
|
|
115
|
+
mc.add_l10n(l10n)
|
|
116
|
+
|
|
117
|
+
# Add narrower relations to child concepts
|
|
118
|
+
children = section_children[section_uri]
|
|
119
|
+
if children.any?
|
|
120
|
+
mc.related = children.sort.map do |child_id|
|
|
121
|
+
Glossarist::RelatedConcept.new(
|
|
122
|
+
type: "narrower",
|
|
123
|
+
content: child_id,
|
|
124
|
+
ref: Glossarist::Citation.new(source: "ISO/TS 14812", id: child_id),
|
|
125
|
+
)
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
collection.store(mc)
|
|
130
|
+
puts "Created section concept: #{section_uri} (#{title}) — #{children.length} narrower"
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
collection.save_grouped_concepts_to_files(DIR)
|
|
134
|
+
puts "Saved #{collection.count} concepts to #{DIR}"
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Migration script for isotc211-glossary: adds v3 fields to existing concepts.
|
|
5
|
+
#
|
|
6
|
+
# Adds:
|
|
7
|
+
# - domains (ISO standard ConceptReference) at managed concept data level
|
|
8
|
+
# - related (broader to standard domain) at managed concept level
|
|
9
|
+
# - Creates domain concepts per ISO standard (with narrower relations)
|
|
10
|
+
#
|
|
11
|
+
# Idempotent: safe to run multiple times.
|
|
12
|
+
#
|
|
13
|
+
# Usage:
|
|
14
|
+
# cd /Users/mulgogi/src/glossarist/glossarist-ruby
|
|
15
|
+
# bundle exec ruby scripts/migrate_isotc211_to_v3.rb
|
|
16
|
+
|
|
17
|
+
require "glossarist"
|
|
18
|
+
|
|
19
|
+
# Dataset directory to migrate in place. Defaults to the original author's
# local checkout; pass a different path as the first command-line argument
# to run the migration elsewhere.
DIR = ARGV.fetch(0, "/Users/mulgogi/src/geolexica/isotc211-glossary/concepts")
|
|
20
|
+
|
|
21
|
+
ISO_SOURCE_URN = "urn:iso:std:iso"
|
|
22
|
+
|
|
23
|
+
# Extract a stable domain ID from an authoritative source reference string.
|
|
24
|
+
# @param ref_text [String] e.g. "ISO 19136-1:2020", "ISO/IEC 19501:2005"
|
|
25
|
+
# @return [String, nil] domain ID e.g. "iso-19136-1", "iso-iec-19501"
|
|
26
|
+
def extract_domain_id(ref_text)
  # Recognised reference shapes. They are tried in this order and the first
  # one that matches wins, so the bare "ISO <number>" form comes last.
  reference_patterns = [
    %r{ISO/IEC/IEEE\s+([\d-]+)},     # ISO/IEC/IEEE 24765:2017
    %r{ISO/IEC\s+([\d-]+)},          # ISO/IEC 19501:2005
    %r{ISO/TS\s+([\d-]+)},           # ISO/TS 19130:2010
    %r{ISO/TR\s+([\d-]+)},           # ISO/TR 19120:2001
    %r{ISO/IEC\s+Guide\s+([\d-]+)},  # ISO/IEC Guide 98-3:2008
    %r{ISO\s+DIS\s+([\d-]+)},        # ISO DIS 19123-1:2022
    %r{ISO\s+([\d]+-?[\d]*)},        # ISO 19136-1:2020
  ]

  reference_patterns.each do |pattern|
    match = ref_text.match(pattern)
    next unless match

    # Text between the start of the match and the captured number is the
    # publisher/type label, e.g. "ISO/IEC".
    label = ref_text[match.begin(0)...match.begin(1)].strip

    # Lower-case, and collapse every run of whitespace or "/" into one "-".
    return "#{label} #{match[1]}".downcase.gsub(/[\s\/]+/, "-")
  end

  nil
end
|
|
50
|
+
|
|
51
|
+
collection = Glossarist::ManagedConceptCollection.new
|
|
52
|
+
collection.load_from_files(DIR)
|
|
53
|
+
|
|
54
|
+
puts "Loaded #{collection.count} concepts"
|
|
55
|
+
|
|
56
|
+
# Build index: domain_id -> [concept_id]
|
|
57
|
+
domain_index = {}
|
|
58
|
+
concepts_with_domain = 0
|
|
59
|
+
concepts_without_domain = 0
|
|
60
|
+
|
|
61
|
+
collection.each do |concept|
|
|
62
|
+
eng = concept.localization("eng")
|
|
63
|
+
next unless eng
|
|
64
|
+
|
|
65
|
+
sources = eng.data&.sources
|
|
66
|
+
next unless sources
|
|
67
|
+
|
|
68
|
+
auth = sources.find { |s| s.type == "authoritative" }
|
|
69
|
+
next unless auth&.origin
|
|
70
|
+
|
|
71
|
+
ref_text = auth.origin.text || auth.origin.ref
|
|
72
|
+
unless ref_text
|
|
73
|
+
concepts_without_domain += 1
|
|
74
|
+
next
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
domain_id = extract_domain_id(ref_text)
|
|
78
|
+
unless domain_id
|
|
79
|
+
concepts_without_domain += 1
|
|
80
|
+
next
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
(domain_index[domain_id] ||= []) << concept.data.id
|
|
84
|
+
concepts_with_domain += 1
|
|
85
|
+
|
|
86
|
+
# Add domain ConceptReference
|
|
87
|
+
concept.data.domains ||= []
|
|
88
|
+
unless concept.data.domains.any? { |d| d.concept_id == domain_id }
|
|
89
|
+
concept.data.domains << Glossarist::ConceptReference.new(
|
|
90
|
+
concept_id: domain_id,
|
|
91
|
+
source: ISO_SOURCE_URN,
|
|
92
|
+
ref_type: "domain",
|
|
93
|
+
)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# Add broader relation to domain concept
|
|
97
|
+
concept.related ||= []
|
|
98
|
+
unless concept.related.any? { |r| r.type == "broader" && r.ref&.id == domain_id }
|
|
99
|
+
concept.related << Glossarist::RelatedConcept.new(
|
|
100
|
+
type: "broader",
|
|
101
|
+
content: domain_id,
|
|
102
|
+
ref: Glossarist::Citation.new(source: "ISO", id: domain_id),
|
|
103
|
+
)
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
puts "Added domains to #{concepts_with_domain} concepts"
|
|
108
|
+
puts "No domain extracted for #{concepts_without_domain} concepts"
|
|
109
|
+
|
|
110
|
+
# Create domain hierarchy concepts
|
|
111
|
+
domain_index.sort.each do |domain_id, child_ids|
|
|
112
|
+
mc = Glossarist::ManagedConcept.new(
|
|
113
|
+
data: Glossarist::ManagedConceptData.new(
|
|
114
|
+
id: domain_id,
|
|
115
|
+
domains: [Glossarist::ConceptReference.new(
|
|
116
|
+
concept_id: domain_id,
|
|
117
|
+
source: ISO_SOURCE_URN,
|
|
118
|
+
ref_type: "domain",
|
|
119
|
+
)],
|
|
120
|
+
),
|
|
121
|
+
)
|
|
122
|
+
mc.status = "valid"
|
|
123
|
+
|
|
124
|
+
# Create a basic English localization with the domain ID as the term
|
|
125
|
+
l10n = Glossarist::LocalizedConcept.new
|
|
126
|
+
l10n.data = Glossarist::ConceptData.new(
|
|
127
|
+
id: domain_id,
|
|
128
|
+
language_code: "eng",
|
|
129
|
+
terms: [Glossarist::Designation::Expression.new(
|
|
130
|
+
type: "expression",
|
|
131
|
+
designation: domain_id,
|
|
132
|
+
normative_status: "preferred",
|
|
133
|
+
)],
|
|
134
|
+
)
|
|
135
|
+
l10n.entry_status = "valid"
|
|
136
|
+
mc.add_l10n(l10n)
|
|
137
|
+
|
|
138
|
+
# Add narrower relations to child concepts
|
|
139
|
+
narrower = child_ids.sort.map do |child_id|
|
|
140
|
+
Glossarist::RelatedConcept.new(
|
|
141
|
+
type: "narrower",
|
|
142
|
+
content: child_id.to_s,
|
|
143
|
+
ref: Glossarist::Citation.new(source: "ISO", id: child_id.to_s),
|
|
144
|
+
)
|
|
145
|
+
end
|
|
146
|
+
mc.related = narrower
|
|
147
|
+
|
|
148
|
+
collection.store(mc)
|
|
149
|
+
puts "Created domain: #{domain_id} — #{child_ids.size} narrower"
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
collection.save_grouped_concepts_to_files(DIR)
|
|
153
|
+
puts "Saved #{collection.count} concepts to #{DIR}"
|