glossarist 2.6.5 → 2.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +1 -4
  3. data/.rubocop_todo.yml +53 -2
  4. data/CLAUDE.md +27 -2
  5. data/README.adoc +532 -56
  6. data/config.yml +68 -1
  7. data/glossarist.gemspec +2 -0
  8. data/lib/glossarist/citation.rb +26 -123
  9. data/lib/glossarist/cli/compare_command.rb +106 -0
  10. data/lib/glossarist/cli/export_command.rb +11 -14
  11. data/lib/glossarist/cli/validate_command.rb +111 -20
  12. data/lib/glossarist/cli.rb +18 -0
  13. data/lib/glossarist/collections/bibliography_collection.rb +4 -2
  14. data/lib/glossarist/collections/localization_collection.rb +2 -0
  15. data/lib/glossarist/comparison_result.rb +35 -0
  16. data/lib/glossarist/concept.rb +1 -1
  17. data/lib/glossarist/concept_collector.rb +44 -0
  18. data/lib/glossarist/concept_comparator.rb +72 -0
  19. data/lib/glossarist/concept_data.rb +20 -0
  20. data/lib/glossarist/concept_diff.rb +15 -0
  21. data/lib/glossarist/concept_document.rb +11 -0
  22. data/lib/glossarist/concept_manager.rb +19 -5
  23. data/lib/glossarist/concept_ref.rb +13 -0
  24. data/lib/glossarist/concept_reference.rb +12 -19
  25. data/lib/glossarist/concept_validator.rb +6 -1
  26. data/lib/glossarist/context_configuration.rb +90 -0
  27. data/lib/glossarist/dataset_validator.rb +8 -4
  28. data/lib/glossarist/designation/abbreviation.rb +0 -2
  29. data/lib/glossarist/designation/base.rb +21 -1
  30. data/lib/glossarist/designation/expression.rb +3 -0
  31. data/lib/glossarist/designation/letter_symbol.rb +0 -4
  32. data/lib/glossarist/designation/prefix.rb +17 -0
  33. data/lib/glossarist/designation/suffix.rb +17 -0
  34. data/lib/glossarist/designation/symbol.rb +0 -2
  35. data/lib/glossarist/gcr_metadata.rb +7 -14
  36. data/lib/glossarist/gcr_package.rb +35 -23
  37. data/lib/glossarist/gcr_validator.rb +38 -17
  38. data/lib/glossarist/glossary_definition.rb +5 -0
  39. data/lib/glossarist/localized_concept.rb +8 -0
  40. data/lib/glossarist/managed_concept.rb +39 -6
  41. data/lib/glossarist/managed_concept_data.rb +22 -2
  42. data/lib/glossarist/non_verb_rep.rb +21 -6
  43. data/lib/glossarist/pronunciation.rb +32 -0
  44. data/lib/glossarist/rdf/ext/jsonld_transform_ext.rb +208 -0
  45. data/lib/glossarist/rdf/ext/mapping_ext.rb +37 -0
  46. data/lib/glossarist/rdf/ext/mapping_rule_ext.rb +27 -0
  47. data/lib/glossarist/rdf/ext/member_rule_ext.rb +34 -0
  48. data/lib/glossarist/rdf/ext/turtle_transform_ext.rb +222 -0
  49. data/lib/glossarist/rdf/ext.rb +39 -0
  50. data/lib/glossarist/rdf/gloss_citation.rb +36 -0
  51. data/lib/glossarist/rdf/gloss_concept.rb +58 -0
  52. data/lib/glossarist/rdf/gloss_concept_date.rb +24 -0
  53. data/lib/glossarist/rdf/gloss_concept_reference.rb +29 -0
  54. data/lib/glossarist/rdf/gloss_concept_source.rb +37 -0
  55. data/lib/glossarist/rdf/gloss_designation.rb +146 -0
  56. data/lib/glossarist/rdf/gloss_detailed_definition.rb +24 -0
  57. data/lib/glossarist/rdf/gloss_grammar_info.rb +57 -0
  58. data/lib/glossarist/rdf/gloss_locality.rb +25 -0
  59. data/lib/glossarist/rdf/gloss_localized_concept.rb +67 -0
  60. data/lib/glossarist/rdf/gloss_non_verbal_rep.rb +31 -0
  61. data/lib/glossarist/rdf/gloss_pronunciation.rb +32 -0
  62. data/lib/glossarist/rdf/gloss_reference.rb +55 -0
  63. data/lib/glossarist/rdf/namespaces/glossarist_namespace.rb +12 -0
  64. data/lib/glossarist/rdf/namespaces/iso_thes_namespace.rb +12 -0
  65. data/lib/glossarist/rdf/namespaces/owl_namespace.rb +12 -0
  66. data/lib/glossarist/rdf/namespaces/prov_namespace.rb +12 -0
  67. data/lib/glossarist/rdf/namespaces/rdf_namespace.rb +12 -0
  68. data/lib/glossarist/rdf/namespaces/skosxl_namespace.rb +12 -0
  69. data/lib/glossarist/rdf/namespaces.rb +8 -2
  70. data/lib/glossarist/rdf/relationships.rb +19 -0
  71. data/lib/glossarist/rdf/v3/configuration.rb +15 -0
  72. data/lib/glossarist/rdf/v3.rb +79 -0
  73. data/lib/glossarist/rdf.rb +22 -2
  74. data/lib/glossarist/reference_extractor.rb +15 -24
  75. data/lib/glossarist/reference_resolver.rb +3 -3
  76. data/lib/glossarist/related_concept.rb +2 -10
  77. data/lib/glossarist/schema_migration.rb +39 -0
  78. data/lib/glossarist/sts/term_mapper.rb +2 -2
  79. data/lib/glossarist/transforms/concept_to_gloss_transform.rb +355 -0
  80. data/lib/glossarist/transforms.rb +2 -2
  81. data/lib/glossarist/urn_resolver.rb +13 -1
  82. data/lib/glossarist/v1/concept.rb +18 -11
  83. data/lib/glossarist/v2/citation.rb +36 -0
  84. data/lib/glossarist/v2/concept_data.rb +46 -0
  85. data/lib/glossarist/v2/concept_document.rb +18 -0
  86. data/lib/glossarist/v2/concept_ref.rb +8 -0
  87. data/lib/glossarist/v2/concept_source.rb +16 -0
  88. data/lib/glossarist/v2/configuration.rb +13 -0
  89. data/lib/glossarist/v2/detailed_definition.rb +14 -0
  90. data/lib/glossarist/v2/localized_concept.rb +9 -0
  91. data/lib/glossarist/v2/managed_concept.rb +25 -0
  92. data/lib/glossarist/v2/managed_concept_data.rb +49 -0
  93. data/lib/glossarist/v2/related_concept.rb +15 -0
  94. data/lib/glossarist/v2.rb +28 -0
  95. data/lib/glossarist/v3/bibliography_entry.rb +19 -0
  96. data/lib/glossarist/v3/bibliography_file.rb +27 -0
  97. data/lib/glossarist/v3/citation.rb +30 -0
  98. data/lib/glossarist/v3/concept_data.rb +46 -0
  99. data/lib/glossarist/v3/concept_document.rb +18 -0
  100. data/lib/glossarist/v3/concept_ref.rb +8 -0
  101. data/lib/glossarist/v3/concept_source.rb +16 -0
  102. data/lib/glossarist/v3/configuration.rb +13 -0
  103. data/lib/glossarist/v3/detailed_definition.rb +14 -0
  104. data/lib/glossarist/v3/image_entry.rb +21 -0
  105. data/lib/glossarist/v3/image_file.rb +31 -0
  106. data/lib/glossarist/v3/localized_concept.rb +9 -0
  107. data/lib/glossarist/v3/managed_concept.rb +26 -0
  108. data/lib/glossarist/v3/managed_concept_data.rb +34 -0
  109. data/lib/glossarist/v3/related_concept.rb +15 -0
  110. data/lib/glossarist/v3.rb +36 -0
  111. data/lib/glossarist/validation/asset_index.rb +4 -3
  112. data/lib/glossarist/validation/bibliography_index.rb +61 -30
  113. data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +2 -15
  114. data/lib/glossarist/validation/rules/authoritative_source_rule.rb +2 -15
  115. data/lib/glossarist/validation/rules/base.rb +5 -0
  116. data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +2 -3
  117. data/lib/glossarist/validation/rules/citation_completeness_rule.rb +5 -27
  118. data/lib/glossarist/validation/rules/dataset_context.rb +8 -3
  119. data/lib/glossarist/validation/rules/date_validity_rule.rb +1 -1
  120. data/lib/glossarist/validation/rules/designation_status_rule.rb +0 -1
  121. data/lib/glossarist/validation/rules/designation_type_rule.rb +1 -5
  122. data/lib/glossarist/validation/rules/domain_ref_rule.rb +37 -0
  123. data/lib/glossarist/validation/rules/domain_target_rule.rb +56 -0
  124. data/lib/glossarist/validation/rules/gcr_context.rb +12 -13
  125. data/lib/glossarist/validation/rules/image_reference_rule.rb +2 -17
  126. data/lib/glossarist/validation/rules/locality_completeness_rule.rb +58 -0
  127. data/lib/glossarist/validation/rules/localization_consistency_rule.rb +72 -0
  128. data/lib/glossarist/validation/rules/localization_presence_rule.rb +1 -1
  129. data/lib/glossarist/validation/rules/model_validity_rule.rb +71 -0
  130. data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +1 -13
  131. data/lib/glossarist/validation/rules/orphaned_images_rule.rb +16 -11
  132. data/lib/glossarist/validation/rules/ref_shape_rule.rb +68 -0
  133. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +1 -3
  134. data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +1 -3
  135. data/lib/glossarist/validation/rules/related_concept_target_rule.rb +64 -0
  136. data/lib/glossarist/validation/rules/schema_version_rule.rb +41 -0
  137. data/lib/glossarist/validation/rules/source_type_rule.rb +1 -15
  138. data/lib/glossarist/validation/rules/source_urn_format_rule.rb +65 -0
  139. data/lib/glossarist/validation/rules/uuid_format_rule.rb +33 -0
  140. data/lib/glossarist/validation/rules.rb +10 -43
  141. data/lib/glossarist/validation/validation_issue.rb +14 -11
  142. data/lib/glossarist/validation_result.rb +12 -22
  143. data/lib/glossarist/version.rb +1 -1
  144. data/lib/glossarist.rb +10 -0
  145. data/memory/project-status.md +43 -0
  146. data/scripts/migrate_dataset.rb +180 -0
  147. data/scripts/migrate_isotc204_to_v3.rb +134 -0
  148. data/scripts/migrate_isotc211_to_v3.rb +153 -0
  149. data/scripts/migrate_osgeo_to_v3.rb +155 -0
  150. data/scripts/upgrade_dataset_to_v3.rb +47 -0
  151. metadata +112 -6
  152. data/TODO.integration/01-gcr-package-cli.md +0 -180
  153. data/lib/glossarist/rdf/skos_concept.rb +0 -43
  154. data/lib/glossarist/rdf/skos_vocabulary.rb +0 -25
  155. data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -131
@@ -1,55 +1,45 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Glossarist
4
- class ValidationResult
5
- attr_reader :issues
4
+ class ValidationResult < Lutaml::Model::Serializable
5
+ attribute :issues, Validation::ValidationIssue, collection: true,
6
+ initialize_empty: true
6
7
 
7
- def initialize(errors: [], warnings: [], issues: [])
8
- @issues = []
9
- errors.each { |e| add_error(e) }
10
- warnings.each { |w| add_warning(w) }
11
- issues.each { |i| add_issue(i) }
8
+ key_value do
9
+ map :issues, to: :issues
12
10
  end
13
11
 
14
12
  def valid?
15
- @issues.none?(&:error?)
13
+ issues.none?(&:error?)
16
14
  end
17
15
 
18
16
  def errors
19
- @issues.select(&:error?).map(&:message)
17
+ issues.select(&:error?).map(&:to_s)
20
18
  end
21
19
 
22
20
  def warnings
23
- @issues.select(&:warning?).map(&:message)
21
+ issues.select(&:warning?).map(&:to_s)
24
22
  end
25
23
 
26
24
  def add_error(message)
27
- @issues << Validation::ValidationIssue.new(
25
+ issues << Validation::ValidationIssue.new(
28
26
  severity: "error", message: message,
29
27
  )
30
28
  end
31
29
 
32
30
  def add_warning(message)
33
- @issues << Validation::ValidationIssue.new(
31
+ issues << Validation::ValidationIssue.new(
34
32
  severity: "warning", message: message,
35
33
  )
36
34
  end
37
35
 
38
36
  def add_issue(issue)
39
- @issues << issue
37
+ issues << issue
40
38
  end
41
39
 
42
40
  def merge(other)
43
- other.issues.each { |i| @issues << i }
41
+ other.issues.each { |i| issues << i }
44
42
  self
45
43
  end
46
-
47
- def to_h
48
- {
49
- "valid" => valid?,
50
- "errors" => errors,
51
- "warnings" => warnings,
52
- }
53
- end
54
44
  end
55
45
  end
@@ -4,5 +4,5 @@
4
4
  #
5
5
 
6
6
  module Glossarist
7
- VERSION = "2.6.5"
7
+ VERSION = "2.6.7"
8
8
  end
data/lib/glossarist.rb CHANGED
@@ -18,6 +18,7 @@ module Glossarist
18
18
  autoload :Collections, "glossarist/collections"
19
19
  autoload :Concept, "glossarist/concept"
20
20
  autoload :ConceptData, "glossarist/concept_data"
21
+ autoload :ConceptRef, "glossarist/concept_ref"
21
22
  autoload :ConceptReference, "glossarist/concept_reference"
22
23
  autoload :ReferenceExtractor, "glossarist/reference_extractor"
23
24
  autoload :ReferenceResolver, "glossarist/reference_resolver"
@@ -28,6 +29,9 @@ module Glossarist
28
29
  autoload :ConceptSource, "glossarist/concept_source"
29
30
  autoload :ConceptValidator, "glossarist/concept_validator"
30
31
  autoload :ConceptCollector, "glossarist/concept_collector"
32
+ autoload :ConceptComparator, "glossarist/concept_comparator"
33
+ autoload :ComparisonResult, "glossarist/comparison_result"
34
+ autoload :ConceptDiff, "glossarist/concept_diff"
31
35
  autoload :ConceptDocument, "glossarist/concept_document"
32
36
  autoload :ConceptEnricher, "glossarist/concept_enricher"
33
37
  autoload :Config, "glossarist/config"
@@ -52,6 +56,7 @@ module Glossarist
52
56
  autoload :ManagedConceptCollection, "glossarist/managed_concept_collection"
53
57
  autoload :ManagedConceptData, "glossarist/managed_concept_data"
54
58
  autoload :NonVerbRep, "glossarist/non_verb_rep"
59
+ autoload :Pronunciation, "glossarist/pronunciation"
55
60
  autoload :RelatedConcept, "glossarist/related_concept"
56
61
  autoload :Rdf, "glossarist/rdf"
57
62
  autoload :Sts, "glossarist/sts"
@@ -63,6 +68,8 @@ module Glossarist
63
68
  autoload :RegisterData, "glossarist/register_data"
64
69
  autoload :ValidationResult, "glossarist/validation_result"
65
70
  autoload :V1, "glossarist/v1"
71
+ autoload :V2, "glossarist/v2"
72
+ autoload :V3, "glossarist/v3"
66
73
  end
67
74
 
68
75
  require_relative "glossarist/version"
@@ -73,6 +80,9 @@ module Glossarist
73
80
  LANG_CODES = %w[eng ara deu fra spa ita jpn kor pol por srp swe zho rus fin
74
81
  dan nld msa nob nno].freeze
75
82
 
83
+ SCHEMA_VERSION = "3"
84
+ V3_SCHEMA_VERSION = "3"
85
+
76
86
  def self.configure
77
87
  config = Glossarist::Config.instance
78
88
 
@@ -0,0 +1,43 @@
1
+ ---
2
+ name: V2/V3 Namespace Architecture
3
+ description: lutaml-model mapping inheritance, model register, V2/V3 namespace design
4
+ type: project
5
+ ---
6
+
7
+ ## lutaml-model mapping inheritance
8
+
9
+ lutaml-model `key_value` mappings ARE inherited by subclasses — a subclass's `key_value` block merges on top of the parent's mappings, it does NOT replace them. A subclass cannot "unmap" a parent mapping by omitting it.
10
+
11
+ **Why:** Discovered during V2/V3 namespace implementation. V2::ManagedConcept originally tried to define its own `key_value` block without `related`/`schema_version`/`sources`, but the parent ManagedConcept's mappings for those fields were still active.
12
+
13
+ **How to apply:** For V2/V3 versioning, only V2::ManagedConceptData needs its own `key_value` (to add `related` mapping inside data). V2::ManagedConcept only overrides the `data` attribute to point to V2::ManagedConceptData — it does NOT need its own `key_value`. V2 is only for deserialization; serialization always uses v3 format via inherited base class mappings.
14
+
15
+ ## Model register (lutaml-model GlobalContext)
16
+
17
+ Follows the plurimath/mml pattern. Each version has a Configuration module with a unique CONTEXT_ID that extends Glossarist::ContextConfiguration. Models are registered via `Configuration.register_model(ClassName, id: :symbol)`. Type resolution uses `Configuration.resolve_model(:symbol)` which delegates to `Lutaml::Model::GlobalContext.resolve_type`.
18
+
19
+ **Why:** Enables context-based type resolution instead of hardcoded case/when. Each version's registry is isolated — V2::Configuration resolves `:managed_concept` to V2::ManagedConcept, V3::Configuration resolves to V3::ManagedConcept.
20
+
21
+ **How to apply:** `ConceptDocument.for_version(version)` looks up the version's Configuration from a VERSION_CONFIGURATION hash and calls `resolve_model(:concept_document)`. Adding a new version requires only a new Configuration module and register_model calls.
22
+
23
+ ## V2 → V3 model-driven migration
24
+
25
+ V2→V3 migration is fully model-driven: V2::ConceptDocument deserializes v2 YAML (data.related → model), then `SchemaMigration.migrate_concept` promotes `data.related` to `concept.related` and sets schema_version to "3". No hash-based transformation needed — `Steps::V2ToV3` was deleted.
26
+
27
+ ## RDF / JSON-LD / Turtle: version-agnostic
28
+
29
+ RDF view classes (GlossConcept, GlossLocalizedConcept, etc.) are NOT versioned. They operate on the domain model (ManagedConcept), not on YAML serialization format. schema_version is a YAML metadata field with no SKOS/gloss ontology equivalent. ConceptToGlossTransform takes a domain-model ManagedConcept and produces view-model instances — it is format-agnostic.
30
+
31
+ **Why:** Whether a concept was loaded from v2 or v3 YAML, the RDF output is identical. The domain model normalizes away format differences.
32
+
33
+ **How to apply:** No RDF model changes needed for v2/v3. If a future v4 changes the domain model semantics (not just serialization), RDF view classes would need updating.
34
+
35
+ ## README documentation (completed 2026-05-20)
36
+
37
+ The README.adoc now includes a comprehensive "Schema Versioning (v2 / v3)" section covering:
38
+ - V2 vs V3 format differences with YAML examples
39
+ - Namespace architecture diagram
40
+ - Model register (GlobalContext) usage
41
+ - Loading & migration flow diagram
42
+ - Usage examples for v2, v3, migration, and RDF export
43
+ - "Adding a new schema version" guide (Open/Closed Principle)
@@ -0,0 +1,180 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Migrate glossarist datasets to current schema format.
4
+ #
5
+ # Usage:
6
+ # bundle exec ruby scripts/migrate_dataset.rb SOURCE_DIR OUTPUT_DIR [--add-iev-domains]
7
+ #
8
+ # --add-iev-domains: Add domain ConceptReference objects for IEV-style identifiers
9
+ # (e.g. "426-24-74" → area-426, section-426-24)
10
+
11
+ require "glossarist"
12
+ require "fileutils"
13
+
14
+ source_dir = ARGV[0]
15
+ output_dir = ARGV[1]
16
+ add_iev_domains = ARGV.include?("--add-iev-domains")
17
+
18
+ unless source_dir && output_dir
19
+ abort "Usage: bundle exec ruby scripts/migrate_dataset.rb SOURCE_DIR OUTPUT_DIR [--add-iev-domains]"
20
+ end
21
+
22
+ source_dir = File.expand_path(source_dir)
23
+ output_dir = File.expand_path(output_dir)
24
+
25
+ unless File.directory?(source_dir)
26
+ abort "Error: #{source_dir} is not a directory"
27
+ end
28
+
29
# Creates the "area-*" and "section-*" hierarchy concepts referenced by the
# domains of the concepts in +collection+ and stores them in the collection.
#
# Every ConceptReference found in a concept's +data.domains+ whose concept_id
# starts with "area-" or "section-" is collected. For each collected id that
# is not already the id of a concept in the collection, a new ManagedConcept
# is built with an English localization and stored:
#
# * area concepts get "narrower" relations to every collected section whose
#   id starts with "section-<area code>-";
# * section concepts get a "broader" relation to their area, and the area id
#   is also written to the localization's +domain+.
#
# @param collection [Glossarist::ManagedConceptCollection] collection to scan
#   and extend in place
def add_subject_area_concepts(collection)
  area_ids = {}
  section_ids = {}

  # Hashes are used as insertion-ordered sets of the referenced ids.
  collection.each do |concept|
    refs = concept.data.domains
    next unless refs

    refs.each do |ref|
      next unless ref.is_a?(Glossarist::ConceptReference) && ref.concept_id

      ref_id = ref.concept_id
      bucket =
        if ref_id.start_with?("area-")
          area_ids
        elsif ref_id.start_with?("section-")
          section_ids
        end
      bucket[ref_id] = true if bucket
    end
  end

  # Snapshot of ids present before any hierarchy concept is added.
  known_ids = collection.map { |c| c.data.id }.to_set

  area_ids.each_key do |area_id|
    next if known_ids.include?(area_id)

    area_code = area_id.sub("area-", "")
    child_prefix = "section-#{area_code}-"

    area_concept = Glossarist::ManagedConcept.new(
      data: Glossarist::ManagedConceptData.new(
        id: area_id,
        domains: [Glossarist::ConceptReference.domain(area_id)],
      ),
    )

    child_sections = section_ids.keys.select { |sid| sid.start_with?(child_prefix) }
    area_concept.related = child_sections.map do |sid|
      Glossarist::RelatedConcept.new(type: "narrower", content: sid)
    end

    area_concept.add_l10n(build_domain_localization(area_id, area_code, "eng"))
    collection.store(area_concept)
  end

  section_ids.each_key do |section_id|
    next if known_ids.include?(section_id)

    # "section-426-24" => ["426", "24"]; the first part names the parent area.
    parts = section_id.sub("section-", "").split("-")
    area_id = "area-#{parts[0]}"

    section_concept = Glossarist::ManagedConcept.new(
      data: Glossarist::ManagedConceptData.new(
        id: section_id,
        domains: [
          Glossarist::ConceptReference.domain(area_id),
          Glossarist::ConceptReference.domain(section_id),
        ],
      ),
    )
    section_concept.related = [
      Glossarist::RelatedConcept.new(type: "broader", content: area_id),
    ]

    section_code = parts.length > 1 ? parts.first(2).join("-") : parts[0]
    localized = build_domain_localization(section_id, section_code, "eng")
    localized.data.domain = area_id
    section_concept.add_l10n(localized)

    collection.store(section_concept)
  end
end
96
+
97
# Builds a minimal localization for a generated domain (area/section) concept.
#
# The localization carries a single preferred "expression" designation and an
# entry status of "valid".
#
# NOTE(review): +label+ is accepted but never read here; the designation text
# is taken from +id+. Confirm whether the label was meant to be the term.
#
# @param id [String] concept id, also used as the designation text
# @param label [String] currently unused (see note above)
# @param lang_code [String] language code of the localization, e.g. "eng"
# @return [Glossarist::LocalizedConcept]
def build_domain_localization(id, label, lang_code)
  preferred_term = Glossarist::Designation::Expression.new(
    type: "expression",
    designation: id,
    normative_status: "preferred",
  )
  concept_data = Glossarist::ConceptData.new(
    id: id,
    language_code: lang_code,
    terms: [preferred_term],
  )

  Glossarist::LocalizedConcept.new.tap do |localized|
    localized.data = concept_data
    localized.entry_status = "valid"
  end
end
115
+
116
+ # Detect format: managed (concept/ + localized_concept/) vs grouped (*.yaml)
117
+ concept_subdir = File.join(source_dir, "concept")
118
+ is_managed_format = File.directory?(concept_subdir)
119
+
120
+ puts "Loading concepts from #{source_dir} (#{is_managed_format ? 'managed' : 'grouped'} format)..."
121
+
122
+ collection = Glossarist::ManagedConceptCollection.new
123
+ collection.load_from_files(source_dir)
124
+
125
+ puts "Loaded #{collection.count} concepts"
126
+
127
+ # Add IEV domain references if requested
128
+ if add_iev_domains
129
+ puts "Adding IEV domain references..."
130
+
131
+ collection.each do |concept|
132
+ next if concept.data.domains && !concept.data.domains.empty?
133
+
134
+ identifier = concept.data.id.to_s
135
+ next if identifier.empty? || identifier.start_with?("area-", "section-")
136
+ parts = identifier.split("-")
137
+ next unless parts.length >= 2
138
+
139
+ area_uri = "area-#{parts[0]}"
140
+ section_uri = "section-#{parts[0]}-#{parts[1]}"
141
+
142
+ concept.data.domains = [
143
+ Glossarist::ConceptReference.domain(area_uri),
144
+ Glossarist::ConceptReference.domain(section_uri),
145
+ ]
146
+ end
147
+
148
+ puts "Adding subject area hierarchy concepts..."
149
+ add_subject_area_concepts(collection)
150
+
151
+ puts "Domains added. Total concepts: #{collection.count}"
152
+ end
153
+
154
+ # Save output
155
+ puts "Saving to #{output_dir}..."
156
+
157
+ if is_managed_format
158
+ concepts_out = File.join(output_dir, "concepts")
159
+ FileUtils.mkdir_p(concepts_out)
160
+ collection.save_to_files(concepts_out)
161
+ else
162
+ concepts_out = File.join(output_dir)
163
+ FileUtils.mkdir_p(concepts_out)
164
+ collection.save_grouped_concepts_to_files(concepts_out)
165
+ end
166
+
167
# Copy register.yaml if present.
#
# The register file is looked up next to the source directory and copied next
# to the output directory, in both the managed and the grouped layout. An
# existing register.yaml at the destination is never overwritten; this also
# covers the case where source and destination are the same file.
register_src = File.join(File.dirname(source_dir), "register.yaml")
register_dst = File.join(File.dirname(output_dir), "register.yaml")

if File.exist?(register_src) && !File.exist?(register_dst)
  FileUtils.mkdir_p(File.dirname(register_dst))
  FileUtils.cp(register_src, register_dst)
end
179
+
180
+ puts "Done. #{collection.count} concepts migrated."
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Migration script for isotc204-glossary: adds v3 fields to existing concepts.
5
+ #
6
+ # Adds:
7
+ # - status: "valid" at managed concept level
8
+ # - date_accepted at managed concept level
9
+ # - domains (section ConceptReference) at managed concept data level
10
+ # - related (broader to section) at managed concept level
11
+ # - Creates section hierarchy concepts with narrower relations
12
+ #
13
+ # Usage:
14
+ # cd /Users/mulgogi/src/glossarist/glossarist-ruby
15
+ # bundle exec ruby scripts/migrate_isotc204_to_v3.rb
16
+ #
17
+ # Safe to run multiple times (idempotent).
18
+
19
+ require "glossarist"
20
+
21
+ DIR = "/Users/mulgogi/src/geolexica/isotc204-glossary/concepts"
22
+
23
+ ISO_TS_14812_SECTIONS = {
24
+ "3.1" => "General concepts",
25
+ "3.2" => "Transport information and control",
26
+ "3.3" => "ITS station",
27
+ "3.4" => "Communications",
28
+ "3.5" => "ITS services",
29
+ "3.6" => "Geospatial",
30
+ "3.7" => "Driving automation",
31
+ }.freeze
32
+
33
+ ISO_TS_14812_DATE = "2022-01-01T00:00:00+00:00"
34
+ ISO_TS_14812_SOURCE = "urn:iso:std:iso:ts:14812"
35
+
36
+ collection = Glossarist::ManagedConceptCollection.new
37
+ collection.load_from_files(DIR)
38
+
39
+ puts "Loaded #{collection.count} concepts"
40
+
41
# Track which concepts belong to which section for narrower relations.
# Keys are section ids ("section-3-1"), values are the member concept ids.
section_children = Hash.new { |h, k| h[k] = [] }

collection.each do |concept|
  identifier = concept.data.id.to_s

  # This script saves the generated "section-*" hierarchy concepts back into
  # the directory it loads from, so a re-run sees them here. Skip them:
  # otherwise they would be assigned to a bogus "section-section-…" section
  # and gain a matching domain and broader relation.
  next if identifier.start_with?("section-")

  # "3.1.2.4" => section code "3.1" => section id "section-3-1"
  section_code = identifier.split(".").first(2).join(".")
  section_uri = "section-#{section_code.tr('.', '-')}"
  section_children[section_uri] << identifier

  # Set status
  concept.status = "valid" unless concept.status

  # Set date_accepted
  unless concept.date_accepted
    concept.date_accepted = Glossarist::ConceptDate.new(
      type: "accepted",
      date: ISO_TS_14812_DATE,
    )
  end

  # Add domain ConceptReference (only once per section)
  concept.data.domains ||= []
  unless concept.data.domains.any? { |d| d.concept_id == section_uri }
    concept.data.domains << Glossarist::ConceptReference.new(
      concept_id: section_uri,
      source: ISO_TS_14812_SOURCE,
      ref_type: "domain",
    )
  end

  # Add broader relation to section (only once per section)
  concept.related ||= []
  unless concept.related.any? { |r| r.type == "broader" && r.ref&.id == section_uri }
    concept.related << Glossarist::RelatedConcept.new(
      type: "broader",
      content: section_uri,
      ref: Glossarist::Citation.new(source: "ISO/TS 14812", id: section_uri),
    )
  end
end
81
+
82
+ puts "Updated #{collection.count} concepts with status, date_accepted, domains, related"
83
+
84
+ # Create section hierarchy concepts
85
+ ISO_TS_14812_SECTIONS.each do |code, title|
86
+ section_uri = "section-#{code.gsub('.', '-')}"
87
+
88
+ mc = Glossarist::ManagedConcept.new(
89
+ data: Glossarist::ManagedConceptData.new(
90
+ id: section_uri,
91
+ domains: [Glossarist::ConceptReference.new(
92
+ concept_id: section_uri,
93
+ source: ISO_TS_14812_SOURCE,
94
+ ref_type: "domain",
95
+ )],
96
+ ),
97
+ )
98
+ mc.status = "valid"
99
+ mc.date_accepted = Glossarist::ConceptDate.new(
100
+ type: "accepted",
101
+ date: ISO_TS_14812_DATE,
102
+ )
103
+
104
+ l10n = Glossarist::LocalizedConcept.new
105
+ l10n.data = Glossarist::ConceptData.new(
106
+ id: section_uri,
107
+ language_code: "eng",
108
+ terms: [Glossarist::Designation::Expression.new(
109
+ type: "expression",
110
+ designation: title,
111
+ normative_status: "preferred",
112
+ )],
113
+ )
114
+ l10n.entry_status = "valid"
115
+ mc.add_l10n(l10n)
116
+
117
+ # Add narrower relations to child concepts
118
+ children = section_children[section_uri]
119
+ if children.any?
120
+ mc.related = children.sort.map do |child_id|
121
+ Glossarist::RelatedConcept.new(
122
+ type: "narrower",
123
+ content: child_id,
124
+ ref: Glossarist::Citation.new(source: "ISO/TS 14812", id: child_id),
125
+ )
126
+ end
127
+ end
128
+
129
+ collection.store(mc)
130
+ puts "Created section concept: #{section_uri} (#{title}) — #{children.length} narrower"
131
+ end
132
+
133
+ collection.save_grouped_concepts_to_files(DIR)
134
+ puts "Saved #{collection.count} concepts to #{DIR}"
@@ -0,0 +1,153 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Migration script for isotc211-glossary: adds v3 fields to existing concepts.
5
+ #
6
+ # Adds:
7
+ # - domains (ISO standard ConceptReference) at managed concept data level
8
+ # - related (broader to standard domain) at managed concept level
9
+ # - Creates domain concepts per ISO standard (with narrower relations)
10
+ #
11
+ # Idempotent: safe to run multiple times.
12
+ #
13
+ # Usage:
14
+ # cd /Users/mulgogi/src/glossarist/glossarist-ruby
15
+ # bundle exec ruby scripts/migrate_isotc211_to_v3.rb
16
+
17
+ require "glossarist"
18
+
19
+ DIR = "/Users/mulgogi/src/geolexica/isotc211-glossary/concepts"
20
+
21
+ ISO_SOURCE_URN = "urn:iso:std:iso"
22
+
23
+ # Extract a stable domain ID from an authoritative source reference string.
24
+ # @param ref_text [String] e.g. "ISO 19136-1:2020", "ISO/IEC 19501:2005"
25
+ # @return [String, nil] domain ID e.g. "iso-19136-1", "iso-iec-19501"
26
+ def extract_domain_id(ref_text)
27
+ # Match various ISO reference patterns
28
+ patterns = [
29
+ %r{ISO/IEC/IEEE\s+([\d-]+)}, # ISO/IEC/IEEE 24765:2017
30
+ %r{ISO/IEC\s+([\d-]+)}, # ISO/IEC 19501:2005
31
+ %r{ISO/TS\s+([\d-]+)}, # ISO/TS 19130:2010
32
+ %r{ISO/TR\s+([\d-]+)}, # ISO/TR 19120:2001
33
+ %r{ISO/IEC\s+Guide\s+([\d-]+)}, # ISO/IEC Guide 98-3:2008
34
+ %r{ISO\s+DIS\s+([\d-]+)}, # ISO DIS 19123-1:2022
35
+ %r{ISO\s+([\d]+-?[\d]*)}, # ISO 19136-1:2020
36
+ ]
37
+
38
+ patterns.each do |pat|
39
+ if (m = ref_text.match(pat))
40
+ # Extract the full match, normalize
41
+ prefix = ref_text[m.begin(0)...m.begin(1)].strip
42
+ number = m[1]
43
+ domain = "#{prefix} #{number}"
44
+ return domain.downcase.gsub(/[\s\/]+/, "-")
45
+ end
46
+ end
47
+
48
+ nil
49
+ end
50
+
51
+ collection = Glossarist::ManagedConceptCollection.new
52
+ collection.load_from_files(DIR)
53
+
54
+ puts "Loaded #{collection.count} concepts"
55
+
56
+ # Build index: domain_id -> [concept_id]
57
+ domain_index = {}
58
+ concepts_with_domain = 0
59
+ concepts_without_domain = 0
60
+
61
+ collection.each do |concept|
62
+ eng = concept.localization("eng")
63
+ next unless eng
64
+
65
+ sources = eng.data&.sources
66
+ next unless sources
67
+
68
+ auth = sources.find { |s| s.type == "authoritative" }
69
+ next unless auth&.origin
70
+
71
+ ref_text = auth.origin.text || auth.origin.ref
72
+ unless ref_text
73
+ concepts_without_domain += 1
74
+ next
75
+ end
76
+
77
+ domain_id = extract_domain_id(ref_text)
78
+ unless domain_id
79
+ concepts_without_domain += 1
80
+ next
81
+ end
82
+
83
+ (domain_index[domain_id] ||= []) << concept.data.id
84
+ concepts_with_domain += 1
85
+
86
+ # Add domain ConceptReference
87
+ concept.data.domains ||= []
88
+ unless concept.data.domains.any? { |d| d.concept_id == domain_id }
89
+ concept.data.domains << Glossarist::ConceptReference.new(
90
+ concept_id: domain_id,
91
+ source: ISO_SOURCE_URN,
92
+ ref_type: "domain",
93
+ )
94
+ end
95
+
96
+ # Add broader relation to domain concept
97
+ concept.related ||= []
98
+ unless concept.related.any? { |r| r.type == "broader" && r.ref&.id == domain_id }
99
+ concept.related << Glossarist::RelatedConcept.new(
100
+ type: "broader",
101
+ content: domain_id,
102
+ ref: Glossarist::Citation.new(source: "ISO", id: domain_id),
103
+ )
104
+ end
105
+ end
106
+
107
+ puts "Added domains to #{concepts_with_domain} concepts"
108
+ puts "No domain extracted for #{concepts_without_domain} concepts"
109
+
110
+ # Create domain hierarchy concepts
111
+ domain_index.sort.each do |domain_id, child_ids|
112
+ mc = Glossarist::ManagedConcept.new(
113
+ data: Glossarist::ManagedConceptData.new(
114
+ id: domain_id,
115
+ domains: [Glossarist::ConceptReference.new(
116
+ concept_id: domain_id,
117
+ source: ISO_SOURCE_URN,
118
+ ref_type: "domain",
119
+ )],
120
+ ),
121
+ )
122
+ mc.status = "valid"
123
+
124
+ # Create a basic English localization with the domain ID as the term
125
+ l10n = Glossarist::LocalizedConcept.new
126
+ l10n.data = Glossarist::ConceptData.new(
127
+ id: domain_id,
128
+ language_code: "eng",
129
+ terms: [Glossarist::Designation::Expression.new(
130
+ type: "expression",
131
+ designation: domain_id,
132
+ normative_status: "preferred",
133
+ )],
134
+ )
135
+ l10n.entry_status = "valid"
136
+ mc.add_l10n(l10n)
137
+
138
+ # Add narrower relations to child concepts
139
+ narrower = child_ids.sort.map do |child_id|
140
+ Glossarist::RelatedConcept.new(
141
+ type: "narrower",
142
+ content: child_id.to_s,
143
+ ref: Glossarist::Citation.new(source: "ISO", id: child_id.to_s),
144
+ )
145
+ end
146
+ mc.related = narrower
147
+
148
+ collection.store(mc)
149
+ puts "Created domain: #{domain_id} — #{child_ids.size} narrower"
150
+ end
151
+
152
+ collection.save_grouped_concepts_to_files(DIR)
153
+ puts "Saved #{collection.count} concepts to #{DIR}"