glossarist 2.6.5 → 2.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +1 -4
  3. data/.rubocop_todo.yml +53 -2
  4. data/CLAUDE.md +27 -2
  5. data/README.adoc +532 -56
  6. data/config.yml +68 -1
  7. data/glossarist.gemspec +2 -0
  8. data/lib/glossarist/citation.rb +26 -123
  9. data/lib/glossarist/cli/compare_command.rb +106 -0
  10. data/lib/glossarist/cli/export_command.rb +11 -14
  11. data/lib/glossarist/cli/validate_command.rb +111 -20
  12. data/lib/glossarist/cli.rb +18 -0
  13. data/lib/glossarist/collections/bibliography_collection.rb +4 -2
  14. data/lib/glossarist/collections/localization_collection.rb +2 -0
  15. data/lib/glossarist/comparison_result.rb +35 -0
  16. data/lib/glossarist/concept.rb +1 -1
  17. data/lib/glossarist/concept_collector.rb +44 -0
  18. data/lib/glossarist/concept_comparator.rb +72 -0
  19. data/lib/glossarist/concept_data.rb +20 -0
  20. data/lib/glossarist/concept_diff.rb +15 -0
  21. data/lib/glossarist/concept_document.rb +11 -0
  22. data/lib/glossarist/concept_manager.rb +19 -5
  23. data/lib/glossarist/concept_ref.rb +13 -0
  24. data/lib/glossarist/concept_reference.rb +12 -19
  25. data/lib/glossarist/concept_validator.rb +6 -1
  26. data/lib/glossarist/context_configuration.rb +90 -0
  27. data/lib/glossarist/dataset_validator.rb +8 -4
  28. data/lib/glossarist/designation/abbreviation.rb +0 -2
  29. data/lib/glossarist/designation/base.rb +21 -1
  30. data/lib/glossarist/designation/expression.rb +3 -0
  31. data/lib/glossarist/designation/letter_symbol.rb +0 -4
  32. data/lib/glossarist/designation/prefix.rb +17 -0
  33. data/lib/glossarist/designation/suffix.rb +17 -0
  34. data/lib/glossarist/designation/symbol.rb +0 -2
  35. data/lib/glossarist/gcr_metadata.rb +7 -14
  36. data/lib/glossarist/gcr_package.rb +35 -23
  37. data/lib/glossarist/gcr_validator.rb +38 -17
  38. data/lib/glossarist/glossary_definition.rb +5 -0
  39. data/lib/glossarist/localized_concept.rb +8 -0
  40. data/lib/glossarist/managed_concept.rb +39 -6
  41. data/lib/glossarist/managed_concept_data.rb +22 -2
  42. data/lib/glossarist/non_verb_rep.rb +21 -6
  43. data/lib/glossarist/pronunciation.rb +32 -0
  44. data/lib/glossarist/rdf/ext/jsonld_transform_ext.rb +208 -0
  45. data/lib/glossarist/rdf/ext/mapping_ext.rb +37 -0
  46. data/lib/glossarist/rdf/ext/mapping_rule_ext.rb +27 -0
  47. data/lib/glossarist/rdf/ext/member_rule_ext.rb +34 -0
  48. data/lib/glossarist/rdf/ext/turtle_transform_ext.rb +222 -0
  49. data/lib/glossarist/rdf/ext.rb +39 -0
  50. data/lib/glossarist/rdf/gloss_citation.rb +36 -0
  51. data/lib/glossarist/rdf/gloss_concept.rb +58 -0
  52. data/lib/glossarist/rdf/gloss_concept_date.rb +24 -0
  53. data/lib/glossarist/rdf/gloss_concept_reference.rb +29 -0
  54. data/lib/glossarist/rdf/gloss_concept_source.rb +37 -0
  55. data/lib/glossarist/rdf/gloss_designation.rb +146 -0
  56. data/lib/glossarist/rdf/gloss_detailed_definition.rb +24 -0
  57. data/lib/glossarist/rdf/gloss_grammar_info.rb +57 -0
  58. data/lib/glossarist/rdf/gloss_locality.rb +25 -0
  59. data/lib/glossarist/rdf/gloss_localized_concept.rb +67 -0
  60. data/lib/glossarist/rdf/gloss_non_verbal_rep.rb +31 -0
  61. data/lib/glossarist/rdf/gloss_pronunciation.rb +32 -0
  62. data/lib/glossarist/rdf/gloss_reference.rb +55 -0
  63. data/lib/glossarist/rdf/namespaces/glossarist_namespace.rb +12 -0
  64. data/lib/glossarist/rdf/namespaces/iso_thes_namespace.rb +12 -0
  65. data/lib/glossarist/rdf/namespaces/owl_namespace.rb +12 -0
  66. data/lib/glossarist/rdf/namespaces/prov_namespace.rb +12 -0
  67. data/lib/glossarist/rdf/namespaces/rdf_namespace.rb +12 -0
  68. data/lib/glossarist/rdf/namespaces/skosxl_namespace.rb +12 -0
  69. data/lib/glossarist/rdf/namespaces.rb +8 -2
  70. data/lib/glossarist/rdf/relationships.rb +19 -0
  71. data/lib/glossarist/rdf/v3/configuration.rb +15 -0
  72. data/lib/glossarist/rdf/v3.rb +79 -0
  73. data/lib/glossarist/rdf.rb +22 -2
  74. data/lib/glossarist/reference_extractor.rb +15 -24
  75. data/lib/glossarist/reference_resolver.rb +3 -3
  76. data/lib/glossarist/related_concept.rb +2 -10
  77. data/lib/glossarist/schema_migration.rb +39 -0
  78. data/lib/glossarist/sts/term_mapper.rb +2 -2
  79. data/lib/glossarist/transforms/concept_to_gloss_transform.rb +355 -0
  80. data/lib/glossarist/transforms.rb +2 -2
  81. data/lib/glossarist/urn_resolver.rb +13 -1
  82. data/lib/glossarist/v1/concept.rb +18 -11
  83. data/lib/glossarist/v2/citation.rb +36 -0
  84. data/lib/glossarist/v2/concept_data.rb +46 -0
  85. data/lib/glossarist/v2/concept_document.rb +18 -0
  86. data/lib/glossarist/v2/concept_ref.rb +8 -0
  87. data/lib/glossarist/v2/concept_source.rb +16 -0
  88. data/lib/glossarist/v2/configuration.rb +13 -0
  89. data/lib/glossarist/v2/detailed_definition.rb +14 -0
  90. data/lib/glossarist/v2/localized_concept.rb +9 -0
  91. data/lib/glossarist/v2/managed_concept.rb +25 -0
  92. data/lib/glossarist/v2/managed_concept_data.rb +49 -0
  93. data/lib/glossarist/v2/related_concept.rb +15 -0
  94. data/lib/glossarist/v2.rb +28 -0
  95. data/lib/glossarist/v3/bibliography_entry.rb +19 -0
  96. data/lib/glossarist/v3/bibliography_file.rb +27 -0
  97. data/lib/glossarist/v3/citation.rb +30 -0
  98. data/lib/glossarist/v3/concept_data.rb +46 -0
  99. data/lib/glossarist/v3/concept_document.rb +18 -0
  100. data/lib/glossarist/v3/concept_ref.rb +8 -0
  101. data/lib/glossarist/v3/concept_source.rb +16 -0
  102. data/lib/glossarist/v3/configuration.rb +13 -0
  103. data/lib/glossarist/v3/detailed_definition.rb +14 -0
  104. data/lib/glossarist/v3/image_entry.rb +21 -0
  105. data/lib/glossarist/v3/image_file.rb +31 -0
  106. data/lib/glossarist/v3/localized_concept.rb +9 -0
  107. data/lib/glossarist/v3/managed_concept.rb +26 -0
  108. data/lib/glossarist/v3/managed_concept_data.rb +34 -0
  109. data/lib/glossarist/v3/related_concept.rb +15 -0
  110. data/lib/glossarist/v3.rb +36 -0
  111. data/lib/glossarist/validation/asset_index.rb +4 -3
  112. data/lib/glossarist/validation/bibliography_index.rb +61 -30
  113. data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +2 -15
  114. data/lib/glossarist/validation/rules/authoritative_source_rule.rb +2 -15
  115. data/lib/glossarist/validation/rules/base.rb +5 -0
  116. data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +2 -3
  117. data/lib/glossarist/validation/rules/citation_completeness_rule.rb +5 -27
  118. data/lib/glossarist/validation/rules/dataset_context.rb +8 -3
  119. data/lib/glossarist/validation/rules/date_validity_rule.rb +1 -1
  120. data/lib/glossarist/validation/rules/designation_status_rule.rb +0 -1
  121. data/lib/glossarist/validation/rules/designation_type_rule.rb +1 -5
  122. data/lib/glossarist/validation/rules/domain_ref_rule.rb +37 -0
  123. data/lib/glossarist/validation/rules/domain_target_rule.rb +56 -0
  124. data/lib/glossarist/validation/rules/gcr_context.rb +12 -13
  125. data/lib/glossarist/validation/rules/image_reference_rule.rb +2 -17
  126. data/lib/glossarist/validation/rules/locality_completeness_rule.rb +58 -0
  127. data/lib/glossarist/validation/rules/localization_consistency_rule.rb +72 -0
  128. data/lib/glossarist/validation/rules/localization_presence_rule.rb +1 -1
  129. data/lib/glossarist/validation/rules/model_validity_rule.rb +71 -0
  130. data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +1 -13
  131. data/lib/glossarist/validation/rules/orphaned_images_rule.rb +16 -11
  132. data/lib/glossarist/validation/rules/ref_shape_rule.rb +68 -0
  133. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +1 -3
  134. data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +1 -3
  135. data/lib/glossarist/validation/rules/related_concept_target_rule.rb +64 -0
  136. data/lib/glossarist/validation/rules/schema_version_rule.rb +41 -0
  137. data/lib/glossarist/validation/rules/source_type_rule.rb +1 -15
  138. data/lib/glossarist/validation/rules/source_urn_format_rule.rb +65 -0
  139. data/lib/glossarist/validation/rules/uuid_format_rule.rb +33 -0
  140. data/lib/glossarist/validation/rules.rb +10 -43
  141. data/lib/glossarist/validation/validation_issue.rb +14 -11
  142. data/lib/glossarist/validation_result.rb +12 -22
  143. data/lib/glossarist/version.rb +1 -1
  144. data/lib/glossarist.rb +10 -0
  145. data/memory/project-status.md +43 -0
  146. data/scripts/migrate_dataset.rb +180 -0
  147. data/scripts/migrate_isotc204_to_v3.rb +134 -0
  148. data/scripts/migrate_isotc211_to_v3.rb +153 -0
  149. data/scripts/migrate_osgeo_to_v3.rb +155 -0
  150. data/scripts/upgrade_dataset_to_v3.rb +47 -0
  151. metadata +112 -6
  152. data/TODO.integration/01-gcr-package-cli.md +0 -180
  153. data/lib/glossarist/rdf/skos_concept.rb +0 -43
  154. data/lib/glossarist/rdf/skos_vocabulary.rb +0 -25
  155. data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -131
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Load shared infrastructure first
4
+ require_relative "ext"
5
+ require_relative "namespaces"
6
+ require_relative "relationships"
7
+ require_relative "localized_literal"
8
+
9
+ # V3 Configuration must be loaded before view classes are registered
10
+ require_relative "v3/configuration"
11
+
12
+ # Load all view class files (must precede V3 constant assignments)
13
+ require_relative "gloss_locality"
14
+ require_relative "gloss_reference"
15
+ require_relative "gloss_concept_source"
16
+ require_relative "gloss_detailed_definition"
17
+ require_relative "gloss_pronunciation"
18
+ require_relative "gloss_grammar_info"
19
+ require_relative "gloss_non_verbal_rep"
20
+ require_relative "gloss_concept_date"
21
+ require_relative "gloss_designation"
22
+ require_relative "gloss_localized_concept"
23
+ require_relative "gloss_concept"
24
+
25
+ module Glossarist
26
+ module Rdf
27
+ # V3 is the current (and only) RDF schema version.
28
+ #
29
+ # All RDF view classes produce v3 gloss ontology output:
30
+ # namespace URI: https://www.glossarist.org/ontologies/v3/
31
+ #
32
+ # Each version has its own Configuration module with a unique CONTEXT_ID
33
+ # so that V3 and (future) V4 classes are isolated in separate
34
+ # Lutaml::Model::GlobalContext registries.
35
+ #
36
+ # When v4 is added:
37
+ # - Create Rdf::V4::Configuration with CONTEXT_ID = :glossarist_rdf_v4
38
+ # - Create V4 view classes (standalone or inheriting from V3)
39
+ # - Register V4 classes in Rdf::V4::Configuration
40
+ # - Update ConceptToGlossTransform to support v4
41
+ # - V3 and V4 coexist — consumers choose which to use
42
+ module V3
43
+ # Namespace
44
+ GlossaristNamespace = Namespaces::GlossaristNamespace
45
+
46
+ # Single source of truth: add new view classes here once (each name must already resolve under Glossarist::Rdf, e.g. via the requires above).
47
+ # Each entry is registered in the V3 type registry and
48
+ # re-exported as a V3:: constant.
49
+ VIEW_CLASS_NAMES = %i[
50
+ GlossLocality
51
+ GlossPronunciation
52
+ GlossGrammarInfo
53
+ GlossConceptDate
54
+ GlossReference
55
+ GlossConceptSource
56
+ GlossDetailedDefinition
57
+ GlossNonVerbalRep
58
+ GlossDesignation
59
+ GlossExpression
60
+ GlossAbbreviation
61
+ GlossSymbol
62
+ GlossLetterSymbol
63
+ GlossGraphicalSymbol
64
+ GlossPrefix
65
+ GlossSuffix
66
+ GlossLocalizedConcept
67
+ GlossConcept
68
+ GlossDocument
69
+ ].freeze
70
+
71
+ VIEW_CLASS_NAMES.each do |name|
72
+ klass = ::Glossarist::Rdf.const_get(name)
73
+ registry_id = name.to_s.gsub(/([A-Z])/) { |c| "_#{c.downcase}" }.sub(/^_/, "").to_sym
74
+ Configuration.register_model(klass, id: registry_id)
75
+ const_set(name, klass)
76
+ end
77
+ end
78
+ end
79
+ end
@@ -1,10 +1,30 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # Load lutaml-model RDF extensions before any RDF view classes
4
+ require_relative "rdf/ext"
5
+
3
6
  module Glossarist
4
7
  module Rdf
5
8
  autoload :Namespaces, "#{__dir__}/rdf/namespaces"
6
9
  autoload :LocalizedLiteral, "#{__dir__}/rdf/localized_literal"
7
- autoload :SkosConcept, "#{__dir__}/rdf/skos_concept"
8
- autoload :SkosVocabulary, "#{__dir__}/rdf/skos_vocabulary"
10
+ autoload :Relationships, "#{__dir__}/rdf/relationships"
11
+ autoload :GlossLocality, "#{__dir__}/rdf/gloss_locality"
12
+ autoload :GlossCitation, "#{__dir__}/rdf/gloss_citation"
13
+ autoload :GlossConceptSource, "#{__dir__}/rdf/gloss_concept_source"
14
+ autoload :GlossDetailedDefinition, "#{__dir__}/rdf/gloss_detailed_definition"
15
+ autoload :GlossPronunciation, "#{__dir__}/rdf/gloss_pronunciation"
16
+ autoload :GlossGrammarInfo, "#{__dir__}/rdf/gloss_grammar_info"
17
+ autoload :GlossNonVerbalRep, "#{__dir__}/rdf/gloss_non_verbal_rep"
18
+ autoload :GlossConceptReference, "#{__dir__}/rdf/gloss_concept_reference"
19
+ autoload :GlossConceptDate, "#{__dir__}/rdf/gloss_concept_date"
20
+ autoload :GlossDesignation, "#{__dir__}/rdf/gloss_designation"
21
+ autoload :GlossExpression, "#{__dir__}/rdf/gloss_designation"
22
+ autoload :GlossAbbreviation, "#{__dir__}/rdf/gloss_designation"
23
+ autoload :GlossSymbol, "#{__dir__}/rdf/gloss_designation"
24
+ autoload :GlossLetterSymbol, "#{__dir__}/rdf/gloss_designation"
25
+ autoload :GlossGraphicalSymbol, "#{__dir__}/rdf/gloss_designation"
26
+ autoload :GlossLocalizedConcept, "#{__dir__}/rdf/gloss_localized_concept"
27
+ autoload :GlossConcept, "#{__dir__}/rdf/gloss_concept"
28
+ autoload :GlossDocument, "#{__dir__}/rdf/gloss_concept"
9
29
  end
10
30
  end
@@ -62,11 +62,7 @@ module Glossarist
62
62
  end
63
63
 
64
64
  def extract_from_localized_concept(l10n)
65
- texts = []
66
- l10n.data.definition&.each { |d| texts << d.content if d.content }
67
- l10n.data.notes&.each { |n| texts << n.content if n.content }
68
- l10n.data.examples&.each { |e| texts << e.content if e.content }
69
- texts.flat_map { |t| extract_from_text(t) }
65
+ l10n.text_content.flat_map { |t| extract_from_text(t) }
70
66
  end
71
67
 
72
68
  # Unified concept mention dispatcher.
@@ -157,11 +153,10 @@ module Glossarist
157
153
  refs = []
158
154
 
159
155
  concept.localizations.each do |l10n|
160
- nvr = l10n.non_verb_rep
161
- if nvr.is_a?(String) && !nvr.strip.empty?
162
- nvr.strip.split.each do |p|
163
- refs << AssetReference.new(path: p) unless p.empty?
164
- end
156
+ Array(l10n.non_verb_rep).each do |nvr|
157
+ next unless nvr.is_a?(NonVerbRep) && nvr.ref && !nvr.ref.strip.empty?
158
+
159
+ refs << AssetReference.new(path: nvr.ref.strip)
165
160
  end
166
161
 
167
162
  (l10n.data&.terms || []).each do |term|
@@ -178,19 +173,23 @@ module Glossarist
178
173
  def extract_bib_refs_from_concept(concept)
179
174
  refs = []
180
175
  concept.localizations.each do |l10n|
181
- gather_all_sources(l10n).each do |source|
176
+ l10n.all_sources.each do |source|
182
177
  origin = source.origin
183
178
  next unless origin
184
179
 
185
- if origin.text && !origin.text.strip.empty?
186
- refs << BibliographicReference.new(anchor: origin.text)
180
+ ref = origin.ref
181
+ next unless ref
182
+
183
+ source_text = ref.source
184
+ if source_text && !source_text.strip.empty?
185
+ refs << BibliographicReference.new(anchor: source_text)
187
186
  end
188
187
 
189
- next unless origin.source && origin.id
188
+ next unless ref.source && ref.id
190
189
 
191
- key = "#{origin.source} #{origin.id}"
190
+ key = "#{ref.source} #{ref.id}"
192
191
  refs << BibliographicReference.new(anchor: key)
193
- refs << BibliographicReference.new(anchor: origin.id.to_s)
192
+ refs << BibliographicReference.new(anchor: ref.id.to_s)
194
193
  end
195
194
  end
196
195
  refs
@@ -285,13 +284,5 @@ module Glossarist
285
284
  register_identifier_resolver("urn:") do |ext, identifier, display|
286
285
  ext.resolve_generic_urn(identifier, display)
287
286
  end
288
-
289
- def gather_all_sources(l10n)
290
- sources = Array(l10n.data&.sources)
291
- sources += Array((l10n.data&.definition || []).flat_map(&:sources).compact)
292
- sources += Array((l10n.data&.notes || []).flat_map(&:sources).compact)
293
- sources += Array((l10n.data&.examples || []).flat_map(&:sources).compact)
294
- sources
295
- end
296
287
  end
297
288
  end
@@ -116,7 +116,7 @@ module Glossarist
116
116
 
117
117
  def infer_uri_prefix(package_or_concepts)
118
118
  case package_or_concepts
119
- when GcrPackage then package_or_concepts.metadata&.dig("uri_prefix")
119
+ when GcrPackage then package_or_concepts.metadata&.uri_prefix
120
120
  end
121
121
  end
122
122
 
@@ -128,7 +128,7 @@ module Glossarist
128
128
  next unless File.exist?(gcr_path)
129
129
 
130
130
  gcr = GcrPackage.load(gcr_path)
131
- prefix = pkg["uri_prefix"] || gcr.metadata&.dig("uri_prefix")
131
+ prefix = pkg["uri_prefix"] || gcr.metadata&.uri_prefix
132
132
  register_package(gcr, uri_prefix: prefix)
133
133
  end
134
134
 
@@ -145,7 +145,7 @@ module Glossarist
145
145
  def load_gcr_directory(dir)
146
146
  Dir.glob(File.join(dir, "*.gcr")).each do |gcr_path|
147
147
  pkg = GcrPackage.load(gcr_path)
148
- prefix = pkg.metadata&.dig("uri_prefix")
148
+ prefix = pkg.metadata&.uri_prefix
149
149
  register_package(pkg, uri_prefix: prefix)
150
150
  end
151
151
  end
@@ -5,20 +5,12 @@ module Glossarist
5
5
  attribute :content, :string
6
6
  attribute :type, :string,
7
7
  values: Glossarist::GlossaryDefinition::RELATED_CONCEPT_TYPES
8
- attribute :ref, Citation
8
+ attribute :ref, ConceptRef
9
9
 
10
10
  key_value do
11
11
  map :content, to: :content
12
12
  map :type, to: :type
13
- map :ref, with: { from: :ref_from_yaml, to: :ref_to_yaml }
14
- end
15
-
16
- def ref_to_yaml(model, doc)
17
- doc["ref"] = Citation.as_yaml(model.ref)["ref"] if model.ref
18
- end
19
-
20
- def ref_from_yaml(model, value)
21
- model.ref = Citation.of_yaml(value)
13
+ map :ref, to: :ref
22
14
  end
23
15
  end
24
16
  end
@@ -6,6 +6,45 @@ module Glossarist
6
6
  class SchemaMigration
7
7
  CURRENT_SCHEMA_VERSION = "1"
8
8
 
9
+ def self.migrate_concept(concept, target_version: Glossarist::SCHEMA_VERSION)
10
+ current = concept_version(concept)
11
+ target = target_version.to_s
12
+
13
+ return concept if current == target
14
+
15
+ max_steps = 5
16
+ max_steps.times do
17
+ break if current == target
18
+
19
+ case current
20
+ when "2" then current = step_v2_to_v3(concept)
21
+ else
22
+ raise Error, "No concept migration step from version #{current}"
23
+ end
24
+ end
25
+
26
+ raise Error, "Migration chain too long or unresolvable" unless current == target
27
+
28
+ concept.schema_version = target
29
+ concept
30
+ end
31
+
32
+ def self.concept_version(concept)
33
+ version = concept.schema_version
34
+ return version.to_s if version && !version.to_s.empty?
35
+
36
+ ManagedConcept.detect_schema_version(concept)
37
+ end
38
+
39
+ def self.step_v2_to_v3(concept)
40
+ if concept.data&.related&.any?
41
+ concept.related ||= []
42
+ concept.related = (concept.related + concept.data.related).uniq
43
+ concept.data.related = []
44
+ end
45
+ "3"
46
+ end
47
+
9
48
  ENTRY_STATUS_MAP = {
10
49
  "Standard" => "valid",
11
50
  "Confirmed" => "valid",
@@ -98,7 +98,7 @@ module Glossarist
98
98
  sources << {
99
99
  "status" => "identical",
100
100
  "type" => "authoritative",
101
- "origin" => { "text" => source_ref },
101
+ "origin" => { "ref" => { "source" => source_ref } },
102
102
  }
103
103
  end
104
104
 
@@ -107,7 +107,7 @@ module Glossarist
107
107
 
108
108
  sources << {
109
109
  "type" => "authoritative",
110
- "origin" => { "text" => text },
110
+ "origin" => { "ref" => { "source" => text } },
111
111
  }
112
112
  end
113
113
 
@@ -0,0 +1,355 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Transforms
5
+ # Transforms Glossarist domain model objects into ontology-faithful RDF
6
+ # using lutaml-model serializable view classes.
7
+ #
8
+ # Creates GlossConcept/GlossLocalizedConcept/GlossDesignation instances
9
+ # and delegates Turtle/JSON-LD serialization to lutaml-model.
10
+ class ConceptToGlossTransform
11
+ GLOSS = Rdf::Namespaces::GlossaristNamespace.uri
12
+ SKOS = Rdf::Namespaces::SkosNamespace.uri
13
+ XL = Rdf::Namespaces::SkosxlNamespace.uri
14
+ ISO = Rdf::Namespaces::IsoThesNamespace.uri
15
+ DCT = Rdf::Namespaces::DctermsNamespace.uri
16
+ RDF_NS = Rdf::Namespaces::RdfNamespace.uri
17
+
18
+ REL_PROPERTY_MAP = {
19
+ "broader" => "#{SKOS}broader",
20
+ "narrower" => "#{SKOS}narrower",
21
+ "broader_generic" => "#{ISO}broaderGeneric",
22
+ "narrower_generic" => "#{ISO}narrowerGeneric",
23
+ "broader_partitive" => "#{ISO}broaderPartitive",
24
+ "narrower_partitive" => "#{ISO}narrowerPartitive",
25
+ "broader_instantial" => "#{ISO}broaderInstantial",
26
+ "narrower_instantial" => "#{ISO}narrowerInstantial",
27
+ "equivalent" => "#{SKOS}exactMatch",
28
+ "close_match" => "#{SKOS}closeMatch",
29
+ "broad_match" => "#{SKOS}broadMatch",
30
+ "narrow_match" => "#{SKOS}narrowMatch",
31
+ "related_match" => "#{SKOS}relatedMatch",
32
+ "see" => "#{SKOS}related",
33
+ "deprecates" => "#{GLOSS}deprecates",
34
+ "supersedes" => "#{GLOSS}supersedes",
35
+ "superseded_by" => "#{GLOSS}supersededBy",
36
+ "compare" => "#{GLOSS}compares",
37
+ "contrast" => "#{GLOSS}contrasts",
38
+ "sequentially_related_concept" => "#{GLOSS}sequentiallyRelated",
39
+ "spatially_related_concept" => "#{GLOSS}spatiallyRelated",
40
+ "temporally_related_concept" => "#{GLOSS}temporallyRelated",
41
+ "homograph" => "#{GLOSS}hasHomograph",
42
+ "false_friend" => "#{GLOSS}hasFalseFriend",
43
+ "related_concept_broader" => "#{GLOSS}relatedConceptBroader",
44
+ "related_concept_narrower" => "#{GLOSS}relatedConceptNarrower",
45
+ "abbreviated_form_for" => "#{GLOSS}abbreviatedFormFor",
46
+ "short_form_for" => "#{GLOSS}shortFormFor",
47
+ }.freeze
48
+
49
+ DATE_TYPE_MAP = {
50
+ "accepted" => "#{GLOSS}status/accepted",
51
+ "amended" => "#{GLOSS}status/amended",
52
+ "retired" => "#{GLOSS}status/retired",
53
+ }.freeze
54
+
55
+ def self.transform(managed_concept, options = {})
56
+ new(managed_concept, options).build
57
+ end
58
+
59
+ def self.transform_document(concepts, options = {})
60
+ new(nil, options).build_document(concepts)
61
+ end
62
+
63
+ def initialize(managed_concept, options = {})
64
+ @concept = managed_concept
65
+ @options = options
66
+ end
67
+
68
+ def build
69
+ build_gloss_concept(concept)
70
+ end
71
+
72
+ def build_document(concepts)
73
+ gloss_concepts = concepts.map { |c| build_gloss_concept(c) }
74
+ doc = Rdf::GlossDocument.new(concepts: gloss_concepts)
75
+ Rdf::GlossDocument.to_turtle(doc)
76
+ end
77
+
78
+ def to_turtle(concepts_or_concept = nil)
79
+ if concepts_or_concept.is_a?(Array)
80
+ build_document(concepts_or_concept)
81
+ else
82
+ target = concepts_or_concept || @concept
83
+ return "" unless target
84
+
85
+ gc = build_gloss_concept(target)
86
+ Rdf::GlossConcept.to_turtle(gc)
87
+ end
88
+ end
89
+
90
+ def to_jsonld(concepts_or_concept = nil)
91
+ if concepts_or_concept.is_a?(Array)
92
+ gloss_concepts = concepts_or_concept.map { |c| build_gloss_concept(c) }
93
+ doc = Rdf::GlossDocument.new(concepts: gloss_concepts)
94
+ Rdf::GlossDocument.to_jsonld(doc)
95
+ else
96
+ target = concepts_or_concept || @concept
97
+ return "" unless target
98
+
99
+ gc = build_gloss_concept(target)
100
+ Rdf::GlossConcept.to_jsonld(gc)
101
+ end
102
+ end
103
+
104
+ def to_jsonl_line
105
+ return "" unless @concept
106
+
107
+ gc = build_gloss_concept(@concept)
108
+ Rdf::GlossConcept.to_jsonld(gc)
109
+ end
110
+
111
+ private
112
+
113
+ attr_reader :concept, :options
114
+
115
+ # ── Build RDF view instances from domain model ─────────────────────
116
+
117
+ def build_gloss_concept(managed_concept)
118
+ identifier = managed_concept.data&.id || managed_concept.identifier
119
+
120
+ localizations = managed_concept.localizations.each_value.map do |l10n|
121
+ build_gloss_localized_concept(l10n, identifier)
122
+ end
123
+
124
+ gc = Rdf::GlossConcept.new(
125
+ identifier: identifier.to_s,
126
+ status: status_uri(managed_concept.status),
127
+ localizations: localizations,
128
+ sources: build_gloss_sources(managed_concept.data&.sources),
129
+ domains: build_gloss_domains(managed_concept.data&.domains, identifier),
130
+ dates: build_gloss_dates(managed_concept.dates, identifier),
131
+ )
132
+
133
+ gc.relationship_triples = build_relationship_triples(managed_concept.related)
134
+ gc
135
+ end
136
+
137
+ def build_gloss_localized_concept(l10n, concept_id)
138
+ lang = l10n.language_code
139
+ data = l10n.data
140
+
141
+ designations = Array(l10n.designations).each_with_index.map do |desig, idx|
142
+ build_gloss_designation(desig, concept_id, lang, idx)
143
+ end
144
+
145
+ definitions = build_gloss_definitions(data&.definition)
146
+ notes = build_gloss_definitions(data&.notes)
147
+ examples = build_gloss_definitions(data&.examples)
148
+ sources = build_gloss_sources(data&.sources)
149
+ non_verb_reps = build_gloss_non_verbal_reps(l10n.non_verb_rep, concept_id, lang)
150
+
151
+ Rdf::GlossLocalizedConcept.new(
152
+ concept_id: concept_id.to_s,
153
+ language_code: lang,
154
+ domain: data&.domain,
155
+ entry_status: data&.entry_status ? "gloss:entstatus/#{data.entry_status}" : nil,
156
+ release: data&.release,
157
+ lineage_similarity: data&.lineage_source_similarity,
158
+ script: data&.script,
159
+ system: data&.system,
160
+ designations: designations,
161
+ definitions: definitions,
162
+ notes: notes,
163
+ examples: examples,
164
+ sources: sources,
165
+ non_verb_reps: non_verb_reps,
166
+ )
167
+ end
168
+
169
+ def build_gloss_designation(desig, concept_id, lang, index)
170
+ common_attrs = designation_common_attrs(desig, concept_id, lang, index)
171
+
172
+ instance = case desig
173
+ when Designation::Abbreviation
174
+ build_gloss_abbreviation(desig, common_attrs, concept_id, lang, index)
175
+ when Designation::Expression
176
+ build_gloss_expression(desig, common_attrs, concept_id, lang, index)
177
+ when Designation::GraphicalSymbol
178
+ Rdf::GlossGraphicalSymbol.new(common_attrs.merge(text: desig.text, image: desig.image))
179
+ when Designation::LetterSymbol
180
+ Rdf::GlossLetterSymbol.new(common_attrs.merge(text: desig.text))
181
+ when Designation::Symbol
182
+ Rdf::GlossSymbol.new(common_attrs)
183
+ else
184
+ Rdf::GlossExpression.new(common_attrs)
185
+ end
186
+
187
+ instance.relationship_triples = build_relationship_triples(desig.related)
188
+ instance
189
+ end
190
+
191
+ def designation_common_attrs(desig, concept_id, lang, index)
192
+ norm_status = desig.normative_status
193
+ {
194
+ designation: desig.designation,
195
+ normative_status: norm_status ? "gloss:norm/#{norm_status}" : nil,
196
+ type: desig.type,
197
+ language: desig.language || lang,
198
+ script: desig.script,
199
+ system: desig.system,
200
+ international: desig.international,
201
+ absent: desig.absent,
202
+ term_type: desig.term_type ? "gloss:termtype/#{desig.term_type}" : nil,
203
+ concept_id: concept_id.to_s,
204
+ lang_code: (desig.language || lang).to_s,
205
+ index: index.to_s,
206
+ pronunciations: build_gloss_pronunciations(desig.pronunciation, concept_id, lang, index),
207
+ sources: build_gloss_sources(desig.sources),
208
+ }
209
+ end
210
+
211
+ def build_gloss_abbreviation(desig, common_attrs, concept_id, lang, index)
212
+ Rdf::GlossAbbreviation.new(common_attrs.merge(
213
+ prefix: desig.prefix,
214
+ usage_info: desig.usage_info,
215
+ field_of_application: desig.field_of_application,
216
+ acronym: desig.acronym,
217
+ initialism: desig.initialism,
218
+ truncation: desig.truncation,
219
+ grammar_info: build_gloss_grammar_infos(desig.grammar_info, concept_id, lang, index),
220
+ ))
221
+ end
222
+
223
+ def build_gloss_expression(desig, common_attrs, concept_id, lang, index)
224
+ Rdf::GlossExpression.new(common_attrs.merge(
225
+ prefix: desig.prefix,
226
+ usage_info: desig.usage_info,
227
+ field_of_application: desig.field_of_application,
228
+ grammar_info: build_gloss_grammar_infos(desig.grammar_info, concept_id, lang, index),
229
+ ))
230
+ end
231
+
232
+ def build_gloss_definitions(definitions)
233
+ Array(definitions).map do |dd|
234
+ Rdf::GlossDetailedDefinition.new(
235
+ content: dd.content,
236
+ sources: build_gloss_sources(dd.sources),
237
+ )
238
+ end
239
+ end
240
+
241
+ def build_gloss_sources(sources)
242
+ Array(sources).map do |src|
243
+ origin = src.origin ? build_gloss_citation(src.origin) : nil
244
+ Rdf::GlossConceptSource.new(
245
+ status: src.status ? "gloss:srcstatus/#{src.status}" : nil,
246
+ type: src.type ? "gloss:srctype/#{src.type}" : nil,
247
+ modification: src.modification,
248
+ origin: origin,
249
+ )
250
+ end
251
+ end
252
+
253
+ def build_gloss_citation(citation)
254
+ locality = citation.locality ? build_gloss_locality(citation.locality) : nil
255
+ ref = citation.ref
256
+
257
+ Rdf::GlossCitation.new(
258
+ source: ref&.source,
259
+ id: ref&.id,
260
+ version: ref&.version,
261
+ link: citation.link,
262
+ locality: locality,
263
+ )
264
+ end
265
+
266
+ def build_gloss_locality(loc)
267
+ Rdf::GlossLocality.new(
268
+ locality_type: loc.type,
269
+ reference_from: loc.reference_from,
270
+ reference_to: loc.reference_to,
271
+ )
272
+ end
273
+
274
+ def build_gloss_pronunciations(pronunciations, concept_id, lang, _desig_index)
275
+ Array(pronunciations).each_with_index.map do |pron, idx|
276
+ Rdf::GlossPronunciation.new(
277
+ content: pron.content,
278
+ language: pron.language,
279
+ script: pron.script,
280
+ country: pron.country,
281
+ system: pron.system,
282
+ concept_id: concept_id.to_s,
283
+ lang_code: lang.to_s,
284
+ index: idx.to_s,
285
+ )
286
+ end
287
+ end
288
+
289
+ def build_gloss_grammar_infos(grammar_infos, concept_id, lang, desig_index)
290
+ Array(grammar_infos).map do |gi|
291
+ Rdf::GlossGrammarInfo.new(
292
+ gender: Array(gi.gender).map { |g| "gloss:gender/#{g}" },
293
+ number: Array(gi.number).map { |n| "gloss:number/#{n}" },
294
+ part_of_speech: gi.part_of_speech,
295
+ concept_id: concept_id.to_s,
296
+ lang_code: lang.to_s,
297
+ index: desig_index.to_s,
298
+ )
299
+ end
300
+ end
301
+
302
+ def build_gloss_non_verbal_reps(non_verb_reps, concept_id, lang)
303
+ Array(non_verb_reps).each_with_index.map do |nvr, idx|
304
+ Rdf::GlossNonVerbalRep.new(
305
+ representation_type: nvr.type,
306
+ representation_ref: nvr.ref,
307
+ representation_text: nvr.text,
308
+ sources: build_gloss_sources(nvr.sources),
309
+ concept_id: concept_id.to_s,
310
+ lang_code: lang.to_s,
311
+ index: idx.to_s,
312
+ )
313
+ end
314
+ end
315
+
316
+ def build_gloss_domains(domains, concept_id)
317
+ Array(domains).map do |ref|
318
+ Rdf::GlossConceptReference.new(
319
+ concept_id: ref.concept_id,
320
+ source: ref.source,
321
+ ref_type: ref.ref_type,
322
+ urn: ref.urn,
323
+ parent_id: concept_id.to_s,
324
+ )
325
+ end
326
+ end
327
+
328
+ def build_gloss_dates(dates, concept_id)
329
+ Array(dates).map do |date|
330
+ Rdf::GlossConceptDate.new(
331
+ date_value: date.date&.to_s,
332
+ date_type: DATE_TYPE_MAP[date.type] || "gloss:status/#{date.type}",
333
+ concept_id: concept_id.to_s,
334
+ )
335
+ end
336
+ end
337
+
338
+ def build_relationship_triples(related_concepts)
339
+ Array(related_concepts).filter_map do |rc|
340
+ predicate_uri = REL_PROPERTY_MAP[rc.type]
341
+ next unless predicate_uri
342
+
343
+ target_id = rc.ref&.id
344
+ next unless target_id
345
+
346
+ [predicate_uri, "concept/#{target_id}"]
347
+ end
348
+ end
349
+
350
+ def status_uri(status)
351
+ status ? "gloss:status/#{status}" : nil
352
+ end
353
+ end
354
+ end
355
+ end
@@ -2,8 +2,8 @@
2
2
 
3
3
  module Glossarist
4
4
  module Transforms
5
- autoload :ConceptToSkosTransform,
6
- "glossarist/transforms/concept_to_skos_transform"
5
+ autoload :ConceptToGlossTransform,
6
+ "glossarist/transforms/concept_to_gloss_transform"
7
7
  autoload :ConceptToTbxTransform,
8
8
  "glossarist/transforms/concept_to_tbx_transform"
9
9
  end
@@ -64,7 +64,19 @@ module Glossarist
64
64
  def to_urn(urn_or_reference)
65
65
  case urn_or_reference
66
66
  when String then urn_or_reference
67
- when ConceptReference then urn_or_reference.to_urn
67
+ when ConceptReference then concept_reference_to_urn(urn_or_reference)
68
+ end
69
+ end
70
+
71
+ def concept_reference_to_urn(ref)
72
+ return ref.urn if ref.urn && !ref.urn.strip.empty?
73
+ return nil unless ref.external?
74
+ return nil unless ref.source && ref.concept_id
75
+
76
+ case ref.source
77
+ when /\Aurn:iec/ then "#{ref.source}-#{ref.concept_id}"
78
+ when /\Aurn:iso/ then "#{ref.source}:term:#{ref.concept_id}"
79
+ else "#{ref.source}/#{ref.concept_id}"
68
80
  end
69
81
  end
70
82
  end