glossarist 2.6.5 → 2.6.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +1 -4
  3. data/.rubocop_todo.yml +53 -2
  4. data/CLAUDE.md +27 -2
  5. data/README.adoc +532 -56
  6. data/config.yml +68 -1
  7. data/glossarist.gemspec +2 -0
  8. data/lib/glossarist/citation.rb +26 -123
  9. data/lib/glossarist/cli/compare_command.rb +106 -0
  10. data/lib/glossarist/cli/export_command.rb +11 -14
  11. data/lib/glossarist/cli/validate_command.rb +111 -20
  12. data/lib/glossarist/cli.rb +18 -0
  13. data/lib/glossarist/collections/bibliography_collection.rb +4 -2
  14. data/lib/glossarist/collections/localization_collection.rb +2 -0
  15. data/lib/glossarist/comparison_result.rb +35 -0
  16. data/lib/glossarist/concept.rb +1 -1
  17. data/lib/glossarist/concept_collector.rb +44 -0
  18. data/lib/glossarist/concept_comparator.rb +72 -0
  19. data/lib/glossarist/concept_data.rb +20 -0
  20. data/lib/glossarist/concept_diff.rb +15 -0
  21. data/lib/glossarist/concept_document.rb +11 -0
  22. data/lib/glossarist/concept_manager.rb +19 -5
  23. data/lib/glossarist/concept_ref.rb +13 -0
  24. data/lib/glossarist/concept_reference.rb +12 -19
  25. data/lib/glossarist/concept_validator.rb +6 -1
  26. data/lib/glossarist/context_configuration.rb +90 -0
  27. data/lib/glossarist/dataset_validator.rb +8 -4
  28. data/lib/glossarist/designation/abbreviation.rb +0 -2
  29. data/lib/glossarist/designation/base.rb +21 -1
  30. data/lib/glossarist/designation/expression.rb +3 -0
  31. data/lib/glossarist/designation/letter_symbol.rb +0 -4
  32. data/lib/glossarist/designation/prefix.rb +17 -0
  33. data/lib/glossarist/designation/suffix.rb +17 -0
  34. data/lib/glossarist/designation/symbol.rb +0 -2
  35. data/lib/glossarist/gcr_metadata.rb +7 -14
  36. data/lib/glossarist/gcr_package.rb +35 -23
  37. data/lib/glossarist/gcr_validator.rb +38 -17
  38. data/lib/glossarist/glossary_definition.rb +5 -0
  39. data/lib/glossarist/localized_concept.rb +8 -0
  40. data/lib/glossarist/managed_concept.rb +39 -6
  41. data/lib/glossarist/managed_concept_data.rb +22 -2
  42. data/lib/glossarist/non_verb_rep.rb +21 -6
  43. data/lib/glossarist/pronunciation.rb +32 -0
  44. data/lib/glossarist/rdf/ext/jsonld_transform_ext.rb +208 -0
  45. data/lib/glossarist/rdf/ext/mapping_ext.rb +37 -0
  46. data/lib/glossarist/rdf/ext/mapping_rule_ext.rb +27 -0
  47. data/lib/glossarist/rdf/ext/member_rule_ext.rb +34 -0
  48. data/lib/glossarist/rdf/ext/turtle_transform_ext.rb +222 -0
  49. data/lib/glossarist/rdf/ext.rb +39 -0
  50. data/lib/glossarist/rdf/gloss_citation.rb +36 -0
  51. data/lib/glossarist/rdf/gloss_concept.rb +58 -0
  52. data/lib/glossarist/rdf/gloss_concept_date.rb +24 -0
  53. data/lib/glossarist/rdf/gloss_concept_reference.rb +29 -0
  54. data/lib/glossarist/rdf/gloss_concept_source.rb +37 -0
  55. data/lib/glossarist/rdf/gloss_designation.rb +146 -0
  56. data/lib/glossarist/rdf/gloss_detailed_definition.rb +24 -0
  57. data/lib/glossarist/rdf/gloss_grammar_info.rb +57 -0
  58. data/lib/glossarist/rdf/gloss_locality.rb +25 -0
  59. data/lib/glossarist/rdf/gloss_localized_concept.rb +67 -0
  60. data/lib/glossarist/rdf/gloss_non_verbal_rep.rb +31 -0
  61. data/lib/glossarist/rdf/gloss_pronunciation.rb +32 -0
  62. data/lib/glossarist/rdf/gloss_reference.rb +55 -0
  63. data/lib/glossarist/rdf/namespaces/glossarist_namespace.rb +12 -0
  64. data/lib/glossarist/rdf/namespaces/iso_thes_namespace.rb +12 -0
  65. data/lib/glossarist/rdf/namespaces/owl_namespace.rb +12 -0
  66. data/lib/glossarist/rdf/namespaces/prov_namespace.rb +12 -0
  67. data/lib/glossarist/rdf/namespaces/rdf_namespace.rb +12 -0
  68. data/lib/glossarist/rdf/namespaces/skosxl_namespace.rb +12 -0
  69. data/lib/glossarist/rdf/namespaces.rb +8 -2
  70. data/lib/glossarist/rdf/relationships.rb +19 -0
  71. data/lib/glossarist/rdf/v3/configuration.rb +15 -0
  72. data/lib/glossarist/rdf/v3.rb +79 -0
  73. data/lib/glossarist/rdf.rb +22 -2
  74. data/lib/glossarist/reference_extractor.rb +15 -24
  75. data/lib/glossarist/reference_resolver.rb +3 -3
  76. data/lib/glossarist/related_concept.rb +2 -10
  77. data/lib/glossarist/schema_migration.rb +39 -0
  78. data/lib/glossarist/sts/term_mapper.rb +2 -2
  79. data/lib/glossarist/transforms/concept_to_gloss_transform.rb +355 -0
  80. data/lib/glossarist/transforms.rb +2 -2
  81. data/lib/glossarist/urn_resolver.rb +13 -1
  82. data/lib/glossarist/v1/concept.rb +18 -11
  83. data/lib/glossarist/v2/citation.rb +36 -0
  84. data/lib/glossarist/v2/concept_data.rb +46 -0
  85. data/lib/glossarist/v2/concept_document.rb +18 -0
  86. data/lib/glossarist/v2/concept_ref.rb +8 -0
  87. data/lib/glossarist/v2/concept_source.rb +16 -0
  88. data/lib/glossarist/v2/configuration.rb +13 -0
  89. data/lib/glossarist/v2/detailed_definition.rb +14 -0
  90. data/lib/glossarist/v2/localized_concept.rb +9 -0
  91. data/lib/glossarist/v2/managed_concept.rb +25 -0
  92. data/lib/glossarist/v2/managed_concept_data.rb +49 -0
  93. data/lib/glossarist/v2/related_concept.rb +15 -0
  94. data/lib/glossarist/v2.rb +28 -0
  95. data/lib/glossarist/v3/bibliography_entry.rb +19 -0
  96. data/lib/glossarist/v3/bibliography_file.rb +27 -0
  97. data/lib/glossarist/v3/citation.rb +30 -0
  98. data/lib/glossarist/v3/concept_data.rb +46 -0
  99. data/lib/glossarist/v3/concept_document.rb +18 -0
  100. data/lib/glossarist/v3/concept_ref.rb +8 -0
  101. data/lib/glossarist/v3/concept_source.rb +16 -0
  102. data/lib/glossarist/v3/configuration.rb +13 -0
  103. data/lib/glossarist/v3/detailed_definition.rb +14 -0
  104. data/lib/glossarist/v3/image_entry.rb +21 -0
  105. data/lib/glossarist/v3/image_file.rb +31 -0
  106. data/lib/glossarist/v3/localized_concept.rb +9 -0
  107. data/lib/glossarist/v3/managed_concept.rb +26 -0
  108. data/lib/glossarist/v3/managed_concept_data.rb +34 -0
  109. data/lib/glossarist/v3/related_concept.rb +15 -0
  110. data/lib/glossarist/v3.rb +36 -0
  111. data/lib/glossarist/validation/asset_index.rb +4 -3
  112. data/lib/glossarist/validation/bibliography_index.rb +61 -30
  113. data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +2 -15
  114. data/lib/glossarist/validation/rules/authoritative_source_rule.rb +2 -15
  115. data/lib/glossarist/validation/rules/base.rb +5 -0
  116. data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +2 -3
  117. data/lib/glossarist/validation/rules/citation_completeness_rule.rb +5 -27
  118. data/lib/glossarist/validation/rules/dataset_context.rb +8 -3
  119. data/lib/glossarist/validation/rules/date_validity_rule.rb +1 -1
  120. data/lib/glossarist/validation/rules/designation_status_rule.rb +0 -1
  121. data/lib/glossarist/validation/rules/designation_type_rule.rb +1 -5
  122. data/lib/glossarist/validation/rules/domain_ref_rule.rb +37 -0
  123. data/lib/glossarist/validation/rules/domain_target_rule.rb +56 -0
  124. data/lib/glossarist/validation/rules/gcr_context.rb +12 -13
  125. data/lib/glossarist/validation/rules/image_reference_rule.rb +2 -17
  126. data/lib/glossarist/validation/rules/locality_completeness_rule.rb +58 -0
  127. data/lib/glossarist/validation/rules/localization_consistency_rule.rb +72 -0
  128. data/lib/glossarist/validation/rules/localization_presence_rule.rb +1 -1
  129. data/lib/glossarist/validation/rules/model_validity_rule.rb +71 -0
  130. data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +1 -13
  131. data/lib/glossarist/validation/rules/orphaned_images_rule.rb +16 -11
  132. data/lib/glossarist/validation/rules/ref_shape_rule.rb +68 -0
  133. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +1 -3
  134. data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +1 -3
  135. data/lib/glossarist/validation/rules/related_concept_target_rule.rb +64 -0
  136. data/lib/glossarist/validation/rules/schema_version_rule.rb +41 -0
  137. data/lib/glossarist/validation/rules/source_type_rule.rb +1 -15
  138. data/lib/glossarist/validation/rules/source_urn_format_rule.rb +65 -0
  139. data/lib/glossarist/validation/rules/uuid_format_rule.rb +33 -0
  140. data/lib/glossarist/validation/rules.rb +10 -43
  141. data/lib/glossarist/validation/validation_issue.rb +14 -11
  142. data/lib/glossarist/validation_result.rb +12 -22
  143. data/lib/glossarist/version.rb +1 -1
  144. data/lib/glossarist.rb +10 -0
  145. data/memory/project-status.md +43 -0
  146. data/scripts/migrate_dataset.rb +180 -0
  147. data/scripts/migrate_isotc204_to_v3.rb +134 -0
  148. data/scripts/migrate_isotc211_to_v3.rb +153 -0
  149. data/scripts/migrate_osgeo_to_v3.rb +155 -0
  150. data/scripts/upgrade_dataset_to_v3.rb +47 -0
  151. metadata +112 -6
  152. data/TODO.integration/01-gcr-package-cli.md +0 -180
  153. data/lib/glossarist/rdf/skos_concept.rb +0 -43
  154. data/lib/glossarist/rdf/skos_vocabulary.rb +0 -25
  155. data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -131
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module V3
5
+ class RelatedConcept < Glossarist::RelatedConcept
6
+ attribute :ref, V3::ConceptRef
7
+
8
+ key_value do
9
+ map :content, to: :content
10
+ map :type, to: :type
11
+ map :ref, to: :ref
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "v3/configuration"
4
+ require_relative "v3/citation"
5
+ require_relative "v3/concept_source"
6
+ require_relative "v3/detailed_definition"
7
+ require_relative "v3/concept_ref"
8
+ require_relative "v3/related_concept"
9
+ require_relative "v3/concept_data"
10
+ require_relative "v3/localized_concept"
11
+ require_relative "v3/managed_concept_data"
12
+ require_relative "v3/managed_concept"
13
+ require_relative "v3/concept_document"
14
+ require_relative "v3/bibliography_entry"
15
+ require_relative "v3/bibliography_file"
16
+ require_relative "v3/image_entry"
17
+ require_relative "v3/image_file"
18
+
19
+ module Glossarist
20
+ module V3
21
+ Configuration.register_model(Citation, id: :citation)
22
+ Configuration.register_model(ConceptSource, id: :concept_source)
23
+ Configuration.register_model(DetailedDefinition, id: :detailed_definition)
24
+ Configuration.register_model(ConceptData, id: :concept_data)
25
+ Configuration.register_model(LocalizedConcept, id: :localized_concept)
26
+ Configuration.register_model(ConceptRef, id: :concept_ref)
27
+ Configuration.register_model(RelatedConcept, id: :related_concept)
28
+ Configuration.register_model(ManagedConceptData, id: :managed_concept_data)
29
+ Configuration.register_model(ManagedConcept, id: :managed_concept)
30
+ Configuration.register_model(ConceptDocument, id: :concept_document)
31
+ Configuration.register_model(BibliographyEntry, id: :bibliography_entry)
32
+ Configuration.register_model(BibliographyFile, id: :bibliography_file)
33
+ Configuration.register_model(ImageEntry, id: :image_entry)
34
+ Configuration.register_model(ImageFile, id: :image_file)
35
+ end
36
+ end
@@ -94,10 +94,11 @@ module Glossarist
94
94
  end
95
95
 
96
96
  def register_non_verb_rep(index, l10n)
97
- nvr = l10n.non_verb_rep
98
- return unless nvr.is_a?(String) && !nvr.strip.empty?
97
+ Array(l10n.non_verb_rep).each do |nvr|
98
+ next unless nvr.is_a?(NonVerbRep) && nvr.ref && !nvr.ref.strip.empty?
99
99
 
100
- nvr.strip.split.each { |p| index.register(p) }
100
+ index.register(nvr.ref.strip)
101
+ end
101
102
  end
102
103
 
103
104
  def register_graphical_symbols(index, l10n)
@@ -3,9 +3,6 @@
3
3
  module Glossarist
4
4
  module Validation
5
5
  class BibliographyIndex
6
- BIB_ENTRY_KEYS = %w[id ref text anchor].freeze
7
- private_constant :BIB_ENTRY_KEYS
8
-
9
6
  attr_reader :entries
10
7
 
11
8
  def initialize
@@ -28,14 +25,22 @@ module Glossarist
28
25
  @entries.each_value(&block)
29
26
  end
30
27
 
31
- def self.build_from_concepts(concepts, dataset_path: nil,
32
- bibliography_yaml: nil)
28
+ def self.build_from_concepts(concepts, dataset_path: nil)
33
29
  index = new
34
30
 
35
31
  concepts.each { |concept| index_concept_sources(index, concept) }
32
+ index_bibliography_file(index, dataset_path)
33
+ index_images_file(index, dataset_path)
34
+
35
+ index
36
+ end
37
+
38
+ def self.build_from_yaml(concepts, bibliography_yaml: nil, images_yaml: nil)
39
+ index = new
36
40
 
37
- yaml = bibliography_yaml || read_bibliography_file(dataset_path)
38
- index_bibliography_yaml(index, yaml) if yaml
41
+ concepts.each { |concept| index_concept_sources(index, concept) }
42
+ index_bib_from_yaml_string(index, bibliography_yaml)
43
+ index_images_from_yaml_string(index, images_yaml)
39
44
 
40
45
  index
41
46
  end
@@ -49,13 +54,6 @@ bibliography_yaml: nil)
49
54
  class << self
50
55
  private
51
56
 
52
- def read_bibliography_file(dataset_path)
53
- return nil unless dataset_path
54
-
55
- bib_path = File.join(dataset_path, "bibliography.yaml")
56
- File.exist?(bib_path) ? File.read(bib_path) : nil
57
- end
58
-
59
57
  def index_concept_sources(index, concept)
60
58
  concept.localizations.each do |l10n|
61
59
  index_l10n_sources(index, l10n)
@@ -86,33 +84,66 @@ bibliography_yaml: nil)
86
84
  end
87
85
 
88
86
  def register_origin_text(index, origin)
89
- return unless origin.text && !origin.text.strip.empty?
87
+ ref = origin.ref
88
+ return unless ref && ref.source && !ref.source.strip.empty?
90
89
 
91
- index.register(origin.text, origin)
90
+ index.register(ref.source, origin)
92
91
  end
93
92
 
94
93
  def register_origin_ref(index, origin)
95
- return unless origin.source && origin.id
94
+ ref = origin.ref
95
+ return unless ref && ref.source && ref.id
96
96
 
97
- key = "#{origin.source} #{origin.id}"
97
+ key = "#{ref.source} #{ref.id}"
98
98
  index.register(key, origin)
99
- index.register(origin.id.to_s, origin)
99
+ index.register(ref.id.to_s, origin)
100
100
  end
101
101
 
102
- def index_bibliography_yaml(index, yaml_content)
103
- data = YAML.safe_load(yaml_content)
104
- return unless data.is_a?(Hash) || data.is_a?(Array)
102
+ def index_bibliography_file(index, dataset_path)
103
+ return unless dataset_path
105
104
 
106
- entries = data.is_a?(Hash) ? data.values : data
107
- entries.each do |entry|
108
- next unless entry.is_a?(Hash)
105
+ bib = V3::BibliographyFile.from_file(
106
+ File.join(dataset_path, "bibliography.yaml"),
107
+ )
108
+ return unless bib
109
109
 
110
- BIB_ENTRY_KEYS.each do |key|
111
- val = entry[key]
112
- index.register(val.to_s, entry) if val && !val.to_s.strip.empty?
113
- end
110
+ bib.entries.each do |entry|
111
+ index.register(entry.id, entry)
112
+ index.register(entry.reference, entry) if entry.reference
114
113
  end
115
- rescue Psych::SyntaxError, Psych::DisallowedClass
114
+ end
115
+
116
+ def index_images_file(index, dataset_path)
117
+ return unless dataset_path
118
+
119
+ images = V3::ImageFile.from_file(
120
+ File.join(dataset_path, "images.yaml"),
121
+ )
122
+ return unless images
123
+
124
+ images.entries.each do |entry|
125
+ index.register(entry.id, entry)
126
+ end
127
+ end
128
+
129
+ def index_bib_from_yaml_string(index, yaml_content)
130
+ return unless yaml_content
131
+
132
+ bib = V3::BibliographyFile.from_yaml(yaml_content)
133
+ bib.entries.each do |entry|
134
+ index.register(entry.id, entry)
135
+ index.register(entry.reference, entry) if entry.reference
136
+ end
137
+ rescue StandardError
138
+ nil
139
+ end
140
+
141
+ def index_images_from_yaml_string(index, yaml_content)
142
+ return unless yaml_content
143
+
144
+ images = V3::ImageFile.from_yaml(yaml_content)
145
+ images.entries.each { |entry| index.register(entry.id, entry) }
146
+ rescue StandardError
116
147
  nil
117
148
  end
118
149
  end
@@ -21,12 +21,10 @@ module Glossarist
21
21
 
22
22
  concept.localizations.each do |l10n|
23
23
  lang = l10n.language_code || "unknown"
24
- texts = extract_texts(l10n)
25
24
 
26
- texts.each do |text|
25
+ l10n.text_content.each do |text|
27
26
  next unless text
28
- refs = extractor.extract_from_text(text)
29
- refs.each do |ref|
27
+ extractor.extract_from_text(text).each do |ref|
30
28
  next unless ref.is_a?(BibliographicReference)
31
29
  next if context.bibliography_index.resolve?(ref.anchor)
32
30
 
@@ -43,18 +41,7 @@ module Glossarist
43
41
 
44
42
  issues
45
43
  end
46
-
47
- private
48
-
49
- def extract_texts(l10n)
50
- texts = []
51
- (l10n.data&.definition || []).each { |d| texts << d.content if d.content }
52
- (l10n.data&.notes || []).each { |n| texts << n.content if n.content }
53
- (l10n.data&.examples || []).each { |e| texts << e.content if e.content }
54
- texts
55
- end
56
44
  end
57
45
  end
58
46
  end
59
47
  end
60
-
@@ -16,7 +16,8 @@ module Glossarist
16
16
  def check(context)
17
17
  concept = context.concept
18
18
  fname = context.file_name
19
- all_sources = gather_all_sources(concept)
19
+
20
+ all_sources = concept.localizations.flat_map(&:all_sources)
20
21
 
21
22
  return [] if all_sources.any? { |s| s.type == "authoritative" }
22
23
 
@@ -27,21 +28,7 @@ module Glossarist
27
28
  suggestion: "Add at least one source with type: authoritative",
28
29
  )]
29
30
  end
30
-
31
- private
32
-
33
- def gather_all_sources(concept)
34
- sources = []
35
- concept.localizations.each do |l10n|
36
- (l10n.data&.sources || []).each { |s| sources << s }
37
- (l10n.data&.definition || []).each { |d| (d.sources || []).each { |s| sources << s } }
38
- (l10n.data&.notes || []).each { |n| (n.sources || []).each { |s| sources << s } }
39
- (l10n.data&.examples || []).each { |e| (e.sources || []).each { |s| sources << s } }
40
- end
41
- sources
42
- end
43
31
  end
44
32
  end
45
33
  end
46
34
  end
47
-
@@ -4,6 +4,11 @@ module Glossarist
4
4
  module Validation
5
5
  module Rules
6
6
  class Base
7
+ def self.inherited(subclass)
8
+ super
9
+ Registry.register(subclass)
10
+ end
11
+
7
12
  def code
8
13
  nil
9
14
  end
@@ -18,9 +18,8 @@ module Glossarist
18
18
  return [] unless bib_content
19
19
 
20
20
  begin
21
- data = YAML.safe_load(bib_content)
22
- return [] if data.nil? || data.is_a?(Hash) || data.is_a?(Array)
23
- rescue Psych::SyntaxError => e
21
+ V3::BibliographyFile.from_yaml(bib_content)
22
+ rescue StandardError => e
24
23
  return [issue(
25
24
  "bibliography.yaml is invalid YAML: #{e.message}",
26
25
  code: code, severity: severity,
@@ -18,46 +18,24 @@ module Glossarist
18
18
  fname = context.file_name
19
19
  issues = []
20
20
 
21
- gather_all_sources(concept).each_with_index do |source, idx|
21
+ concept.localizations.flat_map(&:all_sources).each_with_index do |source, idx|
22
22
  origin = source.origin
23
23
  next unless origin
24
24
 
25
- if origin.text.nil? && origin.source.nil? && origin.id.nil?
25
+ ref = origin.ref
26
+ if ref.nil? || (ref.source.nil? && ref.id.nil?)
26
27
  issues << issue(
27
- "source #{idx + 1} has empty origin (no text, source, or id)",
28
+ "source #{idx + 1} has empty origin (no ref source or id)",
28
29
  code: "GLS-304", severity: severity,
29
30
  location: fname,
30
- suggestion: "Add at minimum an origin.text or origin.source + origin.id",
31
+ suggestion: "Add at minimum an origin.ref with source or id",
31
32
  )
32
33
  end
33
-
34
- next unless origin.structured? && origin.source.nil?
35
-
36
- issues << issue(
37
- "source #{idx + 1} is structured but missing source field",
38
- code: "GLS-304", severity: severity,
39
- location: fname,
40
- suggestion: "Add origin.source to the citation",
41
- )
42
34
  end
43
35
 
44
36
  issues
45
37
  end
46
-
47
- private
48
-
49
- def gather_all_sources(concept)
50
- sources = []
51
- concept.localizations.each do |l10n|
52
- (l10n.data&.sources || []).each { |s| sources << s }
53
- (l10n.data&.definition || []).each { |d| (d.sources || []).each { |s| sources << s } }
54
- (l10n.data&.notes || []).each { |n| (n.sources || []).each { |s| sources << s } }
55
- (l10n.data&.examples || []).each { |e| (e.sources || []).each { |s| sources << s } }
56
- end
57
- sources
58
- end
59
38
  end
60
39
  end
61
40
  end
62
41
  end
63
-
@@ -8,14 +8,19 @@ module Glossarist
8
8
 
9
9
  def initialize(path)
10
10
  @path = File.expand_path(path)
11
- @concepts = nil
11
+ @accumulated_concepts = []
12
12
  @bibliography_index = nil
13
13
  @asset_index = nil
14
14
  @declared_languages = nil
15
15
  end
16
16
 
17
+ def add_concept(concept)
18
+ @accumulated_concepts << concept
19
+ @concept_ids = nil
20
+ end
21
+
17
22
  def concepts
18
- @concepts ||= ConceptCollector.collect(@path)
23
+ @accumulated_concepts
19
24
  end
20
25
 
21
26
  def concept_ids
@@ -77,7 +82,7 @@ module Glossarist
77
82
  reg_path = File.join(@path, "register.yaml")
78
83
  return nil unless File.exist?(reg_path)
79
84
 
80
- YAML.safe_load_file(reg_path)
85
+ RegisterData.from_file(reg_path)
81
86
  end
82
87
 
83
88
  def build_localization_index
@@ -48,7 +48,7 @@ module Glossarist
48
48
 
49
49
  return if date_value.nil?
50
50
 
51
- str = date_value.respond_to?(:iso8601) ? date_value.iso8601 : date_value.to_s
51
+ str = date_value.is_a?(Date) || date_value.is_a?(Time) ? date_value.iso8601 : date_value.to_s
52
52
 
53
53
  begin
54
54
  DateTime.parse(str)
@@ -24,7 +24,6 @@ module Glossarist
24
24
  lang = l10n.language_code || "unknown"
25
25
  terms = l10n.data&.terms || []
26
26
  terms.each_with_index do |term, idx|
27
- next unless term.respond_to?(:normative_status)
28
27
  next if term.normative_status.nil? || term.normative_status.to_s.strip.empty?
29
28
 
30
29
  unless VALID_STATUSES.include?(term.normative_status.to_s)
@@ -43,11 +43,7 @@ module Glossarist
43
43
  private
44
44
 
45
45
  def designation_type(term)
46
- if term.is_a?(Hash)
47
- term["type"]
48
- elsif term.respond_to?(:type)
49
- term.type
50
- end
46
+ term.type if term.is_a?(Designation::Base)
51
47
  end
52
48
  end
53
49
  end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
+ class DomainRefRule < Base
7
+ def code = "GLS-309"
8
+ def category = :quality
9
+ def severity = "warning"
10
+ def scope = :concept
11
+
12
+ def applicable?(context)
13
+ context.concept.data&.domains&.any?
14
+ end
15
+
16
+ def check(context)
17
+ concept = context.concept
18
+ fname = context.file_name
19
+ issues = []
20
+
21
+ (concept.data.domains || []).each_with_index do |domain, idx|
22
+ has_ref = domain.concept_id || domain.urn
23
+ unless has_ref
24
+ issues << issue(
25
+ "domain #{idx + 1} has neither concept_id nor urn",
26
+ location: fname,
27
+ suggestion: "Provide at least concept_id or urn for the domain reference",
28
+ )
29
+ end
30
+ end
31
+
32
+ issues
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
+ # Validates that domain references point to concepts that exist in the
7
+ # dataset (for local refs with concept_id) or have a valid URN.
8
+ class DomainTargetRule < Base
9
+ URN_RE = %r{\Aurn:[a-z0-9][a-z0-9-]{0,31}:[a-z0-9()+,\-.:=@;$_!*'%/?#]+\z}i.freeze
10
+
11
+ def code = "GLS-111"
12
+ def category = :references
13
+ def severity = "warning"
14
+ def scope = :concept
15
+
16
+ def applicable?(context)
17
+ context.concept.data&.domains&.any?
18
+ end
19
+
20
+ def check(context)
21
+ concept = context.concept
22
+ fname = context.file_name
23
+ issues = []
24
+
25
+ (concept.data.domains || []).each_with_index do |domain, idx|
26
+ if domain.concept_id && local_domain?(domain)
27
+ unless context.concept_ids.include?(domain.concept_id)
28
+ issues << issue(
29
+ "domain #{idx + 1} references '#{domain.concept_id}' not in dataset",
30
+ location: fname,
31
+ suggestion: "Add concept '#{domain.concept_id}' or fix the domain ref",
32
+ )
33
+ end
34
+ elsif domain.urn
35
+ if domain.urn.start_with?("urn:") && !URN_RE.match?(domain.urn)
36
+ issues << issue(
37
+ "domain #{idx + 1} has invalid URN '#{domain.urn}'",
38
+ location: fname,
39
+ suggestion: "Fix the URN format",
40
+ )
41
+ end
42
+ end
43
+ end
44
+
45
+ issues
46
+ end
47
+
48
+ private
49
+
50
+ def local_domain?(domain)
51
+ domain.source.nil? || domain.source.strip.empty?
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -10,21 +10,20 @@ module Glossarist
10
10
 
11
11
  def initialize(zip_path)
12
12
  @zip_path = zip_path
13
+ @accumulated_concepts = []
13
14
  @metadata = nil
14
- @concepts = nil
15
15
  @bibliography_index = nil
16
16
  @asset_index = nil
17
17
  @zip_entries = nil
18
- @localization_index = nil
18
+ end
19
+
20
+ def add_concept(concept)
21
+ @accumulated_concepts << concept
22
+ @concept_ids = nil
19
23
  end
20
24
 
21
25
  def concepts
22
- @concepts ||= begin
23
- pkg = GcrPackage.load(@zip_path)
24
- pkg.concepts
25
- rescue StandardError
26
- []
27
- end
26
+ @accumulated_concepts
28
27
  end
29
28
 
30
29
  def concept_ids
@@ -41,11 +40,11 @@ module Glossarist
41
40
  end
42
41
 
43
42
  def bibliography_index
44
- @bibliography_index ||= begin
45
- bib_yaml = read_zip_file("bibliography.yaml")
46
- BibliographyIndex.build_from_concepts(concepts,
47
- bibliography_yaml: bib_yaml)
48
- end
43
+ @bibliography_index ||= BibliographyIndex.build_from_yaml(
44
+ concepts,
45
+ bibliography_yaml: read_zip_file("bibliography.yaml"),
46
+ images_yaml: read_zip_file("images.yaml"),
47
+ )
49
48
  end
50
49
 
51
50
  def asset_index
@@ -19,15 +19,12 @@ module Glossarist
19
19
  extractor = ReferenceExtractor.new
20
20
  issues = []
21
21
 
22
- # Text-embedded image references (image::path[])
23
22
  concept.localizations.each do |l10n|
24
23
  lang = l10n.language_code || "unknown"
25
- texts = extract_texts(l10n)
26
24
 
27
- texts.each do |text|
25
+ l10n.text_content.each do |text|
28
26
  next unless text
29
- refs = extractor.extract_from_text(text)
30
- refs.each do |ref|
27
+ extractor.extract_from_text(text).each do |ref|
31
28
  next unless ref.is_a?(AssetReference)
32
29
  next if context.asset_index.resolve?(ref.path)
33
30
 
@@ -41,7 +38,6 @@ module Glossarist
41
38
  end
42
39
  end
43
40
 
44
- # Model-level asset references (NonVerbRep, GraphicalSymbol)
45
41
  asset_refs = extractor.extract_asset_refs_from_concept(concept)
46
42
  asset_refs.each do |ref|
47
43
  next if context.asset_index.resolve?(ref.path)
@@ -56,18 +52,7 @@ module Glossarist
56
52
 
57
53
  issues
58
54
  end
59
-
60
- private
61
-
62
- def extract_texts(l10n)
63
- texts = []
64
- (l10n.data&.definition || []).each { |d| texts << d.content if d.content }
65
- (l10n.data&.notes || []).each { |n| texts << n.content if n.content }
66
- (l10n.data&.examples || []).each { |e| texts << e.content if e.content }
67
- texts
68
- end
69
55
  end
70
56
  end
71
57
  end
72
58
  end
73
-
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
+ class LocalityCompletenessRule < Base
7
+ def code = "GLS-308"
8
+ def category = :quality
9
+ def severity = "warning"
10
+ def scope = :concept
11
+
12
+ def applicable?(context)
13
+ context.concept.localizations&.any?
14
+ end
15
+
16
+ def check(context)
17
+ concept = context.concept
18
+ fname = context.file_name
19
+ issues = []
20
+
21
+ all_origins(concept).each_with_index do |origin, idx|
22
+ next unless origin
23
+ next unless origin.locality
24
+
25
+ loc = origin.locality
26
+ if loc.type.nil? || loc.type.to_s.strip.empty?
27
+ issues << issue(
28
+ "source #{idx + 1} locality has no type",
29
+ location: fname,
30
+ suggestion: "Add locality type (e.g. 'clause')",
31
+ )
32
+ end
33
+
34
+ if loc.reference_from.nil? || loc.reference_from.to_s.strip.empty?
35
+ issues << issue(
36
+ "source #{idx + 1} locality has no reference_from",
37
+ location: fname,
38
+ suggestion: "Add locality.reference_from (e.g. '3.1.3.10')",
39
+ )
40
+ end
41
+ end
42
+
43
+ issues
44
+ end
45
+
46
+ private
47
+
48
+ def all_origins(concept)
49
+ origins = []
50
+ concept.localizations.each do |l10n|
51
+ (l10n.data&.sources || []).each { |s| origins << s.origin if s.origin }
52
+ end
53
+ origins
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end