glossarist 2.6.4 → 2.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +11 -111
  3. data/Gemfile +0 -2
  4. data/README.adoc +207 -1
  5. data/glossarist.gemspec +1 -1
  6. data/lib/glossarist/asset_reference.rb +16 -0
  7. data/lib/glossarist/bibliographic_reference.rb +16 -0
  8. data/lib/glossarist/concept_enricher.rb +1 -0
  9. data/lib/glossarist/concept_reference.rb +4 -0
  10. data/lib/glossarist/concept_validator.rb +27 -56
  11. data/lib/glossarist/dataset_validator.rb +30 -34
  12. data/lib/glossarist/gcr_validator.rb +26 -101
  13. data/lib/glossarist/reference_extractor.rb +80 -10
  14. data/lib/glossarist/reference_resolver.rb +1 -0
  15. data/lib/glossarist/validation/asset_index.rb +113 -0
  16. data/lib/glossarist/validation/bibliography_index.rb +121 -0
  17. data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +60 -0
  18. data/lib/glossarist/validation/rules/authoritative_source_rule.rb +47 -0
  19. data/lib/glossarist/validation/rules/base.rb +46 -0
  20. data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +37 -0
  21. data/lib/glossarist/validation/rules/citation_completeness_rule.rb +63 -0
  22. data/lib/glossarist/validation/rules/concept_context.rb +45 -0
  23. data/lib/glossarist/validation/rules/concept_count_rule.rb +34 -0
  24. data/lib/glossarist/validation/rules/concept_id_rule.rb +29 -0
  25. data/lib/glossarist/validation/rules/concept_id_uniqueness_rule.rb +42 -0
  26. data/lib/glossarist/validation/rules/concept_mention_rule.rb +44 -0
  27. data/lib/glossarist/validation/rules/concept_status_rule.rb +36 -0
  28. data/lib/glossarist/validation/rules/concept_uri_rule.rb +30 -0
  29. data/lib/glossarist/validation/rules/dataset_context.rb +99 -0
  30. data/lib/glossarist/validation/rules/date_type_rule.rb +54 -0
  31. data/lib/glossarist/validation/rules/date_validity_rule.rb +66 -0
  32. data/lib/glossarist/validation/rules/definition_content_rule.rb +41 -0
  33. data/lib/glossarist/validation/rules/designation_status_rule.rb +45 -0
  34. data/lib/glossarist/validation/rules/designation_type_rule.rb +55 -0
  35. data/lib/glossarist/validation/rules/duplicate_term_rule.rb +63 -0
  36. data/lib/glossarist/validation/rules/entry_status_rule.rb +39 -0
  37. data/lib/glossarist/validation/rules/filename_id_rule.rb +35 -0
  38. data/lib/glossarist/validation/rules/gcr_context.rb +92 -0
  39. data/lib/glossarist/validation/rules/image_reference_rule.rb +73 -0
  40. data/lib/glossarist/validation/rules/l10n_uuid_integrity_rule.rb +40 -0
  41. data/lib/glossarist/validation/rules/language_code_format_rule.rb +39 -0
  42. data/lib/glossarist/validation/rules/language_coverage_rule.rb +37 -0
  43. data/lib/glossarist/validation/rules/language_list_rule.rb +46 -0
  44. data/lib/glossarist/validation/rules/localization_presence_rule.rb +25 -0
  45. data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +64 -0
  46. data/lib/glossarist/validation/rules/orphaned_images_rule.rb +68 -0
  47. data/lib/glossarist/validation/rules/orphaned_l10n_files_rule.rb +39 -0
  48. data/lib/glossarist/validation/rules/preferred_term_rule.rb +41 -0
  49. data/lib/glossarist/validation/rules/registry.rb +42 -0
  50. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +102 -0
  51. data/lib/glossarist/validation/rules/related_concept_rule.rb +40 -0
  52. data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +87 -0
  53. data/lib/glossarist/validation/rules/source_type_rule.rb +63 -0
  54. data/lib/glossarist/validation/rules/terms_presence_rule.rb +39 -0
  55. data/lib/glossarist/validation/rules.rb +85 -0
  56. data/lib/glossarist/validation/validation_issue.rb +39 -0
  57. data/lib/glossarist/validation.rb +12 -0
  58. data/lib/glossarist/validation_result.rb +26 -9
  59. data/lib/glossarist/version.rb +1 -1
  60. data/lib/glossarist.rb +3 -0
  61. metadata +60 -15
@@ -3,43 +3,53 @@
3
3
  module Glossarist
4
4
  class DatasetValidator
5
5
  def validate(path, strict: false, reference_path: nil)
6
- result = validate_path(path)
6
+ if File.extname(path).downcase == ".gcr"
7
+ validate_gcr(path, reference_path: reference_path)
8
+ else
9
+ validate_directory(path, reference_path: reference_path)
10
+ end
11
+ end
12
+
13
+ private
14
+
15
+ def validate_gcr(path, reference_path: nil)
16
+ result = GcrValidator.new.validate(path)
7
17
 
8
18
  if reference_path
9
- ref_result = validate_cross_references(path, reference_path)
19
+ ref_result = validate_gcr_cross_references(path, reference_path)
10
20
  result.merge(ref_result)
11
21
  end
12
22
 
13
23
  result
14
24
  end
15
25
 
16
- private
26
+ def validate_directory(path, reference_path: nil)
27
+ result = ConceptValidator.new(path).validate_all
17
28
 
18
- def validate_path(path)
19
- if File.extname(path).downcase == ".gcr"
20
- validate_gcr(path)
21
- else
22
- validate_directory(path)
29
+ if reference_path
30
+ ref_result = validate_directory_cross_references(path, reference_path)
31
+ result.merge(ref_result)
23
32
  end
24
- end
25
33
 
26
- def validate_gcr(path)
27
- GcrValidator.new.validate(path)
34
+ result
28
35
  end
29
36
 
30
- def validate_directory(path)
31
- ConceptValidator.new(path).validate_all
37
+ def validate_gcr_cross_references(path, reference_path)
38
+ extractor = ReferenceExtractor.new
39
+ resolver = build_resolver(reference_path)
40
+ pkg = GcrPackage.load(path)
41
+ uri_prefix = pkg.metadata&.dig("uri_prefix") || pkg.metadata&.dig("shortname")
42
+ resolver.register_self(pkg.concepts)
43
+ resolver.register_package(pkg, uri_prefix: uri_prefix)
44
+ resolver.validate_all(pkg, extractor: extractor)
32
45
  end
33
46
 
34
- def validate_cross_references(path, reference_path)
47
+ def validate_directory_cross_references(path, reference_path)
35
48
  extractor = ReferenceExtractor.new
36
49
  resolver = build_resolver(reference_path)
37
-
38
- if File.extname(path).downcase == ".gcr"
39
- validate_gcr_refs(resolver, path, extractor)
40
- else
41
- validate_directory_refs(resolver, path, extractor)
42
- end
50
+ concepts = ConceptCollector.collect(path)
51
+ resolver.register_self(concepts)
52
+ resolver.validate_all(concepts, extractor: extractor)
43
53
  end
44
54
 
45
55
  def build_resolver(reference_path)
@@ -51,19 +61,5 @@ module Glossarist
51
61
  end
52
62
  resolver
53
63
  end
54
-
55
- def validate_gcr_refs(resolver, path, extractor)
56
- pkg = GcrPackage.load(path)
57
- uri_prefix = pkg.metadata&.dig("uri_prefix") || pkg.metadata&.dig("shortname")
58
- resolver.register_self(pkg.concepts)
59
- resolver.register_package(pkg, uri_prefix: uri_prefix)
60
- resolver.validate_all(pkg, extractor: extractor)
61
- end
62
-
63
- def validate_directory_refs(resolver, path, extractor)
64
- concepts = ConceptCollector.collect(path)
65
- resolver.register_self(concepts)
66
- resolver.validate_all(concepts, extractor: extractor)
67
- end
68
64
  end
69
65
  end
@@ -4,7 +4,7 @@ require "zip"
4
4
 
5
5
  module Glossarist
6
6
  class GcrValidator
7
- def validate(zip_path) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
7
+ def validate(zip_path)
8
8
  result = ValidationResult.new
9
9
 
10
10
  unless File.exist?(zip_path)
@@ -13,123 +13,48 @@ module Glossarist
13
13
  end
14
14
 
15
15
  begin
16
- Zip::File.open(zip_path) do |zip_file|
17
- validate_zip_contents(zip_file, result)
18
- end
16
+ zip_entries = Zip::File.open(zip_path) { |zf| zf.entries.to_set(&:name) }
19
17
  rescue StandardError => e
20
18
  result.add_error("Failed to read ZIP: #{e.message}")
19
+ return result
21
20
  end
22
21
 
23
- result
24
- end
25
-
26
- private
27
-
28
- def validate_zip_contents(zip_file, result) # rubocop:disable Metrics/AbcSize
29
- unless zip_file.find_entry("metadata.yaml")
22
+ unless zip_entries.include?("metadata.yaml")
30
23
  result.add_error("Missing metadata.yaml")
31
- return
32
- end
33
-
34
- metadata = GcrMetadata.from_yaml(
35
- zip_file.find_entry("metadata.yaml").get_input_stream.read,
36
- )
37
- validate_metadata(metadata, result)
38
-
39
- concept_entries = zip_file.entries.select do |e|
40
- e.name.start_with?("concepts/") && e.name.end_with?(".yaml")
41
- end
42
- if concept_entries.empty?
43
- result.add_error("No concept files found in concepts/")
44
- end
45
-
46
- concept_entries.each do |entry|
47
- validate_concept_entry(entry, metadata, result)
48
- end
49
-
50
- validate_assets(zip_file, result)
51
- end
52
-
53
- def validate_metadata(metadata, result)
54
- unless metadata&.concept_count
55
- result.add_error("metadata.yaml missing required fields (concept_count)")
56
- end
57
-
58
- unless metadata&.shortname
59
- result.add_error("metadata.yaml missing shortname")
24
+ return result
60
25
  end
61
26
 
62
- unless metadata&.version
63
- result.add_error("metadata.yaml missing version")
27
+ begin
28
+ context = Validation::Rules::GcrContext.new(zip_path)
29
+ rescue StandardError => e
30
+ result.add_error("Failed to load GCR: #{e.message}")
31
+ return result
64
32
  end
65
- end
66
33
 
67
- def validate_concept_entry(entry, metadata, result) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
68
- raw = entry.get_input_stream.read
69
- doc = ConceptDocument.from_yamls(raw)
70
- rescue Psych::SyntaxError => e
71
- result.add_error("#{entry.name}: invalid YAML at line #{e.line}: #{e.message}")
72
- rescue StandardError => e
73
- result.add_error("#{entry.name}: parse error: #{e.message}")
74
- else
75
- concept = doc.concept
76
- unless concept&.data&.id
77
- result.add_error("#{entry.name}: document 0 missing data.identifier")
78
- end
34
+ # Collection-level rules (metadata, structure, integrity)
35
+ collection_rules = Validation::Rules::Registry.for_scope(:collection)
36
+ collection_rules.each do |rule|
37
+ next unless rule.applicable?(context)
79
38
 
80
- localizations = doc.localizations
81
- if localizations.empty?
82
- result.add_error("#{entry.name}: expected at least 1 localization document")
83
- else
84
- localizations.each_with_index do |l10n, idx|
85
- unless l10n&.language_code
86
- result.add_error("#{entry.name}: document #{idx + 1} missing data.language_code")
87
- end
88
- end
39
+ rule.check(context).each { |i| result.add_issue(i) }
89
40
  end
90
41
 
91
- validate_concept_uri(entry, concept, metadata, result)
92
- end
93
-
94
- def validate_concept_uri(entry, concept, metadata, result) # rubocop:disable Metrics/CyclomaticComplexity
95
- concept_uri = concept&.data&.uri
96
- template = metadata&.concept_uri_template
97
- uri_prefix = metadata&.uri_prefix
42
+ # Per-concept rules
43
+ concept_rules = Validation::Rules::Registry.for_scope(:concept)
44
+ context.concepts.each_with_index do |concept, idx|
45
+ fname = concept.data&.id ? "concepts/#{concept.data.id}.yaml" : "concepts/concept-#{idx}.yaml"
46
+ concept_context = Validation::Rules::ConceptContext.new(
47
+ concept, file_name: fname, collection_context: context
48
+ )
98
49
 
99
- if concept_uri.nil? && template.nil? && uri_prefix.nil?
100
- result.add_warning("#{entry.name}: no concept URI (data.uri) and no concept_uri_template or uri_prefix in metadata")
101
- end
102
- end
50
+ concept_rules.each do |rule|
51
+ next unless rule.applicable?(concept_context)
103
52
 
104
- def validate_assets(zip_file, result)
105
- GcrPackage::DATASET_ASSETS.each do |asset|
106
- case asset[:type]
107
- when :file
108
- validate_file_asset_entry(zip_file, asset[:path], result)
109
- when :directory
110
- validate_directory_asset(zip_file, asset[:path], result)
53
+ rule.check(concept_context).each { |i| result.add_issue(i) }
111
54
  end
112
55
  end
113
- end
114
-
115
- def validate_file_asset_entry(zip_file, path, result)
116
- entry = zip_file.find_entry(path)
117
- return unless entry
118
-
119
- YAML.safe_load(entry.get_input_stream.read)
120
- rescue Psych::SyntaxError => e
121
- result.add_error("#{path}: invalid YAML at line #{e.line}: #{e.message}")
122
- end
123
56
 
124
- def validate_directory_asset(zip_file, dir_path, result)
125
- dir_entries = zip_file.entries.select do |e|
126
- e.name.start_with?("#{dir_path}/")
127
- end
128
- return unless dir_entries.any? && dir_entries.all? do |e|
129
- e.name.end_with?("/")
130
- end
131
-
132
- result.add_warning("#{dir_path}/ directory is empty")
57
+ result
133
58
  end
134
59
  end
135
60
  end
@@ -152,6 +152,65 @@ module Glossarist
152
152
 
153
153
  LANG_CODES = Glossarist::LANG_CODES
154
154
 
155
+ # Extract asset references from model attributes (NonVerbRep, GraphicalSymbol).
156
+ def extract_asset_refs_from_concept(concept)
157
+ refs = []
158
+
159
+ concept.localizations.each do |l10n|
160
+ nvr = l10n.non_verb_rep
161
+ if nvr.is_a?(String) && !nvr.strip.empty?
162
+ nvr.strip.split.each do |p|
163
+ refs << AssetReference.new(path: p) unless p.empty?
164
+ end
165
+ end
166
+
167
+ (l10n.data&.terms || []).each do |term|
168
+ if term.is_a?(Designation::GraphicalSymbol) && term.image && !term.image.strip.empty?
169
+ refs << AssetReference.new(path: term.image.strip)
170
+ end
171
+ end
172
+ end
173
+
174
+ refs
175
+ end
176
+
177
+ # Extract bibliographic xrefs from model-level source citations.
178
+ def extract_bib_refs_from_concept(concept)
179
+ refs = []
180
+ concept.localizations.each do |l10n|
181
+ gather_all_sources(l10n).each do |source|
182
+ origin = source.origin
183
+ next unless origin
184
+
185
+ if origin.text && !origin.text.strip.empty?
186
+ refs << BibliographicReference.new(anchor: origin.text)
187
+ end
188
+
189
+ next unless origin.source && origin.id
190
+
191
+ key = "#{origin.source} #{origin.id}"
192
+ refs << BibliographicReference.new(anchor: key)
193
+ refs << BibliographicReference.new(anchor: origin.id.to_s)
194
+ end
195
+ end
196
+ refs
197
+ end
198
+
199
+ # Extract all reference types from a managed concept.
200
+ def extract_all_from_managed_concept(concept)
201
+ concept_refs = extract_from_managed_concept(concept)
202
+ asset_refs = extract_asset_refs_from_concept(concept)
203
+ concept_refs + asset_refs
204
+ end
205
+
206
+ def resolve_asciidoc_xref(target)
207
+ BibliographicReference.new(anchor: target.strip)
208
+ end
209
+
210
+ def resolve_image_ref(path)
211
+ AssetReference.new(path: path.strip)
212
+ end
213
+
155
214
  private
156
215
 
157
216
  def gather_texts(lc_hash)
@@ -170,16 +229,7 @@ module Glossarist
170
229
 
171
230
  def deduplicate(refs)
172
231
  seen = Set.new
173
- refs.select do |ref|
174
- key = if ref.concept_id
175
- [ref.source,
176
- ref.concept_id]
177
- else
178
- [ref.source, ref.concept_id,
179
- ref.term]
180
- end
181
- seen.add?(key)
182
- end
232
+ refs.select { |ref| seen.add?(ref.dedup_key) }
183
233
  end
184
234
 
185
235
  def extract_term_id_from_urn_tail(tail)
@@ -212,6 +262,18 @@ module Glossarist
212
262
  regex: /\{\{([^}]+)\}\}/,
213
263
  ) { |ext, content| ext.resolve_mention(content) }
214
264
 
265
+ # AsciiDoc cross-references: <<anchor>> or <<anchor,display text>>
266
+ register_pattern(
267
+ name: :asciidoc_xref,
268
+ regex: /<<([^,>\n]+?)(?:,[^>\n]*)?>>/,
269
+ ) { |ext, target| ext.resolve_asciidoc_xref(target) }
270
+
271
+ # Image references: image::path[] or image:path[]
272
+ register_pattern(
273
+ name: :asciidoc_image,
274
+ regex: /image::?([^\[\]]+)\[/,
275
+ ) { |ext, path| ext.resolve_image_ref(path) }
276
+
215
277
  register_identifier_resolver("urn:iec:std:iec:60050") do |ext, identifier, display|
216
278
  ext.resolve_iec_urn(identifier, display)
217
279
  end
@@ -223,5 +285,13 @@ module Glossarist
223
285
  register_identifier_resolver("urn:") do |ext, identifier, display|
224
286
  ext.resolve_generic_urn(identifier, display)
225
287
  end
288
+
289
+ def gather_all_sources(l10n)
290
+ sources = Array(l10n.data&.sources)
291
+ sources += Array((l10n.data&.definition || []).flat_map(&:sources).compact)
292
+ sources += Array((l10n.data&.notes || []).flat_map(&:sources).compact)
293
+ sources += Array((l10n.data&.examples || []).flat_map(&:sources).compact)
294
+ sources
295
+ end
226
296
  end
227
297
  end
@@ -153,6 +153,7 @@ module Glossarist
153
153
  def extract_refs(concept, extractor)
154
154
  if concept.is_a?(ManagedConcept)
155
155
  extractor.extract_from_managed_concept(concept)
156
+ .grep(ConceptReference)
156
157
  else
157
158
  extractor.extract_from_concept_hash(concept)
158
159
  end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require "zip"
5
+
6
+ module Glossarist
7
+ module Validation
8
+ class AssetIndex
9
+ IMAGE_TERMS = %w[id ref text anchor].freeze
10
+ private_constant :IMAGE_TERMS
11
+
12
+ attr_reader :paths
13
+
14
+ def initialize
15
+ @paths = Set.new
16
+ end
17
+
18
+ def register(path)
19
+ @paths.add(normalize_path(path))
20
+ end
21
+
22
+ def resolve?(path)
23
+ @paths.include?(normalize_path(path))
24
+ end
25
+
26
+ def each_path(&block)
27
+ @paths.each(&block)
28
+ end
29
+
30
+ def self.build_from_directory(dataset_path)
31
+ index = new
32
+ index_image_files(index, dataset_path)
33
+ index_model_assets(index, dataset_path)
34
+ index
35
+ end
36
+
37
+ def self.build_from_zip(zip_path)
38
+ index = new
39
+ index_zip_images(index, zip_path)
40
+ index_zip_concept_assets(index, zip_path)
41
+ index
42
+ end
43
+
44
+ private
45
+
46
+ def normalize_path(path)
47
+ path.to_s.delete_prefix("/")
48
+ end
49
+
50
+ class << self
51
+ private
52
+
53
+ def index_image_files(index, dataset_path)
54
+ images_dir = File.join(dataset_path, "images")
55
+ return unless File.directory?(images_dir)
56
+
57
+ base = File.expand_path(dataset_path)
58
+ Dir.glob(File.join(images_dir, "**", "*")).each do |file|
59
+ next unless File.file?(file)
60
+
61
+ relative = file.sub("#{base}/", "")
62
+ index.register(relative)
63
+ end
64
+ end
65
+
66
+ def index_model_assets(index, dataset_path)
67
+ concepts = ConceptCollector.collect(dataset_path)
68
+ index_concept_assets(index, concepts)
69
+ end
70
+
71
+ def index_zip_images(index, zip_path)
72
+ Zip::File.open(zip_path) do |zf|
73
+ zf.entries.each do |entry|
74
+ next if entry.name.end_with?("/")
75
+ next unless entry.name.start_with?("images/")
76
+
77
+ index.register(entry.name)
78
+ end
79
+ end
80
+ end
81
+
82
+ def index_zip_concept_assets(index, zip_path)
83
+ pkg = GcrPackage.load(zip_path)
84
+ index_concept_assets(index, pkg.concepts)
85
+ end
86
+
87
+ def index_concept_assets(index, concepts)
88
+ concepts.each do |concept|
89
+ concept.localizations.each do |l10n|
90
+ register_non_verb_rep(index, l10n)
91
+ register_graphical_symbols(index, l10n)
92
+ end
93
+ end
94
+ end
95
+
96
+ def register_non_verb_rep(index, l10n)
97
+ nvr = l10n.non_verb_rep
98
+ return unless nvr.is_a?(String) && !nvr.strip.empty?
99
+
100
+ nvr.strip.split.each { |p| index.register(p) }
101
+ end
102
+
103
+ def register_graphical_symbols(index, l10n)
104
+ (l10n.data&.terms || []).each do |term|
105
+ next unless term.is_a?(Designation::GraphicalSymbol) && term.image
106
+
107
+ index.register(term.image)
108
+ end
109
+ end
110
+ end
111
+ end
112
+ end
113
+ end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ class BibliographyIndex
6
+ BIB_ENTRY_KEYS = %w[id ref text anchor].freeze
7
+ private_constant :BIB_ENTRY_KEYS
8
+
9
+ attr_reader :entries
10
+
11
+ def initialize
12
+ @entries = {}
13
+ end
14
+
15
+ def register(anchor, source = nil)
16
+ @entries[normalize_anchor(anchor)] = { anchor: anchor, source: source }
17
+ end
18
+
19
+ def resolve?(anchor)
20
+ @entries.key?(normalize_anchor(anchor))
21
+ end
22
+
23
+ def anchors
24
+ @entries.keys
25
+ end
26
+
27
+ def each_entry(&block)
28
+ @entries.each_value(&block)
29
+ end
30
+
31
+ def self.build_from_concepts(concepts, dataset_path: nil,
32
+ bibliography_yaml: nil)
33
+ index = new
34
+
35
+ concepts.each { |concept| index_concept_sources(index, concept) }
36
+
37
+ yaml = bibliography_yaml || read_bibliography_file(dataset_path)
38
+ index_bibliography_yaml(index, yaml) if yaml
39
+
40
+ index
41
+ end
42
+
43
+ private
44
+
45
+ def normalize_anchor(anchor)
46
+ anchor.to_s.gsub(/[ \/:]/, "_").gsub(/__+/, "_")
47
+ end
48
+
49
+ class << self
50
+ private
51
+
52
+ def read_bibliography_file(dataset_path)
53
+ return nil unless dataset_path
54
+
55
+ bib_path = File.join(dataset_path, "bibliography.yaml")
56
+ File.exist?(bib_path) ? File.read(bib_path) : nil
57
+ end
58
+
59
+ def index_concept_sources(index, concept)
60
+ concept.localizations.each do |l10n|
61
+ index_l10n_sources(index, l10n)
62
+ end
63
+ end
64
+
65
+ def index_l10n_sources(index, l10n)
66
+ data = l10n.data
67
+ return unless data
68
+
69
+ register_source_collection(index, data.sources)
70
+ register_source_collection(index,
71
+ data.definition&.flat_map(&:sources))
72
+ register_source_collection(index, data.notes&.flat_map(&:sources))
73
+ register_source_collection(index, data.examples&.flat_map(&:sources))
74
+ end
75
+
76
+ def register_source_collection(index, sources)
77
+ Array(sources).compact.each { |s| register_source(index, s) }
78
+ end
79
+
80
+ def register_source(index, source)
81
+ origin = source.origin
82
+ return unless origin
83
+
84
+ register_origin_text(index, origin)
85
+ register_origin_ref(index, origin)
86
+ end
87
+
88
+ def register_origin_text(index, origin)
89
+ return unless origin.text && !origin.text.strip.empty?
90
+
91
+ index.register(origin.text, origin)
92
+ end
93
+
94
+ def register_origin_ref(index, origin)
95
+ return unless origin.source && origin.id
96
+
97
+ key = "#{origin.source} #{origin.id}"
98
+ index.register(key, origin)
99
+ index.register(origin.id.to_s, origin)
100
+ end
101
+
102
+ def index_bibliography_yaml(index, yaml_content)
103
+ data = YAML.safe_load(yaml_content)
104
+ return unless data.is_a?(Hash) || data.is_a?(Array)
105
+
106
+ entries = data.is_a?(Hash) ? data.values : data
107
+ entries.each do |entry|
108
+ next unless entry.is_a?(Hash)
109
+
110
+ BIB_ENTRY_KEYS.each do |key|
111
+ val = entry[key]
112
+ index.register(val.to_s, entry) if val && !val.to_s.strip.empty?
113
+ end
114
+ end
115
+ rescue Psych::SyntaxError, Psych::DisallowedClass
116
+ nil
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
+ class AsciidocXrefRule < Base
7
+ def code = "GLS-102"
8
+ def category = :references
9
+ def severity = "warning"
10
+ def scope = :concept
11
+
12
+ def applicable?(context)
13
+ context.concept.localizations&.any?
14
+ end
15
+
16
+ def check(context)
17
+ concept = context.concept
18
+ fname = context.file_name
19
+ extractor = ReferenceExtractor.new
20
+ issues = []
21
+
22
+ concept.localizations.each do |l10n|
23
+ lang = l10n.language_code || "unknown"
24
+ texts = extract_texts(l10n)
25
+
26
+ texts.each do |text|
27
+ next unless text
28
+ refs = extractor.extract_from_text(text)
29
+ refs.each do |ref|
30
+ next unless ref.is_a?(BibliographicReference)
31
+ next if context.bibliography_index.resolve?(ref.anchor)
32
+
33
+ issues << issue(
34
+ "unresolved bibliography reference <<#{ref.anchor}>>",
35
+ code: code, severity: severity,
36
+ location: "#{fname}/#{lang}",
37
+ suggestion: "add '#{ref.anchor}' as a source, " \
38
+ "or verify it exists in bibliography.yaml",
39
+ )
40
+ end
41
+ end
42
+ end
43
+
44
+ issues
45
+ end
46
+
47
+ private
48
+
49
+ def extract_texts(l10n)
50
+ texts = []
51
+ (l10n.data&.definition || []).each { |d| texts << d.content if d.content }
52
+ (l10n.data&.notes || []).each { |n| texts << n.content if n.content }
53
+ (l10n.data&.examples || []).each { |e| texts << e.content if e.content }
54
+ texts
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
60
+