glossarist 2.6.4 → 2.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +11 -111
  3. data/Gemfile +0 -2
  4. data/README.adoc +207 -1
  5. data/glossarist.gemspec +1 -1
  6. data/lib/glossarist/asset_reference.rb +16 -0
  7. data/lib/glossarist/bibliographic_reference.rb +16 -0
  8. data/lib/glossarist/concept_enricher.rb +1 -0
  9. data/lib/glossarist/concept_reference.rb +4 -0
  10. data/lib/glossarist/concept_validator.rb +27 -56
  11. data/lib/glossarist/dataset_validator.rb +30 -34
  12. data/lib/glossarist/gcr_validator.rb +26 -101
  13. data/lib/glossarist/reference_extractor.rb +80 -10
  14. data/lib/glossarist/reference_resolver.rb +1 -0
  15. data/lib/glossarist/validation/asset_index.rb +113 -0
  16. data/lib/glossarist/validation/bibliography_index.rb +121 -0
  17. data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +60 -0
  18. data/lib/glossarist/validation/rules/authoritative_source_rule.rb +47 -0
  19. data/lib/glossarist/validation/rules/base.rb +46 -0
  20. data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +37 -0
  21. data/lib/glossarist/validation/rules/citation_completeness_rule.rb +63 -0
  22. data/lib/glossarist/validation/rules/concept_context.rb +45 -0
  23. data/lib/glossarist/validation/rules/concept_count_rule.rb +34 -0
  24. data/lib/glossarist/validation/rules/concept_id_rule.rb +29 -0
  25. data/lib/glossarist/validation/rules/concept_id_uniqueness_rule.rb +42 -0
  26. data/lib/glossarist/validation/rules/concept_mention_rule.rb +44 -0
  27. data/lib/glossarist/validation/rules/concept_status_rule.rb +36 -0
  28. data/lib/glossarist/validation/rules/concept_uri_rule.rb +30 -0
  29. data/lib/glossarist/validation/rules/dataset_context.rb +99 -0
  30. data/lib/glossarist/validation/rules/date_type_rule.rb +54 -0
  31. data/lib/glossarist/validation/rules/date_validity_rule.rb +66 -0
  32. data/lib/glossarist/validation/rules/definition_content_rule.rb +41 -0
  33. data/lib/glossarist/validation/rules/designation_status_rule.rb +45 -0
  34. data/lib/glossarist/validation/rules/designation_type_rule.rb +55 -0
  35. data/lib/glossarist/validation/rules/duplicate_term_rule.rb +63 -0
  36. data/lib/glossarist/validation/rules/entry_status_rule.rb +39 -0
  37. data/lib/glossarist/validation/rules/filename_id_rule.rb +35 -0
  38. data/lib/glossarist/validation/rules/gcr_context.rb +92 -0
  39. data/lib/glossarist/validation/rules/image_reference_rule.rb +73 -0
  40. data/lib/glossarist/validation/rules/l10n_uuid_integrity_rule.rb +40 -0
  41. data/lib/glossarist/validation/rules/language_code_format_rule.rb +39 -0
  42. data/lib/glossarist/validation/rules/language_coverage_rule.rb +37 -0
  43. data/lib/glossarist/validation/rules/language_list_rule.rb +46 -0
  44. data/lib/glossarist/validation/rules/localization_presence_rule.rb +25 -0
  45. data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +64 -0
  46. data/lib/glossarist/validation/rules/orphaned_images_rule.rb +68 -0
  47. data/lib/glossarist/validation/rules/orphaned_l10n_files_rule.rb +39 -0
  48. data/lib/glossarist/validation/rules/preferred_term_rule.rb +41 -0
  49. data/lib/glossarist/validation/rules/registry.rb +42 -0
  50. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +102 -0
  51. data/lib/glossarist/validation/rules/related_concept_rule.rb +40 -0
  52. data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +87 -0
  53. data/lib/glossarist/validation/rules/source_type_rule.rb +63 -0
  54. data/lib/glossarist/validation/rules/terms_presence_rule.rb +39 -0
  55. data/lib/glossarist/validation/rules.rb +85 -0
  56. data/lib/glossarist/validation/validation_issue.rb +39 -0
  57. data/lib/glossarist/validation.rb +12 -0
  58. data/lib/glossarist/validation_result.rb +26 -9
  59. data/lib/glossarist/version.rb +1 -1
  60. data/lib/glossarist.rb +3 -0
  61. metadata +60 -15
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Quality rule GLS-300: reports definitions whose content is nil or
      # consists only of whitespace, in any localization of a concept.
      class DefinitionContentRule < Base
        def code = "GLS-300"
        def category = :quality
        def severity = "warning"
        def scope = :concept

        # Runs only for concepts that carry at least one localization.
        def applicable?(context)
          context.concept.localizations&.any?
        end

        # Returns one issue per empty definition. Definitions are numbered
        # from 1 in the message and located at "<file name>/<language>".
        def check(context)
          concept = context.concept
          file = context.file_name
          found = []

          concept.localizations.each do |localization|
            language = localization.language_code || "unknown"
            definitions = localization.data&.definition || []

            definitions.each_with_index do |definition, position|
              next unless blank_definition?(definition)

              found << issue(
                "definition #{position + 1} has empty content",
                code: code, severity: severity,
                location: "#{file}/#{language}",
                suggestion: "Add definition text or remove the empty entry",
              )
            end
          end

          found
        end

        private

        # True when the definition has no content or only whitespace.
        def blank_definition?(definition)
          text = definition.content
          text.nil? || text.strip.empty?
        end
      end
    end
  end
end
41
+
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Schema rule GLS-204: every term that declares a normative_status must
      # use one of the values listed in VALID_STATUSES.
      class DesignationStatusRule < Base
        def code = "GLS-204"
        def category = :schema
        def severity = "error"
        def scope = :concept

        VALID_STATUSES = Glossarist::GlossaryDefinition::DESIGNATION_BASE_NORMATIVE_STATUSES

        # Runs only for concepts that carry at least one localization.
        def applicable?(context)
          context.concept.localizations&.any?
        end

        # Returns one issue per term whose non-blank normative_status is not
        # in VALID_STATUSES. Terms are numbered from 1 within a localization.
        def check(context)
          concept = context.concept
          file = context.file_name
          found = []

          concept.localizations.each do |localization|
            language = localization.language_code || "unknown"

            (localization.data&.terms || []).each_with_index do |term, position|
              status = declared_status(term)
              next if status.nil?
              next if VALID_STATUSES.include?(status)

              found << issue(
                "#{language}: term #{position + 1} has invalid normative_status '#{status}'",
                location: file,
                suggestion: "Use one of: #{VALID_STATUSES.join(', ')}",
              )
            end
          end

          found
        end

        private

        # The term's normative_status as a string, or nil when the term has
        # no such attribute or the value is nil or blank.
        def declared_status(term)
          return unless term.respond_to?(:normative_status)

          raw = term.normative_status
          return if raw.nil?

          text = raw.to_s
          text.strip.empty? ? nil : text
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Schema rule GLS-207: a term's designation type, when present, must be
      # one of the string values of Designation::SERIALIZED_TYPES.
      class DesignationTypeRule < Base
        def code = "GLS-207"
        def category = :schema
        def severity = "error"
        def scope = :concept

        VALID_TYPES = Designation::SERIALIZED_TYPES.values.grep(String).uniq.freeze

        # Runs only for concepts that carry at least one localization.
        def applicable?(context)
          context.concept.localizations&.any?
        end

        # Returns one issue per term whose type is set but not in VALID_TYPES.
        # Terms are numbered from 1 within a localization.
        def check(context)
          concept = context.concept
          file = context.file_name
          found = []

          concept.localizations.each do |localization|
            language = localization.language_code || "unknown"

            (localization.data&.terms || []).each_with_index do |term, position|
              type = designation_type(term)
              next unless type
              next if VALID_TYPES.include?(type)

              found << issue(
                "#{language}: term #{position + 1} has unknown designation type '#{type}'",
                location: file,
                suggestion: "Use one of: #{VALID_TYPES.join(', ')}",
              )
            end
          end

          found
        end

        private

        # Reads the type from a Hash term via the "type" key, or from an
        # object that responds to #type; returns nil for anything else.
        def designation_type(term)
          return term["type"] if term.is_a?(Hash)

          term.type if term.respond_to?(:type)
        end
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Quality rule GLS-302: warns when the same preferred term (compared
      # case-insensitively via downcase) is used by more than one concept in
      # the same language.
      class DuplicateTermRule < Base
        def code = "GLS-302"
        def category = :quality
        def severity = "warning"
        def scope = :collection

        # Runs only when the collection contains at least one concept.
        def applicable?(context)
          context.concepts.any?
        end

        # Returns one issue per (language, term) pair shared by two or more
        # distinct concept ids.
        def check(context)
          term_index = build_term_index(context.concepts)
          issues = []

          term_index.each do |(lang, term), ids|
            next if ids.size <= 1

            issues << issue(
              "Duplicate preferred term '#{term}' in #{lang}: " \
              "concepts #{ids.join(', ')}",
              code: code, severity: severity,
              location: lang,
              suggestion: "Differentiate the terms or consolidate the concepts",
            )
          end

          issues
        end

        private

        # Builds { [language, downcased designation] => [concept ids] } from
        # the preferred terms of every concept that has an id.
        def build_term_index(concepts)
          index = Hash.new { |h, k| h[k] = [] }

          concepts.each do |concept|
            id = concept.data&.id&.to_s
            next unless id

            # A concept without localizations contributes nothing.
            concept.localizations&.each do |l10n|
              lang = l10n.language_code
              next unless lang

              (l10n.data&.terms || []).each do |term|
                next unless term.normative_status == "preferred"
                next unless term.designation

                # Record each concept once per key, so a concept that repeats
                # its own preferred term is not reported as a duplicate of
                # itself.
                owners = index[[lang, term.designation.downcase]]
                owners << id unless owners.include?(id)
              end
            end
          end

          index
        end
      end
    end
  end
end
63
+
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Schema rule GLS-003: a localization's entry_status, when set, must be
      # one of VALID_STATUSES.
      class EntryStatusRule < Base
        def code = "GLS-003"
        def category = :schema
        def severity = "error"
        def scope = :concept

        VALID_STATUSES = %w[valid superseded withdrawn draft].freeze

        # Returns one issue per localization with an unrecognised
        # entry_status; localizations without a status are skipped.
        def check(context)
          concept = context.concept
          file = context.file_name
          found = []

          # NOTE(review): this reads `localizations&.values`, while several
          # sibling rules iterate `localizations` directly — confirm the
          # collection supports both.
          localizations = concept.localizations&.values || []

          localizations.each do |localization|
            language = localization.language_code || "unknown"
            status = localization.data&.entry_status
            next if !status || VALID_STATUSES.include?(status)

            where = "#{file}/#{language}"
            found << issue(
              "#{where}: invalid entry_status '#{status}' " \
              "(expected one of: #{VALID_STATUSES.join(', ')})",
              code: code, severity: "error",
              location: where,
            )
          end

          found
        end
      end
    end
  end
end
39
+
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Integrity rule GLS-015: in a GCR context, the concept file's base name
      # (without ".yaml" and without a leading "concepts/") must equal the
      # concept id.
      class FilenameIdRule < Base
        def code = "GLS-015"
        def category = :integrity
        def severity = "error"
        def scope = :concept

        # Runs only when the context reports itself as a GCR package.
        def applicable?(context)
          context.gcr?
        end

        # Returns a single issue when the concept has an id that differs from
        # the file's base name, otherwise an empty array.
        def check(context)
          concept = context.concept
          fname = context.file_name
          expected_id = concept.data&.id&.to_s
          actual_name = fname.sub(/\.yaml\z/, "").sub(%r{\Aconcepts/}, "")

          return [] unless expected_id && expected_id != actual_name

          [issue(
            "filename '#{actual_name}' does not match concept id '#{expected_id}'",
            code: code, severity: severity,
            location: entry_location(fname),
            suggestion: "Rename the entry or fix the concept id",
          )]
        end

        private

        # The comparison above accepts file names with or without a leading
        # "concepts/"; only add the prefix when it is missing so the location
        # never becomes "concepts/concepts/...".
        def entry_location(fname)
          fname.start_with?("concepts/") ? fname : "concepts/#{fname}"
        end
      end
    end
  end
end
35
+
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "zip"
4
+
5
module Glossarist
  module Validation
    module Rules
      # Collection-level validation context backed by a GCR zip archive.
      # Every accessor reads from the archive at +zip_path+ on first use and
      # caches the result.
      class GcrContext
        attr_reader :zip_path

        def initialize(zip_path)
          @zip_path = zip_path
          @metadata = nil
          @metadata_loaded = false
          @concepts = nil
          @bibliography_index = nil
          @asset_index = nil
          @zip_entries = nil
          @localization_index = nil
        end

        # Concepts loaded through GcrPackage. Best effort: any StandardError
        # raised while loading yields an empty list.
        def concepts
          @concepts ||= begin
            pkg = GcrPackage.load(@zip_path)
            pkg.concepts
          rescue StandardError
            []
          end
        end

        # Set of concept ids (as strings) for concepts that have an id.
        def concept_ids
          @concept_ids ||= concepts.filter_map { |c| c.data&.id&.to_s }.to_set
        end

        # Parsed "metadata.yaml" from the archive, or nil when the archive has
        # no such entry. The outcome is cached even when it is nil, so a
        # missing metadata file does not reopen the archive on every call.
        def metadata
          return @metadata if @metadata_loaded

          @metadata = Zip::File.open(@zip_path) do |zf|
            entry = zf.find_entry("metadata.yaml")
            GcrMetadata.from_yaml(entry.get_input_stream.read) if entry
          end
          # Set only after a successful read so a raised error is not cached.
          @metadata_loaded = true
          @metadata
        end

        # Bibliography index built from the concepts plus the raw contents of
        # "bibliography.yaml" (nil when that entry is absent).
        def bibliography_index
          @bibliography_index ||= begin
            bib_yaml = read_zip_file("bibliography.yaml")
            BibliographyIndex.build_from_concepts(concepts,
                                                  bibliography_yaml: bib_yaml)
          end
        end

        # Asset index built from the archive.
        def asset_index
          @asset_index ||= AssetIndex.build_from_zip(@zip_path)
        end

        # Languages listed in the metadata, or [] when there is no metadata
        # or it lists none.
        def declared_languages
          metadata&.languages || []
        end

        # Sorted, de-duplicated language codes found on concept localizations.
        def actual_languages
          @actual_languages ||= concepts.flat_map do |c|
            c.localizations.map(&:language_code)
          end.compact.uniq.sort
        end

        # Set of entry names in the archive.
        def zip_entries
          @zip_entries ||= Zip::File.open(@zip_path) do |zf|
            zf.entries.to_set(&:name)
          end
        end

        # Always empty for this context.
        def localization_index
          {}
        end

        # Always empty for this context.
        def referenced_l10n_uuids
          Set.new
        end

        def gcr?
          true
        end

        # Contents of the named archive entry, or nil when it does not exist.
        def read_zip_file(name)
          Zip::File.open(@zip_path) do |zf|
            entry = zf.find_entry(name)
            entry&.get_input_stream&.read
          end
        end
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # References rule GLS-103: image and asset references in a concept must
      # resolve against the context's asset index. Unresolved references
      # found in text are reported with this rule's severity; unresolved
      # model-level asset references are reported as GLS-104 errors.
      class ImageReferenceRule < Base
        def code = "GLS-103"
        def category = :references
        def severity = "warning"
        def scope = :concept

        # Runs only for concepts that carry at least one localization.
        def applicable?(context)
          context.concept.localizations&.any?
        end

        # Returns text-embedded issues first, then model-level issues.
        def check(context)
          concept = context.concept
          file = context.file_name
          extractor = ReferenceExtractor.new

          embedded = embedded_image_issues(context, concept, file, extractor)
          modelled = model_asset_issues(context, concept, file, extractor)
          embedded + modelled
        end

        private

        # Text-embedded image references (image::path[])
        def embedded_image_issues(context, concept, file, extractor)
          found = []

          concept.localizations.each do |localization|
            language = localization.language_code || "unknown"

            extract_texts(localization).each do |text|
              next unless text

              extractor.extract_from_text(text).each do |ref|
                next unless ref.is_a?(AssetReference)
                next if context.asset_index.resolve?(ref.path)

                found << issue(
                  "unresolved image reference #{ref.path}",
                  code: "GLS-103", severity: severity,
                  location: "#{file}/#{language}",
                  suggestion: "add '#{ref.path}' to the dataset's images/ directory",
                )
              end
            end
          end

          found
        end

        # Model-level asset references (NonVerbRep, GraphicalSymbol)
        def model_asset_issues(context, concept, file, extractor)
          found = []

          extractor.extract_asset_refs_from_concept(concept).each do |ref|
            next if context.asset_index.resolve?(ref.path)

            found << issue(
              "unresolved asset reference #{ref.path}",
              code: "GLS-104", severity: "error",
              location: file,
              suggestion: "add '#{ref.path}' to the dataset's images/ directory",
            )
          end

          found
        end

        # Collects the non-empty content of a localization's definitions,
        # notes and examples, in that order.
        def extract_texts(l10n)
          %i[definition notes examples].each_with_object([]) do |field, texts|
            (l10n.data&.public_send(field) || []).each do |entry|
              body = entry.content
              texts << body if body
            end
          end
        end
      end
    end
  end
end
73
+
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Integrity rule GLS-018: every UUID listed in a concept's
      # localized_concepts map must be a key of the collection's
      # localization index.
      class L10nUuidIntegrityRule < Base
        def code = "GLS-018"
        def category = :integrity
        def severity = "error"
        def scope = :concept

        # Runs only when the collection has a non-empty localization index.
        def applicable?(context)
          context.collection_context.localization_index.any?
        end

        # Returns one issue per language whose UUID is missing from the
        # localization index.
        def check(context)
          concept = context.concept
          fname = context.file_name
          # A concept without data has nothing to check; treat it like an
          # empty map instead of raising NoMethodError on nil.
          lc_map = concept.data&.localized_concepts || {}
          lc_index = context.collection_context.localization_index
          issues = []

          lc_map.each do |lang, uuid|
            next if lc_index.key?(uuid)

            issues << issue(
              "localized_concepts '#{lang}' => '#{uuid}' has no matching file",
              code: code, severity: severity,
              location: fname,
              suggestion: "Add the missing localization file or remove the UUID",
            )
          end

          issues
        end
      end
    end
  end
end
40
+
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Schema rule GLS-206: a localization's language_code, when present,
      # must consist of exactly three lowercase ASCII letters.
      class LanguageCodeFormatRule < Base
        # Whole-string match for three lowercase letters.
        LANGUAGE_CODE_PATTERN = /\A[a-z]{3}\z/

        def code = "GLS-206"
        def category = :schema
        def severity = "error"
        def scope = :concept

        # Runs only for concepts that carry at least one localization.
        def applicable?(context)
          context.concept.localizations&.any?
        end

        # Returns one issue per localization whose language_code does not
        # match the pattern; localizations without a code are skipped.
        def check(context)
          concept = context.concept
          file = context.file_name
          found = []

          concept.localizations.each do |localization|
            language = localization.language_code
            next if language.nil?
            next if language.to_s.match?(LANGUAGE_CODE_PATTERN)

            found << issue(
              "language_code '#{language}' is not a valid ISO 639-3 code (expected 3 lowercase letters)",
              location: file,
              suggestion: "Use a 3-letter ISO 639-3 code (e.g. eng, fra, deu)",
            )
          end

          found
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Localization rule GLS-013: warns when a concept lacks a localization
      # for one or more of the languages declared by the context.
      class LanguageCoverageRule < Base
        def code = "GLS-013"
        def category = :localization
        def severity = "warning"
        def scope = :concept

        # Runs only when the context declares a non-empty Array of languages.
        def applicable?(context)
          languages = context.declared_languages
          languages.is_a?(Array) && languages.any?
        end

        # Returns a single issue naming every declared language the concept
        # does not cover, or an empty array when all are covered.
        def check(context)
          concept = context.concept
          file = context.file_name
          expected = context.declared_languages
          covered = concept.localizations&.values&.map(&:language_code) || []
          absent = expected - covered

          return [] if absent.empty?

          listing = absent.join(", ")
          [issue(
            "missing localizations for declared languages: #{listing}",
            code: code, severity: severity,
            location: file,
            suggestion: "Add localizations for: #{listing}",
          )]
        end
      end
    end
  end
end
37
+
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Integrity rule GLS-012: compares the languages declared by the
      # context with the languages actually used by its concepts and warns
      # about differences in either direction.
      class LanguageListRule < Base
        def code = "GLS-012"
        def category = :integrity
        def severity = "warning"
        def scope = :collection

        # Runs only when the context declares a non-empty Array of languages.
        def applicable?(context)
          languages = context.declared_languages
          languages.is_a?(Array) && languages.any?
        end

        # Returns up to two issues: declared-but-unused languages first, then
        # used-but-undeclared languages.
        def check(context)
          declared = Set.new(context.declared_languages)
          actual = Set.new(context.actual_languages)
          found = []

          unused = declared - actual
          found << unused_languages_issue(unused) if unused.any?

          undeclared = actual - declared
          found << undeclared_languages_issue(undeclared) if undeclared.any?

          found
        end

        private

        # Issue for languages that are declared but appear in no concept.
        def unused_languages_issue(languages)
          issue(
            "declared languages not found in concepts: #{languages.sort.join(', ')}",
            code: code, severity: severity,
            suggestion: "Update the languages list or add missing localizations",
          )
        end

        # Issue for languages that concepts use but the list does not declare.
        def undeclared_languages_issue(languages)
          issue(
            "concepts use languages not declared: #{languages.sort.join(', ')}",
            code: code, severity: severity,
            suggestion: "Add these languages to the languages list in metadata",
          )
        end
      end
    end
  end
end
46
+
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Structure rule GLS-002: a concept must have at least one localization.
      class LocalizationPresenceRule < Base
        def code = "GLS-002"
        def category = :structure
        def scope = :concept

        # Returns a single error issue when the concept has no localizations,
        # otherwise an empty array.
        def check(context)
          concept = context.concept
          file = context.file_name
          localizations = concept.localizations&.values || []

          if localizations.any?
            []
          else
            [issue("#{file}: no localizations found",
                   code: code, severity: "error")]
          end
        end
      end
    end
  end
end
25
+