glossarist 2.6.4 → 2.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +11 -111
  3. data/Gemfile +0 -2
  4. data/README.adoc +207 -1
  5. data/glossarist.gemspec +1 -1
  6. data/lib/glossarist/asset_reference.rb +16 -0
  7. data/lib/glossarist/bibliographic_reference.rb +16 -0
  8. data/lib/glossarist/concept_enricher.rb +1 -0
  9. data/lib/glossarist/concept_reference.rb +4 -0
  10. data/lib/glossarist/concept_validator.rb +27 -56
  11. data/lib/glossarist/dataset_validator.rb +30 -34
  12. data/lib/glossarist/gcr_validator.rb +26 -101
  13. data/lib/glossarist/reference_extractor.rb +80 -10
  14. data/lib/glossarist/reference_resolver.rb +1 -0
  15. data/lib/glossarist/validation/asset_index.rb +113 -0
  16. data/lib/glossarist/validation/bibliography_index.rb +121 -0
  17. data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +60 -0
  18. data/lib/glossarist/validation/rules/authoritative_source_rule.rb +47 -0
  19. data/lib/glossarist/validation/rules/base.rb +46 -0
  20. data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +37 -0
  21. data/lib/glossarist/validation/rules/citation_completeness_rule.rb +63 -0
  22. data/lib/glossarist/validation/rules/concept_context.rb +45 -0
  23. data/lib/glossarist/validation/rules/concept_count_rule.rb +34 -0
  24. data/lib/glossarist/validation/rules/concept_id_rule.rb +29 -0
  25. data/lib/glossarist/validation/rules/concept_id_uniqueness_rule.rb +42 -0
  26. data/lib/glossarist/validation/rules/concept_mention_rule.rb +44 -0
  27. data/lib/glossarist/validation/rules/concept_status_rule.rb +36 -0
  28. data/lib/glossarist/validation/rules/concept_uri_rule.rb +30 -0
  29. data/lib/glossarist/validation/rules/dataset_context.rb +99 -0
  30. data/lib/glossarist/validation/rules/date_type_rule.rb +54 -0
  31. data/lib/glossarist/validation/rules/date_validity_rule.rb +66 -0
  32. data/lib/glossarist/validation/rules/definition_content_rule.rb +41 -0
  33. data/lib/glossarist/validation/rules/designation_status_rule.rb +45 -0
  34. data/lib/glossarist/validation/rules/designation_type_rule.rb +55 -0
  35. data/lib/glossarist/validation/rules/duplicate_term_rule.rb +63 -0
  36. data/lib/glossarist/validation/rules/entry_status_rule.rb +39 -0
  37. data/lib/glossarist/validation/rules/filename_id_rule.rb +35 -0
  38. data/lib/glossarist/validation/rules/gcr_context.rb +92 -0
  39. data/lib/glossarist/validation/rules/image_reference_rule.rb +73 -0
  40. data/lib/glossarist/validation/rules/l10n_uuid_integrity_rule.rb +40 -0
  41. data/lib/glossarist/validation/rules/language_code_format_rule.rb +39 -0
  42. data/lib/glossarist/validation/rules/language_coverage_rule.rb +37 -0
  43. data/lib/glossarist/validation/rules/language_list_rule.rb +46 -0
  44. data/lib/glossarist/validation/rules/localization_presence_rule.rb +25 -0
  45. data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +64 -0
  46. data/lib/glossarist/validation/rules/orphaned_images_rule.rb +68 -0
  47. data/lib/glossarist/validation/rules/orphaned_l10n_files_rule.rb +39 -0
  48. data/lib/glossarist/validation/rules/preferred_term_rule.rb +41 -0
  49. data/lib/glossarist/validation/rules/registry.rb +42 -0
  50. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +102 -0
  51. data/lib/glossarist/validation/rules/related_concept_rule.rb +40 -0
  52. data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +87 -0
  53. data/lib/glossarist/validation/rules/source_type_rule.rb +63 -0
  54. data/lib/glossarist/validation/rules/terms_presence_rule.rb +39 -0
  55. data/lib/glossarist/validation/rules.rb +85 -0
  56. data/lib/glossarist/validation/validation_issue.rb +39 -0
  57. data/lib/glossarist/validation.rb +12 -0
  58. data/lib/glossarist/validation_result.rb +26 -9
  59. data/lib/glossarist/version.rb +1 -1
  60. data/lib/glossarist.rb +3 -0
  61. metadata +60 -15
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Quality rule GLS-306: emits a warning when none of the sources attached
# to a concept's localizations has type "authoritative".
class AuthoritativeSourceRule < Base
  def code = "GLS-306"
  def category = :quality
  def severity = "warning"
  def scope = :concept

  # The rule only runs for concepts that have at least one localization.
  def applicable?(context)
    context.concept.localizations&.any?
  end

  # @param context [#concept, #file_name] per-concept validation context
  # @return [Array] empty when an authoritative source is present,
  #   otherwise a single-element array holding the issue
  def check(context)
    location = context.file_name
    authoritative_found = gather_all_sources(context.concept).any? do |source|
      source.type == "authoritative"
    end
    return [] if authoritative_found

    [
      issue(
        "no authoritative source defined",
        code: code,
        severity: severity,
        location: location,
        suggestion: "Add at least one source with type: authoritative",
      ),
    ]
  end

  private

  # Collects, for every localization, the sources found directly on its data
  # plus those attached to each definition, note and example entry.
  def gather_all_sources(concept)
    collected = []
    append = ->(items) { (items || []).each { |item| collected << item } }

    concept.localizations.each do |l10n|
      data = l10n.data
      append.call(data&.sources)
      [data&.definition, data&.notes, data&.examples].each do |entries|
        (entries || []).each { |entry| append.call(entry.sources) }
      end
    end

    collected
  end
end
44
+ end
45
+ end
46
+ end
47
+
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Common superclass for validation rules. Provides default metadata
# (code, category, severity, scope), a default applicability check and a
# no-op check; subclasses override whichever of these they need.
class Base
  # Rule identifier; nil unless a subclass overrides it.
  def code = nil

  # Grouping used for the rule; defaults to :general.
  def category = :general

  # Severity label used when an issue does not specify its own.
  def severity = "error"

  # Level the rule operates on; defaults to :concept.
  def scope = :concept

  # Whether the rule should run for the given context (always by default).
  def applicable?(_context) = true

  # Runs the rule and returns the issues found (none by default).
  def check(_context) = []

  private

  # Builds a ValidationIssue, falling back to the rule's own severity and
  # code when the caller does not pass them.
  def issue(message, location: nil, suggestion: nil, severity: nil,
            code: nil)
    attributes = {
      severity: severity || self.severity,
      code: code || self.code,
      message: message,
      location: location,
      suggestion: suggestion,
    }
    ValidationIssue.new(**attributes)
  end
end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Structure rule GLS-020-YAML: reports an error when the package's
# bibliography.yaml cannot be parsed as YAML.
class BibliographyYamlRule < Base
  def code = "GLS-020-YAML"
  def category = :structure
  def severity = "error"
  def scope = :collection

  # Only runs when the context reports itself as a GCR package.
  def applicable?(context)
    context.gcr?
  end

  # @param context [#read_zip_file] collection-level validation context
  # @return [Array] empty when the file is absent or syntactically valid,
  #   otherwise a single issue carrying the parser's message
  def check(context)
    bib_content = context.read_zip_file("bibliography.yaml")
    return [] unless bib_content

    begin
      # Parse to an AST only. Building Ruby objects (as safe_load does) can
      # raise Psych::DisallowedClass or Psych::BadAlias for documents that
      # are syntactically valid (unquoted dates, anchors/aliases), and those
      # errors would escape this rule and abort validation.
      YAML.parse(bib_content)
    rescue Psych::SyntaxError => e
      return [issue(
        "bibliography.yaml is invalid YAML: #{e.message}",
        code: code, severity: severity,
        location: "bibliography.yaml",
        suggestion: "Fix YAML syntax errors in bibliography.yaml",
      )]
    end

    []
  end
end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Quality rule GLS-304: warns about sources whose origin is present but
# empty, and about structured origins that lack a source field.
class CitationCompletenessRule < Base
  def code = "GLS-304"
  def category = :quality
  def severity = "warning"
  def scope = :concept

  # The rule only runs for concepts that have at least one localization.
  def applicable?(context)
    context.concept.localizations&.any?
  end

  # @param context [#concept, #file_name] per-concept validation context
  # @return [Array] one issue per problem found; sources are numbered from 1
  #   in the order they are gathered across all localizations
  def check(context)
    location = context.file_name
    found = []

    gather_all_sources(context.concept).each.with_index(1) do |source, position|
      origin = source.origin
      next unless origin

      # An origin object exists but carries none of its identifying fields.
      if [origin.text, origin.source, origin.id].all?(&:nil?)
        found << issue(
          "source #{position} has empty origin (no text, source, or id)",
          code: "GLS-304", severity: severity,
          location: location,
          suggestion: "Add at minimum an origin.text or origin.source + origin.id",
        )
      end

      if origin.structured? && origin.source.nil?
        found << issue(
          "source #{position} is structured but missing source field",
          code: "GLS-304", severity: severity,
          location: location,
          suggestion: "Add origin.source to the citation",
        )
      end
    end

    found
  end

  private

  # Collects, for every localization, the sources found directly on its data
  # plus those attached to each definition, note and example entry.
  def gather_all_sources(concept)
    collected = []
    append = ->(items) { (items || []).each { |item| collected << item } }

    concept.localizations.each do |l10n|
      data = l10n.data
      append.call(data&.sources)
      [data&.definition, data&.notes, data&.examples].each do |entries|
        (entries || []).each { |entry| append.call(entry.sources) }
      end
    end

    collected
  end
end
60
+ end
61
+ end
62
+ end
63
+
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Per-concept view handed to concept-scoped rules. Holds the concept and
# its file name, and forwards collection-wide lookups to the enclosing
# collection context.
class ConceptContext
  attr_reader :concept, :file_name, :collection_context

  # @param concept the concept under validation
  # @param file_name the name used when reporting issues for this concept
  # @param collection_context the context the lookups below are forwarded to
  def initialize(concept, file_name:, collection_context:)
    @concept = concept
    @file_name = file_name
    @collection_context = collection_context
  end

  # String form of the concept's id, or nil when data or id is missing.
  def concept_id = @concept.data&.id&.to_s

  # The readers below delegate straight to the collection context.
  def bibliography_index = @collection_context.bibliography_index
  def asset_index = @collection_context.asset_index
  def concept_ids = @collection_context.concept_ids
  def declared_languages = @collection_context.declared_languages
  def metadata = @collection_context.metadata
  def gcr? = @collection_context.gcr?
end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Integrity rule GLS-011: reports an error when the concept_count declared
# in the metadata differs from the number of concepts in the context.
class ConceptCountRule < Base
  def code = "GLS-011"
  def category = :integrity
  def severity = "error"
  def scope = :collection

  # Runs only when metadata exists and declares a concept_count.
  def applicable?(context)
    meta = context.metadata
    meta && meta.concept_count
  end

  # @param context [#metadata, #concepts] collection-level validation context
  # @return [Array] empty when the counts agree, otherwise one issue
  def check(context)
    declared = context.metadata.concept_count
    found = context.concepts.size
    return [] if declared == found

    mismatch = issue(
      "metadata.yaml concept_count is #{declared} " \
      "but found #{found} concept files",
      code: code,
      severity: severity,
      location: "metadata.yaml",
      suggestion: "Update concept_count or add/remove concept files",
    )
    [mismatch]
  end
end
31
+ end
32
+ end
33
+ end
34
+
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Structure rule GLS-001: reports an error when a concept has no id.
class ConceptIdRule < Base
  def code = "GLS-001"
  def category = :structure
  def scope = :concept

  # @param context [#concept, #file_name] per-concept validation context
  # @return [Array] empty when the concept's data carries an id, otherwise
  #   a single issue whose message is prefixed with the file name
  def check(context)
    file = context.file_name
    return [] if context.concept.data&.id

    [issue("#{file}: missing concept id", code: code, severity: "error")]
  end
end
26
+ end
27
+ end
28
+ end
29
+
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Structure rule GLS-001-uniq: reports an error for every concept whose id
# has already been seen earlier in the collection.
class ConceptIdUniquenessRule < Base
  def code = "GLS-001-uniq"
  def category = :structure
  def severity = "error"
  def scope = :collection

  # Nothing to compare when the collection has no concepts.
  def applicable?(context)
    context.concepts.any?
  end

  # @param context [#concepts] collection-level validation context
  # @return [Array] one issue per repeated id occurrence (the first
  #   occurrence of each id is never reported)
  def check(context)
    issues = []
    seen_ids = {}

    context.concepts.each do |concept|
      id = concept.data&.id&.to_s
      # Concepts without an id are skipped, so no index-based fallback
      # name is ever needed.
      next unless id

      # NOTE(review): the file name is synthesized from the id, so the two
      # names in the message below are always the same — confirm whether
      # real file names should be reported instead.
      fname = "concept-#{id}.yaml"

      if seen_ids[id]
        issues << issue(
          "#{fname}: duplicate id '#{id}' (first seen in #{seen_ids[id]})",
          code: code, severity: "error",
        )
      else
        seen_ids[id] = fname
      end
    end

    issues
  end
end
39
+ end
40
+ end
41
+ end
42
+
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# References rule GLS-100: warns about local concept references whose
# target id is not among the ids known to the context.
class ConceptMentionRule < Base
  def code = "GLS-100"
  def category = :references
  def severity = "warning"
  def scope = :concept

  # The rule only runs for concepts that have at least one localization.
  def applicable?(context)
    context.concept.localizations&.any?
  end

  # @param context [#concept, #file_name, #concept_ids]
  # @return [Array] one issue per unresolved local reference
  def check(context)
    location = context.file_name
    extracted = ReferenceExtractor.new.extract_from_managed_concept(context.concept)
    local_refs = extracted.select { |ref| ref.is_a?(ConceptReference) && ref.local? }

    # References of type "designation" are never reported; the rest are
    # reported only when their concept id is unknown.
    unresolved = local_refs.reject do |ref|
      ref.ref_type == "designation" || context.concept_ids.include?(ref.concept_id)
    end

    unresolved.map do |ref|
      issue(
        "unresolved intra-set reference: #{ref.term} -> #{ref.concept_id}",
        code: "GLS-100", severity: severity,
        location: location,
        suggestion: "add concept '#{ref.concept_id}' to the dataset " \
                    "or verify the reference",
      )
    end
  end
end
41
+ end
42
+ end
43
+ end
44
+
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Schema rule GLS-201: reports an error when a concept's status is set but
# is not one of the statuses listed in the glossary definition.
class ConceptStatusRule < Base
  def code = "GLS-201"
  def category = :schema
  def severity = "error"
  def scope = :concept

  VALID_STATUSES = Glossarist::GlossaryDefinition::CONCEPT_STATUSES

  # Concepts without a status are not checked.
  def applicable?(context)
    !context.concept.status.nil?
  end

  # @param context [#concept, #file_name] per-concept validation context
  # @return [Array] empty when the status is allowed, otherwise one issue
  #   listing the allowed values
  def check(context)
    location = context.file_name
    current = context.concept.status
    return [] if VALID_STATUSES.include?(current)

    invalid = issue(
      "invalid concept status '#{current}'",
      code: code,
      severity: severity,
      location: location,
      suggestion: "Use one of: #{VALID_STATUSES.join(', ')}",
    )
    [invalid]
  end
end
33
+ end
34
+ end
35
+ end
36
+
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Structure rule GLS-014: warns when the package metadata has no non-blank
# uri_prefix.
class ConceptUriRule < Base
  def code = "GLS-014"
  def category = :structure
  def severity = "warning"
  def scope = :collection

  # Runs only for GCR packages that carry metadata.
  def applicable?(context)
    context.gcr? && context.metadata
  end

  # NOTE(review): the message and suggestion mention a URI template, but
  # only uri_prefix is inspected here — confirm whether a template alone
  # should also satisfy this rule.
  #
  # @param context [#metadata] collection-level validation context
  # @return [Array] empty when uri_prefix is present and not blank,
  #   otherwise one issue
  def check(context)
    prefix = context.metadata.uri_prefix
    prefix_present = prefix && !prefix.strip.empty?
    return [] if prefix_present

    missing = issue(
      "no concept URI prefix or template defined in metadata",
      code: code,
      severity: severity,
      location: "metadata.yaml",
      suggestion: "Add uri_prefix or concept_uri_template to metadata.yaml",
    )
    [missing]
  end
end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Collection-level context for a dataset stored as a directory on disk.
# Lazily loads and memoizes the concepts and the lookups derived from them.
class DatasetContext
  attr_reader :path

  # @param path [String] dataset directory; stored as an absolute path
  def initialize(path)
    @path = File.expand_path(path)
    @concepts = nil
    @concept_ids = nil
    @bibliography_index = nil
    @asset_index = nil
    @declared_languages = nil
    @actual_languages = nil
    @localization_index = nil
    @referenced_l10n_uuids = nil
  end

  # Concepts collected from the dataset directory (memoized).
  def concepts
    @concepts ||= ConceptCollector.collect(@path)
  end

  # Set of concept ids as strings; concepts without data or id are skipped.
  def concept_ids
    @concept_ids ||= concepts.filter_map { |c| c.data&.id&.to_s }.to_set
  end

  # A directory dataset exposes no package metadata.
  def metadata
    nil
  end

  def bibliography_index
    @bibliography_index ||= BibliographyIndex.build_from_concepts(
      concepts, dataset_path: @path
    )
  end

  def asset_index
    @asset_index ||= AssetIndex.build_from_directory(@path)
  end

  # Languages listed under "languages" in register.yaml when that file
  # holds a mapping with an array there; otherwise the languages actually
  # used by the concepts.
  def declared_languages
    @declared_languages ||= begin
      reg = load_register_data
      # Guard on Hash: a register.yaml whose top level is a list would
      # otherwise raise TypeError on the string index.
      if reg.is_a?(Hash) && reg["languages"].is_a?(Array)
        reg["languages"]
      else
        actual_languages
      end
    end
  end

  # Sorted, de-duplicated language codes found on the concepts'
  # localizations; concepts without localizations contribute nothing.
  def actual_languages
    @actual_languages ||= concepts.flat_map do |c|
      (c.localizations || []).map(&:language_code)
    end.compact.uniq.sort
  end

  # Hash of localization file base name => file path.
  def localization_index
    @localization_index ||= build_localization_index
  end

  # Set of the values of every concept's localized_concepts mapping;
  # concepts without data or without that mapping contribute nothing.
  def referenced_l10n_uuids
    @referenced_l10n_uuids ||= concepts.flat_map do |c|
      (c.data&.localized_concepts || {}).values
    end.to_set
  end

  # A directory dataset is never a GCR package.
  def gcr?
    false
  end

  # There is no archive to read from, so every lookup yields nil.
  def read_zip_file(_name)
    nil
  end

  private

  # Parsed contents of register.yaml, or nil when the file does not exist.
  # YAML errors raised while loading are not handled here.
  def load_register_data
    reg_path = File.join(@path, "register.yaml")
    return nil unless File.exist?(reg_path)

    YAML.safe_load_file(reg_path)
  end

  # Scans both spellings of the localized-concept directory and indexes
  # every .yaml/.yml file by its base name without extension.
  def build_localization_index
    index = {}
    %w[localized_concept localized-concept].each do |name|
      dir = File.join(@path, "concepts", name)
      next unless File.directory?(dir)

      Dir.glob(File.join(dir, "*.{yaml,yml}")).each do |f|
        uuid = File.basename(f, ".*")
        index[uuid] = f
      end
    end
    index
  end
end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Validation
5
+ module Rules
6
# Schema rule GLS-205: warns when a concept date carries a type that is not
# among the date types listed in the glossary definition.
class DateTypeRule < Base
  def code = "GLS-205"
  def category = :schema
  def severity = "warning"
  def scope = :concept

  VALID_TYPES = Glossarist::GlossaryDefinition::CONCEPT_DATE_TYPES

  # Runs when the concept has any dates or a date_accepted value.
  def applicable?(context)
    subject = context.concept
    (subject.dates&.any?) || subject.date_accepted
  end

  # @param context [#concept, #file_name] per-concept validation context
  # @return [Array] one issue per date with an unrecognised type
  def check(context)
    subject = context.concept
    location = context.file_name
    found = []

    check_date_collection(subject.dates, location, found)

    if subject.date_accepted && subject.date_accepted.type
      validate_date_type(subject.date_accepted, "date_accepted", location, found)
    end

    found
  end

  private

  # Validates every typed entry of the dates list; labels are 1-based.
  def check_date_collection(dates, fname, issues)
    (dates || []).each.with_index(1) do |entry, position|
      validate_date_type(entry, "date #{position}", fname, issues) if entry.type
    end
  end

  # Appends an issue to +issues+ unless the date's type (as a string) is
  # one of VALID_TYPES.
  def validate_date_type(date, label, fname, issues)
    type_name = date.type.to_s
    return if VALID_TYPES.include?(type_name)

    issues << issue(
      "#{label} has invalid type '#{date.type}'",
      location: fname,
      suggestion: "Use one of: #{VALID_TYPES.join(', ')}",
    )
  end
end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+
5
+ module Glossarist
6
+ module Validation
7
+ module Rules
8
# Quality rule GLS-307: warns about concept dates that have a type but no
# value, or whose value cannot be parsed as a date.
class DateValidityRule < Base
  def code = "GLS-307"
  def category = :quality
  def severity = "warning"
  def scope = :concept

  # Runs when the concept has any dates or a date_accepted value.
  def applicable?(context)
    subject = context.concept
    (subject.dates&.any?) || subject.date_accepted
  end

  # @param context [#concept, #file_name] per-concept validation context
  # @return [Array] one issue per missing or unparseable date value
  def check(context)
    subject = context.concept
    location = context.file_name
    found = []

    (subject.dates || []).each.with_index(1) do |entry, position|
      validate_date(entry, "date #{position}", location, found)
    end

    accepted = subject.date_accepted
    validate_date(accepted, "date_accepted", location, found) if accepted

    found
  end

  private

  # Appends an issue to +issues+ when the date has a type but no value, or
  # when its value fails to parse. A date with neither is left alone.
  def validate_date(concept_date, label, fname, issues)
    raw = concept_date.date

    if raw.nil?
      return unless concept_date.type

      issues << issue(
        "#{label} has no date value (type: #{concept_date.type})",
        location: fname,
        suggestion: "Provide a valid ISO 8601 date (e.g. 2024-01-15)",
      )
      return
    end

    text = raw.respond_to?(:iso8601) ? raw.iso8601 : raw.to_s

    # NOTE(review): DateTime.parse accepts more than strict ISO 8601 —
    # confirm whether stricter parsing is intended given the suggestion text.
    begin
      DateTime.parse(text)
    rescue ArgumentError, TypeError
      issues << issue(
        "#{label} has unparseable date value '#{text}'",
        location: fname,
        suggestion: "Use an ISO 8601 date format (e.g. 2024-01-15)",
      )
    end
  end
end
64
+ end
65
+ end
66
+ end