glossarist 2.6.5 → 2.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +1 -4
  3. data/.rubocop_todo.yml +53 -2
  4. data/CLAUDE.md +27 -2
  5. data/README.adoc +532 -56
  6. data/config.yml +68 -1
  7. data/glossarist.gemspec +2 -0
  8. data/lib/glossarist/citation.rb +26 -123
  9. data/lib/glossarist/cli/compare_command.rb +106 -0
  10. data/lib/glossarist/cli/export_command.rb +11 -14
  11. data/lib/glossarist/cli/validate_command.rb +111 -20
  12. data/lib/glossarist/cli.rb +18 -0
  13. data/lib/glossarist/collections/bibliography_collection.rb +4 -2
  14. data/lib/glossarist/collections/localization_collection.rb +2 -0
  15. data/lib/glossarist/comparison_result.rb +35 -0
  16. data/lib/glossarist/concept.rb +1 -1
  17. data/lib/glossarist/concept_collector.rb +44 -0
  18. data/lib/glossarist/concept_comparator.rb +72 -0
  19. data/lib/glossarist/concept_data.rb +20 -0
  20. data/lib/glossarist/concept_diff.rb +15 -0
  21. data/lib/glossarist/concept_document.rb +11 -0
  22. data/lib/glossarist/concept_manager.rb +19 -5
  23. data/lib/glossarist/concept_ref.rb +13 -0
  24. data/lib/glossarist/concept_reference.rb +12 -19
  25. data/lib/glossarist/concept_validator.rb +6 -1
  26. data/lib/glossarist/context_configuration.rb +90 -0
  27. data/lib/glossarist/dataset_validator.rb +8 -4
  28. data/lib/glossarist/designation/abbreviation.rb +0 -2
  29. data/lib/glossarist/designation/base.rb +21 -1
  30. data/lib/glossarist/designation/expression.rb +3 -0
  31. data/lib/glossarist/designation/letter_symbol.rb +0 -4
  32. data/lib/glossarist/designation/prefix.rb +17 -0
  33. data/lib/glossarist/designation/suffix.rb +17 -0
  34. data/lib/glossarist/designation/symbol.rb +0 -2
  35. data/lib/glossarist/gcr_metadata.rb +7 -14
  36. data/lib/glossarist/gcr_package.rb +35 -23
  37. data/lib/glossarist/gcr_validator.rb +38 -17
  38. data/lib/glossarist/glossary_definition.rb +5 -0
  39. data/lib/glossarist/localized_concept.rb +8 -0
  40. data/lib/glossarist/managed_concept.rb +39 -6
  41. data/lib/glossarist/managed_concept_data.rb +22 -2
  42. data/lib/glossarist/non_verb_rep.rb +21 -6
  43. data/lib/glossarist/pronunciation.rb +32 -0
  44. data/lib/glossarist/rdf/ext/jsonld_transform_ext.rb +208 -0
  45. data/lib/glossarist/rdf/ext/mapping_ext.rb +37 -0
  46. data/lib/glossarist/rdf/ext/mapping_rule_ext.rb +27 -0
  47. data/lib/glossarist/rdf/ext/member_rule_ext.rb +34 -0
  48. data/lib/glossarist/rdf/ext/turtle_transform_ext.rb +222 -0
  49. data/lib/glossarist/rdf/ext.rb +39 -0
  50. data/lib/glossarist/rdf/gloss_citation.rb +36 -0
  51. data/lib/glossarist/rdf/gloss_concept.rb +58 -0
  52. data/lib/glossarist/rdf/gloss_concept_date.rb +24 -0
  53. data/lib/glossarist/rdf/gloss_concept_reference.rb +29 -0
  54. data/lib/glossarist/rdf/gloss_concept_source.rb +37 -0
  55. data/lib/glossarist/rdf/gloss_designation.rb +146 -0
  56. data/lib/glossarist/rdf/gloss_detailed_definition.rb +24 -0
  57. data/lib/glossarist/rdf/gloss_grammar_info.rb +57 -0
  58. data/lib/glossarist/rdf/gloss_locality.rb +25 -0
  59. data/lib/glossarist/rdf/gloss_localized_concept.rb +67 -0
  60. data/lib/glossarist/rdf/gloss_non_verbal_rep.rb +31 -0
  61. data/lib/glossarist/rdf/gloss_pronunciation.rb +32 -0
  62. data/lib/glossarist/rdf/gloss_reference.rb +55 -0
  63. data/lib/glossarist/rdf/namespaces/glossarist_namespace.rb +12 -0
  64. data/lib/glossarist/rdf/namespaces/iso_thes_namespace.rb +12 -0
  65. data/lib/glossarist/rdf/namespaces/owl_namespace.rb +12 -0
  66. data/lib/glossarist/rdf/namespaces/prov_namespace.rb +12 -0
  67. data/lib/glossarist/rdf/namespaces/rdf_namespace.rb +12 -0
  68. data/lib/glossarist/rdf/namespaces/skosxl_namespace.rb +12 -0
  69. data/lib/glossarist/rdf/namespaces.rb +8 -2
  70. data/lib/glossarist/rdf/relationships.rb +19 -0
  71. data/lib/glossarist/rdf/v3/configuration.rb +15 -0
  72. data/lib/glossarist/rdf/v3.rb +79 -0
  73. data/lib/glossarist/rdf.rb +22 -2
  74. data/lib/glossarist/reference_extractor.rb +15 -24
  75. data/lib/glossarist/reference_resolver.rb +3 -3
  76. data/lib/glossarist/related_concept.rb +2 -10
  77. data/lib/glossarist/schema_migration.rb +39 -0
  78. data/lib/glossarist/sts/term_mapper.rb +2 -2
  79. data/lib/glossarist/transforms/concept_to_gloss_transform.rb +355 -0
  80. data/lib/glossarist/transforms.rb +2 -2
  81. data/lib/glossarist/urn_resolver.rb +13 -1
  82. data/lib/glossarist/v1/concept.rb +18 -11
  83. data/lib/glossarist/v2/citation.rb +36 -0
  84. data/lib/glossarist/v2/concept_data.rb +46 -0
  85. data/lib/glossarist/v2/concept_document.rb +18 -0
  86. data/lib/glossarist/v2/concept_ref.rb +8 -0
  87. data/lib/glossarist/v2/concept_source.rb +16 -0
  88. data/lib/glossarist/v2/configuration.rb +13 -0
  89. data/lib/glossarist/v2/detailed_definition.rb +14 -0
  90. data/lib/glossarist/v2/localized_concept.rb +9 -0
  91. data/lib/glossarist/v2/managed_concept.rb +25 -0
  92. data/lib/glossarist/v2/managed_concept_data.rb +49 -0
  93. data/lib/glossarist/v2/related_concept.rb +15 -0
  94. data/lib/glossarist/v2.rb +28 -0
  95. data/lib/glossarist/v3/bibliography_entry.rb +19 -0
  96. data/lib/glossarist/v3/bibliography_file.rb +27 -0
  97. data/lib/glossarist/v3/citation.rb +30 -0
  98. data/lib/glossarist/v3/concept_data.rb +46 -0
  99. data/lib/glossarist/v3/concept_document.rb +18 -0
  100. data/lib/glossarist/v3/concept_ref.rb +8 -0
  101. data/lib/glossarist/v3/concept_source.rb +16 -0
  102. data/lib/glossarist/v3/configuration.rb +13 -0
  103. data/lib/glossarist/v3/detailed_definition.rb +14 -0
  104. data/lib/glossarist/v3/image_entry.rb +21 -0
  105. data/lib/glossarist/v3/image_file.rb +31 -0
  106. data/lib/glossarist/v3/localized_concept.rb +9 -0
  107. data/lib/glossarist/v3/managed_concept.rb +26 -0
  108. data/lib/glossarist/v3/managed_concept_data.rb +34 -0
  109. data/lib/glossarist/v3/related_concept.rb +15 -0
  110. data/lib/glossarist/v3.rb +36 -0
  111. data/lib/glossarist/validation/asset_index.rb +4 -3
  112. data/lib/glossarist/validation/bibliography_index.rb +61 -30
  113. data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +2 -15
  114. data/lib/glossarist/validation/rules/authoritative_source_rule.rb +2 -15
  115. data/lib/glossarist/validation/rules/base.rb +5 -0
  116. data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +2 -3
  117. data/lib/glossarist/validation/rules/citation_completeness_rule.rb +5 -27
  118. data/lib/glossarist/validation/rules/dataset_context.rb +8 -3
  119. data/lib/glossarist/validation/rules/date_validity_rule.rb +1 -1
  120. data/lib/glossarist/validation/rules/designation_status_rule.rb +0 -1
  121. data/lib/glossarist/validation/rules/designation_type_rule.rb +1 -5
  122. data/lib/glossarist/validation/rules/domain_ref_rule.rb +37 -0
  123. data/lib/glossarist/validation/rules/domain_target_rule.rb +56 -0
  124. data/lib/glossarist/validation/rules/gcr_context.rb +12 -13
  125. data/lib/glossarist/validation/rules/image_reference_rule.rb +2 -17
  126. data/lib/glossarist/validation/rules/locality_completeness_rule.rb +58 -0
  127. data/lib/glossarist/validation/rules/localization_consistency_rule.rb +72 -0
  128. data/lib/glossarist/validation/rules/localization_presence_rule.rb +1 -1
  129. data/lib/glossarist/validation/rules/model_validity_rule.rb +71 -0
  130. data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +1 -13
  131. data/lib/glossarist/validation/rules/orphaned_images_rule.rb +16 -11
  132. data/lib/glossarist/validation/rules/ref_shape_rule.rb +68 -0
  133. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +1 -3
  134. data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +1 -3
  135. data/lib/glossarist/validation/rules/related_concept_target_rule.rb +64 -0
  136. data/lib/glossarist/validation/rules/schema_version_rule.rb +41 -0
  137. data/lib/glossarist/validation/rules/source_type_rule.rb +1 -15
  138. data/lib/glossarist/validation/rules/source_urn_format_rule.rb +65 -0
  139. data/lib/glossarist/validation/rules/uuid_format_rule.rb +33 -0
  140. data/lib/glossarist/validation/rules.rb +10 -43
  141. data/lib/glossarist/validation/validation_issue.rb +14 -11
  142. data/lib/glossarist/validation_result.rb +12 -22
  143. data/lib/glossarist/version.rb +1 -1
  144. data/lib/glossarist.rb +10 -0
  145. data/memory/project-status.md +43 -0
  146. data/scripts/migrate_dataset.rb +180 -0
  147. data/scripts/migrate_isotc204_to_v3.rb +134 -0
  148. data/scripts/migrate_isotc211_to_v3.rb +153 -0
  149. data/scripts/migrate_osgeo_to_v3.rb +155 -0
  150. data/scripts/upgrade_dataset_to_v3.rb +47 -0
  151. metadata +112 -6
  152. data/TODO.integration/01-gcr-package-cli.md +0 -180
  153. data/lib/glossarist/rdf/skos_concept.rb +0 -43
  154. data/lib/glossarist/rdf/skos_vocabulary.rb +0 -25
  155. data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -131
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Verifies that every entry in the localized_concepts map points to a
      # loaded localization, that every loaded localization has a
      # corresponding entry in the map, and that the UUID recorded in the map
      # agrees with the UUID of the loaded localization when both are present.
      class LocalizationConsistencyRule < Base
        def code = "GLS-017"
        def category = :integrity
        def severity = "error"
        def scope = :concept

        # The rule applies when the concept has at least one loaded
        # localization OR a non-empty localized_concepts map.
        def applicable?(context)
          context.concept.localizations&.any? ||
            context.concept.data&.localized_concepts&.any?
        end

        # Returns the list of issues found for the concept in +context+.
        # Issues are reported in a fixed order: map entries without a loaded
        # localization, loaded localizations missing from the map, then UUID
        # mismatches.
        def check(context)
          concept = context.concept
          fname = context.file_name

          lc_map = concept.data&.localized_concepts || {}
          # applicable? is also true when only the map is populated, so the
          # localizations themselves may be nil here; normalise once so that
          # every check below can iterate safely.
          localizations = concept.localizations || []
          loaded_langs = localizations.map(&:language_code).compact

          missing_localization_issues(lc_map, loaded_langs, fname) +
            unmapped_localization_issues(lc_map, loaded_langs, fname) +
            uuid_mismatch_issues(lc_map, localizations, fname)
        end

        private

        # Map has an entry but no localization with that language is loaded.
        def missing_localization_issues(lc_map, loaded_langs, fname)
          issues = []
          lc_map.each_key do |lang|
            next if loaded_langs.include?(lang)

            issues << issue(
              "localized_concepts map has '#{lang}' but no localization loaded",
              location: fname,
              suggestion: "Add a localization for '#{lang}' or remove it from the map",
            )
          end
          issues
        end

        # A localization is loaded but its language is absent from the map.
        def unmapped_localization_issues(lc_map, loaded_langs, fname)
          issues = []
          loaded_langs.each do |lang|
            next if lc_map.key?(lang)

            issues << issue(
              "localization '#{lang}' is loaded but not in localized_concepts map",
              location: fname,
              suggestion: "Add '#{lang}' to the localized_concepts map",
            )
          end
          issues
        end

        # Both sides know the language but disagree on the UUID. Entries where
        # either UUID is missing are skipped rather than reported.
        def uuid_mismatch_issues(lc_map, localizations, fname)
          issues = []
          localizations.each do |l10n|
            lang = l10n.language_code
            next unless lang

            expected_uuid = lc_map[lang]
            actual_uuid = l10n.uuid
            next unless expected_uuid && actual_uuid
            next if expected_uuid == actual_uuid

            issues << issue(
              "UUID mismatch for '#{lang}': map says '#{expected_uuid}', localization is '#{actual_uuid}'",
              location: fname,
              suggestion: "Ensure the UUID in the map matches the localization file",
            )
          end
          issues
        end
      end
    end
  end
end
@@ -16,7 +16,7 @@ module Glossarist
16
16
  return [] if l10ns.any?
17
17
 
18
18
  [issue("#{fname}: no localizations found",
19
- code: code, severity: "error")]
19
+ code: code, severity: "warning")]
20
20
  end
21
21
  end
22
22
  end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Runs the model-level `validate` of the concept and of every nested
      # Lutaml::Model::Serializable reachable through its attributes, and
      # reports each returned error as an issue prefixed with the attribute
      # path at which it was found.
      class ModelValidityRule < Base
        def code = "GLS-050"
        def category = :structure
        def severity = "error"
        def scope = :concept

        def applicable?(context)
          context.concept.is_a?(Lutaml::Model::Serializable)
        end

        def check(context)
          validate_recursive(context.concept, context.file_name)
        end

        private

        # Issues for +model+ itself followed by the issues of its children.
        # Anything that is not a serializable model yields no issues.
        def validate_recursive(model, location, path = "")
          return [] unless model.is_a?(Lutaml::Model::Serializable)

          own = collect_model_errors(model, location, path)
          own.concat(recurse_attributes(model, location, path))
        end

        # Turns the errors returned by model.validate into issues; the
        # message is prefixed with "<path>: " unless the path is empty.
        def collect_model_errors(model, location, path)
          found = model.validate
          return [] if found.empty?

          label = path.empty? ? "" : "#{path}: "
          found.map do |error|
            issue("#{label}#{error}", location: location)
          end
        end

        # Visits every declared attribute of the model's class and validates
        # the non-nil values.
        def recurse_attributes(model, location, path)
          model.class.attributes.flat_map do |name, _definition|
            value = model.public_send(name)
            next [] if value.nil?

            validate_collection(value, location, build_path(path, name))
          end
        end

        # Arrays are validated element by element (path gets an "[index]"
        # suffix); a single serializable model is validated directly; any
        # other value is ignored.
        def validate_collection(value, location, path)
          if value.is_a?(Array)
            value.flat_map.with_index do |item, idx|
              validate_recursive(item, location, "#{path}[#{idx}]")
            end
          elsif value.is_a?(Lutaml::Model::Serializable)
            validate_recursive(value, location, path)
          else
            []
          end
        end

        # Dotted attribute path: "name" at the root, "parent.name" below it.
        def build_path(parent, name)
          return name.to_s if parent.empty?

          "#{parent}.#{name}"
        end
      end
    end
  end
end
@@ -20,8 +20,7 @@ module Glossarist
20
20
 
21
21
  context.concepts.each do |concept|
22
22
  concept.localizations.each do |l10n|
23
- texts = extract_texts(l10n)
24
- texts.each do |text|
23
+ l10n.text_content.each do |text|
25
24
  next unless text
26
25
  extractor.extract_from_text(text).each do |ref|
27
26
  if ref.is_a?(BibliographicReference)
@@ -47,18 +46,7 @@ module Glossarist
47
46
 
48
47
  issues
49
48
  end
50
-
51
- private
52
-
53
- def extract_texts(l10n)
54
- texts = []
55
- (l10n.data&.definition || []).each { |d| texts << d.content if d.content }
56
- (l10n.data&.notes || []).each { |n| texts << n.content if n.content }
57
- (l10n.data&.examples || []).each { |e| texts << e.content if e.content }
58
- texts
59
- end
60
49
  end
61
50
  end
62
51
  end
63
52
  end
64
-
@@ -18,10 +18,8 @@ module Glossarist
18
18
  referenced_paths = Set.new
19
19
 
20
20
  context.concepts.each do |concept|
21
- # Text-embedded image refs
22
21
  concept.localizations.each do |l10n|
23
- texts = extract_texts(l10n)
24
- texts.each do |text|
22
+ l10n.text_content.each do |text|
25
23
  next unless text
26
24
  extractor.extract_from_text(text).each do |ref|
27
25
  if ref.is_a?(AssetReference)
@@ -31,12 +29,20 @@ module Glossarist
31
29
  end
32
30
  end
33
31
 
34
- # Model-level asset refs
35
32
  extractor.extract_asset_refs_from_concept(concept).each do |ref|
36
33
  referenced_paths.add(ref.path)
37
34
  end
38
35
  end
39
36
 
37
+ images_file = load_images_file(context)
38
+ if images_file
39
+ context.bibliography_index.entries.each_value do |entry|
40
+ next unless entry[:source].is_a?(V3::ImageEntry)
41
+ path = entry[:source].path
42
+ referenced_paths.add(path) if path
43
+ end
44
+ end
45
+
40
46
  issues = []
41
47
  context.asset_index.each_path do |path|
42
48
  next if referenced_paths.include?(path)
@@ -54,15 +60,14 @@ module Glossarist
54
60
 
55
61
  private
56
62
 
57
- def extract_texts(l10n)
58
- texts = []
59
- (l10n.data&.definition || []).each { |d| texts << d.content if d.content }
60
- (l10n.data&.notes || []).each { |n| texts << n.content if n.content }
61
- (l10n.data&.examples || []).each { |e| texts << e.content if e.content }
62
- texts
63
+ def load_images_file(context)
64
+ return @images_file if defined?(@images_file)
65
+
66
+ @images_file = V3::ImageFile.from_file(
67
+ File.join(context.path, "images.yaml")
68
+ )
63
69
  end
64
70
  end
65
71
  end
66
72
  end
67
73
  end
68
-
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Checks the shape of reference objects attached to a concept:
      #
      # * for every source that has an origin, origin.ref must be present and
      #   carry at least one of `source` / `id`;
      # * for every related concept that has a ref, that ref must carry at
      #   least one of `source` / `id`.
      class RefShapeRule < Base
        def code = "GLS-305"
        def category = :schema
        def severity = "error"
        def scope = :concept

        # Applies when there is anything to inspect: localizations (whose
        # sources are checked) or related concepts (whose refs are checked).
        # Gating on localizations alone would silently skip the related-ref
        # check for concepts that have related entries but no localizations.
        def applicable?(context)
          concept = context.concept
          concept.localizations&.any? || concept.related&.any?
        end

        # Returns the issues found for the concept in +context+: source
        # issues first, then related-concept issues.
        def check(context)
          concept = context.concept
          fname = context.file_name
          issues = []

          check_sources(concept, fname, issues)
          check_related(concept, fname, issues)

          issues
        end

        private

        # Appends an issue to +issues+ for every source whose origin has no
        # ref, or whose ref has neither source nor id. Sources without an
        # origin are skipped. Numbering is 1-based across all localizations.
        def check_sources(concept, fname, issues)
          # localizations can be nil when the rule applies only because of
          # related concepts.
          sources = (concept.localizations || []).flat_map(&:all_sources)

          sources.each_with_index do |source, idx|
            origin = source.origin
            next unless origin

            ref = origin.ref
            if ref.nil?
              issues << issue(
                "source #{idx + 1} origin has nil ref (expected Citation::Ref hash)",
                location: fname,
                suggestion: "Set origin.ref to { source: ..., id: ... }",
              )
            elsif ref.source.nil? && ref.id.nil?
              issues << issue(
                "source #{idx + 1} origin.ref has neither source nor id",
                location: fname,
                suggestion: "Provide at least origin.ref.source or origin.ref.id",
              )
            end
          end
        end

        # Appends an issue to +issues+ for every related concept whose ref is
        # present but has neither source nor id. Entries without a ref are
        # skipped.
        def check_related(concept, fname, issues)
          (concept.related || []).each_with_index do |rel, idx|
            ref = rel.ref
            next unless ref
            next unless ref.source.nil? && ref.id.nil?

            issues << issue(
              "related concept #{idx + 1} has empty ref (no source or id)",
              location: fname,
              suggestion: "Provide at least ref.source or ref.id",
            )
          end
        end
      end
    end
  end
end
+ end
@@ -58,9 +58,7 @@ module Glossarist
58
58
  ref = rel.ref
59
59
  return nil unless ref
60
60
 
61
- if ref.is_a?(Glossarist::Citation)
62
- ref.id || ref.text
63
- end
61
+ ref.id || ref.source
64
62
  end
65
63
 
66
64
  def detect_cycles(graph)
@@ -77,9 +77,7 @@ module Glossarist
77
77
  ref = rel.ref
78
78
  return nil unless ref
79
79
 
80
- if ref.is_a?(Glossarist::Citation)
81
- ref.id || ref.text
82
- end
80
+ ref.id || ref.source
83
81
  end
84
82
  end
85
83
  end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Checks the targets of related-concept refs. A ref with an id and no
      # (or a blank) source is treated as local and its id must be among the
      # dataset's concept ids. A ref with a source but no id is checked for
      # URN syntax when the source starts with "urn:". Refs carrying both an
      # id and a non-blank source are not inspected by this rule.
      class RelatedConceptTargetRule < Base
        URN_RE = %r{\Aurn:[a-z0-9][a-z0-9-]{0,31}:[a-z0-9()+,\-.:=@;$_!*'%/?#]+\z}i.freeze

        def code = "GLS-110"
        def category = :references
        def severity = "warning"
        def scope = :concept

        def applicable?(context)
          context.concept.related&.any?
        end

        # Returns one issue per offending related concept, numbered from 1 in
        # the order the related concepts appear.
        def check(context)
          concept = context.concept
          fname = context.file_name
          found = []

          (concept.related || []).each.with_index(1) do |rel, position|
            ref = rel.ref
            next unless ref

            id = ref.id
            source = ref.source

            if id && local_ref?(source)
              # Local target: the id has to be a concept of this dataset.
              next if context.concept_ids.include?(id)

              found << issue(
                "related concept #{position} references '#{id}' which is not in the dataset",
                location: fname,
                suggestion: "Add concept '#{id}' to the dataset or fix the reference",
              )
            elsif source && !id
              # External, source-only target: only URN-looking sources are
              # validated; other source formats are accepted as-is.
              next unless malformed_urn?(source)

              found << issue(
                "related concept #{position} has invalid URN '#{source}'",
                location: fname,
                suggestion: "Fix the URN format (e.g. urn:iso:std:iso:ts:14812)",
              )
            end
          end

          found
        end

        private

        # A ref is local when it names no source (nil or whitespace only).
        def local_ref?(source)
          return true if source.nil?

          source.strip.empty?
        end

        # True when +source+ claims to be a URN but does not match URN_RE.
        def malformed_urn?(source)
          source.start_with?("urn:") && !URN_RE.match?(source)
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Warns when a managed concept declares no schema_version, or declares
      # one whose string form is anything other than "3".
      class SchemaVersionRule < Base
        def code = "GLS-010"
        def category = :schema
        def severity = "warning"
        def scope = :concept

        def applicable?(context)
          context.concept.is_a?(ManagedConcept)
        end

        # Returns an array holding at most one issue.
        def check(context)
          concept = context.concept
          fname = context.file_name
          declared = concept.schema_version.to_s

          # nil, "" and whitespace-only values all count as "not declared".
          if declared.strip.empty?
            return [
              issue(
                "concept has no schema_version",
                location: fname,
                suggestion: "Add schema_version: \"3\" to the concept",
              ),
            ]
          end

          return [] if declared == "3"

          [
            issue(
              "concept has schema_version '#{declared}', expected '3'",
              location: fname,
              suggestion: "Run schema migration to upgrade to version 3",
            ),
          ]
        end
      end
    end
  end
end
@@ -21,7 +21,7 @@ module Glossarist
21
21
  fname = context.file_name
22
22
  issues = []
23
23
 
24
- gather_all_sources(concept).each_with_index do |source, idx|
24
+ concept.localizations.flat_map(&:all_sources).each_with_index do |source, idx|
25
25
  unless VALID_TYPES.include?(source.type)
26
26
  issues << issue(
27
27
  "source #{idx + 1} has invalid type '#{source.type}'",
@@ -43,21 +43,7 @@ module Glossarist
43
43
 
44
44
  issues
45
45
  end
46
-
47
- private
48
-
49
- def gather_all_sources(concept)
50
- sources = []
51
- concept.localizations.each do |l10n|
52
- (l10n.data&.sources || []).each { |s| sources << s }
53
- (l10n.data&.definition || []).each { |d| (d.sources || []).each { |s| sources << s } }
54
- (l10n.data&.notes || []).each { |n| (n.sources || []).each { |s| sources << s } }
55
- (l10n.data&.examples || []).each { |e| (e.sources || []).each { |s| sources << s } }
56
- end
57
- sources
58
- end
59
46
  end
60
47
  end
61
48
  end
62
49
  end
63
-
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Validates the syntax of URN-style references attached to a concept:
      # the `source` of source origins, the `urn` of domains, and the
      # `source` of related-concept refs. Only values beginning with "urn:"
      # are inspected; each must match URN_RE
      # ("urn:<namespace id>:<namespace-specific string>").
      class SourceUrnFormatRule < Base
        URN_RE = %r{\Aurn:([a-z0-9][a-z0-9-]{0,31}):(.+)\z}i.freeze

        # NOTE(review): this list is not consulted by the checks below, which
        # validate URN syntax only. It is kept because other code may
        # reference the constant — confirm whether scheme recognition is
        # still intended.
        KNOWN_SCHEMES = %w[
          iso iec itu iso:std:iso iso:std:iec
        ].freeze

        def code = "GLS-310"
        def category = :quality
        def severity = "warning"
        def scope = :concept

        def applicable?(context)
          context.concept.localizations&.any?
        end

        # Returns one issue per collected reference that starts with "urn:"
        # but does not match URN_RE. The number in the message is the 1-based
        # position of the value within the collected reference list, not the
        # index of a particular source entry.
        def check(context)
          concept = context.concept
          fname = context.file_name
          issues = []

          all_refs(concept).each_with_index do |ref_str, idx|
            next unless ref_str.start_with?("urn:")
            # match? is enough: the captures are not used.
            next if URN_RE.match?(ref_str)

            issues << issue(
              "source #{idx + 1} has malformed URN '#{ref_str}'",
              location: fname,
              suggestion: "Fix the URN to follow RFC 8141 format",
            )
          end

          issues
        end

        private

        # Collects candidate reference strings in a stable order: URN sources
        # of every source of every localization, then every domain urn, then
        # URN sources of related-concept refs. Nil values are dropped.
        def all_refs(concept)
          refs = []

          # Use all_sources, like the other source-oriented rules, so that
          # sources nested under definitions, notes and examples are covered
          # and not just the localization-level ones.
          concept.localizations.each do |l10n|
            (l10n.all_sources || []).each do |src|
              urn = src.origin&.ref&.source
              refs << urn if urn&.start_with?("urn:")
            end
          end

          (concept.data&.domains || []).each do |domain|
            refs << domain.urn if domain.urn
          end

          (concept.related || []).each do |rel|
            urn = rel.ref&.source
            refs << urn if urn&.start_with?("urn:")
          end

          refs.compact
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  module Validation
    module Rules
      # Reports a concept whose UUID is present and non-empty but does not
      # have the 8-4-4-4-12 hexadecimal layout described by UUID_RE. A missing
      # or empty UUID is not reported by this rule.
      class UuidFormatRule < Base
        UUID_RE = /\A[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\z/i.freeze

        def code = "GLS-016"
        def category = :integrity
        def severity = "error"
        def scope = :concept

        # Returns an array holding at most one issue.
        def check(context)
          concept = context.concept
          fname = context.file_name

          raw = concept.uuid
          return [] unless raw

          text = raw.to_s
          return [] if text.empty? || UUID_RE.match?(text)

          [
            issue(
              "concept UUID '#{text}' is not valid UUID format",
              location: fname,
              suggestion: "Use a valid UUID (e.g. 0ce27901-02ce-531e-8ba5-fdb136139d1a)",
            ),
          ]
        end
      end
    end
  end
end
@@ -40,46 +40,13 @@ require_relative "rules/date_type_rule"
40
40
  require_relative "rules/language_code_format_rule"
41
41
  require_relative "rules/designation_type_rule"
42
42
  require_relative "rules/date_validity_rule"
43
-
44
- # Register all built-in rules
45
- module Glossarist
46
- module Validation
47
- module Rules
48
- R = Registry
49
-
50
- R.register(ConceptIdRule)
51
- R.register(ConceptIdUniquenessRule)
52
- R.register(LocalizationPresenceRule)
53
- R.register(EntryStatusRule)
54
- R.register(AsciidocXrefRule)
55
- R.register(ImageReferenceRule)
56
- R.register(ConceptMentionRule)
57
- R.register(ConceptCountRule)
58
- R.register(LanguageListRule)
59
- R.register(LanguageCoverageRule)
60
- R.register(FilenameIdRule)
61
- R.register(L10nUuidIntegrityRule)
62
- R.register(OrphanedL10nFilesRule)
63
- R.register(OrphanedBibliographyRule)
64
- R.register(OrphanedImagesRule)
65
- R.register(DefinitionContentRule)
66
- R.register(PreferredTermRule)
67
- R.register(DuplicateTermRule)
68
- R.register(CitationCompletenessRule)
69
- R.register(AuthoritativeSourceRule)
70
- R.register(RelatedConceptRule)
71
- R.register(ConceptStatusRule)
72
- R.register(SourceEnumRule)
73
- R.register(TermsPresenceRule)
74
- R.register(BibliographyYamlRule)
75
- R.register(ConceptUriRule)
76
- R.register(RelatedConceptSymmetryRule)
77
- R.register(RelatedConceptCycleRule)
78
- R.register(DesignationStatusRule)
79
- R.register(DateTypeRule)
80
- R.register(LanguageCodeFormatRule)
81
- R.register(DesignationTypeRule)
82
- R.register(DateValidityRule)
83
- end
84
- end
85
- end
43
+ require_relative "rules/schema_version_rule"
44
+ require_relative "rules/ref_shape_rule"
45
+ require_relative "rules/locality_completeness_rule"
46
+ require_relative "rules/domain_ref_rule"
47
+ require_relative "rules/uuid_format_rule"
48
+ require_relative "rules/localization_consistency_rule"
49
+ require_relative "rules/related_concept_target_rule"
50
+ require_relative "rules/domain_target_rule"
51
+ require_relative "rules/source_urn_format_rule"
52
+ require_relative "rules/model_validity_rule"
@@ -2,16 +2,19 @@
2
2
 
3
3
  module Glossarist
4
4
  module Validation
5
- class ValidationIssue
6
- attr_reader :severity, :code, :message, :location, :suggestion
5
+ class ValidationIssue < Lutaml::Model::Serializable
6
+ attribute :severity, :string
7
+ attribute :code, :string
8
+ attribute :message, :string
9
+ attribute :location, :string
10
+ attribute :suggestion, :string
7
11
 
8
- def initialize(severity:, message:, code: nil, location: nil,
9
- suggestion: nil)
10
- @severity = severity
11
- @code = code
12
- @message = message
13
- @location = location
14
- @suggestion = suggestion
12
+ key_value do
13
+ map :severity, to: :severity
14
+ map :code, to: :code
15
+ map :message, to: :message
16
+ map :location, to: :location
17
+ map :suggestion, to: :suggestion
15
18
  end
16
19
 
17
20
  def error?
@@ -29,9 +32,9 @@ suggestion: nil)
29
32
  def to_s
30
33
  parts = ["[#{severity.upcase}]"]
31
34
  parts << "[#{code}]" if code
32
- parts << (location ? "#{location}: " : "")
35
+ parts << "#{location}: " if location
33
36
  parts << message
34
- parts << " (#{suggestion})" if suggestion
37
+ parts << "(#{suggestion})" if suggestion
35
38
  parts.join(" ")
36
39
  end
37
40
  end