glossarist 2.6.5 → 2.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +1 -4
- data/.rubocop_todo.yml +53 -2
- data/CLAUDE.md +27 -2
- data/README.adoc +532 -56
- data/config.yml +68 -1
- data/glossarist.gemspec +2 -0
- data/lib/glossarist/citation.rb +26 -123
- data/lib/glossarist/cli/compare_command.rb +106 -0
- data/lib/glossarist/cli/export_command.rb +11 -14
- data/lib/glossarist/cli/validate_command.rb +111 -20
- data/lib/glossarist/cli.rb +18 -0
- data/lib/glossarist/collections/bibliography_collection.rb +4 -2
- data/lib/glossarist/collections/localization_collection.rb +2 -0
- data/lib/glossarist/comparison_result.rb +35 -0
- data/lib/glossarist/concept.rb +1 -1
- data/lib/glossarist/concept_collector.rb +44 -0
- data/lib/glossarist/concept_comparator.rb +72 -0
- data/lib/glossarist/concept_data.rb +20 -0
- data/lib/glossarist/concept_diff.rb +15 -0
- data/lib/glossarist/concept_document.rb +11 -0
- data/lib/glossarist/concept_manager.rb +19 -5
- data/lib/glossarist/concept_ref.rb +13 -0
- data/lib/glossarist/concept_reference.rb +12 -19
- data/lib/glossarist/concept_validator.rb +6 -1
- data/lib/glossarist/context_configuration.rb +90 -0
- data/lib/glossarist/dataset_validator.rb +8 -4
- data/lib/glossarist/designation/abbreviation.rb +0 -2
- data/lib/glossarist/designation/base.rb +21 -1
- data/lib/glossarist/designation/expression.rb +3 -0
- data/lib/glossarist/designation/letter_symbol.rb +0 -4
- data/lib/glossarist/designation/prefix.rb +17 -0
- data/lib/glossarist/designation/suffix.rb +17 -0
- data/lib/glossarist/designation/symbol.rb +0 -2
- data/lib/glossarist/gcr_metadata.rb +7 -14
- data/lib/glossarist/gcr_package.rb +35 -23
- data/lib/glossarist/gcr_validator.rb +38 -17
- data/lib/glossarist/glossary_definition.rb +5 -0
- data/lib/glossarist/localized_concept.rb +8 -0
- data/lib/glossarist/managed_concept.rb +39 -6
- data/lib/glossarist/managed_concept_data.rb +22 -2
- data/lib/glossarist/non_verb_rep.rb +21 -6
- data/lib/glossarist/pronunciation.rb +32 -0
- data/lib/glossarist/rdf/ext/jsonld_transform_ext.rb +208 -0
- data/lib/glossarist/rdf/ext/mapping_ext.rb +37 -0
- data/lib/glossarist/rdf/ext/mapping_rule_ext.rb +27 -0
- data/lib/glossarist/rdf/ext/member_rule_ext.rb +34 -0
- data/lib/glossarist/rdf/ext/turtle_transform_ext.rb +222 -0
- data/lib/glossarist/rdf/ext.rb +39 -0
- data/lib/glossarist/rdf/gloss_citation.rb +36 -0
- data/lib/glossarist/rdf/gloss_concept.rb +58 -0
- data/lib/glossarist/rdf/gloss_concept_date.rb +24 -0
- data/lib/glossarist/rdf/gloss_concept_reference.rb +29 -0
- data/lib/glossarist/rdf/gloss_concept_source.rb +37 -0
- data/lib/glossarist/rdf/gloss_designation.rb +146 -0
- data/lib/glossarist/rdf/gloss_detailed_definition.rb +24 -0
- data/lib/glossarist/rdf/gloss_grammar_info.rb +57 -0
- data/lib/glossarist/rdf/gloss_locality.rb +25 -0
- data/lib/glossarist/rdf/gloss_localized_concept.rb +67 -0
- data/lib/glossarist/rdf/gloss_non_verbal_rep.rb +31 -0
- data/lib/glossarist/rdf/gloss_pronunciation.rb +32 -0
- data/lib/glossarist/rdf/gloss_reference.rb +55 -0
- data/lib/glossarist/rdf/namespaces/glossarist_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/iso_thes_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/owl_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/prov_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/rdf_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/skosxl_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces.rb +8 -2
- data/lib/glossarist/rdf/relationships.rb +19 -0
- data/lib/glossarist/rdf/v3/configuration.rb +15 -0
- data/lib/glossarist/rdf/v3.rb +79 -0
- data/lib/glossarist/rdf.rb +22 -2
- data/lib/glossarist/reference_extractor.rb +15 -24
- data/lib/glossarist/reference_resolver.rb +3 -3
- data/lib/glossarist/related_concept.rb +2 -10
- data/lib/glossarist/schema_migration.rb +39 -0
- data/lib/glossarist/sts/term_mapper.rb +2 -2
- data/lib/glossarist/transforms/concept_to_gloss_transform.rb +355 -0
- data/lib/glossarist/transforms.rb +2 -2
- data/lib/glossarist/urn_resolver.rb +13 -1
- data/lib/glossarist/v1/concept.rb +18 -11
- data/lib/glossarist/v2/citation.rb +36 -0
- data/lib/glossarist/v2/concept_data.rb +46 -0
- data/lib/glossarist/v2/concept_document.rb +18 -0
- data/lib/glossarist/v2/concept_ref.rb +8 -0
- data/lib/glossarist/v2/concept_source.rb +16 -0
- data/lib/glossarist/v2/configuration.rb +13 -0
- data/lib/glossarist/v2/detailed_definition.rb +14 -0
- data/lib/glossarist/v2/localized_concept.rb +9 -0
- data/lib/glossarist/v2/managed_concept.rb +25 -0
- data/lib/glossarist/v2/managed_concept_data.rb +49 -0
- data/lib/glossarist/v2/related_concept.rb +15 -0
- data/lib/glossarist/v2.rb +28 -0
- data/lib/glossarist/v3/bibliography_entry.rb +19 -0
- data/lib/glossarist/v3/bibliography_file.rb +27 -0
- data/lib/glossarist/v3/citation.rb +30 -0
- data/lib/glossarist/v3/concept_data.rb +46 -0
- data/lib/glossarist/v3/concept_document.rb +18 -0
- data/lib/glossarist/v3/concept_ref.rb +8 -0
- data/lib/glossarist/v3/concept_source.rb +16 -0
- data/lib/glossarist/v3/configuration.rb +13 -0
- data/lib/glossarist/v3/detailed_definition.rb +14 -0
- data/lib/glossarist/v3/image_entry.rb +21 -0
- data/lib/glossarist/v3/image_file.rb +31 -0
- data/lib/glossarist/v3/localized_concept.rb +9 -0
- data/lib/glossarist/v3/managed_concept.rb +26 -0
- data/lib/glossarist/v3/managed_concept_data.rb +34 -0
- data/lib/glossarist/v3/related_concept.rb +15 -0
- data/lib/glossarist/v3.rb +36 -0
- data/lib/glossarist/validation/asset_index.rb +4 -3
- data/lib/glossarist/validation/bibliography_index.rb +61 -30
- data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +2 -15
- data/lib/glossarist/validation/rules/authoritative_source_rule.rb +2 -15
- data/lib/glossarist/validation/rules/base.rb +5 -0
- data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +2 -3
- data/lib/glossarist/validation/rules/citation_completeness_rule.rb +5 -27
- data/lib/glossarist/validation/rules/dataset_context.rb +8 -3
- data/lib/glossarist/validation/rules/date_validity_rule.rb +1 -1
- data/lib/glossarist/validation/rules/designation_status_rule.rb +0 -1
- data/lib/glossarist/validation/rules/designation_type_rule.rb +1 -5
- data/lib/glossarist/validation/rules/domain_ref_rule.rb +37 -0
- data/lib/glossarist/validation/rules/domain_target_rule.rb +56 -0
- data/lib/glossarist/validation/rules/gcr_context.rb +12 -13
- data/lib/glossarist/validation/rules/image_reference_rule.rb +2 -17
- data/lib/glossarist/validation/rules/locality_completeness_rule.rb +58 -0
- data/lib/glossarist/validation/rules/localization_consistency_rule.rb +72 -0
- data/lib/glossarist/validation/rules/localization_presence_rule.rb +1 -1
- data/lib/glossarist/validation/rules/model_validity_rule.rb +71 -0
- data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +1 -13
- data/lib/glossarist/validation/rules/orphaned_images_rule.rb +16 -11
- data/lib/glossarist/validation/rules/ref_shape_rule.rb +68 -0
- data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +1 -3
- data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +1 -3
- data/lib/glossarist/validation/rules/related_concept_target_rule.rb +64 -0
- data/lib/glossarist/validation/rules/schema_version_rule.rb +41 -0
- data/lib/glossarist/validation/rules/source_type_rule.rb +1 -15
- data/lib/glossarist/validation/rules/source_urn_format_rule.rb +65 -0
- data/lib/glossarist/validation/rules/uuid_format_rule.rb +33 -0
- data/lib/glossarist/validation/rules.rb +10 -43
- data/lib/glossarist/validation/validation_issue.rb +14 -11
- data/lib/glossarist/validation_result.rb +12 -22
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +10 -0
- data/memory/project-status.md +43 -0
- data/scripts/migrate_dataset.rb +180 -0
- data/scripts/migrate_isotc204_to_v3.rb +134 -0
- data/scripts/migrate_isotc211_to_v3.rb +153 -0
- data/scripts/migrate_osgeo_to_v3.rb +155 -0
- data/scripts/upgrade_dataset_to_v3.rb +47 -0
- metadata +112 -6
- data/TODO.integration/01-gcr-package-cli.md +0 -180
- data/lib/glossarist/rdf/skos_concept.rb +0 -43
- data/lib/glossarist/rdf/skos_vocabulary.rb +0 -25
- data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -131
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    module Rules
      # Cross-checks a concept's localized_concepts map against the
      # localizations that are actually loaded for that concept.
      #
      # The map is read as "language code => UUID" (it is looked up by
      # language code and the value is compared with the localization's uuid).
      # Issues are reported in this order:
      #
      # 1. a language present in the map without a loaded localization,
      # 2. a loaded localization whose language is absent from the map,
      # 3. a loaded localization whose UUID differs from the mapped UUID.
      class LocalizationConsistencyRule < Base
        def code = "GLS-017"
        def category = :integrity
        def severity = "error"
        def scope = :concept

        # The rule applies as soon as either side (loaded localizations or
        # the localized_concepts map) has at least one entry.
        def applicable?(context)
          context.concept.localizations&.any? ||
            context.concept.data&.localized_concepts&.any?
        end

        # Returns an array of issues for context.concept; empty when the map
        # and the loaded localizations agree.
        def check(context)
          concept = context.concept
          fname = context.file_name

          lc_map = concept.data&.localized_concepts || {}
          # applicable? also passes when only the map is populated, so the
          # localizations collection may be nil here; normalise it once so
          # every check below can iterate safely.
          localizations = concept.localizations || []
          loaded_langs = localizations.map(&:language_code).compact

          missing_localization_issues(lc_map, loaded_langs, fname) +
            unmapped_localization_issues(lc_map, loaded_langs, fname) +
            uuid_mismatch_issues(lc_map, localizations, fname)
        end

        private

        # Map has an entry but no localization with that language is loaded.
        def missing_localization_issues(lc_map, loaded_langs, fname)
          issues = []

          lc_map.each_key do |lang|
            next if loaded_langs.include?(lang)

            issues << issue(
              "localized_concepts map has '#{lang}' but no localization loaded",
              location: fname,
              suggestion: "Add a localization for '#{lang}' or remove it from the map",
            )
          end

          issues
        end

        # A localization is loaded but its language is not listed in the map.
        def unmapped_localization_issues(lc_map, loaded_langs, fname)
          issues = []

          loaded_langs.each do |lang|
            next if lc_map.key?(lang)

            issues << issue(
              "localization '#{lang}' is loaded but not in localized_concepts map",
              location: fname,
              suggestion: "Add '#{lang}' to the localized_concepts map",
            )
          end

          issues
        end

        # Both sides know the language, but they disagree on the UUID.
        # Entries missing either UUID are skipped: absence is covered by the
        # two presence checks, not treated as a mismatch.
        def uuid_mismatch_issues(lc_map, localizations, fname)
          issues = []

          localizations.each do |l10n|
            lang = l10n.language_code
            next unless lang

            expected_uuid = lc_map[lang]
            actual_uuid = l10n.uuid
            next unless expected_uuid && actual_uuid
            next if expected_uuid == actual_uuid

            issues << issue(
              "UUID mismatch for '#{lang}': map says '#{expected_uuid}', localization is '#{actual_uuid}'",
              location: fname,
              suggestion: "Ensure the UUID in the map matches the localization file",
            )
          end

          issues
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    module Rules
      # Runs the model-level validate method on a concept and on every
      # serializable model reachable through its attributes, turning each
      # reported error into a validation issue.
      #
      # Messages for nested models are prefixed with the attribute path that
      # leads to them (e.g. "data.dates[0]: ..."); errors on the concept
      # itself carry no prefix.
      class ModelValidityRule < Base
        def code = "GLS-050"
        def category = :structure
        def severity = "error"
        def scope = :concept

        # Only Lutaml::Model::Serializable concepts can be validated.
        def applicable?(context)
          context.concept.is_a?(Lutaml::Model::Serializable)
        end

        # Returns the issues for the concept and all of its nested models.
        def check(context)
          validate_recursive(context.concept, context.file_name)
        end

        private

        # Validates +model+ itself, then descends into its attributes.
        # Anything that is not a serializable model yields no issues.
        def validate_recursive(model, location, path = "")
          return [] unless model.is_a?(Lutaml::Model::Serializable)

          own_issues = collect_model_errors(model, location, path)
          nested_issues = recurse_attributes(model, location, path)
          own_issues + nested_issues
        end

        # Wraps every error returned by model.validate in an issue,
        # prefixing the message with +path+ when the model is nested.
        def collect_model_errors(model, location, path)
          label = path.empty? ? "" : "#{path}: "

          model.validate.map do |error|
            issue("#{label}#{error}", location: location)
          end
        end

        # Visits each declared attribute of the model's class and validates
        # whatever non-nil value it currently holds.
        def recurse_attributes(model, location, path)
          model.class.attributes.flat_map do |name, _definition|
            value = model.public_send(name)
            if value.nil?
              []
            else
              validate_collection(value, location, build_path(path, name))
            end
          end
        end

        # Dispatches on the value's shape: arrays are validated element by
        # element (with an index suffix on the path), single models directly,
        # and any other value is ignored.
        def validate_collection(value, location, path)
          if value.is_a?(Array)
            collected = []
            value.each_with_index do |item, idx|
              collected.concat(validate_recursive(item, location, "#{path}[#{idx}]"))
            end
            collected
          elsif value.is_a?(Lutaml::Model::Serializable)
            validate_recursive(value, location, path)
          else
            []
          end
        end

        # Joins a parent path and an attribute name with a dot; the root
        # level has no parent and uses the bare attribute name.
        def build_path(parent, name)
          return name.to_s if parent.empty?

          "#{parent}.#{name}"
        end
      end
    end
  end
end
|
|
@@ -20,8 +20,7 @@ module Glossarist
|
|
|
20
20
|
|
|
21
21
|
context.concepts.each do |concept|
|
|
22
22
|
concept.localizations.each do |l10n|
|
|
23
|
-
|
|
24
|
-
texts.each do |text|
|
|
23
|
+
l10n.text_content.each do |text|
|
|
25
24
|
next unless text
|
|
26
25
|
extractor.extract_from_text(text).each do |ref|
|
|
27
26
|
if ref.is_a?(BibliographicReference)
|
|
@@ -47,18 +46,7 @@ module Glossarist
|
|
|
47
46
|
|
|
48
47
|
issues
|
|
49
48
|
end
|
|
50
|
-
|
|
51
|
-
private
|
|
52
|
-
|
|
53
|
-
def extract_texts(l10n)
|
|
54
|
-
texts = []
|
|
55
|
-
(l10n.data&.definition || []).each { |d| texts << d.content if d.content }
|
|
56
|
-
(l10n.data&.notes || []).each { |n| texts << n.content if n.content }
|
|
57
|
-
(l10n.data&.examples || []).each { |e| texts << e.content if e.content }
|
|
58
|
-
texts
|
|
59
|
-
end
|
|
60
49
|
end
|
|
61
50
|
end
|
|
62
51
|
end
|
|
63
52
|
end
|
|
64
|
-
|
|
@@ -18,10 +18,8 @@ module Glossarist
|
|
|
18
18
|
referenced_paths = Set.new
|
|
19
19
|
|
|
20
20
|
context.concepts.each do |concept|
|
|
21
|
-
# Text-embedded image refs
|
|
22
21
|
concept.localizations.each do |l10n|
|
|
23
|
-
|
|
24
|
-
texts.each do |text|
|
|
22
|
+
l10n.text_content.each do |text|
|
|
25
23
|
next unless text
|
|
26
24
|
extractor.extract_from_text(text).each do |ref|
|
|
27
25
|
if ref.is_a?(AssetReference)
|
|
@@ -31,12 +29,20 @@ module Glossarist
|
|
|
31
29
|
end
|
|
32
30
|
end
|
|
33
31
|
|
|
34
|
-
# Model-level asset refs
|
|
35
32
|
extractor.extract_asset_refs_from_concept(concept).each do |ref|
|
|
36
33
|
referenced_paths.add(ref.path)
|
|
37
34
|
end
|
|
38
35
|
end
|
|
39
36
|
|
|
37
|
+
images_file = load_images_file(context)
|
|
38
|
+
if images_file
|
|
39
|
+
context.bibliography_index.entries.each_value do |entry|
|
|
40
|
+
next unless entry[:source].is_a?(V3::ImageEntry)
|
|
41
|
+
path = entry[:source].path
|
|
42
|
+
referenced_paths.add(path) if path
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
40
46
|
issues = []
|
|
41
47
|
context.asset_index.each_path do |path|
|
|
42
48
|
next if referenced_paths.include?(path)
|
|
@@ -54,15 +60,14 @@ module Glossarist
|
|
|
54
60
|
|
|
55
61
|
private
|
|
56
62
|
|
|
57
|
-
def
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
+
# Loads "images.yaml" from the dataset directory (context.path) through
# V3::ImageFile.from_file and memoizes the result on the rule instance.
# The defined? guard means a nil result is cached as well, so the file is
# read at most once per instance.
def load_images_file(context)
  return @images_file if defined?(@images_file)

  images_path = File.join(context.path, "images.yaml")
  @images_file = V3::ImageFile.from_file(images_path)
end
|
|
64
70
|
end
|
|
65
71
|
end
|
|
66
72
|
end
|
|
67
73
|
end
|
|
68
|
-
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    module Rules
      # Checks the shape of reference objects attached to a concept:
      #
      # * every source origin must carry a ref, and that ref must have at
      #   least a source or an id;
      # * every related-concept ref that is present must have at least a
      #   source or an id.
      class RefShapeRule < Base
        def code = "GLS-305"
        def category = :schema
        def severity = "error"
        def scope = :concept

        # Runs only for concepts that have at least one localization.
        def applicable?(context)
          context.concept.localizations&.any?
        end

        # Returns source-ref issues followed by related-ref issues.
        def check(context)
          concept = context.concept
          fname = context.file_name

          [].tap do |issues|
            check_sources(concept, fname, issues)
            check_related(concept, fname, issues)
          end
        end

        private

        # Appends an issue to +issues+ for each source whose origin has no
        # ref, or whose ref has neither source nor id. Sources without an
        # origin are skipped. Numbering is 1-based across the sources of all
        # localizations, in iteration order.
        def check_sources(concept, fname, issues)
          sources = concept.localizations.flat_map(&:all_sources)

          sources.each.with_index(1) do |source, number|
            origin = source.origin
            next unless origin

            ref = origin.ref
            if ref.nil?
              issues << issue(
                "source #{number} origin has nil ref (expected Citation::Ref hash)",
                location: fname,
                suggestion: "Set origin.ref to { source: ..., id: ... }",
              )
              next
            end

            next unless ref.source.nil? && ref.id.nil?

            issues << issue(
              "source #{number} origin.ref has neither source nor id",
              location: fname,
              suggestion: "Provide at least origin.ref.source or origin.ref.id",
            )
          end
        end

        # Appends an issue to +issues+ for each related concept whose ref is
        # present but has neither source nor id. Entries without a ref are
        # skipped. Numbering is 1-based.
        def check_related(concept, fname, issues)
          related = concept.related || []

          related.each_with_index do |rel, idx|
            ref = rel.ref
            next unless ref
            next unless ref.source.nil? && ref.id.nil?

            issues << issue(
              "related concept #{idx + 1} has empty ref (no source or id)",
              location: fname,
              suggestion: "Provide at least ref.source or ref.id",
            )
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    module Rules
      # Validates the targets of related-concept references.
      #
      # * Local refs (an id with no, or a blank, source) must name a concept
      #   id that is present in context.concept_ids.
      # * Source-only refs (a source and no id) whose source starts with
      #   "urn:" must match URN_RE. Other source-only values are accepted.
      class RelatedConceptTargetRule < Base
        URN_RE = %r{\Aurn:[a-z0-9][a-z0-9-]{0,31}:[a-z0-9()+,\-.:=@;$_!*'%/?#]+\z}i.freeze

        def code = "GLS-110"
        def category = :references
        def severity = "warning"
        def scope = :concept

        # Runs only for concepts that declare at least one related concept.
        def applicable?(context)
          context.concept.related&.any?
        end

        # Returns one issue per dangling local ref or malformed URN source.
        # Related entries without a ref are skipped; positions in messages
        # are 1-based.
        def check(context)
          concept = context.concept
          fname = context.file_name
          issues = []
          related = concept.related || []

          related.each_with_index do |rel, idx|
            ref = rel.ref
            next unless ref

            id = ref.id
            source = ref.source

            if id && local_ref?(source)
              # Local ref: the id has to exist in the dataset.
              next if context.concept_ids.include?(id)

              issues << issue(
                "related concept #{idx + 1} references '#{id}' which is not in the dataset",
                location: fname,
                suggestion: "Add concept '#{id}' to the dataset or fix the reference",
              )
            elsif source && !id
              # Source-only ref: only URN-looking sources are format-checked.
              next unless source.start_with?("urn:")
              next if URN_RE.match?(source)

              issues << issue(
                "related concept #{idx + 1} has invalid URN '#{source}'",
                location: fname,
                suggestion: "Fix the URN format (e.g. urn:iso:std:iso:ts:14812)",
              )
            end
          end

          issues
        end

        private

        # A ref is local when it names no source, or only a blank one.
        def local_ref?(source)
          return true if source.nil?

          source.strip.empty?
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    module Rules
      # Warns when a managed concept does not declare schema_version "3":
      # one message for a missing or blank version, another for any other
      # value.
      class SchemaVersionRule < Base
        def code = "GLS-010"
        def category = :schema
        def severity = "warning"
        def scope = :concept

        # Only ManagedConcept instances are inspected.
        def applicable?(context)
          context.concept.is_a?(ManagedConcept)
        end

        # Returns an array holding at most one issue.
        def check(context)
          concept = context.concept
          fname = context.file_name
          version = concept.schema_version
          # nil.to_s is "", so a missing version and a blank one are handled
          # by the same branch.
          version_text = version.to_s

          if version_text.strip.empty?
            [
              issue(
                "concept has no schema_version",
                location: fname,
                suggestion: "Add schema_version: \"3\" to the concept",
              ),
            ]
          elsif version_text == "3"
            []
          else
            [
              issue(
                "concept has schema_version '#{version}', expected '3'",
                location: fname,
                suggestion: "Run schema migration to upgrade to version 3",
              ),
            ]
          end
        end
      end
    end
  end
end
|
|
@@ -21,7 +21,7 @@ module Glossarist
|
|
|
21
21
|
fname = context.file_name
|
|
22
22
|
issues = []
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
concept.localizations.flat_map(&:all_sources).each_with_index do |source, idx|
|
|
25
25
|
unless VALID_TYPES.include?(source.type)
|
|
26
26
|
issues << issue(
|
|
27
27
|
"source #{idx + 1} has invalid type '#{source.type}'",
|
|
@@ -43,21 +43,7 @@ module Glossarist
|
|
|
43
43
|
|
|
44
44
|
issues
|
|
45
45
|
end
|
|
46
|
-
|
|
47
|
-
private
|
|
48
|
-
|
|
49
|
-
def gather_all_sources(concept)
|
|
50
|
-
sources = []
|
|
51
|
-
concept.localizations.each do |l10n|
|
|
52
|
-
(l10n.data&.sources || []).each { |s| sources << s }
|
|
53
|
-
(l10n.data&.definition || []).each { |d| (d.sources || []).each { |s| sources << s } }
|
|
54
|
-
(l10n.data&.notes || []).each { |n| (n.sources || []).each { |s| sources << s } }
|
|
55
|
-
(l10n.data&.examples || []).each { |e| (e.sources || []).each { |s| sources << s } }
|
|
56
|
-
end
|
|
57
|
-
sources
|
|
58
|
-
end
|
|
59
46
|
end
|
|
60
47
|
end
|
|
61
48
|
end
|
|
62
49
|
end
|
|
63
|
-
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    module Rules
      # Validates the syntax of URN-format values found on a concept: the
      # origin ref source of every source attached to its localizations,
      # the urn of each domain, and the ref source of each related concept.
      #
      # Only values starting with "urn:" are inspected; each must match
      # URN_RE ("urn:<nid>:<nss>"). The rule checks syntax only: it does
      # not restrict the namespace to KNOWN_SCHEMES.
      class SourceUrnFormatRule < Base
        URN_RE = %r{\Aurn:([a-z0-9][a-z0-9-]{0,31}):(.+)\z}i.freeze

        # NOTE(review): this list is not consulted by #check. It is kept
        # because it is part of the class's public constants.
        KNOWN_SCHEMES = %w[
          iso iec itu iso:std:iso iso:std:iec
        ].freeze

        def code = "GLS-310"
        def category = :quality
        def severity = "warning"
        def scope = :concept

        # Runs only for concepts that have at least one localization.
        def applicable?(context)
          context.concept.localizations&.any?
        end

        # Returns one issue per collected "urn:" value that does not match
        # URN_RE. The number in the message is the 1-based position of the
        # value in the list built by #all_refs.
        def check(context)
          concept = context.concept
          fname = context.file_name
          issues = []

          all_refs(concept).each_with_index do |ref_str, idx|
            next unless ref_str && ref_str.start_with?("urn:")
            # match? is enough here: the capture groups are never read.
            next if URN_RE.match?(ref_str)

            issues << issue(
              "source #{idx + 1} has malformed URN '#{ref_str}'",
              location: fname,
              suggestion: "Fix the URN to follow RFC 8141 format",
            )
          end

          issues
        end

        private

        # Collects candidate reference strings, in order: localization
        # sources, domain URNs, related-concept sources. Source and related
        # values are pre-filtered to those starting with "urn:"; domain URNs
        # are passed through as-is and filtered by #check.
        def all_refs(concept)
          refs = []

          # all_sources is used (as the other source-oriented rules do) so
          # that sources attached below the localization's top-level list
          # are validated too, not only l10n.data.sources.
          concept.localizations.flat_map(&:all_sources).each do |s|
            urn = s.origin&.ref&.source
            refs << urn if urn&.start_with?("urn:")
          end

          (concept.data&.domains || []).each do |d|
            refs << d.urn if d.urn
          end

          (concept.related || []).each do |r|
            urn = r.ref&.source
            refs << urn if urn&.start_with?("urn:")
          end

          refs.compact
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    module Rules
      # Reports a concept whose uuid is set but is not written in the
      # 8-4-4-4-12 hexadecimal form (case-insensitive). A missing or empty
      # uuid is not reported by this rule.
      class UuidFormatRule < Base
        UUID_RE = /\A[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\z/i.freeze

        def code = "GLS-016"
        def category = :integrity
        def severity = "error"
        def scope = :concept

        # Returns an array holding at most one issue.
        def check(context)
          concept = context.concept
          fname = context.file_name
          uuid = concept.uuid
          uuid_text = uuid.to_s

          # Nothing to validate when the uuid is absent or blank.
          return [] if !uuid || uuid_text.empty?
          return [] if UUID_RE.match?(uuid_text)

          [
            issue(
              "concept UUID '#{uuid}' is not valid UUID format",
              location: fname,
              suggestion: "Use a valid UUID (e.g. 0ce27901-02ce-531e-8ba5-fdb136139d1a)",
            ),
          ]
        end
      end
    end
  end
end
|
|
@@ -40,46 +40,13 @@ require_relative "rules/date_type_rule"
|
|
|
40
40
|
require_relative "rules/language_code_format_rule"
|
|
41
41
|
require_relative "rules/designation_type_rule"
|
|
42
42
|
require_relative "rules/date_validity_rule"
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
R.register(EntryStatusRule)
|
|
54
|
-
R.register(AsciidocXrefRule)
|
|
55
|
-
R.register(ImageReferenceRule)
|
|
56
|
-
R.register(ConceptMentionRule)
|
|
57
|
-
R.register(ConceptCountRule)
|
|
58
|
-
R.register(LanguageListRule)
|
|
59
|
-
R.register(LanguageCoverageRule)
|
|
60
|
-
R.register(FilenameIdRule)
|
|
61
|
-
R.register(L10nUuidIntegrityRule)
|
|
62
|
-
R.register(OrphanedL10nFilesRule)
|
|
63
|
-
R.register(OrphanedBibliographyRule)
|
|
64
|
-
R.register(OrphanedImagesRule)
|
|
65
|
-
R.register(DefinitionContentRule)
|
|
66
|
-
R.register(PreferredTermRule)
|
|
67
|
-
R.register(DuplicateTermRule)
|
|
68
|
-
R.register(CitationCompletenessRule)
|
|
69
|
-
R.register(AuthoritativeSourceRule)
|
|
70
|
-
R.register(RelatedConceptRule)
|
|
71
|
-
R.register(ConceptStatusRule)
|
|
72
|
-
R.register(SourceEnumRule)
|
|
73
|
-
R.register(TermsPresenceRule)
|
|
74
|
-
R.register(BibliographyYamlRule)
|
|
75
|
-
R.register(ConceptUriRule)
|
|
76
|
-
R.register(RelatedConceptSymmetryRule)
|
|
77
|
-
R.register(RelatedConceptCycleRule)
|
|
78
|
-
R.register(DesignationStatusRule)
|
|
79
|
-
R.register(DateTypeRule)
|
|
80
|
-
R.register(LanguageCodeFormatRule)
|
|
81
|
-
R.register(DesignationTypeRule)
|
|
82
|
-
R.register(DateValidityRule)
|
|
83
|
-
end
|
|
84
|
-
end
|
|
85
|
-
end
|
|
43
|
+
require_relative "rules/schema_version_rule"
|
|
44
|
+
require_relative "rules/ref_shape_rule"
|
|
45
|
+
require_relative "rules/locality_completeness_rule"
|
|
46
|
+
require_relative "rules/domain_ref_rule"
|
|
47
|
+
require_relative "rules/uuid_format_rule"
|
|
48
|
+
require_relative "rules/localization_consistency_rule"
|
|
49
|
+
require_relative "rules/related_concept_target_rule"
|
|
50
|
+
require_relative "rules/domain_target_rule"
|
|
51
|
+
require_relative "rules/source_urn_format_rule"
|
|
52
|
+
require_relative "rules/model_validity_rule"
|
|
@@ -2,16 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
4
|
module Validation
|
|
5
|
-
class ValidationIssue
|
|
6
|
-
|
|
5
|
+
class ValidationIssue < Lutaml::Model::Serializable
|
|
6
|
+
attribute :severity, :string
|
|
7
|
+
attribute :code, :string
|
|
8
|
+
attribute :message, :string
|
|
9
|
+
attribute :location, :string
|
|
10
|
+
attribute :suggestion, :string
|
|
7
11
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@suggestion = suggestion
|
|
12
|
+
key_value do
|
|
13
|
+
map :severity, to: :severity
|
|
14
|
+
map :code, to: :code
|
|
15
|
+
map :message, to: :message
|
|
16
|
+
map :location, to: :location
|
|
17
|
+
map :suggestion, to: :suggestion
|
|
15
18
|
end
|
|
16
19
|
|
|
17
20
|
def error?
|
|
@@ -29,9 +32,9 @@ suggestion: nil)
|
|
|
29
32
|
def to_s
|
|
30
33
|
parts = ["[#{severity.upcase}]"]
|
|
31
34
|
parts << "[#{code}]" if code
|
|
32
|
-
parts <<
|
|
35
|
+
parts << "#{location}: " if location
|
|
33
36
|
parts << message
|
|
34
|
-
parts << "
|
|
37
|
+
parts << "(#{suggestion})" if suggestion
|
|
35
38
|
parts.join(" ")
|
|
36
39
|
end
|
|
37
40
|
end
|