glossarist 2.6.4 → 2.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +11 -111
- data/Gemfile +0 -2
- data/README.adoc +207 -1
- data/glossarist.gemspec +1 -1
- data/lib/glossarist/asset_reference.rb +16 -0
- data/lib/glossarist/bibliographic_reference.rb +16 -0
- data/lib/glossarist/concept_enricher.rb +1 -0
- data/lib/glossarist/concept_reference.rb +4 -0
- data/lib/glossarist/concept_validator.rb +27 -56
- data/lib/glossarist/dataset_validator.rb +30 -34
- data/lib/glossarist/gcr_validator.rb +26 -101
- data/lib/glossarist/reference_extractor.rb +80 -10
- data/lib/glossarist/reference_resolver.rb +1 -0
- data/lib/glossarist/validation/asset_index.rb +113 -0
- data/lib/glossarist/validation/bibliography_index.rb +121 -0
- data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +60 -0
- data/lib/glossarist/validation/rules/authoritative_source_rule.rb +47 -0
- data/lib/glossarist/validation/rules/base.rb +46 -0
- data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +37 -0
- data/lib/glossarist/validation/rules/citation_completeness_rule.rb +63 -0
- data/lib/glossarist/validation/rules/concept_context.rb +45 -0
- data/lib/glossarist/validation/rules/concept_count_rule.rb +34 -0
- data/lib/glossarist/validation/rules/concept_id_rule.rb +29 -0
- data/lib/glossarist/validation/rules/concept_id_uniqueness_rule.rb +42 -0
- data/lib/glossarist/validation/rules/concept_mention_rule.rb +44 -0
- data/lib/glossarist/validation/rules/concept_status_rule.rb +36 -0
- data/lib/glossarist/validation/rules/concept_uri_rule.rb +30 -0
- data/lib/glossarist/validation/rules/dataset_context.rb +99 -0
- data/lib/glossarist/validation/rules/date_type_rule.rb +54 -0
- data/lib/glossarist/validation/rules/date_validity_rule.rb +66 -0
- data/lib/glossarist/validation/rules/definition_content_rule.rb +41 -0
- data/lib/glossarist/validation/rules/designation_status_rule.rb +45 -0
- data/lib/glossarist/validation/rules/designation_type_rule.rb +55 -0
- data/lib/glossarist/validation/rules/duplicate_term_rule.rb +63 -0
- data/lib/glossarist/validation/rules/entry_status_rule.rb +39 -0
- data/lib/glossarist/validation/rules/filename_id_rule.rb +35 -0
- data/lib/glossarist/validation/rules/gcr_context.rb +92 -0
- data/lib/glossarist/validation/rules/image_reference_rule.rb +73 -0
- data/lib/glossarist/validation/rules/l10n_uuid_integrity_rule.rb +40 -0
- data/lib/glossarist/validation/rules/language_code_format_rule.rb +39 -0
- data/lib/glossarist/validation/rules/language_coverage_rule.rb +37 -0
- data/lib/glossarist/validation/rules/language_list_rule.rb +46 -0
- data/lib/glossarist/validation/rules/localization_presence_rule.rb +25 -0
- data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +64 -0
- data/lib/glossarist/validation/rules/orphaned_images_rule.rb +68 -0
- data/lib/glossarist/validation/rules/orphaned_l10n_files_rule.rb +39 -0
- data/lib/glossarist/validation/rules/preferred_term_rule.rb +41 -0
- data/lib/glossarist/validation/rules/registry.rb +42 -0
- data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +102 -0
- data/lib/glossarist/validation/rules/related_concept_rule.rb +40 -0
- data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +87 -0
- data/lib/glossarist/validation/rules/source_type_rule.rb +63 -0
- data/lib/glossarist/validation/rules/terms_presence_rule.rb +39 -0
- data/lib/glossarist/validation/rules.rb +85 -0
- data/lib/glossarist/validation/validation_issue.rb +39 -0
- data/lib/glossarist/validation.rb +12 -0
- data/lib/glossarist/validation_result.rb +26 -9
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +3 -0
- metadata +60 -15
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0e3cd8f02f83acf4b2c43139fe2ec0afd8b635860e02e31e530827297648168b
|
|
4
|
+
data.tar.gz: 755e65d489c0d889ae3d9f4652703d55055aa0b0f0b669b588380070746fc63a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5794f170646fa14bae1f7e9d3bf82499c4298f3976804d6cf1b0d33dea90cd26715c5c394cfd6335f8616c4a55d05d78b9cdeb0f653b7a86d629a7b3d0bfe799
|
|
7
|
+
data.tar.gz: 7681ef4afc518c7cf5bc4fe99a62dc3b41a44c66778257fc5f1e81004867ed86b98ef4c9e4026b8c5a11791d3d618849cf4bf809f11e07d4d87dfef56ffbec5c
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,94 +1,23 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-05-12
|
|
3
|
+
# on 2026-05-12 10:41:50 UTC using RuboCop version 1.86.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
|
8
8
|
|
|
9
|
-
# Offense count: 7
|
|
10
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
11
|
-
# Configuration parameters: TreatCommentsAsGroupSeparators, ConsiderPunctuation.
|
|
12
|
-
Bundler/OrderedGems:
|
|
13
|
-
Exclude:
|
|
14
|
-
- 'Gemfile'
|
|
15
|
-
|
|
16
9
|
# Offense count: 1
|
|
17
10
|
Gemspec/RequiredRubyVersion:
|
|
18
11
|
Exclude:
|
|
19
12
|
- 'glossarist.gemspec'
|
|
20
13
|
|
|
21
|
-
# Offense count:
|
|
22
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
23
|
-
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
24
|
-
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
25
|
-
Layout/ArgumentAlignment:
|
|
26
|
-
Exclude:
|
|
27
|
-
- 'lib/glossarist/sts/import_result.rb'
|
|
28
|
-
- 'lib/glossarist/sts/importer.rb'
|
|
29
|
-
- 'lib/glossarist/sts/term_mapper.rb'
|
|
30
|
-
|
|
31
|
-
# Offense count: 1
|
|
32
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
33
|
-
# Configuration parameters: IndentationWidth.
|
|
34
|
-
Layout/AssignmentIndentation:
|
|
35
|
-
Exclude:
|
|
36
|
-
- 'lib/glossarist/sts/term_mapper.rb'
|
|
37
|
-
|
|
38
|
-
# Offense count: 6
|
|
39
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
40
|
-
# Configuration parameters: EnforcedStyleAlignWith.
|
|
41
|
-
# SupportedStylesAlignWith: either, start_of_block, start_of_line
|
|
42
|
-
Layout/BlockAlignment:
|
|
43
|
-
Exclude:
|
|
44
|
-
- 'lib/glossarist/sts/term_extractor.rb'
|
|
45
|
-
- 'spec/unit/sts/term_extractor_spec.rb'
|
|
46
|
-
- 'spec/unit/sts/term_mapper_spec.rb'
|
|
47
|
-
|
|
48
|
-
# Offense count: 6
|
|
49
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
50
|
-
Layout/BlockEndNewline:
|
|
51
|
-
Exclude:
|
|
52
|
-
- 'lib/glossarist/sts/term_extractor.rb'
|
|
53
|
-
- 'spec/unit/sts/term_extractor_spec.rb'
|
|
54
|
-
- 'spec/unit/sts/term_mapper_spec.rb'
|
|
55
|
-
|
|
56
|
-
# Offense count: 1
|
|
57
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
58
|
-
# Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
|
|
59
|
-
# SupportedHashRocketStyles: key, separator, table
|
|
60
|
-
# SupportedColonStyles: key, separator, table
|
|
61
|
-
# SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
|
|
62
|
-
Layout/HashAlignment:
|
|
63
|
-
Exclude:
|
|
64
|
-
- 'lib/glossarist/sts/importer.rb'
|
|
65
|
-
|
|
66
|
-
# Offense count: 12
|
|
67
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
68
|
-
# Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
|
|
69
|
-
# SupportedStylesAlignWith: start_of_line, relative_to_receiver
|
|
70
|
-
Layout/IndentationWidth:
|
|
71
|
-
Exclude:
|
|
72
|
-
- 'lib/glossarist/sts/term_extractor.rb'
|
|
73
|
-
- 'spec/unit/sts/term_extractor_spec.rb'
|
|
74
|
-
- 'spec/unit/sts/term_mapper_spec.rb'
|
|
75
|
-
|
|
76
|
-
# Offense count: 236
|
|
14
|
+
# Offense count: 254
|
|
77
15
|
# This cop supports safe autocorrection (--autocorrect).
|
|
78
16
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
79
17
|
# URISchemes: http, https
|
|
80
18
|
Layout/LineLength:
|
|
81
19
|
Enabled: false
|
|
82
20
|
|
|
83
|
-
# Offense count: 7
|
|
84
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
85
|
-
# Configuration parameters: AllowInHeredoc.
|
|
86
|
-
Layout/TrailingWhitespace:
|
|
87
|
-
Exclude:
|
|
88
|
-
- 'lib/glossarist/sts/import_result.rb'
|
|
89
|
-
- 'lib/glossarist/sts/importer.rb'
|
|
90
|
-
- 'lib/glossarist/sts/term_mapper.rb'
|
|
91
|
-
|
|
92
21
|
# Offense count: 1
|
|
93
22
|
# Configuration parameters: AllowedMethods.
|
|
94
23
|
# AllowedMethods: enums
|
|
@@ -104,26 +33,12 @@ Lint/UnusedMethodArgument:
|
|
|
104
33
|
Exclude:
|
|
105
34
|
- 'lib/glossarist/dataset_validator.rb'
|
|
106
35
|
|
|
107
|
-
# Offense count:
|
|
36
|
+
# Offense count: 37
|
|
108
37
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
109
38
|
Metrics/AbcSize:
|
|
110
|
-
|
|
111
|
-
- 'lib/glossarist/cli/export_command.rb'
|
|
112
|
-
- 'lib/glossarist/cli/package_command.rb'
|
|
113
|
-
- 'lib/glossarist/cli/validate_command.rb'
|
|
114
|
-
- 'lib/glossarist/concept_manager.rb'
|
|
115
|
-
- 'lib/glossarist/gcr_metadata.rb'
|
|
116
|
-
- 'lib/glossarist/gcr_package.rb'
|
|
117
|
-
- 'lib/glossarist/reference_extractor.rb'
|
|
118
|
-
- 'lib/glossarist/reference_resolver.rb'
|
|
119
|
-
- 'lib/glossarist/resolution_adapter/local.rb'
|
|
120
|
-
- 'lib/glossarist/schema_migration.rb'
|
|
121
|
-
- 'lib/glossarist/transforms/concept_to_skos_transform.rb'
|
|
122
|
-
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
|
123
|
-
- 'lib/glossarist/utilities/uuid.rb'
|
|
124
|
-
- 'spec/unit/concept_collector_spec.rb'
|
|
39
|
+
Enabled: false
|
|
125
40
|
|
|
126
|
-
# Offense count:
|
|
41
|
+
# Offense count: 24
|
|
127
42
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
128
43
|
Metrics/CyclomaticComplexity:
|
|
129
44
|
Exclude:
|
|
@@ -132,6 +47,7 @@ Metrics/CyclomaticComplexity:
|
|
|
132
47
|
- 'lib/glossarist/designation/expression.rb'
|
|
133
48
|
- 'lib/glossarist/gcr_metadata.rb'
|
|
134
49
|
- 'lib/glossarist/gcr_statistics.rb'
|
|
50
|
+
- 'lib/glossarist/gcr_validator.rb'
|
|
135
51
|
- 'lib/glossarist/managed_concept.rb'
|
|
136
52
|
- 'lib/glossarist/reference_extractor.rb'
|
|
137
53
|
- 'lib/glossarist/reference_resolver.rb'
|
|
@@ -139,8 +55,9 @@ Metrics/CyclomaticComplexity:
|
|
|
139
55
|
- 'lib/glossarist/schema_migration.rb'
|
|
140
56
|
- 'lib/glossarist/transforms/concept_to_skos_transform.rb'
|
|
141
57
|
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
|
58
|
+
- 'lib/glossarist/validation/bibliography_index.rb'
|
|
142
59
|
|
|
143
|
-
# Offense count:
|
|
60
|
+
# Offense count: 48
|
|
144
61
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
145
62
|
Metrics/MethodLength:
|
|
146
63
|
Max: 42
|
|
@@ -150,19 +67,21 @@ Metrics/MethodLength:
|
|
|
150
67
|
Metrics/ParameterLists:
|
|
151
68
|
Max: 6
|
|
152
69
|
|
|
153
|
-
# Offense count:
|
|
70
|
+
# Offense count: 18
|
|
154
71
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
155
72
|
Metrics/PerceivedComplexity:
|
|
156
73
|
Exclude:
|
|
157
74
|
- 'lib/glossarist/concept_validator.rb'
|
|
158
75
|
- 'lib/glossarist/designation/expression.rb'
|
|
159
76
|
- 'lib/glossarist/gcr_metadata.rb'
|
|
77
|
+
- 'lib/glossarist/gcr_validator.rb'
|
|
160
78
|
- 'lib/glossarist/reference_extractor.rb'
|
|
161
79
|
- 'lib/glossarist/reference_resolver.rb'
|
|
162
80
|
- 'lib/glossarist/resolution_adapter/local.rb'
|
|
163
81
|
- 'lib/glossarist/schema_migration.rb'
|
|
164
82
|
- 'lib/glossarist/transforms/concept_to_skos_transform.rb'
|
|
165
83
|
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
|
84
|
+
- 'lib/glossarist/validation/bibliography_index.rb'
|
|
166
85
|
|
|
167
86
|
# Offense count: 6
|
|
168
87
|
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
@@ -179,19 +98,6 @@ Naming/VariableNumber:
|
|
|
179
98
|
Exclude:
|
|
180
99
|
- 'spec/unit/rdf/skos_vocabulary_spec.rb'
|
|
181
100
|
|
|
182
|
-
# Offense count: 9
|
|
183
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
184
|
-
# Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
|
|
185
|
-
# SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
|
|
186
|
-
# ProceduralMethods: benchmark, bm, bmbm, create, each_with_object, measure, new, realtime, tap, with_object
|
|
187
|
-
# FunctionalMethods: let, let!, subject, watch
|
|
188
|
-
# AllowedMethods: lambda, proc, it
|
|
189
|
-
Style/BlockDelimiters:
|
|
190
|
-
Exclude:
|
|
191
|
-
- 'lib/glossarist/sts/term_extractor.rb'
|
|
192
|
-
- 'spec/unit/sts/term_extractor_spec.rb'
|
|
193
|
-
- 'spec/unit/sts/term_mapper_spec.rb'
|
|
194
|
-
|
|
195
101
|
# Offense count: 6
|
|
196
102
|
# This cop supports safe autocorrection (--autocorrect).
|
|
197
103
|
# Configuration parameters: MaxUnannotatedPlaceholdersAllowed, Mode, AllowedMethods, AllowedPatterns.
|
|
@@ -199,12 +105,6 @@ Style/BlockDelimiters:
|
|
|
199
105
|
Style/FormatStringToken:
|
|
200
106
|
EnforcedStyle: unannotated
|
|
201
107
|
|
|
202
|
-
# Offense count: 2
|
|
203
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
204
|
-
Style/MultilineIfModifier:
|
|
205
|
-
Exclude:
|
|
206
|
-
- 'lib/glossarist/sts/importer.rb'
|
|
207
|
-
|
|
208
108
|
# Offense count: 1
|
|
209
109
|
# Configuration parameters: AllowedClasses.
|
|
210
110
|
Style/OneClassPerFile:
|
data/Gemfile
CHANGED
data/README.adoc
CHANGED
|
@@ -626,12 +626,14 @@ skipped_count:: `Integer` — concepts skipped due to duplicates (strategy: skip
|
|
|
626
626
|
|
|
627
627
|
=== validate
|
|
628
628
|
|
|
629
|
-
Validate a dataset directory or `.gcr` file for schema compliance
|
|
629
|
+
Validate a dataset directory or `.gcr` file for schema compliance, structural
|
|
630
|
+
integrity, cross-reference resolution, and data quality.
|
|
630
631
|
|
|
631
632
|
[,bash]
|
|
632
633
|
----
|
|
633
634
|
glossarist validate PATH
|
|
634
635
|
glossarist validate PATH --reference-path path/to/gcrs/
|
|
636
|
+
glossarist validate PATH --strict
|
|
635
637
|
----
|
|
636
638
|
|
|
637
639
|
Options:
|
|
@@ -657,6 +659,210 @@ result.errors # => [...]
|
|
|
657
659
|
result.warnings # => [...]
|
|
658
660
|
----
|
|
659
661
|
|
|
662
|
+
== Validation System
|
|
663
|
+
|
|
664
|
+
Glossarist provides a rule-based validation framework that checks dataset
|
|
665
|
+
directories and GCR packages for structural, schema, reference, integrity,
|
|
666
|
+
quality, and localization issues.
|
|
667
|
+
|
|
668
|
+
=== Architecture
|
|
669
|
+
|
|
670
|
+
The validation system uses the **rule-registry pattern** (Open/Closed
|
|
671
|
+
Principle). Each check is a self-describing rule class that subclasses
|
|
672
|
+
`Glossarist::Validation::Rules::Base`. New rules are added by subclassing and
|
|
673
|
+
registering — no existing code is modified.
|
|
674
|
+
|
|
675
|
+
```
|
|
676
|
+
Glossarist::Validation
|
|
677
|
+
├── Rules
|
|
678
|
+
│ ├── Base # Abstract rule: code, category, severity, scope, check
|
|
679
|
+
│ ├── Registry # Global registry: register, all, for_category, for_scope
|
|
680
|
+
│ ├── DatasetContext # Lazy-loaded access to a directory dataset
|
|
681
|
+
│ ├── GcrContext # Lazy-loaded access to a .gcr package
|
|
682
|
+
│ └── (33 rule classes) # One file per rule
|
|
683
|
+
├── ValidationIssue # Single finding: severity, code, message, location, suggestion
|
|
684
|
+
├── BibliographyIndex # Index of bibliography anchors from sources + bibliography.yaml
|
|
685
|
+
├── AssetIndex # Index of asset paths from images/ directory or GCR ZIP
|
|
686
|
+
├── ConceptValidator # Orchestrator: runs per-concept rules
|
|
687
|
+
├── GcrValidator # Orchestrator: runs GCR-level rules
|
|
688
|
+
└── DatasetValidator # Orchestrator: runs directory-level + collection rules
|
|
689
|
+
```
|
|
690
|
+
|
|
691
|
+
=== Rule Categories
|
|
692
|
+
|
|
693
|
+
Rules are classified into six MECE (Mutually Exclusive, Collectively
|
|
694
|
+
Exhaustive) categories:
|
|
695
|
+
|
|
696
|
+
[cols="1,2"]
|
|
697
|
+
|===
|
|
698
|
+
|Category |What it checks
|
|
699
|
+
|
|
700
|
+
|`structure` |File/directory layout, ZIP contents, required parts
|
|
701
|
+
|`schema` |Field types, enum values, required fields, YAML syntax
|
|
702
|
+
|`references` |Cross-references between concepts, bibliography, assets
|
|
703
|
+
|`integrity` |Metadata vs. reality, filename vs. ID, UUID cross-references
|
|
704
|
+
|`quality` |Empty content, missing preferred terms, duplicate terms
|
|
705
|
+
|`localization` |Language coverage, orphaned/missing localization files
|
|
706
|
+
|===
|
|
707
|
+
|
|
708
|
+
=== Built-in Rules
|
|
709
|
+
|
|
710
|
+
The following rules are registered by default. Each rule has a unique code
|
|
711
|
+
(e.g. `GLS-001`), a severity (`error` or `warning`), and a scope (`:concept`
|
|
712
|
+
for per-concept checks or `:collection` for dataset-wide checks).
|
|
713
|
+
|
|
714
|
+
==== Structure Rules
|
|
715
|
+
|
|
716
|
+
[cols="1,2,1,1"]
|
|
717
|
+
|===
|
|
718
|
+
|Code |Rule |Severity |Scope
|
|
719
|
+
|
|
720
|
+
|GLS-001 |Concept ID is present |error |`:concept`
|
|
721
|
+
|GLS-002 |At least one localization per concept |error |`:concept`
|
|
722
|
+
|GLS-005 |Each localization has at least 1 term |error |`:concept`
|
|
723
|
+
|GLS-020-YAML |bibliography.yaml is valid YAML |error |`:collection`
|
|
724
|
+
|===
|
|
725
|
+
|
|
726
|
+
==== Schema Rules
|
|
727
|
+
|
|
728
|
+
[cols="1,2,1,1"]
|
|
729
|
+
|===
|
|
730
|
+
|Code |Rule |Severity |Scope
|
|
731
|
+
|
|
732
|
+
|GLS-003 |Entry status is a valid enum value |error |`:concept`
|
|
733
|
+
|GLS-201 |Concept status is a valid enum value |error |`:concept`
|
|
734
|
+
|GLS-202/203 |Source type and status are valid enums |error |`:concept`
|
|
735
|
+
|GLS-200 |Related concept type is valid |error |`:concept`
|
|
736
|
+
|GLS-204 |Designation normative_status is valid |error |`:concept`
|
|
737
|
+
|GLS-205 |Date type is a valid enum |warning |`:concept`
|
|
738
|
+
|GLS-206 |Language code is exactly 3 lowercase letters |error |`:concept`
|
|
739
|
+
|GLS-207 |Designation type maps to a known subclass |error |`:concept`
|
|
740
|
+
|===
|
|
741
|
+
|
|
742
|
+
==== Reference Rules
|
|
743
|
+
|
|
744
|
+
[cols="1,2,1,1"]
|
|
745
|
+
|===
|
|
746
|
+
|Code |Rule |Severity |Scope
|
|
747
|
+
|
|
748
|
+
|GLS-100 |`{{...}}` concept mentions resolve locally |warning |`:concept`
|
|
749
|
+
|GLS-102 |`<<anchor>>` AsciiDoc xrefs resolve in bibliography index |warning |`:concept`
|
|
750
|
+
|GLS-103-105 |Image references resolve in asset index |warning |`:concept`
|
|
751
|
+
|GLS-110 |Related concept references resolve |warning |`:concept`
|
|
752
|
+
|GLS-020 |Orphaned bibliography entries |warning |`:collection`
|
|
753
|
+
|GLS-021 |Orphaned images |warning |`:collection`
|
|
754
|
+
|GLS-112 |Supersedes/superseded_by symmetry check |warning |`:collection`
|
|
755
|
+
|GLS-113 |No circular related-concept chains |error |`:collection`
|
|
756
|
+
|===
|
|
757
|
+
|
|
758
|
+
==== Integrity Rules
|
|
759
|
+
|
|
760
|
+
[cols="1,2,1,1"]
|
|
761
|
+
|===
|
|
762
|
+
|Code |Rule |Severity |Scope
|
|
763
|
+
|
|
764
|
+
|GLS-001-U |Concept IDs are unique |error |`:collection`
|
|
765
|
+
|GLS-011 |Concept count matches metadata |error |`:collection`
|
|
766
|
+
|GLS-012 |Language list matches actual languages |warning |`:collection`
|
|
767
|
+
|GLS-013 |Language coverage per concept |warning |`:concept`
|
|
768
|
+
|GLS-015 |Filename matches concept ID (GCR) |error |`:concept`
|
|
769
|
+
|GLS-016 |Concept URI is set or template is applicable |warning |`:collection`
|
|
770
|
+
|GLS-018 |Localized concept UUID cross-references resolve |error |`:concept`
|
|
771
|
+
|GLS-019 |Orphaned localization files |warning |`:collection`
|
|
772
|
+
|===
|
|
773
|
+
|
|
774
|
+
==== Quality Rules
|
|
775
|
+
|
|
776
|
+
[cols="1,2,1,1"]
|
|
777
|
+
|===
|
|
778
|
+
|Code |Rule |Severity |Scope
|
|
779
|
+
|
|
780
|
+
|GLS-300 |Definition content is non-empty |warning |`:concept`
|
|
781
|
+
|GLS-301 |At least one preferred designation per localization |warning |`:concept`
|
|
782
|
+
|GLS-302 |No duplicate preferred terms within a language |warning |`:collection`
|
|
783
|
+
|GLS-304 |Source citation is not empty |warning |`:concept`
|
|
784
|
+
|GLS-306 |At least one authoritative source |warning |`:concept`
|
|
785
|
+
|GLS-307 |Date values are parseable |warning |`:concept`
|
|
786
|
+
|===
|
|
787
|
+
|
|
788
|
+
=== Cross-Reference Validation
|
|
789
|
+
|
|
790
|
+
The validation system checks that all references in concept content point to
|
|
791
|
+
resources that actually exist:
|
|
792
|
+
|
|
793
|
+
* **Bibliographic cross-references** — AsciiDoc `<<anchor>>` xrefs are checked
|
|
794
|
+
against a `BibliographyIndex` built from all `ConceptSource` entries and
|
|
795
|
+
optional `bibliography.yaml`.
|
|
796
|
+
* **Image/asset references** — `image::path[]` references and model-level asset
|
|
797
|
+
paths (`NonVerbRep`, `GraphicalSymbol`) are checked against an `AssetIndex`
|
|
798
|
+
built from the `images/` directory or GCR ZIP entries.
|
|
799
|
+
* **Inter-concept references** — `{{...}}` concept mentions are checked against
|
|
800
|
+
the concept collection for local references, and against registered GCR
|
|
801
|
+
packages for inter-set URN references.
|
|
802
|
+
|
|
803
|
+
=== Validation Result
|
|
804
|
+
|
|
805
|
+
`ValidationResult` holds the aggregated findings from all rules:
|
|
806
|
+
|
|
807
|
+
[,ruby]
|
|
808
|
+
----
|
|
809
|
+
result = DatasetValidator.new.validate("path/to/dataset")
|
|
810
|
+
result.valid? # => true if no errors
|
|
811
|
+
result.errors # => Array of error strings
|
|
812
|
+
result.warnings # => Array of warning strings
|
|
813
|
+
result.issues # => Array of ValidationIssue objects (full detail)
|
|
814
|
+
----
|
|
815
|
+
|
|
816
|
+
Each `ValidationIssue` carries structured metadata:
|
|
817
|
+
|
|
818
|
+
[,ruby]
|
|
819
|
+
----
|
|
820
|
+
issue = result.issues.first
|
|
821
|
+
issue.severity # => "error" or "warning"
|
|
822
|
+
issue.code # => "GLS-300"
|
|
823
|
+
issue.message # => "definition 1 has empty content"
|
|
824
|
+
issue.location # => "concepts/100.yaml/eng"
|
|
825
|
+
issue.suggestion # => "Add definition text or remove the empty entry"
|
|
826
|
+
issue.to_s # => "[WARNING] [GLS-300] concepts/100.yaml/eng: definition 1 has empty content"
|
|
827
|
+
----
|
|
828
|
+
|
|
829
|
+
=== Adding Custom Rules
|
|
830
|
+
|
|
831
|
+
New validation rules are added by subclassing `Base` and registering with the
|
|
832
|
+
global `Registry`. This extends validation without modifying existing code:
|
|
833
|
+
|
|
834
|
+
[,ruby]
|
|
835
|
+
----
|
|
836
|
+
class MyCustomRule < Glossarist::Validation::Rules::Base
|
|
837
|
+
def code = "CUSTOM-001"
|
|
838
|
+
def category = :quality
|
|
839
|
+
def severity = "warning"
|
|
840
|
+
def scope = :concept
|
|
841
|
+
|
|
842
|
+
def applicable?(context)
|
|
843
|
+
context.concept&.localizations&.any?
|
|
844
|
+
end
|
|
845
|
+
|
|
846
|
+
def check(context)
|
|
847
|
+
issues = []
|
|
848
|
+
context.concept.localizations.each do |l10n|
|
|
849
|
+
# ... your check logic ...
|
|
850
|
+
if some_condition
|
|
851
|
+
issues << issue("something is wrong",
|
|
852
|
+
location: context.file_name,
|
|
853
|
+
suggestion: "how to fix it")
|
|
854
|
+
end
|
|
855
|
+
end
|
|
856
|
+
issues
|
|
857
|
+
end
|
|
858
|
+
end
|
|
859
|
+
|
|
860
|
+
Glossarist::Validation::Rules::Registry.register(MyCustomRule)
|
|
861
|
+
----
|
|
862
|
+
|
|
863
|
+
Custom rules are automatically picked up by `DatasetValidator`, `GcrValidator`,
|
|
864
|
+
and `ConceptValidator` on the next validation run.
|
|
865
|
+
|
|
660
866
|
=== upgrade
|
|
661
867
|
|
|
662
868
|
Upgrade a dataset to the current schema version.
|
data/glossarist.gemspec
CHANGED
|
@@ -32,9 +32,9 @@ Gem::Specification.new do |spec|
|
|
|
32
32
|
spec.require_paths = ["lib"]
|
|
33
33
|
|
|
34
34
|
spec.add_dependency "lutaml-model", "~> 0.8.5"
|
|
35
|
-
spec.add_dependency "sts", "~> 0.5.6"
|
|
36
35
|
spec.add_dependency "relaton", ">= 2.0.0", "< 3"
|
|
37
36
|
spec.add_dependency "rubyzip", ">= 2.3", "< 3"
|
|
37
|
+
spec.add_dependency "sts", "~> 0.5.6"
|
|
38
38
|
spec.add_dependency "tbx", "~> 0.1"
|
|
39
39
|
spec.add_dependency "thor"
|
|
40
40
|
end
|
data/lib/glossarist/bibliographic_reference.rb
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class BibliographicReference
|
|
5
|
+
attr_reader :anchor, :location
|
|
6
|
+
|
|
7
|
+
def initialize(anchor:, location: nil)
|
|
8
|
+
@anchor = anchor
|
|
9
|
+
@location = location
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def dedup_key
|
|
13
|
+
anchor
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
data/lib/glossarist/concept_validator.rb
CHANGED
|
@@ -2,9 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
4
|
class ConceptValidator
|
|
5
|
-
LANG_CODES = Glossarist::LANG_CODES
|
|
6
|
-
VALID_ENTRY_STATUSES = %w[valid superseded withdrawn draft].freeze
|
|
7
|
-
|
|
8
5
|
attr_reader :path, :errors, :warnings
|
|
9
6
|
|
|
10
7
|
def initialize(path)
|
|
@@ -14,23 +11,46 @@ module Glossarist
|
|
|
14
11
|
end
|
|
15
12
|
|
|
16
13
|
def validate_all
|
|
17
|
-
|
|
14
|
+
result = ValidationResult.new
|
|
15
|
+
context = Validation::Rules::DatasetContext.new(@path)
|
|
16
|
+
concept_rules = Validation::Rules::Registry.for_scope(:concept)
|
|
18
17
|
file_idx = 0
|
|
19
18
|
|
|
20
19
|
ConceptCollector.each_concept(@path) do |concept|
|
|
21
20
|
fname = concept_file_name(concept, file_idx)
|
|
22
|
-
|
|
21
|
+
concept_context = Validation::Rules::ConceptContext.new(
|
|
22
|
+
concept, file_name: fname, collection_context: context
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
concept_rules.each do |rule|
|
|
26
|
+
next unless rule.applicable?(concept_context)
|
|
27
|
+
|
|
28
|
+
rule.check(concept_context).each { |i| result.add_issue(i) }
|
|
29
|
+
end
|
|
30
|
+
|
|
23
31
|
file_idx += 1
|
|
24
32
|
end
|
|
25
33
|
|
|
26
34
|
if file_idx.zero?
|
|
27
35
|
yaml_files = find_yaml_files
|
|
28
36
|
if yaml_files.any?
|
|
29
|
-
|
|
37
|
+
result.add_error("YAML files found but no parseable concepts")
|
|
30
38
|
end
|
|
31
39
|
end
|
|
32
40
|
|
|
33
|
-
|
|
41
|
+
# Run collection-level rules
|
|
42
|
+
collection_rules = Validation::Rules::Registry.for_scope(:collection)
|
|
43
|
+
collection_rules.each do |rule|
|
|
44
|
+
next unless rule.applicable?(context)
|
|
45
|
+
|
|
46
|
+
rule.check(context).each { |i| result.add_issue(i) }
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Sync legacy arrays for backward compatibility
|
|
50
|
+
@errors = result.errors
|
|
51
|
+
@warnings = result.warnings
|
|
52
|
+
|
|
53
|
+
result
|
|
34
54
|
end
|
|
35
55
|
|
|
36
56
|
private
|
|
@@ -48,54 +68,5 @@ module Glossarist
|
|
|
48
68
|
id = concept.data&.id
|
|
49
69
|
id ? "concept-#{id}.yaml" : "concept-#{idx}.yaml"
|
|
50
70
|
end
|
|
51
|
-
|
|
52
|
-
def validate_concept(concept, fname, seen_ids)
|
|
53
|
-
validate_id(concept, fname, seen_ids)
|
|
54
|
-
validate_localizations(concept, fname)
|
|
55
|
-
validate_entry_statuses(concept, fname)
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
def validate_id(concept, fname, seen_ids)
|
|
59
|
-
id = concept.data&.id
|
|
60
|
-
unless id
|
|
61
|
-
@errors << "#{fname}: missing concept id"
|
|
62
|
-
return
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
id_str = id.to_s
|
|
66
|
-
if seen_ids[id_str]
|
|
67
|
-
@errors << "#{fname}: duplicate id '#{id_str}' (first seen in #{seen_ids[id_str]})"
|
|
68
|
-
else
|
|
69
|
-
seen_ids[id_str] = fname
|
|
70
|
-
end
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
def validate_localizations(concept, fname)
|
|
74
|
-
l10ns = concept.localizations&.values || []
|
|
75
|
-
if l10ns.empty?
|
|
76
|
-
@errors << "#{fname}: no localizations found"
|
|
77
|
-
return
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
l10ns.each do |l10n|
|
|
81
|
-
lang = l10n.language_code || "unknown"
|
|
82
|
-
terms = l10n.data&.terms
|
|
83
|
-
unless terms.is_a?(Array) && terms.any?
|
|
84
|
-
@errors << "#{fname}/#{lang}: must have at least 1 term"
|
|
85
|
-
end
|
|
86
|
-
end
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
def validate_entry_statuses(concept, fname)
|
|
90
|
-
(concept.localizations&.values || []).each do |l10n|
|
|
91
|
-
lang = l10n.language_code || "unknown"
|
|
92
|
-
status = l10n.data&.entry_status
|
|
93
|
-
next unless status
|
|
94
|
-
|
|
95
|
-
unless VALID_ENTRY_STATUSES.include?(status)
|
|
96
|
-
@errors << "#{fname}/#{lang}: invalid entry_status '#{status}' (expected one of: #{VALID_ENTRY_STATUSES.join(', ')})"
|
|
97
|
-
end
|
|
98
|
-
end
|
|
99
|
-
end
|
|
100
71
|
end
|
|
101
72
|
end
|