glossarist 2.6.4 → 2.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +11 -111
- data/Gemfile +0 -2
- data/README.adoc +207 -1
- data/glossarist.gemspec +1 -1
- data/lib/glossarist/asset_reference.rb +16 -0
- data/lib/glossarist/bibliographic_reference.rb +16 -0
- data/lib/glossarist/concept_enricher.rb +1 -0
- data/lib/glossarist/concept_reference.rb +4 -0
- data/lib/glossarist/concept_validator.rb +27 -56
- data/lib/glossarist/dataset_validator.rb +30 -34
- data/lib/glossarist/gcr_validator.rb +26 -101
- data/lib/glossarist/reference_extractor.rb +80 -10
- data/lib/glossarist/reference_resolver.rb +1 -0
- data/lib/glossarist/validation/asset_index.rb +113 -0
- data/lib/glossarist/validation/bibliography_index.rb +121 -0
- data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +60 -0
- data/lib/glossarist/validation/rules/authoritative_source_rule.rb +47 -0
- data/lib/glossarist/validation/rules/base.rb +46 -0
- data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +37 -0
- data/lib/glossarist/validation/rules/citation_completeness_rule.rb +63 -0
- data/lib/glossarist/validation/rules/concept_context.rb +45 -0
- data/lib/glossarist/validation/rules/concept_count_rule.rb +34 -0
- data/lib/glossarist/validation/rules/concept_id_rule.rb +29 -0
- data/lib/glossarist/validation/rules/concept_id_uniqueness_rule.rb +42 -0
- data/lib/glossarist/validation/rules/concept_mention_rule.rb +44 -0
- data/lib/glossarist/validation/rules/concept_status_rule.rb +36 -0
- data/lib/glossarist/validation/rules/concept_uri_rule.rb +30 -0
- data/lib/glossarist/validation/rules/dataset_context.rb +99 -0
- data/lib/glossarist/validation/rules/date_type_rule.rb +54 -0
- data/lib/glossarist/validation/rules/date_validity_rule.rb +66 -0
- data/lib/glossarist/validation/rules/definition_content_rule.rb +41 -0
- data/lib/glossarist/validation/rules/designation_status_rule.rb +45 -0
- data/lib/glossarist/validation/rules/designation_type_rule.rb +55 -0
- data/lib/glossarist/validation/rules/duplicate_term_rule.rb +63 -0
- data/lib/glossarist/validation/rules/entry_status_rule.rb +39 -0
- data/lib/glossarist/validation/rules/filename_id_rule.rb +35 -0
- data/lib/glossarist/validation/rules/gcr_context.rb +92 -0
- data/lib/glossarist/validation/rules/image_reference_rule.rb +73 -0
- data/lib/glossarist/validation/rules/l10n_uuid_integrity_rule.rb +40 -0
- data/lib/glossarist/validation/rules/language_code_format_rule.rb +39 -0
- data/lib/glossarist/validation/rules/language_coverage_rule.rb +37 -0
- data/lib/glossarist/validation/rules/language_list_rule.rb +46 -0
- data/lib/glossarist/validation/rules/localization_presence_rule.rb +25 -0
- data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +64 -0
- data/lib/glossarist/validation/rules/orphaned_images_rule.rb +68 -0
- data/lib/glossarist/validation/rules/orphaned_l10n_files_rule.rb +39 -0
- data/lib/glossarist/validation/rules/preferred_term_rule.rb +41 -0
- data/lib/glossarist/validation/rules/registry.rb +42 -0
- data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +102 -0
- data/lib/glossarist/validation/rules/related_concept_rule.rb +40 -0
- data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +87 -0
- data/lib/glossarist/validation/rules/source_type_rule.rb +63 -0
- data/lib/glossarist/validation/rules/terms_presence_rule.rb +39 -0
- data/lib/glossarist/validation/rules.rb +85 -0
- data/lib/glossarist/validation/validation_issue.rb +39 -0
- data/lib/glossarist/validation.rb +12 -0
- data/lib/glossarist/validation_result.rb +26 -9
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +3 -0
- metadata +60 -15
|
@@ -3,43 +3,53 @@
|
|
|
3
3
|
module Glossarist
|
|
4
4
|
class DatasetValidator
|
|
5
5
|
# Validates the dataset found at +path+.
#
# A path whose extension is ".gcr" (compared case-insensitively) is handed to
# +validate_gcr+; every other path is handed to +validate_directory+.
# +reference_path+ is forwarded unchanged to whichever validator is chosen.
#
# +strict+ is accepted as a keyword but is not read by this method.
#
# Returns whatever the selected validator returns.
def validate(path, strict: false, reference_path: nil)
  gcr_archive = File.extname(path).downcase == ".gcr"
  return validate_gcr(path, reference_path: reference_path) if gcr_archive

  validate_directory(path, reference_path: reference_path)
end
|
|
12
|
+
|
|
13
|
+
private
|
|
14
|
+
|
|
15
|
+
# Validates a GCR archive with GcrValidator and, when +reference_path+ is
# given, merges the cross-reference validation result into it.
#
# Returns the result object produced by GcrValidator#validate.
def validate_gcr(path, reference_path: nil)
  outcome = GcrValidator.new.validate(path)
  return outcome unless reference_path

  outcome.merge(validate_gcr_cross_references(path, reference_path))
  outcome
end
|
|
15
25
|
|
|
16
|
-
|
|
26
|
+
# Validates a dataset directory with ConceptValidator#validate_all and, when
# +reference_path+ is given, merges the cross-reference validation result
# into it.
#
# Returns the result object produced by ConceptValidator#validate_all.
def validate_directory(path, reference_path: nil)
  outcome = ConceptValidator.new(path).validate_all
  return outcome unless reference_path

  outcome.merge(validate_directory_cross_references(path, reference_path))
  outcome
end
|
|
29
36
|
|
|
30
|
-
def
|
|
31
|
-
|
|
37
|
+
# Runs cross-reference validation for the GCR archive at +path+.
#
# The package is loaded with GcrPackage.load, its concepts are registered on
# the resolver built from +reference_path+, and the package itself is
# registered under the metadata "uri_prefix" (falling back to "shortname").
#
# Returns the value of the resolver's +validate_all+.
def validate_gcr_cross_references(path, reference_path)
  reference_extractor = ReferenceExtractor.new
  reference_resolver = build_resolver(reference_path)
  package = GcrPackage.load(path)

  metadata = package.metadata
  prefix = metadata&.dig("uri_prefix") || metadata&.dig("shortname")

  reference_resolver.register_self(package.concepts)
  reference_resolver.register_package(package, uri_prefix: prefix)
  reference_resolver.validate_all(package, extractor: reference_extractor)
end
|
|
33
46
|
|
|
34
|
-
def
|
|
47
|
+
# Runs cross-reference validation for the dataset directory at +path+.
#
# Concepts are gathered with ConceptCollector.collect and registered on the
# resolver built from +reference_path+ before being validated.
#
# Returns the value of the resolver's +validate_all+.
def validate_directory_cross_references(path, reference_path)
  reference_extractor = ReferenceExtractor.new
  reference_resolver = build_resolver(reference_path)
  collected = ConceptCollector.collect(path)

  reference_resolver.register_self(collected)
  reference_resolver.validate_all(collected, extractor: reference_extractor)
end
|
|
44
54
|
|
|
45
55
|
def build_resolver(reference_path)
|
|
@@ -51,19 +61,5 @@ module Glossarist
|
|
|
51
61
|
end
|
|
52
62
|
resolver
|
|
53
63
|
end
|
|
54
|
-
|
|
55
|
-
def validate_gcr_refs(resolver, path, extractor)
|
|
56
|
-
pkg = GcrPackage.load(path)
|
|
57
|
-
uri_prefix = pkg.metadata&.dig("uri_prefix") || pkg.metadata&.dig("shortname")
|
|
58
|
-
resolver.register_self(pkg.concepts)
|
|
59
|
-
resolver.register_package(pkg, uri_prefix: uri_prefix)
|
|
60
|
-
resolver.validate_all(pkg, extractor: extractor)
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
def validate_directory_refs(resolver, path, extractor)
|
|
64
|
-
concepts = ConceptCollector.collect(path)
|
|
65
|
-
resolver.register_self(concepts)
|
|
66
|
-
resolver.validate_all(concepts, extractor: extractor)
|
|
67
|
-
end
|
|
68
64
|
end
|
|
69
65
|
end
|
|
@@ -4,7 +4,7 @@ require "zip"
|
|
|
4
4
|
|
|
5
5
|
module Glossarist
|
|
6
6
|
class GcrValidator
|
|
7
|
-
def validate(zip_path)
|
|
7
|
+
def validate(zip_path)
|
|
8
8
|
result = ValidationResult.new
|
|
9
9
|
|
|
10
10
|
unless File.exist?(zip_path)
|
|
@@ -13,123 +13,48 @@ module Glossarist
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
begin
|
|
16
|
-
Zip::File.open(zip_path)
|
|
17
|
-
validate_zip_contents(zip_file, result)
|
|
18
|
-
end
|
|
16
|
+
zip_entries = Zip::File.open(zip_path) { |zf| zf.entries.to_set(&:name) }
|
|
19
17
|
rescue StandardError => e
|
|
20
18
|
result.add_error("Failed to read ZIP: #{e.message}")
|
|
19
|
+
return result
|
|
21
20
|
end
|
|
22
21
|
|
|
23
|
-
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
private
|
|
27
|
-
|
|
28
|
-
def validate_zip_contents(zip_file, result) # rubocop:disable Metrics/AbcSize
|
|
29
|
-
unless zip_file.find_entry("metadata.yaml")
|
|
22
|
+
unless zip_entries.include?("metadata.yaml")
|
|
30
23
|
result.add_error("Missing metadata.yaml")
|
|
31
|
-
return
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
metadata = GcrMetadata.from_yaml(
|
|
35
|
-
zip_file.find_entry("metadata.yaml").get_input_stream.read,
|
|
36
|
-
)
|
|
37
|
-
validate_metadata(metadata, result)
|
|
38
|
-
|
|
39
|
-
concept_entries = zip_file.entries.select do |e|
|
|
40
|
-
e.name.start_with?("concepts/") && e.name.end_with?(".yaml")
|
|
41
|
-
end
|
|
42
|
-
if concept_entries.empty?
|
|
43
|
-
result.add_error("No concept files found in concepts/")
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
concept_entries.each do |entry|
|
|
47
|
-
validate_concept_entry(entry, metadata, result)
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
validate_assets(zip_file, result)
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
def validate_metadata(metadata, result)
|
|
54
|
-
unless metadata&.concept_count
|
|
55
|
-
result.add_error("metadata.yaml missing required fields (concept_count)")
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
unless metadata&.shortname
|
|
59
|
-
result.add_error("metadata.yaml missing shortname")
|
|
24
|
+
return result
|
|
60
25
|
end
|
|
61
26
|
|
|
62
|
-
|
|
63
|
-
|
|
27
|
+
begin
|
|
28
|
+
context = Validation::Rules::GcrContext.new(zip_path)
|
|
29
|
+
rescue StandardError => e
|
|
30
|
+
result.add_error("Failed to load GCR: #{e.message}")
|
|
31
|
+
return result
|
|
64
32
|
end
|
|
65
|
-
end
|
|
66
33
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
result.add_error("#{entry.name}: invalid YAML at line #{e.line}: #{e.message}")
|
|
72
|
-
rescue StandardError => e
|
|
73
|
-
result.add_error("#{entry.name}: parse error: #{e.message}")
|
|
74
|
-
else
|
|
75
|
-
concept = doc.concept
|
|
76
|
-
unless concept&.data&.id
|
|
77
|
-
result.add_error("#{entry.name}: document 0 missing data.identifier")
|
|
78
|
-
end
|
|
34
|
+
# Collection-level rules (metadata, structure, integrity)
|
|
35
|
+
collection_rules = Validation::Rules::Registry.for_scope(:collection)
|
|
36
|
+
collection_rules.each do |rule|
|
|
37
|
+
next unless rule.applicable?(context)
|
|
79
38
|
|
|
80
|
-
|
|
81
|
-
if localizations.empty?
|
|
82
|
-
result.add_error("#{entry.name}: expected at least 1 localization document")
|
|
83
|
-
else
|
|
84
|
-
localizations.each_with_index do |l10n, idx|
|
|
85
|
-
unless l10n&.language_code
|
|
86
|
-
result.add_error("#{entry.name}: document #{idx + 1} missing data.language_code")
|
|
87
|
-
end
|
|
88
|
-
end
|
|
39
|
+
rule.check(context).each { |i| result.add_issue(i) }
|
|
89
40
|
end
|
|
90
41
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
42
|
+
# Per-concept rules
|
|
43
|
+
concept_rules = Validation::Rules::Registry.for_scope(:concept)
|
|
44
|
+
context.concepts.each_with_index do |concept, idx|
|
|
45
|
+
fname = concept.data&.id ? "concepts/#{concept.data.id}.yaml" : "concepts/concept-#{idx}.yaml"
|
|
46
|
+
concept_context = Validation::Rules::ConceptContext.new(
|
|
47
|
+
concept, file_name: fname, collection_context: context
|
|
48
|
+
)
|
|
98
49
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
end
|
|
102
|
-
end
|
|
50
|
+
concept_rules.each do |rule|
|
|
51
|
+
next unless rule.applicable?(concept_context)
|
|
103
52
|
|
|
104
|
-
|
|
105
|
-
GcrPackage::DATASET_ASSETS.each do |asset|
|
|
106
|
-
case asset[:type]
|
|
107
|
-
when :file
|
|
108
|
-
validate_file_asset_entry(zip_file, asset[:path], result)
|
|
109
|
-
when :directory
|
|
110
|
-
validate_directory_asset(zip_file, asset[:path], result)
|
|
53
|
+
rule.check(concept_context).each { |i| result.add_issue(i) }
|
|
111
54
|
end
|
|
112
55
|
end
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
def validate_file_asset_entry(zip_file, path, result)
|
|
116
|
-
entry = zip_file.find_entry(path)
|
|
117
|
-
return unless entry
|
|
118
|
-
|
|
119
|
-
YAML.safe_load(entry.get_input_stream.read)
|
|
120
|
-
rescue Psych::SyntaxError => e
|
|
121
|
-
result.add_error("#{path}: invalid YAML at line #{e.line}: #{e.message}")
|
|
122
|
-
end
|
|
123
56
|
|
|
124
|
-
|
|
125
|
-
dir_entries = zip_file.entries.select do |e|
|
|
126
|
-
e.name.start_with?("#{dir_path}/")
|
|
127
|
-
end
|
|
128
|
-
return unless dir_entries.any? && dir_entries.all? do |e|
|
|
129
|
-
e.name.end_with?("/")
|
|
130
|
-
end
|
|
131
|
-
|
|
132
|
-
result.add_warning("#{dir_path}/ directory is empty")
|
|
57
|
+
result
|
|
133
58
|
end
|
|
134
59
|
end
|
|
135
60
|
end
|
|
@@ -152,6 +152,65 @@ module Glossarist
|
|
|
152
152
|
|
|
153
153
|
LANG_CODES = Glossarist::LANG_CODES
|
|
154
154
|
|
|
155
|
+
# Builds AssetReference objects from the model attributes of a concept.
#
# For each localization, in order:
# * +non_verb_rep+, when it is a String, is split on whitespace and every
#   resulting token becomes one asset path;
# * every term that is a Designation::GraphicalSymbol with a non-blank
#   +image+ contributes that image, stripped of surrounding whitespace.
#
# Returns an Array of AssetReference (possibly empty).
def extract_asset_refs_from_concept(concept)
  concept.localizations.flat_map do |l10n|
    representation = l10n.non_verb_rep
    representation_paths = representation.is_a?(String) ? representation.strip.split : []

    symbol_paths = (l10n.data&.terms || []).filter_map do |term|
      next unless term.is_a?(Designation::GraphicalSymbol)

      image = term.image
      image.strip if image && !image.strip.empty?
    end

    (representation_paths + symbol_paths).map { |path| AssetReference.new(path: path) }
  end
end
|
|
176
|
+
|
|
177
|
+
# Builds BibliographicReference objects from the source citations attached
# to a concept's localizations (as returned by +gather_all_sources+).
#
# For every source that has an origin:
# * a non-blank +origin.text+ yields one reference anchored on that text;
# * when both +origin.source+ and +origin.id+ are present, two further
#   references are produced: "<source> <id>" and the id on its own.
#
# Returns an Array of BibliographicReference (possibly empty).
def extract_bib_refs_from_concept(concept)
  anchors = concept.localizations.flat_map do |l10n|
    gather_all_sources(l10n).flat_map do |source|
      origin = source.origin
      next [] unless origin

      found = []
      text = origin.text
      found << text if text && !text.strip.empty?
      if origin.source && origin.id
        found << "#{origin.source} #{origin.id}"
        found << origin.id.to_s
      end
      found
    end
  end

  anchors.map { |anchor| BibliographicReference.new(anchor: anchor) }
end
|
|
198
|
+
|
|
199
|
+
# Returns the references found by +extract_from_managed_concept+ followed by
# the asset references found by +extract_asset_refs_from_concept+.
#
# Bibliographic references are not part of the result; they are produced
# separately by +extract_bib_refs_from_concept+.
def extract_all_from_managed_concept(concept)
  managed_refs = extract_from_managed_concept(concept)
  managed_refs + extract_asset_refs_from_concept(concept)
end
|
|
205
|
+
|
|
206
|
+
# Turns the target of an AsciiDoc cross-reference into a
# BibliographicReference whose anchor is the target without surrounding
# whitespace.
def resolve_asciidoc_xref(target)
  anchor = target.strip
  BibliographicReference.new(anchor: anchor)
end
|
|
209
|
+
|
|
210
|
+
# Turns the path of an AsciiDoc image macro into an AssetReference whose path
# is the given path without surrounding whitespace.
def resolve_image_ref(path)
  trimmed = path.strip
  AssetReference.new(path: trimmed)
end
|
|
213
|
+
|
|
155
214
|
private
|
|
156
215
|
|
|
157
216
|
def gather_texts(lc_hash)
|
|
@@ -170,16 +229,7 @@ module Glossarist
|
|
|
170
229
|
|
|
171
230
|
# Drops references whose +dedup_key+ has already been seen, keeping the
# first occurrence of each key and preserving the original order.
def deduplicate(refs)
  refs.uniq(&:dedup_key)
end
|
|
184
234
|
|
|
185
235
|
def extract_term_id_from_urn_tail(tail)
|
|
@@ -212,6 +262,18 @@ module Glossarist
|
|
|
212
262
|
regex: /\{\{([^}]+)\}\}/,
|
|
213
263
|
) { |ext, content| ext.resolve_mention(content) }
|
|
214
264
|
|
|
265
|
+
# AsciiDoc cross-references: <<anchor>> or <<anchor,display text>>.
# Only the anchor part (the first capture group) is passed to the extractor.
register_pattern(name: :asciidoc_xref, regex: /<<([^,>\n]+?)(?:,[^>\n]*)?>>/) do |extractor, anchor|
  extractor.resolve_asciidoc_xref(anchor)
end

# Image references: image::path[] or image:path[].
# The captured path is everything between the macro name and the opening "[".
register_pattern(name: :asciidoc_image, regex: /image::?([^\[\]]+)\[/) do |extractor, image_path|
  extractor.resolve_image_ref(image_path)
end
|
|
276
|
+
|
|
215
277
|
# Identifiers starting with the IEC 60050 URN prefix are delegated to the
# extractor's +resolve_iec_urn+.
register_identifier_resolver("urn:iec:std:iec:60050") do |extractor, urn, label|
  extractor.resolve_iec_urn(urn, label)
end
|
|
@@ -223,5 +285,13 @@ module Glossarist
|
|
|
223
285
|
# Identifiers starting with "urn:" are delegated to the extractor's
# +resolve_generic_urn+.
register_identifier_resolver("urn:") do |extractor, urn, label|
  extractor.resolve_generic_urn(urn, label)
end
|
|
288
|
+
|
|
289
|
+
# Collects every source attached to a localization: the localization-level
# sources first, then the sources of each definition, note and example (in
# that order). Entries without sources are skipped.
#
# Returns a new Array; empty when the localization has no data.
def gather_all_sources(l10n)
  data = l10n.data

  nested = [data&.definition, data&.notes, data&.examples].flat_map do |items|
    (items || []).flat_map(&:sources).compact
  end

  Array(data&.sources) + nested
end
|
|
226
296
|
end
|
|
227
297
|
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
require "zip"
|
|
5
|
+
|
|
6
|
+
module Glossarist
  module Validation
    # Set-backed lookup of the asset paths known to a dataset.
    #
    # Paths are stored with one leading "/" removed, so "/images/a.png" and
    # "images/a.png" name the same entry.
    class AssetIndex
      # NOTE(review): not referenced anywhere in this class; it mirrors
      # BibliographyIndex::BIB_ENTRY_KEYS and looks like a leftover. Confirm
      # before removing.
      IMAGE_TERMS = %w[id ref text anchor].freeze
      private_constant :IMAGE_TERMS

      # The Set of normalized paths registered so far.
      attr_reader :paths

      def initialize
        @paths = Set.new
      end

      # Adds +path+ (normalized) to the index.
      def register(path)
        @paths.add(normalize_path(path))
      end

      # True when +path+ (normalized) has been registered.
      def resolve?(path)
        @paths.include?(normalize_path(path))
      end

      # Yields every registered path.
      def each_path(&block)
        @paths.each(&block)
      end

      # Builds an index for a dataset directory: every regular file below
      # "<dataset_path>/images", plus every asset path mentioned by the
      # concepts that ConceptCollector.collect returns for the directory.
      def self.build_from_directory(dataset_path)
        index = new
        index_image_files(index, dataset_path)
        index_model_assets(index, dataset_path)
        index
      end

      # Builds an index for a GCR archive: every non-directory zip entry whose
      # name starts with "images/", plus every asset path mentioned by the
      # concepts of the package loaded with GcrPackage.load.
      def self.build_from_zip(zip_path)
        index = new
        index_zip_images(index, zip_path)
        index_zip_concept_assets(index, zip_path)
        index
      end

      private

      def normalize_path(path)
        path.to_s.delete_prefix("/")
      end

      class << self
        private

        # Registers image files by their path relative to the dataset root
        # (e.g. "images/sub/a.png").
        #
        # Globbing with +base:+ yields dataset-relative names directly, so the
        # result is the same whether +dataset_path+ is absolute, relative to
        # the working directory, or written in a non-canonical form.
        def index_image_files(index, dataset_path)
          return unless File.directory?(File.join(dataset_path, "images"))

          pattern = File.join("images", "**", "*")
          Dir.glob(pattern, base: dataset_path).each do |relative|
            next unless File.file?(File.join(dataset_path, relative))

            index.register(relative)
          end
        end

        def index_model_assets(index, dataset_path)
          concepts = ConceptCollector.collect(dataset_path)
          index_concept_assets(index, concepts)
        end

        def index_zip_images(index, zip_path)
          Zip::File.open(zip_path) do |zf|
            zf.entries.each do |entry|
              next if entry.name.end_with?("/")
              next unless entry.name.start_with?("images/")

              index.register(entry.name)
            end
          end
        end

        def index_zip_concept_assets(index, zip_path)
          pkg = GcrPackage.load(zip_path)
          index_concept_assets(index, pkg.concepts)
        end

        def index_concept_assets(index, concepts)
          concepts.each do |concept|
            concept.localizations.each do |l10n|
              register_non_verb_rep(index, l10n)
              register_graphical_symbols(index, l10n)
            end
          end
        end

        # Registers each whitespace-separated token of a String +non_verb_rep+.
        def register_non_verb_rep(index, l10n)
          nvr = l10n.non_verb_rep
          return unless nvr.is_a?(String) && !nvr.strip.empty?

          nvr.strip.split.each { |path| index.register(path) }
        end

        # Registers the image of every graphical-symbol designation.
        #
        # The image is stripped and blank values are skipped, matching how
        # ReferenceExtractor#extract_asset_refs_from_concept builds the paths
        # that are later looked up in this index.
        def register_graphical_symbols(index, l10n)
          (l10n.data&.terms || []).each do |term|
            next unless term.is_a?(Designation::GraphicalSymbol)

            image = term.image&.strip
            next if image.nil? || image.empty?

            index.register(image)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    # Lookup table of bibliography anchors known to a dataset.
    #
    # Anchors are normalized before being stored or looked up: spaces, "/"
    # and ":" become "_" and runs of "_" collapse to one, so "ISO 9000",
    # "ISO/9000" and "ISO_9000" all name the same entry.
    class BibliographyIndex
      # Keys of a bibliography.yaml entry whose values are indexed as anchors.
      BIB_ENTRY_KEYS = %w[id ref text anchor].freeze
      private_constant :BIB_ENTRY_KEYS

      # Hash of normalized anchor => { anchor:, source: }.
      attr_reader :entries

      def initialize
        @entries = {}
      end

      # Stores +anchor+ under its normalized form. A later registration with
      # the same normalized form replaces the earlier one.
      def register(anchor, source = nil)
        @entries[normalize_anchor(anchor)] = { anchor: anchor, source: source }
      end

      # True when +anchor+ (normalized) has been registered.
      def resolve?(anchor)
        @entries.key?(normalize_anchor(anchor))
      end

      # The normalized anchors registered so far.
      def anchors
        @entries.keys
      end

      # Yields every stored { anchor:, source: } entry.
      def each_entry(&block)
        @entries.each_value(&block)
      end

      # Builds an index from the source citations of +concepts+ and from
      # bibliography YAML. The YAML text is +bibliography_yaml+ when given,
      # otherwise the content of "<dataset_path>/bibliography.yaml" when that
      # file exists.
      def self.build_from_concepts(concepts, dataset_path: nil,
                                   bibliography_yaml: nil)
        index = new

        concepts.each { |concept| index_concept_sources(index, concept) }

        yaml = bibliography_yaml || read_bibliography_file(dataset_path)
        index_bibliography_yaml(index, yaml) if yaml

        index
      end

      private

      def normalize_anchor(anchor)
        anchor.to_s.tr(" /:", "_").squeeze("_")
      end

      class << self
        private

        def read_bibliography_file(dataset_path)
          return nil unless dataset_path

          bib_path = File.join(dataset_path, "bibliography.yaml")
          File.exist?(bib_path) ? File.read(bib_path) : nil
        end

        def index_concept_sources(index, concept)
          concept.localizations.each do |l10n|
            index_l10n_sources(index, l10n)
          end
        end

        # Registers the localization-level sources and the sources of every
        # definition, note and example.
        def index_l10n_sources(index, l10n)
          data = l10n.data
          return unless data

          register_source_collection(index, data.sources)
          register_source_collection(index,
                                     data.definition&.flat_map(&:sources))
          register_source_collection(index, data.notes&.flat_map(&:sources))
          register_source_collection(index, data.examples&.flat_map(&:sources))
        end

        def register_source_collection(index, sources)
          Array(sources).compact.each { |s| register_source(index, s) }
        end

        def register_source(index, source)
          origin = source.origin
          return unless origin

          register_origin_text(index, origin)
          register_origin_ref(index, origin)
        end

        def register_origin_text(index, origin)
          return unless origin.text && !origin.text.strip.empty?

          index.register(origin.text, origin)
        end

        # Registers both "<source> <id>" and the bare id.
        def register_origin_ref(index, origin)
          return unless origin.source && origin.id

          key = "#{origin.source} #{origin.id}"
          index.register(key, origin)
          index.register(origin.id.to_s, origin)
        end

        # Registers the BIB_ENTRY_KEYS values of every Hash entry found in the
        # YAML document (the values of a top-level mapping, or the items of a
        # top-level sequence).
        #
        # Indexing is best-effort: any Psych error raised by safe_load
        # (syntax error, disallowed class, alias that safe_load rejects)
        # leaves the index as it was instead of aborting index construction.
        def index_bibliography_yaml(index, yaml_content)
          data = YAML.safe_load(yaml_content)
          return unless data.is_a?(Hash) || data.is_a?(Array)

          entries = data.is_a?(Hash) ? data.values : data
          entries.each do |entry|
            next unless entry.is_a?(Hash)

            BIB_ENTRY_KEYS.each do |key|
              val = entry[key]
              index.register(val.to_s, entry) if val && !val.to_s.strip.empty?
            end
          end
        rescue Psych::Exception
          nil
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    module Rules
      # GLS-102 (warning, concept scope): reports AsciiDoc cross-references
      # found in definition, note and example text whose anchor is not known
      # to the context's bibliography index.
      class AsciidocXrefRule < Base
        def code = "GLS-102"
        def category = :references
        def severity = "warning"
        def scope = :concept

        # The rule only runs for concepts that have at least one localization.
        def applicable?(context)
          context.concept.localizations&.any?
        end

        # Returns one issue per unresolved bibliographic reference, in the
        # order localizations and their texts are visited. The issue location
        # is "<file name>/<language code>", with "unknown" standing in for a
        # missing language code.
        def check(context)
          file = context.file_name
          extractor = ReferenceExtractor.new

          context.concept.localizations.flat_map do |l10n|
            lang = l10n.language_code || "unknown"

            unresolved_references(extractor, l10n, context).map do |ref|
              issue(
                "unresolved bibliography reference <<#{ref.anchor}>>",
                code: code, severity: severity,
                location: "#{file}/#{lang}",
                suggestion: "add '#{ref.anchor}' as a source, " \
                            "or verify it exists in bibliography.yaml",
              )
            end
          end
        end

        private

        # Bibliographic references extracted from the localization's texts
        # that the bibliography index cannot resolve.
        def unresolved_references(extractor, l10n, context)
          extract_texts(l10n)
            .flat_map { |text| extractor.extract_from_text(text) }
            .select { |ref| ref.is_a?(BibliographicReference) }
            .reject { |ref| context.bibliography_index.resolve?(ref.anchor) }
        end

        # Content of every definition, note and example that has content.
        def extract_texts(l10n)
          data = l10n.data

          [data&.definition, data&.notes, data&.examples].flat_map do |items|
            (items || []).filter_map(&:content)
          end
        end
      end
    end
  end
end
|
|
60
|
+
|