glossarist 2.6.4 → 2.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +1 -4
- data/.rubocop_todo.yml +25 -74
- data/CLAUDE.md +27 -2
- data/Gemfile +0 -2
- data/README.adoc +650 -29
- data/config.yml +68 -1
- data/glossarist.gemspec +1 -1
- data/lib/glossarist/asset_reference.rb +16 -0
- data/lib/glossarist/bibliographic_reference.rb +16 -0
- data/lib/glossarist/concept.rb +1 -1
- data/lib/glossarist/concept_data.rb +4 -0
- data/lib/glossarist/concept_enricher.rb +1 -0
- data/lib/glossarist/concept_reference.rb +14 -17
- data/lib/glossarist/concept_validator.rb +27 -56
- data/lib/glossarist/dataset_validator.rb +30 -34
- data/lib/glossarist/designation/abbreviation.rb +0 -2
- data/lib/glossarist/designation/base.rb +21 -1
- data/lib/glossarist/designation/expression.rb +3 -0
- data/lib/glossarist/designation/letter_symbol.rb +0 -4
- data/lib/glossarist/designation/symbol.rb +0 -2
- data/lib/glossarist/gcr_validator.rb +26 -101
- data/lib/glossarist/glossary_definition.rb +5 -0
- data/lib/glossarist/managed_concept_data.rb +21 -2
- data/lib/glossarist/non_verb_rep.rb +21 -6
- data/lib/glossarist/pronunciation.rb +32 -0
- data/lib/glossarist/reference_extractor.rb +78 -10
- data/lib/glossarist/reference_resolver.rb +1 -0
- data/lib/glossarist/urn_resolver.rb +13 -1
- data/lib/glossarist/v1/concept.rb +7 -0
- data/lib/glossarist/validation/asset_index.rb +114 -0
- data/lib/glossarist/validation/bibliography_index.rb +121 -0
- data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +60 -0
- data/lib/glossarist/validation/rules/authoritative_source_rule.rb +47 -0
- data/lib/glossarist/validation/rules/base.rb +46 -0
- data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +37 -0
- data/lib/glossarist/validation/rules/citation_completeness_rule.rb +63 -0
- data/lib/glossarist/validation/rules/concept_context.rb +45 -0
- data/lib/glossarist/validation/rules/concept_count_rule.rb +34 -0
- data/lib/glossarist/validation/rules/concept_id_rule.rb +29 -0
- data/lib/glossarist/validation/rules/concept_id_uniqueness_rule.rb +42 -0
- data/lib/glossarist/validation/rules/concept_mention_rule.rb +44 -0
- data/lib/glossarist/validation/rules/concept_status_rule.rb +36 -0
- data/lib/glossarist/validation/rules/concept_uri_rule.rb +30 -0
- data/lib/glossarist/validation/rules/dataset_context.rb +99 -0
- data/lib/glossarist/validation/rules/date_type_rule.rb +54 -0
- data/lib/glossarist/validation/rules/date_validity_rule.rb +66 -0
- data/lib/glossarist/validation/rules/definition_content_rule.rb +41 -0
- data/lib/glossarist/validation/rules/designation_status_rule.rb +45 -0
- data/lib/glossarist/validation/rules/designation_type_rule.rb +55 -0
- data/lib/glossarist/validation/rules/duplicate_term_rule.rb +63 -0
- data/lib/glossarist/validation/rules/entry_status_rule.rb +39 -0
- data/lib/glossarist/validation/rules/filename_id_rule.rb +35 -0
- data/lib/glossarist/validation/rules/gcr_context.rb +92 -0
- data/lib/glossarist/validation/rules/image_reference_rule.rb +73 -0
- data/lib/glossarist/validation/rules/l10n_uuid_integrity_rule.rb +40 -0
- data/lib/glossarist/validation/rules/language_code_format_rule.rb +39 -0
- data/lib/glossarist/validation/rules/language_coverage_rule.rb +37 -0
- data/lib/glossarist/validation/rules/language_list_rule.rb +46 -0
- data/lib/glossarist/validation/rules/localization_presence_rule.rb +25 -0
- data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +64 -0
- data/lib/glossarist/validation/rules/orphaned_images_rule.rb +68 -0
- data/lib/glossarist/validation/rules/orphaned_l10n_files_rule.rb +39 -0
- data/lib/glossarist/validation/rules/preferred_term_rule.rb +41 -0
- data/lib/glossarist/validation/rules/registry.rb +42 -0
- data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +102 -0
- data/lib/glossarist/validation/rules/related_concept_rule.rb +40 -0
- data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +87 -0
- data/lib/glossarist/validation/rules/source_type_rule.rb +63 -0
- data/lib/glossarist/validation/rules/terms_presence_rule.rb +39 -0
- data/lib/glossarist/validation/rules.rb +85 -0
- data/lib/glossarist/validation/validation_issue.rb +39 -0
- data/lib/glossarist/validation.rb +12 -0
- data/lib/glossarist/validation_result.rb +26 -9
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +4 -0
- metadata +62 -16
|
@@ -4,7 +4,7 @@ require "zip"
|
|
|
4
4
|
|
|
5
5
|
module Glossarist
|
|
6
6
|
class GcrValidator
|
|
7
|
-
def validate(zip_path)
|
|
7
|
+
def validate(zip_path)
|
|
8
8
|
result = ValidationResult.new
|
|
9
9
|
|
|
10
10
|
unless File.exist?(zip_path)
|
|
@@ -13,123 +13,48 @@ module Glossarist
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
begin
|
|
16
|
-
Zip::File.open(zip_path)
|
|
17
|
-
validate_zip_contents(zip_file, result)
|
|
18
|
-
end
|
|
16
|
+
zip_entries = Zip::File.open(zip_path) { |zf| zf.entries.to_set(&:name) }
|
|
19
17
|
rescue StandardError => e
|
|
20
18
|
result.add_error("Failed to read ZIP: #{e.message}")
|
|
19
|
+
return result
|
|
21
20
|
end
|
|
22
21
|
|
|
23
|
-
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
private
|
|
27
|
-
|
|
28
|
-
def validate_zip_contents(zip_file, result) # rubocop:disable Metrics/AbcSize
|
|
29
|
-
unless zip_file.find_entry("metadata.yaml")
|
|
22
|
+
unless zip_entries.include?("metadata.yaml")
|
|
30
23
|
result.add_error("Missing metadata.yaml")
|
|
31
|
-
return
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
metadata = GcrMetadata.from_yaml(
|
|
35
|
-
zip_file.find_entry("metadata.yaml").get_input_stream.read,
|
|
36
|
-
)
|
|
37
|
-
validate_metadata(metadata, result)
|
|
38
|
-
|
|
39
|
-
concept_entries = zip_file.entries.select do |e|
|
|
40
|
-
e.name.start_with?("concepts/") && e.name.end_with?(".yaml")
|
|
41
|
-
end
|
|
42
|
-
if concept_entries.empty?
|
|
43
|
-
result.add_error("No concept files found in concepts/")
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
concept_entries.each do |entry|
|
|
47
|
-
validate_concept_entry(entry, metadata, result)
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
validate_assets(zip_file, result)
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
def validate_metadata(metadata, result)
|
|
54
|
-
unless metadata&.concept_count
|
|
55
|
-
result.add_error("metadata.yaml missing required fields (concept_count)")
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
unless metadata&.shortname
|
|
59
|
-
result.add_error("metadata.yaml missing shortname")
|
|
24
|
+
return result
|
|
60
25
|
end
|
|
61
26
|
|
|
62
|
-
|
|
63
|
-
|
|
27
|
+
begin
|
|
28
|
+
context = Validation::Rules::GcrContext.new(zip_path)
|
|
29
|
+
rescue StandardError => e
|
|
30
|
+
result.add_error("Failed to load GCR: #{e.message}")
|
|
31
|
+
return result
|
|
64
32
|
end
|
|
65
|
-
end
|
|
66
33
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
result.add_error("#{entry.name}: invalid YAML at line #{e.line}: #{e.message}")
|
|
72
|
-
rescue StandardError => e
|
|
73
|
-
result.add_error("#{entry.name}: parse error: #{e.message}")
|
|
74
|
-
else
|
|
75
|
-
concept = doc.concept
|
|
76
|
-
unless concept&.data&.id
|
|
77
|
-
result.add_error("#{entry.name}: document 0 missing data.identifier")
|
|
78
|
-
end
|
|
34
|
+
# Collection-level rules (metadata, structure, integrity)
|
|
35
|
+
collection_rules = Validation::Rules::Registry.for_scope(:collection)
|
|
36
|
+
collection_rules.each do |rule|
|
|
37
|
+
next unless rule.applicable?(context)
|
|
79
38
|
|
|
80
|
-
|
|
81
|
-
if localizations.empty?
|
|
82
|
-
result.add_error("#{entry.name}: expected at least 1 localization document")
|
|
83
|
-
else
|
|
84
|
-
localizations.each_with_index do |l10n, idx|
|
|
85
|
-
unless l10n&.language_code
|
|
86
|
-
result.add_error("#{entry.name}: document #{idx + 1} missing data.language_code")
|
|
87
|
-
end
|
|
88
|
-
end
|
|
39
|
+
rule.check(context).each { |i| result.add_issue(i) }
|
|
89
40
|
end
|
|
90
41
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
42
|
+
# Per-concept rules
|
|
43
|
+
concept_rules = Validation::Rules::Registry.for_scope(:concept)
|
|
44
|
+
context.concepts.each_with_index do |concept, idx|
|
|
45
|
+
fname = concept.data&.id ? "concepts/#{concept.data.id}.yaml" : "concepts/concept-#{idx}.yaml"
|
|
46
|
+
concept_context = Validation::Rules::ConceptContext.new(
|
|
47
|
+
concept, file_name: fname, collection_context: context
|
|
48
|
+
)
|
|
98
49
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
end
|
|
102
|
-
end
|
|
50
|
+
concept_rules.each do |rule|
|
|
51
|
+
next unless rule.applicable?(concept_context)
|
|
103
52
|
|
|
104
|
-
|
|
105
|
-
GcrPackage::DATASET_ASSETS.each do |asset|
|
|
106
|
-
case asset[:type]
|
|
107
|
-
when :file
|
|
108
|
-
validate_file_asset_entry(zip_file, asset[:path], result)
|
|
109
|
-
when :directory
|
|
110
|
-
validate_directory_asset(zip_file, asset[:path], result)
|
|
53
|
+
rule.check(concept_context).each { |i| result.add_issue(i) }
|
|
111
54
|
end
|
|
112
55
|
end
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
def validate_file_asset_entry(zip_file, path, result)
|
|
116
|
-
entry = zip_file.find_entry(path)
|
|
117
|
-
return unless entry
|
|
118
|
-
|
|
119
|
-
YAML.safe_load(entry.get_input_stream.read)
|
|
120
|
-
rescue Psych::SyntaxError => e
|
|
121
|
-
result.add_error("#{path}: invalid YAML at line #{e.line}: #{e.message}")
|
|
122
|
-
end
|
|
123
56
|
|
|
124
|
-
|
|
125
|
-
dir_entries = zip_file.entries.select do |e|
|
|
126
|
-
e.name.start_with?("#{dir_path}/")
|
|
127
|
-
end
|
|
128
|
-
return unless dir_entries.any? && dir_entries.all? do |e|
|
|
129
|
-
e.name.end_with?("/")
|
|
130
|
-
end
|
|
131
|
-
|
|
132
|
-
result.add_warning("#{dir_path}/ directory is empty")
|
|
57
|
+
result
|
|
133
58
|
end
|
|
134
59
|
end
|
|
135
60
|
end
|
|
@@ -28,5 +28,10 @@ module Glossarist
|
|
|
28
28
|
CONCEPT_DATE_TYPES = config.dig("concept_date", "type").freeze
|
|
29
29
|
|
|
30
30
|
CONCEPT_STATUSES = config.dig("concept", "status").freeze
|
|
31
|
+
|
|
32
|
+
DESIGNATION_RELATIONSHIP_TYPES = config.dig("designation",
|
|
33
|
+
"relationship_type")&.freeze
|
|
34
|
+
|
|
35
|
+
ISO12620_TERM_TYPES = config.dig("iso12620", "term_type").freeze
|
|
31
36
|
end
|
|
32
37
|
end
|
|
@@ -5,7 +5,7 @@ module Glossarist
|
|
|
5
5
|
attribute :id, :string
|
|
6
6
|
attribute :uri, :string
|
|
7
7
|
attribute :localized_concepts, :hash
|
|
8
|
-
attribute :
|
|
8
|
+
attribute :domains, ConceptReference, collection: true
|
|
9
9
|
attribute :sources, ConceptSource, collection: true
|
|
10
10
|
attribute :localizations, LocalizedConcept,
|
|
11
11
|
collection: Collections::LocalizationCollection,
|
|
@@ -16,7 +16,8 @@ module Glossarist
|
|
|
16
16
|
with: { to: :id_to_yaml, from: :id_from_yaml }
|
|
17
17
|
map :uri, to: :uri
|
|
18
18
|
map %i[localized_concepts localizedConcepts], to: :localized_concepts
|
|
19
|
-
map
|
|
19
|
+
map %i[domains groups], to: :domains,
|
|
20
|
+
with: { from: :domains_from_yaml, to: :domains_to_yaml }
|
|
20
21
|
map :sources, to: :sources
|
|
21
22
|
map :localizations, to: :localizations,
|
|
22
23
|
with: { from: :localizations_from_yaml, to: :localizations_to_yaml }
|
|
@@ -41,6 +42,24 @@ module Glossarist
|
|
|
41
42
|
|
|
42
43
|
def localizations_to_yaml(model, doc); end
|
|
43
44
|
|
|
45
|
+
# Deserializes the value mapped from the `domains` / `groups` key into
# ConceptReference objects stored on the model.
#
# Values that are not an Array are ignored and the model is left untouched.
# Hash entries are parsed with ConceptReference.of_yaml; every other entry is
# stringified and becomes the concept_id of a reference whose ref_type is
# "domain".
def domains_from_yaml(model, value)
  return unless value.is_a?(Array)

  references = value.map do |entry|
    case entry
    when Hash then ConceptReference.of_yaml(entry)
    else ConceptReference.new(concept_id: entry.to_s, ref_type: "domain")
    end
  end

  model.domains = references
end
|
|
56
|
+
|
|
57
|
+
# Serializes the model's domain references into doc["domains"].
#
# Nothing is written when the model has no domains (nil or empty), so the
# key is absent from the output rather than present with an empty list.
def domains_to_yaml(model, doc)
  references = model.domains
  return if references.nil? || references.empty?

  serialized = references.map(&:to_hash)
  doc["domains"] = serialized
end
|
|
62
|
+
|
|
44
63
|
def authoritative_source
|
|
45
64
|
return [] unless sources
|
|
46
65
|
|
|
@@ -1,14 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Glossarist
|
|
4
|
+
# A non-verbal representation used to help define a concept, following
|
|
5
|
+
# ISO 10241-1 §6.5.
|
|
6
|
+
#
|
|
7
|
+
# Non-verbal representations are associated resources (images, tables,
|
|
8
|
+
# formulas) that live outside the concept model. They are referenced by URI
|
|
9
|
+
# and can be shared across concepts. The resource belongs either to the
|
|
10
|
+
# dataset package (relative path) or is externally referenced (URL/URN).
|
|
11
|
+
#
|
|
12
|
+
# Each non-verbal representation specifies:
|
|
13
|
+
# - +type+: one of "image", "table", "formula"
|
|
14
|
+
# - +ref+: URI reference to the resource (relative path, URN, or URL)
|
|
15
|
+
# - +text+: optional text description or alt text
|
|
16
|
+
# - +sources+: bibliographic sources for the representation
|
|
2
17
|
class NonVerbRep < Lutaml::Model::Serializable
|
|
3
|
-
attribute :
|
|
4
|
-
attribute :
|
|
5
|
-
attribute :
|
|
18
|
+
attribute :type, :string
|
|
19
|
+
attribute :ref, :string
|
|
20
|
+
attribute :text, :string
|
|
6
21
|
attribute :sources, ConceptSource, collection: true
|
|
7
22
|
|
|
8
23
|
key_value do
|
|
9
|
-
map :
|
|
10
|
-
map :
|
|
11
|
-
map :
|
|
24
|
+
map :type, to: :type
|
|
25
|
+
map :ref, to: :ref
|
|
26
|
+
map :text, to: :text
|
|
12
27
|
map :sources, to: :sources
|
|
13
28
|
end
|
|
14
29
|
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Glossarist
  # A pronunciation or transcription of a designation, following ISO 24229
  # spelling system conventions.
  #
  # Each pronunciation entry specifies the text of the pronunciation and the
  # context in which it is expressed:
  # - +content+ is the pronunciation text itself
  # - +language+ (ISO 639) identifies the language or dialect being pronounced
  # - +script+ (ISO 15924) identifies the script used for the pronunciation text
  # - +country+ (ISO 3166-1) identifies the country variant
  # - +system+ identifies the transcription/romanization system used (ISO 24229
  #   conversion system code or a simple identifier like "IPA")
  #
  # A designation can have multiple pronunciations, e.g.:
  # - IPA: { content: "toːkjoː", script: "Latn", language: "jpn", system: "IPA" }
  # - Hepburn: { content: "Tōkyō", script: "Latn", language: "jpn", system: "Var:jpn-Hrkt:Latn:Hepburn-1886" }
  # - Cyrillic: { content: "Токио", script: "Cyrl", language: "rus", system: "polivanov" }
  class Pronunciation < Lutaml::Model::Serializable
    attribute :content, :string
    attribute :language, :string
    attribute :script, :string
    attribute :country, :string
    attribute :system, :string

    # Every attribute is serialized under a key of the same name.
    key_value do
      map :content, to: :content
      map :language, to: :language
      map :script, to: :script
      map :country, to: :country
      map :system, to: :system
    end
  end
end
|
|
@@ -152,6 +152,63 @@ module Glossarist
|
|
|
152
152
|
|
|
153
153
|
LANG_CODES = Glossarist::LANG_CODES
|
|
154
154
|
|
|
155
|
+
# Builds AssetReference objects from model attributes of every localization
# of the concept (NonVerbRep refs and GraphicalSymbol images).
#
# For each localization, non-verbal representations are visited first and
# designations second. Entries whose ref/image is missing or blank are
# skipped; the remaining paths are stripped of surrounding whitespace.
# Returns an Array, in visiting order, without deduplication.
def extract_asset_refs_from_concept(concept)
  collected = []

  concept.localizations.each do |localization|
    Array(localization.non_verb_rep).each do |representation|
      next unless representation.is_a?(NonVerbRep)

      location = representation.ref
      next unless location
      next if location.strip.empty?

      collected << AssetReference.new(path: location.strip)
    end

    designations = localization.data&.terms || []
    designations.each do |designation|
      next unless designation.is_a?(Designation::GraphicalSymbol)

      picture = designation.image
      next unless picture
      next if picture.strip.empty?

      collected << AssetReference.new(path: picture.strip)
    end
  end

  collected
end
|
|
174
|
+
|
|
175
|
+
# Builds BibliographicReference objects from the source citations attached
# to every localization of the concept (as collected by gather_all_sources).
#
# A citation without an origin contributes nothing. Otherwise:
# - a non-blank origin text yields one reference anchored on that text
#   (the text is used as-is, not stripped);
# - when both origin.source and origin.id are present, two more references
#   are added: one anchored on "<source> <id>" and one on the id alone.
# Returns an Array, in visiting order, without deduplication.
def extract_bib_refs_from_concept(concept)
  found = []

  concept.localizations.each do |localization|
    gather_all_sources(localization).each do |citation|
      origin = citation.origin
      next unless origin

      label = origin.text
      found << BibliographicReference.new(anchor: label) if label && !label.strip.empty?

      next unless origin.source && origin.id

      found << BibliographicReference.new(anchor: "#{origin.source} #{origin.id}")
      found << BibliographicReference.new(anchor: origin.id.to_s)
    end
  end

  found
end
|
|
196
|
+
|
|
197
|
+
# Combines (with `+`) the result of extract_from_managed_concept and the
# asset references from extract_asset_refs_from_concept.
#
# Bibliographic references are not part of the result; they are produced
# separately by extract_bib_refs_from_concept.
def extract_all_from_managed_concept(concept)
  extract_from_managed_concept(concept) + extract_asset_refs_from_concept(concept)
end
|
|
203
|
+
|
|
204
|
+
# Turns the target of an AsciiDoc cross-reference into a
# BibliographicReference anchored on the whitespace-stripped target.
def resolve_asciidoc_xref(target)
  anchor = target.strip
  BibliographicReference.new(anchor: anchor)
end
|
|
207
|
+
|
|
208
|
+
# Turns the path of an image macro into an AssetReference for the
# whitespace-stripped path.
def resolve_image_ref(path)
  location = path.strip
  AssetReference.new(path: location)
end
|
|
211
|
+
|
|
155
212
|
private
|
|
156
213
|
|
|
157
214
|
def gather_texts(lc_hash)
|
|
@@ -170,16 +227,7 @@ module Glossarist
|
|
|
170
227
|
|
|
171
228
|
def deduplicate(refs)
|
|
172
229
|
seen = Set.new
|
|
173
|
-
refs.select
|
|
174
|
-
key = if ref.concept_id
|
|
175
|
-
[ref.source,
|
|
176
|
-
ref.concept_id]
|
|
177
|
-
else
|
|
178
|
-
[ref.source, ref.concept_id,
|
|
179
|
-
ref.term]
|
|
180
|
-
end
|
|
181
|
-
seen.add?(key)
|
|
182
|
-
end
|
|
230
|
+
refs.select { |ref| seen.add?(ref.dedup_key) }
|
|
183
231
|
end
|
|
184
232
|
|
|
185
233
|
def extract_term_id_from_urn_tail(tail)
|
|
@@ -212,6 +260,18 @@ module Glossarist
|
|
|
212
260
|
regex: /\{\{([^}]+)\}\}/,
|
|
213
261
|
) { |ext, content| ext.resolve_mention(content) }
|
|
214
262
|
|
|
263
|
+
# AsciiDoc cross-references: <<anchor>> or <<anchor,display text>>.
# The capture group holds the anchor; the optional ",display text" part is
# matched but not captured. Neither part may span a line break.
register_pattern(
  name: :asciidoc_xref,
  regex: /<<([^,>\n]+?)(?:,[^>\n]*)?>>/,
) do |extractor, anchor|
  extractor.resolve_asciidoc_xref(anchor)
end

# Image references: block form image::path[] or inline form image:path[].
# The capture group holds everything between the colon(s) and the opening
# bracket.
register_pattern(
  name: :asciidoc_image,
  regex: /image::?([^\[\]]+)\[/,
) do |extractor, location|
  extractor.resolve_image_ref(location)
end
|
|
274
|
+
|
|
215
275
|
register_identifier_resolver("urn:iec:std:iec:60050") do |ext, identifier, display|
|
|
216
276
|
ext.resolve_iec_urn(identifier, display)
|
|
217
277
|
end
|
|
@@ -223,5 +283,13 @@ module Glossarist
|
|
|
223
283
|
register_identifier_resolver("urn:") do |ext, identifier, display|
|
|
224
284
|
ext.resolve_generic_urn(identifier, display)
|
|
225
285
|
end
|
|
286
|
+
|
|
287
|
+
# Collects every source attached to a localization: the sources on the
# localization data itself, followed by the sources of each definition,
# note and example (in that order). Nil sources are dropped.
#
# A localization without data yields an empty Array. The arrays held by the
# model are never modified; concatenation always builds a new Array.
def gather_all_sources(l10n)
  payload = l10n.data
  direct = Array(payload&.sources)
  nested_groups = [payload&.definition, payload&.notes, payload&.examples]

  nested_groups.reduce(direct) do |collected, entries|
    # `+` (not concat) so the model's own sources array is left untouched.
    collected + Array((entries || []).flat_map(&:sources).compact)
  end
end
|
|
226
294
|
end
|
|
227
295
|
end
|
|
@@ -64,7 +64,19 @@ module Glossarist
|
|
|
64
64
|
def to_urn(urn_or_reference)
|
|
65
65
|
case urn_or_reference
|
|
66
66
|
when String then urn_or_reference
|
|
67
|
-
when ConceptReference then urn_or_reference
|
|
67
|
+
when ConceptReference then concept_reference_to_urn(urn_or_reference)
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Derives a URN string for a concept reference.
#
# - A non-blank ref.urn is returned unchanged.
# - Otherwise nil is returned unless the reference is external and has both
#   a source and a concept_id.
# - Otherwise the URN is assembled from source and concept_id; the joining
#   rule depends on the source prefix:
#     urn:iec…  => "<source>-<concept_id>"
#     urn:iso…  => "<source>:term:<concept_id>"
#     anything else => "<source>/<concept_id>"
def concept_reference_to_urn(ref)
  explicit = ref.urn
  return explicit if explicit && !explicit.strip.empty?
  return nil unless ref.external? && ref.source && ref.concept_id

  namespace = ref.source
  term_id = ref.concept_id

  case namespace
  when /\Aurn:iec/
    "#{namespace}-#{term_id}"
  when /\Aurn:iso/
    "#{namespace}:term:#{term_id}"
  else
    "#{namespace}/#{term_id}"
  end
end
|
|
70
82
|
end
|
|
@@ -42,6 +42,7 @@ module Glossarist
|
|
|
42
42
|
mc.add_localization(LocalizedConcept.of_yaml({ "data" => data }))
|
|
43
43
|
end
|
|
44
44
|
|
|
45
|
+
assign_domains(mc) if groups.is_a?(Array) && groups.any?
|
|
45
46
|
assign_references(mc) if references.is_a?(Array) && references.any?
|
|
46
47
|
|
|
47
48
|
mc
|
|
@@ -49,6 +50,12 @@ module Glossarist
|
|
|
49
50
|
|
|
50
51
|
private
|
|
51
52
|
|
|
53
|
+
# Converts each entry of `groups` into a ConceptReference with ref_type
# "domain" (the stringified entry becomes the concept_id) and stores the
# resulting list on concept.data.domains.
def assign_domains(concept)
  domain_refs = groups.map do |group|
    ConceptReference.new(concept_id: group.to_s, ref_type: "domain")
  end

  concept.data.domains = domain_refs
end
|
|
58
|
+
|
|
52
59
|
def assign_references(concept)
|
|
53
60
|
l10n = concept.localization("eng") || concept.localizations.values.first
|
|
54
61
|
return unless l10n
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
require "zip"
|
|
5
|
+
|
|
6
|
+
module Glossarist
  module Validation
    # A set of normalized asset paths known for a dataset.
    #
    # The index is filled from two places: files found under the dataset's
    # `images/` directory (or `images/` entries of a ZIP package), and asset
    # paths referenced by concepts (NonVerbRep refs and GraphicalSymbol
    # images).
    #
    # NOTE(review): because referenced paths are registered as well,
    # `resolve?` answers true for a path that a concept references even when
    # no such file exists under `images/` — confirm this is intended.
    class AssetIndex
      # NOTE(review): not referenced anywhere in this class; kept unchanged
      # because removing it is outside the scope of this fix.
      IMAGE_TERMS = %w[id ref text anchor].freeze
      private_constant :IMAGE_TERMS

      # @return [Set<String>] the normalized paths registered so far
      attr_reader :paths

      def initialize
        @paths = Set.new
      end

      # Adds a path to the index (a single leading "/" is removed first).
      def register(path)
        @paths.add(normalize_path(path))
      end

      # @return [Boolean] whether the path (normalized the same way as in
      #   #register) is present in the index
      def resolve?(path)
        @paths.include?(normalize_path(path))
      end

      # Yields every registered path.
      def each_path(&block)
        @paths.each(&block)
      end

      # Builds an index from a dataset directory: image files on disk plus
      # the asset paths referenced by the concepts collected from it.
      def self.build_from_directory(dataset_path)
        index = new
        index_image_files(index, dataset_path)
        index_model_assets(index, dataset_path)
        index
      end

      # Builds an index from a ZIP package: its `images/` entries plus the
      # asset paths referenced by the concepts of the loaded package.
      def self.build_from_zip(zip_path)
        index = new
        index_zip_images(index, zip_path)
        index_zip_concept_assets(index, zip_path)
        index
      end

      private

      def normalize_path(path)
        path.to_s.delete_prefix("/")
      end

      class << self
        private

        # Registers every regular file below <dataset_path>/images using its
        # path relative to the dataset root (e.g. "images/sub/a.png").
        #
        # The glob is evaluated relative to the images directory, so the
        # result is the same for relative and absolute dataset paths and is
        # not affected by glob metacharacters in the dataset path itself.
        def index_image_files(index, dataset_path)
          images_dir = File.join(dataset_path, "images")
          return unless File.directory?(images_dir)

          Dir.glob("**/*", base: images_dir).each do |relative|
            next unless File.file?(File.join(images_dir, relative))

            index.register(File.join("images", relative))
          end
        end

        def index_model_assets(index, dataset_path)
          concepts = ConceptCollector.collect(dataset_path)
          index_concept_assets(index, concepts)
        end

        # Registers every non-directory ZIP entry whose name starts with
        # "images/".
        def index_zip_images(index, zip_path)
          Zip::File.open(zip_path) do |zf|
            zf.entries.each do |entry|
              next if entry.name.end_with?("/")
              next unless entry.name.start_with?("images/")

              index.register(entry.name)
            end
          end
        end

        def index_zip_concept_assets(index, zip_path)
          pkg = GcrPackage.load(zip_path)
          index_concept_assets(index, pkg.concepts)
        end

        def index_concept_assets(index, concepts)
          concepts.each do |concept|
            concept.localizations.each do |l10n|
              register_non_verb_rep(index, l10n)
              register_graphical_symbols(index, l10n)
            end
          end
        end

        # Registers the stripped `ref` of every NonVerbRep that has a
        # non-blank one.
        def register_non_verb_rep(index, l10n)
          Array(l10n.non_verb_rep).each do |nvr|
            next unless nvr.is_a?(NonVerbRep) && nvr.ref && !nvr.ref.strip.empty?

            index.register(nvr.ref.strip)
          end
        end

        # Registers the stripped `image` of every GraphicalSymbol designation
        # that has a non-blank one, mirroring the handling of NonVerbRep refs
        # so that blank or padded image values never end up in the index.
        def register_graphical_symbols(index, l10n)
          (l10n.data&.terms || []).each do |term|
            next unless term.is_a?(Designation::GraphicalSymbol) && term.image

            image = term.image.strip
            next if image.empty?

            index.register(image)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    # Lookup table of bibliographic anchors known for a dataset.
    #
    # Anchors come from the source citations attached to concepts and,
    # optionally, from a bibliography YAML document. Keys are normalized
    # (spaces, "/" and ":" become "_", runs of "_" collapse to one), so
    # lookups tolerate those punctuation differences.
    class BibliographyIndex
      # Keys of a bibliography YAML entry whose values are indexed as anchors.
      BIB_ENTRY_KEYS = %w[id ref text anchor].freeze
      private_constant :BIB_ENTRY_KEYS

      # @return [Hash{String => Hash}] normalized anchor => { anchor:, source: }
      attr_reader :entries

      def initialize
        @entries = {}
      end

      # Stores an anchor together with the object it came from. A later
      # registration with the same normalized anchor replaces the earlier one.
      def register(anchor, source = nil)
        @entries[normalize_anchor(anchor)] = { anchor: anchor, source: source }
      end

      # @return [Boolean] whether the (normalized) anchor is registered
      def resolve?(anchor)
        @entries.key?(normalize_anchor(anchor))
      end

      # @return [Array<String>] the normalized anchors
      def anchors
        @entries.keys
      end

      # Yields every stored { anchor:, source: } record.
      def each_entry(&block)
        @entries.each_value(&block)
      end

      # Builds an index from concept source citations plus a bibliography.
      #
      # @param concepts [#each] concepts whose localizations are scanned
      # @param dataset_path [String, nil] directory that may contain
      #   bibliography.yaml; only consulted when bibliography_yaml is nil
      # @param bibliography_yaml [String, nil] bibliography YAML content
      def self.build_from_concepts(concepts, dataset_path: nil,
                                   bibliography_yaml: nil)
        index = new

        concepts.each { |concept| index_concept_sources(index, concept) }

        yaml = bibliography_yaml || read_bibliography_file(dataset_path)
        index_bibliography_yaml(index, yaml) if yaml

        index
      end

      private

      def normalize_anchor(anchor)
        anchor.to_s.tr(" /:", "_").squeeze("_")
      end

      class << self
        private

        def read_bibliography_file(dataset_path)
          return nil unless dataset_path

          bib_path = File.join(dataset_path, "bibliography.yaml")
          File.exist?(bib_path) ? File.read(bib_path) : nil
        end

        def index_concept_sources(index, concept)
          concept.localizations.each do |l10n|
            index_l10n_sources(index, l10n)
          end
        end

        # Registers the sources of the localization data itself and of its
        # definitions, notes and examples.
        def index_l10n_sources(index, l10n)
          data = l10n.data
          return unless data

          register_source_collection(index, data.sources)
          register_source_collection(index,
                                     data.definition&.flat_map(&:sources))
          register_source_collection(index, data.notes&.flat_map(&:sources))
          register_source_collection(index, data.examples&.flat_map(&:sources))
        end

        def register_source_collection(index, sources)
          Array(sources).compact.each { |s| register_source(index, s) }
        end

        def register_source(index, source)
          origin = source.origin
          return unless origin

          register_origin_text(index, origin)
          register_origin_ref(index, origin)
        end

        def register_origin_text(index, origin)
          return unless origin.text && !origin.text.strip.empty?

          index.register(origin.text, origin)
        end

        # Registers both "<source> <id>" and the bare id for an origin that
        # has a source and an id.
        def register_origin_ref(index, origin)
          return unless origin.source && origin.id

          key = "#{origin.source} #{origin.id}"
          index.register(key, origin)
          index.register(origin.id.to_s, origin)
        end

        # Indexes the BIB_ENTRY_KEYS values of every Hash entry found in the
        # bibliography document (a top-level Array, or the values of a
        # top-level Hash).
        #
        # Indexing is best-effort: a document that cannot be safely loaded
        # contributes no anchors instead of aborting index construction.
        # That covers syntax errors, disallowed classes and YAML aliases,
        # which safe_load rejects by default with a Psych::BadAlias error.
        def index_bibliography_yaml(index, yaml_content)
          data = YAML.safe_load(yaml_content)
          return unless data.is_a?(Hash) || data.is_a?(Array)

          entries = data.is_a?(Hash) ? data.values : data
          entries.each do |entry|
            next unless entry.is_a?(Hash)

            BIB_ENTRY_KEYS.each do |key|
              val = entry[key]
              index.register(val.to_s, entry) if val && !val.to_s.strip.empty?
            end
          end
        rescue Psych::SyntaxError, Psych::DisallowedClass, Psych::BadAlias
          nil
        end
      end
    end
  end
end
|