glossarist 2.5.0 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop_todo.yml +50 -146
- data/CLAUDE.md +33 -7
- data/Gemfile +20 -19
- data/README.adoc +383 -7
- data/TODO.integration/01-gcr-package-cli.md +180 -0
- data/exe/glossarist +1 -53
- data/glossarist.gemspec +1 -0
- data/lib/glossarist/asset.rb +1 -1
- data/lib/glossarist/citation.rb +1 -1
- data/lib/glossarist/cli/package_command.rb +32 -0
- data/lib/glossarist/cli/upgrade_command.rb +34 -0
- data/lib/glossarist/cli/validate_command.rb +56 -0
- data/lib/glossarist/cli.rb +105 -0
- data/lib/glossarist/collection_config.rb +23 -0
- data/lib/glossarist/collections.rb +15 -8
- data/lib/glossarist/concept.rb +1 -1
- data/lib/glossarist/concept_collector.rb +153 -0
- data/lib/glossarist/concept_data.rb +3 -1
- data/lib/glossarist/concept_date.rb +1 -1
- data/lib/glossarist/concept_document.rb +29 -0
- data/lib/glossarist/concept_enricher.rb +34 -0
- data/lib/glossarist/concept_manager.rb +31 -49
- data/lib/glossarist/concept_reference.rb +45 -0
- data/lib/glossarist/concept_source.rb +1 -1
- data/lib/glossarist/concept_validator.rb +101 -0
- data/lib/glossarist/custom_locality.rb +1 -1
- data/lib/glossarist/dataset_validator.rb +69 -0
- data/lib/glossarist/designation/abbreviation.rb +1 -1
- data/lib/glossarist/designation/base.rb +11 -4
- data/lib/glossarist/designation/expression.rb +1 -1
- data/lib/glossarist/designation/grammar_info.rb +1 -1
- data/lib/glossarist/designation/graphical_symbol.rb +1 -1
- data/lib/glossarist/designation/letter_symbol.rb +1 -1
- data/lib/glossarist/designation/symbol.rb +2 -2
- data/lib/glossarist/detailed_definition.rb +1 -1
- data/lib/glossarist/gcr_metadata.rb +87 -0
- data/lib/glossarist/gcr_package.rb +223 -0
- data/lib/glossarist/gcr_statistics.rb +35 -0
- data/lib/glossarist/gcr_validator.rb +98 -0
- data/lib/glossarist/locality.rb +1 -1
- data/lib/glossarist/localized_concept.rb +12 -1
- data/lib/glossarist/managed_concept.rb +1 -1
- data/lib/glossarist/managed_concept_data.rb +5 -2
- data/lib/glossarist/non_verb_rep.rb +1 -1
- data/lib/glossarist/reference_extractor.rb +227 -0
- data/lib/glossarist/reference_resolver.rb +169 -0
- data/lib/glossarist/register_data.rb +39 -0
- data/lib/glossarist/related_concept.rb +1 -1
- data/lib/glossarist/resolution_adapter/local.rb +73 -0
- data/lib/glossarist/resolution_adapter/package.rb +22 -0
- data/lib/glossarist/resolution_adapter/remote.rb +60 -0
- data/lib/glossarist/resolution_adapter/route.rb +34 -0
- data/lib/glossarist/resolution_adapter.rb +14 -0
- data/lib/glossarist/schema_migration.rb +334 -0
- data/lib/glossarist/urn_resolver.rb +71 -0
- data/lib/glossarist/v1/concept.rb +81 -0
- data/lib/glossarist/v1/cross_references.rb +41 -0
- data/lib/glossarist/v1/register.rb +50 -0
- data/lib/glossarist/v1.rb +9 -0
- data/lib/glossarist/validation_result.rb +38 -0
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +29 -4
- data/relaton-bib-2.0.0.gem +0 -0
- data/relaton-bib-2.1.0.gem +0 -0
- data/relaton-cen-2.0.0.gem +0 -0
- data/relaton-iec-2.0.0.gem +0 -0
- data/relaton-iso-2.0.0.gem +0 -0
- data/relaton-itu-2.0.0.gem +0 -0
- metadata +60 -7
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "zip"
|
|
4
|
+
|
|
5
|
+
module Glossarist
|
|
6
|
+
# Checks the layout of a GCR archive (a ZIP file): it must contain a
# metadata.yaml entry and at least one YAML file under concepts/.
# Findings are collected on a ValidationResult rather than raised.
class GcrValidator
  # Validates the archive stored at +zip_path+.
  #
  # @param zip_path [String] path to the archive
  # @return [ValidationResult] the collected errors and warnings
  def validate(zip_path)
    report = ValidationResult.new

    if File.exist?(zip_path)
      begin
        Zip::File.open(zip_path) { |archive| inspect_archive(archive, report) }
      rescue StandardError => e
        # Any failure while opening or walking the archive is reported here.
        report.add_error("Failed to read ZIP: #{e.message}")
      end
    else
      report.add_error("File not found: #{zip_path}")
    end

    report
  end

  private

  # Runs the metadata and per-concept checks on an opened archive.
  # Stops after the first error when metadata.yaml is absent.
  def inspect_archive(archive, report)
    metadata_entry = archive.find_entry("metadata.yaml")
    unless metadata_entry
      report.add_error("Missing metadata.yaml")
      return
    end

    metadata = GcrMetadata.from_yaml(metadata_entry.get_input_stream.read)
    validate_metadata(metadata, report)

    concept_entries = archive.entries.select do |candidate|
      candidate.name.start_with?("concepts/") && candidate.name.end_with?(".yaml")
    end
    report.add_error("No concept files found in concepts/") if concept_entries.empty?

    concept_entries.each do |concept_entry|
      validate_concept_entry(concept_entry, metadata, report)
    end
  end

  # Records one error per required metadata field that is nil/false
  # (or for every field when +metadata+ itself is nil).
  def validate_metadata(metadata, result)
    {
      concept_count: "metadata.yaml missing required fields (concept_count)",
      shortname: "metadata.yaml missing shortname",
      version: "metadata.yaml missing version",
    }.each do |field, message|
      result.add_error(message) unless metadata&.public_send(field)
    end
  end

  # Parses one concepts/*.yaml entry and checks that the concept has an id
  # and that every localization carries a language code. Parse failures are
  # reported and end the checks for this entry.
  def validate_concept_entry(entry, metadata, result)
    begin
      doc = ConceptDocument.from_yamls(entry.get_input_stream.read)
    rescue Psych::SyntaxError => e
      return result.add_error("#{entry.name}: invalid YAML at line #{e.line}: #{e.message}")
    rescue StandardError => e
      return result.add_error("#{entry.name}: parse error: #{e.message}")
    end

    concept = doc.concept
    result.add_error("#{entry.name}: document 0 missing data.identifier") unless concept&.data&.id

    localizations = doc.localizations
    if localizations.empty?
      result.add_error("#{entry.name}: expected at least 1 localization document")
    else
      localizations.each_with_index do |localization, position|
        next if localization&.language_code

        # Document 0 is the concept itself, so localizations start at 1.
        result.add_error("#{entry.name}: document #{position + 1} missing data.language_code")
      end
    end

    validate_concept_uri(entry, concept, metadata, result)
  end

  # Warns when neither the concept nor the metadata offers any way to
  # determine a concept URI.
  def validate_concept_uri(entry, concept, metadata, result)
    uri_sources = [
      concept&.data&.uri,
      metadata&.concept_uri_template,
      metadata&.uri_prefix,
    ]
    return unless uri_sources.all?(&:nil?)

    result.add_warning("#{entry.name}: no concept URI (data.uri) and no concept_uri_template or uri_prefix in metadata")
  end
end
|
|
98
|
+
end
|
data/lib/glossarist/locality.rb
CHANGED
|
@@ -4,11 +4,22 @@ module Glossarist
|
|
|
4
4
|
attribute :review_type, :string
|
|
5
5
|
attribute :entry_status, :string
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
key_value do
|
|
8
8
|
map :classification, to: :classification
|
|
9
9
|
map %i[review_type reviewType], to: :review_type
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
+
NIL_COLLECTION_KEYS = %w[definition examples notes].freeze
|
|
13
|
+
|
|
14
|
+
# Normalises the incoming hash before delegating to the inherited loader:
# any key listed in NIL_COLLECTION_KEYS that is present under "data" with an
# explicit nil value is replaced by an empty array. The hash is changed in
# place; input that is not a Hash (or has no Hash under "data") is passed
# through untouched.
def self.of_yaml(hash, options = {})
  payload = hash.is_a?(Hash) ? hash["data"] : nil

  if payload.is_a?(Hash)
    NIL_COLLECTION_KEYS.each do |key|
      next unless payload.key?(key)

      payload[key] = [] if payload[key].nil?
    end
  end

  # Bare super forwards +hash+ and +options+ as received.
  super
end
|
|
22
|
+
|
|
12
23
|
alias_method :status=, :entry_status=
|
|
13
24
|
|
|
14
25
|
def language_code
|
|
@@ -3,6 +3,7 @@ module Glossarist
|
|
|
3
3
|
include Glossarist::Utilities::CommonFunctions
|
|
4
4
|
|
|
5
5
|
attribute :id, :string
|
|
6
|
+
attribute :uri, :string
|
|
6
7
|
attribute :localized_concepts, :hash
|
|
7
8
|
attribute :groups, :string, collection: true
|
|
8
9
|
attribute :sources, ConceptSource, collection: true
|
|
@@ -10,9 +11,10 @@ module Glossarist
|
|
|
10
11
|
collection: Collections::LocalizationCollection,
|
|
11
12
|
initialize_empty: true
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
key_value do
|
|
14
15
|
map %i[id identifier], to: :id,
|
|
15
16
|
with: { to: :id_to_yaml, from: :id_from_yaml }
|
|
17
|
+
map :uri, to: :uri
|
|
16
18
|
map %i[localized_concepts localizedConcepts], to: :localized_concepts
|
|
17
19
|
map :groups, to: :groups
|
|
18
20
|
map :sources, to: :sources
|
|
@@ -32,7 +34,8 @@ module Glossarist
|
|
|
32
34
|
def localizations_from_yaml(model, value)
|
|
33
35
|
value.each do |localized_concept_hash|
|
|
34
36
|
localized_concept = Glossarist::LocalizedConcept.of_yaml(localized_concept_hash)
|
|
35
|
-
model.localizations.store(localized_concept.language_code,
|
|
37
|
+
model.localizations.store(localized_concept.language_code,
|
|
38
|
+
localized_concept)
|
|
36
39
|
end
|
|
37
40
|
end
|
|
38
41
|
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Glossarist
|
|
6
|
+
class ReferenceExtractor
|
|
7
|
+
@identifier_resolvers = []
|
|
8
|
+
@patterns = []
|
|
9
|
+
|
|
10
|
+
IdentifierResolver = Struct.new(:prefix, :resolver, keyword_init: true)
|
|
11
|
+
Pattern = Struct.new(:name, :regex, :resolver, keyword_init: true)
|
|
12
|
+
|
|
13
|
+
# Class-level registry of mention patterns and identifier resolvers.
class << self
  # Appends a resolver that is tried for identifiers starting with +prefix+.
  # The block is stored as given; resolvers are consulted in registration
  # order.
  def register_identifier_resolver(prefix, &resolver)
    entry = IdentifierResolver.new(prefix: prefix, resolver: resolver)
    @identifier_resolvers.push(entry)
  end

  # Appends a named text pattern together with the block that turns its
  # captures into a reference.
  def register_pattern(name:, regex:, &resolver)
    entry = Pattern.new(name: name, regex: regex, resolver: resolver)
    @patterns.push(entry)
  end

  # @return [Array<Pattern>] a shallow copy, so callers cannot alter the
  #   registry through the returned array
  def patterns
    @patterns.dup
  end

  # @return [Array<IdentifierResolver>] a shallow copy of the registry
  def identifier_resolvers
    @identifier_resolvers.dup
  end
end
|
|
31
|
+
|
|
32
|
+
# Scans +text+ with every registered pattern and returns the references the
# pattern resolvers produce, de-duplicated. Anything that is not a String
# yields an empty array.
def extract_from_text(text)
  return [] unless text.is_a?(String)

  found = self.class.patterns.flat_map do |pattern|
    text.scan(pattern.regex).filter_map do |match|
      # scan yields a String when the regex has no groups, an Array otherwise.
      pattern.resolver.call(self, *Array(match))
    end
  end

  deduplicate(found)
end
|
|
45
|
+
|
|
46
|
+
# Extracts references from every text gathered out of one localized-concept
# hash. Results of the individual texts are concatenated in order.
def extract_from_localized(lc_hash)
  gather_texts(lc_hash).each_with_object([]) do |text, references|
    references.concat(extract_from_text(text))
  end
end
|
|
49
|
+
|
|
50
|
+
# Extracts references from a hash-form concept by visiting, in LANG_CODES
# order, every language key whose value is a Hash.
def extract_from_concept_hash(concept_hash)
  localized_blocks = LANG_CODES.map { |lang| concept_hash[lang] }.grep(Hash)
  localized_blocks.flat_map { |block| extract_from_localized(block) }
end
|
|
57
|
+
|
|
58
|
+
# Extracts references from every localization of a managed concept and
# returns them as one flat list.
def extract_from_managed_concept(concept)
  localizations = concept.localizations
  localizations.flat_map do |localization|
    extract_from_localized_concept(localization)
  end
end
|
|
63
|
+
|
|
64
|
+
# Extracts references from the definition, notes and examples of a
# localized concept object. Sections that are nil and items without content
# are skipped.
def extract_from_localized_concept(l10n)
  data = l10n.data
  sections = [data.definition, data.notes, data.examples]

  texts = []
  sections.each do |items|
    next if items.nil?

    items.each do |item|
      content = item.content
      texts << content if content
    end
  end

  texts.flat_map { |text| extract_from_text(text) }
end
|
|
71
|
+
|
|
72
|
+
# Dispatches a single concept mention.
# +content+ is the text found between {{ and }}. When it contains a comma,
# the part before the first comma is treated as display text and the rest as
# the identifier; otherwise the whole content is the identifier.
def resolve_mention(content)
  display, separator, identifier = content.strip.partition(",")

  # Without a comma, partition leaves the whole content in +display+.
  return resolve_by_identifier(display, nil) if separator.empty?

  resolve_by_identifier(identifier.strip, display.strip)
end
|
|
86
|
+
|
|
87
|
+
# Turns an identifier (plus optional display text) into a reference.
# The first registered identifier resolver whose prefix matches wins.
# Otherwise identifiers made of digits, dots and dashes (starting with a
# digit) become local references, and everything else a designation
# reference.
def resolve_by_identifier(identifier, display)
  handler = self.class.identifier_resolvers.find do |candidate|
    identifier.start_with?(candidate.prefix)
  end
  return handler.resolver.call(self, identifier, display) if handler

  case identifier
  when /\A\d[\d.-]*\z/ then resolve_local(display || identifier, identifier)
  else resolve_designation(identifier, display)
  end
end
|
|
102
|
+
|
|
103
|
+
# Builds a "local" reference (no source) from a term and a concept id,
# trimming surrounding whitespace from both.
def resolve_local(term, concept_id)
  attributes = {
    term: term.strip,
    concept_id: concept_id.strip,
    source: nil,
    ref_type: "local",
  }
  ConceptReference.new(**attributes)
end
|
|
111
|
+
|
|
112
|
+
# Builds a "designation" reference, i.e. one identified by its term only.
# The display text is used as the term when present, otherwise +text+.
def resolve_designation(text, display)
  label = display || text
  ConceptReference.new(term: label, concept_id: nil, source: nil, ref_type: "designation")
end
|
|
120
|
+
|
|
121
|
+
# Builds a "urn" reference for an IEC 60050 URN. The concept id is derived
# from the URN; the source is always the fixed IEC 60050 prefix. A missing
# display text becomes an empty term.
def resolve_iec_urn(urn, display)
  ConceptReference.new(
    term: display || "",
    concept_id: extract_iec_concept_id(urn),
    source: "urn:iec:std:iec:60050",
    ref_type: "urn",
  )
end
|
|
131
|
+
|
|
132
|
+
# Builds a "urn" reference for an ISO URN of the form
# urn:iso:std:iso:<number>[:<tail>]. The source is the URN up to the
# document number and the concept id is derived from the tail.
#
# ISO URNs that do not follow that numeric form (for example ones carrying a
# document type segment before the number) are handed to the generic URN
# resolver, so the mention is still recorded instead of being dropped.
#
# @param urn [String] the identifier taken from the mention
# @param display [String, nil] optional display text
# @return [ConceptReference]
def resolve_iso_urn(urn, display)
  match = urn.match(/\Aurn:iso:std:iso:(\d+)(?::(.*))?\z/)
  return resolve_generic_urn(urn, display) unless match

  ConceptReference.new(
    term: display || "",
    concept_id: extract_term_id_from_urn_tail(match[2]),
    source: "urn:iso:std:iso:#{match[1]}",
    ref_type: "urn",
  )
end
|
|
143
|
+
|
|
144
|
+
# Builds a "urn" reference for a URN with no dedicated resolver: the whole
# URN becomes the source and no concept id is set.
def resolve_generic_urn(urn, display)
  label = display || ""
  ConceptReference.new(term: label, concept_id: nil, source: urn, ref_type: "urn")
end
|
|
152
|
+
|
|
153
|
+
LANG_CODES = Glossarist::LANG_CODES
|
|
154
|
+
|
|
155
|
+
private
|
|
156
|
+
|
|
157
|
+
# Collects the text bodies of a hash-form localized concept, in the order
# definition, notes, examples.
#
# Each text appears once: a String definition is already returned by
# extract_text_fields, so it is not appended a second time (doing so made
# every reference in it show up twice downstream).
#
# @param lc_hash [Hash] localized concept data with string keys
# @return [Array<String>]
def gather_texts(lc_hash)
  %w[definition notes examples].flat_map do |section|
    extract_text_fields(lc_hash[section])
  end
end

# Normalises one section into an array of strings.
#
# Accepts nil, a single String, a single Hash, or an array of those. Hash
# items contribute their "content" value (items without one are dropped);
# any other item contributes its string form.
#
# @param items [nil, String, Hash, Array]
# @return [Array<String>]
def extract_text_fields(items)
  # Array() would explode a bare Hash into key/value pairs, so wrap it.
  entries = items.is_a?(Hash) ? [items] : Array(items)

  entries.filter_map do |item|
    item.is_a?(Hash) ? item["content"]&.to_s : item.to_s
  end
end
|
|
170
|
+
|
|
171
|
+
# Drops repeated references, keeping the first occurrence of each.
# References with a concept id are identified by source and concept id;
# those without one are identified by source and term.
def deduplicate(refs)
  refs.uniq do |ref|
    identity = [ref.source, ref.concept_id]
    identity << ref.term unless ref.concept_id
    identity
  end
end
|
|
184
|
+
|
|
185
|
+
# Derives a term id from the part of an ISO URN that follows the document
# number: the first id of a "term:" list if present, else the "sec:" value,
# else the tail unchanged. A missing tail gives an empty string.
def extract_term_id_from_urn_tail(tail)
  return "" unless tail

  term_ids = tail[/term:([\d.,]+)/, 1]
  return term_ids.split(",").first if term_ids

  tail[/sec:([\d.]+)/, 1] || tail
end
|
|
196
|
+
|
|
197
|
+
# Derives the concept id from an IEC 60050 URN. An explicit "::#con-<id>"
# anchor takes precedence. Otherwise the colon-separated segment starting
# with "60050-" is used, minus that prefix and minus a trailing
# "-dddd-dd" suffix. Returns an empty string when neither is present.
def extract_iec_concept_id(urn)
  anchored_id = urn[/::#con-([\d-]+)/, 1]
  return anchored_id if anchored_id

  code_segment = urn.split(":").find { |segment| segment.start_with?("60050-") }
  return "" unless code_segment

  code_segment.delete_prefix("60050-").sub(/-\d{4}-\d{2}\z/, "")
end
|
|
208
|
+
|
|
209
|
+
# Built-in pattern: a concept mention is any text wrapped in {{...}}.
register_pattern(name: :concept_mention, regex: /\{\{([^}]+)\}\}/) do |extractor, content|
  extractor.resolve_mention(content)
end

# Built-in identifier resolvers. They are tried in this order, so the more
# specific prefixes come before the catch-all "urn:".
{
  "urn:iec:std:iec:60050" => :resolve_iec_urn,
  "urn:iso:std:iso:" => :resolve_iso_urn,
  "urn:" => :resolve_generic_urn,
}.each do |prefix, handler|
  register_identifier_resolver(prefix) do |extractor, identifier, display|
    extractor.public_send(handler, identifier, display)
  end
end
|
|
226
|
+
end
|
|
227
|
+
end
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
# Resolves concept references against the sources registered on it: one
# optional local adapter, any number of package adapters, a route adapter
# that may rewrite a reference first, and any number of remote adapters.
class ReferenceResolver
  def initialize
    @local_adapter = nil
    @package_adapters = []
    @remote_adapters = []
    @route_adapter = ResolutionAdapter::Route.new
  end

  # Uses +concepts+ as the dataset that local references resolve against.
  def register_self(concepts)
    @local_adapter = ResolutionAdapter::Local.new(concepts)
  end

  # Registers a package (or a plain list of concepts) under a URI prefix.
  # The prefix is taken from +uri_prefix+ or, for a GcrPackage, from its
  # metadata.
  #
  # @raise [ArgumentError] when the input type is unsupported or no prefix
  #   can be determined
  def register_package(package_or_concepts, uri_prefix: nil)
    concepts = extract_concepts(package_or_concepts)
    uri_prefix ||= infer_uri_prefix(package_or_concepts)
    raise ArgumentError, "uri_prefix required" unless uri_prefix

    adapter = ResolutionAdapter::Package.new(concepts, uri_prefix: uri_prefix)
    @package_adapters.push(adapter)
  end

  # Adds a rewrite rule to the route adapter.
  def add_route(from:, to:)
    @route_adapter.add(from: from, to: to)
  end

  # Registers a remote source for references under +uri_prefix+.
  def register_remote(uri_prefix:, endpoint:)
    adapter = ResolutionAdapter::Remote.new(uri_prefix: uri_prefix, endpoint: endpoint)
    @remote_adapters.push(adapter)
  end

  # Resolves one reference. Local references go to the local adapter only
  # (nil when none is registered). Other references are routed first, then
  # offered to the package adapters and then to the remote adapters; the
  # first truthy answer wins.
  #
  # @return [Object, nil] whatever the answering adapter returned, or nil
  def resolve(reference)
    return @local_adapter&.resolve(reference) if reference.local?

    target = apply_routes(reference)
    [@package_adapters, @remote_adapters].each do |adapters|
      adapters.each do |adapter|
        answer = adapter.resolve(target)
        return answer if answer
      end
    end

    nil
  end

  # Extracts the references of one concept and pairs each with its
  # resolution.
  #
  # @return [Array<Array>] pairs of [reference, resolved-or-nil]
  def resolve_all(concept, extractor: nil)
    extractor ||= ReferenceExtractor.new
    extract_refs(concept, extractor).map { |reference| [reference, resolve(reference)] }
  end

  # Resolves every reference of every concept and records a warning for
  # each one that resolves to nil. In :single mode without a local adapter,
  # an extra warning per concept states how many external references it
  # holds.
  #
  # @return [ValidationResult]
  def validate_all(package_or_concepts, extractor: nil, mode: :multi)
    concepts = extract_concepts(package_or_concepts)
    extractor ||= ReferenceExtractor.new
    report = ValidationResult.new

    concepts.each do |concept|
      references = extract_refs(concept, extractor)
      termid = extract_termid(concept)

      references.each do |reference|
        next unless resolve(reference).nil?

        scope = reference.local? ? "intra-set" : "inter-set (#{reference.source})"
        report.add_warning("#{termid}: Unresolvable #{scope} reference: #{reference.term} -> #{reference.concept_id}")
      end

      next unless mode == :single && !@local_adapter

      external_count = references.count(&:external?)
      next if external_count.zero?

      report.add_warning("#{termid}: #{external_count} external reference(s) not checked in single mode")
    end

    report
  end

  # Loads a collection directory: via its collection.yaml when that file
  # exists, otherwise by registering every *.gcr file found in it.
  def load_collection(collection_dir)
    config_path = File.join(collection_dir, "collection.yaml")
    return load_collection_config(config_path, collection_dir) if File.exist?(config_path)

    load_gcr_directory(collection_dir)
  end

  # @return [Array] the URI prefixes of all registered packages
  def registered_datasets
    @package_adapters.map(&:uri_prefix)
  end

  private

  # Returns the routed form of +reference+, or the reference itself when
  # the route adapter has nothing for it.
  def apply_routes(reference)
    @route_adapter.resolve(reference) || reference
  end

  # Normalises the accepted inputs into a list of concepts.
  def extract_concepts(package_or_concepts)
    return package_or_concepts.concepts if package_or_concepts.is_a?(GcrPackage)
    return package_or_concepts if package_or_concepts.is_a?(Array)
    return [package_or_concepts] if package_or_concepts.is_a?(Hash)

    raise ArgumentError, "Expected GcrPackage, Array, or Hash"
  end

  # Only a GcrPackage can supply a prefix of its own; anything else gives nil.
  def infer_uri_prefix(package_or_concepts)
    return unless package_or_concepts.is_a?(GcrPackage)

    package_or_concepts.metadata&.dig("uri_prefix")
  end

  # Registers the packages, routes and remotes listed in a collection
  # config. Package files that do not exist are skipped.
  def load_collection_config(config_path, collection_dir)
    config = CollectionConfig.from_file(config_path)

    config.packages.each do |entry|
      archive_path = File.join(collection_dir, entry["file"])
      next unless File.exist?(archive_path)

      package = GcrPackage.load(archive_path)
      # A prefix in the config takes precedence over the package metadata.
      chosen_prefix = entry["uri_prefix"] || package.metadata&.dig("uri_prefix")
      register_package(package, uri_prefix: chosen_prefix)
    end

    config.routes.each { |route| add_route(from: route["from"], to: route["to"]) }

    config.remotes.each do |remote|
      register_remote(uri_prefix: remote["uri_prefix"], endpoint: remote["endpoint"])
    end
  end

  # Registers every *.gcr file directly inside +dir+.
  def load_gcr_directory(dir)
    Dir.glob(File.join(dir, "*.gcr")).each do |archive_path|
      package = GcrPackage.load(archive_path)
      register_package(package, uri_prefix: package.metadata&.dig("uri_prefix"))
    end
  end

  # Picks the extractor entry point matching the concept's representation.
  def extract_refs(concept, extractor)
    return extractor.extract_from_managed_concept(concept) if concept.is_a?(ManagedConcept)

    extractor.extract_from_concept_hash(concept)
  end

  # Returns the concept's identifier as a string, or nil when it has none.
  def extract_termid(concept)
    raw_id = concept.is_a?(ManagedConcept) ? concept.data.id : (concept["termid"] || concept["id"])
    raw_id&.to_s
  end
end
|
|
169
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
# Serializable wrapper around a free-form hash. The whole key/value
# document is mapped onto the single +data+ attribute through the
# data_from / data_to callbacks below.
class RegisterData < Lutaml::Model::Serializable
  attribute :data, :hash, default: -> { {} }

  key_value do
    map nil, to: :data, with: { from: :data_from, to: :data_to }
  end

  class << self
    # Reads and parses the YAML file at +path+.
    #
    # @return [RegisterData, nil] nil when the file does not exist
    def from_file(path)
      from_yaml(File.read(path))
    rescue Errno::ENOENT
      nil
    end
  end

  # Looks up a single key in the wrapped data.
  def [](key)
    data[key]
  end

  # Digs through the wrapped data, like Hash#dig.
  def dig(*keys)
    data.dig(*keys)
  end

  # @return [Hash] the wrapped data itself (not a copy)
  def to_h
    data
  end

  # Deserialization callback: stores the parsed document as +data+.
  def data_from(model, value)
    model.data = value
  end

  # Serialization callback: copies every entry of +data+ into the document.
  def data_to(model, doc)
    model.data.each { |key, value| doc[key] = value }
  end
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class ResolutionAdapter
  # Resolves references against an in-memory list of concepts. Concepts may
  # be ManagedConcept objects or plain hashes keyed by "termid"/"id" and by
  # language blocks that hold a "terms" list.
  class Local < ResolutionAdapter
    attr_reader :index, :concepts

    # @param concepts [Array] the concepts to resolve against; an id index
    #   is built once, at construction time
    def initialize(concepts)
      super()
      @concepts = concepts
      @index = {}
      build_index
    end

    # Resolves a reference according to its ref_type: "local" by concept id,
    # "designation" by term, anything else by concept id when it has one.
    #
    # @return [Object, nil] the matching concept, or nil
    def resolve(reference)
      case reference.ref_type
      when "local"
        resolve_by_id(reference.concept_id)
      when "designation"
        resolve_by_designation(reference.term)
      else
        resolve_by_id(reference.concept_id) if reference.concept_id
      end
    end

    # Looks a concept up by id. Index keys are stored as strings, so the id
    # is compared in string form; a nil id never matches.
    #
    # @return [Object, nil]
    def resolve_by_id(concept_id)
      return nil if concept_id.nil?

      @index[concept_id.to_s]
    end

    # Finds the first concept having a designation equal to +term+, ignoring
    # case. Designations are compared in string form so that non-String
    # values (e.g. numeric YAML scalars) cannot raise.
    #
    # @return [Object, nil]
    def resolve_by_designation(term)
      return nil unless term

      wanted = term.to_s.downcase
      concepts.find do |concept|
        designations_for(concept).any? { |designation| designation.to_s.downcase == wanted }
      end
    end

    private

    # Lists the designations of a concept across all its localizations.
    # Missing term lists and malformed term entries are skipped rather than
    # raising.
    def designations_for(concept)
      if concept.is_a?(ManagedConcept)
        concept.localizations.flat_map do |l10n|
          (l10n.data.terms || []).filter_map do |term|
            term.designation if term.respond_to?(:designation)
          end
        end
      else
        concept.each_value.flat_map do |lang_block|
          next [] unless lang_block.is_a?(Hash) && lang_block.key?("terms")

          Array(lang_block["terms"]).filter_map do |term|
            # Only Hash entries can carry a "designation" key.
            term["designation"] if term.is_a?(Hash)
          end
        end
      end
    end

    # Maps each concept's id (as a string) to the concept. Concepts without
    # an id are left out; a later concept replaces an earlier one with the
    # same id.
    def build_index
      concepts.each do |concept|
        termid = extract_termid(concept)
        @index[termid] = concept if termid
      end
    end

    # Returns the concept's identifier as a string, or nil when it has none.
    def extract_termid(concept)
      if concept.is_a?(ManagedConcept)
        concept.data.id&.to_s
      else
        (concept["termid"] || concept["id"])&.to_s
      end
    end
  end
end
|
|
73
|
+
end
|