glossarist 2.4.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop_todo.yml +50 -146
- data/CLAUDE.md +85 -0
- data/Gemfile +26 -5
- data/README.adoc +383 -7
- data/TODO.integration/01-gcr-package-cli.md +180 -0
- data/exe/glossarist +1 -53
- data/glossarist.gemspec +3 -2
- data/lib/glossarist/asset.rb +1 -1
- data/lib/glossarist/citation.rb +1 -1
- data/lib/glossarist/cli/package_command.rb +32 -0
- data/lib/glossarist/cli/upgrade_command.rb +34 -0
- data/lib/glossarist/cli/validate_command.rb +56 -0
- data/lib/glossarist/cli.rb +105 -0
- data/lib/glossarist/collection_config.rb +23 -0
- data/lib/glossarist/collections/concept_source_collection.rb +9 -0
- data/lib/glossarist/collections/detailed_definition_collection.rb +18 -0
- data/lib/glossarist/collections/localization_collection.rb +37 -0
- data/lib/glossarist/collections/typed_collection.rb +26 -0
- data/lib/glossarist/collections.rb +21 -4
- data/lib/glossarist/concept.rb +1 -1
- data/lib/glossarist/concept_collector.rb +153 -0
- data/lib/glossarist/concept_data.rb +15 -8
- data/lib/glossarist/concept_date.rb +1 -1
- data/lib/glossarist/concept_document.rb +29 -0
- data/lib/glossarist/concept_enricher.rb +34 -0
- data/lib/glossarist/concept_manager.rb +31 -49
- data/lib/glossarist/concept_reference.rb +45 -0
- data/lib/glossarist/concept_source.rb +1 -1
- data/lib/glossarist/concept_validator.rb +114 -0
- data/lib/glossarist/custom_locality.rb +1 -1
- data/lib/glossarist/dataset_validator.rb +69 -0
- data/lib/glossarist/designation/abbreviation.rb +1 -1
- data/lib/glossarist/designation/base.rb +11 -4
- data/lib/glossarist/designation/expression.rb +1 -1
- data/lib/glossarist/designation/grammar_info.rb +1 -1
- data/lib/glossarist/designation/graphical_symbol.rb +1 -1
- data/lib/glossarist/designation/letter_symbol.rb +1 -1
- data/lib/glossarist/designation/symbol.rb +2 -2
- data/lib/glossarist/designation.rb +8 -11
- data/lib/glossarist/detailed_definition.rb +1 -1
- data/lib/glossarist/error.rb +2 -5
- data/lib/glossarist/gcr_metadata.rb +87 -0
- data/lib/glossarist/gcr_package.rb +223 -0
- data/lib/glossarist/gcr_statistics.rb +35 -0
- data/lib/glossarist/gcr_validator.rb +98 -0
- data/lib/glossarist/locality.rb +1 -1
- data/lib/glossarist/localized_concept.rb +12 -1
- data/lib/glossarist/managed_concept.rb +1 -1
- data/lib/glossarist/managed_concept_data.rb +8 -5
- data/lib/glossarist/non_verb_rep.rb +1 -1
- data/lib/glossarist/reference_extractor.rb +227 -0
- data/lib/glossarist/reference_resolver.rb +169 -0
- data/lib/glossarist/register_data.rb +39 -0
- data/lib/glossarist/related_concept.rb +1 -1
- data/lib/glossarist/resolution_adapter/local.rb +73 -0
- data/lib/glossarist/resolution_adapter/package.rb +22 -0
- data/lib/glossarist/resolution_adapter/remote.rb +60 -0
- data/lib/glossarist/resolution_adapter/route.rb +34 -0
- data/lib/glossarist/resolution_adapter.rb +14 -0
- data/lib/glossarist/schema_migration.rb +334 -0
- data/lib/glossarist/urn_resolver.rb +71 -0
- data/lib/glossarist/utilities.rb +6 -2
- data/lib/glossarist/v1/concept.rb +81 -0
- data/lib/glossarist/v1/cross_references.rb +41 -0
- data/lib/glossarist/v1/register.rb +50 -0
- data/lib/glossarist/v1.rb +9 -0
- data/lib/glossarist/validation_result.rb +38 -0
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +54 -24
- metadata +62 -6
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  # Resolves concept references by delegating to a set of resolution adapters.
  #
  # References for which +local?+ is true are handed to the adapter registered
  # through {#register_self}. Every other reference is first passed through the
  # route adapter (which may substitute a rewritten reference) and then offered
  # to the package adapters and the remote adapters, in registration order.
  class ReferenceResolver
    def initialize
      @route_adapter = ResolutionAdapter::Route.new
      @package_adapters = []
      @remote_adapters = []
      @local_adapter = nil
    end

    # Registers the concepts that local references are resolved against.
    #
    # @param concepts the concepts handed to ResolutionAdapter::Local
    # @return [ResolutionAdapter::Local] the adapter that was created
    def register_self(concepts)
      @local_adapter = ResolutionAdapter::Local.new(concepts)
    end

    # Registers a package (or a plain list of concepts) under a URI prefix.
    #
    # @param package_or_concepts [GcrPackage, Array, Hash]
    # @param uri_prefix [String, nil] explicit prefix; when omitted it is read
    #   from the package metadata (only possible for a GcrPackage)
    # @raise [ArgumentError] when the input type is unsupported or no prefix
    #   could be determined
    def register_package(package_or_concepts, uri_prefix: nil)
      concepts = extract_concepts(package_or_concepts)
      prefix = uri_prefix || infer_uri_prefix(package_or_concepts)
      raise ArgumentError, "uri_prefix required" unless prefix

      adapter = ResolutionAdapter::Package.new(concepts, uri_prefix: prefix)
      @package_adapters << adapter
    end

    # Adds a source rewrite rule to the route adapter.
    def add_route(from:, to:)
      @route_adapter.add(from: from, to: to)
    end

    # Registers a remote endpoint that serves concepts for +uri_prefix+.
    def register_remote(uri_prefix:, endpoint:)
      adapter = ResolutionAdapter::Remote.new(uri_prefix: uri_prefix,
                                              endpoint: endpoint)
      @remote_adapters << adapter
    end

    # Resolves a single reference.
    #
    # @param reference an object responding to +local?+ (and whatever the
    #   adapters read from it)
    # @return the first truthy adapter result, or nil when nothing matched
    #   (also nil for a local reference when no local adapter is registered)
    def resolve(reference)
      return @local_adapter&.resolve(reference) if reference.local?

      target = apply_routes(reference)
      first_match(@package_adapters, target) ||
        first_match(@remote_adapters, target)
    end

    # Extracts every reference from +concept+ and resolves each of them.
    #
    # @param extractor optional extractor; a ReferenceExtractor is created
    #   when none is given
    # @return [Array<Array>] pairs of +[reference, resolved_or_nil]+
    def resolve_all(concept, extractor: nil)
      refs = extract_refs(concept, extractor || ReferenceExtractor.new)
      refs.map { |ref| [ref, resolve(ref)] }
    end

    # Checks every reference of every concept and records a warning for each
    # one that cannot be resolved.
    #
    # When +mode+ is +:single+ and no local adapter is registered, an extra
    # warning per concept reports how many external references it carries.
    #
    # @param package_or_concepts [GcrPackage, Array, Hash]
    # @param extractor optional extractor; defaults to a new ReferenceExtractor
    # @param mode [Symbol] +:multi+ (default) or +:single+
    # @return [ValidationResult] collected warnings
    # @raise [ArgumentError] when the input type is unsupported
    def validate_all(package_or_concepts, extractor: nil, mode: :multi)
      concepts = extract_concepts(package_or_concepts)
      extractor ||= ReferenceExtractor.new

      concepts.each_with_object(ValidationResult.new) do |concept, report|
        refs = extract_refs(concept, extractor)
        termid = extract_termid(concept)

        warn_unresolved(report, termid, refs)
        next unless mode == :single
        next if @local_adapter

        warn_unchecked_externals(report, termid, refs)
      end
    end

    # Loads packages, routes and remotes from a collection directory.
    #
    # A +collection.yaml+ inside the directory drives the loading when it
    # exists; otherwise every +*.gcr+ file in the directory is registered.
    def load_collection(collection_dir)
      config_path = File.join(collection_dir, "collection.yaml")
      return load_gcr_directory(collection_dir) unless File.exist?(config_path)

      load_collection_config(config_path, collection_dir)
    end

    # @return [Array] URI prefixes of all registered package adapters
    def registered_datasets
      @package_adapters.map { |adapter| adapter.uri_prefix }
    end

    private

    # Returns the first truthy result any of +adapters+ yields for +reference+.
    def first_match(adapters, reference)
      adapters.each do |adapter|
        hit = adapter.resolve(reference)
        return hit if hit
      end

      nil
    end

    # Adds one warning per reference that resolves to nil.
    def warn_unresolved(report, termid, refs)
      refs.each do |ref|
        next unless resolve(ref).nil?

        scope = if ref.local?
                  "intra-set"
                else
                  "inter-set (#{ref.source})"
                end
        report.add_warning("#{termid}: Unresolvable #{scope} reference: #{ref.term} -> #{ref.concept_id}")
      end
    end

    # Adds a summary warning when +refs+ contains external references.
    def warn_unchecked_externals(report, termid, refs)
      external_refs = refs.select { |ref| ref.external? }
      return if external_refs.empty?

      report.add_warning("#{termid}: #{external_refs.size} external reference(s) not checked in single mode")
    end

    # Returns the routed replacement for +reference+, or +reference+ itself
    # when the route adapter has nothing to substitute.
    def apply_routes(reference)
      @route_adapter.resolve(reference) || reference
    end

    # Normalises the accepted inputs to a list of concepts.
    def extract_concepts(package_or_concepts)
      if package_or_concepts.is_a?(GcrPackage)
        package_or_concepts.concepts
      elsif package_or_concepts.is_a?(Array)
        package_or_concepts
      elsif package_or_concepts.is_a?(Hash)
        [package_or_concepts]
      else
        raise ArgumentError, "Expected GcrPackage, Array, or Hash"
      end
    end

    # Reads the URI prefix from package metadata; nil for non-package input.
    def infer_uri_prefix(package_or_concepts)
      return nil unless package_or_concepts.is_a?(GcrPackage)

      package_or_concepts.metadata&.dig("uri_prefix")
    end

    # Registers everything declared in a collection config file. Package
    # entries whose file does not exist are skipped.
    def load_collection_config(config_path, collection_dir)
      config = CollectionConfig.from_file(config_path)

      config.packages.each do |entry|
        package_path = File.join(collection_dir, entry["file"])
        next unless File.exist?(package_path)

        package = GcrPackage.load(package_path)
        declared_prefix = entry["uri_prefix"] || package.metadata&.dig("uri_prefix")
        register_package(package, uri_prefix: declared_prefix)
      end

      config.routes.each do |entry|
        add_route(from: entry["from"], to: entry["to"])
      end

      config.remotes.each do |entry|
        register_remote(uri_prefix: entry["uri_prefix"],
                        endpoint: entry["endpoint"])
      end
    end

    # Registers every +*.gcr+ package found directly inside +dir+.
    def load_gcr_directory(dir)
      pattern = File.join(dir, "*.gcr")
      Dir.glob(pattern).each do |package_path|
        package = GcrPackage.load(package_path)
        register_package(package,
                         uri_prefix: package.metadata&.dig("uri_prefix"))
      end
    end

    # Picks the extractor entry point matching the concept representation.
    def extract_refs(concept, extractor)
      return extractor.extract_from_managed_concept(concept) if concept.is_a?(ManagedConcept)

      extractor.extract_from_concept_hash(concept)
    end

    # Returns the concept identifier as a String (nil when absent).
    def extract_termid(concept)
      raw_id = if concept.is_a?(ManagedConcept)
                 concept.data.id
               else
                 concept["termid"] || concept["id"]
               end
      raw_id&.to_s
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  # Serializable wrapper around a free-form Hash of register data.
  #
  # The whole key/value document is mapped onto the single +data+ attribute
  # through the custom +data_from+ / +data_to+ hooks declared below.
  class RegisterData < Lutaml::Model::Serializable
    attribute :data, :hash, default: -> { {} }

    key_value do
      map nil, to: :data, with: { from: :data_from, to: :data_to }
    end

    class << self
      # Reads and parses a YAML file.
      #
      # @param path [String] file to read
      # @return [RegisterData, nil] nil when the file does not exist
      def from_file(path)
        contents = File.read(path)
        from_yaml(contents)
      rescue Errno::ENOENT
        nil
      end
    end

    # @return the value stored under +key+ in the wrapped data
    def [](key)
      data[key]
    end

    # Nested lookup, forwarded to the wrapped data.
    def dig(*keys)
      data.dig(*keys)
    end

    # @return the wrapped data itself (not a copy)
    def to_h
      data
    end

    # Deserialization hook: stores the parsed document on the model.
    def data_from(model, value)
      model.data = value
    end

    # Serialization hook: copies every entry of the model data into +doc+.
    def data_to(model, doc)
      model.data.each do |entry_key, entry_value|
        doc[entry_key] = entry_value
      end
    end
  end
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  class ResolutionAdapter
    # Resolves references against an in-memory list of concepts.
    #
    # Concepts may be ManagedConcept instances or plain Hashes. An id index is
    # built once at construction time, keyed by the stringified concept id.
    class Local < ResolutionAdapter
      attr_reader :index, :concepts

      # @param concepts [Array] concepts to resolve against
      def initialize(concepts)
        super()
        @concepts = concepts
        @index = {}
        build_index
      end

      # Resolves +reference+ according to its +ref_type+: "local" looks up by
      # concept id, "designation" looks up by term, and any other type falls
      # back to an id lookup when a concept id is present.
      #
      # @return the matching concept, or nil
      def resolve(reference)
        case reference.ref_type
        when "local"
          resolve_by_id(reference.concept_id)
        when "designation"
          resolve_by_designation(reference.term)
        else
          resolve_by_id(reference.concept_id) if reference.concept_id
        end
      end

      # Looks a concept up by id.
      #
      # The index keys are stringified when the index is built, so the lookup
      # key is stringified too; otherwise an Integer or Symbol id could never
      # match.
      #
      # @param concept_id [#to_s, nil]
      # @return the concept, or nil when unknown or when +concept_id+ is nil
      def resolve_by_id(concept_id)
        return nil if concept_id.nil?

        @index[concept_id.to_s]
      end

      # Finds the first concept carrying a designation equal to +term+,
      # compared case-insensitively.
      #
      # @param term [#to_s, nil]
      # @return the concept, or nil when +term+ is nil/false or nothing matches
      def resolve_by_designation(term)
        return nil unless term

        wanted = term.to_s.downcase
        concepts.find do |concept|
          designations_for(concept).any? { |name| name.downcase == wanted }
        end
      end

      private

      # Collects every designation of +concept+ as a String.
      #
      # For Hash concepts, every Hash value with a "terms" key is inspected.
      # Term entries that are not Hashes are skipped, and a single Hash under
      # "terms" is treated as one term, so malformed data does not raise.
      def designations_for(concept)
        raw = if concept.is_a?(ManagedConcept)
                concept.localizations.flat_map do |l10n|
                  l10n.data.terms.filter_map do |term|
                    term.designation if term.respond_to?(:designation)
                  end
                end
              else
                concept.each_value.flat_map do |lang_block|
                  next [] unless lang_block.is_a?(Hash) && lang_block.key?("terms")

                  term_entries(lang_block["terms"]).filter_map do |term|
                    term["designation"] if term.is_a?(Hash)
                  end
                end
              end

        raw.map(&:to_s)
      end

      # Wraps a lone Hash so Array() does not explode it into key/value pairs.
      def term_entries(terms)
        terms.is_a?(Hash) ? [terms] : Array(terms)
      end

      # Fills the id index; a later concept with the same id replaces an
      # earlier one. Concepts without an id are not indexed.
      def build_index
        concepts.each do |concept|
          termid = extract_termid(concept)
          @index[termid] = concept if termid
        end
      end

      # Returns the concept id as a String, or nil when the concept has none.
      def extract_termid(concept)
        if concept.is_a?(ManagedConcept)
          concept.data.id&.to_s
        else
          (concept["termid"] || concept["id"])&.to_s
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  class ResolutionAdapter
    # Resolves "urn" references whose source equals this package's URI prefix,
    # using a Local adapter built over the package's concepts.
    class Package < ResolutionAdapter
      attr_reader :uri_prefix, :local_adapter

      # @param concepts concepts belonging to the package
      # @param uri_prefix the source value this package answers for
      def initialize(concepts, uri_prefix:)
        super()
        @local_adapter = Local.new(concepts)
        @uri_prefix = uri_prefix
      end

      # @return the concept with the referenced id, or nil when the reference
      #   is not a "urn" reference, targets another source, or is unknown
      def resolve(reference)
        mine = reference.ref_type == "urn" && reference.source == uri_prefix
        return nil unless mine

        @local_adapter.resolve_by_id(reference.concept_id)
      end
    end
  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
module Glossarist
  class ResolutionAdapter
    # Resolves "urn" references for one URI prefix by fetching the concept
    # from an HTTP endpoint. Results are cached per source/concept id.
    class Remote < ResolutionAdapter
      attr_reader :uri_prefix, :endpoint, :cache

      # @param uri_prefix the source value this adapter answers for
      # @param endpoint [String] base URL; one trailing "/" is stripped
      def initialize(uri_prefix:, endpoint:)
        super()
        @uri_prefix = uri_prefix
        @endpoint = endpoint.chomp("/")
        @cache = {}
      end

      # Resolves +reference+, consulting the cache before the network.
      #
      # Whatever the fetch yields is cached, including nil, so a failed lookup
      # is not retried by this adapter instance.
      #
      # @return the parsed concept, or nil when the reference is not handled
      #   here or could not be fetched
      def resolve(reference)
        return nil unless reference.ref_type == "urn"
        return nil unless reference.source == uri_prefix

        key = cache_key(reference)
        return @cache[key] if @cache.key?(key)

        @cache[key] = fetch(reference)
      end

      private

      # Builds +<endpoint>/<source>/<concept_id>+ with both segments
      # percent-encoded.
      def build_uri(reference)
        segments = [reference.source, reference.concept_id].map do |segment|
          encode_path_segment(segment)
        end
        URI.parse("#{endpoint}/#{segments.join('/')}")
      end

      # Percent-encodes one URL path segment.
      #
      # URI.encode_www_form_component is a form encoder and turns a space into
      # "+", which inside a path is a literal plus sign. Literal "+" characters
      # are already emitted as "%2B", so every remaining "+" is an encoded
      # space and can safely be rewritten to "%20".
      def encode_path_segment(value)
        URI.encode_www_form_component(value).gsub("+", "%20")
      end

      # Parses the response body according to its Content-Type header. The
      # media type is matched case-insensitively. Bodies that are neither JSON
      # nor YAML by header are handed to ManagedConcept.from_yaml.
      def parse_response(response)
        content_type = response["content-type"].to_s.downcase
        if content_type.include?("json")
          JSON.parse(response.body)
        elsif content_type.include?("yaml")
          ConceptDocument.from_yamls(response.body).to_managed_concept
        else
          ManagedConcept.from_yaml(response.body)
        end
      end

      def cache_key(reference)
        "#{reference.source}/#{reference.concept_id}"
      end

      # Performs the HTTP GET. Best effort: a non-success status or any
      # StandardError (network failure, unparsable body) yields nil.
      def fetch(reference)
        uri = build_uri(reference)
        response = Net::HTTP.get_response(uri)
        return nil unless response.is_a?(Net::HTTPSuccess)

        parse_response(response)
      rescue StandardError
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  class ResolutionAdapter
    # Rewrites the source of "urn" references according to a routing table.
    # Unlike the other adapters, +resolve+ returns a new reference rather than
    # a concept.
    class Route < ResolutionAdapter
      attr_reader :routes

      # @param routes [Hash] initial table mapping a source to its replacement;
      #   the Hash is stored as given, not copied
      def initialize(routes = {})
        super()
        @routes = routes
      end

      # Adds or replaces the route for +from+.
      def add(from:, to:)
        @routes[from] = to
      end

      # @return [ConceptReference, nil] a copy of +reference+ pointing at the
      #   routed source, or nil when the reference is not a "urn" reference or
      #   its source has no route
      def resolve(reference)
        routable = reference.ref_type == "urn" && routes.key?(reference.source)
        return nil unless routable

        ConceptReference.new(
          term: reference.term,
          concept_id: reference.concept_id,
          source: routes[reference.source],
          ref_type: reference.ref_type,
        )
      end

      # @return the routed replacement for +source+, or +source+ itself when
      #   no (truthy) route is defined
      def remap(source)
        target = routes[source]
        target || source
      end
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  # Abstract base class for reference resolution adapters. Concrete adapters
  # are autoloaded from glossarist/resolution_adapter/*.
  class ResolutionAdapter
    autoload(:Local, "glossarist/resolution_adapter/local")
    autoload(:Package, "glossarist/resolution_adapter/package")
    autoload(:Route, "glossarist/resolution_adapter/route")
    autoload(:Remote, "glossarist/resolution_adapter/remote")

    # Resolves a reference. Subclasses must override this method.
    #
    # @raise [NotImplementedError] always, in this base implementation
    def resolve(_reference)
      message = "#{self.class}#resolve must be implemented"
      raise NotImplementedError, message
    end
  end
end
|