glossarist 2.4.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop_todo.yml +50 -146
  4. data/CLAUDE.md +85 -0
  5. data/Gemfile +26 -5
  6. data/README.adoc +383 -7
  7. data/TODO.integration/01-gcr-package-cli.md +180 -0
  8. data/exe/glossarist +1 -53
  9. data/glossarist.gemspec +3 -2
  10. data/lib/glossarist/asset.rb +1 -1
  11. data/lib/glossarist/citation.rb +1 -1
  12. data/lib/glossarist/cli/package_command.rb +32 -0
  13. data/lib/glossarist/cli/upgrade_command.rb +34 -0
  14. data/lib/glossarist/cli/validate_command.rb +56 -0
  15. data/lib/glossarist/cli.rb +105 -0
  16. data/lib/glossarist/collection_config.rb +23 -0
  17. data/lib/glossarist/collections/concept_source_collection.rb +9 -0
  18. data/lib/glossarist/collections/detailed_definition_collection.rb +18 -0
  19. data/lib/glossarist/collections/localization_collection.rb +37 -0
  20. data/lib/glossarist/collections/typed_collection.rb +26 -0
  21. data/lib/glossarist/collections.rb +21 -4
  22. data/lib/glossarist/concept.rb +1 -1
  23. data/lib/glossarist/concept_collector.rb +153 -0
  24. data/lib/glossarist/concept_data.rb +15 -8
  25. data/lib/glossarist/concept_date.rb +1 -1
  26. data/lib/glossarist/concept_document.rb +29 -0
  27. data/lib/glossarist/concept_enricher.rb +34 -0
  28. data/lib/glossarist/concept_manager.rb +31 -49
  29. data/lib/glossarist/concept_reference.rb +45 -0
  30. data/lib/glossarist/concept_source.rb +1 -1
  31. data/lib/glossarist/concept_validator.rb +114 -0
  32. data/lib/glossarist/custom_locality.rb +1 -1
  33. data/lib/glossarist/dataset_validator.rb +69 -0
  34. data/lib/glossarist/designation/abbreviation.rb +1 -1
  35. data/lib/glossarist/designation/base.rb +11 -4
  36. data/lib/glossarist/designation/expression.rb +1 -1
  37. data/lib/glossarist/designation/grammar_info.rb +1 -1
  38. data/lib/glossarist/designation/graphical_symbol.rb +1 -1
  39. data/lib/glossarist/designation/letter_symbol.rb +1 -1
  40. data/lib/glossarist/designation/symbol.rb +2 -2
  41. data/lib/glossarist/designation.rb +8 -11
  42. data/lib/glossarist/detailed_definition.rb +1 -1
  43. data/lib/glossarist/error.rb +2 -5
  44. data/lib/glossarist/gcr_metadata.rb +87 -0
  45. data/lib/glossarist/gcr_package.rb +223 -0
  46. data/lib/glossarist/gcr_statistics.rb +35 -0
  47. data/lib/glossarist/gcr_validator.rb +98 -0
  48. data/lib/glossarist/locality.rb +1 -1
  49. data/lib/glossarist/localized_concept.rb +12 -1
  50. data/lib/glossarist/managed_concept.rb +1 -1
  51. data/lib/glossarist/managed_concept_data.rb +8 -5
  52. data/lib/glossarist/non_verb_rep.rb +1 -1
  53. data/lib/glossarist/reference_extractor.rb +227 -0
  54. data/lib/glossarist/reference_resolver.rb +169 -0
  55. data/lib/glossarist/register_data.rb +39 -0
  56. data/lib/glossarist/related_concept.rb +1 -1
  57. data/lib/glossarist/resolution_adapter/local.rb +73 -0
  58. data/lib/glossarist/resolution_adapter/package.rb +22 -0
  59. data/lib/glossarist/resolution_adapter/remote.rb +60 -0
  60. data/lib/glossarist/resolution_adapter/route.rb +34 -0
  61. data/lib/glossarist/resolution_adapter.rb +14 -0
  62. data/lib/glossarist/schema_migration.rb +334 -0
  63. data/lib/glossarist/urn_resolver.rb +71 -0
  64. data/lib/glossarist/utilities.rb +6 -2
  65. data/lib/glossarist/v1/concept.rb +81 -0
  66. data/lib/glossarist/v1/cross_references.rb +41 -0
  67. data/lib/glossarist/v1/register.rb +50 -0
  68. data/lib/glossarist/v1.rb +9 -0
  69. data/lib/glossarist/validation_result.rb +38 -0
  70. data/lib/glossarist/version.rb +1 -1
  71. data/lib/glossarist.rb +54 -24
  72. metadata +62 -6
@@ -0,0 +1,169 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  # Resolves concept references by consulting, in order, the local dataset
  # adapter (for local references), the registered package adapters and the
  # registered remote adapters. Non-local references are passed through the
  # route adapter first so that their source can be rewritten.
  class ReferenceResolver
    def initialize
      @local_adapter = nil
      @package_adapters = []
      @route_adapter = ResolutionAdapter::Route.new
      @remote_adapters = []
    end

    # Registers the concepts of the dataset being processed; local
    # references are resolved against these.
    def register_self(concepts)
      @local_adapter = ResolutionAdapter::Local.new(concepts)
    end

    # Registers a package (or a plain list of concepts) under a URI prefix.
    #
    # @param package_or_concepts [GcrPackage, Array, Hash]
    # @param uri_prefix [String, nil] explicit prefix; when omitted it is
    #   read from the package metadata (only possible for a GcrPackage)
    # @raise [ArgumentError] if the input type is unsupported or no prefix
    #   can be determined
    def register_package(package_or_concepts, uri_prefix: nil)
      concepts = extract_concepts(package_or_concepts)
      prefix = uri_prefix || infer_uri_prefix(package_or_concepts)
      raise ArgumentError, "uri_prefix required" unless prefix

      adapter = ResolutionAdapter::Package.new(concepts, uri_prefix: prefix)
      @package_adapters.push(adapter)
    end

    # Adds a source rewrite rule applied to non-local references.
    def add_route(from:, to:)
      @route_adapter.add(from: from, to: to)
    end

    # Registers a remote endpoint serving concepts for +uri_prefix+.
    def register_remote(uri_prefix:, endpoint:)
      adapter = ResolutionAdapter::Remote.new(uri_prefix: uri_prefix,
                                              endpoint: endpoint)
      @remote_adapters.push(adapter)
    end

    # Resolves a single reference.
    #
    # @return the first truthy adapter result, or nil when nothing matched
    #   (including a local reference with no local adapter registered)
    def resolve(reference)
      return @local_adapter&.resolve(reference) if reference.local?

      target = apply_routes(reference)

      # Packages take precedence over remotes; within each group the
      # registration order decides.
      [@package_adapters, @remote_adapters].each do |adapters|
        adapters.each do |adapter|
          found = adapter.resolve(target)
          return found if found
        end
      end

      nil
    end

    # @return [Array<Array>] pairs of [reference, resolved concept or nil]
    def resolve_all(concept, extractor: nil)
      extractor ||= ReferenceExtractor.new
      extract_refs(concept, extractor).map { |ref| [ref, resolve(ref)] }
    end

    # Resolves every reference of every concept and collects a warning for
    # each one that cannot be resolved.
    #
    # @param mode [Symbol] with :single and no local adapter registered, an
    #   extra warning reports how many external references each concept has
    # @return [ValidationResult]
    def validate_all(package_or_concepts, extractor: nil, mode: :multi)
      concepts = extract_concepts(package_or_concepts)
      extractor ||= ReferenceExtractor.new
      report = ValidationResult.new
      note_externals = mode == :single && !@local_adapter

      concepts.each do |concept|
        refs = extract_refs(concept, extractor)
        termid = extract_termid(concept)

        warn_unresolved(report, termid, refs)
        warn_unchecked_externals(report, termid, refs) if note_externals
      end

      report
    end

    # Loads packages from a directory: driven by "collection.yaml" when that
    # file exists, otherwise by registering every "*.gcr" file found.
    def load_collection(collection_dir)
      config_path = File.join(collection_dir, "collection.yaml")
      return load_gcr_directory(collection_dir) unless File.exist?(config_path)

      load_collection_config(config_path, collection_dir)
    end

    # @return [Array] URI prefixes of all registered packages
    def registered_datasets
      @package_adapters.map { |adapter| adapter.uri_prefix }
    end

    private

    # Adds one warning per reference that resolves to nil.
    def warn_unresolved(report, termid, refs)
      refs.each do |ref|
        next unless resolve(ref).nil?

        scope = ref.local? ? "intra-set" : "inter-set (#{ref.source})"
        report.add_warning("#{termid}: Unresolvable #{scope} reference: #{ref.term} -> #{ref.concept_id}")
      end
    end

    # Adds a summary warning when the concept has external references.
    def warn_unchecked_externals(report, termid, refs)
      externals = refs.select(&:external?)
      return unless externals.any?

      report.add_warning("#{termid}: #{externals.size} external reference(s) not checked in single mode")
    end

    # Returns the re-routed reference, or the original when no route applies.
    def apply_routes(reference)
      @route_adapter.resolve(reference) || reference
    end

    # Normalizes the supported inputs to a list of concepts.
    def extract_concepts(package_or_concepts)
      return package_or_concepts.concepts if package_or_concepts.is_a?(GcrPackage)
      return package_or_concepts if package_or_concepts.is_a?(Array)
      return [package_or_concepts] if package_or_concepts.is_a?(Hash)

      raise ArgumentError, "Expected GcrPackage, Array, or Hash"
    end

    # Only a GcrPackage carries metadata from which a prefix can be read.
    def infer_uri_prefix(package_or_concepts)
      return nil unless package_or_concepts.is_a?(GcrPackage)

      package_or_concepts.metadata&.dig("uri_prefix")
    end

    # Registers the packages, routes and remotes listed in a collection
    # config file.
    def load_collection_config(config_path, collection_dir)
      config = CollectionConfig.from_file(config_path)

      config.packages.each do |pkg|
        register_configured_package(pkg, collection_dir)
      end

      config.routes.each do |route|
        add_route(from: route["from"], to: route["to"])
      end

      config.remotes.each do |remote|
        register_remote(uri_prefix: remote["uri_prefix"],
                        endpoint: remote["endpoint"])
      end
    end

    # Registers one configured package; entries whose file is missing on
    # disk are skipped silently.
    def register_configured_package(pkg, collection_dir)
      gcr_path = File.join(collection_dir, pkg["file"])
      return unless File.exist?(gcr_path)

      gcr = GcrPackage.load(gcr_path)
      prefix = pkg["uri_prefix"] || gcr.metadata&.dig("uri_prefix")
      register_package(gcr, uri_prefix: prefix)
    end

    # Registers every "*.gcr" package found directly inside +dir+.
    def load_gcr_directory(dir)
      pattern = File.join(dir, "*.gcr")

      Dir.glob(pattern).each do |gcr_path|
        pkg = GcrPackage.load(gcr_path)
        register_package(pkg, uri_prefix: pkg.metadata&.dig("uri_prefix"))
      end
    end

    # Picks the extractor entry point matching the concept representation.
    def extract_refs(concept, extractor)
      return extractor.extract_from_managed_concept(concept) if concept.is_a?(ManagedConcept)

      extractor.extract_from_concept_hash(concept)
    end

    # Returns the concept identifier as a String (nil when absent).
    def extract_termid(concept)
      return concept.data.id&.to_s if concept.is_a?(ManagedConcept)

      (concept["termid"] || concept["id"])&.to_s
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  # Thin wrapper around a free-form hash that is (de)serialized through
  # Lutaml::Model. All content lives in the single +data+ attribute and is
  # read back with Hash-like accessors.
  class RegisterData < Lutaml::Model::Serializable
    attribute :data, :hash, default: -> { {} }

    key_value do
      # Custom (de)serialization hooks move the document content in and out
      # of +data+ (see #data_from and #data_to).
      map nil, to: :data, with: { from: :data_from, to: :data_to }
    end

    class << self
      # Reads and parses a YAML file.
      #
      # @param path [String]
      # @return [RegisterData, nil] nil when the file does not exist
      def from_file(path)
        content = File.read(path)
        from_yaml(content)
      rescue Errno::ENOENT
        nil
      end
    end

    # Hash-style lookup of a top-level key.
    def [](key)
      data[key]
    end

    # Nested lookup, delegated to the underlying hash.
    def dig(*keys)
      data.dig(*keys)
    end

    # Returns the underlying hash itself (not a copy).
    def to_h
      data
    end

    # Deserialization hook: stores the incoming value as the model's data.
    def data_from(model, value)
      model.data = value
    end

    # Serialization hook: copies every data entry into the output document.
    def data_to(model, doc)
      model.data.each_pair { |key, value| doc[key] = value }
    end
  end
end
@@ -7,7 +7,7 @@ module Glossarist
7
7
  values: Glossarist::GlossaryDefinition::RELATED_CONCEPT_TYPES
8
8
  attribute :ref, Citation
9
9
 
10
- yaml do
10
+ key_value do
11
11
  map :content, to: :content
12
12
  map :type, to: :type
13
13
  map :ref, with: { from: :ref_from_yaml, to: :ref_to_yaml }
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  class ResolutionAdapter
    # Resolves references against an in-memory list of concepts.
    #
    # Concepts may be ManagedConcept instances or plain Hashes. An index
    # keyed by the concept identifier (always stored as a String) is built
    # once at construction time; when several concepts share an identifier
    # the last one wins.
    class Local < ResolutionAdapter
      attr_reader :index, :concepts

      # @param concepts [Array] ManagedConcept instances and/or concept Hashes
      def initialize(concepts)
        super()
        @concepts = concepts
        @index = {}
        build_index
      end

      # Resolves a reference according to its +ref_type+: "local" by
      # identifier, "designation" by term text, anything else by identifier
      # when one is present.
      #
      # @return the matching concept, or nil
      def resolve(reference)
        case reference.ref_type
        when "local"
          resolve_by_id(reference.concept_id)
        when "designation"
          resolve_by_designation(reference.term)
        else
          resolve_by_id(reference.concept_id) if reference.concept_id
        end
      end

      # Looks a concept up by identifier.
      #
      # The identifier is normalized to a String because the index keys are
      # stringified when the index is built; without this, an Integer id
      # would never match.
      #
      # @return the matching concept, or nil
      def resolve_by_id(concept_id)
        @index[concept_id&.to_s]
      end

      # Finds the first concept having a designation equal to +term+,
      # ignoring case.
      #
      # @return the matching concept, or nil (also when +term+ is nil/false)
      def resolve_by_designation(term)
        return nil unless term

        needle = term.to_s.downcase
        concepts.find do |concept|
          # to_s: designations parsed from YAML are not guaranteed to be
          # Strings (e.g. a purely numeric designation).
          designations_for(concept).any? { |d| d.to_s.downcase == needle }
        end
      end

      private

      # Collects all designation values of a concept, across localizations.
      def designations_for(concept)
        if concept.is_a?(ManagedConcept)
          concept.localizations.flat_map do |l10n|
            l10n.data.terms.filter_map do |t|
              t.respond_to?(:designation) ? t.designation : nil
            end
          end
        else
          # Hash form: every value that is a Hash with a "terms" key is
          # treated as a language block.
          concept.each_value.flat_map do |lang_block|
            next [] unless lang_block.is_a?(Hash) && lang_block.key?("terms")

            # Skip malformed (non-Hash) term entries instead of failing.
            Array(lang_block["terms"]).filter_map do |t|
              t["designation"] if t.is_a?(Hash)
            end
          end
        end
      end

      # Populates the identifier index; concepts without an identifier are
      # left out.
      def build_index
        concepts.each do |concept|
          termid = extract_termid(concept)
          @index[termid] = concept if termid
        end
      end

      # Returns the concept identifier as a String (nil when absent).
      def extract_termid(concept)
        if concept.is_a?(ManagedConcept)
          concept.data.id&.to_s
        else
          (concept["termid"] || concept["id"])&.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  class ResolutionAdapter
    # Resolves URN references that belong to one package, identified by its
    # URI prefix. Lookups are delegated to a Local adapter built from the
    # package's concepts.
    class Package < ResolutionAdapter
      attr_reader :uri_prefix, :local_adapter

      # @param concepts [Array] concepts contained in the package
      # @param uri_prefix [String] source value this package answers for
      def initialize(concepts, uri_prefix:)
        super()
        @uri_prefix = uri_prefix
        @local_adapter = Local.new(concepts)
      end

      # @return the concept with the referenced identifier, or nil when the
      #   reference is not a URN reference for this package's prefix or the
      #   identifier is unknown
      def resolve(reference)
        return unless handles?(reference)

        @local_adapter.resolve_by_id(reference.concept_id)
      end

      private

      # True when the reference is a URN reference whose source equals this
      # package's prefix.
      def handles?(reference)
        reference.ref_type == "urn" && reference.source == uri_prefix
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+
6
module Glossarist
  class ResolutionAdapter
    # Resolves URN references for one URI prefix by fetching the concept
    # from an HTTP endpoint at "<endpoint>/<source>/<concept_id>".
    #
    # Every lookup result is cached per source/identifier pair for the
    # lifetime of the adapter. This includes nil results (non-success
    # responses and errors), so a failed lookup is not retried.
    class Remote < ResolutionAdapter
      attr_reader :uri_prefix, :endpoint, :cache

      # @param uri_prefix [String] source value this adapter answers for
      # @param endpoint [String] base URL; a trailing "/" is removed
      def initialize(uri_prefix:, endpoint:)
        super()
        @uri_prefix = uri_prefix
        @endpoint = endpoint.chomp("/")
        @cache = {}
      end

      # @return the parsed remote concept, or nil when the reference is not
      #   a URN reference for this prefix or the lookup failed
      def resolve(reference)
        return nil unless reference.ref_type == "urn"
        return nil unless reference.source == uri_prefix

        key = cache_key(reference)
        # key? rather than a truthiness check so cached nil results are
        # honoured too.
        return @cache[key] if @cache.key?(key)

        @cache[key] = fetch(reference)
      end

      private

      # Builds the request URI from the percent-encoded source and
      # identifier.
      def build_uri(reference)
        source = encode_path_segment(reference.source)
        concept_id = encode_path_segment(reference.concept_id)
        URI.parse("#{endpoint}/#{source}/#{concept_id}")
      end

      # Percent-encodes a value for use as a single URL path segment.
      #
      # URI.encode_www_form_component targets form data and writes a space
      # as "+", which is a literal plus sign inside a path. A literal "+"
      # in the input is already emitted as "%2B", so every "+" left in the
      # output stands for a space and can safely be rewritten to "%20".
      def encode_path_segment(value)
        URI.encode_www_form_component(value).gsub("+", "%20")
      end

      # Parses the response body according to its Content-Type header:
      # JSON yields the parsed JSON value, YAML goes through
      # ConceptDocument, anything else is parsed as a ManagedConcept YAML.
      def parse_response(response)
        content_type = response["content-type"].to_s
        if content_type.include?("json")
          JSON.parse(response.body)
        elsif content_type.include?("yaml")
          ConceptDocument.from_yamls(response.body).to_managed_concept
        else
          ManagedConcept.from_yaml(response.body)
        end
      end

      def cache_key(reference)
        "#{reference.source}/#{reference.concept_id}"
      end

      # Performs the HTTP GET. Best effort: any non-success response or
      # StandardError (network failure, malformed body, ...) yields nil.
      def fetch(reference)
        uri = build_uri(reference)
        response = Net::HTTP.get_response(uri)
        return nil unless response.is_a?(Net::HTTPSuccess)

        parse_response(response)
      rescue StandardError
        nil
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  class ResolutionAdapter
    # Rewrites the source of URN references according to a from => to
    # routing table. Unlike the other adapters, #resolve returns a new
    # reference rather than a concept.
    class Route < ResolutionAdapter
      attr_reader :routes

      # @param routes [Hash] initial routing table (source => replacement)
      def initialize(routes = {})
        super()
        @routes = routes
      end

      # Adds or replaces the route for +from+.
      #
      # @return the +to+ value
      def add(from:, to:)
        @routes.store(from, to)
      end

      # @return [ConceptReference, nil] a copy of the reference with its
      #   source replaced, or nil when the reference is not a URN reference
      #   or no route exists for its source
      def resolve(reference)
        return unless reference.ref_type == "urn" && routes.key?(reference.source)

        ConceptReference.new(
          term: reference.term,
          concept_id: reference.concept_id,
          source: routes[reference.source],
          ref_type: reference.ref_type,
        )
      end

      # @return the routed source, or +source+ itself when there is no
      #   (truthy) route for it
      def remap(source)
        target = routes[source]
        target || source
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  # Abstract base class for reference resolution adapters. Concrete adapters
  # are registered for autoloading and must override #resolve.
  class ResolutionAdapter
    # Constant name => file to load on first use of that constant.
    {
      Local: "glossarist/resolution_adapter/local",
      Package: "glossarist/resolution_adapter/package",
      Route: "glossarist/resolution_adapter/route",
      Remote: "glossarist/resolution_adapter/remote",
    }.each { |const_name, path| autoload const_name, path }

    # Resolves a reference; subclasses must provide the implementation.
    #
    # @raise [NotImplementedError] always, in this base class
    def resolve(_reference)
      message = "#{self.class}#resolve must be implemented"
      raise NotImplementedError, message
    end
  end
end