glossarist 2.8.6 → 2.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop_todo.yml +109 -20
  4. data/CLAUDE.md +34 -3
  5. data/Gemfile +1 -0
  6. data/lib/glossarist/citation.rb +5 -0
  7. data/lib/glossarist/cli/compare_command.rb +2 -2
  8. data/lib/glossarist/cli/export_command.rb +1 -3
  9. data/lib/glossarist/collection.rb +1 -1
  10. data/lib/glossarist/collections/bibliography_collection.rb +1 -1
  11. data/lib/glossarist/concept_data.rb +5 -0
  12. data/lib/glossarist/concept_reference.rb +7 -1
  13. data/lib/glossarist/concept_source.rb +2 -2
  14. data/lib/glossarist/concept_validator.rb +3 -1
  15. data/lib/glossarist/dataset_validator.rb +1 -1
  16. data/lib/glossarist/{error.rb → errors/base.rb} +3 -1
  17. data/lib/glossarist/errors/cache_version_mismatch_error.rb +12 -0
  18. data/lib/glossarist/errors/invalid_language_code_error.rb +19 -0
  19. data/lib/glossarist/errors/invalid_type_error.rb +8 -0
  20. data/lib/glossarist/errors/load_error.rb +22 -0
  21. data/lib/glossarist/errors/parse_error.rb +24 -0
  22. data/lib/glossarist/errors.rb +14 -0
  23. data/lib/glossarist/gcr_package.rb +4 -2
  24. data/lib/glossarist/glossary_store.rb +175 -1
  25. data/lib/glossarist/managed_concept.rb +16 -2
  26. data/lib/glossarist/managed_concept_collection.rb +52 -8
  27. data/lib/glossarist/reference_extractor.rb +22 -2
  28. data/lib/glossarist/reference_resolver.rb +38 -3
  29. data/lib/glossarist/resolution_adapter/bibliography.rb +22 -0
  30. data/lib/glossarist/resolution_adapter.rb +1 -0
  31. data/lib/glossarist/schema_migration/v0_to_v1.rb +200 -0
  32. data/lib/glossarist/schema_migration/v2_to_v3.rb +50 -0
  33. data/lib/glossarist/schema_migration.rb +10 -224
  34. data/lib/glossarist/sts/importer.rb +11 -12
  35. data/lib/glossarist/sts/term_extractor.rb +104 -6
  36. data/lib/glossarist/v3/citation.rb +2 -0
  37. data/lib/glossarist/v3/managed_concept_data.rb +1 -0
  38. data/lib/glossarist/validation/asset_index.rb +1 -1
  39. data/lib/glossarist/validation/rules/cite_ref_integrity_rule.rb +75 -0
  40. data/lib/glossarist/version.rb +1 -1
  41. data/lib/glossarist.rb +5 -13
  42. data/scripts/upgrade_dataset_to_v3.rb +1 -1
  43. metadata +13 -9
  44. data/lib/glossarist/concept_collector.rb +0 -231
  45. data/lib/glossarist/concept_manager.rb +0 -174
  46. data/lib/glossarist/error/cache_version_mismatch_error.rb +0 -8
  47. data/lib/glossarist/error/invalid_language_code_error.rb +0 -15
  48. data/lib/glossarist/error/invalid_type_error.rb +0 -4
  49. data/lib/glossarist/error/parse_error.rb +0 -16
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: glossarist
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.8.6
4
+ version: 2.8.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-06-08 00:00:00.000000000 Z
11
+ date: 2026-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lutaml-model
@@ -199,14 +199,12 @@ files:
199
199
  - lib/glossarist/collections/typed_collection.rb
200
200
  - lib/glossarist/comparison_result.rb
201
201
  - lib/glossarist/concept.rb
202
- - lib/glossarist/concept_collector.rb
203
202
  - lib/glossarist/concept_comparator.rb
204
203
  - lib/glossarist/concept_data.rb
205
204
  - lib/glossarist/concept_date.rb
206
205
  - lib/glossarist/concept_diff.rb
207
206
  - lib/glossarist/concept_document.rb
208
207
  - lib/glossarist/concept_enricher.rb
209
- - lib/glossarist/concept_manager.rb
210
208
  - lib/glossarist/concept_ref.rb
211
209
  - lib/glossarist/concept_reference.rb
212
210
  - lib/glossarist/concept_set.rb
@@ -230,11 +228,13 @@ files:
230
228
  - lib/glossarist/designation/suffix.rb
231
229
  - lib/glossarist/designation/symbol.rb
232
230
  - lib/glossarist/detailed_definition.rb
233
- - lib/glossarist/error.rb
234
- - lib/glossarist/error/cache_version_mismatch_error.rb
235
- - lib/glossarist/error/invalid_language_code_error.rb
236
- - lib/glossarist/error/invalid_type_error.rb
237
- - lib/glossarist/error/parse_error.rb
231
+ - lib/glossarist/errors.rb
232
+ - lib/glossarist/errors/base.rb
233
+ - lib/glossarist/errors/cache_version_mismatch_error.rb
234
+ - lib/glossarist/errors/invalid_language_code_error.rb
235
+ - lib/glossarist/errors/invalid_type_error.rb
236
+ - lib/glossarist/errors/load_error.rb
237
+ - lib/glossarist/errors/parse_error.rb
238
238
  - lib/glossarist/gcr_metadata.rb
239
239
  - lib/glossarist/gcr_package.rb
240
240
  - lib/glossarist/gcr_package_definition.rb
@@ -281,11 +281,14 @@ files:
281
281
  - lib/glossarist/register_data.rb
282
282
  - lib/glossarist/related_concept.rb
283
283
  - lib/glossarist/resolution_adapter.rb
284
+ - lib/glossarist/resolution_adapter/bibliography.rb
284
285
  - lib/glossarist/resolution_adapter/local.rb
285
286
  - lib/glossarist/resolution_adapter/package.rb
286
287
  - lib/glossarist/resolution_adapter/remote.rb
287
288
  - lib/glossarist/resolution_adapter/route.rb
288
289
  - lib/glossarist/schema_migration.rb
290
+ - lib/glossarist/schema_migration/v0_to_v1.rb
291
+ - lib/glossarist/schema_migration/v2_to_v3.rb
289
292
  - lib/glossarist/section.rb
290
293
  - lib/glossarist/sts.rb
291
294
  - lib/glossarist/sts/extracted_designation.rb
@@ -343,6 +346,7 @@ files:
343
346
  - lib/glossarist/validation/rules/base.rb
344
347
  - lib/glossarist/validation/rules/bibliography_yaml_rule.rb
345
348
  - lib/glossarist/validation/rules/citation_completeness_rule.rb
349
+ - lib/glossarist/validation/rules/cite_ref_integrity_rule.rb
346
350
  - lib/glossarist/validation/rules/concept_context.rb
347
351
  - lib/glossarist/validation/rules/concept_count_rule.rb
348
352
  - lib/glossarist/validation/rules/concept_id_rule.rb
@@ -1,231 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Glossarist
4
- class ConceptCollector
5
- def self.collect(dir)
6
- dir = File.expand_path(dir)
7
- unless File.directory?(dir)
8
- raise ArgumentError, "#{dir} is not a directory"
9
- end
10
-
11
- if v2_concepts?(dir)
12
- collect_v2_concepts(dir)
13
- elsif managed_concepts?(dir)
14
- collect_managed_concepts(dir)
15
- elsif v1_concepts?(dir)
16
- collect_v1_concepts(dir)
17
- else
18
- []
19
- end
20
- end
21
-
22
- def self.each_concept(dir, &block)
23
- dir = File.expand_path(dir)
24
- unless File.directory?(dir)
25
- raise ArgumentError, "#{dir} is not a directory"
26
- end
27
- return enum_for(:each_concept, dir) unless block
28
-
29
- if v2_concepts?(dir)
30
- each_v2_concept(dir, &block)
31
- elsif managed_concepts?(dir)
32
- each_managed_concept(dir, &block)
33
- elsif v1_concepts?(dir)
34
- each_v1_concept(dir, &block)
35
- end
36
- end
37
-
38
- def self.count(dir)
39
- dir = File.expand_path(dir)
40
- return 0 unless File.directory?(dir)
41
-
42
- if managed_concepts?(dir)
43
- Dir.glob(File.join(dir, "concepts", "concept", "*.yaml")).length
44
- elsif v2_concepts?(dir)
45
- count_v2(dir)
46
- elsif v1_concepts?(dir)
47
- Dir.glob(File.join(dir, "concepts", "*.yaml")).length
48
- else
49
- 0
50
- end
51
- end
52
-
53
- class << self
54
- private
55
-
56
- def count_v2(dir)
57
- if v2_flat_concepts?(dir)
58
- Dir.glob(File.join(dir, "concepts", "*.yaml")).length
59
- else
60
- v2_dir = File.join(dir, "geolexica-v2")
61
- if File.directory?(File.join(v2_dir, "concepts"))
62
- Dir.glob(File.join(v2_dir, "concepts", "concept", "*.yaml")).length
63
- else
64
- Dir.glob(File.join(v2_dir, "*.yaml")).length
65
- end
66
- end
67
- end
68
-
69
- def v1_concepts?(dir)
70
- concepts_dir = File.join(dir, "concepts")
71
- File.directory?(concepts_dir) &&
72
- !v2_flat_concepts?(dir) &&
73
- !managed_concepts?(dir) &&
74
- Dir.glob(File.join(concepts_dir, "*.yaml")).any? do |f|
75
- V1::Concept.from_file(f)&.termid?
76
- end
77
- end
78
-
79
- def v2_concepts?(dir)
80
- File.directory?(File.join(dir, "geolexica-v2")) ||
81
- v2_flat_concepts?(dir)
82
- end
83
-
84
- def v2_flat_concepts?(dir)
85
- return false if managed_concepts?(dir)
86
-
87
- concepts_dir = File.join(dir, "concepts")
88
- return false unless File.directory?(concepts_dir)
89
-
90
- Dir.glob(File.join(concepts_dir, "*.yaml")).first(5).any? do |f|
91
- v2_flat_concept_file?(f)
92
- end
93
- end
94
-
95
- def v2_flat_concept_file?(path)
96
- raw = File.read(path, encoding: "utf-8")
97
- doc = ConceptDocument.from_yamls(raw)
98
- !!doc.concept&.data&.id
99
- rescue StandardError
100
- false
101
- end
102
-
103
- def managed_concepts?(dir)
104
- concept_dir = File.join(dir, "concepts", "concept")
105
- File.directory?(concept_dir) &&
106
- Dir.glob(File.join(concept_dir, "*.yaml")).any?
107
- end
108
-
109
- def collect_v1_concepts(dir)
110
- concepts = []
111
- each_v1_concept(dir) { |mc| concepts << mc }
112
- concepts
113
- end
114
-
115
- def each_v1_concept(dir)
116
- concepts_dir = File.join(dir, "concepts")
117
- files = Dir.glob(File.join(concepts_dir, "*.yaml"))
118
- files.each do |file|
119
- v1 = V1::Concept.from_file(file)
120
- next unless v1
121
-
122
- yield v1.to_managed_concept
123
- end
124
- end
125
-
126
- def collect_v2_concepts(dir)
127
- if v2_flat_concepts?(dir)
128
- collect_grouped_v2_concepts(File.join(dir, "concepts"))
129
- else
130
- v2_dir = File.join(dir, "geolexica-v2")
131
- if File.directory?(File.join(v2_dir, "concepts"))
132
- collect_managed_concepts(v2_dir)
133
- else
134
- collect_grouped_v2_concepts(v2_dir)
135
- end
136
- end
137
- end
138
-
139
- def each_v2_concept(dir, &)
140
- if v2_flat_concepts?(dir)
141
- each_grouped_v2_concepts(File.join(dir, "concepts"), &)
142
- else
143
- v2_dir = File.join(dir, "geolexica-v2")
144
- if File.directory?(File.join(v2_dir, "concepts"))
145
- each_managed_concept(v2_dir, &)
146
- else
147
- each_grouped_v2_concepts(v2_dir, &)
148
- end
149
- end
150
- end
151
-
152
- def each_grouped_v2_concepts(v2_dir, &)
153
- collection = ManagedConceptCollection.new
154
- manager = ConceptManager.new(path: v2_dir)
155
- manager.version = detect_schema_version(v2_dir)
156
- manager.load_from_files(collection: collection)
157
- collection.each(&)
158
- end
159
-
160
- def collect_grouped_v2_concepts(v2_dir)
161
- collection = ManagedConceptCollection.new
162
- manager = ConceptManager.new(path: v2_dir)
163
- manager.version = detect_schema_version(v2_dir)
164
- manager.load_from_files(collection: collection)
165
- collection.to_a
166
- end
167
-
168
- def collect_managed_concepts(dir) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
169
- concepts = []
170
- each_managed_concept(dir) { |mc| concepts << mc }
171
- concepts
172
- end
173
-
174
- def each_managed_concept(dir) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
175
- concepts_dir = File.join(dir, "concepts")
176
- concept_files = Dir.glob(File.join(concepts_dir, "concept", "*.yaml"))
177
- return if concept_files.empty?
178
-
179
- lc_dir = find_localized_concepts_dir(concepts_dir)
180
- lc_index = build_lc_index(lc_dir) if lc_dir
181
-
182
- concept_files.each do |f|
183
- mc = ManagedConcept.from_yaml(File.read(f))
184
- next unless mc.data&.id
185
-
186
- lc_map = mc.data.localized_concepts || {}
187
- lc_map.each_value do |uuid|
188
- lc_file = lc_index ? lc_index[uuid] : nil
189
- next unless lc_file
190
-
191
- l10n = LocalizedConcept.from_yaml(File.read(lc_file))
192
- mc.add_localization(l10n)
193
- rescue StandardError
194
- next
195
- end
196
-
197
- yield mc
198
- rescue StandardError
199
- next
200
- end
201
- end
202
-
203
- def build_lc_index(lc_dir)
204
- Dir.glob(File.join(lc_dir, "*.{yaml,yml}"))
205
- .to_h { |f| [File.basename(f, ".*"), f] }
206
- end
207
-
208
- def find_localized_concepts_dir(concepts_dir)
209
- %w[localized_concept localized-concept].each do |name|
210
- d = File.join(concepts_dir, name)
211
- return d if File.directory?(d)
212
- end
213
- nil
214
- end
215
-
216
- def detect_schema_version(dir)
217
- concepts_dir = File.join(dir, "concepts")
218
- search_dir = File.directory?(concepts_dir) ? concepts_dir : dir
219
- sample = Dir.glob(File.join(search_dir, "*.yaml")).first
220
- return "2" unless sample
221
-
222
- raw = File.read(sample, encoding: "utf-8")
223
- doc = ConceptDocument.from_yamls(raw)
224
- ver = doc.concept&.schema_version.to_s
225
- ver == "3" ? "3" : "2"
226
- rescue StandardError
227
- "2"
228
- end
229
- end
230
- end
231
- end
@@ -1,174 +0,0 @@
1
- module Glossarist
2
- class ConceptManager < Lutaml::Model::Serializable
3
- attribute :path, :string
4
- attribute :localized_concepts_path, :string
5
- attribute :version, :string, default: -> { "2" }
6
-
7
- key_value do
8
- map :path, to: :path
9
- map %i[localized_concepts_path localizedConceptsPath],
10
- to: :localized_concepts_path
11
- end
12
-
13
- def concept_document_class
14
- ConceptDocument.for_version(version)
15
- end
16
-
17
- def localized_concept_class
18
- if version.to_s == "2"
19
- V2::LocalizedConcept
20
- else
21
- LocalizedConcept
22
- end
23
- end
24
-
25
- def load_from_files(collection: nil)
26
- collection ||= ManagedConceptCollection.new
27
-
28
- Dir.glob(concepts_glob) do |filename|
29
- concepts = load_concept_from_file(filename)
30
-
31
- concepts.each do |concept|
32
- collection.store(concept)
33
- end
34
- rescue StandardError
35
- next
36
- end
37
- end
38
-
39
- def save_to_files(managed_concepts)
40
- managed_concepts.each do |concept|
41
- save_concept_to_file(concept)
42
- end
43
- end
44
-
45
- def save_grouped_concepts_to_files(managed_concepts)
46
- managed_concepts.each do |concept|
47
- save_grouped_concepts_to_file(concept)
48
- end
49
- end
50
-
51
- def load_concept_from_file(filename) # rubocop:disable Metrics/CyclomaticComplexity
52
- raw = File.read(filename, encoding: "utf-8")
53
- doc = concept_document_class.from_yamls(raw)
54
- concept = doc.concept
55
- unless concept
56
- raise Glossarist::ParseError.new(filename: filename)
57
- end
58
-
59
- concept_uuid = concept.identifier || concept.data&.id || File.basename(
60
- filename, ".*"
61
- )
62
- concept.uuid = concept_uuid
63
-
64
- concept.data.localized_concepts.each_value do |id|
65
- localized_concept = load_localized_concept(id, doc.localizations)
66
- concept.add_l10n(localized_concept)
67
- end
68
-
69
- [concept]
70
- rescue Psych::SyntaxError => e
71
- raise Glossarist::ParseError.new(filename: filename, line: e.line)
72
- end
73
-
74
- def load_localized_concept(id, inline_localizations = nil)
75
- if inline_localizations
76
- l10n = inline_localizations.find { |l| l.id == id }
77
- if l10n
78
- l10n.uuid = id
79
- return l10n
80
- end
81
- end
82
-
83
- l10n = localized_concept_class.from_yaml(
84
- File.read(localized_concept_path(id), encoding: "utf-8"),
85
- )
86
- l10n.uuid = id
87
- l10n
88
- rescue Psych::SyntaxError => e
89
- raise Glossarist::ParseError.new(filename: filename, line: e.line)
90
- end
91
-
92
- def save_concept_to_file(concept)
93
- @localized_concepts_path ||= "localized_concept"
94
- concept_dir = File.join(path, "concept")
95
-
96
- localized_concept_dir = File.join(path, @localized_concepts_path)
97
-
98
- FileUtils.mkdir_p(concept_dir)
99
- FileUtils.mkdir_p(localized_concept_dir)
100
-
101
- filename = File.join(concept_dir, "#{concept.uuid}.yaml")
102
- File.write(filename, concept.to_yaml, encoding: "utf-8")
103
-
104
- concept.localized_concepts.each do |lang, uuid|
105
- filename = File.join(localized_concept_dir, "#{uuid}.yaml")
106
- File.write(filename, concept.localization(lang).to_yaml,
107
- encoding: "utf-8")
108
- end
109
- end
110
-
111
- def save_grouped_concepts_to_file(concept)
112
- @localized_concepts_path ||= "localized_concept"
113
- concept_dir = File.join(path)
114
-
115
- FileUtils.mkdir_p(concept_dir)
116
-
117
- content = []
118
-
119
- filename = File.join(concept_dir, "#{concept.uuid}.yaml")
120
- content << concept.to_yaml
121
-
122
- concept.localized_concepts.each_key do |lang|
123
- content << concept.localization(lang).to_yaml
124
- end
125
-
126
- File.write(filename, content.join("\n"), encoding: "utf-8")
127
- end
128
-
129
- def concepts_glob
130
- return path if File.file?(path)
131
-
132
- if v1_collection?
133
- File.join(path, "concept-*.{yaml,yml}")
134
- else
135
- candidates = [
136
- File.join(path, "concept", "*.{yaml,yml}"),
137
- File.join(path, "concepts", "*.{yaml,yml}"),
138
- File.join(path, "*.{yaml,yml}"),
139
- ]
140
- candidates.find { |g| !Dir.glob(g).empty? }
141
- end
142
- end
143
-
144
- def localized_concept_path(id)
145
- localized_concept_possible_dir = {
146
- "localized_concept" => File.join(
147
- path,
148
- "localized_concept",
149
- "#{id}.{yaml,yml}",
150
- ),
151
-
152
- "localized-concept" => File.join(
153
- path,
154
- "localized-concept",
155
- "#{id}.{yaml,yml}",
156
- ),
157
- }
158
-
159
- localized_concept_possible_dir.each do |dir_name, file_path|
160
- actual_path = Dir.glob(file_path)&.first
161
-
162
- if actual_path
163
- @localized_concepts_path = dir_name
164
- return actual_path
165
- end
166
- end
167
- end
168
-
169
- def v1_collection?
170
- @v1_collection ||= !Dir.glob(File.join(path,
171
- "concept-*.{yaml,yml}")).empty?
172
- end
173
- end
174
- end
@@ -1,8 +0,0 @@
1
- module Glossarist
2
- class CacheVersionMismatchError < Error
3
- def initialize(cache_dir, expected, actual)
4
- super("Relaton cache version mismatch in '#{cache_dir}': " \
5
- "expected '#{expected}', got '#{actual}'")
6
- end
7
- end
8
- end
@@ -1,15 +0,0 @@
1
- module Glossarist
2
- class InvalidLanguageCodeError < Error
3
- attr_reader :code
4
-
5
- def initialize(code:)
6
- @code = code
7
-
8
- super()
9
- end
10
-
11
- def to_s
12
- "Invalid value for language_code: `#{code}`. It must be 3 characters long string."
13
- end
14
- end
15
- end
@@ -1,4 +0,0 @@
1
- module Glossarist
2
- class InvalidTypeError < Error
3
- end
4
- end
@@ -1,16 +0,0 @@
1
- module Glossarist
2
- class ParseError < Error
3
- attr_accessor :line, :filename
4
-
5
- def initialize(filename:, line: nil)
6
- @filename = filename
7
- @line = line
8
-
9
- super()
10
- end
11
-
12
- def to_s
13
- "Unable to parse file: #{filename}, error on line: #{line}"
14
- end
15
- end
16
- end