glossarist 2.8.7 → 2.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop_todo.yml +128 -11
  4. data/CLAUDE.md +34 -3
  5. data/Gemfile +1 -0
  6. data/lib/glossarist/cli/compare_command.rb +2 -2
  7. data/lib/glossarist/cli/export_command.rb +1 -3
  8. data/lib/glossarist/collection.rb +1 -1
  9. data/lib/glossarist/collections/bibliography_collection.rb +1 -1
  10. data/lib/glossarist/concept_data.rb +1 -0
  11. data/lib/glossarist/concept_reference.rb +7 -1
  12. data/lib/glossarist/concept_source.rb +2 -2
  13. data/lib/glossarist/concept_validator.rb +3 -1
  14. data/lib/glossarist/dataset_validator.rb +1 -1
  15. data/lib/glossarist/{error.rb → errors/base.rb} +3 -1
  16. data/lib/glossarist/errors/cache_version_mismatch_error.rb +12 -0
  17. data/lib/glossarist/errors/invalid_language_code_error.rb +19 -0
  18. data/lib/glossarist/errors/invalid_type_error.rb +8 -0
  19. data/lib/glossarist/errors/load_error.rb +22 -0
  20. data/lib/glossarist/errors/parse_error.rb +24 -0
  21. data/lib/glossarist/errors.rb +14 -0
  22. data/lib/glossarist/gcr_package.rb +4 -2
  23. data/lib/glossarist/glossary_store.rb +175 -1
  24. data/lib/glossarist/managed_concept.rb +16 -2
  25. data/lib/glossarist/managed_concept_collection.rb +52 -8
  26. data/lib/glossarist/reference_extractor.rb +22 -2
  27. data/lib/glossarist/reference_resolver.rb +38 -3
  28. data/lib/glossarist/resolution_adapter/bibliography.rb +22 -0
  29. data/lib/glossarist/resolution_adapter.rb +1 -0
  30. data/lib/glossarist/schema_migration/v0_to_v1.rb +200 -0
  31. data/lib/glossarist/schema_migration/v2_to_v3.rb +50 -0
  32. data/lib/glossarist/schema_migration.rb +10 -224
  33. data/lib/glossarist/sts/importer.rb +11 -12
  34. data/lib/glossarist/sts/term_extractor.rb +104 -6
  35. data/lib/glossarist/validation/asset_index.rb +1 -1
  36. data/lib/glossarist/validation/rules/cite_ref_integrity_rule.rb +75 -0
  37. data/lib/glossarist/version.rb +1 -1
  38. data/lib/glossarist.rb +5 -13
  39. data/scripts/upgrade_dataset_to_v3.rb +1 -1
  40. metadata +13 -9
  41. data/lib/glossarist/concept_collector.rb +0 -231
  42. data/lib/glossarist/concept_manager.rb +0 -183
  43. data/lib/glossarist/error/cache_version_mismatch_error.rb +0 -8
  44. data/lib/glossarist/error/invalid_language_code_error.rb +0 -15
  45. data/lib/glossarist/error/invalid_type_error.rb +0 -4
  46. data/lib/glossarist/error/parse_error.rb +0 -16
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: glossarist
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.8.7
4
+ version: 2.8.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-06-09 00:00:00.000000000 Z
11
+ date: 2026-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lutaml-model
@@ -199,14 +199,12 @@ files:
199
199
  - lib/glossarist/collections/typed_collection.rb
200
200
  - lib/glossarist/comparison_result.rb
201
201
  - lib/glossarist/concept.rb
202
- - lib/glossarist/concept_collector.rb
203
202
  - lib/glossarist/concept_comparator.rb
204
203
  - lib/glossarist/concept_data.rb
205
204
  - lib/glossarist/concept_date.rb
206
205
  - lib/glossarist/concept_diff.rb
207
206
  - lib/glossarist/concept_document.rb
208
207
  - lib/glossarist/concept_enricher.rb
209
- - lib/glossarist/concept_manager.rb
210
208
  - lib/glossarist/concept_ref.rb
211
209
  - lib/glossarist/concept_reference.rb
212
210
  - lib/glossarist/concept_set.rb
@@ -230,11 +228,13 @@ files:
230
228
  - lib/glossarist/designation/suffix.rb
231
229
  - lib/glossarist/designation/symbol.rb
232
230
  - lib/glossarist/detailed_definition.rb
233
- - lib/glossarist/error.rb
234
- - lib/glossarist/error/cache_version_mismatch_error.rb
235
- - lib/glossarist/error/invalid_language_code_error.rb
236
- - lib/glossarist/error/invalid_type_error.rb
237
- - lib/glossarist/error/parse_error.rb
231
+ - lib/glossarist/errors.rb
232
+ - lib/glossarist/errors/base.rb
233
+ - lib/glossarist/errors/cache_version_mismatch_error.rb
234
+ - lib/glossarist/errors/invalid_language_code_error.rb
235
+ - lib/glossarist/errors/invalid_type_error.rb
236
+ - lib/glossarist/errors/load_error.rb
237
+ - lib/glossarist/errors/parse_error.rb
238
238
  - lib/glossarist/gcr_metadata.rb
239
239
  - lib/glossarist/gcr_package.rb
240
240
  - lib/glossarist/gcr_package_definition.rb
@@ -281,11 +281,14 @@ files:
281
281
  - lib/glossarist/register_data.rb
282
282
  - lib/glossarist/related_concept.rb
283
283
  - lib/glossarist/resolution_adapter.rb
284
+ - lib/glossarist/resolution_adapter/bibliography.rb
284
285
  - lib/glossarist/resolution_adapter/local.rb
285
286
  - lib/glossarist/resolution_adapter/package.rb
286
287
  - lib/glossarist/resolution_adapter/remote.rb
287
288
  - lib/glossarist/resolution_adapter/route.rb
288
289
  - lib/glossarist/schema_migration.rb
290
+ - lib/glossarist/schema_migration/v0_to_v1.rb
291
+ - lib/glossarist/schema_migration/v2_to_v3.rb
289
292
  - lib/glossarist/section.rb
290
293
  - lib/glossarist/sts.rb
291
294
  - lib/glossarist/sts/extracted_designation.rb
@@ -343,6 +346,7 @@ files:
343
346
  - lib/glossarist/validation/rules/base.rb
344
347
  - lib/glossarist/validation/rules/bibliography_yaml_rule.rb
345
348
  - lib/glossarist/validation/rules/citation_completeness_rule.rb
349
+ - lib/glossarist/validation/rules/cite_ref_integrity_rule.rb
346
350
  - lib/glossarist/validation/rules/concept_context.rb
347
351
  - lib/glossarist/validation/rules/concept_count_rule.rb
348
352
  - lib/glossarist/validation/rules/concept_id_rule.rb
@@ -1,231 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Glossarist
4
- class ConceptCollector
5
- def self.collect(dir)
6
- dir = File.expand_path(dir)
7
- unless File.directory?(dir)
8
- raise ArgumentError, "#{dir} is not a directory"
9
- end
10
-
11
- if v2_concepts?(dir)
12
- collect_v2_concepts(dir)
13
- elsif managed_concepts?(dir)
14
- collect_managed_concepts(dir)
15
- elsif v1_concepts?(dir)
16
- collect_v1_concepts(dir)
17
- else
18
- []
19
- end
20
- end
21
-
22
- def self.each_concept(dir, &block)
23
- dir = File.expand_path(dir)
24
- unless File.directory?(dir)
25
- raise ArgumentError, "#{dir} is not a directory"
26
- end
27
- return enum_for(:each_concept, dir) unless block
28
-
29
- if v2_concepts?(dir)
30
- each_v2_concept(dir, &block)
31
- elsif managed_concepts?(dir)
32
- each_managed_concept(dir, &block)
33
- elsif v1_concepts?(dir)
34
- each_v1_concept(dir, &block)
35
- end
36
- end
37
-
38
- def self.count(dir)
39
- dir = File.expand_path(dir)
40
- return 0 unless File.directory?(dir)
41
-
42
- if managed_concepts?(dir)
43
- Dir.glob(File.join(dir, "concepts", "concept", "*.yaml")).length
44
- elsif v2_concepts?(dir)
45
- count_v2(dir)
46
- elsif v1_concepts?(dir)
47
- Dir.glob(File.join(dir, "concepts", "*.yaml")).length
48
- else
49
- 0
50
- end
51
- end
52
-
53
- class << self
54
- private
55
-
56
- def count_v2(dir)
57
- if v2_flat_concepts?(dir)
58
- Dir.glob(File.join(dir, "concepts", "*.yaml")).length
59
- else
60
- v2_dir = File.join(dir, "geolexica-v2")
61
- if File.directory?(File.join(v2_dir, "concepts"))
62
- Dir.glob(File.join(v2_dir, "concepts", "concept", "*.yaml")).length
63
- else
64
- Dir.glob(File.join(v2_dir, "*.yaml")).length
65
- end
66
- end
67
- end
68
-
69
- def v1_concepts?(dir)
70
- concepts_dir = File.join(dir, "concepts")
71
- File.directory?(concepts_dir) &&
72
- !v2_flat_concepts?(dir) &&
73
- !managed_concepts?(dir) &&
74
- Dir.glob(File.join(concepts_dir, "*.yaml")).any? do |f|
75
- V1::Concept.from_file(f)&.termid?
76
- end
77
- end
78
-
79
- def v2_concepts?(dir)
80
- File.directory?(File.join(dir, "geolexica-v2")) ||
81
- v2_flat_concepts?(dir)
82
- end
83
-
84
- def v2_flat_concepts?(dir)
85
- return false if managed_concepts?(dir)
86
-
87
- concepts_dir = File.join(dir, "concepts")
88
- return false unless File.directory?(concepts_dir)
89
-
90
- Dir.glob(File.join(concepts_dir, "*.yaml")).first(5).any? do |f|
91
- v2_flat_concept_file?(f)
92
- end
93
- end
94
-
95
- def v2_flat_concept_file?(path)
96
- raw = File.read(path, encoding: "utf-8")
97
- doc = ConceptDocument.from_yamls(raw)
98
- !!doc.concept&.data&.id
99
- rescue StandardError
100
- false
101
- end
102
-
103
- def managed_concepts?(dir)
104
- concept_dir = File.join(dir, "concepts", "concept")
105
- File.directory?(concept_dir) &&
106
- Dir.glob(File.join(concept_dir, "*.yaml")).any?
107
- end
108
-
109
- def collect_v1_concepts(dir)
110
- concepts = []
111
- each_v1_concept(dir) { |mc| concepts << mc }
112
- concepts
113
- end
114
-
115
- def each_v1_concept(dir)
116
- concepts_dir = File.join(dir, "concepts")
117
- files = Dir.glob(File.join(concepts_dir, "*.yaml"))
118
- files.each do |file|
119
- v1 = V1::Concept.from_file(file)
120
- next unless v1
121
-
122
- yield v1.to_managed_concept
123
- end
124
- end
125
-
126
- def collect_v2_concepts(dir)
127
- if v2_flat_concepts?(dir)
128
- collect_grouped_v2_concepts(File.join(dir, "concepts"))
129
- else
130
- v2_dir = File.join(dir, "geolexica-v2")
131
- if File.directory?(File.join(v2_dir, "concepts"))
132
- collect_managed_concepts(v2_dir)
133
- else
134
- collect_grouped_v2_concepts(v2_dir)
135
- end
136
- end
137
- end
138
-
139
- def each_v2_concept(dir, &)
140
- if v2_flat_concepts?(dir)
141
- each_grouped_v2_concepts(File.join(dir, "concepts"), &)
142
- else
143
- v2_dir = File.join(dir, "geolexica-v2")
144
- if File.directory?(File.join(v2_dir, "concepts"))
145
- each_managed_concept(v2_dir, &)
146
- else
147
- each_grouped_v2_concepts(v2_dir, &)
148
- end
149
- end
150
- end
151
-
152
- def each_grouped_v2_concepts(v2_dir, &)
153
- collection = ManagedConceptCollection.new
154
- manager = ConceptManager.new(path: v2_dir)
155
- manager.version = detect_schema_version(v2_dir)
156
- manager.load_from_files(collection: collection)
157
- collection.each(&)
158
- end
159
-
160
- def collect_grouped_v2_concepts(v2_dir)
161
- collection = ManagedConceptCollection.new
162
- manager = ConceptManager.new(path: v2_dir)
163
- manager.version = detect_schema_version(v2_dir)
164
- manager.load_from_files(collection: collection)
165
- collection.to_a
166
- end
167
-
168
- def collect_managed_concepts(dir) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
169
- concepts = []
170
- each_managed_concept(dir) { |mc| concepts << mc }
171
- concepts
172
- end
173
-
174
- def each_managed_concept(dir) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
175
- concepts_dir = File.join(dir, "concepts")
176
- concept_files = Dir.glob(File.join(concepts_dir, "concept", "*.yaml"))
177
- return if concept_files.empty?
178
-
179
- lc_dir = find_localized_concepts_dir(concepts_dir)
180
- lc_index = build_lc_index(lc_dir) if lc_dir
181
-
182
- concept_files.each do |f|
183
- mc = ManagedConcept.from_yaml(File.read(f))
184
- next unless mc.data&.id
185
-
186
- lc_map = mc.data.localized_concepts || {}
187
- lc_map.each_value do |uuid|
188
- lc_file = lc_index ? lc_index[uuid] : nil
189
- next unless lc_file
190
-
191
- l10n = LocalizedConcept.from_yaml(File.read(lc_file))
192
- mc.add_localization(l10n)
193
- rescue StandardError
194
- next
195
- end
196
-
197
- yield mc
198
- rescue StandardError
199
- next
200
- end
201
- end
202
-
203
- def build_lc_index(lc_dir)
204
- Dir.glob(File.join(lc_dir, "*.{yaml,yml}"))
205
- .to_h { |f| [File.basename(f, ".*"), f] }
206
- end
207
-
208
- def find_localized_concepts_dir(concepts_dir)
209
- %w[localized_concept localized-concept].each do |name|
210
- d = File.join(concepts_dir, name)
211
- return d if File.directory?(d)
212
- end
213
- nil
214
- end
215
-
216
- def detect_schema_version(dir)
217
- concepts_dir = File.join(dir, "concepts")
218
- search_dir = File.directory?(concepts_dir) ? concepts_dir : dir
219
- sample = Dir.glob(File.join(search_dir, "*.yaml")).first
220
- return "2" unless sample
221
-
222
- raw = File.read(sample, encoding: "utf-8")
223
- doc = ConceptDocument.from_yamls(raw)
224
- ver = doc.concept&.schema_version.to_s
225
- ver == "3" ? "3" : "2"
226
- rescue StandardError
227
- "2"
228
- end
229
- end
230
- end
231
- end
@@ -1,183 +0,0 @@
1
- module Glossarist
2
- class ConceptManager < Lutaml::Model::Serializable
3
- attribute :path, :string
4
- attribute :localized_concepts_path, :string
5
- attribute :version, :string, default: -> { "2" }
6
-
7
- key_value do
8
- map :path, to: :path
9
- map %i[localized_concepts_path localizedConceptsPath],
10
- to: :localized_concepts_path
11
- end
12
-
13
- def concept_document_class
14
- ConceptDocument.for_version(version)
15
- end
16
-
17
- def localized_concept_class
18
- if version.to_s == "2"
19
- V2::LocalizedConcept
20
- else
21
- LocalizedConcept
22
- end
23
- end
24
-
25
- def load_from_files(collection: nil)
26
- collection ||= ManagedConceptCollection.new
27
-
28
- Dir.glob(concepts_glob) do |filename|
29
- concepts = load_concept_from_file(filename)
30
-
31
- concepts.each do |concept|
32
- collection.store(concept)
33
- end
34
- rescue StandardError
35
- next
36
- end
37
- end
38
-
39
- def save_to_files(managed_concepts)
40
- managed_concepts.each do |concept|
41
- save_concept_to_file(concept)
42
- end
43
- end
44
-
45
- def save_grouped_concepts_to_files(managed_concepts)
46
- managed_concepts.each do |concept|
47
- save_grouped_concepts_to_file(concept)
48
- end
49
- end
50
-
51
- def load_concept_from_file(filename) # rubocop:disable Metrics/CyclomaticComplexity
52
- raw = File.read(filename, encoding: "utf-8")
53
- self.version = detect_version(raw)
54
- doc = concept_document_class.from_yamls(raw)
55
- concept = doc.concept
56
- unless concept
57
- raise Glossarist::ParseError.new(filename: filename)
58
- end
59
-
60
- concept_uuid = concept.identifier || concept.data&.id || File.basename(
61
- filename, ".*"
62
- )
63
- concept.uuid = concept_uuid
64
-
65
- concept.data.localized_concepts.each_value do |id|
66
- localized_concept = load_localized_concept(id, doc.localizations)
67
- concept.add_l10n(localized_concept)
68
- end
69
-
70
- [concept]
71
- rescue Psych::SyntaxError => e
72
- raise Glossarist::ParseError.new(filename: filename, line: e.line)
73
- end
74
-
75
- def load_localized_concept(id, inline_localizations = nil)
76
- if inline_localizations
77
- l10n = inline_localizations.find { |l| l.id == id }
78
- if l10n
79
- l10n.uuid = id
80
- return l10n
81
- end
82
- end
83
-
84
- l10n = localized_concept_class.from_yaml(
85
- File.read(localized_concept_path(id), encoding: "utf-8"),
86
- )
87
- l10n.uuid = id
88
- l10n
89
- rescue Psych::SyntaxError => e
90
- raise Glossarist::ParseError.new(filename: filename, line: e.line)
91
- end
92
-
93
- def save_concept_to_file(concept)
94
- @localized_concepts_path ||= "localized_concept"
95
- concept_dir = File.join(path, "concept")
96
-
97
- localized_concept_dir = File.join(path, @localized_concepts_path)
98
-
99
- FileUtils.mkdir_p(concept_dir)
100
- FileUtils.mkdir_p(localized_concept_dir)
101
-
102
- filename = File.join(concept_dir, "#{concept.uuid}.yaml")
103
- File.write(filename, concept.to_yaml, encoding: "utf-8")
104
-
105
- concept.localized_concepts.each do |lang, uuid|
106
- filename = File.join(localized_concept_dir, "#{uuid}.yaml")
107
- File.write(filename, concept.localization(lang).to_yaml,
108
- encoding: "utf-8")
109
- end
110
- end
111
-
112
- def save_grouped_concepts_to_file(concept)
113
- @localized_concepts_path ||= "localized_concept"
114
- concept_dir = File.join(path)
115
-
116
- FileUtils.mkdir_p(concept_dir)
117
-
118
- content = []
119
-
120
- filename = File.join(concept_dir, "#{concept.uuid}.yaml")
121
- content << concept.to_yaml
122
-
123
- concept.localized_concepts.each_key do |lang|
124
- content << concept.localization(lang).to_yaml
125
- end
126
-
127
- File.write(filename, content.join("\n"), encoding: "utf-8")
128
- end
129
-
130
- def concepts_glob
131
- return path if File.file?(path)
132
-
133
- if v1_collection?
134
- File.join(path, "concept-*.{yaml,yml}")
135
- else
136
- candidates = [
137
- File.join(path, "concept", "*.{yaml,yml}"),
138
- File.join(path, "concepts", "*.{yaml,yml}"),
139
- File.join(path, "*.{yaml,yml}"),
140
- ]
141
- candidates.find { |g| !Dir.glob(g).empty? }
142
- end
143
- end
144
-
145
- def localized_concept_path(id)
146
- localized_concept_possible_dir = {
147
- "localized_concept" => File.join(
148
- path,
149
- "localized_concept",
150
- "#{id}.{yaml,yml}",
151
- ),
152
-
153
- "localized-concept" => File.join(
154
- path,
155
- "localized-concept",
156
- "#{id}.{yaml,yml}",
157
- ),
158
- }
159
-
160
- localized_concept_possible_dir.each do |dir_name, file_path|
161
- actual_path = Dir.glob(file_path)&.first
162
-
163
- if actual_path
164
- @localized_concepts_path = dir_name
165
- return actual_path
166
- end
167
- end
168
- end
169
-
170
- def detect_version(raw)
171
- if (m = raw.match(/^schema_version:\s*v?(\d)/))
172
- m[1]
173
- else
174
- version
175
- end
176
- end
177
-
178
- def v1_collection?
179
- @v1_collection ||= !Dir.glob(File.join(path,
180
- "concept-*.{yaml,yml}")).empty?
181
- end
182
- end
183
- end
@@ -1,8 +0,0 @@
1
- module Glossarist
2
- class CacheVersionMismatchError < Error
3
- def initialize(cache_dir, expected, actual)
4
- super("Relaton cache version mismatch in '#{cache_dir}': " \
5
- "expected '#{expected}', got '#{actual}'")
6
- end
7
- end
8
- end
@@ -1,15 +0,0 @@
1
- module Glossarist
2
- class InvalidLanguageCodeError < Error
3
- attr_reader :code
4
-
5
- def initialize(code:)
6
- @code = code
7
-
8
- super()
9
- end
10
-
11
- def to_s
12
- "Invalid value for language_code: `#{code}`. It must be 3 characters long string."
13
- end
14
- end
15
- end
@@ -1,4 +0,0 @@
1
- module Glossarist
2
- class InvalidTypeError < Error
3
- end
4
- end
@@ -1,16 +0,0 @@
1
- module Glossarist
2
- class ParseError < Error
3
- attr_accessor :line, :filename
4
-
5
- def initialize(filename:, line: nil)
6
- @filename = filename
7
- @line = line
8
-
9
- super()
10
- end
11
-
12
- def to_s
13
- "Unable to parse file: #{filename}, error on line: #{line}"
14
- end
15
- end
16
- end