glossarist 2.8.7 → 2.8.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop_todo.yml +128 -11
- data/CLAUDE.md +34 -3
- data/Gemfile +1 -0
- data/lib/glossarist/cli/compare_command.rb +2 -2
- data/lib/glossarist/cli/export_command.rb +1 -3
- data/lib/glossarist/collection.rb +1 -1
- data/lib/glossarist/collections/bibliography_collection.rb +1 -1
- data/lib/glossarist/concept_data.rb +1 -0
- data/lib/glossarist/concept_reference.rb +7 -1
- data/lib/glossarist/concept_source.rb +2 -2
- data/lib/glossarist/concept_validator.rb +3 -1
- data/lib/glossarist/dataset_validator.rb +1 -1
- data/lib/glossarist/{error.rb → errors/base.rb} +3 -1
- data/lib/glossarist/errors/cache_version_mismatch_error.rb +12 -0
- data/lib/glossarist/errors/invalid_language_code_error.rb +19 -0
- data/lib/glossarist/errors/invalid_type_error.rb +8 -0
- data/lib/glossarist/errors/load_error.rb +22 -0
- data/lib/glossarist/errors/parse_error.rb +24 -0
- data/lib/glossarist/errors.rb +14 -0
- data/lib/glossarist/gcr_package.rb +4 -2
- data/lib/glossarist/glossary_store.rb +175 -1
- data/lib/glossarist/managed_concept.rb +16 -2
- data/lib/glossarist/managed_concept_collection.rb +52 -8
- data/lib/glossarist/reference_extractor.rb +22 -2
- data/lib/glossarist/reference_resolver.rb +38 -3
- data/lib/glossarist/resolution_adapter/bibliography.rb +22 -0
- data/lib/glossarist/resolution_adapter.rb +1 -0
- data/lib/glossarist/schema_migration/v0_to_v1.rb +200 -0
- data/lib/glossarist/schema_migration/v2_to_v3.rb +50 -0
- data/lib/glossarist/schema_migration.rb +10 -224
- data/lib/glossarist/sts/importer.rb +11 -12
- data/lib/glossarist/sts/term_extractor.rb +104 -6
- data/lib/glossarist/validation/asset_index.rb +1 -1
- data/lib/glossarist/validation/rules/cite_ref_integrity_rule.rb +75 -0
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +5 -13
- data/scripts/upgrade_dataset_to_v3.rb +1 -1
- metadata +13 -9
- data/lib/glossarist/concept_collector.rb +0 -231
- data/lib/glossarist/concept_manager.rb +0 -183
- data/lib/glossarist/error/cache_version_mismatch_error.rb +0 -8
- data/lib/glossarist/error/invalid_language_code_error.rb +0 -15
- data/lib/glossarist/error/invalid_type_error.rb +0 -4
- data/lib/glossarist/error/parse_error.rb +0 -16
|
@@ -2,16 +2,33 @@
|
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
4
|
class GlossaryStore
|
|
5
|
-
attr_reader :package
|
|
5
|
+
attr_reader :package, :localized_concepts_dir_name
|
|
6
6
|
|
|
7
7
|
def initialize
|
|
8
8
|
@package = nil
|
|
9
9
|
@concept_document_class = V3::ConceptDocument
|
|
10
|
+
@v1_concepts = nil
|
|
11
|
+
@localized_concepts_dir_name = nil
|
|
10
12
|
end
|
|
11
13
|
|
|
12
14
|
# ── Load ──
|
|
13
15
|
|
|
14
16
|
def load_directory(path, format: nil)
|
|
17
|
+
if v1_dataset?(path)
|
|
18
|
+
load_v1_fallback(path)
|
|
19
|
+
return self
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
if legacy_managed_layout?(path)
|
|
23
|
+
load_legacy_managed(path)
|
|
24
|
+
return self
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
if grouped_at_root?(path)
|
|
28
|
+
load_grouped_at_root(path)
|
|
29
|
+
return self
|
|
30
|
+
end
|
|
31
|
+
|
|
15
32
|
metadata = load_metadata_from_directory(path)
|
|
16
33
|
@concept_document_class = resolve_concept_document_class(metadata)
|
|
17
34
|
|
|
@@ -64,9 +81,23 @@ module Glossarist
|
|
|
64
81
|
# ── Concepts ──
|
|
65
82
|
|
|
66
83
|
def concepts
|
|
84
|
+
return @v1_concepts if @v1_concepts
|
|
85
|
+
|
|
67
86
|
@package.models_for(@concept_document_class).map(&:to_managed_concept)
|
|
68
87
|
end
|
|
69
88
|
|
|
89
|
+
def each_concept(&block)
|
|
90
|
+
return enum_for(:each_concept) unless block
|
|
91
|
+
|
|
92
|
+
if @v1_concepts
|
|
93
|
+
@v1_concepts.each(&block)
|
|
94
|
+
else
|
|
95
|
+
@package.models_for(@concept_document_class).each do |doc|
|
|
96
|
+
yield doc.to_managed_concept
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
70
101
|
def concept(uuid)
|
|
71
102
|
doc = @package.fetch_model(@concept_document_class, uuid)
|
|
72
103
|
doc&.to_managed_concept
|
|
@@ -194,5 +225,148 @@ module Glossarist
|
|
|
194
225
|
def apply_metadata(metadata)
|
|
195
226
|
@package.metadata = metadata if metadata && @package
|
|
196
227
|
end
|
|
228
|
+
|
|
229
|
+
def load_v1_fallback(path)
|
|
230
|
+
concepts_dir = File.join(path, "concepts")
|
|
231
|
+
files = Dir.glob(File.join(concepts_dir, "*.yaml"))
|
|
232
|
+
@v1_concepts = files.filter_map do |file|
|
|
233
|
+
v1 = V1::Concept.from_file(file)
|
|
234
|
+
v1&.to_managed_concept
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
def legacy_managed_layout?(path)
|
|
239
|
+
concept_dir = File.join(path, "concept")
|
|
240
|
+
return false unless File.directory?(concept_dir)
|
|
241
|
+
return false if File.directory?(File.join(path, "concepts"))
|
|
242
|
+
|
|
243
|
+
Dir.glob(File.join(concept_dir, "*.yaml")).any?
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
def load_legacy_managed(path)
|
|
247
|
+
concept_dir = File.join(path, "concept")
|
|
248
|
+
lc_dir = find_localized_concepts_dir(path)
|
|
249
|
+
lc_index = build_lc_index(lc_dir) if lc_dir
|
|
250
|
+
|
|
251
|
+
@v1_concepts = []
|
|
252
|
+
Dir.glob(File.join(concept_dir, "*.yaml")).each do |f|
|
|
253
|
+
raw = File.read(f, encoding: "utf-8")
|
|
254
|
+
version = detect_version(raw)
|
|
255
|
+
doc_class = ConceptDocument.for_version(version)
|
|
256
|
+
doc = doc_class.from_yamls(raw)
|
|
257
|
+
mc = doc.concept
|
|
258
|
+
next unless mc&.data&.id
|
|
259
|
+
|
|
260
|
+
load_legacy_localizations(mc, lc_index, version) if lc_index
|
|
261
|
+
@v1_concepts << mc
|
|
262
|
+
rescue Psych::SyntaxError => e
|
|
263
|
+
raise Errors::ParseError.new(filename: f, line: e.line)
|
|
264
|
+
rescue Lutaml::Model::InvalidFormatError => e
|
|
265
|
+
raise Errors::ParseError.new(filename: f, message: e.message)
|
|
266
|
+
rescue Encoding::InvalidByteSequenceError => e
|
|
267
|
+
raise Errors::LoadError.new(path: f, reason: e.message)
|
|
268
|
+
end
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
def load_legacy_localizations(managed_concept, lc_index, version = "3")
|
|
272
|
+
l10n_class = version.to_s == "2" ? V2::LocalizedConcept : LocalizedConcept
|
|
273
|
+
lc_map = managed_concept.data.localized_concepts || {}
|
|
274
|
+
lc_map.each_value do |uuid|
|
|
275
|
+
lc_file = lc_index[uuid]
|
|
276
|
+
unless lc_file
|
|
277
|
+
raise Errors::LoadError.new(path: lc_file,
|
|
278
|
+
reason: "Referenced localization #{uuid} not found")
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
l10n = l10n_class.from_yaml(File.read(lc_file, encoding: "utf-8"))
|
|
282
|
+
l10n.uuid = uuid
|
|
283
|
+
managed_concept.add_localization(l10n)
|
|
284
|
+
rescue Errors::LoadError
|
|
285
|
+
raise
|
|
286
|
+
rescue Psych::SyntaxError => e
|
|
287
|
+
raise Errors::ParseError.new(filename: lc_file, line: e.line)
|
|
288
|
+
rescue Errno::ENOENT
|
|
289
|
+
raise Errors::LoadError.new(path: lc_file, reason: "File not found")
|
|
290
|
+
rescue Errno::EACCES
|
|
291
|
+
raise Errors::LoadError.new(path: lc_file, reason: "Permission denied")
|
|
292
|
+
end
|
|
293
|
+
end
|
|
294
|
+
|
|
295
|
+
def find_localized_concepts_dir(path)
|
|
296
|
+
%w[localized_concept localized-concept].each do |name|
|
|
297
|
+
d = File.join(path, name)
|
|
298
|
+
if File.directory?(d)
|
|
299
|
+
@localized_concepts_dir_name = name
|
|
300
|
+
return d
|
|
301
|
+
end
|
|
302
|
+
end
|
|
303
|
+
nil
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
def build_lc_index(lc_dir)
|
|
307
|
+
Dir.glob(File.join(lc_dir, "*.{yaml,yml}"))
|
|
308
|
+
.to_h { |f| [File.basename(f, ".*"), f] }
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
def grouped_at_root?(path)
|
|
312
|
+
return false if File.directory?(File.join(path, "concepts"))
|
|
313
|
+
return false if File.directory?(File.join(path, "concept"))
|
|
314
|
+
|
|
315
|
+
Dir.glob(File.join(path, "*.yaml")).any? do |f|
|
|
316
|
+
raw = File.read(f, encoding: "utf-8")
|
|
317
|
+
hash = YAML.safe_load(raw, permitted_classes: [Date, Time])
|
|
318
|
+
hash.is_a?(Hash) && hash.key?("data") && hash["data"].is_a?(Hash) &&
|
|
319
|
+
hash["data"].key?("identifier")
|
|
320
|
+
rescue Psych::SyntaxError, Encoding::InvalidByteSequenceError
|
|
321
|
+
false
|
|
322
|
+
end
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
def load_grouped_at_root(path)
|
|
326
|
+
@v1_concepts = []
|
|
327
|
+
Dir.glob(File.join(path, "*.yaml")).each do |f|
|
|
328
|
+
raw = File.read(f, encoding: "utf-8")
|
|
329
|
+
version = detect_version(raw)
|
|
330
|
+
doc_class = ConceptDocument.for_version(version)
|
|
331
|
+
doc = doc_class.from_yamls(raw)
|
|
332
|
+
mc = doc.concept
|
|
333
|
+
next unless mc&.data&.id
|
|
334
|
+
|
|
335
|
+
Array(doc.localizations).each { |l10n| mc.add_localization(l10n) }
|
|
336
|
+
@v1_concepts << mc
|
|
337
|
+
rescue Psych::SyntaxError => e
|
|
338
|
+
raise Errors::ParseError.new(filename: f, line: e.line)
|
|
339
|
+
rescue Lutaml::Model::InvalidFormatError => e
|
|
340
|
+
raise Errors::ParseError.new(filename: f, message: e.message)
|
|
341
|
+
rescue Encoding::InvalidByteSequenceError => e
|
|
342
|
+
raise Errors::LoadError.new(path: f, reason: e.message)
|
|
343
|
+
end
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
def detect_version(raw)
|
|
347
|
+
if (m = raw.match(/^schema_version:\s*v?(\d)/))
|
|
348
|
+
m[1]
|
|
349
|
+
else
|
|
350
|
+
"2"
|
|
351
|
+
end
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
def v1_dataset?(path)
|
|
355
|
+
concepts_dir = File.join(path, "concepts")
|
|
356
|
+
return false unless File.directory?(concepts_dir)
|
|
357
|
+
|
|
358
|
+
metadata_file = File.join(path, "metadata.yaml")
|
|
359
|
+
concept_subdir = File.join(concepts_dir, "concept")
|
|
360
|
+
return false if File.exist?(metadata_file) || File.directory?(concept_subdir)
|
|
361
|
+
|
|
362
|
+
sample = Dir.glob(File.join(concepts_dir, "*.yaml")).first
|
|
363
|
+
return false unless sample
|
|
364
|
+
|
|
365
|
+
raw = File.read(sample, encoding: "utf-8")
|
|
366
|
+
hash = YAML.safe_load(raw, permitted_classes: [Date, Time])
|
|
367
|
+
hash.is_a?(Hash) && hash.key?("termid")
|
|
368
|
+
rescue Psych::SyntaxError, Encoding::InvalidByteSequenceError
|
|
369
|
+
false
|
|
370
|
+
end
|
|
197
371
|
end
|
|
198
372
|
end
|
|
@@ -19,11 +19,11 @@ module Glossarist
|
|
|
19
19
|
|
|
20
20
|
attribute :uuid, :string
|
|
21
21
|
|
|
22
|
+
attribute :version, :string
|
|
22
23
|
attribute :schema_version, :string
|
|
23
24
|
|
|
24
25
|
key_value do
|
|
25
26
|
map :data, to: :data
|
|
26
|
-
map :id, with: { to: :identifier_to_yaml, from: :identifier_from_yaml }
|
|
27
27
|
map :identifier,
|
|
28
28
|
with: { to: :identifier_to_yaml, from: :identifier_from_yaml }
|
|
29
29
|
map :related, to: :related
|
|
@@ -33,7 +33,8 @@ module Glossarist
|
|
|
33
33
|
with: { from: :date_accepted_from_yaml, to: :date_accepted_to_yaml }
|
|
34
34
|
map :status, to: :status
|
|
35
35
|
|
|
36
|
-
map
|
|
36
|
+
map %i[id uuid], to: :uuid,
|
|
37
|
+
with: { from: :uuid_from_yaml, to: :uuid_to_yaml }
|
|
37
38
|
map :schema_version, to: :schema_version
|
|
38
39
|
end
|
|
39
40
|
|
|
@@ -143,6 +144,19 @@ module Glossarist
|
|
|
143
144
|
localization("eng") || localizations.values.first
|
|
144
145
|
end
|
|
145
146
|
|
|
147
|
+
def all_sources
|
|
148
|
+
list = Array(sources)
|
|
149
|
+
list.concat(Array(data&.sources))
|
|
150
|
+
localizations.each_value { |l10n| list.concat(l10n.all_sources) }
|
|
151
|
+
list
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def find_source_by_id(id)
|
|
155
|
+
return nil if id.nil? || id.to_s.strip.empty?
|
|
156
|
+
|
|
157
|
+
all_sources.find { |source| source.id == id }
|
|
158
|
+
end
|
|
159
|
+
|
|
146
160
|
def schema_version
|
|
147
161
|
@schema_version
|
|
148
162
|
end
|
|
@@ -1,13 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Glossarist
|
|
2
4
|
class ManagedConceptCollection
|
|
3
5
|
include Enumerable
|
|
4
6
|
|
|
5
7
|
attr_accessor :managed_concepts
|
|
6
8
|
|
|
7
|
-
def initialize
|
|
9
|
+
def initialize(file_key: nil)
|
|
8
10
|
@managed_concepts = []
|
|
9
11
|
@managed_concepts_ids = {}
|
|
10
|
-
@
|
|
12
|
+
@file_key = file_key
|
|
11
13
|
end
|
|
12
14
|
|
|
13
15
|
def to_h
|
|
@@ -33,6 +35,16 @@ module Glossarist
|
|
|
33
35
|
end
|
|
34
36
|
alias :[] :fetch
|
|
35
37
|
|
|
38
|
+
def by_id_and(id, version = nil)
|
|
39
|
+
return fetch(id) if version.nil?
|
|
40
|
+
|
|
41
|
+
@managed_concepts.find do |c|
|
|
42
|
+
next false unless c.uuid == id || c.uuid == @managed_concepts_ids[id]
|
|
43
|
+
|
|
44
|
+
c.version == version
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
|
|
36
48
|
# If ManagedConcept with given ID is present in this collection, then
|
|
37
49
|
# returns it. Otherwise, instantiates a new ManagedConcept, adds it to
|
|
38
50
|
# the collection, and returns it.
|
|
@@ -62,18 +74,50 @@ module Glossarist
|
|
|
62
74
|
alias :<< :store
|
|
63
75
|
|
|
64
76
|
def load_from_files(path)
|
|
65
|
-
|
|
66
|
-
|
|
77
|
+
store = GlossaryStore.new
|
|
78
|
+
store.load(path)
|
|
79
|
+
store.concepts.each { |mc| store(mc) }
|
|
80
|
+
@localized_concepts_path = store.localized_concepts_dir_name || "localized_concept"
|
|
67
81
|
end
|
|
68
82
|
|
|
69
83
|
def save_to_files(path)
|
|
70
|
-
|
|
71
|
-
|
|
84
|
+
concept_dir = File.join(path, "concept")
|
|
85
|
+
lc_dir = File.join(path, @localized_concepts_path || "localized_concept")
|
|
86
|
+
FileUtils.mkdir_p(concept_dir)
|
|
87
|
+
FileUtils.mkdir_p(lc_dir)
|
|
88
|
+
|
|
89
|
+
@managed_concepts.each do |mc|
|
|
90
|
+
File.write(File.join(concept_dir, "#{file_key(mc)}.yaml"), mc.to_yaml,
|
|
91
|
+
encoding: "utf-8")
|
|
92
|
+
|
|
93
|
+
mc.localized_concepts.each do |lang, uuid|
|
|
94
|
+
l10n = mc.localization(lang)
|
|
95
|
+
next unless l10n
|
|
96
|
+
|
|
97
|
+
File.write(File.join(lc_dir, "#{uuid}.yaml"), l10n.to_yaml,
|
|
98
|
+
encoding: "utf-8")
|
|
99
|
+
end
|
|
100
|
+
end
|
|
72
101
|
end
|
|
73
102
|
|
|
74
103
|
def save_grouped_concepts_to_files(path)
|
|
75
|
-
|
|
76
|
-
|
|
104
|
+
FileUtils.mkdir_p(path)
|
|
105
|
+
|
|
106
|
+
@managed_concepts.each do |mc|
|
|
107
|
+
parts = [mc.to_yaml]
|
|
108
|
+
mc.localized_concepts.each_key do |lang|
|
|
109
|
+
l10n = mc.localization(lang)
|
|
110
|
+
parts << l10n.to_yaml if l10n
|
|
111
|
+
end
|
|
112
|
+
File.write(File.join(path, "#{file_key(mc)}.yaml"), parts.join("\n"),
|
|
113
|
+
encoding: "utf-8")
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
private
|
|
118
|
+
|
|
119
|
+
def file_key(concept)
|
|
120
|
+
@file_key ? @file_key.call(concept) : concept.uuid
|
|
77
121
|
end
|
|
78
122
|
end
|
|
79
123
|
end
|
|
@@ -72,8 +72,8 @@ module Glossarist
|
|
|
72
72
|
|
|
73
73
|
if content.include?(",")
|
|
74
74
|
parts = content.split(",", 2)
|
|
75
|
-
|
|
76
|
-
|
|
75
|
+
identifier = parts[0].strip
|
|
76
|
+
display = parts[1].strip
|
|
77
77
|
resolve_by_identifier(identifier, display)
|
|
78
78
|
else
|
|
79
79
|
resolve_by_identifier(content, nil)
|
|
@@ -114,6 +114,22 @@ module Glossarist
|
|
|
114
114
|
)
|
|
115
115
|
end
|
|
116
116
|
|
|
117
|
+
def resolve_cite_key(identifier, display)
|
|
118
|
+
cleaned = identifier.delete_prefix("cite:").strip
|
|
119
|
+
return nil if cleaned.empty?
|
|
120
|
+
|
|
121
|
+
if cleaned.start_with?('"') && cleaned.end_with?('"') && cleaned.length >= 2
|
|
122
|
+
cleaned = cleaned[1..-2].gsub('""', '"')
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
ConceptReference.new(
|
|
126
|
+
concept_id: cleaned,
|
|
127
|
+
source: nil,
|
|
128
|
+
term: display || cleaned,
|
|
129
|
+
ref_type: "cite",
|
|
130
|
+
)
|
|
131
|
+
end
|
|
132
|
+
|
|
117
133
|
def resolve_iec_urn(urn, display)
|
|
118
134
|
concept_id = extract_iec_concept_id(urn)
|
|
119
135
|
|
|
@@ -273,6 +289,10 @@ module Glossarist
|
|
|
273
289
|
regex: /image::?([^\[\]]+)\[/,
|
|
274
290
|
) { |ext, path| ext.resolve_image_ref(path) }
|
|
275
291
|
|
|
292
|
+
register_identifier_resolver("cite:") do |ext, identifier, display|
|
|
293
|
+
ext.resolve_cite_key(identifier, display)
|
|
294
|
+
end
|
|
295
|
+
|
|
276
296
|
register_identifier_resolver("urn:iec:std:iec:60050") do |ext, identifier, display|
|
|
277
297
|
ext.resolve_iec_urn(identifier, display)
|
|
278
298
|
end
|
|
@@ -5,6 +5,7 @@ module Glossarist
|
|
|
5
5
|
def initialize
|
|
6
6
|
@local_adapter = nil
|
|
7
7
|
@package_adapters = []
|
|
8
|
+
@bibliography_adapters = []
|
|
8
9
|
@route_adapter = ResolutionAdapter::Route.new
|
|
9
10
|
@remote_adapters = []
|
|
10
11
|
end
|
|
@@ -31,13 +32,28 @@ module Glossarist
|
|
|
31
32
|
endpoint: endpoint)
|
|
32
33
|
end
|
|
33
34
|
|
|
34
|
-
def
|
|
35
|
+
def register_bibliography(source_id, concepts)
|
|
36
|
+
@bibliography_adapters << ResolutionAdapter::Bibliography.new(source_id,
|
|
37
|
+
concepts)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def resolve(reference, concept: nil)
|
|
41
|
+
if concept && reference.is_a?(ConceptReference) && reference.cite?
|
|
42
|
+
source = concept.find_source_by_id(reference.concept_id)
|
|
43
|
+
return source&.origin
|
|
44
|
+
end
|
|
45
|
+
|
|
35
46
|
if reference.local?
|
|
36
47
|
return @local_adapter&.resolve(reference)
|
|
37
48
|
end
|
|
38
49
|
|
|
39
50
|
routed_ref = apply_routes(reference)
|
|
40
51
|
|
|
52
|
+
@bibliography_adapters.each do |adapter|
|
|
53
|
+
result = adapter.resolve(routed_ref)
|
|
54
|
+
return result if result
|
|
55
|
+
end
|
|
56
|
+
|
|
41
57
|
@package_adapters.each do |adapter|
|
|
42
58
|
result = adapter.resolve(routed_ref)
|
|
43
59
|
return result if result
|
|
@@ -54,7 +70,8 @@ module Glossarist
|
|
|
54
70
|
def resolve_all(concept, extractor: nil)
|
|
55
71
|
extractor ||= ReferenceExtractor.new
|
|
56
72
|
refs = extract_refs(concept, extractor)
|
|
57
|
-
|
|
73
|
+
source_concept = concept.is_a?(ManagedConcept) ? concept : nil
|
|
74
|
+
refs.map { |ref| [ref, resolve(ref, concept: source_concept)] }
|
|
58
75
|
end
|
|
59
76
|
|
|
60
77
|
def validate_all(package_or_concepts, extractor: nil, mode: :multi)
|
|
@@ -65,9 +82,10 @@ module Glossarist
|
|
|
65
82
|
concepts.each do |concept|
|
|
66
83
|
refs = extract_refs(concept, extractor)
|
|
67
84
|
termid = extract_termid(concept)
|
|
85
|
+
source_concept = concept.is_a?(ManagedConcept) ? concept : nil
|
|
68
86
|
|
|
69
87
|
refs.each do |ref|
|
|
70
|
-
resolved = resolve(ref)
|
|
88
|
+
resolved = resolve(ref, concept: source_concept)
|
|
71
89
|
if resolved.nil?
|
|
72
90
|
scope = ref.local? ? "intra-set" : "inter-set (#{ref.source})"
|
|
73
91
|
result.add_warning("#{termid}: Unresolvable #{scope} reference: #{ref.term} -> #{ref.concept_id}")
|
|
@@ -98,8 +116,25 @@ module Glossarist
|
|
|
98
116
|
@package_adapters.map(&:uri_prefix)
|
|
99
117
|
end
|
|
100
118
|
|
|
119
|
+
def classify(reference, concept: nil)
|
|
120
|
+
return "unknown" unless reference.is_a?(ConceptReference)
|
|
121
|
+
|
|
122
|
+
resolved = resolve(reference, concept: concept)
|
|
123
|
+
classify_from_resolution(reference, resolved)
|
|
124
|
+
end
|
|
125
|
+
|
|
101
126
|
private
|
|
102
127
|
|
|
128
|
+
def classify_from_resolution(reference, resolved)
|
|
129
|
+
if reference.cite?
|
|
130
|
+
resolved ? "self-contained-citation" : "unresolved-citation"
|
|
131
|
+
elsif reference.external?
|
|
132
|
+
resolved ? "internal-citation" : "external-citation"
|
|
133
|
+
else
|
|
134
|
+
resolved ? "same-dataset" : "unresolved"
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
103
138
|
def apply_routes(reference)
|
|
104
139
|
routed = @route_adapter.resolve(reference)
|
|
105
140
|
routed || reference
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class ResolutionAdapter
|
|
5
|
+
class Bibliography < ResolutionAdapter
|
|
6
|
+
attr_reader :source_id, :concepts
|
|
7
|
+
|
|
8
|
+
def initialize(source_id, concepts)
|
|
9
|
+
super()
|
|
10
|
+
@source_id = source_id
|
|
11
|
+
@concepts = concepts
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def resolve(reference)
|
|
15
|
+
return nil unless reference.is_a?(ConceptReference)
|
|
16
|
+
return nil unless reference.source == @source_id
|
|
17
|
+
|
|
18
|
+
concepts.by_id_and(reference.concept_id, reference.version)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -4,6 +4,7 @@ module Glossarist
|
|
|
4
4
|
class ResolutionAdapter
|
|
5
5
|
autoload :Local, "glossarist/resolution_adapter/local"
|
|
6
6
|
autoload :Package, "glossarist/resolution_adapter/package"
|
|
7
|
+
autoload :Bibliography, "glossarist/resolution_adapter/bibliography"
|
|
7
8
|
autoload :Route, "glossarist/resolution_adapter/route"
|
|
8
9
|
autoload :Remote, "glossarist/resolution_adapter/remote"
|
|
9
10
|
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class SchemaMigration
|
|
5
|
+
class V0ToV1
|
|
6
|
+
ENTRY_STATUS_MAP = {
|
|
7
|
+
"Standard" => "valid",
|
|
8
|
+
"Confirmed" => "valid",
|
|
9
|
+
"Proposed" => "draft",
|
|
10
|
+
}.freeze
|
|
11
|
+
|
|
12
|
+
LANG_CODES = Glossarist::LANG_CODES
|
|
13
|
+
|
|
14
|
+
IEV_PATTERN = /\{\{([^,}]+),\s*IEV:([^}]+)\}\}/
|
|
15
|
+
URN_PATTERN = /\{urn:iso:std:iso:(\d+):([^,}]+),([^}]+)\}/
|
|
16
|
+
|
|
17
|
+
attr_reader :from_version, :to_version
|
|
18
|
+
|
|
19
|
+
def initialize(concept_hash, from_version: "0",
|
|
20
|
+
to_version: SchemaMigration::CURRENT_SCHEMA_VERSION,
|
|
21
|
+
ref_maps: {})
|
|
22
|
+
@concept = concept_hash
|
|
23
|
+
@from_version = from_version
|
|
24
|
+
@to_version = to_version
|
|
25
|
+
@ref_maps = ref_maps
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def migrate
|
|
29
|
+
case [from_version, to_version]
|
|
30
|
+
when ["0", "1"] then migrate_v0_to_v1
|
|
31
|
+
else
|
|
32
|
+
raise Errors::Base,
|
|
33
|
+
"Unsupported migration: #{from_version} -> #{to_version}"
|
|
34
|
+
end
|
|
35
|
+
@concept
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
private
|
|
39
|
+
|
|
40
|
+
def migrate_v0_to_v1
|
|
41
|
+
migrate_termid
|
|
42
|
+
LANG_CODES.each do |lang|
|
|
43
|
+
migrate_language_block(lang) if @concept[lang]
|
|
44
|
+
end
|
|
45
|
+
strip_revisions
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def migrate_termid
|
|
49
|
+
if @concept.key?("termid")
|
|
50
|
+
@concept["termid"] =
|
|
51
|
+
String(@concept["termid"])
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def migrate_language_block(lang)
|
|
56
|
+
l10n_block = @concept[lang]
|
|
57
|
+
return unless l10n_block.is_a?(Hash)
|
|
58
|
+
|
|
59
|
+
migrate_definition(l10n_block)
|
|
60
|
+
migrate_authoritative_source(l10n_block)
|
|
61
|
+
migrate_dates(l10n_block)
|
|
62
|
+
migrate_entry_status(l10n_block)
|
|
63
|
+
migrate_terms_abbrev(l10n_block)
|
|
64
|
+
extract_inline_refs(l10n_block)
|
|
65
|
+
strip_revisions(l10n_block)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def migrate_definition(l10n_block)
|
|
69
|
+
return unless l10n_block.key?("definition")
|
|
70
|
+
return unless l10n_block["definition"].is_a?(String)
|
|
71
|
+
|
|
72
|
+
l10n_block["definition"] = [{ "content" => l10n_block["definition"] }]
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def migrate_authoritative_source(l10n_block)
|
|
76
|
+
return unless l10n_block.key?("authoritative_source")
|
|
77
|
+
|
|
78
|
+
src = l10n_block.delete("authoritative_source")
|
|
79
|
+
return if l10n_block.key?("sources")
|
|
80
|
+
|
|
81
|
+
sources = (src.is_a?(Array) ? src : [src]).filter_map do |s|
|
|
82
|
+
next unless s.is_a?(Hash)
|
|
83
|
+
|
|
84
|
+
origin = {}
|
|
85
|
+
origin["ref"] = s["ref"] if s["ref"]
|
|
86
|
+
origin["clause"] = s["clause"] if s["clause"]
|
|
87
|
+
origin["link"] = s["link"] if s["link"]
|
|
88
|
+
|
|
89
|
+
entry = { "type" => "authoritative", "origin" => origin }
|
|
90
|
+
if s["relationship"]
|
|
91
|
+
entry["status"] = s["relationship"]["type"] || "identical"
|
|
92
|
+
if s["relationship"]["modification"]
|
|
93
|
+
entry["modification"] =
|
|
94
|
+
s["relationship"]["modification"]
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
entry
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
l10n_block["sources"] = sources if sources.any?
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def migrate_dates(l10n_block)
|
|
104
|
+
return if l10n_block.key?("dates")
|
|
105
|
+
|
|
106
|
+
dates = []
|
|
107
|
+
if l10n_block["date_accepted"]
|
|
108
|
+
dates << { "type" => "accepted",
|
|
109
|
+
"date" => l10n_block["date_accepted"] }
|
|
110
|
+
end
|
|
111
|
+
if l10n_block["date_amended"]
|
|
112
|
+
dates << { "type" => "amended", "date" => l10n_block["date_amended"] }
|
|
113
|
+
end
|
|
114
|
+
l10n_block["dates"] = dates if dates.any?
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def migrate_entry_status(l10n_block)
|
|
118
|
+
return unless l10n_block.key?("entry_status")
|
|
119
|
+
|
|
120
|
+
mapped = ENTRY_STATUS_MAP[l10n_block["entry_status"]]
|
|
121
|
+
l10n_block["entry_status"] = mapped if mapped
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def migrate_terms_abbrev(l10n_block)
|
|
125
|
+
return unless l10n_block["terms"].is_a?(Array)
|
|
126
|
+
|
|
127
|
+
l10n_block["terms"].each do |term|
|
|
128
|
+
next unless term.is_a?(Hash)
|
|
129
|
+
next unless term["abbrev"] == true
|
|
130
|
+
|
|
131
|
+
term["type"] = "abbreviation"
|
|
132
|
+
term.delete("abbrev")
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def extract_inline_refs(l10n_block)
|
|
137
|
+
texts = []
|
|
138
|
+
|
|
139
|
+
if l10n_block["definition"].is_a?(Array)
|
|
140
|
+
l10n_block["definition"].each do |d|
|
|
141
|
+
texts << (d.is_a?(Hash) ? d["content"].to_s : d.to_s)
|
|
142
|
+
end
|
|
143
|
+
elsif l10n_block["definition"].is_a?(String)
|
|
144
|
+
texts << l10n_block["definition"]
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
Array(l10n_block["notes"]).each do |n|
|
|
148
|
+
texts << (n.is_a?(Hash) ? n["content"].to_s : n.to_s)
|
|
149
|
+
end
|
|
150
|
+
Array(l10n_block["examples"]).each do |e|
|
|
151
|
+
texts << (e.is_a?(Hash) ? e["content"].to_s : e.to_s)
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
full_text = texts.join(" ")
|
|
155
|
+
|
|
156
|
+
refs = []
|
|
157
|
+
|
|
158
|
+
full_text.scan(IEV_PATTERN) do |term, id|
|
|
159
|
+
refs << {
|
|
160
|
+
"term" => term.strip,
|
|
161
|
+
"concept_id" => id.strip,
|
|
162
|
+
"source" => "urn:iec:std:iec:60050",
|
|
163
|
+
"ref_type" => "urn",
|
|
164
|
+
}
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
full_text.scan(URN_PATTERN) do |std_num, id, term|
|
|
168
|
+
refs << {
|
|
169
|
+
"term" => term.strip,
|
|
170
|
+
"concept_id" => id.strip,
|
|
171
|
+
"source" => "urn:iso:std:iso:#{std_num}",
|
|
172
|
+
"ref_type" => "urn",
|
|
173
|
+
}
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
return if refs.empty?
|
|
177
|
+
|
|
178
|
+
existing = l10n_block["references"] || []
|
|
179
|
+
seen_ids = existing.to_set { |r| r["concept_id"] || r["id"] }
|
|
180
|
+
refs.each do |ref|
|
|
181
|
+
key = ref["concept_id"] || ref["id"]
|
|
182
|
+
next if seen_ids.include?(key)
|
|
183
|
+
|
|
184
|
+
seen_ids.add(key)
|
|
185
|
+
existing << ref
|
|
186
|
+
end
|
|
187
|
+
l10n_block["references"] = existing
|
|
188
|
+
end
|
|
189
|
+
|
|
190
|
+
def strip_revisions(hash = @concept)
|
|
191
|
+
hash.delete("_revisions")
|
|
192
|
+
LANG_CODES.each do |lang|
|
|
193
|
+
next unless hash[lang].is_a?(Hash)
|
|
194
|
+
|
|
195
|
+
hash[lang].delete("_revisions")
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|
|
200
|
+
end
|