glossarist 2.8.7 → 2.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rubocop_todo.yml +128 -11
  4. data/CLAUDE.md +34 -3
  5. data/Gemfile +1 -0
  6. data/lib/glossarist/cli/compare_command.rb +2 -2
  7. data/lib/glossarist/cli/export_command.rb +1 -3
  8. data/lib/glossarist/collection.rb +1 -1
  9. data/lib/glossarist/collections/bibliography_collection.rb +1 -1
  10. data/lib/glossarist/concept_data.rb +1 -0
  11. data/lib/glossarist/concept_reference.rb +7 -1
  12. data/lib/glossarist/concept_source.rb +2 -2
  13. data/lib/glossarist/concept_validator.rb +3 -1
  14. data/lib/glossarist/dataset_validator.rb +1 -1
  15. data/lib/glossarist/{error.rb → errors/base.rb} +3 -1
  16. data/lib/glossarist/errors/cache_version_mismatch_error.rb +12 -0
  17. data/lib/glossarist/errors/invalid_language_code_error.rb +19 -0
  18. data/lib/glossarist/errors/invalid_type_error.rb +8 -0
  19. data/lib/glossarist/errors/load_error.rb +22 -0
  20. data/lib/glossarist/errors/parse_error.rb +24 -0
  21. data/lib/glossarist/errors.rb +14 -0
  22. data/lib/glossarist/gcr_package.rb +4 -2
  23. data/lib/glossarist/glossary_store.rb +175 -1
  24. data/lib/glossarist/managed_concept.rb +16 -2
  25. data/lib/glossarist/managed_concept_collection.rb +52 -8
  26. data/lib/glossarist/reference_extractor.rb +22 -2
  27. data/lib/glossarist/reference_resolver.rb +38 -3
  28. data/lib/glossarist/resolution_adapter/bibliography.rb +22 -0
  29. data/lib/glossarist/resolution_adapter.rb +1 -0
  30. data/lib/glossarist/schema_migration/v0_to_v1.rb +200 -0
  31. data/lib/glossarist/schema_migration/v2_to_v3.rb +50 -0
  32. data/lib/glossarist/schema_migration.rb +10 -224
  33. data/lib/glossarist/sts/importer.rb +11 -12
  34. data/lib/glossarist/sts/term_extractor.rb +104 -6
  35. data/lib/glossarist/validation/asset_index.rb +1 -1
  36. data/lib/glossarist/validation/rules/cite_ref_integrity_rule.rb +75 -0
  37. data/lib/glossarist/version.rb +1 -1
  38. data/lib/glossarist.rb +5 -13
  39. data/scripts/upgrade_dataset_to_v3.rb +1 -1
  40. metadata +13 -9
  41. data/lib/glossarist/concept_collector.rb +0 -231
  42. data/lib/glossarist/concept_manager.rb +0 -183
  43. data/lib/glossarist/error/cache_version_mismatch_error.rb +0 -8
  44. data/lib/glossarist/error/invalid_language_code_error.rb +0 -15
  45. data/lib/glossarist/error/invalid_type_error.rb +0 -4
  46. data/lib/glossarist/error/parse_error.rb +0 -16
@@ -2,16 +2,33 @@
2
2
 
3
3
  module Glossarist
4
4
  class GlossaryStore
5
- attr_reader :package
5
+ attr_reader :package, :localized_concepts_dir_name
6
6
 
7
7
  def initialize
8
8
  @package = nil
9
9
  @concept_document_class = V3::ConceptDocument
10
+ @v1_concepts = nil
11
+ @localized_concepts_dir_name = nil
10
12
  end
11
13
 
12
14
  # ── Load ──
13
15
 
14
16
  def load_directory(path, format: nil)
17
+ if v1_dataset?(path)
18
+ load_v1_fallback(path)
19
+ return self
20
+ end
21
+
22
+ if legacy_managed_layout?(path)
23
+ load_legacy_managed(path)
24
+ return self
25
+ end
26
+
27
+ if grouped_at_root?(path)
28
+ load_grouped_at_root(path)
29
+ return self
30
+ end
31
+
15
32
  metadata = load_metadata_from_directory(path)
16
33
  @concept_document_class = resolve_concept_document_class(metadata)
17
34
 
@@ -64,9 +81,23 @@ module Glossarist
64
81
  # ── Concepts ──
65
82
 
66
83
  def concepts
84
+ return @v1_concepts if @v1_concepts
85
+
67
86
  @package.models_for(@concept_document_class).map(&:to_managed_concept)
68
87
  end
69
88
 
89
# Yields each managed concept in turn.
#
# Without a block an enumerator over the same sequence is returned. With a
# block, the cached fallback list (@v1_concepts) is iterated when present;
# otherwise each package model of the active document class is converted
# with #to_managed_concept before being yielded.
def each_concept(&block)
  return to_enum(:each_concept) if block.nil?
  return @v1_concepts.each(&block) if @v1_concepts

  documents = @package.models_for(@concept_document_class)
  documents.each { |document| block.call(document.to_managed_concept) }
end
100
+
70
101
  def concept(uuid)
71
102
  doc = @package.fetch_model(@concept_document_class, uuid)
72
103
  doc&.to_managed_concept
@@ -194,5 +225,148 @@ module Glossarist
194
225
  def apply_metadata(metadata)
195
226
  @package.metadata = metadata if metadata && @package
196
227
  end
228
+
229
# Loads every "*.yaml" file directly under "<path>/concepts" through
# V1::Concept.from_file and caches the resulting managed concepts in
# @v1_concepts. Files for which from_file (or the conversion) yields nil
# are dropped.
def load_v1_fallback(path)
  pattern = File.join(path, "concepts", "*.yaml")
  @v1_concepts = Dir.glob(pattern).filter_map do |yaml_file|
    legacy_concept = V1::Concept.from_file(yaml_file)
    legacy_concept && legacy_concept.to_managed_concept
  end
end
237
+
238
# True when +path+ has a "concept" directory containing at least one
# "*.yaml" file and has no sibling "concepts" directory.
def legacy_managed_layout?(path)
  singular_dir = File.join(path, "concept")
  plural_dir = File.join(path, "concepts")

  File.directory?(singular_dir) &&
    !File.directory?(plural_dir) &&
    !Dir.glob(File.join(singular_dir, "*.yaml")).empty?
end
245
+
246
# Loads the legacy layout in which concept documents live in
# "<path>/concept/*.yaml" and their localizations in a separate directory
# located by #find_localized_concepts_dir.
#
# Each concept file is parsed with the ConceptDocument class matching its
# detected schema version. Concepts without a data id are skipped; the
# rest get their localizations attached (when a localization directory was
# found) and are collected in @v1_concepts.
#
# Raises Errors::ParseError for YAML syntax or format errors and
# Errors::LoadError for invalid byte sequences.
def load_legacy_managed(path)
  localization_dir = find_localized_concepts_dir(path)
  localization_index = localization_dir && build_lc_index(localization_dir)

  @v1_concepts = []
  Dir.glob(File.join(path, "concept", "*.yaml")).each do |concept_file|
    begin
      yaml_text = File.read(concept_file, encoding: "utf-8")
      schema = detect_version(yaml_text)
      document = ConceptDocument.for_version(schema).from_yamls(yaml_text)
      managed = document.concept
      next unless managed&.data&.id

      if localization_index
        load_legacy_localizations(managed, localization_index, schema)
      end
      @v1_concepts << managed
    rescue Psych::SyntaxError => e
      raise Errors::ParseError.new(filename: concept_file, line: e.line)
    rescue Lutaml::Model::InvalidFormatError => e
      raise Errors::ParseError.new(filename: concept_file, message: e.message)
    rescue Encoding::InvalidByteSequenceError => e
      raise Errors::LoadError.new(path: concept_file, reason: e.message)
    end
  end
end
270
+
271
# Attaches every localization referenced by +managed_concept+ to it.
#
# managed_concept - concept whose data.localized_concepts values are the
#                   localization uuids to load (nil is treated as empty).
# lc_index        - hash mapping a localization uuid to its file path, as
#                   built by #build_lc_index.
# version         - schema version; "2" selects V2::LocalizedConcept, any
#                   other value selects LocalizedConcept.
#
# Raises Errors::LoadError when a referenced uuid has no file in the index,
# or when the file is missing or unreadable, and Errors::ParseError on YAML
# syntax errors.
def load_legacy_localizations(managed_concept, lc_index, version = "3")
  l10n_class = version.to_s == "2" ? V2::LocalizedConcept : LocalizedConcept
  lc_map = managed_concept.data.localized_concepts || {}
  lc_map.each_value do |uuid|
    lc_file = lc_index[uuid]
    unless lc_file
      # lc_file is nil on this branch, so report the file name we expected
      # to find instead of passing a nil path into the error.
      expected = File.join(
        *[@localized_concepts_dir_name, "#{uuid}.yaml"].compact,
      )
      raise Errors::LoadError.new(path: expected,
                                  reason: "Referenced localization #{uuid} not found")
    end

    l10n = l10n_class.from_yaml(File.read(lc_file, encoding: "utf-8"))
    l10n.uuid = uuid
    managed_concept.add_localization(l10n)
  rescue Errors::LoadError
    # Already in its final form; keep the clauses below from touching it.
    raise
  rescue Psych::SyntaxError => e
    raise Errors::ParseError.new(filename: lc_file, line: e.line)
  rescue Errno::ENOENT
    raise Errors::LoadError.new(path: lc_file, reason: "File not found")
  rescue Errno::EACCES
    raise Errors::LoadError.new(path: lc_file, reason: "Permission denied")
  end
end
294
+
295
# Locates the localized-concepts directory under +path+.
#
# Tries "localized_concept" first, then "localized-concept". On a hit the
# matching directory name is remembered in @localized_concepts_dir_name
# and the full directory path is returned; otherwise nil is returned and
# the remembered name is left untouched.
def find_localized_concepts_dir(path)
  dir_name = %w[localized_concept localized-concept].find do |candidate|
    File.directory?(File.join(path, candidate))
  end
  return nil unless dir_name

  @localized_concepts_dir_name = dir_name
  File.join(path, dir_name)
end
305
+
306
# Builds a lookup of the "*.yaml" / "*.yml" files in +lc_dir+, keyed by
# file name without its extension and mapping to the full file path.
def build_lc_index(lc_dir)
  localization_files = Dir.glob(File.join(lc_dir, "*.{yaml,yml}"))
  localization_files.each_with_object({}) do |file, index|
    index[File.basename(file, ".*")] = file
  end
end
310
+
311
# True when +path+ has neither a "concepts" nor a "concept" directory and at
# least one "*.yaml" file directly under it parses to a hash whose "data"
# entry is a hash containing an "identifier" key.
#
# This is a detection probe, so a file that cannot be safely parsed (syntax
# error, disallowed class, alias, invalid byte sequence) simply does not
# count as a match instead of aborting the whole check.
def grouped_at_root?(path)
  return false if File.directory?(File.join(path, "concepts"))
  return false if File.directory?(File.join(path, "concept"))

  Dir.glob(File.join(path, "*.yaml")).any? do |f|
    raw = File.read(f, encoding: "utf-8")
    hash = YAML.safe_load(raw, permitted_classes: [Date, Time])
    hash.is_a?(Hash) && hash["data"].is_a?(Hash) &&
      hash["data"].key?("identifier")
  rescue Psych::SyntaxError, Psych::DisallowedClass, Psych::BadAlias,
         Encoding::InvalidByteSequenceError
    # safe_load also raises for non-permitted classes and for aliases;
    # neither should escape from a layout probe.
    false
  end
end
324
+
325
# Loads the layout in which each "*.yaml" file directly under +path+ holds
# one concept together with its localizations.
#
# Every file is parsed with the ConceptDocument class matching its detected
# schema version. Concepts without a data id are skipped; the rest receive
# the document's localizations and are collected in @v1_concepts.
#
# Raises Errors::ParseError for YAML syntax or format errors and
# Errors::LoadError for invalid byte sequences.
def load_grouped_at_root(path)
  @v1_concepts = []
  Dir.glob(File.join(path, "*.yaml")).each do |grouped_file|
    begin
      yaml_text = File.read(grouped_file, encoding: "utf-8")
      schema = detect_version(yaml_text)
      document = ConceptDocument.for_version(schema).from_yamls(yaml_text)
      managed = document.concept
      next unless managed&.data&.id

      Array(document.localizations).each do |localization|
        managed.add_localization(localization)
      end
      @v1_concepts << managed
    rescue Psych::SyntaxError => e
      raise Errors::ParseError.new(filename: grouped_file, line: e.line)
    rescue Lutaml::Model::InvalidFormatError => e
      raise Errors::ParseError.new(filename: grouped_file, message: e.message)
    rescue Encoding::InvalidByteSequenceError => e
      raise Errors::LoadError.new(path: grouped_file, reason: e.message)
    end
  end
end
345
+
346
# Extracts the schema version from raw YAML text without parsing it.
#
# Looks for a line starting with "schema_version:" and returns its digits
# as a String. An optional quote and an optional "v" prefix are accepted,
# so `3`, `v3`, `'3'` and `"v3"` all yield "3" (YAML dumpers quote the
# string "3"). Returns "2" when no such line is found.
def detect_version(raw)
  match = raw.match(/^schema_version:\s*["']?v?(\d+)/)
  match ? match[1] : "2"
end
353
+
354
# True when +path+ looks like a V1 dataset: it has a "concepts" directory,
# no "metadata.yaml", no "concepts/concept" subdirectory, and the first
# "*.yaml" file under "concepts" parses to a hash with a "termid" key.
#
# This is a detection probe, so a sample that cannot be safely parsed
# (syntax error, disallowed class, alias, invalid byte sequence) means
# "not V1" instead of aborting the load.
def v1_dataset?(path)
  concepts_dir = File.join(path, "concepts")
  return false unless File.directory?(concepts_dir)

  metadata_file = File.join(path, "metadata.yaml")
  concept_subdir = File.join(concepts_dir, "concept")
  return false if File.exist?(metadata_file) || File.directory?(concept_subdir)

  sample = Dir.glob(File.join(concepts_dir, "*.yaml")).first
  return false unless sample

  raw = File.read(sample, encoding: "utf-8")
  hash = YAML.safe_load(raw, permitted_classes: [Date, Time])
  hash.is_a?(Hash) && hash.key?("termid")
rescue Psych::SyntaxError, Psych::DisallowedClass, Psych::BadAlias,
       Encoding::InvalidByteSequenceError
  # safe_load also raises for non-permitted classes and for aliases;
  # neither should escape from a layout probe.
  false
end
197
371
  end
198
372
  end
@@ -19,11 +19,11 @@ module Glossarist
19
19
 
20
20
  attribute :uuid, :string
21
21
 
22
+ attribute :version, :string
22
23
  attribute :schema_version, :string
23
24
 
24
25
  key_value do
25
26
  map :data, to: :data
26
- map :id, with: { to: :identifier_to_yaml, from: :identifier_from_yaml }
27
27
  map :identifier,
28
28
  with: { to: :identifier_to_yaml, from: :identifier_from_yaml }
29
29
  map :related, to: :related
@@ -33,7 +33,8 @@ module Glossarist
33
33
  with: { from: :date_accepted_from_yaml, to: :date_accepted_to_yaml }
34
34
  map :status, to: :status
35
35
 
36
- map :uuid, to: :uuid, with: { from: :uuid_from_yaml, to: :uuid_to_yaml }
36
+ map %i[id uuid], to: :uuid,
37
+ with: { from: :uuid_from_yaml, to: :uuid_to_yaml }
37
38
  map :schema_version, to: :schema_version
38
39
  end
39
40
 
@@ -143,6 +144,19 @@ module Glossarist
143
144
  localization("eng") || localizations.values.first
144
145
  end
145
146
 
147
# Collects this concept's own sources, the sources of its data (if any),
# and the sources reported by every localization, in that order.
#
# Always returns a new array: Array(sources) hands back the receiver's own
# array when +sources+ is already an Array, so it is copied before anything
# is appended. Otherwise every call would append to the concept's sources.
def all_sources
  collected = Array(sources).dup
  collected.concat(Array(data&.sources))
  localizations.each_value { |l10n| collected.concat(l10n.all_sources) }
  collected
end
153
+
154
# Returns the first entry of #all_sources whose id equals +id+, or nil when
# there is none. A nil or blank id never matches.
def find_source_by_id(id)
  return nil if id.to_s.strip.empty?

  all_sources.detect { |candidate| candidate.id == id }
end
159
+
146
160
  def schema_version
147
161
  @schema_version
148
162
  end
@@ -1,13 +1,15 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Glossarist
2
4
  class ManagedConceptCollection
3
5
  include Enumerable
4
6
 
5
7
  attr_accessor :managed_concepts
6
8
 
7
- def initialize
9
+ def initialize(file_key: nil)
8
10
  @managed_concepts = []
9
11
  @managed_concepts_ids = {}
10
- @concept_manager = ConceptManager.new
12
+ @file_key = file_key
11
13
  end
12
14
 
13
15
  def to_h
@@ -33,6 +35,16 @@ module Glossarist
33
35
  end
34
36
  alias :[] :fetch
35
37
 
38
# Looks up a concept by id and, optionally, version.
#
# id      - either a concept uuid or a key of @managed_concepts_ids.
# version - when nil the lookup is delegated to #fetch; otherwise the first
#           stored concept whose uuid matches and whose version equals
#           +version+ is returned.
#
# Returns the matching concept or nil.
def by_id_and(id, version = nil)
  return fetch(id) if version.nil?

  # Drop nil candidates: for an id that is not in the id map the lookup
  # yields nil, which must not match concepts that have no uuid.
  candidate_uuids = [id, @managed_concepts_ids[id]].compact

  @managed_concepts.find do |concept|
    candidate_uuids.include?(concept.uuid) && concept.version == version
  end
end
47
+
36
48
  # If ManagedConcept with given ID is present in this collection, then
37
49
  # returns it. Otherwise, instantiates a new ManagedConcept, adds it to
38
50
  # the collection, and returns it.
@@ -62,18 +74,50 @@ module Glossarist
62
74
  alias :<< :store
63
75
 
64
76
  def load_from_files(path)
65
- @concept_manager.path = path
66
- @concept_manager.load_from_files(collection: self)
77
+ store = GlossaryStore.new
78
+ store.load(path)
79
+ store.concepts.each { |mc| store(mc) }
80
+ @localized_concepts_path = store.localized_concepts_dir_name || "localized_concept"
67
81
  end
68
82
 
69
83
  def save_to_files(path)
70
- @concept_manager.path = path
71
- @concept_manager.save_to_files(@managed_concepts)
84
+ concept_dir = File.join(path, "concept")
85
+ lc_dir = File.join(path, @localized_concepts_path || "localized_concept")
86
+ FileUtils.mkdir_p(concept_dir)
87
+ FileUtils.mkdir_p(lc_dir)
88
+
89
+ @managed_concepts.each do |mc|
90
+ File.write(File.join(concept_dir, "#{file_key(mc)}.yaml"), mc.to_yaml,
91
+ encoding: "utf-8")
92
+
93
+ mc.localized_concepts.each do |lang, uuid|
94
+ l10n = mc.localization(lang)
95
+ next unless l10n
96
+
97
+ File.write(File.join(lc_dir, "#{uuid}.yaml"), l10n.to_yaml,
98
+ encoding: "utf-8")
99
+ end
100
+ end
72
101
  end
73
102
 
74
103
  def save_grouped_concepts_to_files(path)
75
- @concept_manager.path = path
76
- @concept_manager.save_grouped_concepts_to_files(@managed_concepts)
104
+ FileUtils.mkdir_p(path)
105
+
106
+ @managed_concepts.each do |mc|
107
+ parts = [mc.to_yaml]
108
+ mc.localized_concepts.each_key do |lang|
109
+ l10n = mc.localization(lang)
110
+ parts << l10n.to_yaml if l10n
111
+ end
112
+ File.write(File.join(path, "#{file_key(mc)}.yaml"), parts.join("\n"),
113
+ encoding: "utf-8")
114
+ end
115
+ end
116
+
117
+ private
118
+
119
+ def file_key(concept)
120
+ @file_key ? @file_key.call(concept) : concept.uuid
77
121
  end
78
122
  end
79
123
  end
@@ -72,8 +72,8 @@ module Glossarist
72
72
 
73
73
  if content.include?(",")
74
74
  parts = content.split(",", 2)
75
- display = parts[0].strip
76
- identifier = parts[1].strip
75
+ identifier = parts[0].strip
76
+ display = parts[1].strip
77
77
  resolve_by_identifier(identifier, display)
78
78
  else
79
79
  resolve_by_identifier(content, nil)
@@ -114,6 +114,22 @@ module Glossarist
114
114
  )
115
115
  end
116
116
 
117
# Builds a "cite" ConceptReference from a "cite:<key>" identifier.
#
# identifier - text with an optional leading "cite:"; the remainder is
#              stripped. A key wrapped in double quotes is unwrapped and
#              doubled quotes ("") inside it become single quotes.
# display    - term to show; falls back to the cleaned key when nil.
#
# Returns nil when the key is empty, including a quoted empty key such as
# `cite:""`, so no reference with an empty concept_id is produced.
def resolve_cite_key(identifier, display)
  cleaned = identifier.delete_prefix("cite:").strip
  return nil if cleaned.empty?

  if cleaned.length >= 2 && cleaned.start_with?('"') && cleaned.end_with?('"')
    cleaned = cleaned[1..-2].gsub('""', '"')
    # Unquoting can leave nothing behind; apply the same guard again.
    return nil if cleaned.empty?
  end

  ConceptReference.new(
    concept_id: cleaned,
    source: nil,
    term: display || cleaned,
    ref_type: "cite",
  )
end
132
+
117
133
  def resolve_iec_urn(urn, display)
118
134
  concept_id = extract_iec_concept_id(urn)
119
135
 
@@ -273,6 +289,10 @@ module Glossarist
273
289
  regex: /image::?([^\[\]]+)\[/,
274
290
  ) { |ext, path| ext.resolve_image_ref(path) }
275
291
 
292
+ register_identifier_resolver("cite:") do |ext, identifier, display|
293
+ ext.resolve_cite_key(identifier, display)
294
+ end
295
+
276
296
  register_identifier_resolver("urn:iec:std:iec:60050") do |ext, identifier, display|
277
297
  ext.resolve_iec_urn(identifier, display)
278
298
  end
@@ -5,6 +5,7 @@ module Glossarist
5
5
  def initialize
6
6
  @local_adapter = nil
7
7
  @package_adapters = []
8
+ @bibliography_adapters = []
8
9
  @route_adapter = ResolutionAdapter::Route.new
9
10
  @remote_adapters = []
10
11
  end
@@ -31,13 +32,28 @@ module Glossarist
31
32
  endpoint: endpoint)
32
33
  end
33
34
 
34
- def resolve(reference)
35
# Registers a bibliography adapter built from +source_id+ and +concepts+
# and returns the list of registered bibliography adapters.
def register_bibliography(source_id, concepts)
  adapter = ResolutionAdapter::Bibliography.new(source_id, concepts)
  @bibliography_adapters.push(adapter)
end
39
+
40
+ def resolve(reference, concept: nil)
41
+ if concept && reference.is_a?(ConceptReference) && reference.cite?
42
+ source = concept.find_source_by_id(reference.concept_id)
43
+ return source&.origin
44
+ end
45
+
35
46
  if reference.local?
36
47
  return @local_adapter&.resolve(reference)
37
48
  end
38
49
 
39
50
  routed_ref = apply_routes(reference)
40
51
 
52
+ @bibliography_adapters.each do |adapter|
53
+ result = adapter.resolve(routed_ref)
54
+ return result if result
55
+ end
56
+
41
57
  @package_adapters.each do |adapter|
42
58
  result = adapter.resolve(routed_ref)
43
59
  return result if result
@@ -54,7 +70,8 @@ module Glossarist
54
70
  def resolve_all(concept, extractor: nil)
55
71
  extractor ||= ReferenceExtractor.new
56
72
  refs = extract_refs(concept, extractor)
57
- refs.map { |ref| [ref, resolve(ref)] }
73
+ source_concept = concept.is_a?(ManagedConcept) ? concept : nil
74
+ refs.map { |ref| [ref, resolve(ref, concept: source_concept)] }
58
75
  end
59
76
 
60
77
  def validate_all(package_or_concepts, extractor: nil, mode: :multi)
@@ -65,9 +82,10 @@ module Glossarist
65
82
  concepts.each do |concept|
66
83
  refs = extract_refs(concept, extractor)
67
84
  termid = extract_termid(concept)
85
+ source_concept = concept.is_a?(ManagedConcept) ? concept : nil
68
86
 
69
87
  refs.each do |ref|
70
- resolved = resolve(ref)
88
+ resolved = resolve(ref, concept: source_concept)
71
89
  if resolved.nil?
72
90
  scope = ref.local? ? "intra-set" : "inter-set (#{ref.source})"
73
91
  result.add_warning("#{termid}: Unresolvable #{scope} reference: #{ref.term} -> #{ref.concept_id}")
@@ -98,8 +116,25 @@ module Glossarist
98
116
  @package_adapters.map(&:uri_prefix)
99
117
  end
100
118
 
119
# Classifies +reference+ by resolving it (optionally in the context of
# +concept+) and mapping the outcome to a label.
#
# Anything that is not a ConceptReference is labelled "unknown".
def classify(reference, concept: nil)
  if reference.is_a?(ConceptReference)
    outcome = resolve(reference, concept: concept)
    classify_from_resolution(reference, outcome)
  else
    "unknown"
  end
end
125
+
101
126
  private
102
127
 
128
# Maps a reference and its resolution outcome to a label.
#
# Each reference kind (cite, external, anything else) has one label for the
# resolved case and one for the unresolved case.
def classify_from_resolution(reference, resolved)
  when_resolved, when_unresolved =
    if reference.cite?
      ["self-contained-citation", "unresolved-citation"]
    elsif reference.external?
      ["internal-citation", "external-citation"]
    else
      ["same-dataset", "unresolved"]
    end

  resolved ? when_resolved : when_unresolved
end
137
+
103
138
  def apply_routes(reference)
104
139
  routed = @route_adapter.resolve(reference)
105
140
  routed || reference
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  class ResolutionAdapter
    # Resolves concept references that point at one specific source by
    # looking them up in a concept collection.
    class Bibliography < ResolutionAdapter
      attr_reader :source_id, :concepts

      # source_id - the reference source this adapter answers for.
      # concepts  - collection queried through #by_id_and.
      def initialize(source_id, concepts)
        super()
        @concepts = concepts
        @source_id = source_id
      end

      # Returns the concept matching the reference's concept id and version,
      # or nil when +reference+ is not a ConceptReference or targets a
      # different source.
      def resolve(reference)
        handled = reference.is_a?(ConceptReference) &&
          reference.source == @source_id
        return nil unless handled

        concepts.by_id_and(reference.concept_id, reference.version)
      end
    end
  end
end
@@ -4,6 +4,7 @@ module Glossarist
4
4
  class ResolutionAdapter
5
5
  autoload :Local, "glossarist/resolution_adapter/local"
6
6
  autoload :Package, "glossarist/resolution_adapter/package"
7
+ autoload :Bibliography, "glossarist/resolution_adapter/bibliography"
7
8
  autoload :Route, "glossarist/resolution_adapter/route"
8
9
  autoload :Remote, "glossarist/resolution_adapter/remote"
9
10
 
@@ -0,0 +1,200 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  class SchemaMigration
    # Migrates a raw concept hash from schema version "0" to "1", mutating
    # the hash in place.
    class V0ToV1
      # Legacy entry_status values and their replacements; values that are
      # not listed are left untouched.
      ENTRY_STATUS_MAP = {
        "Standard" => "valid",
        "Confirmed" => "valid",
        "Proposed" => "draft",
      }.freeze

      LANG_CODES = Glossarist::LANG_CODES

      # Inline mention of the form {{term, IEV:id}}.
      IEV_PATTERN = /\{\{([^,}]+),\s*IEV:([^}]+)\}\}/
      # Inline mention of the form {urn:iso:std:iso:<number>:<id>,<term>}.
      URN_PATTERN = /\{urn:iso:std:iso:(\d+):([^,}]+),([^}]+)\}/

      attr_reader :from_version, :to_version

      # concept_hash - the concept hash to migrate (modified in place).
      # from_version - schema version of the input.
      # to_version   - target schema version.
      # ref_maps     - stored on the instance; not read by the code in this
      #                class.
      def initialize(concept_hash, from_version: "0",
                     to_version: SchemaMigration::CURRENT_SCHEMA_VERSION,
                     ref_maps: {})
        @concept = concept_hash
        @from_version = from_version
        @to_version = to_version
        @ref_maps = ref_maps
      end

      # Runs the migration and returns the (mutated) concept hash.
      #
      # Raises Errors::Base for any version pair other than "0" -> "1".
      def migrate
        unless %w[0 1] == [from_version, to_version]
          raise Errors::Base,
                "Unsupported migration: #{from_version} -> #{to_version}"
        end

        migrate_v0_to_v1
        @concept
      end

      private

      # Normalizes the termid, migrates every present language block, then
      # removes revision history.
      def migrate_v0_to_v1
        migrate_termid
        LANG_CODES.each do |code|
          migrate_language_block(code) if @concept[code]
        end
        strip_revisions
      end

      # Coerces an existing "termid" to a String.
      def migrate_termid
        return unless @concept.key?("termid")

        @concept["termid"] = String(@concept["termid"])
      end

      # Applies every per-language migration step, in order, to the block
      # stored under +lang+. Non-hash blocks are ignored.
      def migrate_language_block(lang)
        block = @concept[lang]
        return unless block.is_a?(Hash)

        steps = %i[
          migrate_definition
          migrate_authoritative_source
          migrate_dates
          migrate_entry_status
          migrate_terms_abbrev
          extract_inline_refs
          strip_revisions
        ]
        steps.each { |step| send(step, block) }
      end

      # Wraps a plain-string definition into a one-element list of
      # { "content" => text }.
      def migrate_definition(l10n_block)
        text = l10n_block["definition"]
        return unless l10n_block.key?("definition") && text.is_a?(String)

        l10n_block["definition"] = [{ "content" => text }]
      end

      # Removes "authoritative_source" and, unless "sources" already exists,
      # converts its hash entries into authoritative "sources" entries.
      def migrate_authoritative_source(l10n_block)
        return unless l10n_block.key?("authoritative_source")

        legacy = l10n_block.delete("authoritative_source")
        return if l10n_block.key?("sources")

        candidates = legacy.is_a?(Array) ? legacy : [legacy]
        converted = candidates.grep(Hash).map do |source|
          authoritative_entry(source)
        end

        l10n_block["sources"] = converted unless converted.empty?
      end

      # Builds one authoritative source entry from a legacy source hash.
      def authoritative_entry(source)
        origin = %w[ref clause link].each_with_object({}) do |field, acc|
          acc[field] = source[field] if source[field]
        end
        entry = { "type" => "authoritative", "origin" => origin }

        relationship = source["relationship"]
        if relationship
          entry["status"] = relationship["type"] || "identical"
          modification = relationship["modification"]
          entry["modification"] = modification if modification
        end
        entry
      end

      # Builds "dates" from "date_accepted" / "date_amended" unless "dates"
      # is already present.
      def migrate_dates(l10n_block)
        return if l10n_block.key?("dates")

        legacy_fields = { "accepted" => "date_accepted",
                          "amended" => "date_amended" }
        dates = legacy_fields.filter_map do |type, field|
          { "type" => type, "date" => l10n_block[field] } if l10n_block[field]
        end

        l10n_block["dates"] = dates unless dates.empty?
      end

      # Replaces a legacy "entry_status" value listed in ENTRY_STATUS_MAP.
      def migrate_entry_status(l10n_block)
        return unless l10n_block.key?("entry_status")

        replacement = ENTRY_STATUS_MAP[l10n_block["entry_status"]]
        l10n_block["entry_status"] = replacement if replacement
      end

      # Turns `"abbrev" => true` on a term into `"type" => "abbreviation"`.
      def migrate_terms_abbrev(l10n_block)
        terms = l10n_block["terms"]
        return unless terms.is_a?(Array)

        terms.grep(Hash).each do |term|
          next unless term["abbrev"] == true

          term["type"] = "abbreviation"
          term.delete("abbrev")
        end
      end

      # Scans definition, notes and examples for inline IEV / ISO URN
      # mentions and appends them to "references", skipping any whose
      # concept id is already listed.
      def extract_inline_refs(l10n_block)
        full_text = inline_texts(l10n_block).join(" ")

        iev_refs = full_text.scan(IEV_PATTERN).map do |term, id|
          {
            "term" => term.strip,
            "concept_id" => id.strip,
            "source" => "urn:iec:std:iec:60050",
            "ref_type" => "urn",
          }
        end
        urn_refs = full_text.scan(URN_PATTERN).map do |std_num, id, term|
          {
            "term" => term.strip,
            "concept_id" => id.strip,
            "source" => "urn:iso:std:iso:#{std_num}",
            "ref_type" => "urn",
          }
        end

        found = iev_refs + urn_refs
        return if found.empty?

        references = l10n_block["references"] || []
        known_ids = references.to_set { |r| r["concept_id"] || r["id"] }
        found.each do |ref|
          # Set#add? returns nil when the id was already present.
          references << ref if known_ids.add?(ref["concept_id"] || ref["id"])
        end
        l10n_block["references"] = references
      end

      # Collects the text of the definition, notes and examples entries.
      def inline_texts(l10n_block)
        definition = l10n_block["definition"]
        texts =
          case definition
          when Array then definition.map { |item| content_text(item) }
          when String then [definition]
          else []
          end

        %w[notes examples].each do |field|
          Array(l10n_block[field]).each { |item| texts << content_text(item) }
        end
        texts
      end

      # Text of one entry: its "content" for hashes, otherwise the entry
      # itself, always as a String.
      def content_text(item)
        item.is_a?(Hash) ? item["content"].to_s : item.to_s
      end

      # Deletes "_revisions" from +hash+ and from every hash stored under a
      # language code inside it.
      def strip_revisions(hash = @concept)
        hash.delete("_revisions")
        hash.values_at(*LANG_CODES).grep(Hash).each do |nested|
          nested.delete("_revisions")
        end
      end
    end
  end
end