glossarist 2.4.0 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop_todo.yml +50 -146
  4. data/CLAUDE.md +85 -0
  5. data/Gemfile +26 -5
  6. data/README.adoc +383 -7
  7. data/TODO.integration/01-gcr-package-cli.md +180 -0
  8. data/exe/glossarist +1 -53
  9. data/glossarist.gemspec +3 -2
  10. data/lib/glossarist/asset.rb +1 -1
  11. data/lib/glossarist/citation.rb +1 -1
  12. data/lib/glossarist/cli/package_command.rb +32 -0
  13. data/lib/glossarist/cli/upgrade_command.rb +34 -0
  14. data/lib/glossarist/cli/validate_command.rb +56 -0
  15. data/lib/glossarist/cli.rb +105 -0
  16. data/lib/glossarist/collection_config.rb +23 -0
  17. data/lib/glossarist/collections/concept_source_collection.rb +9 -0
  18. data/lib/glossarist/collections/detailed_definition_collection.rb +18 -0
  19. data/lib/glossarist/collections/localization_collection.rb +37 -0
  20. data/lib/glossarist/collections/typed_collection.rb +26 -0
  21. data/lib/glossarist/collections.rb +21 -4
  22. data/lib/glossarist/concept.rb +1 -1
  23. data/lib/glossarist/concept_collector.rb +153 -0
  24. data/lib/glossarist/concept_data.rb +15 -8
  25. data/lib/glossarist/concept_date.rb +1 -1
  26. data/lib/glossarist/concept_document.rb +29 -0
  27. data/lib/glossarist/concept_enricher.rb +34 -0
  28. data/lib/glossarist/concept_manager.rb +31 -49
  29. data/lib/glossarist/concept_reference.rb +45 -0
  30. data/lib/glossarist/concept_source.rb +1 -1
  31. data/lib/glossarist/concept_validator.rb +114 -0
  32. data/lib/glossarist/custom_locality.rb +1 -1
  33. data/lib/glossarist/dataset_validator.rb +69 -0
  34. data/lib/glossarist/designation/abbreviation.rb +1 -1
  35. data/lib/glossarist/designation/base.rb +11 -4
  36. data/lib/glossarist/designation/expression.rb +1 -1
  37. data/lib/glossarist/designation/grammar_info.rb +1 -1
  38. data/lib/glossarist/designation/graphical_symbol.rb +1 -1
  39. data/lib/glossarist/designation/letter_symbol.rb +1 -1
  40. data/lib/glossarist/designation/symbol.rb +2 -2
  41. data/lib/glossarist/designation.rb +8 -11
  42. data/lib/glossarist/detailed_definition.rb +1 -1
  43. data/lib/glossarist/error.rb +2 -5
  44. data/lib/glossarist/gcr_metadata.rb +87 -0
  45. data/lib/glossarist/gcr_package.rb +223 -0
  46. data/lib/glossarist/gcr_statistics.rb +35 -0
  47. data/lib/glossarist/gcr_validator.rb +98 -0
  48. data/lib/glossarist/locality.rb +1 -1
  49. data/lib/glossarist/localized_concept.rb +12 -1
  50. data/lib/glossarist/managed_concept.rb +1 -1
  51. data/lib/glossarist/managed_concept_data.rb +8 -5
  52. data/lib/glossarist/non_verb_rep.rb +1 -1
  53. data/lib/glossarist/reference_extractor.rb +227 -0
  54. data/lib/glossarist/reference_resolver.rb +169 -0
  55. data/lib/glossarist/register_data.rb +39 -0
  56. data/lib/glossarist/related_concept.rb +1 -1
  57. data/lib/glossarist/resolution_adapter/local.rb +73 -0
  58. data/lib/glossarist/resolution_adapter/package.rb +22 -0
  59. data/lib/glossarist/resolution_adapter/remote.rb +60 -0
  60. data/lib/glossarist/resolution_adapter/route.rb +34 -0
  61. data/lib/glossarist/resolution_adapter.rb +14 -0
  62. data/lib/glossarist/schema_migration.rb +334 -0
  63. data/lib/glossarist/urn_resolver.rb +71 -0
  64. data/lib/glossarist/utilities.rb +6 -2
  65. data/lib/glossarist/v1/concept.rb +81 -0
  66. data/lib/glossarist/v1/cross_references.rb +41 -0
  67. data/lib/glossarist/v1/register.rb +50 -0
  68. data/lib/glossarist/v1.rb +9 -0
  69. data/lib/glossarist/validation_result.rb +38 -0
  70. data/lib/glossarist/version.rb +1 -1
  71. data/lib/glossarist.rb +54 -24
  72. metadata +62 -6
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "zip"
4
+ require "fileutils"
5
+
6
+ module Glossarist
7
+ class GcrPackage
8
+ attr_reader :zip_path, :metadata, :concepts
9
+
10
# Creates an in-memory handle for the GCR archive at +zip_path+.
# Nothing is read here: metadata starts out nil and concepts empty.
#
# @param zip_path [String] location of the ZIP archive
def initialize(zip_path)
  @concepts = []
  @metadata = nil
  @zip_path = zip_path
end
15
+
16
# Writes a new package to +output_path+ and returns its handle.
# The parent directory of +output_path+ is created when missing.
#
# @param concepts [Enumerable] managed concepts to serialise
# @param metadata [#to_yaml] package metadata
# @param output_path [String] destination archive path
# @param register_data [#to_yaml, nil] optional register payload
# @return [GcrPackage]
def self.create(concepts:, metadata:, output_path:, register_data: nil)
  FileUtils.mkdir_p(File.dirname(output_path))
  new(output_path).tap do |pkg|
    # #write is private, hence the explicit send.
    pkg.send(:write, concepts, metadata, register_data)
  end
end
22
+
23
# Opens the archive at +zip_path+ and returns a handle populated by the
# private #read method.
#
# @param zip_path [String]
# @return [GcrPackage]
def self.load(zip_path)
  new(zip_path).tap { |pkg| pkg.send(:read) }
end
28
+
29
# Builds a package from the concept files under +dir+.
#
# The directory is expanded to an absolute path, then the work is handed to
# either the streaming or the batch builder with the same keyword options.
#
# @param dir [String] source directory
# @param streaming [Boolean] when true use create_streaming, else create_batch
# @return whatever the selected builder returns
def self.create_from_directory(dir, output:, shortname:, version:, # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
                               title: nil, description: nil, owner: nil,
                               tags: [], register_yaml: nil,
                               uri_prefix: nil, concept_uri_template: nil,
                               streaming: false)
  source_dir = File.expand_path(dir)
  build_options = {
    output: output,
    shortname: shortname,
    version: version,
    title: title,
    description: description,
    owner: owner,
    tags: tags,
    register_yaml: register_yaml,
    uri_prefix: uri_prefix,
    concept_uri_template: concept_uri_template,
  }

  return create_streaming(source_dir, **build_options) if streaming

  create_batch(source_dir, **build_options)
end
50
+
51
# Runs a GcrValidator over this package's archive path.
#
# @return the value returned by GcrValidator#validate
def validate
  validator = GcrValidator.new
  validator.validate(@zip_path)
end
54
+
55
+ private
56
+
57
# Serialises the package into a fresh ZIP archive at +@zip_path+.
#
# Layout: metadata.yaml, optionally register.yaml, then one
# concepts/<id>.yaml entry per concept (see #write_concept).
#
# @param concepts [Enumerable] managed concepts to store
# @param metadata [#to_yaml] written as metadata.yaml
# @param register_data [#to_yaml, nil] written as register.yaml when present
def write(concepts, metadata, register_data)
  # Zip::File.open(create: true) opens an archive that already exists instead
  # of replacing it, so entries from an earlier build (e.g. concepts that were
  # since removed) would survive and disagree with the new metadata.
  # Start from a clean file instead.
  FileUtils.rm_f(@zip_path)

  Zip::File.open(@zip_path, create: true) do |zf|
    zf.get_output_stream("metadata.yaml") { |f| f.write(metadata.to_yaml) }

    if register_data
      zf.get_output_stream("register.yaml") do |f|
        f.write(register_data.to_yaml)
      end
    end

    concepts.each { |mc| write_concept(zf, mc) }
  end
end
74
+
75
# Stores one concept as "concepts/<id>.yaml" inside the open archive.
# The entry body is the multi-document YAML produced by ConceptDocument.
#
# @param zip_file [#get_output_stream] open archive
# @param concept [#data] managed concept; data.id names the entry
def write_concept(zip_file, concept)
  entry_name = "concepts/#{concept.data.id}.yaml"
  document = ConceptDocument.from_managed_concept(concept)
  zip_file.get_output_stream(entry_name) do |stream|
    stream.write(document.to_yamls)
  end
end
82
+
83
# Loads the archive at +@zip_path+ into memory.
#
# Resets @concepts, parses metadata.yaml when the entry exists, and turns
# every "concepts/*.yaml" entry into a managed concept.
def read
  @concepts = []

  Zip::File.open(@zip_path) do |archive|
    metadata_entry = archive.find_entry("metadata.yaml")
    if metadata_entry
      @metadata = GcrMetadata.from_yaml(metadata_entry.get_input_stream.read)
    end

    archive.entries.each do |entry|
      path = entry.name
      next unless path.start_with?("concepts/") && path.end_with?(".yaml")

      document = ConceptDocument.from_yamls(entry.get_input_stream.read)
      @concepts << document.to_managed_concept
    end
  end
end
100
+
101
+ class << self
102
+ private
103
+
104
# Batch builder: collects every concept under +dir+ first, enriches them,
# derives the metadata from the full list and writes the archive via .create.
#
# @raise [ArgumentError] when no concepts are found under +dir+
def create_batch(dir, output:, shortname:, version:, **opts)
  concepts = ConceptCollector.collect(dir)
  raise ArgumentError, "No concept files found in #{dir}" if concepts.empty?

  uri_template = opts[:concept_uri_template]
  ConceptEnricher.new.tap do |enricher|
    enricher.inject_references(concepts)
    enricher.apply_uri_template(concepts, uri_template) if uri_template
  end

  register_data = load_register_data(opts[:register_yaml], dir)
  metadata = build_metadata(concepts, shortname: shortname, version: version,
                                      register_data: register_data, **opts)

  create(
    concepts: concepts,
    metadata: metadata,
    register_data: register_data,
    output_path: File.expand_path(output),
  )
end
129
+
130
# Streaming builder: writes each concept to the archive as soon as it is
# yielded by ConceptCollector.each_concept instead of collecting them first.
#
# metadata.yaml is written last because the concept count and the language
# set are only known once every concept has been visited.
#
# If anything fails after the output stream was opened (including the
# "no concepts" check), the half-written archive is removed before the
# error is re-raised, so callers never find a truncated package on disk.
#
# @param dir [String] directory holding the concept files
# @param output [String] destination archive path (expanded here)
# @return [GcrPackage] handle pointing at the written archive
# @raise [ArgumentError] when no concepts are found under +dir+
def create_streaming(dir, output:, shortname:, version:, **opts) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/BlockLength
  enricher = ConceptEnricher.new
  output_path = File.expand_path(output)
  FileUtils.mkdir_p(File.dirname(output_path))

  register_data = load_register_data(opts[:register_yaml], dir)
  uri_template = opts[:concept_uri_template]
  concept_count = 0
  languages = Set.new
  # Only clean up files this call actually started writing.
  archive_started = false

  begin
    Zip::OutputStream.open(output_path) do |zos|
      archive_started = true

      if register_data
        zos.put_next_entry("register.yaml")
        zos.write(register_data.to_yaml)
      end

      ConceptCollector.each_concept(dir) do |mc|
        enricher.inject_references([mc])
        enricher.apply_uri_template([mc], uri_template) if uri_template

        mc.localizations.each do |l10n|
          languages << l10n.language_code if l10n.language_code
        end
        concept_count += 1

        termid = mc.data.id.to_s
        doc = ConceptDocument.from_managed_concept(mc)
        zos.put_next_entry("concepts/#{termid}.yaml")
        zos.write(doc.to_yamls)
      end

      if concept_count.zero?
        raise ArgumentError,
              "No concept files found in #{dir}"
      end

      metadata = build_streaming_metadata(concept_count, languages,
                                          shortname: shortname, version: version,
                                          register_data: register_data, **opts)
      zos.put_next_entry("metadata.yaml")
      zos.write(metadata.to_yaml)
    end
  rescue StandardError
    FileUtils.rm_f(output_path) if archive_started
    raise
  end

  new(output_path)
end
177
+
178
# Assembles the GcrMetadata for a streamed package from counters gathered
# while writing, since the full concept list is not available.
#
# The schema version comes from register_data["schema_version"] when
# present, otherwise from SchemaMigration::CURRENT_SCHEMA_VERSION.
#
# @param concept_count [Integer] number of concepts written
# @param languages [#sort] language codes seen; stored sorted
# @return [GcrMetadata]
def build_streaming_metadata(concept_count, languages, shortname:, version:, # rubocop:disable Metrics/ParameterLists
                             register_data: nil, **opts)
  schema_version = register_data&.dig("schema_version") ||
    SchemaMigration::CURRENT_SCHEMA_VERSION

  attributes = {
    shortname: shortname,
    version: version,
    title: opts[:title],
    description: opts[:description],
    owner: opts[:owner],
    tags: opts[:tags] || [],
    concept_count: concept_count,
    languages: languages.sort,
    created_at: Time.now.utc.iso8601,
    glossarist_version: Glossarist::VERSION,
    schema_version: schema_version,
    uri_prefix: opts[:uri_prefix],
    concept_uri_template: opts[:concept_uri_template],
  }
  GcrMetadata.new(**attributes)
end
196
+
197
# Derives the package metadata from the complete concept list by delegating
# to GcrMetadata.from_concepts. Only the recognised option keys are passed
# on; any other entries in +opts+ are ignored.
#
# @return the value returned by GcrMetadata.from_concepts
def build_metadata(concepts, shortname:, version:, register_data: nil,
                   **opts)
  forwarded_keys = %i[title description owner tags uri_prefix
                      concept_uri_template]
  options = { shortname: shortname, version: version }
  forwarded_keys.each { |key| options[key] = opts[key] }

  GcrMetadata.from_concepts(
    concepts,
    register_data: register_data,
    options: options,
  )
end
214
+
215
# Loads register data from +register_yaml_path+, falling back to
# "<dir>/register.yaml" when no explicit path is given.
#
# @return [RegisterData, nil] nil when the chosen file does not exist
def load_register_data(register_yaml_path, dir)
  candidate = register_yaml_path || File.join(dir, "register.yaml")
  File.exist?(candidate) ? RegisterData.from_file(candidate) : nil
end
221
+ end
222
+ end
223
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ class GcrStatistics < Lutaml::Model::Serializable
5
+ attribute :total_concepts, :integer
6
+ attribute :languages, :string, collection: true
7
+ attribute :concepts_by_status, :hash
8
+ attribute :concepts_with_definitions, :integer
9
+ attribute :concepts_with_sources, :integer
10
+
11
+ key_value do
12
+ map :total_concepts, to: :total_concepts
13
+ map :languages, to: :languages
14
+ map :concepts_by_status, to: :concepts_by_status
15
+ map :concepts_with_definitions, to: :concepts_with_definitions
16
+ map :concepts_with_sources, to: :concepts_with_sources
17
+ end
18
+
19
# Computes statistics for a list of managed concepts.
#
# Apart from total_concepts, every figure is computed over the flattened
# list of localizations, not over the concepts themselves.
#
# @param concepts [Array] objects responding to #localizations
# @return [GcrStatistics]
def self.from_concepts(concepts)
  localizations = concepts.flat_map { |concept| concept.localizations.to_a }
  language_codes = localizations.map(&:language_code).compact
  statuses = localizations.map(&:entry_status).compact

  new(
    total_concepts: concepts.length,
    languages: language_codes.sort.uniq,
    concepts_by_status: statuses.tally,
    concepts_with_definitions: count_with(localizations, :definition),
    concepts_with_sources: count_with(localizations, :sources),
  )
end
30
+
31
# Counts the localizations whose data attribute +attr+ is a collection
# with at least one truthy element; a nil attribute counts as empty.
#
# @param l10ns [Array] objects responding to #data
# @param attr [Symbol] attribute name read from each #data
# @return [Integer]
def self.count_with(l10ns, attr)
  l10ns.count do |l10n|
    values = l10n.data.send(attr)
    values.nil? ? false : values.any?
  end
end
34
+ end
35
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "zip"
4
+
5
+ module Glossarist
6
+ class GcrValidator
7
# Validates the GCR archive at +zip_path+ and returns the collected findings.
#
# Checks, in order: the file exists; metadata.yaml is present and has its
# required fields; at least one concepts/*.yaml entry exists; each concept
# entry passes #validate_concept_entry. Any StandardError raised while the
# archive is open is reported as a "Failed to read ZIP" error.
#
# @param zip_path [String]
# @return [ValidationResult]
def validate(zip_path) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  report = ValidationResult.new

  unless File.exist?(zip_path)
    report.add_error("File not found: #{zip_path}")
    return report
  end

  begin
    Zip::File.open(zip_path) do |archive|
      metadata_entry = archive.find_entry("metadata.yaml")
      if metadata_entry.nil?
        report.add_error("Missing metadata.yaml")
        return report
      end

      metadata = GcrMetadata.from_yaml(metadata_entry.get_input_stream.read)
      validate_metadata(metadata, report)

      concept_entries = archive.entries.select do |candidate|
        path = candidate.name
        path.start_with?("concepts/") && path.end_with?(".yaml")
      end
      report.add_error("No concept files found in concepts/") if concept_entries.empty?

      concept_entries.each do |concept_entry|
        validate_concept_entry(concept_entry, metadata, report)
      end
    end
  rescue StandardError => e
    report.add_error("Failed to read ZIP: #{e.message}")
  end

  report
end
44
+
45
+ private
46
+
47
# Records an error for each required metadata field that is missing or
# falsy. A nil +metadata+ yields all three errors.
#
# @param metadata [#concept_count, #shortname, #version, nil]
# @param result [#add_error] collector for findings
def validate_metadata(metadata, result)
  required = {
    concept_count: "metadata.yaml missing required fields (concept_count)",
    shortname: "metadata.yaml missing shortname",
    version: "metadata.yaml missing version",
  }

  outcomes = required.map do |field, message|
    result.add_error(message) unless metadata&.public_send(field)
  end
  outcomes.last
end
60
+
61
# Validates a single concepts/*.yaml archive entry.
#
# A parse failure is recorded and ends the check for this entry. Otherwise
# the concept must carry data.id, there must be at least one localization,
# each localization needs a language_code, and finally the URI rule in
# #validate_concept_uri is applied.
#
# @param entry [#name, #get_input_stream] archive entry
# @param metadata [Object, nil] package metadata, passed on to the URI check
# @param result [#add_error] collector for findings
def validate_concept_entry(entry, metadata, result) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  begin
    document = ConceptDocument.from_yamls(entry.get_input_stream.read)
  rescue Psych::SyntaxError => e
    return result.add_error("#{entry.name}: invalid YAML at line #{e.line}: #{e.message}")
  rescue StandardError => e
    return result.add_error("#{entry.name}: parse error: #{e.message}")
  end

  concept = document.concept
  result.add_error("#{entry.name}: document 0 missing data.identifier") unless concept&.data&.id

  localizations = document.localizations
  if localizations.empty?
    result.add_error("#{entry.name}: expected at least 1 localization document")
  else
    localizations.each_with_index do |l10n, idx|
      next if l10n&.language_code

      result.add_error("#{entry.name}: document #{idx + 1} missing data.language_code")
    end
  end

  validate_concept_uri(entry, concept, metadata, result)
end
87
+
88
# Adds a warning when nothing can supply a URI for the concept: the concept
# has no data.uri and the metadata has neither concept_uri_template nor
# uri_prefix.
#
# @param entry [#name] archive entry, used in the message
# @param concept [#data, nil]
# @param metadata [#concept_uri_template, #uri_prefix, nil]
# @param result [#add_warning]
def validate_concept_uri(entry, concept, metadata, result) # rubocop:disable Metrics/CyclomaticComplexity
  uri_sources = [
    concept&.data&.uri,
    metadata&.concept_uri_template,
    metadata&.uri_prefix,
  ]
  return unless uri_sources.all?(&:nil?)

  result.add_warning("#{entry.name}: no concept URI (data.uri) and no concept_uri_template or uri_prefix in metadata")
end
97
+ end
98
+ end
@@ -13,7 +13,7 @@ module Glossarist
13
13
  # @return [String]
14
14
  attribute :reference_to, :string
15
15
 
16
- yaml do
16
+ key_value do
17
17
  map :type, to: :type
18
18
  map :reference_from, to: :reference_from
19
19
  map :reference_to, to: :reference_to
@@ -4,11 +4,22 @@ module Glossarist
4
4
  attribute :review_type, :string
5
5
  attribute :entry_status, :string
6
6
 
7
- yaml do
7
+ key_value do
8
8
  map :classification, to: :classification
9
9
  map %i[review_type reviewType], to: :review_type
10
10
  end
11
11
 
12
+ NIL_COLLECTION_KEYS = %w[definition examples notes].freeze
13
+
14
# Normalises the incoming hash before handing it to the inherited of_yaml:
# under "data", every key listed in NIL_COLLECTION_KEYS that is present with
# a nil value is replaced by an empty array. The hash is modified in place.
#
# @param hash [Hash, Object] parsed document; non-hashes pass through untouched
# @param options [Hash] forwarded unchanged to super
def self.of_yaml(hash, options = {})
  data = hash.is_a?(Hash) ? hash["data"] : nil

  if data.is_a?(Hash)
    NIL_COLLECTION_KEYS.each do |key|
      next unless data.key?(key) && data[key].nil?

      data[key] = []
    end
  end

  super
end
22
+
12
23
  alias_method :status=, :entry_status=
13
24
 
14
25
  def language_code
@@ -21,7 +21,7 @@ module Glossarist
21
21
 
22
22
  attribute :uuid, :string
23
23
 
24
- yaml do
24
+ key_value do
25
25
  map :data, to: :data
26
26
  map :id, with: { to: :identifier_to_yaml, from: :identifier_from_yaml }
27
27
  map :identifier,
@@ -3,14 +3,18 @@ module Glossarist
3
3
  include Glossarist::Utilities::CommonFunctions
4
4
 
5
5
  attribute :id, :string
6
+ attribute :uri, :string
6
7
  attribute :localized_concepts, :hash
7
8
  attribute :groups, :string, collection: true
8
9
  attribute :sources, ConceptSource, collection: true
9
- attribute :localizations, :hash, collection: true, default: -> { {} }
10
+ attribute :localizations, LocalizedConcept,
11
+ collection: Collections::LocalizationCollection,
12
+ initialize_empty: true
10
13
 
11
- yaml do
14
+ key_value do
12
15
  map %i[id identifier], to: :id,
13
16
  with: { to: :id_to_yaml, from: :id_from_yaml }
17
+ map :uri, to: :uri
14
18
  map %i[localized_concepts localizedConcepts], to: :localized_concepts
15
19
  map :groups, to: :groups
16
20
  map :sources, to: :sources
@@ -28,11 +32,10 @@ module Glossarist
28
32
  end
29
33
 
30
34
  def localizations_from_yaml(model, value)
31
- model.localizations ||= {}
32
-
33
35
  value.each do |localized_concept_hash|
34
36
  localized_concept = Glossarist::LocalizedConcept.of_yaml(localized_concept_hash)
35
- model.localizations[localized_concept.language_code] = localized_concept
37
+ model.localizations.store(localized_concept.language_code,
38
+ localized_concept)
36
39
  end
37
40
  end
38
41
 
@@ -5,7 +5,7 @@ module Glossarist
5
5
  attribute :formula, :string
6
6
  attribute :sources, ConceptSource, collection: true
7
7
 
8
- yaml do
8
+ key_value do
9
9
  map :image, to: :image
10
10
  map :table, to: :table
11
11
  map :formula, to: :formula
@@ -0,0 +1,227 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+
5
+ module Glossarist
6
+ class ReferenceExtractor
7
+ @identifier_resolvers = []
8
+ @patterns = []
9
+
10
+ IdentifierResolver = Struct.new(:prefix, :resolver, keyword_init: true)
11
+ Pattern = Struct.new(:name, :regex, :resolver, keyword_init: true)
12
+
13
class << self
  # Adds a resolver for identifiers beginning with +prefix+. The block is
  # stored as given and kept in registration order.
  def register_identifier_resolver(prefix, &resolver)
    entry = IdentifierResolver.new(prefix: prefix, resolver: resolver)
    @identifier_resolvers.push(entry)
  end

  # Adds a text pattern; +regex+ is later scanned against text and the
  # block turns each match into a reference.
  def register_pattern(name:, regex:, &resolver)
    entry = Pattern.new(name: name, regex: regex, resolver: resolver)
    @patterns.push(entry)
  end

  # @return [Array<Pattern>] a copy, so callers cannot alter the registry
  def patterns
    [*@patterns]
  end

  # @return [Array<IdentifierResolver>] a copy of the resolver registry
  def identifier_resolvers
    [*@identifier_resolvers]
  end
end
31
+
32
# Scans +text+ with every registered pattern and returns the de-duplicated
# references produced by the pattern resolvers. Non-string input gives [].
#
# @param text [String, Object]
# @return [Array]
def extract_from_text(text)
  return [] unless text.is_a?(String)

  found = self.class.patterns.flat_map do |pattern|
    text.scan(pattern.regex).filter_map do |captures|
      # String#scan yields a bare String when the regex has no groups.
      arguments = captures.is_a?(Array) ? captures : [captures]
      pattern.resolver.call(self, *arguments)
    end
  end

  deduplicate(found)
end
45
+
46
# Extracts references from every text gathered out of one localized-concept
# hash, concatenated in gathering order.
#
# @param lc_hash [Hash]
# @return [Array]
def extract_from_localized(lc_hash)
  per_text = gather_texts(lc_hash).map { |text| extract_from_text(text) }
  per_text.flatten(1)
end
49
+
50
# Extracts references from a concept hash keyed by language code.
# Languages are visited in LANG_CODES order; entries that are not hashes
# are skipped.
#
# @param concept_hash [Hash]
# @return [Array]
def extract_from_concept_hash(concept_hash)
  LANG_CODES.each_with_object([]) do |lang, refs|
    localized = concept_hash[lang]
    refs.concat(extract_from_localized(localized)) if localized.is_a?(Hash)
  end
end
57
+
58
# Extracts references from every localization of a managed concept.
#
# @param concept [#localizations]
# @return [Array]
def extract_from_managed_concept(concept)
  concept.localizations.flat_map(&method(:extract_from_localized_concept))
end
63
+
64
# Extracts references from the definition, notes and examples of one
# localized concept, in that order. Items without content and nil
# collections are skipped.
#
# @param l10n [#data]
# @return [Array]
def extract_from_localized_concept(l10n)
  texts = []
  %i[definition notes examples].each do |field|
    l10n.data.public_send(field)&.each do |item|
      body = item.content
      texts << body if body
    end
  end

  texts.flat_map { |text| extract_from_text(text) }
end
71
+
72
# Unified concept mention dispatcher.
# +content+ is the text inside {{...}}: either "identifier" or
# "display, identifier". Only the first comma separates the two parts.
def resolve_mention(content)
  head, separator, tail = content.strip.partition(",")
  return resolve_by_identifier(head, nil) if separator.empty?

  resolve_by_identifier(tail.strip, head.strip)
end
86
+
87
# Turns an identifier into a reference.
#
# The first registered identifier resolver (built-in or custom) whose prefix
# matches wins and its result is returned as is. Otherwise a digit-led
# identifier made of digits, dots and dashes is a local concept id, and
# anything else is treated as a designation.
#
# @param identifier [String]
# @param display [String, nil] optional display text from the mention
def resolve_by_identifier(identifier, display)
  matched = self.class.identifier_resolvers.find do |candidate|
    identifier.start_with?(candidate.prefix)
  end
  return matched.resolver.call(self, identifier, display) if matched

  if /\A\d[\d.-]*\z/.match?(identifier)
    resolve_local(display || identifier, identifier)
  else
    resolve_designation(identifier, display)
  end
end
102
+
103
# Builds a "local" reference: a concept id with no source.
# Both values are stripped of surrounding whitespace.
#
# @param term [String]
# @param concept_id [String]
# @return [ConceptReference]
def resolve_local(term, concept_id)
  attributes = {
    term: term.strip,
    concept_id: concept_id.strip,
    source: nil,
    ref_type: "local",
  }
  ConceptReference.new(**attributes)
end
111
+
112
# Builds a "designation" reference, i.e. a mention by term only.
# The display text wins over +text+ when both are given.
#
# @param text [String]
# @param display [String, nil]
# @return [ConceptReference]
def resolve_designation(text, display)
  attributes = {
    term: display || text,
    concept_id: nil,
    source: nil,
    ref_type: "designation",
  }
  ConceptReference.new(**attributes)
end
120
+
121
# Builds a "urn" reference for an IEC 60050 URN. The concept id is taken
# from the URN by #extract_iec_concept_id; the source is always the
# IEC 60050 base URN. A missing display becomes an empty term.
#
# @param urn [String]
# @param display [String, nil]
# @return [ConceptReference]
def resolve_iec_urn(urn, display)
  attributes = {
    term: display || "",
    concept_id: extract_iec_concept_id(urn),
    source: "urn:iec:std:iec:60050",
    ref_type: "urn",
  }
  ConceptReference.new(**attributes)
end
131
+
132
# Builds a "urn" reference for an ISO URN of the form
# "urn:iso:std:iso:<number>[:<tail>]". The source keeps only the standard
# number; the concept id is derived from the tail.
#
# @param urn [String]
# @param display [String, nil]
# @return [ConceptReference, nil] nil when the URN does not have that form
def resolve_iso_urn(urn, display)
  parsed = urn.match(/\Aurn:iso:std:iso:(\d+)(?::(.*))?\z/)
  return unless parsed

  standard_number = parsed[1]
  ConceptReference.new(
    term: display || "",
    concept_id: extract_term_id_from_urn_tail(parsed[2]),
    source: "urn:iso:std:iso:#{standard_number}",
    ref_type: "urn",
  )
end
143
+
144
# Builds a "urn" reference for any other URN: the whole URN becomes the
# source and no concept id is set. A missing display becomes an empty term.
#
# @param urn [String]
# @param display [String, nil]
# @return [ConceptReference]
def resolve_generic_urn(urn, display)
  attributes = {
    term: display || "",
    concept_id: nil,
    source: urn,
    ref_type: "urn",
  }
  ConceptReference.new(**attributes)
end
152
+
153
+ LANG_CODES = Glossarist::LANG_CODES
154
+
155
+ private
156
+
157
# Collects the text bodies of a localized-concept hash, in the order
# definition, notes, examples.
#
# Each field may be nil, a plain String, or a list of items; the
# normalisation is done by #extract_text_fields, which already wraps a bare
# String definition. The definition is therefore not appended a second time:
# doing so made every reference in a String definition show up twice.
#
# @param lc_hash [Hash] keys "definition", "notes", "examples"
# @return [Array<String>]
def gather_texts(lc_hash)
  %w[definition notes examples].flat_map do |field|
    extract_text_fields(lc_hash[field])
  end
end
164
+
165
# Normalises one field value into a list of text strings.
#
# Accepts nil (gives []), a single item, or a list of items. A Hash item
# contributes its "content" value (skipped when absent); any other item
# contributes its string form.
#
# @param items [nil, String, Hash, Array]
# @return [Array<String>]
def extract_text_fields(items)
  # Array(hash) would split a lone Hash into [key, value] pairs, so wrap it
  # explicitly to treat it as a single item.
  list = items.is_a?(Hash) ? [items] : Array(items)

  list.filter_map do |item|
    item.is_a?(Hash) ? item["content"]&.to_s : item.to_s
  end
end
170
+
171
# Drops repeated references, keeping the first occurrence of each.
#
# References with a concept id are identified by source + concept id, so
# the display term is ignored. References without one are identified by
# source + term.
#
# @param refs [Array] objects responding to #source, #concept_id, #term
# @return [Array]
def deduplicate(refs)
  refs.uniq do |ref|
    if ref.concept_id
      [ref.source, ref.concept_id]
    else
      [ref.source, ref.concept_id, ref.term]
    end
  end
end
184
+
185
# Pulls a term id out of the part of an ISO URN that follows the standard
# number.
#
# "term:<ids>" gives the first id of a comma-separated list, "sec:<id>"
# gives that id, and any other tail is returned unchanged. A nil tail
# gives "".
#
# @param tail [String, nil]
# @return [String, nil]
def extract_term_id_from_urn_tail(tail)
  return "" unless tail

  case tail
  when /term:([\d.,]+)/ then Regexp.last_match(1).split(",").first
  when /sec:([\d.]+)/ then Regexp.last_match(1)
  else tail
  end
end
196
+
197
# Pulls the concept id out of an IEC 60050 URN.
#
# A "::#con-<id>" anchor wins. Otherwise the first colon-separated segment
# starting with "60050-" is used, minus that prefix and minus a trailing
# "-YYYY-MM" suffix. Returns "" when neither form is present.
#
# @param urn [String]
# @return [String]
def extract_iec_concept_id(urn)
  anchor = urn.match(/::#con-([\d-]+)/)
  return anchor[1] if anchor

  code_part = urn.split(":").find { |segment| segment.start_with?("60050-") }
  return "" unless code_part

  code_part.delete_prefix("60050-").sub(/-\d{4}-\d{2}\z/, "")
end
208
+
209
# Unified concept mention pattern: {{...}}
# The captured text between the braces is handed to #resolve_mention.
register_pattern(name: :concept_mention, regex: /\{\{([^}]+)\}\}/) do |extractor, content|
  extractor.resolve_mention(content)
end

# Built-in URN resolvers, registered from most to least specific prefix
# (the catch-all "urn:" goes last).
{
  "urn:iec:std:iec:60050" => :resolve_iec_urn,
  "urn:iso:std:iso:" => :resolve_iso_urn,
  "urn:" => :resolve_generic_urn,
}.each do |prefix, handler|
  register_identifier_resolver(prefix) do |extractor, identifier, display|
    extractor.public_send(handler, identifier, display)
  end
end
226
+ end
227
+ end