glossarist 2.4.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop_todo.yml +50 -146
- data/CLAUDE.md +85 -0
- data/Gemfile +26 -5
- data/README.adoc +383 -7
- data/TODO.integration/01-gcr-package-cli.md +180 -0
- data/exe/glossarist +1 -53
- data/glossarist.gemspec +3 -2
- data/lib/glossarist/asset.rb +1 -1
- data/lib/glossarist/citation.rb +1 -1
- data/lib/glossarist/cli/package_command.rb +32 -0
- data/lib/glossarist/cli/upgrade_command.rb +34 -0
- data/lib/glossarist/cli/validate_command.rb +56 -0
- data/lib/glossarist/cli.rb +105 -0
- data/lib/glossarist/collection_config.rb +23 -0
- data/lib/glossarist/collections/concept_source_collection.rb +9 -0
- data/lib/glossarist/collections/detailed_definition_collection.rb +18 -0
- data/lib/glossarist/collections/localization_collection.rb +37 -0
- data/lib/glossarist/collections/typed_collection.rb +26 -0
- data/lib/glossarist/collections.rb +21 -4
- data/lib/glossarist/concept.rb +1 -1
- data/lib/glossarist/concept_collector.rb +153 -0
- data/lib/glossarist/concept_data.rb +15 -8
- data/lib/glossarist/concept_date.rb +1 -1
- data/lib/glossarist/concept_document.rb +29 -0
- data/lib/glossarist/concept_enricher.rb +34 -0
- data/lib/glossarist/concept_manager.rb +31 -49
- data/lib/glossarist/concept_reference.rb +45 -0
- data/lib/glossarist/concept_source.rb +1 -1
- data/lib/glossarist/concept_validator.rb +114 -0
- data/lib/glossarist/custom_locality.rb +1 -1
- data/lib/glossarist/dataset_validator.rb +69 -0
- data/lib/glossarist/designation/abbreviation.rb +1 -1
- data/lib/glossarist/designation/base.rb +11 -4
- data/lib/glossarist/designation/expression.rb +1 -1
- data/lib/glossarist/designation/grammar_info.rb +1 -1
- data/lib/glossarist/designation/graphical_symbol.rb +1 -1
- data/lib/glossarist/designation/letter_symbol.rb +1 -1
- data/lib/glossarist/designation/symbol.rb +2 -2
- data/lib/glossarist/designation.rb +8 -11
- data/lib/glossarist/detailed_definition.rb +1 -1
- data/lib/glossarist/error.rb +2 -5
- data/lib/glossarist/gcr_metadata.rb +87 -0
- data/lib/glossarist/gcr_package.rb +223 -0
- data/lib/glossarist/gcr_statistics.rb +35 -0
- data/lib/glossarist/gcr_validator.rb +98 -0
- data/lib/glossarist/locality.rb +1 -1
- data/lib/glossarist/localized_concept.rb +12 -1
- data/lib/glossarist/managed_concept.rb +1 -1
- data/lib/glossarist/managed_concept_data.rb +8 -5
- data/lib/glossarist/non_verb_rep.rb +1 -1
- data/lib/glossarist/reference_extractor.rb +227 -0
- data/lib/glossarist/reference_resolver.rb +169 -0
- data/lib/glossarist/register_data.rb +39 -0
- data/lib/glossarist/related_concept.rb +1 -1
- data/lib/glossarist/resolution_adapter/local.rb +73 -0
- data/lib/glossarist/resolution_adapter/package.rb +22 -0
- data/lib/glossarist/resolution_adapter/remote.rb +60 -0
- data/lib/glossarist/resolution_adapter/route.rb +34 -0
- data/lib/glossarist/resolution_adapter.rb +14 -0
- data/lib/glossarist/schema_migration.rb +334 -0
- data/lib/glossarist/urn_resolver.rb +71 -0
- data/lib/glossarist/utilities.rb +6 -2
- data/lib/glossarist/v1/concept.rb +81 -0
- data/lib/glossarist/v1/cross_references.rb +41 -0
- data/lib/glossarist/v1/register.rb +50 -0
- data/lib/glossarist/v1.rb +9 -0
- data/lib/glossarist/validation_result.rb +38 -0
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +54 -24
- metadata +62 -6
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
require "set"
require "time"
require "zip"
|
|
5
|
+
|
|
6
|
+
module Glossarist
|
|
7
|
+
class GcrPackage
|
|
8
|
+
attr_reader :zip_path, :metadata, :concepts
|
|
9
|
+
|
|
10
|
+
def initialize(zip_path)
|
|
11
|
+
@zip_path = zip_path
|
|
12
|
+
@metadata = nil
|
|
13
|
+
@concepts = []
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def self.create(concepts:, metadata:, output_path:, register_data: nil)
|
|
17
|
+
FileUtils.mkdir_p(File.dirname(output_path))
|
|
18
|
+
package = new(output_path)
|
|
19
|
+
package.send(:write, concepts, metadata, register_data)
|
|
20
|
+
package
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def self.load(zip_path)
|
|
24
|
+
package = new(zip_path)
|
|
25
|
+
package.send(:read)
|
|
26
|
+
package
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def self.create_from_directory(dir, output:, shortname:, version:, # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
|
|
30
|
+
title: nil, description: nil, owner: nil,
|
|
31
|
+
tags: [], register_yaml: nil,
|
|
32
|
+
uri_prefix: nil, concept_uri_template: nil,
|
|
33
|
+
streaming: false)
|
|
34
|
+
dir = File.expand_path(dir)
|
|
35
|
+
|
|
36
|
+
if streaming
|
|
37
|
+
create_streaming(dir, output: output, shortname: shortname, version: version,
|
|
38
|
+
title: title, description: description, owner: owner,
|
|
39
|
+
tags: tags, register_yaml: register_yaml,
|
|
40
|
+
uri_prefix: uri_prefix,
|
|
41
|
+
concept_uri_template: concept_uri_template)
|
|
42
|
+
else
|
|
43
|
+
create_batch(dir, output: output, shortname: shortname, version: version,
|
|
44
|
+
title: title, description: description, owner: owner,
|
|
45
|
+
tags: tags, register_yaml: register_yaml,
|
|
46
|
+
uri_prefix: uri_prefix,
|
|
47
|
+
concept_uri_template: concept_uri_template)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def validate
|
|
52
|
+
GcrValidator.new.validate(@zip_path)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
private
|
|
56
|
+
|
|
57
|
+
def write(concepts, metadata, register_data)
|
|
58
|
+
Zip::File.open(@zip_path, create: true) do |zf|
|
|
59
|
+
zf.get_output_stream("metadata.yaml") do |f|
|
|
60
|
+
f.write(metadata.to_yaml)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
if register_data
|
|
64
|
+
zf.get_output_stream("register.yaml") do |f|
|
|
65
|
+
f.write(register_data.to_yaml)
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
concepts.each do |mc|
|
|
70
|
+
write_concept(zf, mc)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
def write_concept(zip_file, concept)
|
|
76
|
+
termid = concept.data.id.to_s
|
|
77
|
+
doc = ConceptDocument.from_managed_concept(concept)
|
|
78
|
+
zip_file.get_output_stream("concepts/#{termid}.yaml") do |f|
|
|
79
|
+
f.write(doc.to_yamls)
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def read
|
|
84
|
+
@concepts = []
|
|
85
|
+
|
|
86
|
+
Zip::File.open(@zip_path) do |zf|
|
|
87
|
+
if (entry = zf.find_entry("metadata.yaml"))
|
|
88
|
+
@metadata = GcrMetadata.from_yaml(entry.get_input_stream.read)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
zf.entries.each do |entry|
|
|
92
|
+
next unless entry.name.start_with?("concepts/") && entry.name.end_with?(".yaml")
|
|
93
|
+
|
|
94
|
+
raw = entry.get_input_stream.read
|
|
95
|
+
doc = ConceptDocument.from_yamls(raw)
|
|
96
|
+
@concepts << doc.to_managed_concept
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
class << self
|
|
102
|
+
private
|
|
103
|
+
|
|
104
|
+
def create_batch(dir, output:, shortname:, version:, **opts)
|
|
105
|
+
concepts = ConceptCollector.collect(dir)
|
|
106
|
+
if concepts.empty?
|
|
107
|
+
raise ArgumentError,
|
|
108
|
+
"No concept files found in #{dir}"
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
enricher = ConceptEnricher.new
|
|
112
|
+
enricher.inject_references(concepts)
|
|
113
|
+
if opts[:concept_uri_template]
|
|
114
|
+
enricher.apply_uri_template(concepts,
|
|
115
|
+
opts[:concept_uri_template])
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
register_data = load_register_data(opts[:register_yaml], dir)
|
|
119
|
+
metadata = build_metadata(concepts, shortname: shortname, version: version,
|
|
120
|
+
register_data: register_data, **opts)
|
|
121
|
+
|
|
122
|
+
create(
|
|
123
|
+
concepts: concepts,
|
|
124
|
+
metadata: metadata,
|
|
125
|
+
register_data: register_data,
|
|
126
|
+
output_path: File.expand_path(output),
|
|
127
|
+
)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def create_streaming(dir, output:, shortname:, version:, **opts) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/BlockLength
|
|
131
|
+
enricher = ConceptEnricher.new
|
|
132
|
+
output_path = File.expand_path(output)
|
|
133
|
+
FileUtils.mkdir_p(File.dirname(output_path))
|
|
134
|
+
|
|
135
|
+
register_data = load_register_data(opts[:register_yaml], dir)
|
|
136
|
+
concept_count = 0
|
|
137
|
+
languages = Set.new
|
|
138
|
+
|
|
139
|
+
Zip::OutputStream.open(output_path) do |zos|
|
|
140
|
+
if register_data
|
|
141
|
+
zos.put_next_entry("register.yaml")
|
|
142
|
+
zos.write(register_data.to_yaml)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
ConceptCollector.each_concept(dir) do |mc|
|
|
146
|
+
enricher.inject_references([mc])
|
|
147
|
+
if opts[:concept_uri_template]
|
|
148
|
+
enricher.apply_uri_template([mc],
|
|
149
|
+
opts[:concept_uri_template])
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
mc.localizations.each do |l10n|
|
|
153
|
+
languages << l10n.language_code if l10n.language_code
|
|
154
|
+
end
|
|
155
|
+
concept_count += 1
|
|
156
|
+
|
|
157
|
+
termid = mc.data.id.to_s
|
|
158
|
+
doc = ConceptDocument.from_managed_concept(mc)
|
|
159
|
+
zos.put_next_entry("concepts/#{termid}.yaml")
|
|
160
|
+
zos.write(doc.to_yamls)
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
if concept_count.zero?
|
|
164
|
+
raise ArgumentError,
|
|
165
|
+
"No concept files found in #{dir}"
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
metadata = build_streaming_metadata(concept_count, languages,
|
|
169
|
+
shortname: shortname, version: version,
|
|
170
|
+
register_data: register_data, **opts)
|
|
171
|
+
zos.put_next_entry("metadata.yaml")
|
|
172
|
+
zos.write(metadata.to_yaml)
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
new(output_path)
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
def build_streaming_metadata(concept_count, languages, shortname:, version:, # rubocop:disable Metrics/ParameterLists
|
|
179
|
+
register_data: nil, **opts)
|
|
180
|
+
GcrMetadata.new(
|
|
181
|
+
shortname: shortname,
|
|
182
|
+
version: version,
|
|
183
|
+
title: opts[:title],
|
|
184
|
+
description: opts[:description],
|
|
185
|
+
owner: opts[:owner],
|
|
186
|
+
tags: opts[:tags] || [],
|
|
187
|
+
concept_count: concept_count,
|
|
188
|
+
languages: languages.sort,
|
|
189
|
+
created_at: Time.now.utc.iso8601,
|
|
190
|
+
glossarist_version: Glossarist::VERSION,
|
|
191
|
+
schema_version: register_data&.dig("schema_version") || SchemaMigration::CURRENT_SCHEMA_VERSION,
|
|
192
|
+
uri_prefix: opts[:uri_prefix],
|
|
193
|
+
concept_uri_template: opts[:concept_uri_template],
|
|
194
|
+
)
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
def build_metadata(concepts, shortname:, version:, register_data: nil,
|
|
198
|
+
**opts)
|
|
199
|
+
GcrMetadata.from_concepts(
|
|
200
|
+
concepts,
|
|
201
|
+
register_data: register_data,
|
|
202
|
+
options: {
|
|
203
|
+
shortname: shortname,
|
|
204
|
+
version: version,
|
|
205
|
+
title: opts[:title],
|
|
206
|
+
description: opts[:description],
|
|
207
|
+
owner: opts[:owner],
|
|
208
|
+
tags: opts[:tags],
|
|
209
|
+
uri_prefix: opts[:uri_prefix],
|
|
210
|
+
concept_uri_template: opts[:concept_uri_template],
|
|
211
|
+
},
|
|
212
|
+
)
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
def load_register_data(register_yaml_path, dir)
|
|
216
|
+
path = register_yaml_path || File.join(dir, "register.yaml")
|
|
217
|
+
return nil unless File.exist?(path)
|
|
218
|
+
|
|
219
|
+
RegisterData.from_file(path)
|
|
220
|
+
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class GcrStatistics < Lutaml::Model::Serializable
|
|
5
|
+
attribute :total_concepts, :integer
|
|
6
|
+
attribute :languages, :string, collection: true
|
|
7
|
+
attribute :concepts_by_status, :hash
|
|
8
|
+
attribute :concepts_with_definitions, :integer
|
|
9
|
+
attribute :concepts_with_sources, :integer
|
|
10
|
+
|
|
11
|
+
key_value do
|
|
12
|
+
map :total_concepts, to: :total_concepts
|
|
13
|
+
map :languages, to: :languages
|
|
14
|
+
map :concepts_by_status, to: :concepts_by_status
|
|
15
|
+
map :concepts_with_definitions, to: :concepts_with_definitions
|
|
16
|
+
map :concepts_with_sources, to: :concepts_with_sources
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def self.from_concepts(concepts)
|
|
20
|
+
l10ns = concepts.flat_map { |c| c.localizations.to_a }
|
|
21
|
+
|
|
22
|
+
new(
|
|
23
|
+
total_concepts: concepts.length,
|
|
24
|
+
languages: l10ns.map(&:language_code).compact.sort.uniq,
|
|
25
|
+
concepts_by_status: l10ns.map(&:entry_status).compact.tally,
|
|
26
|
+
concepts_with_definitions: count_with(l10ns, :definition),
|
|
27
|
+
concepts_with_sources: count_with(l10ns, :sources),
|
|
28
|
+
)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def self.count_with(l10ns, attr)
|
|
32
|
+
l10ns.count { |l| l.data.send(attr)&.any? }
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "zip"
|
|
4
|
+
|
|
5
|
+
module Glossarist
|
|
6
|
+
class GcrValidator
|
|
7
|
+
def validate(zip_path) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
8
|
+
result = ValidationResult.new
|
|
9
|
+
|
|
10
|
+
unless File.exist?(zip_path)
|
|
11
|
+
result.add_error("File not found: #{zip_path}")
|
|
12
|
+
return result
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
begin
|
|
16
|
+
Zip::File.open(zip_path) do |zf|
|
|
17
|
+
unless zf.find_entry("metadata.yaml")
|
|
18
|
+
result.add_error("Missing metadata.yaml")
|
|
19
|
+
return result
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
metadata = GcrMetadata.from_yaml(
|
|
23
|
+
zf.find_entry("metadata.yaml").get_input_stream.read,
|
|
24
|
+
)
|
|
25
|
+
validate_metadata(metadata, result)
|
|
26
|
+
|
|
27
|
+
concept_entries = zf.entries.select do |e|
|
|
28
|
+
e.name.start_with?("concepts/") && e.name.end_with?(".yaml")
|
|
29
|
+
end
|
|
30
|
+
if concept_entries.empty?
|
|
31
|
+
result.add_error("No concept files found in concepts/")
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
concept_entries.each do |entry|
|
|
35
|
+
validate_concept_entry(entry, metadata, result)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
rescue StandardError => e
|
|
39
|
+
result.add_error("Failed to read ZIP: #{e.message}")
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
result
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
private
|
|
46
|
+
|
|
47
|
+
def validate_metadata(metadata, result)
|
|
48
|
+
unless metadata&.concept_count
|
|
49
|
+
result.add_error("metadata.yaml missing required fields (concept_count)")
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
unless metadata&.shortname
|
|
53
|
+
result.add_error("metadata.yaml missing shortname")
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
unless metadata&.version
|
|
57
|
+
result.add_error("metadata.yaml missing version")
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def validate_concept_entry(entry, metadata, result) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
|
62
|
+
raw = entry.get_input_stream.read
|
|
63
|
+
doc = ConceptDocument.from_yamls(raw)
|
|
64
|
+
rescue Psych::SyntaxError => e
|
|
65
|
+
result.add_error("#{entry.name}: invalid YAML at line #{e.line}: #{e.message}")
|
|
66
|
+
rescue StandardError => e
|
|
67
|
+
result.add_error("#{entry.name}: parse error: #{e.message}")
|
|
68
|
+
else
|
|
69
|
+
concept = doc.concept
|
|
70
|
+
unless concept&.data&.id
|
|
71
|
+
result.add_error("#{entry.name}: document 0 missing data.identifier")
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
localizations = doc.localizations
|
|
75
|
+
if localizations.empty?
|
|
76
|
+
result.add_error("#{entry.name}: expected at least 1 localization document")
|
|
77
|
+
else
|
|
78
|
+
localizations.each_with_index do |l10n, idx|
|
|
79
|
+
unless l10n&.language_code
|
|
80
|
+
result.add_error("#{entry.name}: document #{idx + 1} missing data.language_code")
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
validate_concept_uri(entry, concept, metadata, result)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def validate_concept_uri(entry, concept, metadata, result) # rubocop:disable Metrics/CyclomaticComplexity
|
|
89
|
+
concept_uri = concept&.data&.uri
|
|
90
|
+
template = metadata&.concept_uri_template
|
|
91
|
+
uri_prefix = metadata&.uri_prefix
|
|
92
|
+
|
|
93
|
+
if concept_uri.nil? && template.nil? && uri_prefix.nil?
|
|
94
|
+
result.add_warning("#{entry.name}: no concept URI (data.uri) and no concept_uri_template or uri_prefix in metadata")
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
end
|
data/lib/glossarist/locality.rb
CHANGED
|
@@ -4,11 +4,22 @@ module Glossarist
|
|
|
4
4
|
attribute :review_type, :string
|
|
5
5
|
attribute :entry_status, :string
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
key_value do
|
|
8
8
|
map :classification, to: :classification
|
|
9
9
|
map %i[review_type reviewType], to: :review_type
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
+
NIL_COLLECTION_KEYS = %w[definition examples notes].freeze
|
|
13
|
+
|
|
14
|
+
def self.of_yaml(hash, options = {})
|
|
15
|
+
if hash.is_a?(Hash) && (data = hash["data"]).is_a?(Hash)
|
|
16
|
+
NIL_COLLECTION_KEYS.each do |key|
|
|
17
|
+
data[key] = [] if data.key?(key) && data[key].nil?
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
super
|
|
21
|
+
end
|
|
22
|
+
|
|
12
23
|
alias_method :status=, :entry_status=
|
|
13
24
|
|
|
14
25
|
def language_code
|
|
@@ -3,14 +3,18 @@ module Glossarist
|
|
|
3
3
|
include Glossarist::Utilities::CommonFunctions
|
|
4
4
|
|
|
5
5
|
attribute :id, :string
|
|
6
|
+
attribute :uri, :string
|
|
6
7
|
attribute :localized_concepts, :hash
|
|
7
8
|
attribute :groups, :string, collection: true
|
|
8
9
|
attribute :sources, ConceptSource, collection: true
|
|
9
|
-
attribute :localizations,
|
|
10
|
+
attribute :localizations, LocalizedConcept,
|
|
11
|
+
collection: Collections::LocalizationCollection,
|
|
12
|
+
initialize_empty: true
|
|
10
13
|
|
|
11
|
-
|
|
14
|
+
key_value do
|
|
12
15
|
map %i[id identifier], to: :id,
|
|
13
16
|
with: { to: :id_to_yaml, from: :id_from_yaml }
|
|
17
|
+
map :uri, to: :uri
|
|
14
18
|
map %i[localized_concepts localizedConcepts], to: :localized_concepts
|
|
15
19
|
map :groups, to: :groups
|
|
16
20
|
map :sources, to: :sources
|
|
@@ -28,11 +32,10 @@ module Glossarist
|
|
|
28
32
|
end
|
|
29
33
|
|
|
30
34
|
def localizations_from_yaml(model, value)
|
|
31
|
-
model.localizations ||= {}
|
|
32
|
-
|
|
33
35
|
value.each do |localized_concept_hash|
|
|
34
36
|
localized_concept = Glossarist::LocalizedConcept.of_yaml(localized_concept_hash)
|
|
35
|
-
model.localizations
|
|
37
|
+
model.localizations.store(localized_concept.language_code,
|
|
38
|
+
localized_concept)
|
|
36
39
|
end
|
|
37
40
|
end
|
|
38
41
|
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Glossarist
|
|
6
|
+
class ReferenceExtractor
|
|
7
|
+
@identifier_resolvers = []
|
|
8
|
+
@patterns = []
|
|
9
|
+
|
|
10
|
+
IdentifierResolver = Struct.new(:prefix, :resolver, keyword_init: true)
|
|
11
|
+
Pattern = Struct.new(:name, :regex, :resolver, keyword_init: true)
|
|
12
|
+
|
|
13
|
+
class << self
|
|
14
|
+
def register_identifier_resolver(prefix, &resolver)
|
|
15
|
+
@identifier_resolvers << IdentifierResolver.new(prefix: prefix,
|
|
16
|
+
resolver: resolver)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def register_pattern(name:, regex:, &resolver)
|
|
20
|
+
@patterns << Pattern.new(name: name, regex: regex, resolver: resolver)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def patterns
|
|
24
|
+
@patterns.dup
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def identifier_resolvers
|
|
28
|
+
@identifier_resolvers.dup
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def extract_from_text(text)
|
|
33
|
+
return [] unless text.is_a?(String)
|
|
34
|
+
|
|
35
|
+
refs = []
|
|
36
|
+
self.class.patterns.each do |pattern|
|
|
37
|
+
text.scan(pattern.regex).each do |captures|
|
|
38
|
+
captures = [captures] unless captures.is_a?(Array)
|
|
39
|
+
ref = pattern.resolver.call(self, *captures)
|
|
40
|
+
refs << ref if ref
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
deduplicate(refs)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def extract_from_localized(lc_hash)
|
|
47
|
+
gather_texts(lc_hash).flat_map { |t| extract_from_text(t) }
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def extract_from_concept_hash(concept_hash)
|
|
51
|
+
LANG_CODES.flat_map do |lang|
|
|
52
|
+
next [] unless concept_hash[lang].is_a?(Hash)
|
|
53
|
+
|
|
54
|
+
extract_from_localized(concept_hash[lang])
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def extract_from_managed_concept(concept)
|
|
59
|
+
concept.localizations.flat_map do |l10n|
|
|
60
|
+
extract_from_localized_concept(l10n)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def extract_from_localized_concept(l10n)
|
|
65
|
+
texts = []
|
|
66
|
+
l10n.data.definition&.each { |d| texts << d.content if d.content }
|
|
67
|
+
l10n.data.notes&.each { |n| texts << n.content if n.content }
|
|
68
|
+
l10n.data.examples&.each { |e| texts << e.content if e.content }
|
|
69
|
+
texts.flat_map { |t| extract_from_text(t) }
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Unified concept mention dispatcher.
|
|
73
|
+
# Content is the text inside {{...}}.
|
|
74
|
+
def resolve_mention(content)
|
|
75
|
+
content = content.strip
|
|
76
|
+
|
|
77
|
+
if content.include?(",")
|
|
78
|
+
parts = content.split(",", 2)
|
|
79
|
+
display = parts[0].strip
|
|
80
|
+
identifier = parts[1].strip
|
|
81
|
+
resolve_by_identifier(identifier, display)
|
|
82
|
+
else
|
|
83
|
+
resolve_by_identifier(content, nil)
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def resolve_by_identifier(identifier, display)
|
|
88
|
+
# Check registered identifier resolvers (built-in + custom)
|
|
89
|
+
self.class.identifier_resolvers.each do |ir|
|
|
90
|
+
next unless identifier.start_with?(ir.prefix)
|
|
91
|
+
|
|
92
|
+
return ir.resolver.call(self, identifier, display)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
case identifier
|
|
96
|
+
when /\A\d[\d.-]*\z/
|
|
97
|
+
resolve_local(display || identifier, identifier)
|
|
98
|
+
else
|
|
99
|
+
resolve_designation(identifier, display)
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def resolve_local(term, concept_id)
|
|
104
|
+
ConceptReference.new(
|
|
105
|
+
term: term.strip,
|
|
106
|
+
concept_id: concept_id.strip,
|
|
107
|
+
source: nil,
|
|
108
|
+
ref_type: "local",
|
|
109
|
+
)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def resolve_designation(text, display)
|
|
113
|
+
ConceptReference.new(
|
|
114
|
+
term: display || text,
|
|
115
|
+
concept_id: nil,
|
|
116
|
+
source: nil,
|
|
117
|
+
ref_type: "designation",
|
|
118
|
+
)
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def resolve_iec_urn(urn, display)
|
|
122
|
+
concept_id = extract_iec_concept_id(urn)
|
|
123
|
+
|
|
124
|
+
ConceptReference.new(
|
|
125
|
+
term: display || "",
|
|
126
|
+
concept_id: concept_id,
|
|
127
|
+
source: "urn:iec:std:iec:60050",
|
|
128
|
+
ref_type: "urn",
|
|
129
|
+
)
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def resolve_iso_urn(urn, display)
|
|
133
|
+
if (m = urn.match(/\Aurn:iso:std:iso:(\d+)(?::(.*))?\z/))
|
|
134
|
+
term_id = extract_term_id_from_urn_tail(m[2])
|
|
135
|
+
ConceptReference.new(
|
|
136
|
+
term: display || "",
|
|
137
|
+
concept_id: term_id,
|
|
138
|
+
source: "urn:iso:std:iso:#{m[1]}",
|
|
139
|
+
ref_type: "urn",
|
|
140
|
+
)
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def resolve_generic_urn(urn, display)
|
|
145
|
+
ConceptReference.new(
|
|
146
|
+
term: display || "",
|
|
147
|
+
concept_id: nil,
|
|
148
|
+
source: urn,
|
|
149
|
+
ref_type: "urn",
|
|
150
|
+
)
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
LANG_CODES = Glossarist::LANG_CODES
|
|
154
|
+
|
|
155
|
+
private
|
|
156
|
+
|
|
157
|
+
def gather_texts(lc_hash)
|
|
158
|
+
texts = extract_text_fields(lc_hash["definition"])
|
|
159
|
+
texts << lc_hash["definition"].to_s if lc_hash["definition"].is_a?(String)
|
|
160
|
+
texts.concat(extract_text_fields(lc_hash["notes"]))
|
|
161
|
+
texts.concat(extract_text_fields(lc_hash["examples"]))
|
|
162
|
+
texts
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def extract_text_fields(items)
|
|
166
|
+
Array(items).filter_map do |item|
|
|
167
|
+
item.is_a?(Hash) ? item["content"]&.to_s : item.to_s
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
def deduplicate(refs)
|
|
172
|
+
seen = Set.new
|
|
173
|
+
refs.select do |ref|
|
|
174
|
+
key = if ref.concept_id
|
|
175
|
+
[ref.source,
|
|
176
|
+
ref.concept_id]
|
|
177
|
+
else
|
|
178
|
+
[ref.source, ref.concept_id,
|
|
179
|
+
ref.term]
|
|
180
|
+
end
|
|
181
|
+
seen.add?(key)
|
|
182
|
+
end
|
|
183
|
+
end
|
|
184
|
+
|
|
185
|
+
def extract_term_id_from_urn_tail(tail)
|
|
186
|
+
return "" unless tail
|
|
187
|
+
|
|
188
|
+
if (m = tail.match(/term:([\d.,]+)/))
|
|
189
|
+
m[1].split(",").first
|
|
190
|
+
elsif (m = tail.match(/sec:([\d.]+)/))
|
|
191
|
+
m[1]
|
|
192
|
+
else
|
|
193
|
+
tail
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
def extract_iec_concept_id(urn)
|
|
198
|
+
if (m = urn.match(/::#con-([\d-]+)/))
|
|
199
|
+
m[1]
|
|
200
|
+
else
|
|
201
|
+
segments = urn.split(":")
|
|
202
|
+
code_part = segments.find { |s| s.start_with?("60050-") }
|
|
203
|
+
return "" unless code_part
|
|
204
|
+
|
|
205
|
+
code_part.delete_prefix("60050-").sub(/-\d{4}-\d{2}\z/, "")
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
# Unified concept mention pattern: {{...}}
|
|
210
|
+
register_pattern(
|
|
211
|
+
name: :concept_mention,
|
|
212
|
+
regex: /\{\{([^}]+)\}\}/,
|
|
213
|
+
) { |ext, content| ext.resolve_mention(content) }
|
|
214
|
+
|
|
215
|
+
register_identifier_resolver("urn:iec:std:iec:60050") do |ext, identifier, display|
|
|
216
|
+
ext.resolve_iec_urn(identifier, display)
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
register_identifier_resolver("urn:iso:std:iso:") do |ext, identifier, display|
|
|
220
|
+
ext.resolve_iso_urn(identifier, display)
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
register_identifier_resolver("urn:") do |ext, identifier, display|
|
|
224
|
+
ext.resolve_generic_urn(identifier, display)
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
end
|