glossarist 2.8.7 → 2.8.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop_todo.yml +128 -11
- data/CLAUDE.md +34 -3
- data/Gemfile +1 -0
- data/lib/glossarist/cli/compare_command.rb +2 -2
- data/lib/glossarist/cli/export_command.rb +1 -3
- data/lib/glossarist/collection.rb +1 -1
- data/lib/glossarist/collections/bibliography_collection.rb +1 -1
- data/lib/glossarist/concept_data.rb +1 -0
- data/lib/glossarist/concept_reference.rb +7 -1
- data/lib/glossarist/concept_source.rb +2 -2
- data/lib/glossarist/concept_validator.rb +3 -1
- data/lib/glossarist/dataset_validator.rb +1 -1
- data/lib/glossarist/{error.rb → errors/base.rb} +3 -1
- data/lib/glossarist/errors/cache_version_mismatch_error.rb +12 -0
- data/lib/glossarist/errors/invalid_language_code_error.rb +19 -0
- data/lib/glossarist/errors/invalid_type_error.rb +8 -0
- data/lib/glossarist/errors/load_error.rb +22 -0
- data/lib/glossarist/errors/parse_error.rb +24 -0
- data/lib/glossarist/errors.rb +14 -0
- data/lib/glossarist/gcr_package.rb +4 -2
- data/lib/glossarist/glossary_store.rb +175 -1
- data/lib/glossarist/managed_concept.rb +16 -2
- data/lib/glossarist/managed_concept_collection.rb +52 -8
- data/lib/glossarist/reference_extractor.rb +22 -2
- data/lib/glossarist/reference_resolver.rb +38 -3
- data/lib/glossarist/resolution_adapter/bibliography.rb +22 -0
- data/lib/glossarist/resolution_adapter.rb +1 -0
- data/lib/glossarist/schema_migration/v0_to_v1.rb +200 -0
- data/lib/glossarist/schema_migration/v2_to_v3.rb +50 -0
- data/lib/glossarist/schema_migration.rb +10 -224
- data/lib/glossarist/sts/importer.rb +11 -12
- data/lib/glossarist/sts/term_extractor.rb +104 -6
- data/lib/glossarist/validation/asset_index.rb +1 -1
- data/lib/glossarist/validation/rules/cite_ref_integrity_rule.rb +75 -0
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +5 -13
- data/scripts/upgrade_dataset_to_v3.rb +1 -1
- metadata +13 -9
- data/lib/glossarist/concept_collector.rb +0 -231
- data/lib/glossarist/concept_manager.rb +0 -183
- data/lib/glossarist/error/cache_version_mismatch_error.rb +0 -8
- data/lib/glossarist/error/invalid_language_code_error.rb +0 -15
- data/lib/glossarist/error/invalid_type_error.rb +0 -4
- data/lib/glossarist/error/parse_error.rb +0 -16
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  class SchemaMigration
    # Concept-level migration from schema version "2" to schema version "3".
    #
    # The concept passed in is expected to respond to +schema_version+,
    # +schema_version=+, +related+, +related=+ and +data+ (whose own
    # +related+ list is moved up onto the concept by the v2 -> v3 step).
    module V2ToV3
      # Upper bound on chained migration steps, so that a step which fails
      # to advance the version cannot loop forever.
      MAX_STEPS = 5

      # Migrates +concept+ in place until it reaches +target_version+.
      #
      # @param concept [Object] concept to migrate (mutated in place)
      # @param target_version [#to_s] schema version to reach
      # @return [Object] the same +concept+ object
      # @raise [Errors::Base] when no step exists for the current version, or
      #   the target is not reached within MAX_STEPS steps
      def self.migrate_concept(concept, target_version: Glossarist::SCHEMA_VERSION)
        # Versions are compared as Strings below ("2", target.to_s), so the
        # detected version is normalised too; a non-String result from the
        # detection fallback would otherwise never match any step.
        current = concept_version(concept).to_s
        target = target_version.to_s

        return concept if current == target

        MAX_STEPS.times do
          break if current == target

          current =
            case current
            when "2" then step_v2_to_v3(concept)
            else
              raise Errors::Base,
                    "No concept migration step from version #{current}"
            end
        end

        unless current == target
          raise Errors::Base,
                "Migration chain too long or unresolvable"
        end

        concept.schema_version = target
        concept
      end

      # Returns the concept's declared schema version as a String when it is
      # present and non-empty; otherwise defers to
      # ManagedConcept.detect_schema_version.
      def self.concept_version(concept)
        version = concept.schema_version
        return version.to_s if version && !version.to_s.empty?

        ManagedConcept.detect_schema_version(concept)
      end

      # Moves any entries of +concept.data.related+ onto +concept.related+
      # (de-duplicated), empties the data-level list, and returns the version
      # reached by this step.
      #
      # @return [String] always "3"
      def self.step_v2_to_v3(concept)
        if concept.data&.related&.any?
          concept.related ||= []
          concept.related = (concept.related + concept.data.related).uniq
          concept.data.related = []
        end
        "3"
      end
    end
  end
end
|
|
@@ -6,76 +6,19 @@ module Glossarist
|
|
|
6
6
|
class SchemaMigration
|
|
7
7
|
CURRENT_SCHEMA_VERSION = "1"
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
target = target_version.to_s
|
|
12
|
-
|
|
13
|
-
return concept if current == target
|
|
14
|
-
|
|
15
|
-
max_steps = 5
|
|
16
|
-
max_steps.times do
|
|
17
|
-
break if current == target
|
|
18
|
-
|
|
19
|
-
case current
|
|
20
|
-
when "2" then current = step_v2_to_v3(concept)
|
|
21
|
-
else
|
|
22
|
-
raise Error, "No concept migration step from version #{current}"
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
unless current == target
|
|
27
|
-
raise Error,
|
|
28
|
-
"Migration chain too long or unresolvable"
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
concept.schema_version = target
|
|
32
|
-
concept
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def self.concept_version(concept)
|
|
36
|
-
version = concept.schema_version
|
|
37
|
-
return version.to_s if version && !version.to_s.empty?
|
|
38
|
-
|
|
39
|
-
ManagedConcept.detect_schema_version(concept)
|
|
40
|
-
end
|
|
9
|
+
autoload :V0ToV1, "glossarist/schema_migration/v0_to_v1"
|
|
10
|
+
autoload :V2ToV3, "glossarist/schema_migration/v2_to_v3"
|
|
41
11
|
|
|
42
|
-
def self.
|
|
43
|
-
|
|
44
|
-
concept.related ||= []
|
|
45
|
-
concept.related = (concept.related + concept.data.related).uniq
|
|
46
|
-
concept.data.related = []
|
|
47
|
-
end
|
|
48
|
-
"3"
|
|
12
|
+
def self.new(...)
|
|
13
|
+
V0ToV1.new(...)
|
|
49
14
|
end
|
|
50
15
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
"Confirmed" => "valid",
|
|
54
|
-
"Proposed" => "draft",
|
|
55
|
-
}.freeze
|
|
56
|
-
|
|
57
|
-
LANG_CODES = Glossarist::LANG_CODES
|
|
58
|
-
|
|
59
|
-
IEV_PATTERN = /\{\{([^,}]+),\s*IEV:([^}]+)\}\}/
|
|
60
|
-
URN_PATTERN = /\{urn:iso:std:iso:(\d+):([^,}]+),([^}]+)\}/
|
|
61
|
-
|
|
62
|
-
attr_reader :from_version, :to_version
|
|
63
|
-
|
|
64
|
-
def initialize(concept_hash, from_version: "0",
|
|
65
|
-
to_version: CURRENT_SCHEMA_VERSION, ref_maps: {})
|
|
66
|
-
@concept = concept_hash
|
|
67
|
-
@from_version = from_version
|
|
68
|
-
@to_version = to_version
|
|
69
|
-
@ref_maps = ref_maps
|
|
16
|
+
def self.migrate_concept(concept, target_version: Glossarist::SCHEMA_VERSION)
|
|
17
|
+
V2ToV3.migrate_concept(concept, target_version: target_version)
|
|
70
18
|
end
|
|
71
19
|
|
|
72
|
-
def
|
|
73
|
-
|
|
74
|
-
when ["0", "1"] then migrate_v0_to_v1
|
|
75
|
-
else
|
|
76
|
-
raise Error, "Unsupported migration: #{from_version} -> #{to_version}"
|
|
77
|
-
end
|
|
78
|
-
@concept
|
|
20
|
+
def self.concept_version(concept)
|
|
21
|
+
V2ToV3.concept_version(concept)
|
|
79
22
|
end
|
|
80
23
|
|
|
81
24
|
def self.upgrade_directory(source_dir, output:, # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
|
|
@@ -112,163 +55,6 @@ module Glossarist
|
|
|
112
55
|
}
|
|
113
56
|
end
|
|
114
57
|
|
|
115
|
-
private
|
|
116
|
-
|
|
117
|
-
def migrate_v0_to_v1
|
|
118
|
-
migrate_termid
|
|
119
|
-
LANG_CODES.each do |lang|
|
|
120
|
-
migrate_language_block(lang) if @concept[lang]
|
|
121
|
-
end
|
|
122
|
-
strip_revisions
|
|
123
|
-
end
|
|
124
|
-
|
|
125
|
-
def migrate_termid
|
|
126
|
-
@concept["termid"] = String(@concept["termid"]) if @concept.key?("termid")
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
def migrate_language_block(lang)
|
|
130
|
-
lc = @concept[lang]
|
|
131
|
-
return unless lc.is_a?(Hash)
|
|
132
|
-
|
|
133
|
-
migrate_definition(lc)
|
|
134
|
-
migrate_authoritative_source(lc)
|
|
135
|
-
migrate_dates(lc)
|
|
136
|
-
migrate_entry_status(lc)
|
|
137
|
-
migrate_terms_abbrev(lc)
|
|
138
|
-
extract_inline_refs(lc)
|
|
139
|
-
strip_revisions(lc)
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
def migrate_definition(lc)
|
|
143
|
-
return unless lc.key?("definition")
|
|
144
|
-
return unless lc["definition"].is_a?(String)
|
|
145
|
-
|
|
146
|
-
lc["definition"] = [{ "content" => lc["definition"] }]
|
|
147
|
-
end
|
|
148
|
-
|
|
149
|
-
def migrate_authoritative_source(lc)
|
|
150
|
-
return unless lc.key?("authoritative_source")
|
|
151
|
-
|
|
152
|
-
src = lc.delete("authoritative_source")
|
|
153
|
-
return if lc.key?("sources")
|
|
154
|
-
|
|
155
|
-
sources = (src.is_a?(Array) ? src : [src]).filter_map do |s|
|
|
156
|
-
next unless s.is_a?(Hash)
|
|
157
|
-
|
|
158
|
-
origin = {}
|
|
159
|
-
origin["ref"] = s["ref"] if s["ref"]
|
|
160
|
-
origin["clause"] = s["clause"] if s["clause"]
|
|
161
|
-
origin["link"] = s["link"] if s["link"]
|
|
162
|
-
|
|
163
|
-
entry = { "type" => "authoritative", "origin" => origin }
|
|
164
|
-
if s["relationship"]
|
|
165
|
-
entry["status"] = s["relationship"]["type"] || "identical"
|
|
166
|
-
if s["relationship"]["modification"]
|
|
167
|
-
entry["modification"] =
|
|
168
|
-
s["relationship"]["modification"]
|
|
169
|
-
end
|
|
170
|
-
end
|
|
171
|
-
entry
|
|
172
|
-
end
|
|
173
|
-
|
|
174
|
-
lc["sources"] = sources if sources.any?
|
|
175
|
-
end
|
|
176
|
-
|
|
177
|
-
def migrate_dates(lc)
|
|
178
|
-
return if lc.key?("dates")
|
|
179
|
-
|
|
180
|
-
dates = []
|
|
181
|
-
if lc["date_accepted"]
|
|
182
|
-
dates << { "type" => "accepted", "date" => lc["date_accepted"] }
|
|
183
|
-
end
|
|
184
|
-
if lc["date_amended"]
|
|
185
|
-
dates << { "type" => "amended", "date" => lc["date_amended"] }
|
|
186
|
-
end
|
|
187
|
-
lc["dates"] = dates if dates.any?
|
|
188
|
-
end
|
|
189
|
-
|
|
190
|
-
def migrate_entry_status(lc)
|
|
191
|
-
return unless lc.key?("entry_status")
|
|
192
|
-
|
|
193
|
-
mapped = ENTRY_STATUS_MAP[lc["entry_status"]]
|
|
194
|
-
lc["entry_status"] = mapped if mapped
|
|
195
|
-
end
|
|
196
|
-
|
|
197
|
-
def migrate_terms_abbrev(lc)
|
|
198
|
-
return unless lc["terms"].is_a?(Array)
|
|
199
|
-
|
|
200
|
-
lc["terms"].each do |term|
|
|
201
|
-
next unless term.is_a?(Hash)
|
|
202
|
-
next unless term["abbrev"] == true
|
|
203
|
-
|
|
204
|
-
term["type"] = "abbreviation"
|
|
205
|
-
term.delete("abbrev")
|
|
206
|
-
end
|
|
207
|
-
end
|
|
208
|
-
|
|
209
|
-
def extract_inline_refs(lc)
|
|
210
|
-
texts = []
|
|
211
|
-
|
|
212
|
-
if lc["definition"].is_a?(Array)
|
|
213
|
-
lc["definition"].each do |d|
|
|
214
|
-
texts << (d.is_a?(Hash) ? d["content"].to_s : d.to_s)
|
|
215
|
-
end
|
|
216
|
-
elsif lc["definition"].is_a?(String)
|
|
217
|
-
texts << lc["definition"]
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
Array(lc["notes"]).each do |n|
|
|
221
|
-
texts << (n.is_a?(Hash) ? n["content"].to_s : n.to_s)
|
|
222
|
-
end
|
|
223
|
-
Array(lc["examples"]).each do |e|
|
|
224
|
-
texts << (e.is_a?(Hash) ? e["content"].to_s : e.to_s)
|
|
225
|
-
end
|
|
226
|
-
|
|
227
|
-
full_text = texts.join(" ")
|
|
228
|
-
|
|
229
|
-
refs = []
|
|
230
|
-
|
|
231
|
-
full_text.scan(IEV_PATTERN) do |term, id|
|
|
232
|
-
refs << {
|
|
233
|
-
"term" => term.strip,
|
|
234
|
-
"concept_id" => id.strip,
|
|
235
|
-
"source" => "urn:iec:std:iec:60050",
|
|
236
|
-
"ref_type" => "urn",
|
|
237
|
-
}
|
|
238
|
-
end
|
|
239
|
-
|
|
240
|
-
full_text.scan(URN_PATTERN) do |std_num, id, term|
|
|
241
|
-
refs << {
|
|
242
|
-
"term" => term.strip,
|
|
243
|
-
"concept_id" => id.strip,
|
|
244
|
-
"source" => "urn:iso:std:iso:#{std_num}",
|
|
245
|
-
"ref_type" => "urn",
|
|
246
|
-
}
|
|
247
|
-
end
|
|
248
|
-
|
|
249
|
-
return if refs.empty?
|
|
250
|
-
|
|
251
|
-
existing = lc["references"] || []
|
|
252
|
-
seen_ids = existing.to_set { |r| r["concept_id"] || r["id"] }
|
|
253
|
-
refs.each do |ref|
|
|
254
|
-
key = ref["concept_id"] || ref["id"]
|
|
255
|
-
next if seen_ids.include?(key)
|
|
256
|
-
|
|
257
|
-
seen_ids.add(key)
|
|
258
|
-
existing << ref
|
|
259
|
-
end
|
|
260
|
-
lc["references"] = existing
|
|
261
|
-
end
|
|
262
|
-
|
|
263
|
-
def strip_revisions(hash = @concept)
|
|
264
|
-
hash.delete("_revisions")
|
|
265
|
-
LANG_CODES.each do |lang|
|
|
266
|
-
next unless hash[lang].is_a?(Hash)
|
|
267
|
-
|
|
268
|
-
hash[lang].delete("_revisions")
|
|
269
|
-
end
|
|
270
|
-
end
|
|
271
|
-
|
|
272
58
|
class << self
|
|
273
59
|
private
|
|
274
60
|
|
|
@@ -301,14 +87,14 @@ module Glossarist
|
|
|
301
87
|
v1 = V1::Concept.from_file(file)
|
|
302
88
|
next unless v1
|
|
303
89
|
|
|
304
|
-
migration = new(
|
|
90
|
+
migration = V0ToV1.new(
|
|
305
91
|
v1.to_yaml_hash,
|
|
306
92
|
from_version: source_version,
|
|
307
93
|
to_version: target_version,
|
|
308
94
|
ref_maps: ref_maps,
|
|
309
95
|
)
|
|
310
96
|
concepts << migration.migrate
|
|
311
|
-
rescue
|
|
97
|
+
rescue Errors::Base, Psych::SyntaxError => e
|
|
312
98
|
errors += 1
|
|
313
99
|
warn " Error migrating #{File.basename(file)}: #{e.message}" if errors <= 5
|
|
314
100
|
end
|
|
@@ -184,8 +184,9 @@ module Glossarist
|
|
|
184
184
|
package = GcrPackage.load(path)
|
|
185
185
|
package.concepts.each { |mc| collection.store(mc) }
|
|
186
186
|
else
|
|
187
|
-
|
|
188
|
-
|
|
187
|
+
GlossaryStore.new.tap do |s|
|
|
188
|
+
s.load(path)
|
|
189
|
+
end.each_concept { |mc| collection.store(mc) }
|
|
189
190
|
end
|
|
190
191
|
collection
|
|
191
192
|
end
|
|
@@ -214,11 +215,10 @@ module Glossarist
|
|
|
214
215
|
end
|
|
215
216
|
|
|
216
217
|
def save_dataset(concepts, dir)
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
collection.save_grouped_concepts_to_files(concepts_dir)
|
|
218
|
+
FileUtils.mkdir_p(dir)
|
|
219
|
+
store = GlossaryStore.new
|
|
220
|
+
concepts.each { |mc| store.add_concept(mc) }
|
|
221
|
+
store.save_directory(dir)
|
|
222
222
|
end
|
|
223
223
|
|
|
224
224
|
def create_gcr(concepts, output, shortname:, version:, **opts)
|
|
@@ -238,12 +238,11 @@ module Glossarist
|
|
|
238
238
|
|
|
239
239
|
def build_temp_dataset(concepts)
|
|
240
240
|
tmpdir = Dir.mktmpdir("glossarist-sts-import")
|
|
241
|
-
|
|
242
|
-
FileUtils.mkdir_p(concepts_dir)
|
|
241
|
+
FileUtils.mkdir_p(tmpdir)
|
|
243
242
|
|
|
244
|
-
|
|
245
|
-
concepts.each { |mc|
|
|
246
|
-
|
|
243
|
+
store = GlossaryStore.new
|
|
244
|
+
concepts.each { |mc| store.add_concept(mc) }
|
|
245
|
+
store.save_directory(tmpdir)
|
|
247
246
|
|
|
248
247
|
tmpdir
|
|
249
248
|
end
|
|
@@ -7,6 +7,7 @@ module Glossarist
|
|
|
7
7
|
raw = File.read(xml_path)
|
|
8
8
|
@standard = ::Sts::IsoSts::Standard.from_xml(raw)
|
|
9
9
|
@source_ref = extract_source_ref
|
|
10
|
+
@std_prefix = extract_std_prefix(@source_ref)
|
|
10
11
|
end
|
|
11
12
|
|
|
12
13
|
def extract
|
|
@@ -90,27 +91,104 @@ module Glossarist
|
|
|
90
91
|
)
|
|
91
92
|
end
|
|
92
93
|
|
|
94
|
+
ELEMENT_NAME_TO_ATTR = {
|
|
95
|
+
"entailedTerm" => :entailed_term,
|
|
96
|
+
"xref" => :xref,
|
|
97
|
+
"italic" => :italic,
|
|
98
|
+
"bold" => :bold,
|
|
99
|
+
"sup" => :sup,
|
|
100
|
+
"sub" => :sub,
|
|
101
|
+
"monospace" => :monospace,
|
|
102
|
+
"std" => :std,
|
|
103
|
+
"math" => :math,
|
|
104
|
+
"inline-formula" => :inline_formula,
|
|
105
|
+
"list" => :list,
|
|
106
|
+
"styled-content" => :styled_content,
|
|
107
|
+
"ext-link" => :ext_link,
|
|
108
|
+
}.freeze
|
|
109
|
+
|
|
93
110
|
def extract_definition_text(lang_set)
|
|
94
111
|
definitions = lang_set.definition
|
|
95
112
|
return "" unless definitions&.any?
|
|
96
113
|
|
|
97
|
-
definitions.first
|
|
114
|
+
extract_mixed_text(definitions.first)
|
|
98
115
|
end
|
|
99
116
|
|
|
100
117
|
def extract_note_texts(lang_set)
|
|
101
118
|
lang_set.note.filter_map do |n|
|
|
102
|
-
text = n
|
|
119
|
+
text = extract_mixed_text(n)
|
|
103
120
|
text unless text.empty?
|
|
104
121
|
end
|
|
105
122
|
end
|
|
106
123
|
|
|
107
124
|
def extract_example_texts(lang_set)
|
|
108
125
|
lang_set.example.filter_map do |e|
|
|
109
|
-
text = e
|
|
126
|
+
text = extract_mixed_text(e)
|
|
110
127
|
text unless text.empty?
|
|
111
128
|
end
|
|
112
129
|
end
|
|
113
130
|
|
|
131
|
+
# Flattens a mixed-content element into one whitespace-normalised string.
#
# Walks +mixed_element.element_order+ in document order. Text nodes are
# taken verbatim. Child elements are looked up through
# ELEMENT_NAME_TO_ATTR: the n-th occurrence of a tag is read from the n-th
# entry of the matching attribute collection on +mixed_element+. Tags that
# are unmapped, or whose attribute the element's class does not declare,
# are skipped.
def extract_mixed_text(mixed_element)
  # Per-tag cursor: how many children of each tag have been consumed.
  cursor = Hash.new(0)

  fragments = mixed_element.element_order.filter_map do |node|
    next node.text_content.to_s if node.node_type == :text

    tag = node.name
    reader = ELEMENT_NAME_TO_ATTR[tag]
    next unless reader
    next unless mixed_element.class.attributes.key?(reader)

    children = mixed_element.public_send(reader)
    next unless children

    # The cursor advances whenever the collection exists, even if the
    # slot itself turns out to be empty.
    position = cursor[tag]
    cursor[tag] = position + 1

    child = children[position]
    next unless child

    tag == "entailedTerm" ? format_entailed_term(child) : child_value_text(child)
  end

  normalize_whitespace(fragments.join)
end
|
|
161
|
+
|
|
162
|
+
# Renders an entailedTerm child as an inline concept mention, e.g.
# "{{19135:2026:3.5.1,concept}}" (i.e. "{{<std prefix>:<section>,<designation>}}").
#
# The designation is the element text with a trailing parenthesised
# dotted number such as " (3.5.1)" removed. When either the standard prefix
# (@std_prefix) or the section parsed from the target is unavailable, the
# stripped element text is returned unchanged instead.
def format_entailed_term(entailed)
  label = entailed.value.to_s
  # The pattern is anchored at \z, so at most one trailing suffix matches.
  designation = label.sub(/\s+\(\d[\d.]*\)\s*\z/, "").strip
  section = extract_section_from_target(entailed.target)

  return label.strip unless @std_prefix && section

  "{{#{@std_prefix}:#{section},#{designation}}}"
end
|
|
175
|
+
|
|
176
|
+
# "term_3.5.1" → "3.5.1", "term_3.8.2-1" → "3.8.2"
|
|
177
|
+
def extract_section_from_target(target)
  # Capture the dotted number that follows "term_"; anything after it
  # (such as a "-1" suffix) is ignored. Yields nil when there is no target
  # or no such number.
  found = target.match(/term_(\d+(?:\.\d+)*)/) if target
  found && found[1]
end
|
|
183
|
+
|
|
184
|
+
# Returns the textual value of a child element: Array values are
# concatenated, String values are returned as-is, and any other value
# yields nil.
def child_value_text(child)
  payload = child.value
  return payload.join.to_s if payload.is_a?(Array)

  payload if payload.is_a?(String)
end
|
|
191
|
+
|
|
114
192
|
def extract_source_texts(lang_set)
|
|
115
193
|
lang_set.source.filter_map do |s|
|
|
116
194
|
text = s.value&.join.to_s.strip
|
|
@@ -175,12 +253,32 @@ module Glossarist
|
|
|
175
253
|
end
|
|
176
254
|
|
|
177
255
|
def extract_ref_text(ref)
|
|
178
|
-
if ref.
|
|
179
|
-
ref.
|
|
256
|
+
if ref.respond_to?(:content) && ref.content.is_a?(Array)
|
|
257
|
+
normalize_whitespace(ref.content.join.to_s)
|
|
258
|
+
elsif ref.respond_to?(:value)
|
|
259
|
+
normalize_whitespace(ref.value.to_s)
|
|
180
260
|
else
|
|
181
|
-
|
|
261
|
+
""
|
|
182
262
|
end
|
|
183
263
|
end
|
|
264
|
+
|
|
265
|
+
# Collapses every run of whitespace (space, tab, CR, LF, form feed,
# vertical tab, and the non-breaking space U+00A0) into a single space and
# trims both ends.
def normalize_whitespace(text)
  text.tr("\t\r\n\f\v\u00a0", " ").squeeze(" ").strip
end
|
|
268
|
+
|
|
269
|
+
# "ISO 19101-1:2014" → "19101-1:2014", "ISO/TS 19130-2:2014" → "TS-19130-2:2014"
|
|
270
|
+
def extract_std_prefix(source_ref)
  return nil unless source_ref

  # Captures, in order: optional deliverable type after "ISO/" (e.g. "TS"),
  # the document number with optional part, and the year.
  captured = source_ref.match(/\AISO(?:\/(\p{Upper}+))? (\d+(?:-\d+)?):(\d+)\z/)&.captures
  return nil unless captured

  doc_type, number, year = captured
  dated_number = "#{number}:#{year}"
  doc_type ? "#{doc_type}-#{dated_number}" : dated_number
end
|
|
184
282
|
end
|
|
185
283
|
end
|
|
186
284
|
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Validation
    module Rules
      # Concept-scoped rule GLS-110 (warning): checks the integrity of
      # citation references inside a single concept.
      #
      # Two conditions are reported:
      # * the same source id is carried by more than one of the concept's
      #   sources;
      # * an inline cite mention refers to an id that none of the concept's
      #   sources carries.
      class CiteRefIntegrityRule < Base
        def code = "GLS-110"
        def category = :references
        def severity = "warning"
        def scope = :concept

        # The rule only applies to concepts that have at least one
        # localization.
        def applicable?(context)
          context.concept.localizations&.any?
        end

        # @param context [Object] validation context exposing +concept+ and
        #   +file_name+
        # @return [Array] issues found for the concept (possibly empty)
        def check(context)
          concept = context.concept
          fname = context.file_name
          issues = []

          check_unique_source_ids(concept, fname, issues)
          check_unresolved_mentions(concept, fname, issues)

          issues
        end

        private

        # Appends one issue per source id that occurs more than once among
        # the concept's sources. Sources without an id are ignored.
        def check_unique_source_ids(concept, fname, issues)
          concept.all_sources.filter_map(&:id).tally.each do |id, count|
            next if count <= 1

            issues << issue(
              "duplicate source id '#{id}' appears #{count} times",
              code: code, severity: severity,
              location: fname,
              suggestion: "source ids must be unique within a concept"
            )
          end
        end

        # Appends one issue per distinct cite key that matches no source id.
        # Keys are de-duplicated first so that a key mentioned several times
        # yields a single warning rather than one per mention.
        def check_unresolved_mentions(concept, fname, issues)
          keys = cite_mention_keys(concept).uniq
          return if keys.empty?

          known_ids = concept.all_sources.filter_map(&:id).to_set
          keys.each do |key|
            next if known_ids.include?(key)

            issues << issue(
              "inline {{cite:#{key}}} does not resolve to any source",
              code: code, severity: severity,
              location: fname,
              suggestion: "add a source with id '#{key}' or fix the reference"
            )
          end
        end

        # Collects the +concept_id+ of every extracted reference that
        # reports itself as a cite mention (+cite?+); references without a
        # +concept_id+ are dropped.
        def cite_mention_keys(concept)
          extractor = ReferenceExtractor.new
          extractor.extract_from_managed_concept(concept)
            .select(&:cite?)
            .filter_map(&:concept_id)
        end
      end
    end
  end
end
|
data/lib/glossarist/version.rb
CHANGED
data/lib/glossarist.rb
CHANGED
|
@@ -27,35 +27,27 @@ module Glossarist
|
|
|
27
27
|
autoload :ReferenceResolver, "glossarist/reference_resolver"
|
|
28
28
|
autoload :ResolutionAdapter, "glossarist/resolution_adapter"
|
|
29
29
|
autoload :ConceptDate, "glossarist/concept_date"
|
|
30
|
-
autoload :ConceptManager, "glossarist/concept_manager"
|
|
31
30
|
autoload :ConceptSet, "glossarist/concept_set"
|
|
32
31
|
autoload :ConceptSource, "glossarist/concept_source"
|
|
33
32
|
autoload :ConceptStore, "glossarist/concept_store"
|
|
34
33
|
autoload :ConceptValidator, "glossarist/concept_validator"
|
|
35
|
-
autoload :ConceptCollector, "glossarist/concept_collector"
|
|
36
34
|
autoload :ConceptComparator, "glossarist/concept_comparator"
|
|
37
|
-
autoload :ContextConfiguration,
|
|
35
|
+
autoload :ContextConfiguration, "glossarist/context_configuration"
|
|
38
36
|
autoload :ComparisonResult, "glossarist/comparison_result"
|
|
39
37
|
autoload :ConceptDiff, "glossarist/concept_diff"
|
|
40
38
|
autoload :ConceptDocument, "glossarist/concept_document"
|
|
41
|
-
autoload :ConceptEnricher,
|
|
39
|
+
autoload :ConceptEnricher, "glossarist/concept_enricher"
|
|
42
40
|
autoload :Config, "glossarist/config"
|
|
43
41
|
autoload :DatasetValidator, "glossarist/dataset_validator"
|
|
44
42
|
autoload :CustomLocality, "glossarist/custom_locality"
|
|
45
43
|
autoload :DetailedDefinition, "glossarist/detailed_definition"
|
|
46
44
|
autoload :Designation, "glossarist/designation"
|
|
47
|
-
autoload :
|
|
45
|
+
autoload :Errors, "glossarist/errors"
|
|
48
46
|
autoload :GcrPackage, "glossarist/gcr_package"
|
|
49
47
|
autoload :GcrPackageDefinition, "glossarist/gcr_package_definition"
|
|
50
48
|
autoload :GcrMetadata, "glossarist/gcr_metadata"
|
|
51
49
|
autoload :GcrStatistics, "glossarist/gcr_statistics"
|
|
52
50
|
autoload :GcrValidator, "glossarist/gcr_validator"
|
|
53
|
-
autoload :InvalidTypeError, "glossarist/error/invalid_type_error"
|
|
54
|
-
autoload :InvalidLanguageCodeError,
|
|
55
|
-
"glossarist/error/invalid_language_code_error"
|
|
56
|
-
autoload :ParseError, "glossarist/error/parse_error"
|
|
57
|
-
autoload :CacheVersionMismatchError,
|
|
58
|
-
"glossarist/error/cache_version_mismatch_error"
|
|
59
51
|
autoload :Locality, "glossarist/locality"
|
|
60
52
|
autoload :LocalizedConcept, "glossarist/localized_concept"
|
|
61
53
|
autoload :ManagedConcept, "glossarist/managed_concept"
|
|
@@ -72,8 +64,8 @@ module Glossarist
|
|
|
72
64
|
autoload :Utilities, "glossarist/utilities"
|
|
73
65
|
autoload :Validation, "glossarist/validation"
|
|
74
66
|
autoload :RegisterData, "glossarist/register_data"
|
|
75
|
-
autoload :Section,
|
|
76
|
-
autoload :DatasetRegister,
|
|
67
|
+
autoload :Section, "glossarist/section"
|
|
68
|
+
autoload :DatasetRegister, "glossarist/dataset_register"
|
|
77
69
|
autoload :ValidationResult, "glossarist/validation_result"
|
|
78
70
|
autoload :V1, "glossarist/v1"
|
|
79
71
|
autoload :V2, "glossarist/v2"
|