glossarist 2.8.7 → 2.8.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop_todo.yml +128 -11
- data/CLAUDE.md +34 -3
- data/Gemfile +1 -0
- data/lib/glossarist/cli/compare_command.rb +2 -2
- data/lib/glossarist/cli/export_command.rb +1 -3
- data/lib/glossarist/collection.rb +1 -1
- data/lib/glossarist/collections/bibliography_collection.rb +1 -1
- data/lib/glossarist/concept_data.rb +3 -2
- data/lib/glossarist/concept_reference.rb +7 -1
- data/lib/glossarist/concept_set.rb +5 -1
- data/lib/glossarist/concept_source.rb +2 -2
- data/lib/glossarist/concept_validator.rb +3 -1
- data/lib/glossarist/dataset_validator.rb +1 -1
- data/lib/glossarist/{error.rb → errors/base.rb} +3 -1
- data/lib/glossarist/errors/cache_version_mismatch_error.rb +12 -0
- data/lib/glossarist/errors/invalid_language_code_error.rb +19 -0
- data/lib/glossarist/errors/invalid_type_error.rb +8 -0
- data/lib/glossarist/errors/load_error.rb +22 -0
- data/lib/glossarist/errors/parse_error.rb +24 -0
- data/lib/glossarist/errors.rb +14 -0
- data/lib/glossarist/gcr_package.rb +4 -2
- data/lib/glossarist/glossary_store.rb +175 -1
- data/lib/glossarist/managed_concept.rb +31 -17
- data/lib/glossarist/managed_concept_collection.rb +52 -8
- data/lib/glossarist/reference_extractor.rb +22 -2
- data/lib/glossarist/reference_resolver.rb +38 -3
- data/lib/glossarist/resolution_adapter/bibliography.rb +22 -0
- data/lib/glossarist/resolution_adapter.rb +1 -0
- data/lib/glossarist/schema_migration/v0_to_v1.rb +200 -0
- data/lib/glossarist/schema_migration/v2_to_v3.rb +50 -0
- data/lib/glossarist/schema_migration.rb +10 -224
- data/lib/glossarist/sts/importer.rb +11 -12
- data/lib/glossarist/sts/term_extractor.rb +105 -6
- data/lib/glossarist/transforms/concept_to_gloss_transform.rb +1 -1
- data/lib/glossarist/v2/managed_concept.rb +2 -4
- data/lib/glossarist/v3/managed_concept.rb +2 -4
- data/lib/glossarist/validation/asset_index.rb +1 -1
- data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +11 -21
- data/lib/glossarist/validation/rules/cite_ref_integrity_rule.rb +74 -0
- data/lib/glossarist/validation/rules/concept_context.rb +24 -0
- data/lib/glossarist/validation/rules/concept_mention_rule.rb +1 -3
- data/lib/glossarist/validation/rules/image_reference_rule.rb +10 -21
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +5 -13
- data/scripts/upgrade_dataset_to_v3.rb +1 -1
- metadata +13 -9
- data/lib/glossarist/concept_collector.rb +0 -231
- data/lib/glossarist/concept_manager.rb +0 -183
- data/lib/glossarist/error/cache_version_mismatch_error.rb +0 -8
- data/lib/glossarist/error/invalid_language_code_error.rb +0 -15
- data/lib/glossarist/error/invalid_type_error.rb +0 -4
- data/lib/glossarist/error/parse_error.rb +0 -16
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  class SchemaMigration
    # Migrates one concept hash from the legacy (version "0") layout to
    # schema version "1".
    #
    # The hash given to the constructor is modified in place; {#migrate}
    # returns that same hash.
    class V0ToV1
      # Legacy entry-status labels and the value each one is rewritten to.
      # Labels that are not listed are left unchanged.
      ENTRY_STATUS_MAP = {
        "Standard" => "valid",
        "Confirmed" => "valid",
        "Proposed" => "draft",
      }.freeze

      LANG_CODES = Glossarist::LANG_CODES

      # Inline mention written as {{term, IEV:id}}.
      IEV_PATTERN = /\{\{([^,}]+),\s*IEV:([^}]+)\}\}/
      # Inline mention written as {urn:iso:std:iso:<number>:<id>,<term>}.
      URN_PATTERN = /\{urn:iso:std:iso:(\d+):([^,}]+),([^}]+)\}/

      # Keys of a legacy authoritative source that are copied into "origin",
      # in the order they are written.
      ORIGIN_KEYS = %w[ref clause link].freeze

      # Legacy date keys and the "type" recorded for each entry of "dates".
      DATE_KEYS = {
        "date_accepted" => "accepted",
        "date_amended" => "amended",
      }.freeze

      attr_reader :from_version, :to_version

      # @param concept_hash [Hash] concept data with String keys; mutated by
      #   {#migrate}
      # @param from_version [#to_s] version the hash is currently in
      # @param to_version [#to_s] version to migrate to
      # @param ref_maps [Hash] stored as given; not read by this class
      def initialize(concept_hash, from_version: "0",
                     to_version: SchemaMigration::CURRENT_SCHEMA_VERSION,
                     ref_maps: {})
        @concept = concept_hash
        # Versions are compared against string literals in #migrate, so
        # normalise here; otherwise `to_version: 1` could never match.
        @from_version = from_version.to_s
        @to_version = to_version.to_s
        @ref_maps = ref_maps
      end

      # Runs the migration selected by from_version/to_version.
      #
      # @return [Hash] the (mutated) concept hash passed to the constructor
      # @raise [Errors::Base] when the version pair is not "0" -> "1"
      def migrate
        case [from_version, to_version]
        when %w[0 1] then migrate_v0_to_v1
        else
          raise Errors::Base,
                "Unsupported migration: #{from_version} -> #{to_version}"
        end
        @concept
      end

      private

      # Stringifies the term id, migrates every language block that is
      # present, then drops "_revisions" bookkeeping.
      def migrate_v0_to_v1
        migrate_termid
        LANG_CODES.each do |lang|
          migrate_language_block(lang) if @concept[lang]
        end
        strip_revisions
      end

      # Coerces "termid" to a String when the key exists.
      def migrate_termid
        return unless @concept.key?("termid")

        @concept["termid"] = String(@concept["termid"])
      end

      # Applies every per-language migration step to the block stored under
      # +lang+. Non-Hash values are left untouched.
      def migrate_language_block(lang)
        l10n_block = @concept[lang]
        return unless l10n_block.is_a?(Hash)

        migrate_definition(l10n_block)
        migrate_authoritative_source(l10n_block)
        migrate_dates(l10n_block)
        migrate_entry_status(l10n_block)
        migrate_terms_abbrev(l10n_block)
        extract_inline_refs(l10n_block)
        strip_revisions(l10n_block)
      end

      # Wraps a plain String definition as [{ "content" => text }].
      def migrate_definition(l10n_block)
        definition = l10n_block["definition"]
        return unless definition.is_a?(String)

        l10n_block["definition"] = [{ "content" => definition }]
      end

      # Replaces "authoritative_source" with "sources". The legacy key is
      # always removed; an already present "sources" key wins and is kept
      # as it is.
      def migrate_authoritative_source(l10n_block)
        return unless l10n_block.key?("authoritative_source")

        legacy = l10n_block.delete("authoritative_source")
        return if l10n_block.key?("sources")

        candidates = legacy.is_a?(Array) ? legacy : [legacy]
        sources = candidates.grep(Hash).map { |src| build_source_entry(src) }
        l10n_block["sources"] = sources if sources.any?
      end

      # Builds one "sources" entry from a legacy authoritative-source Hash.
      def build_source_entry(src)
        origin = ORIGIN_KEYS.each_with_object({}) do |key, acc|
          acc[key] = src[key] if src[key]
        end
        entry = { "type" => "authoritative", "origin" => origin }

        relationship = src["relationship"]
        return entry unless relationship

        # Only a Hash carries "type"/"modification". Any other truthy value
        # gets the default status instead of being indexed with a String,
        # which raises for true/Integer and is a substring lookup on String.
        details = relationship.is_a?(Hash) ? relationship : {}
        entry["status"] = details["type"] || "identical"
        if details["modification"]
          entry["modification"] = details["modification"]
        end
        entry
      end

      # Collects "date_accepted"/"date_amended" into a "dates" list unless
      # one exists already. The legacy keys themselves are kept.
      def migrate_dates(l10n_block)
        return if l10n_block.key?("dates")

        dates = DATE_KEYS.filter_map do |key, type|
          { "type" => type, "date" => l10n_block[key] } if l10n_block[key]
        end
        l10n_block["dates"] = dates if dates.any?
      end

      # Rewrites a legacy "entry_status" label via ENTRY_STATUS_MAP.
      def migrate_entry_status(l10n_block)
        return unless l10n_block.key?("entry_status")

        mapped = ENTRY_STATUS_MAP[l10n_block["entry_status"]]
        l10n_block["entry_status"] = mapped if mapped
      end

      # Turns the boolean "abbrev" flag of a term into
      # "type" => "abbreviation". Only a literal `true` counts.
      def migrate_terms_abbrev(l10n_block)
        terms = l10n_block["terms"]
        return unless terms.is_a?(Array)

        terms.each do |term|
          next unless term.is_a?(Hash) && term["abbrev"] == true

          term["type"] = "abbreviation"
          term.delete("abbrev")
        end
      end

      # Scans definition, notes and examples for inline IEV/ISO mentions and
      # appends the ones not yet listed to "references".
      def extract_inline_refs(l10n_block)
        refs = scan_inline_refs(collect_texts(l10n_block).join(" "))
        return if refs.empty?

        l10n_block["references"] =
          merge_references(l10n_block["references"] || [], refs)
      end

      # Gathers the text of the definition, notes and examples, in that
      # order.
      def collect_texts(l10n_block)
        definition = l10n_block["definition"]
        texts =
          case definition
          when Array then definition.map { |item| text_of(item) }
          when String then [definition]
          else []
          end

        %w[notes examples].each do |key|
          Array(l10n_block[key]).each { |item| texts << text_of(item) }
        end
        texts
      end

      # Text of one definition/note/example entry: its "content" when the
      # entry is a Hash, otherwise the entry itself as a String.
      def text_of(item)
        item.is_a?(Hash) ? item["content"].to_s : item.to_s
      end

      # Returns a reference Hash for every inline mention in +text+: IEV
      # mentions first, then ISO URN mentions.
      def scan_inline_refs(text)
        iev_refs = text.scan(IEV_PATTERN).map do |term, id|
          inline_ref(term, id, "urn:iec:std:iec:60050")
        end
        iso_refs = text.scan(URN_PATTERN).map do |std_num, id, term|
          inline_ref(term, id, "urn:iso:std:iso:#{std_num}")
        end
        iev_refs + iso_refs
      end

      # Builds the reference Hash stored for one inline mention.
      def inline_ref(term, id, source)
        {
          "term" => term.strip,
          "concept_id" => id.strip,
          "source" => source,
          "ref_type" => "urn",
        }
      end

      # Appends each new ref to +existing+ (in place) unless its concept id
      # is already present, and returns +existing+.
      #
      # NOTE(review): the key is the concept id alone, so mentions of the
      # same id in two different standards collapse into one entry --
      # confirm this is intended.
      def merge_references(existing, refs)
        seen_ids = existing.to_set { |ref| ref["concept_id"] || ref["id"] }
        refs.each do |ref|
          # Set#add? returns nil when the id was already recorded.
          existing << ref if seen_ids.add?(ref["concept_id"])
        end
        existing
      end

      # Removes "_revisions" from +hash+ and from every language block
      # directly beneath it.
      def strip_revisions(hash = @concept)
        hash.delete("_revisions")
        LANG_CODES.each do |lang|
          nested = hash[lang]
          nested.delete("_revisions") if nested.is_a?(Hash)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  class SchemaMigration
    # Upgrades concept objects between schema versions; the only step
    # defined is "2" -> "3".
    module V2ToV3
      # Upper bound on the number of chained steps in one migration.
      MAX_STEPS = 5

      # Known single steps: source version => resulting version and the
      # module method that performs the step.
      STEPS = {
        "2" => { to: "3", via: :step_v2_to_v3 },
      }.freeze

      # Brings +concept+ to +target_version+.
      #
      # The whole chain of steps is resolved before any step runs, so a
      # concept is never left half-migrated when the target turns out to be
      # unreachable.
      #
      # @param concept [#schema_version, #schema_version=, #data, #related,
      #   #related=] concept to migrate; mutated in place
      # @param target_version [#to_s] version to reach
      # @return [Object] the same +concept+ object
      # @raise [Errors::Base] when no chain of known steps leads from the
      #   concept's version to +target_version+
      def self.migrate_concept(concept, target_version: Glossarist::SCHEMA_VERSION)
        current = concept_version(concept)
        target = target_version.to_s
        return concept if current == target

        plan_steps(current, target).each do |from|
          public_send(STEPS.fetch(from).fetch(:via), concept)
        end

        concept.schema_version = target
        concept
      end

      # Version of +concept+ as a String: its own schema_version when that
      # is set and non-empty, otherwise whatever
      # ManagedConcept.detect_schema_version reports.
      def self.concept_version(concept)
        version = concept.schema_version
        return version.to_s if version && !version.to_s.empty?

        ManagedConcept.detect_schema_version(concept)
      end

      # Moves the relations stored on concept.data up to the concept itself
      # (without duplicates) and empties concept.data.related.
      #
      # @return [String] the version the concept is in afterwards ("3")
      def self.step_v2_to_v3(concept)
        if concept.data&.related&.any?
          concept.related ||= []
          concept.related = (concept.related + concept.data.related).uniq
          concept.data.related = []
        end
        "3"
      end

      # Resolves the source versions of the steps leading from +current+ to
      # +target+ without touching any concept.
      #
      # @return [Array<String>] source version of each step, in order
      # @raise [Errors::Base] when a step is missing or the chain would
      #   exceed MAX_STEPS
      def self.plan_steps(current, target)
        path = []
        MAX_STEPS.times do
          break if current == target

          step = STEPS[current]
          unless step
            raise Errors::Base,
                  "No concept migration step from version #{current}"
          end

          path << current
          current = step.fetch(:to)
        end

        unless current == target
          raise Errors::Base,
                "Migration chain too long or unresolvable"
        end

        path
      end
      private_class_method :plan_steps
    end
  end
end
|
|
@@ -6,76 +6,19 @@ module Glossarist
|
|
|
6
6
|
class SchemaMigration
|
|
7
7
|
CURRENT_SCHEMA_VERSION = "1"
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
target = target_version.to_s
|
|
12
|
-
|
|
13
|
-
return concept if current == target
|
|
14
|
-
|
|
15
|
-
max_steps = 5
|
|
16
|
-
max_steps.times do
|
|
17
|
-
break if current == target
|
|
18
|
-
|
|
19
|
-
case current
|
|
20
|
-
when "2" then current = step_v2_to_v3(concept)
|
|
21
|
-
else
|
|
22
|
-
raise Error, "No concept migration step from version #{current}"
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
unless current == target
|
|
27
|
-
raise Error,
|
|
28
|
-
"Migration chain too long or unresolvable"
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
concept.schema_version = target
|
|
32
|
-
concept
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def self.concept_version(concept)
|
|
36
|
-
version = concept.schema_version
|
|
37
|
-
return version.to_s if version && !version.to_s.empty?
|
|
38
|
-
|
|
39
|
-
ManagedConcept.detect_schema_version(concept)
|
|
40
|
-
end
|
|
9
|
+
autoload :V0ToV1, "glossarist/schema_migration/v0_to_v1"
|
|
10
|
+
autoload :V2ToV3, "glossarist/schema_migration/v2_to_v3"
|
|
41
11
|
|
|
42
|
-
def self.
|
|
43
|
-
|
|
44
|
-
concept.related ||= []
|
|
45
|
-
concept.related = (concept.related + concept.data.related).uniq
|
|
46
|
-
concept.data.related = []
|
|
47
|
-
end
|
|
48
|
-
"3"
|
|
12
|
+
# Forwards every argument to V0ToV1.new and returns what it returns, so the
# object handed back is built by V0ToV1 rather than by this class directly.
def self.new(...)
|
|
13
|
+
V0ToV1.new(...)
|
|
49
14
|
end
|
|
50
15
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
"Confirmed" => "valid",
|
|
54
|
-
"Proposed" => "draft",
|
|
55
|
-
}.freeze
|
|
56
|
-
|
|
57
|
-
LANG_CODES = Glossarist::LANG_CODES
|
|
58
|
-
|
|
59
|
-
IEV_PATTERN = /\{\{([^,}]+),\s*IEV:([^}]+)\}\}/
|
|
60
|
-
URN_PATTERN = /\{urn:iso:std:iso:(\d+):([^,}]+),([^}]+)\}/
|
|
61
|
-
|
|
62
|
-
attr_reader :from_version, :to_version
|
|
63
|
-
|
|
64
|
-
def initialize(concept_hash, from_version: "0",
|
|
65
|
-
to_version: CURRENT_SCHEMA_VERSION, ref_maps: {})
|
|
66
|
-
@concept = concept_hash
|
|
67
|
-
@from_version = from_version
|
|
68
|
-
@to_version = to_version
|
|
69
|
-
@ref_maps = ref_maps
|
|
16
|
+
# Delegates to V2ToV3.migrate_concept, passing +concept+ and
# +target_version+ (default: Glossarist::SCHEMA_VERSION) through unchanged,
# and returns its result.
def self.migrate_concept(concept, target_version: Glossarist::SCHEMA_VERSION)
|
|
17
|
+
V2ToV3.migrate_concept(concept, target_version: target_version)
|
|
70
18
|
end
|
|
71
19
|
|
|
72
|
-
def
|
|
73
|
-
|
|
74
|
-
when ["0", "1"] then migrate_v0_to_v1
|
|
75
|
-
else
|
|
76
|
-
raise Error, "Unsupported migration: #{from_version} -> #{to_version}"
|
|
77
|
-
end
|
|
78
|
-
@concept
|
|
20
|
+
# Delegates to V2ToV3.concept_version and returns its result for +concept+.
def self.concept_version(concept)
|
|
21
|
+
V2ToV3.concept_version(concept)
|
|
88
22
|
end
|
|
80
23
|
|
|
81
24
|
def self.upgrade_directory(source_dir, output:, # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
|
|
@@ -112,163 +55,6 @@ module Glossarist
|
|
|
112
55
|
}
|
|
113
56
|
end
|
|
114
57
|
|
|
115
|
-
private
|
|
116
|
-
|
|
117
|
-
def migrate_v0_to_v1
|
|
118
|
-
migrate_termid
|
|
119
|
-
LANG_CODES.each do |lang|
|
|
120
|
-
migrate_language_block(lang) if @concept[lang]
|
|
121
|
-
end
|
|
122
|
-
strip_revisions
|
|
123
|
-
end
|
|
124
|
-
|
|
125
|
-
def migrate_termid
|
|
126
|
-
@concept["termid"] = String(@concept["termid"]) if @concept.key?("termid")
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
def migrate_language_block(lang)
|
|
130
|
-
lc = @concept[lang]
|
|
131
|
-
return unless lc.is_a?(Hash)
|
|
132
|
-
|
|
133
|
-
migrate_definition(lc)
|
|
134
|
-
migrate_authoritative_source(lc)
|
|
135
|
-
migrate_dates(lc)
|
|
136
|
-
migrate_entry_status(lc)
|
|
137
|
-
migrate_terms_abbrev(lc)
|
|
138
|
-
extract_inline_refs(lc)
|
|
139
|
-
strip_revisions(lc)
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
def migrate_definition(lc)
|
|
143
|
-
return unless lc.key?("definition")
|
|
144
|
-
return unless lc["definition"].is_a?(String)
|
|
145
|
-
|
|
146
|
-
lc["definition"] = [{ "content" => lc["definition"] }]
|
|
147
|
-
end
|
|
148
|
-
|
|
149
|
-
def migrate_authoritative_source(lc)
|
|
150
|
-
return unless lc.key?("authoritative_source")
|
|
151
|
-
|
|
152
|
-
src = lc.delete("authoritative_source")
|
|
153
|
-
return if lc.key?("sources")
|
|
154
|
-
|
|
155
|
-
sources = (src.is_a?(Array) ? src : [src]).filter_map do |s|
|
|
156
|
-
next unless s.is_a?(Hash)
|
|
157
|
-
|
|
158
|
-
origin = {}
|
|
159
|
-
origin["ref"] = s["ref"] if s["ref"]
|
|
160
|
-
origin["clause"] = s["clause"] if s["clause"]
|
|
161
|
-
origin["link"] = s["link"] if s["link"]
|
|
162
|
-
|
|
163
|
-
entry = { "type" => "authoritative", "origin" => origin }
|
|
164
|
-
if s["relationship"]
|
|
165
|
-
entry["status"] = s["relationship"]["type"] || "identical"
|
|
166
|
-
if s["relationship"]["modification"]
|
|
167
|
-
entry["modification"] =
|
|
168
|
-
s["relationship"]["modification"]
|
|
169
|
-
end
|
|
170
|
-
end
|
|
171
|
-
entry
|
|
172
|
-
end
|
|
173
|
-
|
|
174
|
-
lc["sources"] = sources if sources.any?
|
|
175
|
-
end
|
|
176
|
-
|
|
177
|
-
def migrate_dates(lc)
|
|
178
|
-
return if lc.key?("dates")
|
|
179
|
-
|
|
180
|
-
dates = []
|
|
181
|
-
if lc["date_accepted"]
|
|
182
|
-
dates << { "type" => "accepted", "date" => lc["date_accepted"] }
|
|
183
|
-
end
|
|
184
|
-
if lc["date_amended"]
|
|
185
|
-
dates << { "type" => "amended", "date" => lc["date_amended"] }
|
|
186
|
-
end
|
|
187
|
-
lc["dates"] = dates if dates.any?
|
|
188
|
-
end
|
|
189
|
-
|
|
190
|
-
def migrate_entry_status(lc)
|
|
191
|
-
return unless lc.key?("entry_status")
|
|
192
|
-
|
|
193
|
-
mapped = ENTRY_STATUS_MAP[lc["entry_status"]]
|
|
194
|
-
lc["entry_status"] = mapped if mapped
|
|
195
|
-
end
|
|
196
|
-
|
|
197
|
-
def migrate_terms_abbrev(lc)
|
|
198
|
-
return unless lc["terms"].is_a?(Array)
|
|
199
|
-
|
|
200
|
-
lc["terms"].each do |term|
|
|
201
|
-
next unless term.is_a?(Hash)
|
|
202
|
-
next unless term["abbrev"] == true
|
|
203
|
-
|
|
204
|
-
term["type"] = "abbreviation"
|
|
205
|
-
term.delete("abbrev")
|
|
206
|
-
end
|
|
207
|
-
end
|
|
208
|
-
|
|
209
|
-
def extract_inline_refs(lc)
|
|
210
|
-
texts = []
|
|
211
|
-
|
|
212
|
-
if lc["definition"].is_a?(Array)
|
|
213
|
-
lc["definition"].each do |d|
|
|
214
|
-
texts << (d.is_a?(Hash) ? d["content"].to_s : d.to_s)
|
|
215
|
-
end
|
|
216
|
-
elsif lc["definition"].is_a?(String)
|
|
217
|
-
texts << lc["definition"]
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
Array(lc["notes"]).each do |n|
|
|
221
|
-
texts << (n.is_a?(Hash) ? n["content"].to_s : n.to_s)
|
|
222
|
-
end
|
|
223
|
-
Array(lc["examples"]).each do |e|
|
|
224
|
-
texts << (e.is_a?(Hash) ? e["content"].to_s : e.to_s)
|
|
225
|
-
end
|
|
226
|
-
|
|
227
|
-
full_text = texts.join(" ")
|
|
228
|
-
|
|
229
|
-
refs = []
|
|
230
|
-
|
|
231
|
-
full_text.scan(IEV_PATTERN) do |term, id|
|
|
232
|
-
refs << {
|
|
233
|
-
"term" => term.strip,
|
|
234
|
-
"concept_id" => id.strip,
|
|
235
|
-
"source" => "urn:iec:std:iec:60050",
|
|
236
|
-
"ref_type" => "urn",
|
|
237
|
-
}
|
|
238
|
-
end
|
|
239
|
-
|
|
240
|
-
full_text.scan(URN_PATTERN) do |std_num, id, term|
|
|
241
|
-
refs << {
|
|
242
|
-
"term" => term.strip,
|
|
243
|
-
"concept_id" => id.strip,
|
|
244
|
-
"source" => "urn:iso:std:iso:#{std_num}",
|
|
245
|
-
"ref_type" => "urn",
|
|
246
|
-
}
|
|
247
|
-
end
|
|
248
|
-
|
|
249
|
-
return if refs.empty?
|
|
250
|
-
|
|
251
|
-
existing = lc["references"] || []
|
|
252
|
-
seen_ids = existing.to_set { |r| r["concept_id"] || r["id"] }
|
|
253
|
-
refs.each do |ref|
|
|
254
|
-
key = ref["concept_id"] || ref["id"]
|
|
255
|
-
next if seen_ids.include?(key)
|
|
256
|
-
|
|
257
|
-
seen_ids.add(key)
|
|
258
|
-
existing << ref
|
|
259
|
-
end
|
|
260
|
-
lc["references"] = existing
|
|
261
|
-
end
|
|
262
|
-
|
|
263
|
-
def strip_revisions(hash = @concept)
|
|
264
|
-
hash.delete("_revisions")
|
|
265
|
-
LANG_CODES.each do |lang|
|
|
266
|
-
next unless hash[lang].is_a?(Hash)
|
|
267
|
-
|
|
268
|
-
hash[lang].delete("_revisions")
|
|
269
|
-
end
|
|
270
|
-
end
|
|
271
|
-
|
|
272
58
|
class << self
|
|
273
59
|
private
|
|
274
60
|
|
|
@@ -301,14 +87,14 @@ module Glossarist
|
|
|
301
87
|
v1 = V1::Concept.from_file(file)
|
|
302
88
|
next unless v1
|
|
303
89
|
|
|
304
|
-
migration = new(
|
|
90
|
+
migration = V0ToV1.new(
|
|
305
91
|
v1.to_yaml_hash,
|
|
306
92
|
from_version: source_version,
|
|
307
93
|
to_version: target_version,
|
|
308
94
|
ref_maps: ref_maps,
|
|
309
95
|
)
|
|
310
96
|
concepts << migration.migrate
|
|
311
|
-
rescue
|
|
97
|
+
rescue Errors::Base, Psych::SyntaxError => e
|
|
312
98
|
errors += 1
|
|
313
99
|
warn " Error migrating #{File.basename(file)}: #{e.message}" if errors <= 5
|
|
314
100
|
end
|
|
@@ -184,8 +184,9 @@ module Glossarist
|
|
|
184
184
|
package = GcrPackage.load(path)
|
|
185
185
|
package.concepts.each { |mc| collection.store(mc) }
|
|
186
186
|
else
|
|
187
|
-
|
|
188
|
-
|
|
187
|
+
GlossaryStore.new.tap do |s|
|
|
188
|
+
s.load(path)
|
|
189
|
+
end.each_concept { |mc| collection.store(mc) }
|
|
189
190
|
end
|
|
190
191
|
collection
|
|
191
192
|
end
|
|
@@ -214,11 +215,10 @@ module Glossarist
|
|
|
214
215
|
end
|
|
215
216
|
|
|
216
217
|
def save_dataset(concepts, dir)
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
collection.save_grouped_concepts_to_files(concepts_dir)
|
|
218
|
+
FileUtils.mkdir_p(dir)
|
|
219
|
+
store = GlossaryStore.new
|
|
220
|
+
concepts.each { |mc| store.add_concept(mc) }
|
|
221
|
+
store.save_directory(dir)
|
|
222
222
|
end
|
|
223
223
|
|
|
224
224
|
def create_gcr(concepts, output, shortname:, version:, **opts)
|
|
@@ -238,12 +238,11 @@ module Glossarist
|
|
|
238
238
|
|
|
239
239
|
def build_temp_dataset(concepts)
|
|
240
240
|
tmpdir = Dir.mktmpdir("glossarist-sts-import")
|
|
241
|
-
|
|
242
|
-
FileUtils.mkdir_p(concepts_dir)
|
|
241
|
+
FileUtils.mkdir_p(tmpdir)
|
|
243
242
|
|
|
244
|
-
|
|
245
|
-
concepts.each { |mc|
|
|
246
|
-
|
|
243
|
+
store = GlossaryStore.new
|
|
244
|
+
concepts.each { |mc| store.add_concept(mc) }
|
|
245
|
+
store.save_directory(tmpdir)
|
|
247
246
|
|
|
248
247
|
tmpdir
|
|
249
248
|
end
|