glossarist 2.6.1 → 2.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/.rubocop_todo.yml +80 -17
- data/Gemfile +3 -19
- data/README.adoc +117 -0
- data/lib/glossarist/cli/import_command.rb +54 -0
- data/lib/glossarist/cli.rb +29 -8
- data/lib/glossarist/concept_collector.rb +40 -10
- data/lib/glossarist/designation/expression.rb +1 -2
- data/lib/glossarist/designation/graphical_symbol.rb +1 -1
- data/lib/glossarist/gcr_package.rb +93 -21
- data/lib/glossarist/gcr_validator.rb +58 -21
- data/lib/glossarist/managed_concept.rb +1 -1
- data/lib/glossarist/rdf/skos_concept.rb +0 -1
- data/lib/glossarist/rdf/skos_vocabulary.rb +0 -1
- data/lib/glossarist/sts/extracted_designation.rb +14 -0
- data/lib/glossarist/sts/extracted_lang_set.rb +16 -0
- data/lib/glossarist/sts/extracted_term.rb +13 -0
- data/lib/glossarist/sts/import_result.rb +24 -0
- data/lib/glossarist/sts/importer.rb +253 -0
- data/lib/glossarist/sts/term_extractor.rb +186 -0
- data/lib/glossarist/sts/term_mapper.rb +118 -0
- data/lib/glossarist/sts.rb +87 -0
- data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -2
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +10 -7
- metadata +11 -2
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "zip"
|
|
4
4
|
require "fileutils"
|
|
5
|
+
require "pathname"
|
|
5
6
|
|
|
6
7
|
module Glossarist
|
|
7
8
|
class GcrPackage
|
|
@@ -14,12 +15,18 @@ module Glossarist
|
|
|
14
15
|
|
|
15
16
|
KNOWN_COMPILED_FORMATS = COMPILED_EXTENSIONS.keys.freeze
|
|
16
17
|
|
|
17
|
-
|
|
18
|
+
# Optional dataset assets that travel alongside the concepts in a package.
# Each entry gives the asset's path (relative to the dataset directory and,
# identically, inside the archive) and whether it is a single :file or a
# :directory. An entry with :attr names the instance variable that
# read_file_assets fills with the raw file content when reading a package.
DATASET_ASSETS = [
|
|
19
|
+
{ path: "bibliography.yaml", type: :file, attr: :bibliography },
|
|
20
|
+
{ path: "images", type: :directory },
|
|
21
|
+
].freeze
|
|
22
|
+
|
|
23
|
+
attr_reader :zip_path, :metadata, :concepts, :bibliography
|
|
18
24
|
|
|
19
25
|
# Creates a handle for the package archive at +zip_path+ without touching
# the file. Metadata and bibliography start out as nil and the concept list
# starts out empty.
def initialize(zip_path)
  @zip_path = zip_path
  @metadata = @bibliography = nil
  @concepts = []
end
|
|
24
31
|
|
|
25
32
|
def self.create(concepts:, metadata:, output_path:, register_data: nil,
|
|
@@ -66,12 +73,43 @@ module Glossarist
|
|
|
66
73
|
end
|
|
67
74
|
end
|
|
68
75
|
|
|
76
|
+
# Walks DATASET_ASSETS and yields +(entry_name, content)+ for every asset
# that is present under +source_dir+.
#
# File assets are delegated to yield_file_asset, directory assets to
# yield_directory_assets; entries of any other type are ignored.
def self.each_dataset_asset(source_dir, &block)
  root = Pathname.new(source_dir)

  DATASET_ASSETS.each do |spec|
    location = File.join(source_dir, spec[:path])

    if spec[:type] == :file
      yield_file_asset(location, spec[:path], &block)
    elsif spec[:type] == :directory
      yield_directory_assets(location, root, &block)
    end
  end
end
|
|
88
|
+
|
|
89
|
+
# Yields +(entry_name, binary content)+ for the single-file asset at +path+.
#
# Does nothing when +path+ is missing or is not a regular file. Checking
# File.file? rather than File.exist? matters: a directory that happens to
# carry the asset's name would otherwise make File.binread raise EISDIR.
def self.yield_file_asset(path, entry_name)
  return unless File.file?(path)

  yield entry_name, File.binread(path)
end
|
|
94
|
+
|
|
95
|
+
# Yields +(relative_name, binary content)+ for every regular file found
# recursively under +dir_path+.
#
# +relative_name+ is the file's path relative to +base_path+ (a Pathname),
# so files keep their directory prefix inside the archive. Does nothing
# when +dir_path+ is not a directory.
def self.yield_directory_assets(dir_path, base_path)
  return unless File.directory?(dir_path)

  # Pass the directory through +base:+ instead of splicing it into the
  # pattern: a dataset path containing glob metacharacters ("[", "{", "*",
  # "?") would otherwise be interpreted as a pattern and match nothing.
  # Sorting keeps the entry order stable regardless of filesystem order.
  Dir.glob("**/*", base: dir_path).sort.each do |inner_path|
    file = File.join(dir_path, inner_path)
    next unless File.file?(file)

    relative = Pathname.new(file).relative_path_from(base_path).to_s
    yield relative, File.binread(file)
  end
end
|
|
105
|
+
|
|
69
106
|
# Validates the archive at @zip_path by delegating to a new GcrValidator
# and returns whatever that validator's #validate returns.
def validate
|
|
70
107
|
GcrValidator.new.validate(@zip_path)
|
|
71
108
|
end
|
|
72
109
|
|
|
73
|
-
def write(concepts, metadata, register_data,
|
|
74
|
-
|
|
110
|
+
def write(concepts, metadata, register_data, # rubocop:disable Metrics/ParameterLists
|
|
111
|
+
compiled_formats: [],
|
|
112
|
+
shortname: nil, source_dir: nil, **opts)
|
|
75
113
|
Zip::File.open(@zip_path, create: true) do |zf|
|
|
76
114
|
zf.get_output_stream("metadata.yaml") do |f|
|
|
77
115
|
f.write(metadata.to_yaml)
|
|
@@ -87,6 +125,12 @@ module Glossarist
|
|
|
87
125
|
write_concept(zf, mc)
|
|
88
126
|
end
|
|
89
127
|
|
|
128
|
+
if source_dir
|
|
129
|
+
self.class.each_dataset_asset(source_dir) do |name, content|
|
|
130
|
+
zf.get_output_stream(name) { |f| f.write(content) }
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
|
|
90
134
|
if compiled_formats.any?
|
|
91
135
|
write_compiled(zf, concepts, compiled_formats, shortname: shortname,
|
|
92
136
|
**opts)
|
|
@@ -94,29 +138,51 @@ module Glossarist
|
|
|
94
138
|
end
|
|
95
139
|
end
|
|
96
140
|
|
|
97
|
-
def write_concept(zip_file, concept)
|
|
98
|
-
termid = concept.data.id.to_s
|
|
99
|
-
doc = ConceptDocument.from_managed_concept(concept)
|
|
100
|
-
zip_file.get_output_stream("concepts/#{termid}.yaml") do |f|
|
|
101
|
-
f.write(doc.to_yamls)
|
|
102
|
-
end
|
|
103
|
-
end
|
|
104
|
-
|
|
105
141
|
# Loads the package from @zip_path into this instance.
#
# The concept list is reset first, then the archive is opened once and its
# metadata, file assets and concepts are read in that order.
def read
  @concepts = []

  Zip::File.open(@zip_path) do |archive|
    read_metadata(archive)
    read_file_assets(archive)
    read_concepts(archive)
  end
end
|
|
112
150
|
|
|
113
|
-
|
|
114
|
-
|
|
151
|
+
# Parses "metadata.yaml" from +zip_file+ into @metadata.
# Leaves @metadata untouched and returns nil when the archive has no such
# entry.
def read_metadata(zip_file)
  found = zip_file.find_entry("metadata.yaml")
  @metadata = GcrMetadata.from_yaml(found.get_input_stream.read) if found
end
|
|
157
|
+
|
|
158
|
+
# Copies the raw content of every :file asset that declares an :attr into
# the instance variable of that name (e.g. @bibliography).
# Assets that are missing from +zip_file+ are skipped.
def read_file_assets(zip_file)
  DATASET_ASSETS.each do |spec|
    ivar = spec[:attr]
    next unless spec[:type] == :file && ivar

    source = zip_file.find_entry(spec[:path])
    instance_variable_set("@#{ivar}", source.get_input_stream.read) if source
  end
end
|
|
168
|
+
|
|
169
|
+
# Appends one managed concept to @concepts for every archive entry named
# "concepts/*.yaml", in archive order. Other entries are ignored.
def read_concepts(zip_file)
  zip_file.entries.each do |entry|
    name = entry.name
    next unless name.start_with?("concepts/") && name.end_with?(".yaml")

    yaml = entry.get_input_stream.read
    @concepts << ConceptDocument.from_yamls(yaml).to_managed_concept
  end
end
|
|
178
|
+
|
|
179
|
+
private
|
|
180
|
+
|
|
181
|
+
# Serialises +concept+ into the archive as "concepts/<id>.yaml", where
# <id> is the string form of concept.data.id.
def write_concept(zip_file, concept)
  entry_name = "concepts/#{concept.data.id}.yaml"
  document = ConceptDocument.from_managed_concept(concept)

  zip_file.get_output_stream(entry_name) { |io| io.write(document.to_yamls) }
end
|
|
122
188
|
|
|
@@ -206,6 +272,7 @@ compiled_formats: [], **opts)
|
|
|
206
272
|
output_path: File.expand_path(output),
|
|
207
273
|
compiled_formats: compiled_formats,
|
|
208
274
|
shortname: shortname,
|
|
275
|
+
source_dir: dir,
|
|
209
276
|
**opts,
|
|
210
277
|
)
|
|
211
278
|
end
|
|
@@ -219,7 +286,7 @@ compiled_formats: [], **opts)
|
|
|
219
286
|
concept_count = 0
|
|
220
287
|
languages = Set.new
|
|
221
288
|
|
|
222
|
-
Zip::OutputStream.open(output_path) do |zos|
|
|
289
|
+
Zip::OutputStream.open(output_path) do |zos| # rubocop:disable Metrics/BlockLength
|
|
223
290
|
if register_data
|
|
224
291
|
zos.put_next_entry("register.yaml")
|
|
225
292
|
zos.write(register_data.to_yaml)
|
|
@@ -253,6 +320,11 @@ compiled_formats: [], **opts)
|
|
|
253
320
|
register_data: register_data, **opts)
|
|
254
321
|
zos.put_next_entry("metadata.yaml")
|
|
255
322
|
zos.write(metadata.to_yaml)
|
|
323
|
+
|
|
324
|
+
each_dataset_asset(dir) do |name, content|
|
|
325
|
+
zos.put_next_entry(name)
|
|
326
|
+
zos.write(content)
|
|
327
|
+
end
|
|
256
328
|
end
|
|
257
329
|
|
|
258
330
|
new(output_path)
|
|
@@ -13,27 +13,8 @@ module Glossarist
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
begin
|
|
16
|
-
Zip::File.open(zip_path) do |
|
|
17
|
-
|
|
18
|
-
result.add_error("Missing metadata.yaml")
|
|
19
|
-
return result
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
metadata = GcrMetadata.from_yaml(
|
|
23
|
-
zf.find_entry("metadata.yaml").get_input_stream.read,
|
|
24
|
-
)
|
|
25
|
-
validate_metadata(metadata, result)
|
|
26
|
-
|
|
27
|
-
concept_entries = zf.entries.select do |e|
|
|
28
|
-
e.name.start_with?("concepts/") && e.name.end_with?(".yaml")
|
|
29
|
-
end
|
|
30
|
-
if concept_entries.empty?
|
|
31
|
-
result.add_error("No concept files found in concepts/")
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
concept_entries.each do |entry|
|
|
35
|
-
validate_concept_entry(entry, metadata, result)
|
|
36
|
-
end
|
|
16
|
+
Zip::File.open(zip_path) do |zip_file|
|
|
17
|
+
validate_zip_contents(zip_file, result)
|
|
37
18
|
end
|
|
38
19
|
rescue StandardError => e
|
|
39
20
|
result.add_error("Failed to read ZIP: #{e.message}")
|
|
@@ -44,6 +25,31 @@ module Glossarist
|
|
|
44
25
|
|
|
45
26
|
private
|
|
46
27
|
|
|
28
|
+
# Runs every content check on an opened archive, recording findings on
# +result+.
#
# Stops after reporting "Missing metadata.yaml" when the metadata entry is
# absent. Otherwise validates the metadata, each "concepts/*.yaml" entry
# (reporting an error when there are none) and finally the dataset assets.
def validate_zip_contents(zip_file, result) # rubocop:disable Metrics/AbcSize
  metadata_entry = zip_file.find_entry("metadata.yaml")
  unless metadata_entry
    result.add_error("Missing metadata.yaml")
    return
  end

  metadata = GcrMetadata.from_yaml(metadata_entry.get_input_stream.read)
  validate_metadata(metadata, result)

  concept_entries = zip_file.entries.select do |candidate|
    candidate.name.start_with?("concepts/") && candidate.name.end_with?(".yaml")
  end
  result.add_error("No concept files found in concepts/") if concept_entries.empty?

  concept_entries.each { |entry| validate_concept_entry(entry, metadata, result) }

  validate_assets(zip_file, result)
end
|
|
52
|
+
|
|
47
53
|
def validate_metadata(metadata, result)
|
|
48
54
|
unless metadata&.concept_count
|
|
49
55
|
result.add_error("metadata.yaml missing required fields (concept_count)")
|
|
@@ -94,5 +100,36 @@ module Glossarist
|
|
|
94
100
|
result.add_warning("#{entry.name}: no concept URI (data.uri) and no concept_uri_template or uri_prefix in metadata")
|
|
95
101
|
end
|
|
96
102
|
end
|
|
103
|
+
|
|
104
|
+
# Checks every asset listed in GcrPackage::DATASET_ASSETS against the
# archive: :file assets go to validate_file_asset_entry, :directory assets
# to validate_directory_asset. Entries of any other type are ignored.
def validate_assets(zip_file, result)
  GcrPackage::DATASET_ASSETS.each do |spec|
    asset_path = spec[:path]

    if spec[:type] == :file
      validate_file_asset_entry(zip_file, asset_path, result)
    elsif spec[:type] == :directory
      validate_directory_asset(zip_file, asset_path, result)
    end
  end
end
|
|
114
|
+
|
|
115
|
+
# Checks that the file asset at +path+, when present in the archive, can be
# loaded with YAML.safe_load. A missing entry is not an error.
#
# Every Psych failure is recorded on +result+ against the asset's path.
# Syntax errors carry a line number; other Psych errors (for example
# aliases or classes that safe_load refuses) do not, so they get their own
# message instead of escaping to the caller's generic ZIP-read handler.
def validate_file_asset_entry(zip_file, path, result)
  entry = zip_file.find_entry(path)
  return unless entry

  YAML.safe_load(entry.get_input_stream.read)
rescue Psych::SyntaxError => e
  result.add_error("#{path}: invalid YAML at line #{e.line}: #{e.message}")
rescue Psych::Exception => e
  result.add_error("#{path}: YAML uses unsupported content: #{e.message}")
end
|
|
123
|
+
|
|
124
|
+
# Warns when the archive contains entries under "<dir_path>/" but every one
# of them is itself a directory entry (name ending in "/"), i.e. the asset
# directory holds no files. Adds nothing when the directory is absent or
# contains at least one file.
def validate_directory_asset(zip_file, dir_path, result)
  prefix = "#{dir_path}/"
  under_dir = zip_file.entries.select { |candidate| candidate.name.start_with?(prefix) }
  only_directories = under_dir.any? && under_dir.all? { |candidate| candidate.name.end_with?("/") }

  result.add_warning("#{dir_path}/ directory is empty") if only_directories
end
|
|
97
134
|
end
|
|
98
135
|
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Sts
    # Keyword-initialised value holder for the per-language data extracted
    # for one term: the language code, definition text, note/example/source
    # texts, domain and designations.
    ExtractedLangSet = Struct.new(
      *%i[
        language_code
        definition_text
        note_texts
        example_texts
        source_texts
        domain
        designations
      ],
      keyword_init: true,
    )
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  module Sts
    # Describes one detected duplicate: the incoming concept, the concept it
    # collided with, and the key both share.
    DuplicateConflict = Struct.new(
      :new_concept,
      :existing_concept,
      :key,
      keyword_init: true,
    )

    # Outcome of an import run: the resulting concepts, the duplicate
    # conflicts that were detected, the source files that were read and the
    # number of concepts that were skipped.
    class ImportResult
      attr_reader :concepts, :conflicts, :source_files, :skipped_count

      def initialize(concepts:, conflicts: [], source_files: [], skipped_count: 0)
        @skipped_count = skipped_count
        @source_files = source_files
        @conflicts = conflicts
        @concepts = concepts
      end

      # True when at least one duplicate conflict was recorded.
      def conflict?
        return false if conflicts.empty?

        true
      end
    end
  end
end
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "tmpdir"
|
|
4
|
+
require_relative "import_result"
|
|
5
|
+
|
|
6
|
+
module Glossarist
|
|
7
|
+
module Sts
|
|
8
|
+
# Imports concepts extracted from XML files into either a dataset directory
# or a GCR package (any path ending in ".gcr").
#
# Two concepts are duplicates when their [designation, domain] key matches
# (both lower-cased and stripped, see #concept_key). Concepts whose
# designation is blank are never treated as duplicates. How a duplicate is
# resolved depends on +duplicate_strategy+:
#
# * +:skip+    - keep the concept already present, count the new one as skipped
# * +:replace+ - swap the concept already present for the new one
# * +:merge+   - add the new concept's localizations for languages the
#                present concept does not have yet
class Importer
  STRATEGIES = %i[skip replace merge].freeze

  attr_reader :duplicate_strategy

  # @param duplicate_strategy [Symbol] one of STRATEGIES
  # @raise [ArgumentError] when the strategy is not one of STRATEGIES
  def initialize(duplicate_strategy: :skip)
    unless STRATEGIES.include?(duplicate_strategy)
      raise ArgumentError,
            "duplicate_strategy must be one of #{STRATEGIES.join(', ')}, got #{duplicate_strategy}"
    end

    @duplicate_strategy = duplicate_strategy
    @mapper = TermMapper.new
  end

  # Extracts concepts from +xml_files+, de-duplicates them among themselves
  # and writes them to +output+.
  #
  # @param xml_files [Array<String>] paths handed to TermExtractor
  # @param output [String] a ".gcr" path for a package, anything else is
  #   treated as a dataset directory
  # @param shortname [String, nil] required for GCR output
  # @param version [String, nil] required for GCR output
  # @param opts [Hash] forwarded to GcrPackage.create_from_directory
  # @return [ImportResult]
  # @raise [ArgumentError] when GCR output is requested without shortname
  #   or version
  def import_new(xml_files, output:, shortname: nil, version: nil, **opts)
    gcr_output = output.end_with?(".gcr")

    # Reject unusable arguments before doing any extraction work.
    if gcr_output
      unless shortname
        raise ArgumentError,
              "--shortname is required for GCR output"
      end
      unless version
        raise ArgumentError,
              "--version is required for GCR output"
      end
    end

    raw_concepts = extract_all_concepts(xml_files)
    concepts, conflicts, skipped = dedup_concepts(raw_concepts)

    if gcr_output
      create_gcr(concepts, output, shortname: shortname, version: version,
                                   **opts)
    else
      save_dataset(concepts, output)
    end

    ImportResult.new(
      concepts: concepts,
      conflicts: conflicts,
      source_files: xml_files,
      skipped_count: skipped,
    )
  end

  # Extracts concepts from +xml_files+ and adds them to the dataset or GCR
  # package at +dataset_path+, resolving duplicates against the concepts
  # already stored there, then writes the result back to the same path.
  #
  # @return [ImportResult]
  def import_into_existing(xml_files, dataset_path)
    existing = load_existing(dataset_path)
    new_concepts = extract_all_concepts(xml_files)
    index = build_concept_index(existing)

    result_state = apply_with_dedup(new_concepts, existing, index)

    save_to_path(existing, dataset_path)

    ImportResult.new(
      concepts: existing.managed_concepts,
      conflicts: result_state.conflicts,
      source_files: xml_files,
      skipped_count: result_state.skipped,
    )
  end

  # Accumulator returned by #apply_with_dedup.
  DedupState = Struct.new(:conflicts, :skipped, keyword_init: true)

  private

  # Stores each new concept in +existing+, or resolves it as a duplicate
  # when +index+ already holds a concept under the same key.
  def apply_with_dedup(new_concepts, existing, index)
    state = DedupState.new(conflicts: [], skipped: 0)

    new_concepts.each do |mc|
      key = concept_key(mc)
      existing_mc = index[key]

      if existing_mc.nil?
        existing.store(mc)
        # Blank designations are not indexed, matching build_concept_index
        # and dedup_concepts: otherwise every later concept without a
        # designation would be reported as a duplicate of the first one.
        index[key] = mc unless key.first.empty?
      else
        state.conflicts << DuplicateConflict.new(
          new_concept: mc, existing_concept: existing_mc, key: key,
        )
        handle_duplicate(existing, existing_mc, mc, index, key, state)
      end
    end

    state
  end

  # Applies the configured duplicate strategy to a stored collection.
  def handle_duplicate(existing, old_mc, new_mc, index, key, state)
    case duplicate_strategy
    when :skip
      state.skipped += 1
    when :replace
      replace_in_collection(existing, old_mc, new_mc)
      index[key] = new_mc
    when :merge
      merge_concept(old_mc, new_mc)
    end
  end

  # Runs TermExtractor over every file and maps each extracted term with
  # the mapper, returning one flat list.
  def extract_all_concepts(xml_files)
    xml_files.flat_map do |path|
      extractor = TermExtractor.new(path)
      terms = extractor.extract
      terms.map { |t| @mapper.map(t) }
    end
  end

  # De-duplicates +concepts+ among themselves.
  #
  # @return [Array] +[unique_concepts, conflicts, skipped_count]+
  def dedup_concepts(concepts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
    seen = {}
    conflicts = []
    skipped = 0
    unique = []

    concepts.each do |mc|
      key = concept_key(mc)
      if key.first.empty? || seen[key].nil?
        unique << mc
        seen[key] = mc unless key.first.empty?
      else
        conflicts << DuplicateConflict.new(
          new_concept: mc, existing_concept: seen[key], key: key,
        )
        skipped += apply_dedup_to_unique(unique, seen, mc, key)
      end
    end

    [unique, conflicts, skipped]
  end

  # Applies the configured duplicate strategy to the in-memory +unique+
  # list and returns how many concepts were skipped (0 or 1).
  def apply_dedup_to_unique(unique, seen, new_mc, key)
    case duplicate_strategy
    when :skip
      1
    when :replace
      unique.delete(seen[key])
      unique << new_mc
      seen[key] = new_mc
      0
    when :merge
      merge_concept(seen[key], new_mc)
      0
    end
  end

  # Builds the duplicate-detection key: the default designation and the
  # default language's domain, both lower-cased and stripped. Missing
  # values become empty strings.
  def concept_key(managed_concept)
    designation = managed_concept.default_designation.to_s.downcase.strip
    domain = managed_concept.default_lang&.data&.domain.to_s.downcase.strip
    [designation, domain]
  end

  # Indexes +collection+ by concept key, leaving out concepts whose
  # designation is blank.
  def build_concept_index(collection)
    index = {}
    collection.each do |mc|
      key = concept_key(mc)
      index[key] = mc unless key.first.empty?
    end
    index
  end

  # Adds to +existing_mc+ every localization of +new_mc+ whose language it
  # does not have yet. Localizations already present are left untouched.
  def merge_concept(existing_mc, new_mc)
    new_mc.localizations.each do |l10n|
      lang = l10n.language_code
      if existing_mc.localization(lang).nil?
        existing_mc.add_localization(l10n)
      end
    end
  end

  def replace_in_collection(collection, old_mc, new_mc)
    collection.managed_concepts.delete(old_mc)
    collection.store(new_mc)
  end

  # Loads the concepts at +path+ (GCR package or dataset directory) into a
  # new ManagedConceptCollection.
  def load_existing(path)
    collection = ManagedConceptCollection.new
    if path.end_with?(".gcr")
      package = GcrPackage.load(path)
      package.concepts.each { |mc| collection.store(mc) }
    else
      concepts = ConceptCollector.collect(path)
      concepts.each { |mc| collection.store(mc) }
    end
    collection
  end

  # Writes +collection+ back to +path+. A GCR package is rebuilt in a
  # temporary file next to the target and then moved into place.
  def save_to_path(collection, path)
    if path.end_with?(".gcr")
      tmpdir = build_temp_dataset(collection.managed_concepts)
      # Assigned before the begin block so the ensure clause below never
      # sees nil.
      tmp_gcr = "#{path}.tmp.#{Process.pid}"
      begin
        # NOTE(review): presumably forces finalisation of objects still
        # holding the original archive open before it is replaced - confirm.
        GC.start
        GcrPackage.create_from_directory(
          tmpdir,
          output: tmp_gcr,
          shortname: File.basename(path, ".gcr"),
          version: "1.0.0",
        )
        FileUtils.rm_f(path)
        FileUtils.mv(tmp_gcr, path)
      ensure
        FileUtils.rm_rf(tmpdir)
        FileUtils.rm_f(tmp_gcr)
      end
    else
      save_dataset(collection.managed_concepts, path)
    end
  end

  # Saves +concepts+ as grouped concept files under "<dir>/concepts".
  def save_dataset(concepts, dir)
    concepts_dir = File.join(dir, "concepts")
    FileUtils.mkdir_p(concepts_dir)
    collection = ManagedConceptCollection.new
    concepts.each { |mc| collection.store(mc) }
    collection.save_grouped_concepts_to_files(concepts_dir)
  end

  # Builds a GCR package at +output+ from +concepts+ via a temporary
  # dataset directory, which is always removed afterwards.
  def create_gcr(concepts, output, shortname:, version:, **opts)
    tmpdir = build_temp_dataset(concepts)
    begin
      GcrPackage.create_from_directory(
        tmpdir,
        output: output,
        shortname: shortname,
        version: version,
        **opts,
      )
    ensure
      FileUtils.rm_rf(tmpdir)
    end
  end

  # Writes +concepts+ into a fresh temporary dataset directory and returns
  # its path. The caller is responsible for removing it.
  def build_temp_dataset(concepts)
    tmpdir = Dir.mktmpdir("glossarist-sts-import")
    concepts_dir = File.join(tmpdir, "concepts")
    FileUtils.mkdir_p(concepts_dir)

    collection = ManagedConceptCollection.new
    concepts.each { |mc| collection.store(mc) }
    collection.save_grouped_concepts_to_files(concepts_dir)

    tmpdir
  end
end
|
|
252
|
+
end
|
|
253
|
+
end
|