semtools 0.1.2 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -1
- data/bin/onto2json.rb +9 -3
- data/bin/semtools.rb +446 -0
- data/bin/strsimnet.rb +1 -2
- data/external_data/ontologies.txt +4 -0
- data/lib/semtools/math_methods.rb +137 -129
- data/lib/semtools/ontology.rb +2550 -2032
- data/lib/semtools/sim_handler.rb +1 -1
- data/lib/semtools/version.rb +1 -1
- data/lib/semtools.rb +0 -1
- data/semtools.gemspec +3 -0
- metadata +48 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3f63cc6548a9938e31121d2018d1c1c477987007c5d253b5fa814a285bdb576
|
4
|
+
data.tar.gz: e1911d3157c3046590ca13bc86215d2260b4a8b2b1b25affa5c2673881036795
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 30c95df80957a4a35b6fea05b9552352f529d8e45c10f6b128924a3ce2ee5d90e92a1e9d5fe0016d25538147e12d3a9199c81222642c94cdd0eb3c89eea168ef
|
7
|
+
data.tar.gz: ddc9e600fd984e68d060b7be05adf27b3f20bb67e638d42acc4b9b156eedabfce20d6f588a03d1fbc2948fedbd80d498f1767c0e3f8ea03720fa0ca327b95f3c
|
data/Gemfile
CHANGED
@@ -5,5 +5,8 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
|
5
5
|
# Specify your gem's dependencies in semtools.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
-
gem "rake", "~>
|
8
|
+
gem "rake", "~> 13.0"
|
9
9
|
gem "minitest", "~> 5.0"
|
10
|
+
|
11
|
+
expcalc_dev_path = File.expand_path('~/dev_gems/expcalc')
|
12
|
+
gem "expcalc", github: "seoanezonjic/expcalc", branch: "master" if Dir.exist?(expcalc_dev_path)
|
data/bin/onto2json.rb
CHANGED
@@ -18,14 +18,20 @@ OptionParser.new do |opts|
|
|
18
18
|
opts.banner = "Usage: #{__FILE__} [options]"
|
19
19
|
|
20
20
|
options[:input_file] = nil
|
21
|
-
opts.on("-i", "--input_file
|
21
|
+
opts.on("-i", "--input_file FILE", "Input file with ontology in OBO format") do |data|
|
22
22
|
options[:input_file] = data
|
23
23
|
end
|
24
24
|
|
25
25
|
options[:output_file] = nil
|
26
|
-
opts.on("-o", "--output_file
|
26
|
+
opts.on("-o", "--output_file FILE", "Output path") do |data|
|
27
27
|
options[:output_file] = data
|
28
28
|
end
|
29
|
+
|
30
|
+
options[:build] = false
|
31
|
+
opts.on("-b", "--build", "Activate build mode (calculate dictionaries)") do
|
32
|
+
options[:build] = true
|
33
|
+
end
|
34
|
+
|
29
35
|
|
30
36
|
opts.on_tail("-h", "--help", "Show this message") do
|
31
37
|
puts opts
|
@@ -39,7 +45,7 @@ end.parse!
|
|
39
45
|
# MAIN
|
40
46
|
##########################
|
41
47
|
puts "Loading ontology ..."
|
42
|
-
onto = Ontology.new(file: options[:input_file], load_file: true)
|
48
|
+
onto = Ontology.new(file: options[:input_file], load_file: true, build: options[:build])
|
43
49
|
puts "Exporting ontology to JSON ..."
|
44
50
|
onto.write(options[:output_file])
|
45
51
|
puts "Ontology exported"
|
data/bin/semtools.rb
ADDED
@@ -0,0 +1,446 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
ROOT_PATH = File.dirname(__FILE__)
|
3
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
4
|
+
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'down'
|
8
|
+
require 'semtools'
|
9
|
+
|
10
|
+
######################################################################################
|
11
|
+
## METHODS
|
12
|
+
######################################################################################
|
13
|
+
# Read a tab-separated file into memory.
#
# @param file [String] path of the file to read
# @return [Array<Array<String>>] one array of fields per line, in file order;
#   an empty line yields an empty array
def load_tabular_file(file)
  # File.foreach closes the underlying handle once iteration finishes; the
  # former File.open(file).each left it open until garbage collection.
  File.foreach(file).map do |line|
    line.chomp.split("\t")
  end
end
|
22
|
+
|
23
|
+
# Register every (id, terms) pair of a collection as a profile of the ontology.
#
# @param file [#each] collection yielding a profile id and its terms
#   (despite the name, this is already-parsed data, not a file handle)
# @param ontology [#add_profile] object receiving each profile
# @return the value returned by +file.each+
def store_profiles(file, ontology)
  file.each { |profile_id, profile_terms| ontology.add_profile(profile_id, profile_terms) }
end
|
28
|
+
|
29
|
+
# Accumulate one or several values under a key of a hash of arrays.
#
# @param hash_to_load [Hash] hash whose values are arrays; modified in place
# @param key [Object] key to add the value(s) to
# @param value [Object, Array] a single value or an array of values
# @param unique [Boolean] when truthy (default), duplicates are removed from
#   the stored array after appending to an existing entry
# @return [void]
def load_value(hash_to_load, key, value, unique = true)
  values = value.is_a?(Array) ? value : [value]
  stored = hash_to_load[key]
  if stored.nil?
    # First time the key is seen: the (possibly wrapped) value becomes the entry.
    hash_to_load[key] = values
  else
    stored.concat(values)
    # The flag used to be tested with `unless unique == nil`, so passing
    # `false` still deduplicated; honour it as a real boolean.
    stored.uniq! if unique
  end
end
|
43
|
+
|
44
|
+
# Translate profile terms between ontology codes and names, in place.
#
# With type 'names' the profiles stored in the ontology are translated with
# +ontology.translate_ids+ and replaced by the translation. With type 'codes'
# the given +profiles+ hash is translated with +ontology.translate_names+ and
# each entry is replaced by the translations joined with options[:separator].
# Terms that could not be translated are written to
# options[:untranslated_path] as "<id>\t<term;term;...>" lines.
#
# @param ontology [#profiles, #translate_ids, #translate_names]
# @param type [String] 'names' or 'codes'; any other value translates nothing
# @param options [Hash] reads :separator and :untranslated_path
# @param profiles [Hash, nil] id => names; required when type is 'codes'
# @raise [ArgumentError] if type is 'codes' and no profiles are given
# @return [void]
def translate(ontology, type, options, profiles = nil)
  not_translated = {}
  case type
  when 'names'
    ontology.profiles.each do |id, terms|
      translation, untranslated = ontology.translate_ids(terms)
      ontology.profiles[id] = translation
      not_translated[id] = untranslated unless untranslated.empty?
    end
  when 'codes'
    raise ArgumentError, "profiles are required to translate to codes" if profiles.nil?
    profiles.each do |id, terms|
      translation, untranslated = ontology.translate_names(terms)
      profiles[id] = translation.join(options[:separator].to_s)
      not_translated[id] = untranslated unless untranslated.empty?
    end
  end
  return if not_translated.empty?

  report_path = options[:untranslated_path]
  if report_path.nil?
    # The path is optional on the command line; File.open(nil) used to raise
    # here and abort the run after the translation had already been done.
    warn "#{not_translated.length} profile(s) contain untranslated terms; set :untranslated_path to save them"
    return
  end
  File.open(report_path, 'w') do |file|
    not_translated.each do |id, terms|
      file.puts([id, terms.join(";")].join("\t"))
    end
  end
end
|
68
|
+
|
69
|
+
# Clean a single profile and optionally restrict it to one ontology branch.
#
# The profile is first passed through +ontology.clean_profile_hard+. When
# options[:term_filter] is set, only the terms whose ancestors include that
# term are kept.
#
# @param profile [Array] terms of the profile
# @param ontology [#clean_profile_hard, #get_ancestors]
# @param options [Hash] reads :term_filter
# @return [Array] the cleaned (and possibly filtered) profile
def clean_profile(profile, ontology, options)
  cleaned_profile = ontology.clean_profile_hard(profile)
  branch_root = options[:term_filter]
  return cleaned_profile if branch_root.nil?

  cleaned_profile.select! { |term| ontology.get_ancestors(term).include?(branch_root) }
  cleaned_profile
end
|
76
|
+
|
77
|
+
# Clean every profile in place and drop the ones left without terms.
#
# @param profiles [Hash] id => terms; entries are replaced by their cleaned
#   version and emptied profiles are deleted
# @param ontology [Object] passed through to clean_profile
# @param options [Hash] passed through to clean_profile
# @return [Array] ids of the profiles that were removed
def clean_profiles(profiles, ontology, options)
  removed_profiles = profiles.each_with_object([]) do |(id, terms), emptied|
    cleaned = clean_profile(terms, ontology, options)
    profiles[id] = cleaned
    emptied << id if cleaned.empty?
  end
  # Deletion happens after the pass so the hash is not resized while iterating.
  removed_profiles.each { |id| profiles.delete(id) }
  removed_profiles
end
|
87
|
+
|
88
|
+
# Expand each profile, in place, with the ancestors of its terms.
#
# Ancestors already present in the profile and those listed in
# +unwanted_terms+ are not added. The previous implementation appended to the
# very array it was iterating (so the loop never finished) and pushed each
# ancestor list as a single nested array.
#
# @param profiles [Hash] id => Array of terms; the arrays are modified
# @param ontology [#get_ancestors]
# @param unwanted_terms [Array, String] terms to leave out; a String is taken
#   as a comma-separated list and converted to symbols, matching how profile
#   terms are symbolised when profiles are loaded by the script
# @return [Hash] the same +profiles+ object
def expand_profiles(profiles, ontology, unwanted_terms = [])
  unwanted =
    if unwanted_terms.is_a?(String)
      unwanted_terms.split(',').map { |term| term.strip.to_sym }
    else
      Array(unwanted_terms)
    end
  profiles.each do |_profile_id, terms|
    # Collect everything first, then append once: the term list must not grow
    # while it is being traversed.
    ancestors = terms.flat_map { |term| Array(ontology.get_ancestors(term)) }
    terms.concat(ancestors.uniq - unwanted - terms)
  end
end
|
95
|
+
|
96
|
+
# Write the all-against-all profile similarity list to a file.
#
# The output file is created in the current directory and named after the
# input file: "<input basename without extension>_semantic_similarity_list".
# Each line holds "<query id>\t<search id>\t<similarity>" and every ordered
# pair of profiles (including a profile with itself) is written.
#
# @param input [String] path of the input file; only used to name the output
# @param onto_obj [#profiles, #compare] ontology holding the profiles
# @param similarity_type [Symbol] forwarded to +compare+ as +sim_type+
def write_similarity_profile_list(input, onto_obj, similarity_type)
  similarity_file = File.basename(input, ".*") + '_semantic_similarity_list'
  File.open(similarity_file, 'w') do |file|
    onto_obj.profiles.each do |query_id, query_terms|
      onto_obj.profiles.each do |search_id, search_terms|
        score = onto_obj.compare(query_terms, search_terms, sim_type: similarity_type)
        file.puts([query_id, search_id, score].join("\t"))
      end
    end
  end
end
|
106
|
+
|
107
|
+
# Download an ontology listed in the index file, or list the local ones.
#
# @param source [String] path of the tab-separated index file
#   (ontology name, URL); its directory is also the local ontology store
# @param key [String] ontology name to download, or 'list' to print the
#   paths of the .obo files found in the local store
# @param output [String, nil] destination path; when nil the file is saved as
#   "<key>.obo" in the local store if it is writable, otherwise in the
#   current directory
# @return [void]
def download(source, key, output)
  external_data = File.dirname(source)
  if key == 'list'
    # Listing only inspects the directory; the index does not need parsing.
    Dir.glob(File.join(external_data, '*.obo')) { |f| puts f }
    return
  end

  url = load_tabular_file(source).to_h[key]
  if url.nil?
    # An unknown name used to be silently ignored, which looked like success.
    warn "Ontology '#{key}' is not listed in #{source}; nothing was downloaded"
    return
  end

  output_path = output
  if output_path.nil?
    file_name = key + '.obo'
    output_path = File.writable?(external_data) ? File.join(external_data, file_name) : file_name
  end
  Down::NetHttp.download(url, destination: output_path, max_redirects: 5)
  File.chmod(0644, output_path) # Correct file permissions set by down gem
end
|
130
|
+
|
131
|
+
# Resolve the ontology given on the command line to a file path.
#
# An existing path is returned untouched. Otherwise the value is looked up as
# an ontology name in the index file and mapped to "<name>.obo" inside the
# index directory; unknown names abort the program.
#
# @param path [String] path to an ontology file, or a name listed in the index
# @param source [String] path of the tab-separated ontology index file
# @return [String] path of the ontology file to load
def get_ontology_file(path, source)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  return path if File.exist?(path)

  ont_index = load_tabular_file(source).to_h
  abort("Input ontology file not exists") if ont_index[path].nil?
  File.join(File.dirname(source), path + '.obo')
end
|
142
|
+
|
143
|
+
# Turn a statistics hash into labelled rows ready to be printed.
#
# @param stats [Hash] reads :count, :countNonZero, :max, :min, :average,
#   :variance, :standardDeviation, :q1, :median and :q3
# @return [Array<Array>] [label, value] pairs in report order; the
#   'Non Zero Density' row is :countNonZero divided by :count as a float
def get_stats(stats)
  [
    ['Elements', stats[:count]],
    ['Elements Non Zero', stats[:countNonZero]],
    ['Non Zero Density', stats[:countNonZero].fdiv(stats[:count])],
    ['Max', stats[:max]],
    ['Min', stats[:min]],
    ['Average', stats[:average]],
    ['Variance', stats[:variance]],
    ['Standard Deviation', stats[:standardDeviation]],
    ['Q1', stats[:q1]],
    ['Median', stats[:median]],
    ['Q3', stats[:q3]]
  ]
end
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
####################################################################################
|
165
|
+
## OPTPARSE
|
166
|
+
####################################################################################
|
167
|
+
# Command-line interface. Each default is set right before the switch that
# overrides it, so `options` is fully populated even when no argument is given.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options]"

  options[:download] = nil
  opts.on("-d", "--download STRING", "Download obo file from official resource. MONDO, GO and HPO are possible values.") do |item|
    options[:download] = item
  end

  options[:input_file] = nil
  opts.on("-i", "--input_file PATH", "Filepath of profile data") do |item|
    options[:input_file] = item
  end

  options[:output_file] = nil
  opts.on("-o", "--output_file PATH", "Output filepath") do |item|
    options[:output_file] = item
  end

  options[:IC] = false
  opts.on("-I", "--IC", "Get IC") do
    options[:IC] = true
  end

  options[:ontology_file] = nil
  opts.on("-O PATH", "--ontology_file PATH", "Path to ontology file") do |item|
    options[:ontology_file] = item
  end

  options[:term_filter] = nil
  opts.on("-T STRING", "--term_filter STRING", "If specified, only terms that are descendants of the specified term will be kept on a profile when cleaned") do |item|
    options[:term_filter] = item.to_sym
  end

  options[:translate] = nil
  opts.on("-t STRING", "--translate STRING", "Translate to 'names' or to 'codes'") do |item|
    options[:translate] = item
  end

  # Explicit default, like every other option (it was left undefined before).
  options[:similarity] = nil
  opts.on("-s method", "--similarity method", "Calculate similarity between profile IDs computed by 'resnik', 'lin' or 'jiang_conrath' methods. ") do |sim_method|
    options[:similarity] = sim_method.to_sym
  end

  options[:clean_profiles] = false
  opts.on("-c", "--clean_profiles", "Removes ancestors, descendants and obsolete terms from profiles") do
    options[:clean_profiles] = true
  end

  options[:removed_path] = 'rejected_profs'
  opts.on("-r PATH", "--removed_path PATH", "Desired path to write removed profiles file") do |item|
    options[:removed_path] = item
  end

  options[:untranslated_path] = nil
  opts.on("-u PATH", "--untranslated_path PATH", "Desired path to write untranslated terms file") do |item|
    options[:untranslated_path] = item
  end

  options[:keyword] = nil
  opts.on("-k STRING", "--keyword STRING", "regex used to get xref terms in the ontology file") do |item|
    options[:keyword] = item
  end

  options[:xref_sense] = :byValue
  # The switch used to be declared as "--xref_sense " (trailing space), which
  # OptionParser reads as a switch with a mandatory argument even though the
  # value was never used. It is a plain flag.
  opts.on("--xref_sense", "Ontology-xref or xref-ontology. By default xref-ontology; if set, ontology-xref") do
    options[:xref_sense] = :byTerm
  end

  options[:expand_profiles] = false
  opts.on("-e", "--expand_profiles", "Expand profiles adding ancestors") do
    options[:expand_profiles] = true
  end

  options[:unwanted_terms] = []
  opts.on("-U", "--unwanted_terms STRING", "Comma separated terms not wanted to be included in profile expansion") do |item|
    # Store a list of symbols (same shape as the default and as profile terms)
    # instead of the raw comma-separated string.
    options[:unwanted_terms] = item.split(',').map { |term| term.strip.to_sym }
  end

  options[:separator] = ";"
  opts.on("-S STRING", "--separator STRING", "Separator used for the terms profile") do |sep|
    options[:separator] = sep
  end

  # [terms, modifiers]; the argument is "TERM1,TERM2" or "MODIFIERS/TERM1,TERM2".
  options[:childs] = [[], '']
  opts.on("-C STRING", "--childs STRING", "Term code list (comma separated) to generate child list") do |item|
    if item.include?('/')
      modifiers, terms = item.split('/')
    else
      modifiers = ''
      terms = item
    end
    # to_s: "MODIFIERS/" with nothing after the slash yields no term list.
    terms = terms.to_s.split(',').map { |t| t.to_sym }
    options[:childs] = [terms, modifiers]
  end

  options[:statistics] = false
  opts.on("-n", "--statistics", "To obtain main statistical descriptors of the profiles file") do
    options[:statistics] = true
  end

  options[:list_translate] = nil
  opts.on("-l STRING", "--list_translate STRING", "Translate to 'names' or to 'codes' input list") do |item|
    options[:list_translate] = item
  end

  options[:subject_column] = 0
  opts.on("-f NUM", "--subject_column NUM", "The number of the column for the subject id") do |ncol|
    options[:subject_column] = ncol.to_i
  end

  options[:annotations_column] = 1
  opts.on("-a NUM", "--annotations_column NUM", "The number of the column for the annotation ids") do |ncol|
    options[:annotations_column] = ncol.to_i
  end

  options[:list_term_attributes] = false
  # The description was a copy of the --annotations_column one.
  opts.on("--list_term_attributes", "List the term attributes of the ontology") do
    options[:list_term_attributes] = true
  end

end.parse!
|
289
|
+
|
290
|
+
####################################################################################
|
291
|
+
## MAIN
|
292
|
+
####################################################################################
|
293
|
+
# Index of known ontologies (name and download URL per line).
ont_index_file = File.join(EXTERNAL_DATA, 'ontologies.txt')

# Download mode is exclusive: fetch (or list) ontologies and stop.
if !options[:download].nil?
  download(ont_index_file, options[:download], options[:output_file])
  Process.exit
end

# -O accepts either a file path or a name listed in the ontology index.
if !options[:ontology_file].nil?
  options[:ontology_file] = get_ontology_file(options[:ontology_file], ont_index_file)
end
ontology = Ontology.new(file: options[:ontology_file], load_file: true)

# Load the input table. Unless a plain list translation was requested, each
# row is reduced to [subject id, [annotation terms as symbols]].
data = nil
if !options[:input_file].nil?
  data = load_tabular_file(options[:input_file])
  if options[:list_translate].nil? || !options[:keyword].nil?
    data.map!{|row|
      [row[options[:subject_column]],
       row[options[:annotations_column]].split(options[:separator]).map!{|term| term.to_sym}]
    }
    store_profiles(data, ontology) if options[:translate] != 'codes' && options[:keyword].nil?
  end
end

# These modes iterate over the input rows; without -i they used to fail with
# a NoMethodError on nil somewhere further down.
needs_input = !options[:list_translate].nil? || options[:translate] == 'codes' || !options[:keyword].nil?
abort("An input file (-i) is required for --list_translate, --translate codes and --keyword") if data.nil? && needs_input

# List translation mode: translate each row, print the first result and stop.
if !options[:list_translate].nil?
  data.each do |term|
    case options[:list_translate]
    when 'names'
      translation, _untranslated = ontology.translate_ids(term)
    when 'codes'
      translation, _untranslated = ontology.translate_names(term)
    else
      abort("Unknown --list_translate value '#{options[:list_translate]}': use 'names' or 'codes'")
    end
    puts "#{term.first}\t#{translation.empty? ? '-' : translation.first}"
  end
  Process.exit
end

if options[:translate] == 'codes'
  profiles = {}
  data.each do |id, terms|
    load_value(profiles, id, terms)
    # NOTE(review): terms was already split into an Array while loading the
    # input above, and Array#split is not core Ruby — confirm this path works.
    profiles[id] = terms.split(options[:separator])
  end
  translate(ontology, 'codes', options, profiles)
  store_profiles(profiles, ontology)
end

# Clean profiles and record the ids of those left empty.
if options[:clean_profiles]
  removed_profiles = clean_profiles(ontology.profiles, ontology, options)
  if !removed_profiles.nil? && !removed_profiles.empty?
    File.open(options[:removed_path], 'w') do |f|
      removed_profiles.each do |profile|
        f.puts profile
      end
    end
  end
end

if options[:expand_profiles]
  # Profiles are expanded in place; the return value is not needed.
  expand_profiles(ontology.profiles, ontology, options[:unwanted_terms])
end

if !options[:similarity].nil?
  # Plain positional arguments; the call used to create throwaway locals
  # through assignment expressions (input = ..., onto_obj = ...).
  write_similarity_profile_list(options[:input_file], ontology, options[:similarity])
end


# Per-profile information content, written next to the working directory as
# "<input basename>_IC_onto_freq".
if options[:IC]
  ontology.add_observed_terms_from_profiles
  by_ontology, by_freq = ontology.get_profiles_resnik_dual_ICs
  ic_file = File.basename(options[:input_file], ".*")+'_IC_onto_freq'
  File.open(ic_file , 'w') do |file|
    ontology.profiles.keys.each do |id|
      file.puts([id, by_ontology[id], by_freq[id]].join("\t"))
    end
  end
end

if options[:translate] == 'names'
  translate(ontology, 'names', options)
end

# Descendant listing: modifier 'r' prints parent-child relations instead of a
# flat list, modifier 'n' prints names instead of codes.
if !options[:childs].first.empty?
  terms, modifiers = options[:childs]
  all_childs = []
  terms.each do |term|
    childs = ontology.get_descendants(term)
    all_childs = all_childs | childs
  end
  if modifiers.include?('r')
    relations = []
    all_childs = all_childs | terms # Add parents that generated child list
    all_childs.each do |term|
      descendants = ontology.get_direct_descendants(term)
      if !descendants.nil?
        descendants.each do |desc|
          relations << [term, desc]
        end
      end
    end
    relations.each do |rel|
      rel, _ = ontology.translate_ids(rel) if modifiers.include?('n')
      puts rel.join("\t")
    end
  else
    all_childs.each do |c|
      if modifiers.include?('n')
        puts ontology.translate_id(c)
      else
        puts c
      end
    end
  end
end

# Dump the (possibly cleaned / expanded / translated) profiles.
if !options[:output_file].nil?
  File.open(options[:output_file], 'w') do |file|
    ontology.profiles.each do |id, terms|
      file.puts([id, terms.join("|")].join("\t"))
    end
  end
end

if options[:statistics]
  get_stats(ontology.profile_stats).each do |stat|
    puts stat.join("\t")
  end
end

if options[:list_term_attributes]
  term_attributes = ontology.list_term_attributes
  term_attributes.each do |t_attr|
    t_attr[0] = t_attr[0].to_s
    puts t_attr.join("\t")
  end
end

# Cross-reference mode: map each profile term through the xref dictionary
# built from the ontology and write one "<id>\t<xref>" line per match.
# NOTE(review): this writes to the same -o path as the profile dump above and
# fails if -o was not given — confirm the intended behaviour.
if !options[:keyword].nil?
  xref_translated = []
  ontology.calc_dictionary(:xref, select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true, substitute_alternatives: false)
  dict = ontology.dicts[:tag][options[:xref_sense]]
  data.each do |id, prof|
    xrefs = []
    prof.each do |t|
      query = dict[t.to_s]
      xrefs.concat(query) if !query.nil?
    end
    xref_translated << [id, xrefs] if !xrefs.empty?
  end
  File.open(options[:output_file], 'w') do |f|
    xref_translated.each do |id, prof|
      prof.each do |t|
        f.puts [id, t].join("\t")
      end
    end
  end
end
|
data/bin/strsimnet.rb
CHANGED
@@ -111,12 +111,11 @@ texts2compare = load_table_file(input_file = options[:input_file],
|
|
111
111
|
targetCol = options[:cindex],
|
112
112
|
filterCol = options[:findex],
|
113
113
|
filterValue = options[:filter_value])
|
114
|
-
|
115
114
|
# Verbose point
|
116
115
|
puts "Calculating similitude for (" + texts2compare.length.to_s + ") elements"
|
117
116
|
|
118
117
|
# Obtain all Vs all
|
119
|
-
similitudes_AllVsAll = similitude_network(texts2compare,options[:rm_char])
|
118
|
+
similitudes_AllVsAll = similitude_network(texts2compare, charsToRemove: options[:rm_char])
|
120
119
|
|
121
120
|
# Verbose point
|
122
121
|
puts "Writing output file ..."
|