semtools 0.1.2 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -1
- data/bin/onto2json.rb +9 -3
- data/bin/semtools.rb +446 -0
- data/bin/strsimnet.rb +1 -2
- data/external_data/ontologies.txt +4 -0
- data/lib/semtools/math_methods.rb +137 -129
- data/lib/semtools/ontology.rb +2550 -2032
- data/lib/semtools/sim_handler.rb +1 -1
- data/lib/semtools/version.rb +1 -1
- data/lib/semtools.rb +0 -1
- data/semtools.gemspec +3 -0
- metadata +48 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a3f63cc6548a9938e31121d2018d1c1c477987007c5d253b5fa814a285bdb576
|
4
|
+
data.tar.gz: e1911d3157c3046590ca13bc86215d2260b4a8b2b1b25affa5c2673881036795
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 30c95df80957a4a35b6fea05b9552352f529d8e45c10f6b128924a3ce2ee5d90e92a1e9d5fe0016d25538147e12d3a9199c81222642c94cdd0eb3c89eea168ef
|
7
|
+
data.tar.gz: ddc9e600fd984e68d060b7be05adf27b3f20bb67e638d42acc4b9b156eedabfce20d6f588a03d1fbc2948fedbd80d498f1767c0e3f8ea03720fa0ca327b95f3c
|
data/Gemfile
CHANGED
@@ -5,5 +5,8 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
|
5
5
|
# Specify your gem's dependencies in semtools.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
-
gem "rake", "~>
|
8
|
+
gem "rake", "~> 13.0"
|
9
9
|
gem "minitest", "~> 5.0"
|
10
|
+
|
11
|
+
expcalc_dev_path = File.expand_path('~/dev_gems/expcalc')
|
12
|
+
gem "expcalc", github: "seoanezonjic/expcalc", branch: "master" if Dir.exist?(expcalc_dev_path)
|
data/bin/onto2json.rb
CHANGED
@@ -18,14 +18,20 @@ OptionParser.new do |opts|
|
|
18
18
|
opts.banner = "Usage: #{__FILE__} [options]"
|
19
19
|
|
20
20
|
options[:input_file] = nil
|
21
|
-
opts.on("-i", "--input_file
|
21
|
+
opts.on("-i", "--input_file FILE", "Input file with ontology in OBO format") do |data|
|
22
22
|
options[:input_file] = data
|
23
23
|
end
|
24
24
|
|
25
25
|
options[:output_file] = nil
|
26
|
-
opts.on("-o", "--output_file
|
26
|
+
opts.on("-o", "--output_file FILE", "Output path") do |data|
|
27
27
|
options[:output_file] = data
|
28
28
|
end
|
29
|
+
|
30
|
+
options[:build] = false
|
31
|
+
opts.on("-b", "--build", "Activate build mode (calculate dictionaries)") do
|
32
|
+
options[:build] = true
|
33
|
+
end
|
34
|
+
|
29
35
|
|
30
36
|
opts.on_tail("-h", "--help", "Show this message") do
|
31
37
|
puts opts
|
@@ -39,7 +45,7 @@ end.parse!
|
|
39
45
|
# MAIN
|
40
46
|
##########################
|
41
47
|
puts "Loading ontology ..."
|
42
|
-
onto = Ontology.new(file: options[:input_file], load_file: true)
|
48
|
+
onto = Ontology.new(file: options[:input_file], load_file: true, build: options[:build])
|
43
49
|
puts "Exporting ontology to JSON ..."
|
44
50
|
onto.write(options[:output_file])
|
45
51
|
puts "Ontology exported"
|
data/bin/semtools.rb
ADDED
@@ -0,0 +1,446 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
ROOT_PATH = File.dirname(__FILE__)
|
3
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
4
|
+
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'down'
|
8
|
+
require 'semtools'
|
9
|
+
|
10
|
+
######################################################################################
|
11
|
+
## METHODS
|
12
|
+
######################################################################################
|
13
|
+
# Read a tab-separated text file into an array of rows.
#
# @param file [String] path of the file to read
# @return [Array<Array<String>>] one array of fields per input line, in file order
def load_tabular_file(file)
  # File.foreach closes the underlying handle once the enumeration finishes,
  # unlike a bare File.open(file).each which leaves it open.
  File.foreach(file).map { |line| line.chomp.split("\t") }
end
|
22
|
+
|
23
|
+
# Register every (id, terms) pair with the ontology.
#
# @param file [#each] collection yielding an id and its terms on each iteration
#   (an array of two-element rows or a hash both work)
# @param ontology [#add_profile] object receiving one add_profile(id, terms) call per pair
# @return [Object] the collection that was passed in
def store_profiles(file, ontology)
  file.each { |id, terms| ontology.add_profile(id, terms) }
end
|
28
|
+
|
29
|
+
# Accumulate a value (or a list of values) under a key of a hash of arrays.
#
# @param hash_to_load [Hash] hash whose values are arrays; modified in place
# @param key [Object] key under which the value is accumulated
# @param value [Object, Array] single item or array of items to add
# @param unique [Object, nil] duplicates are removed after appending to an
#   existing entry unless this is exactly nil (note: false does NOT disable it)
def load_value(hash_to_load, key, value, unique = true)
  incoming = value.class == Array ? value : [value]
  stored = hash_to_load[key]
  if stored.nil?
    # Store a copy: later concat/uniq! calls must not alter the caller's array.
    hash_to_load[key] = incoming.dup
  else
    stored.concat(incoming)
    stored.uniq! unless unique.nil?
  end
end
|
43
|
+
|
44
|
+
# Translate profile terms between ontology codes and names.
#
# With type 'names' the profiles stored in the ontology are replaced by the
# result of ontology.translate_ids. With type 'codes' each entry of +profiles+
# is replaced by the result of ontology.translate_names joined into a single
# string with options[:separator]. Any other type translates nothing.
#
# Terms that could not be translated are written to options[:untranslated_path]
# as "id<TAB>term;term;..." lines. When no path was configured a warning is
# printed instead of failing on File.open(nil).
#
# @param ontology [Object] responds to profiles, translate_ids and translate_names
# @param type [String] 'names' or 'codes'
# @param options [Hash] reads :separator and :untranslated_path
# @param profiles [Hash, nil] id => terms; required (and modified) for type 'codes'
def translate(ontology, type, options, profiles = nil)
  not_translated = {}
  case type
  when 'names'
    ontology.profiles.each do |id, terms|
      translation, untranslated = ontology.translate_ids(terms)
      ontology.profiles[id] = translation
      not_translated[id] = untranslated unless untranslated.empty?
    end
  when 'codes'
    profiles.each do |id, terms|
      translation, untranslated = ontology.translate_names(terms)
      profiles[id] = translation.join(options[:separator].to_s)
      not_translated[id] = untranslated unless untranslated.empty?
    end
  end
  return if not_translated.empty?

  report_path = options[:untranslated_path]
  if report_path.nil?
    warn "#{not_translated.length} profile(s) contain untranslated terms but no untranslated_path was given; report skipped"
    return
  end
  File.open(report_path, 'w') do |file|
    not_translated.each do |id, terms|
      file.puts([id, terms.join(";")].join("\t"))
    end
  end
end
|
68
|
+
|
69
|
+
# Clean a single profile and optionally restrict it to one ontology branch.
#
# The profile is first passed through ontology.clean_profile_hard. When
# options[:term_filter] is set, only terms whose ancestor list (as returned by
# ontology.get_ancestors) contains that term are kept.
#
# @param profile [Array] terms of the profile
# @param ontology [Object] responds to clean_profile_hard and get_ancestors
# @param options [Hash] reads :term_filter
# @return [Array] the cleaned (and possibly filtered) profile
def clean_profile(profile, ontology, options)
  cleaned_profile = ontology.clean_profile_hard(profile)
  required_ancestor = options[:term_filter]
  return cleaned_profile if required_ancestor.nil?

  cleaned_profile.select! { |term| ontology.get_ancestors(term).include?(required_ancestor) }
  cleaned_profile
end
|
76
|
+
|
77
|
+
# Clean every profile in place and drop the ones that end up empty.
#
# @param profiles [Hash] id => terms; each value is replaced by its cleaned
#   version and ids whose cleaned profile is empty are deleted
# @param ontology [Object] forwarded to clean_profile
# @param options [Hash] forwarded to clean_profile
# @return [Array] ids of the profiles that were removed, in iteration order
def clean_profiles(profiles, ontology, options)
  removed_profiles = []
  # Re-assigning an existing key while iterating is safe; deletion is deferred
  # until the iteration is over.
  profiles.each do |id, terms|
    cleaned = clean_profile(terms, ontology, options)
    profiles[id] = cleaned
    removed_profiles << id if cleaned.empty?
  end
  removed_profiles.each { |id| profiles.delete(id) }
  removed_profiles
end
|
87
|
+
|
88
|
+
# Expand each profile, in place, with the ancestors of its terms.
#
# Ancestors are looked up for the terms originally present in the profile,
# appended as individual terms (not as nested arrays) and de-duplicated, so
# the profile is never extended while it is still being iterated.
#
# @param profiles [Hash] id => array of terms; the arrays are modified in place
# @param ontology [Object] responds to get_ancestors(term)
# @param unwanted_terms [Array, String] terms never to be added; a comma
#   separated string is accepted and converted to symbols
# @return [Hash] the profiles hash that was passed in
def expand_profiles(profiles, ontology, unwanted_terms = [])
  excluded = unwanted_terms.is_a?(String) ? unwanted_terms.split(',').map(&:to_sym) : unwanted_terms
  profiles.each do |_id, terms|
    # Collect all ancestors before touching the array being read.
    ancestors = terms.flat_map { |term| ontology.get_ancestors(term) }
    terms.concat(ancestors - excluded)
    terms.uniq!
  end
end
|
95
|
+
|
96
|
+
# Write the all-against-all similarity of the stored profiles to a file.
#
# The output file is created in the current working directory and is named
# after the input file (directory and extension removed) plus the suffix
# '_semantic_similarity_list'. Each line holds: query id, target id and the
# value returned by onto_obj.compare, separated by tabs. Every ordered pair is
# written, including each profile against itself.
#
# @param input [String] path of the input file, used only to build the output name
# @param onto_obj [Object] responds to profiles and compare(a, b, sim_type:)
# @param similarity_type [Object] forwarded to compare as sim_type
def write_similarity_profile_list(input, onto_obj, similarity_type)
  similarity_file = File.basename(input, ".*") + '_semantic_similarity_list'
  File.open(similarity_file, 'w') do |file|
    onto_obj.profiles.each do |query_id, query_terms|
      onto_obj.profiles.each do |target_id, target_terms|
        score = onto_obj.compare(query_terms, target_terms, sim_type: similarity_type)
        file.puts([query_id, target_id, score].join("\t"))
      end
    end
  end
end
|
106
|
+
|
107
|
+
# Download an ontology listed in the index file, or list the local .obo files.
#
# The index is a two-column tabular file mapping a key to a URL. With key
# 'list' the .obo files found next to the index are printed. Otherwise the URL
# registered for the key is downloaded to +output+ or, when +output+ is nil,
# to "<key>.obo" beside the index (falling back to the current directory if
# that folder is not writable). A key missing from the index is reported on
# stderr instead of being silently ignored.
#
# @param source [String] path of the ontology index file
# @param key [String] ontology key to download, or 'list'
# @param output [String, nil] explicit destination path
def download(source, key, output)
  source_list = load_tabular_file(source).to_h
  external_data = File.dirname(source)
  if key == 'list'
    Dir.glob(File.join(external_data, '*.obo')) { |f| puts f }
    return
  end

  url = source_list[key]
  if url.nil?
    warn "Ontology '#{key}' is not registered in #{source}; nothing was downloaded"
    return
  end

  output_path =
    if !output.nil?
      output
    elsif File.writable?(external_data)
      File.join(external_data, key + '.obo')
    else
      key + '.obo'
    end
  Down::NetHttp.download(url, destination: output_path, max_redirects: 5)
  File.chmod(0644, output_path) # Correct file permissions set by down gem
end
|
130
|
+
|
131
|
+
# Resolve the ontology given on the command line to a file path.
#
# An existing file path is returned untouched. Otherwise the value is looked
# up as a key of the ontology index and, when registered, resolved to
# "<key>.obo" in the folder that holds the index. The program aborts when the
# value is neither an existing file nor a registered key.
#
# @param path [String] file path or ontology key
# @param source [String] path of the ontology index file
# @return [String] path of the ontology file to load
def get_ontology_file(path, source)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return path if File.exist?(path)

  ont_index = load_tabular_file(source).to_h
  abort("Input ontology file not exists") if ont_index[path].nil?
  File.join(File.dirname(source), path + '.obo')
end
|
142
|
+
|
143
|
+
# Turn a statistics hash into an ordered list of [label, value] rows.
#
# @param stats [Hash] read keys: :count, :countNonZero, :max, :min, :average,
#   :variance, :standardDeviation, :q1, :median and :q3
# @return [Array<Array>] label/value pairs; 'Non Zero Density' is
#   countNonZero divided by count as a float
def get_stats(stats)
  density = stats[:countNonZero].fdiv(stats[:count])
  [
    ['Elements', stats[:count]],
    ['Elements Non Zero', stats[:countNonZero]],
    ['Non Zero Density', density],
    ['Max', stats[:max]],
    ['Min', stats[:min]],
    ['Average', stats[:average]],
    ['Variance', stats[:variance]],
    ['Standard Deviation', stats[:standardDeviation]],
    ['Q1', stats[:q1]],
    ['Median', stats[:median]],
    ['Q3', stats[:q3]]
  ]
end
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
####################################################################################
|
165
|
+
## OPTPARSE
|
166
|
+
####################################################################################
|
167
|
+
# Command line options. Every key gets its default right before the switch
# that can override it, so `options` is fully populated after parse!.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options]"

  options[:download] = nil
  opts.on("-d", "--download STRING", "Download obo file from official resource. MONDO, GO and HPO are possible values.") do |item|
    options[:download] = item
  end

  options[:input_file] = nil
  opts.on("-i", "--input_file PATH", "Filepath of profile data") do |item|
    options[:input_file] = item
  end

  options[:output_file] = nil
  opts.on("-o", "--output_file PATH", "Output filepath") do |item|
    options[:output_file] = item
  end

  options[:IC] = false
  opts.on("-I", "--IC", "Get IC") do
    options[:IC] = true
  end

  options[:ontology_file] = nil
  opts.on("-O PATH", "--ontology_file PATH", "Path to ontology file") do |item|
    options[:ontology_file] = item
  end

  options[:term_filter] = nil
  opts.on("-T STRING", "--term_filter STRING", "If specified, only terms that are descendants of the specified term will be kept on a profile when cleaned") do |item|
    options[:term_filter] = item.to_sym
  end

  options[:translate] = nil
  opts.on("-t STRING", "--translate STRING", "Translate to 'names' or to 'codes'") do |item|
    options[:translate] = item
  end

  options[:similarity] = nil
  opts.on("-s method", "--similarity method", "Calculate similarity between profile IDs computed by 'resnik', 'lin' or 'jiang_conrath' methods. ") do |sim_method|
    options[:similarity] = sim_method.to_sym
  end

  options[:clean_profiles] = false
  opts.on("-c", "--clean_profiles", "Removes ancestors, descendants and obsolete terms from profiles") do
    options[:clean_profiles] = true
  end

  options[:removed_path] = 'rejected_profs'
  opts.on("-r PATH", "--removed_path PATH", "Desired path to write removed profiles file") do |item|
    options[:removed_path] = item
  end

  options[:untranslated_path] = nil
  opts.on("-u PATH", "--untranslated_path PATH", "Desired path to write untranslated terms file") do |item|
    options[:untranslated_path] = item
  end

  options[:keyword] = nil
  opts.on("-k STRING", "--keyword STRING", "regex used to get xref terms in the ontology file") do |item|
    options[:keyword] = item
  end

  options[:xref_sense] = :byValue
  # No trailing text after the switch name: anything following it (even a
  # single space) makes OptionParser treat the flag as taking an argument.
  opts.on("--xref_sense", "Ontology-xref or xref-ontology. By default xref-ontology if set, ontology-xref") do
    options[:xref_sense] = :byTerm
  end

  options[:expand_profiles] = false
  opts.on("-e", "--expand_profiles", "Expand profiles adding ancestors") do
    options[:expand_profiles] = true
  end

  options[:unwanted_terms] = []
  opts.on("-U", "--unwanted_terms STRING", "Comma separated terms not wanted to be included in profile expansion") do |item|
    # Profile terms are symbols, so the exclusion list must be a list of symbols too.
    options[:unwanted_terms] = item.split(',').map(&:to_sym)
  end

  options[:separator] = ";"
  opts.on("-S STRING", "--separator STRING", "Separator used for the terms profile") do |sep|
    options[:separator] = sep
  end

  options[:childs] = [[], '']
  opts.on("-C STRING", "--childs STRING", "Term code list (comma separated) to generate child list") do |item|
    # Optional "modifiers/" prefix, e.g. "rn/TERM1,TERM2".
    if item.include?('/')
      modifiers, terms = item.split('/')
    else
      modifiers = ''
      terms = item
    end
    terms = terms.split(',').map { |t| t.to_sym }
    options[:childs] = [terms, modifiers]
  end

  options[:statistics] = false
  opts.on("-n", "--statistics", "To obtain main statistical descriptors of the profiles file") do
    options[:statistics] = true
  end

  options[:list_translate] = nil
  opts.on("-l STRING", "--list_translate STRING", "Translate to 'names' or to 'codes' input list") do |item|
    options[:list_translate] = item
  end

  options[:subject_column] = 0
  opts.on("-f NUM", "--subject_column NUM", "The number of the column for the subject id") do |ncol|
    options[:subject_column] = ncol.to_i
  end

  options[:annotations_column] = 1
  opts.on("-a NUM", "--annotations_column NUM", "The number of the column for the annotation ids") do |ncol|
    options[:annotations_column] = ncol.to_i
  end

  options[:list_term_attributes] = false
  opts.on("--list_term_attributes", "Print the ontology term attributes as tab separated rows") do
    options[:list_term_attributes] = true
  end
end.parse!
|
289
|
+
|
290
|
+
####################################################################################
|
291
|
+
## MAIN
|
292
|
+
####################################################################################
|
293
|
+
ont_index_file = File.join(EXTERNAL_DATA, 'ontologies.txt')

# Download mode: fetch (or list) ontologies and stop.
unless options[:download].nil?
  download(ont_index_file, options[:download], options[:output_file])
  Process.exit
end

# Fail early, with a readable message, on option combinations that would
# otherwise end in a NoMethodError/TypeError on nil further down.
needs_input = !options[:list_translate].nil? || options[:translate] == 'codes' ||
              !options[:keyword].nil? || options[:IC] || !options[:similarity].nil?
abort('An input file (-i) is required for the requested operation') if needs_input && options[:input_file].nil?
unless options[:list_translate].nil? || %w[names codes].include?(options[:list_translate])
  abort("--list_translate must be 'names' or 'codes'")
end
if !options[:keyword].nil? && options[:list_translate].nil? && options[:output_file].nil?
  abort('An output file (-o) is required when --keyword is used')
end

unless options[:ontology_file].nil?
  options[:ontology_file] = get_ontology_file(options[:ontology_file], ont_index_file)
end
ontology = Ontology.new(file: options[:ontology_file], load_file: true)

# Load the input table; unless a plain list is being translated, turn each row
# into [subject id, array of term symbols].
unless options[:input_file].nil?
  data = load_tabular_file(options[:input_file])
  if options[:list_translate].nil? || !options[:keyword].nil?
    data.map! do |row|
      [row[options[:subject_column]],
       row[options[:annotations_column]].split(options[:separator]).map! { |term| term.to_sym }]
    end
    store_profiles(data, ontology) if options[:translate] != 'codes' && options[:keyword].nil?
  end
end

# List translation mode: print "input<TAB>translation" per row and stop.
unless options[:list_translate].nil?
  data.each do |term|
    if options[:list_translate] == 'names'
      translation, untranslated = ontology.translate_ids(term)
    elsif options[:list_translate] == 'codes'
      translation, untranslated = ontology.translate_names(term)
    end
    puts "#{term.first}\t#{translation.empty? ? '-' : translation.first}"
  end
  Process.exit
end

if options[:translate] == 'codes'
  profiles = {}
  data.each do |id, terms|
    load_value(profiles, id, terms)
    # NOTE(review): at this point `terms` looks like an Array of symbols (see
    # the data.map! above), and core Ruby has no Array#split - confirm whether
    # a dependency provides it or whether this branch needs rework.
    profiles[id] = terms.split(options[:separator])
  end
  translate(ontology, 'codes', options, profiles)
  store_profiles(profiles, ontology)
end

if options[:clean_profiles]
  removed_profiles = clean_profiles(ontology.profiles, ontology, options)
  if !removed_profiles.nil? && !removed_profiles.empty?
    File.open(options[:removed_path], 'w') do |f|
      removed_profiles.each do |profile|
        f.puts profile
      end
    end
  end
end

if options[:expand_profiles]
  expand_profiles(ontology.profiles, ontology, options[:unwanted_terms])
end

unless options[:similarity].nil?
  write_similarity_profile_list(options[:input_file], ontology, options[:similarity])
end

# Information content per profile, written next to the working directory as
# "<input basename>_IC_onto_freq".
if options[:IC]
  ontology.add_observed_terms_from_profiles
  by_ontology, by_freq = ontology.get_profiles_resnik_dual_ICs
  ic_file = File.basename(options[:input_file], ".*") + '_IC_onto_freq'
  File.open(ic_file, 'w') do |file|
    ontology.profiles.keys.each do |id|
      file.puts([id, by_ontology[id], by_freq[id]].join("\t"))
    end
  end
end

if options[:translate] == 'names'
  translate(ontology, 'names', options)
end

# Child listing: modifier 'r' prints parent/child relations instead of a flat
# list, modifier 'n' prints names instead of codes.
unless options[:childs].first.empty?
  terms, modifiers = options[:childs]
  all_childs = []
  terms.each do |term|
    childs = ontology.get_descendants(term)
    all_childs = all_childs | childs
  end
  if modifiers.include?('r')
    relations = []
    all_childs = all_childs | terms # Add parents that generated child list
    all_childs.each do |term|
      descendants = ontology.get_direct_descendants(term)
      next if descendants.nil?

      descendants.each do |desc|
        relations << [term, desc]
      end
    end
    relations.each do |rel|
      rel, _ = ontology.translate_ids(rel) if modifiers.include?('n')
      puts rel.join("\t")
    end
  else
    all_childs.each do |c|
      if modifiers.include?('n')
        puts ontology.translate_id(c)
      else
        puts c
      end
    end
  end
end

unless options[:output_file].nil?
  File.open(options[:output_file], 'w') do |file|
    ontology.profiles.each do |id, terms|
      file.puts([id, terms.join("|")].join("\t"))
    end
  end
end

if options[:statistics]
  get_stats(ontology.profile_stats).each do |stat|
    puts stat.join("\t")
  end
end

if options[:list_term_attributes]
  term_attributes = ontology.list_term_attributes
  term_attributes.each do |t_attr|
    t_attr[0] = t_attr[0].to_s
    puts t_attr.join("\t")
  end
end

# Cross-reference mode: map every profile term through the xref dictionary
# and write one "id<TAB>xref" line per hit to the output file.
unless options[:keyword].nil?
  xref_translated = []
  ontology.calc_dictionary(:xref, select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true, substitute_alternatives: false)
  dict = ontology.dicts[:tag][options[:xref_sense]]
  data.each do |id, prof|
    xrefs = []
    prof.each do |t|
      query = dict[t.to_s]
      xrefs.concat(query) unless query.nil?
    end
    xref_translated << [id, xrefs] unless xrefs.empty?
  end
  File.open(options[:output_file], 'w') do |f|
    xref_translated.each do |id, prof|
      prof.each do |t|
        f.puts [id, t].join("\t")
      end
    end
  end
end
|
data/bin/strsimnet.rb
CHANGED
@@ -111,12 +111,11 @@ texts2compare = load_table_file(input_file = options[:input_file],
|
|
111
111
|
targetCol = options[:cindex],
|
112
112
|
filterCol = options[:findex],
|
113
113
|
filterValue = options[:filter_value])
|
114
|
-
|
115
114
|
# Verbose point
|
116
115
|
puts "Calculating similitude for (" + texts2compare.length.to_s + ") elements"
|
117
116
|
|
118
117
|
# Obtain all Vs all
|
119
|
-
similitudes_AllVsAll = similitude_network(texts2compare,options[:rm_char])
|
118
|
+
similitudes_AllVsAll = similitude_network(texts2compare, charsToRemove: options[:rm_char])
|
120
119
|
|
121
120
|
# Verbose point
|
122
121
|
puts "Writing output file ..."
|