semtools 0.1.6 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -1
- data/README.md +2 -0
- data/bin/semtools.rb +521 -0
- data/bin/strsimnet.rb +1 -2
- data/external_data/ontologies.txt +4 -0
- data/lib/semtools/ontology.rb +1241 -2002
- data/lib/semtools/parsers/file_parser.rb +32 -0
- data/lib/semtools/parsers/json_parser.rb +84 -0
- data/lib/semtools/parsers/oboparser.rb +511 -0
- data/lib/semtools/sim_handler.rb +1 -1
- data/lib/semtools/version.rb +1 -1
- data/lib/semtools.rb +3 -1
- data/semtools.gemspec +3 -1
- metadata +40 -6
- data/lib/semtools/math_methods.rb +0 -148
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f1ccf05983df627128b8144f90199835732a40c1f6889eb859872d49dd60be9
|
4
|
+
data.tar.gz: 6596a7056d3a8ed1f8f873ef2485b2a319947826354388b26d3828158f31aab3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9a233d99c5800170c3fab9f223d84c0bbaf69680517fc9129370602a68f846c59a8062709e7c46d236f0030059653ab11ecabb9f8bf0db022b7cd4d1e185988e
|
7
|
+
data.tar.gz: 2a5e0f2e26fad97a938cac362ad5e1beeef466d88a502ae2f13e841ebe2480159f4233c0366642cb096270506d17bd68beaa4057a96ed4c42f85bbe2aa4c7fbf
|
data/Gemfile
CHANGED
@@ -5,5 +5,8 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
|
5
5
|
# Specify your gem's dependencies in semtools.gemspec
|
6
6
|
gemspec
|
7
7
|
|
8
|
-
gem "rake", "~>
|
8
|
+
gem "rake", "~> 13.0"
|
9
9
|
gem "minitest", "~> 5.0"
|
10
|
+
|
11
|
+
expcalc_dev_path = File.expand_path('~/dev_gems/expcalc')
|
12
|
+
gem "expcalc", github: "seoanezonjic/expcalc", branch: "master" if Dir.exist?(expcalc_dev_path)
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Semtools
|
2
2
|
|
3
|
+
DEPRECATED PROJECT. MIGRATED TO [python semtools](https://github.com/seoanezonjic/py_semtools)
|
4
|
+
|
3
5
|
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/semtools`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
6
|
|
5
7
|
TODO: Delete this and the text above, and describe your gem
|
data/bin/semtools.rb
ADDED
@@ -0,0 +1,521 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
ROOT_PATH = File.dirname(__FILE__)
|
3
|
+
$LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
|
4
|
+
EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'down'
|
8
|
+
require 'semtools'
|
9
|
+
|
10
|
+
######################################################################################
|
11
|
+
## METHODS
|
12
|
+
######################################################################################
|
13
|
+
# Reads a tab-separated text file into memory.
#
# @param file [String] path of the file to read
# @return [Array<Array<String>>] one array of fields per line, in file order;
#   an empty line yields an empty array
def load_tabular_file(file)
  # File.foreach closes the underlying handle once iteration finishes, whereas
  # File.open(file).each left it open until the garbage collector ran.
  File.foreach(file).map { |line| line.chomp.split("\t") }
end
|
22
|
+
|
23
|
+
# Rewrites every row of +data+ in place as an [id, terms] pair.
#
# @param data [Array<Array<String>>] rows as split fields (modified in place)
# @param separator [String] separator between terms inside the terms field
# @param id_col [Integer] index of the field holding the profile id
# @param terms_col [Integer] index of the field holding the separated terms
# @return [Array] the same +data+ array, each row now [id, Array<Symbol>]
def format_tabular_data(data, separator, id_col, terms_col)
  data.map! do |fields|
    term_symbols = fields[terms_col].split(separator).map(&:to_sym)
    [fields[id_col], term_symbols]
  end
end
|
29
|
+
|
30
|
+
# Registers every [id, terms] pair of +file+ as a profile in +ontology+.
#
# @param file [Enumerable] collection yielding profile id and term list
# @param ontology [#add_profile] object receiving each profile
# @return [Enumerable] the +file+ collection that was iterated
def store_profiles(file, ontology)
  file.each { |profile_id, profile_terms| ontology.add_profile(profile_id, profile_terms) }
end
|
35
|
+
|
36
|
+
# Accumulates +value+ into the list stored under +key+ in +hash_to_load+.
#
# @param hash_to_load [Hash] hash of lists, modified in place
# @param key [Object] entry to create or extend
# @param value [Object, Array] a single item or an array of items to add
# @param unique [Boolean, nil] when truthy (default) duplicates are removed
#   from the stored list; +false+ or +nil+ keeps them
# @return [Array] the list now stored under +key+
def load_value(hash_to_load, key, value, unique = true)
  incoming = value.is_a?(Array) ? value : [value]
  # Always accumulate into a list owned by the hash: storing the caller's
  # array directly would let later calls mutate the caller's data.
  stored = (hash_to_load[key] ||= [])
  stored.concat(incoming)
  # Honour any falsy flag; the previous `unless unique == nil` check
  # deduplicated even when the caller passed false.
  stored.uniq! if unique
  stored
end
|
50
|
+
|
51
|
+
# Translates profile terms between codes and names and reports the terms
# that could not be translated.
#
# With type 'names' the profiles stored in +ontology+ are replaced by the
# result of ontology.translate_ids. With type 'codes' each entry of
# +profiles+ is replaced by the result of ontology.translate_names joined
# with options[:separator]. Any other type translates nothing.
#
# @param ontology [Object] must respond to profiles, translate_ids and
#   translate_names
# @param type [String] 'names' or 'codes'
# @param options [Hash] reads :separator and :untranslated_path
# @param profiles [Hash, nil] id => terms, required when type is 'codes'
# @return [Hash] id => untranslated terms (empty when all were translated)
def translate(ontology, type, options, profiles = nil)
  not_translated = {}
  case type
  when 'names'
    ontology.profiles.each do |id, terms|
      translation, untranslated = ontology.translate_ids(terms)
      ontology.profiles[id] = translation
      not_translated[id] = untranslated unless untranslated.empty?
    end
  when 'codes'
    profiles.each do |id, terms|
      translation, untranslated = ontology.translate_names(terms)
      profiles[id] = translation.join(options[:separator].to_s)
      not_translated[id] = untranslated unless untranslated.empty?
    end
  end
  return not_translated if not_translated.empty?

  report_path = options[:untranslated_path]
  if report_path.nil?
    # The path option defaults to nil; File.open(nil) would raise TypeError
    # and abort the run just because some term had no translation.
    warn "#{not_translated.length} profiles have untranslated terms; set an untranslated path to save them"
  else
    File.open(report_path, 'w') do |file|
      not_translated.each do |id, terms|
        file.puts([id, terms.join(";")].join("\t"))
      end
    end
  end
  not_translated
end
|
75
|
+
|
76
|
+
# Cleans one profile through ontology.clean_profile_hard and, when
# options[:term_filter] is set, keeps only the terms whose ancestors
# (as given by ontology.get_ancestors) include that filter term.
#
# @param profile [Array] terms of the profile
# @param ontology [Object] must respond to clean_profile_hard and get_ancestors
# @param options [Hash] reads :term_filter
# @return [Array] the cleaned (and possibly filtered) profile
def clean_profile(profile, ontology, options)
  cleaned_profile = ontology.clean_profile_hard(profile)
  required_ancestor = options[:term_filter]
  return cleaned_profile if required_ancestor.nil?

  cleaned_profile.select! { |candidate| ontology.get_ancestors(candidate).include?(required_ancestor) }
  cleaned_profile
end
|
83
|
+
|
84
|
+
# Cleans every profile of +profiles+ in place with clean_profile and drops
# the profiles left without terms.
#
# @param profiles [Hash] id => terms, modified in place
# @param ontology [Object] passed through to clean_profile
# @param options [Hash] passed through to clean_profile
# @return [Array] ids of the profiles removed because they became empty
def clean_profiles(profiles, ontology, options)
  profiles.each do |profile_id, profile_terms|
    profiles[profile_id] = clean_profile(profile_terms, ontology, options)
  end
  emptied_ids = profiles.select { |_profile_id, kept_terms| kept_terms.empty? }.keys
  emptied_ids.each { |profile_id| profiles.delete(profile_id) }
  emptied_ids
end
|
94
|
+
|
95
|
+
# Compares the profiles through onto_obj.compare_profiles and writes one
# "profileA<TAB>profileB<TAB>value" line per compared pair to +output+.
#
# @param output [String] path of the file to write
# @param onto_obj [Object] must respond to
#   compare_profiles(sim_type:, external_profiles:)
# @param similarity_type [Symbol] forwarded as sim_type
# @param refs [Hash, nil] forwarded as external_profiles
def write_similarity_profile_list(output, onto_obj, similarity_type, refs)
  similarities = onto_obj.compare_profiles(sim_type: similarity_type, external_profiles: refs)
  File.open(output, 'w') do |out|
    similarities.each do |profile_a, partners|
      partners.each { |profile_b, score| out.puts "#{profile_a}\t#{profile_b}\t#{score}" }
    end
  end
end
|
105
|
+
|
106
|
+
# Downloads the ontology registered under +key+ in the index file +source+,
# or lists the .obo files already present when +key+ is 'list'.
#
# @param source [String] tab-separated index file (name, url); its directory
#   is also where .obo files are listed from and stored by default
# @param key [String] ontology name in the index, or 'list'
# @param output [String, nil] destination path; when nil the file is named
#   "<key>.obo" and placed next to +source+ if that directory is writable,
#   otherwise in the current directory
def download(source, key, output)
  source_list = load_tabular_file(source).to_h
  external_data = File.dirname(source)
  if key == 'list'
    Dir.glob(File.join(external_data, '*.obo')) { |f| puts f }
    return
  end

  url = source_list[key]
  if url.nil?
    # An unknown key used to end the run silently with nothing downloaded.
    warn "Ontology '#{key}' is not listed in #{source}; nothing was downloaded"
    return
  end

  output_path = output
  if output_path.nil?
    file_name = key + '.obo'
    output_path = File.writable?(external_data) ? File.join(external_data, file_name) : file_name
  end
  Down::NetHttp.download(url, destination: output_path, max_redirects: 5)
  File.chmod(0644, output_path) # Correct file permissions set by down gem
end
|
129
|
+
|
130
|
+
# Resolves the ontology file to load.
#
# @param path [String] an existing file path, or the name of an ontology
#   registered in the index file
# @param source [String] tab-separated index file (name, url)
# @return [String] +path+ itself when it exists; otherwise
#   "<dir of source>/<path>.obo" when +path+ is a name found in the index
# Aborts the program when +path+ is neither a file nor a name in the index.
def get_ontology_file(path, source)
  # File.exists? was deprecated for years and removed in Ruby 3.2.
  return path if File.exist?(path)

  ont_index = load_tabular_file(source).to_h
  abort("Input ontology file not exists") if ont_index[path].nil?
  File.join(File.dirname(source), path + '.obo')
end
|
141
|
+
|
142
|
+
# Turns a statistics hash into an ordered list of [label, value] rows
# ready to be printed.
#
# @param stats [Hash] reads :count, :countNonZero, :max, :min, :average,
#   :variance, :standardDeviation, :q1, :median and :q3
# @return [Array<Array>] eleven [label, value] pairs; 'Non Zero Density' is
#   countNonZero divided by count as a float
def get_stats(stats)
  total = stats[:count]
  non_zero = stats[:countNonZero]
  [
    ['Elements', total],
    ['Elements Non Zero', non_zero],
    ['Non Zero Density', stats[:countNonZero].fdiv(stats[:count])],
    ['Max', stats[:max]],
    ['Min', stats[:min]],
    ['Average', stats[:average]],
    ['Variance', stats[:variance]],
    ['Standard Deviation', stats[:standardDeviation]],
    ['Q1', stats[:q1]],
    ['Median', stats[:median]],
    ['Q3', stats[:q3]]
  ]
end
|
157
|
+
|
158
|
+
# Orders terms by the level reported by ontology.get_terms_levels.
#
# @param terms [Array] not used by this method
# @param modifiers [String] when it contains 'a' the levels are sorted in
#   descending order, otherwise ascending
# @param ontology [Object] must respond to get_terms_levels
# @param all_childs [Array] terms whose levels are requested
# @return [Array(Array, Array)] the sorted terms and the sorted
#   [term, level] pairs (the pair list is sorted in place)
def sort_terms_by_levels(terms, modifiers, ontology, all_childs)
  term_levels = ontology.get_terms_levels(all_childs)
  descending = modifiers.include?('a')
  term_levels.sort! do |left, right|
    descending ? (right[1] <=> left[1]) : (left[1] <=> right[1])
  end
  [term_levels.map(&:first), term_levels]
end
|
168
|
+
|
169
|
+
# Collects the terms related to +terms+ in +ontology+.
#
# modifiers:
# - a: get ancestors instead of descendants
# - r: get parent-child relations instead of list descendants/ancestors
# - hN: when list of relations, it is limited to N hops from given term
# - n: give terms names instead of term codes
#
# @param ontology [Object] queried for ancestors/descendants and translations
# @param terms [Array] reference terms
# @param modifiers [String] combination of the modifiers described above
# @return [Array] related terms, or [parent, child] pairs with the 'r' modifier
def get_childs(ontology, terms, modifiers)
  upwards = modifiers.include?('a')
  all_childs = terms.reduce([]) do |collected, term|
    related = upwards ? ontology.get_ancestors(term) : ontology.get_descendants(term)
    collected | related
  end

  unless modifiers.include?('r')
    all_childs.map! { |c| ontology.translate_id(c) } if modifiers.include?('n')
    return all_childs
  end

  all_childs |= terms # Add parents that generated child list
  target_hops = nil
  term_levels = nil
  hop_match = /h([0-9]+)/.match(modifiers)
  if hop_match
    target_hops = hop_match[1].to_i + 1 # take into account reference term (parent/child) addition
    all_childs, term_levels = sort_terms_by_levels(terms, modifiers, ontology, all_childs)
  end

  relations = []
  current_level = nil
  hops = 0
  all_childs.each_with_index do |term, i|
    unless target_hops.nil?
      level = term_levels[i][1]
      if level != current_level
        current_level = level
        hops += 1
        # +1 take into account that we have detected a level change and we saved the last one entirely
        break if hops == target_hops + 1
      end
    end
    # get_direct_ancentors is spelled as the ontology object is called here.
    linked = upwards ? ontology.get_direct_ancentors(term) : ontology.get_direct_descendants(term)
    next if linked.nil?

    linked.each do |other|
      relations << (upwards ? [other, term] : [term, other])
    end
  end

  use_names = modifiers.include?('n')
  relations.map do |pair|
    pair, _ = ontology.translate_ids(pair) if use_names
    pair
  end
end
|
229
|
+
|
230
|
+
|
231
|
+
|
232
|
+
####################################################################################
|
233
|
+
## OPTPARSE
|
234
|
+
####################################################################################
|
235
|
+
# Command line definition. Every option gets its default right before the
# switch that overrides it, so +options+ always holds every key read below.
options = {}
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options]"

  options[:download] = nil
  opts.on("-d", "--download STRING", "Download obo file from official resource. MONDO, GO and HPO are possible values.") do |item|
    options[:download] = item
  end

  options[:input_file] = nil
  opts.on("-i", "--input_file PATH", "Filepath of profile data") do |item|
    options[:input_file] = item
  end

  options[:output_file] = nil
  opts.on("-o", "--output_file PATH", "Output filepath") do |item|
    options[:output_file] = item
  end

  options[:IC] = nil
  opts.on("-I", "--IC STRING", "Get IC. 'prof' for stored profiles or 'ont' for terms in ontology") do |item|
    options[:IC] = item
  end

  options[:ontology_file] = nil
  opts.on("-O PATH", "--ontology_file PATH", "Path to ontology file") do |item|
    options[:ontology_file] = item
  end

  options[:term_filter] = nil
  opts.on("-T STRING", "--term_filter STRING", "If specified, only terms that are descendants of the specified term will be kept on a profile when cleaned") do |item|
    options[:term_filter] = item.to_sym
  end

  options[:translate] = nil
  opts.on("-t STRING", "--translate STRING", "Translate to 'names' or to 'codes'") do |item|
    options[:translate] = item
  end

  opts.on("-s method", "--similarity method", "Calculate similarity between profile IDs computed by 'resnik', 'lin' or 'jiang_conrath' methods. ") do |sim_method|
    options[:similarity] = sim_method.to_sym
  end

  options[:reference_profiles] = nil
  opts.on("--reference_profiles PATH", "Path to file tabulated file with first column as id profile and second column with ontology terms separated by separator. ") do |opt|
    options[:reference_profiles] = opt
  end

  options[:clean_profiles] = false
  opts.on("-c", "--clean_profiles", "Removes ancestors, descendants and obsolete terms from profiles") do
    options[:clean_profiles] = true
  end

  options[:removed_path] = 'rejected_profs'
  opts.on("-r PATH", "--removed_path PATH", "Desired path to write removed profiles file") do |item|
    options[:removed_path] = item
  end

  options[:untranslated_path] = nil
  opts.on("-u PATH", "--untranslated_path PATH", "Desired path to write untranslated terms file") do |item|
    options[:untranslated_path] = item
  end

  options[:keyword] = nil
  opts.on("-k STRING", "--keyword STRING", "regex used to get xref terms in the ontology file") do |item|
    options[:keyword] = item
  end

  options[:xref_sense] = :byValue
  opts.on("--xref_sense", "Ontology-xref or xref-ontology. By default xref-ontology if set, ontology-xref") do
    options[:xref_sense] = :byTerm
  end

  options[:expand_profiles] = nil
  opts.on("-e", "--expand_profiles STRING", "Expand profiles adding ancestors if 'parental', adding new profiles if 'propagate'") do |meth|
    options[:expand_profiles] = meth
  end

  options[:unwanted_terms] = []
  opts.on("-U", "--unwanted_terms STRING", "Comma separated terms not wanted to be included in profile expansion") do |item|
    # The default is a list and every other term option is stored as Symbol,
    # so split the comma separated value instead of keeping the raw String.
    options[:unwanted_terms] = item.split(',').map { |t| t.to_sym }
  end

  options[:separator] = ";"
  opts.on("-S STRING", "--separator STRING", "Separator used for the terms profile") do |sep|
    options[:separator] = sep
  end

  # Value format: "TERM1,TERM2" or "MODIFIERS/TERM1,TERM2".
  options[:childs] = [[], '']
  opts.on("-C STRING", "--childs STRING", "Term code list (comma separated) to generate child list") do |item|
    if item.include?('/')
      modifiers, terms = item.split('/')
    else
      modifiers = ''
      terms = item
    end
    # "r/" splits into a single element, leaving terms nil; to_s keeps that
    # case as an empty term list instead of raising NoMethodError.
    terms = terms.to_s.split(',').map { |t| t.to_sym }
    options[:childs] = [terms, modifiers]
  end

  options[:statistics] = false
  opts.on("-n", "--statistics", "To obtain main statistical descriptors of the profiles file") do
    options[:statistics] = true
  end

  options[:list_translate] = nil
  opts.on("-l STRING", "--list_translate STRING", "Translate to 'names' or to 'codes' input list") do |sense|
    options[:list_translate] = sense
  end

  options[:subject_column] = 0
  opts.on("-f NUM", "--subject_column INTEGER", "The number of the column for the subject id") do |ncol|
    options[:subject_column] = ncol.to_i
  end

  options[:annotations_column] = 1
  opts.on("-a NUM", "--annotations_column INTEGER", "The number of the column for the annotation ids") do |item|
    options[:annotations_column] = item.to_i
  end

  options[:root] = nil
  opts.on("-R STRING", "--root STRING", "Term id to be considered the new root of the ontology") do |item|
    options[:root] = item.to_sym
  end

  # The help text here used to be a copy of the --annotations_column one.
  options[:list_term_attributes] = false
  opts.on("--list_term_attributes", "Print the attributes of the ontology terms") do
    options[:list_term_attributes] = true
  end

end
option_parser.parse!
|
366
|
+
|
367
|
+
####################################################################################
|
368
|
+
## MAIN
|
369
|
+
####################################################################################
|
370
|
+
# Main workflow. The steps run in this fixed order and each one is enabled by
# its command line option; download and list translation end the program.
ont_index_file = File.join(EXTERNAL_DATA, 'ontologies.txt')
if !options[:download].nil?
  download(ont_index_file, options[:download], options[:output_file])
  Process.exit
end

# The ontology option may be a file path or a name listed in the index file.
if !options[:ontology_file].nil?
  options[:ontology_file] = get_ontology_file(options[:ontology_file], ont_index_file)
end

extra_dicts = []
extra_dicts << [:xref, {select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true}] if !options[:keyword].nil?
ontology = Ontology.new(file: options[:ontology_file], load_file: true, extra_dicts: extra_dicts)

Ontology.mutate(options[:root], ontology, clone: false) if !options[:root].nil? # TODO fix method and convert in class method

# Load the input table. Rows become [id, [terms]] pairs unless a plain list
# translation was requested; profiles are stored in the ontology only when
# neither code translation nor xref lookup is going to process them.
data = nil
if !options[:input_file].nil?
  data = load_tabular_file(options[:input_file])
  if options[:list_translate].nil? || !options[:keyword].nil?
    format_tabular_data(data, options[:separator], options[:subject_column], options[:annotations_column])
    store_profiles(data, ontology) if options[:translate] != 'codes' && options[:keyword].nil?
  end
end

# List translation: print "first field<TAB>first translation" per input row.
if !options[:list_translate].nil?
  # Without an input file there is nothing to iterate; fail with a clear
  # message instead of a NoMethodError on nil.
  abort('An input file is required to translate a list') if data.nil?
  data.each do |term|
    if options[:list_translate] == 'names'
      translation, untranslated = ontology.translate_ids(term)
    elsif options[:list_translate] == 'codes'
      translation, untranslated = ontology.translate_names(term)
    end
    puts "#{term.first}\t#{translation.empty? ? '-' : translation.first}"
  end
  Process.exit
end

if options[:translate] == 'codes'
  profiles = {}
  data.each do |id, terms|
    load_value(profiles, id, terms)
    # NOTE(review): rows reaching this point were already turned into
    # [id, Array<Symbol>] by format_tabular_data, so terms looks like an
    # Array here and Array has no core #split; the assignment also discards
    # what load_value accumulated. Confirm the intended behaviour.
    profiles[id] = terms.split(options[:separator])
  end
  translate(ontology, 'codes', options, profiles)
  store_profiles(profiles, ontology)
end

# Clean stored profiles and save the ids of the ones that ended up empty.
if options[:clean_profiles]
  removed_profiles = clean_profiles(ontology.profiles, ontology, options)
  if !removed_profiles.nil? && !removed_profiles.empty?
    File.open(options[:removed_path], 'w') do |f|
      removed_profiles.each do |profile|
        f.puts profile
      end
    end
  end
end

if !options[:expand_profiles].nil?
  ontology.expand_profiles(options[:expand_profiles], unwanted_terms: options[:unwanted_terms])
end

if !options[:similarity].nil?
  refs = nil
  if !options[:reference_profiles].nil?
    refs = load_tabular_file(options[:reference_profiles])
    format_tabular_data(refs, options[:separator], 0, 1)
    refs = refs.to_h
    # Clean the reference profiles themselves, in place. The previous code
    # cleaned ontology.profiles again and replaced refs with the list of
    # removed ids, so the run aborted whenever nothing was removed.
    clean_profiles(refs, ontology, options) if options[:clean_profiles]
    abort('Reference profiles are empty after cleaning ') if refs.nil? || refs.empty?
  end
  write_similarity_profile_list(options[:output_file], ontology, options[:similarity], refs)
end


# Information content: per profile ('prof') or per ontology term ('ont').
if options[:IC] == 'prof'
  ontology.add_observed_terms_from_profiles
  by_ontology, by_freq = ontology.get_profiles_resnik_dual_ICs
  ic_file = File.basename(options[:input_file], ".*")+'_IC_onto_freq'
  File.open(ic_file , 'w') do |file|
    ontology.profiles.keys.each do |id|
      file.puts([id, by_ontology[id], by_freq[id]].join("\t"))
    end
  end
elsif options[:IC] == 'ont'
  File.open('ont_IC' , 'w') do |file|
    ontology.each do |term|
      file.puts "#{term}\t#{ontology.get_IC(term)}"
    end
  end
end

if options[:translate] == 'names'
  translate(ontology, 'names', options)
end

# Print related terms, or tab-separated relation pairs with the 'r' modifier.
if !options[:childs].first.empty?
  terms, modifiers = options[:childs]
  all_childs = get_childs(ontology, terms, modifiers)
  all_childs.each do |ac|
    if modifiers.include?('r')
      puts ac.join("\t")
    else
      puts ac
    end
  end
end

# Similarity writes its own output, so profiles are dumped only without it.
if !options[:output_file].nil? && options[:similarity].nil?
  File.open(options[:output_file], 'w') do |file|
    ontology.profiles.each do |id, terms|
      file.puts([id, terms.join("|")].join("\t"))
    end
  end
end

if options[:statistics]
  get_stats(ontology.profile_stats).each do |stat|
    puts stat.join("\t")
  end
end

if options[:list_term_attributes]
  term_attributes = ontology.list_term_attributes
  term_attributes.each do |t_attr|
    t_attr[0] = t_attr[0].to_s
    puts t_attr.join("\t")
  end
end

# Xref lookup: map each profile term through the :tag dictionary built from
# the keyword regex and write one "id<TAB>xref" line per match.
if !options[:keyword].nil?
  xref_translated = []
  dict = ontology.dicts[:tag][options[:xref_sense]]
  data.each do |id, prof|
    xrefs = []
    prof.each do |t|
      if options[:xref_sense] == :byValue
        query = dict[t.to_s]
      else
        query = dict[t]
      end
      xrefs.concat(query) if !query.nil?
    end
    xref_translated << [id, xrefs] if !xrefs.empty?
  end
  File.open(options[:output_file], 'w') do |f|
    xref_translated.each do |id, prof|
      prof.each do |t|
        f.puts [id, t].join("\t")
      end
    end
  end
end
|
data/bin/strsimnet.rb
CHANGED
@@ -111,12 +111,11 @@ texts2compare = load_table_file(input_file = options[:input_file],
|
|
111
111
|
targetCol = options[:cindex],
|
112
112
|
filterCol = options[:findex],
|
113
113
|
filterValue = options[:filter_value])
|
114
|
-
|
115
114
|
# Verbose point
|
116
115
|
puts "Calculating similitude for (" + texts2compare.length.to_s + ") elements"
|
117
116
|
|
118
117
|
# Obtain all Vs all
|
119
|
-
similitudes_AllVsAll = similitude_network(texts2compare,options[:rm_char])
|
118
|
+
similitudes_AllVsAll = similitude_network(texts2compare, charsToRemove: options[:rm_char])
|
120
119
|
|
121
120
|
# Verbose point
|
122
121
|
puts "Writing output file ..."
|