semtools 0.1.2 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8fc29918a31045893647355dd72264a04386c5171c48ea868f7e9bbc93062151
4
- data.tar.gz: 692ce02343cb00ac37bbc34476da08386bedf0eaca7946689eb62c9a1f06d555
3
+ metadata.gz: a3f63cc6548a9938e31121d2018d1c1c477987007c5d253b5fa814a285bdb576
4
+ data.tar.gz: e1911d3157c3046590ca13bc86215d2260b4a8b2b1b25affa5c2673881036795
5
5
  SHA512:
6
- metadata.gz: 1b52667c81a0a25786b91156e9ed88a8de47e86fd18baddffc43b05ff199f95129b09da4e03025b6fb709d18a0274e22bf4a55c81471fda748e75aadca4d6ef1
7
- data.tar.gz: 46e5b49f611c021ee8576a522a0a6ef22a8b9ed349084dadb9e44fd76c712c05221e6314985f08bdba575ac2dd849f1f14d84d5ae686889f33fac993132a8372
6
+ metadata.gz: 30c95df80957a4a35b6fea05b9552352f529d8e45c10f6b128924a3ce2ee5d90e92a1e9d5fe0016d25538147e12d3a9199c81222642c94cdd0eb3c89eea168ef
7
+ data.tar.gz: ddc9e600fd984e68d060b7be05adf27b3f20bb67e638d42acc4b9b156eedabfce20d6f588a03d1fbc2948fedbd80d498f1767c0e3f8ea03720fa0ca327b95f3c
data/Gemfile CHANGED
@@ -5,5 +5,8 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
5
5
  # Specify your gem's dependencies in semtools.gemspec
6
6
  gemspec
7
7
 
8
- gem "rake", "~> 12.0"
8
+ gem "rake", "~> 13.0"
9
9
  gem "minitest", "~> 5.0"
10
+
11
+ expcalc_dev_path = File.expand_path('~/dev_gems/expcalc')
12
+ gem "expcalc", github: "seoanezonjic/expcalc", branch: "master" if Dir.exist?(expcalc_dev_path)
data/bin/onto2json.rb CHANGED
@@ -18,14 +18,20 @@ OptionParser.new do |opts|
18
18
  opts.banner = "Usage: #{__FILE__} [options]"
19
19
 
20
20
  options[:input_file] = nil
21
- opts.on("-i", "--input_file PATH", "Input file with ontology in OBO format") do |data|
21
+ opts.on("-i", "--input_file FILE", "Input file with ontology in OBO format") do |data|
22
22
  options[:input_file] = data
23
23
  end
24
24
 
25
25
  options[:output_file] = nil
26
- opts.on("-o", "--output_file PATH", "Output path") do |data|
26
+ opts.on("-o", "--output_file FILE", "Output path") do |data|
27
27
  options[:output_file] = data
28
28
  end
29
+
30
+ options[:build] = false
31
+ opts.on("-b", "--build", "Activate build mode (calculate dictionaries)") do
32
+ options[:build] = true
33
+ end
34
+
29
35
 
30
36
  opts.on_tail("-h", "--help", "Show this message") do
31
37
  puts opts
@@ -39,7 +45,7 @@ end.parse!
39
45
  # MAIN
40
46
  ##########################
41
47
  puts "Loading ontology ..."
42
- onto = Ontology.new(file: options[:input_file], load_file: true)
48
+ onto = Ontology.new(file: options[:input_file], load_file: true, build: options[:build])
43
49
  puts "Exporting ontology to JSON ..."
44
50
  onto.write(options[:output_file])
45
51
  puts "Ontology exported"
data/bin/semtools.rb ADDED
@@ -0,0 +1,446 @@
1
+ #! /usr/bin/env ruby
2
+ ROOT_PATH = File.dirname(__FILE__)
3
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
4
+ EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
5
+
6
+ require 'optparse'
7
+ require 'down'
8
+ require 'semtools'
9
+
10
+ ######################################################################################
11
+ ## METHODS
12
+ ######################################################################################
13
# Reads a tab-separated text file into an array of rows.
#
# Every line has its trailing line terminator removed and is then split on
# tab characters; rows are returned in file order. File.foreach is used so
# the file handle is closed as soon as the last line has been read (the
# former File.open(file).each form left the handle open).
#
# @param file [String] path of the file to read
# @return [Array<Array<String>>] one array of fields per line
def load_tabular_file(file)
  File.foreach(file).map { |line| line.chomp.split("\t") }
end
22
+
23
# Registers every (id, terms) pair of a collection in the ontology by
# calling ontology.add_profile once per pair.
#
# @param file [#each] collection yielding an id and its terms
# @param ontology [#add_profile] object that receives the profiles
# @return the collection that was passed in
def store_profiles(file, ontology)
  file.each { |profile_id, profile_terms| ontology.add_profile(profile_id, profile_terms) }
end
28
+
29
# Accumulates values under a key of a hash whose values are arrays.
#
# When the key is new, the value is stored (wrapped in an array if it is not
# one already). When the key exists, the value (or, for an array, each of its
# elements) is appended to the stored array.
#
# The +unique+ flag is now honoured as a boolean: the previous check
# (`unless unique == nil`) de-duplicated even when +false+ was passed, so
# only +nil+ could switch it off.
#
# @param hash_to_load [Hash] hash that is modified in place
# @param key [Object] key under which the value is accumulated
# @param value [Object, Array] single value or array of values to add
# @param unique [Boolean, nil] remove duplicates after appending to an
#   existing entry when truthy
def load_value(hash_to_load, key, value, unique = true)
  stored = hash_to_load[key]
  if stored.nil?
    hash_to_load[key] = value.is_a?(Array) ? value : [value]
  else
    if value.is_a?(Array)
      stored.concat(value)
    else
      stored << value
    end
    stored.uniq! if unique
  end
end
43
+
44
# Translates profile terms and records the terms that could not be translated.
#
# With type 'names', every profile in ontology.profiles is replaced by the
# first element returned by ontology.translate_ids. With type 'codes', every
# entry of +profiles+ is replaced by the first element returned by
# ontology.translate_names, joined into a single string with
# options[:separator].
#
# Untranslated terms are written to options[:untranslated_path] as
# "id<TAB>term;term" lines. That path defaults to nil on the command line, and
# File.open(nil) raised a TypeError after the translation had already been
# done; a missing path now produces a warning on standard error instead.
#
# @param ontology [Object] must respond to profiles/translate_ids ('names')
#   or translate_names ('codes')
# @param type [String] 'names' or 'codes'; any other value translates nothing
# @param options [Hash] reads :separator and :untranslated_path
# @param profiles [Hash, nil] id => terms; required for type 'codes',
#   modified in place
def translate(ontology, type, options, profiles = nil)
  not_translated = {}
  if type == 'names'
    ontology.profiles.each do |id, terms|
      translation, untranslated = ontology.translate_ids(terms)
      ontology.profiles[id] = translation
      not_translated[id] = untranslated unless untranslated.empty?
    end
  elsif type == 'codes'
    profiles.each do |id, terms|
      translation, untranslated = ontology.translate_names(terms)
      profiles[id] = translation.join(options[:separator].to_s)
      not_translated[id] = untranslated unless untranslated.empty?
    end
  end
  return if not_translated.empty?

  untranslated_path = options[:untranslated_path]
  if untranslated_path.nil?
    warn "#{not_translated.size} profile(s) contain untranslated terms; " \
         'set --untranslated_path to save them'
    return
  end
  File.open(untranslated_path, 'w') do |file|
    not_translated.each do |id, terms|
      file.puts([id, terms.join(";")].join("\t"))
    end
  end
end
68
+
69
# Cleans a single profile with ontology.clean_profile_hard and, when
# options[:term_filter] is set, keeps only the terms whose ancestors (as
# returned by ontology.get_ancestors) include that filter term.
#
# @param profile [Array] terms of the profile
# @param ontology [Object] must respond to clean_profile_hard and get_ancestors
# @param options [Hash] reads :term_filter
# @return [Array] the cleaned (and optionally filtered) profile
def clean_profile(profile, ontology, options)
  required_ancestor = options[:term_filter]
  kept_terms = ontology.clean_profile_hard(profile)
  return kept_terms if required_ancestor.nil?

  # select! returns nil when nothing was removed, so return the array itself.
  kept_terms.select! { |term| ontology.get_ancestors(term).include?(required_ancestor) }
  kept_terms
end
76
+
77
# Cleans every profile with clean_profile, stores the cleaned version back
# under the same id, and deletes the profiles that end up empty.
#
# @param profiles [Hash] id => terms, modified in place
# @param ontology [Object] forwarded to clean_profile
# @param options [Hash] forwarded to clean_profile
# @return [Array] ids of the profiles that were removed
def clean_profiles(profiles, ontology, options)
  emptied_ids = profiles.each_with_object([]) do |(profile_id, term_list), emptied|
    kept_terms = clean_profile(term_list, ontology, options)
    profiles[profile_id] = kept_terms
    emptied << profile_id if kept_terms.empty?
  end
  # Deletion happens after the pass so the hash is not resized while iterating.
  emptied_ids.each { |profile_id| profiles.delete(profile_id) }
  emptied_ids
end
87
+
88
# Expands every profile in place with the ancestors of its terms.
#
# For each profile, the ancestors of the terms it currently holds (as
# returned by ontology.get_ancestors) are collected, the unwanted terms are
# removed from those ancestors, and the remainder is appended to the profile,
# which is then de-duplicated.
#
# The previous version pushed each ancestor list as a single nested element
# onto the same array it was iterating, so the array grew on every step (the
# iteration could not finish) and the profile stopped being a flat term list.
# Ancestors are now gathered before the profile is touched.
#
# @param profiles [Hash] id => Array of terms, modified in place
# @param ontology [#get_ancestors] source of ancestor terms
# @param unwanted_terms [Array] ancestors that must not be added
# @return [Hash] the same profiles hash
def expand_profiles(profiles, ontology, unwanted_terms = [])
  profiles.each do |_disease_id, terms|
    ancestors = terms.flat_map { |term| ontology.get_ancestors(term) }
    terms.concat(ancestors - unwanted_terms)
    terms.uniq!
  end
end
95
+
96
# Writes the similarity of every ordered pair of profiles (each profile is
# also paired with itself) to a file in the current working directory.
#
# The file is named after the input file's basename without its extension,
# followed by '_semantic_similarity_list'. Each line holds the two profile
# ids and the value returned by onto_obj.compare, separated by tabs.
#
# @param input [String] path of the profile file; only its basename is used
# @param onto_obj [Object] must respond to profiles and compare
# @param similarity_type [Object] forwarded to compare as sim_type
def write_similarity_profile_list(input, onto_obj, similarity_type)
  output_name = "#{File.basename(input, '.*')}_semantic_similarity_list"
  File.open(output_name, 'w') do |out|
    onto_obj.profiles.each do |query_id, query_terms|
      onto_obj.profiles.each do |target_id, target_terms|
        score = onto_obj.compare(query_terms, target_terms, sim_type: similarity_type)
        out.puts([query_id, target_id, score].join("\t"))
      end
    end
  end
end
106
+
107
# Downloads an ontology listed in the index file, or lists the .obo files
# already present next to that index.
#
# With key 'list', the paths of the *.obo files in the index file's directory
# are printed and nothing is downloaded; the index itself is no longer parsed
# for this mode. Otherwise the key is looked up in the index (rows of
# key<TAB>url) and the file is fetched with Down::NetHttp.
#
# When no output path is given, the file is saved as "<key>.obo" in the index
# directory if that directory is writable, or in the current directory
# otherwise. A key that is not in the index used to be ignored silently; it
# now prints a warning listing the known keys.
#
# @param source [String] path of the tab-separated ontology index file
# @param key [String] index key to download, or 'list'
# @param output [String, nil] destination path; nil selects the default
def download(source, key, output)
  external_data = File.dirname(source)
  if key == 'list'
    Dir.glob(File.join(external_data, '*.obo')) { |f| puts f }
    return
  end

  source_list = load_tabular_file(source).to_h
  url = source_list[key]
  if url.nil?
    warn "Unknown ontology '#{key}'. Available ontologies: #{source_list.keys.join(', ')}"
    return
  end

  output_path = output
  if output_path.nil?
    file_name = key + '.obo'
    output_path = File.writable?(external_data) ? File.join(external_data, file_name) : file_name
  end
  Down::NetHttp.download(url, destination: output_path, max_redirects: 5)
  File.chmod(0644, output_path) # Correct file permissions set by down gem
end
130
+
131
# Resolves the ontology argument to a file path.
#
# If +path+ is an existing file it is returned unchanged. Otherwise it is
# treated as a key of the ontology index file: when the key is listed, the
# path "<index directory>/<key>.obo" is returned; when it is not, the program
# aborts.
#
# File.exist? replaces File.exists?, which was deprecated and then removed in
# Ruby 3.2.
#
# @param path [String] path of an ontology file or an index key
# @param source [String] path of the tab-separated ontology index file
# @return [String] path of the ontology file
def get_ontology_file(path, source)
  return path if File.exist?(path)

  ont_index = load_tabular_file(source).to_h
  abort("Input ontology file not exists") if ont_index[path].nil?
  File.join(File.dirname(source), path + '.obo')
end
142
+
143
# Turns a statistics hash into an ordered list of [label, value] rows.
#
# 'Non Zero Density' is computed here as stats[:countNonZero] divided by
# stats[:count] (floating-point division); every other row is read directly
# from the hash.
#
# @param stats [Hash] reads :count, :countNonZero, :max, :min, :average,
#   :variance, :standardDeviation, :q1, :median and :q3
# @return [Array<Array>] rows in reporting order
def get_stats(stats)
  non_zero_density = stats[:countNonZero].fdiv(stats[:count])
  [
    ['Elements', stats[:count]],
    ['Elements Non Zero', stats[:countNonZero]],
    ['Non Zero Density', non_zero_density],
    ['Max', stats[:max]],
    ['Min', stats[:min]],
    ['Average', stats[:average]],
    ['Variance', stats[:variance]],
    ['Standard Deviation', stats[:standardDeviation]],
    ['Q1', stats[:q1]],
    ['Median', stats[:median]],
    ['Q3', stats[:q3]]
  ]
end
158
+
159
+
160
+
161
+
162
+
163
+
164
+ ####################################################################################
165
+ ## OPTPARSE
166
+ ####################################################################################
167
# Command line definition. Every option stores its value in the options hash,
# which is pre-filled with the defaults shown next to each switch.
#
# Changes with respect to the previous definition:
# * "--xref_sense" no longer carries a trailing space, which OptionParser
#   interpreted as an argument placeholder for a switch meant to be a flag.
# * --unwanted_terms is split on commas and converted to symbols (as --childs
#   already does), because expand_profiles subtracts it from term arrays.
# * --list_term_attributes has its own help text instead of a copy of the
#   --annotations_column one.
# * :similarity gets an explicit nil default like the other options.
options = {}
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options]"

  options[:download] = nil
  opts.on("-d", "--download STRING", "Download obo file from official resource. MONDO, GO and HPO are possible values.") do |item|
    options[:download] = item
  end

  options[:input_file] = nil
  opts.on("-i", "--input_file PATH", "Filepath of profile data") do |item|
    options[:input_file] = item
  end

  options[:output_file] = nil
  opts.on("-o", "--output_file PATH", "Output filepath") do |item|
    options[:output_file] = item
  end

  options[:IC] = false
  opts.on("-I", "--IC", "Get IC") do
    options[:IC] = true
  end

  options[:ontology_file] = nil
  opts.on("-O PATH", "--ontology_file PATH", "Path to ontology file") do |item|
    options[:ontology_file] = item
  end

  options[:term_filter] = nil
  opts.on("-T STRING", "--term_filter STRING", "If specified, only terms that are descendants of the specified term will be kept on a profile when cleaned") do |item|
    options[:term_filter] = item.to_sym
  end

  options[:translate] = nil
  opts.on("-t STRING", "--translate STRING", "Translate to 'names' or to 'codes'") do |item|
    options[:translate] = item
  end

  options[:similarity] = nil
  opts.on("-s method", "--similarity method", "Calculate similarity between profile IDs computed by 'resnik', 'lin' or 'jiang_conrath' methods. ") do |sim_method|
    options[:similarity] = sim_method.to_sym
  end

  options[:clean_profiles] = false
  opts.on("-c", "--clean_profiles", "Removes ancestors, descendants and obsolete terms from profiles") do
    options[:clean_profiles] = true
  end

  options[:removed_path] = 'rejected_profs'
  opts.on("-r PATH", "--removed_path PATH", "Desired path to write removed profiles file") do |item|
    options[:removed_path] = item
  end

  options[:untranslated_path] = nil
  opts.on("-u PATH", "--untranslated_path PATH", "Desired path to write untranslated terms file") do |item|
    options[:untranslated_path] = item
  end

  options[:keyword] = nil
  opts.on("-k STRING", "--keyword STRING", "regex used to get xref terms in the ontology file") do |item|
    options[:keyword] = item
  end

  options[:xref_sense] = :byValue
  opts.on("--xref_sense", "Ontology-xref or xref-ontology. By default xref-ontology; if set, ontology-xref") do
    options[:xref_sense] = :byTerm
  end

  options[:expand_profiles] = false
  opts.on("-e", "--expand_profiles", "Expand profiles adding ancestors") do
    options[:expand_profiles] = true
  end

  options[:unwanted_terms] = []
  opts.on("-U", "--unwanted_terms STRING", "Comma separated terms not wanted to be included in profile expansion") do |item|
    # Profile terms are symbols, so the filter list must hold symbols too.
    options[:unwanted_terms] = item.split(',').map(&:to_sym)
  end

  options[:separator] = ";"
  opts.on("-S STRING", "--separator STRING", "Separator used for the terms profile") do |sep|
    options[:separator] = sep
  end

  options[:childs] = [[], '']
  opts.on("-C STRING", "--childs STRING", "Term code list (comma separated) to generate child list") do |item|
    # An optional "modifiers/" prefix may precede the comma separated terms.
    if item.include?('/')
      modifiers, terms = item.split('/')
    else
      modifiers = ''
      terms = item
    end
    options[:childs] = [terms.split(',').map(&:to_sym), modifiers]
  end

  options[:statistics] = false
  opts.on("-n", "--statistics", "To obtain main statistical descriptors of the profiles file") do
    options[:statistics] = true
  end

  options[:list_translate] = nil
  opts.on("-l STRING", "--list_translate STRING", "Translate to 'names' or to 'codes' input list") do |item|
    options[:list_translate] = item
  end

  options[:subject_column] = 0
  opts.on("-f NUM", "--subject_column NUM", "The number of the column for the subject id") do |ncol|
    options[:subject_column] = ncol.to_i
  end

  options[:annotations_column] = 1
  opts.on("-a NUM", "--annotations_column NUM", "The number of the column for the annotation ids") do |ncol|
    options[:annotations_column] = ncol.to_i
  end

  options[:list_term_attributes] = false
  opts.on("--list_term_attributes", "Print a tab-separated list of term attributes") do
    options[:list_term_attributes] = true
  end
end
option_parser.parse!
289
+
290
+ ####################################################################################
291
+ ## MAIN
292
+ ####################################################################################
293
ont_index_file = File.join(EXTERNAL_DATA, 'ontologies.txt')

# Download mode: fetch (or list) ontologies and stop.
unless options[:download].nil?
  download(ont_index_file, options[:download], options[:output_file])
  exit
end

# Fail early with a readable message. These combinations used to end in a
# NoMethodError/TypeError on nil further down: the listed modes iterate the
# loaded input rows or derive an output name from the input path, and
# --keyword always writes to the output file.
needs_input = !options[:list_translate].nil? || options[:translate] == 'codes' ||
              !options[:keyword].nil? || !options[:similarity].nil? || options[:IC]
abort('An input file (--input_file) is required for the requested operation') if needs_input && options[:input_file].nil?
abort('An output file (--output_file) is required when --keyword is used') if !options[:keyword].nil? && options[:output_file].nil?
unless options[:list_translate].nil? || %w[names codes].include?(options[:list_translate])
  abort("--list_translate must be 'names' or 'codes'")
end

unless options[:ontology_file].nil?
  options[:ontology_file] = get_ontology_file(options[:ontology_file], ont_index_file)
end
ontology = Ontology.new(file: options[:ontology_file], load_file: true)

# Load the input table. Unless a plain list translation was requested, each
# row is reduced to [subject id, array of annotation symbols].
unless options[:input_file].nil?
  data = load_tabular_file(options[:input_file])
  if options[:list_translate].nil? || !options[:keyword].nil?
    data.map! do |row|
      [row[options[:subject_column]],
       row[options[:annotations_column]].split(options[:separator]).map!(&:to_sym)]
    end
    store_profiles(data, ontology) if options[:translate] != 'codes' && options[:keyword].nil?
  end
end

# List translation mode: print one "input<TAB>translation" line per row and stop.
unless options[:list_translate].nil?
  data.each do |term|
    translation, _untranslated =
      if options[:list_translate] == 'names'
        ontology.translate_ids(term)
      else
        ontology.translate_names(term)
      end
    puts "#{term.first}\t#{translation.empty? ? '-' : translation.first}"
  end
  exit
end

if options[:translate] == 'codes'
  profiles = {}
  data.each do |id, terms|
    load_value(profiles, id, terms)
    # NOTE(review): at this point each row already holds an array of symbols
    # (see the input loading step) and core Array has no #split, so this line
    # fails unless an Array#split extension is loaded elsewhere. It also
    # overwrites what load_value just stored. Confirm the intended input for
    # translate_names before changing it.
    profiles[id] = terms.split(options[:separator])
  end
  translate(ontology, 'codes', options, profiles)
  store_profiles(profiles, ontology)
end

if options[:clean_profiles]
  removed_profiles = clean_profiles(ontology.profiles, ontology, options)
  if !removed_profiles.nil? && !removed_profiles.empty?
    File.open(options[:removed_path], 'w') do |f|
      removed_profiles.each { |profile| f.puts profile }
    end
  end
end

expand_profiles(ontology.profiles, ontology, options[:unwanted_terms]) if options[:expand_profiles]

unless options[:similarity].nil?
  write_similarity_profile_list(options[:input_file], ontology, options[:similarity])
end

if options[:IC]
  ontology.add_observed_terms_from_profiles
  by_ontology, by_freq = ontology.get_profiles_resnik_dual_ICs
  ic_file = File.basename(options[:input_file], ".*")+'_IC_onto_freq'
  File.open(ic_file, 'w') do |file|
    ontology.profiles.keys.each do |id|
      file.puts([id, by_ontology[id], by_freq[id]].join("\t"))
    end
  end
end

translate(ontology, 'names', options) if options[:translate] == 'names'

# Child listing: print the descendants of the requested terms. Modifier 'r'
# prints parent/child relation pairs instead; modifier 'n' prints names
# instead of codes.
unless options[:childs].first.empty?
  terms, modifiers = options[:childs]
  all_childs = []
  terms.each do |term|
    all_childs |= ontology.get_descendants(term)
  end
  if modifiers.include?('r')
    relations = []
    all_childs |= terms # Add parents that generated child list
    all_childs.each do |term|
      descendants = ontology.get_direct_descendants(term)
      next if descendants.nil?

      descendants.each { |desc| relations << [term, desc] }
    end
    relations.each do |rel|
      rel, _ = ontology.translate_ids(rel) if modifiers.include?('n')
      puts rel.join("\t")
    end
  else
    all_childs.each do |c|
      puts(modifiers.include?('n') ? ontology.translate_id(c) : c)
    end
  end
end

unless options[:output_file].nil?
  File.open(options[:output_file], 'w') do |file|
    ontology.profiles.each do |id, terms|
      file.puts([id, terms.join("|")].join("\t"))
    end
  end
end

if options[:statistics]
  get_stats(ontology.profile_stats).each do |stat|
    puts stat.join("\t")
  end
end

if options[:list_term_attributes]
  term_attributes = ontology.list_term_attributes
  term_attributes.each do |t_attr|
    t_attr[0] = t_attr[0].to_s
    puts t_attr.join("\t")
  end
end

# Xref translation: map every profile term through the dictionary built from
# the xref tags matching the keyword and write "id<TAB>xref" lines to the
# output file (this overwrites the profile dump written above).
unless options[:keyword].nil?
  xref_translated = []
  ontology.calc_dictionary(:xref, select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true, substitute_alternatives: false)
  dict = ontology.dicts[:tag][options[:xref_sense]]
  data.each do |id, prof|
    xrefs = []
    prof.each do |t|
      query = dict[t.to_s]
      xrefs.concat(query) unless query.nil?
    end
    xref_translated << [id, xrefs] unless xrefs.empty?
  end
  File.open(options[:output_file], 'w') do |f|
    xref_translated.each do |id, prof|
      prof.each do |t|
        f.puts [id, t].join("\t")
      end
    end
  end
end
data/bin/strsimnet.rb CHANGED
@@ -111,12 +111,11 @@ texts2compare = load_table_file(input_file = options[:input_file],
111
111
  targetCol = options[:cindex],
112
112
  filterCol = options[:findex],
113
113
  filterValue = options[:filter_value])
114
-
115
114
  # Verbose point
116
115
  puts "Calculating similitude for (" + texts2compare.length.to_s + ") elements"
117
116
 
118
117
  # Obtain all Vs all
119
- similitudes_AllVsAll = similitude_network(texts2compare,options[:rm_char])
118
+ similitudes_AllVsAll = similitude_network(texts2compare, charsToRemove: options[:rm_char])
120
119
 
121
120
  # Verbose point
122
121
  puts "Writing output file ..."
@@ -0,0 +1,4 @@
1
+ GO http://purl.obolibrary.org/obo/go/go-basic.obo
2
+ HPO http://purl.obolibrary.org/obo/hp.obo
3
+ MONDO http://purl.obolibrary.org/obo/mondo.obo
4
+ EFO http://www.ebi.ac.uk/efo/efo.obo