semtools 0.1.2 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8fc29918a31045893647355dd72264a04386c5171c48ea868f7e9bbc93062151
4
- data.tar.gz: 692ce02343cb00ac37bbc34476da08386bedf0eaca7946689eb62c9a1f06d555
3
+ metadata.gz: a3f63cc6548a9938e31121d2018d1c1c477987007c5d253b5fa814a285bdb576
4
+ data.tar.gz: e1911d3157c3046590ca13bc86215d2260b4a8b2b1b25affa5c2673881036795
5
5
  SHA512:
6
- metadata.gz: 1b52667c81a0a25786b91156e9ed88a8de47e86fd18baddffc43b05ff199f95129b09da4e03025b6fb709d18a0274e22bf4a55c81471fda748e75aadca4d6ef1
7
- data.tar.gz: 46e5b49f611c021ee8576a522a0a6ef22a8b9ed349084dadb9e44fd76c712c05221e6314985f08bdba575ac2dd849f1f14d84d5ae686889f33fac993132a8372
6
+ metadata.gz: 30c95df80957a4a35b6fea05b9552352f529d8e45c10f6b128924a3ce2ee5d90e92a1e9d5fe0016d25538147e12d3a9199c81222642c94cdd0eb3c89eea168ef
7
+ data.tar.gz: ddc9e600fd984e68d060b7be05adf27b3f20bb67e638d42acc4b9b156eedabfce20d6f588a03d1fbc2948fedbd80d498f1767c0e3f8ea03720fa0ca327b95f3c
data/Gemfile CHANGED
@@ -5,5 +5,8 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
5
5
  # Specify your gem's dependencies in semtools.gemspec
6
6
  gemspec
7
7
 
8
- gem "rake", "~> 12.0"
8
+ gem "rake", "~> 13.0"
9
9
  gem "minitest", "~> 5.0"
10
+
11
+ expcalc_dev_path = File.expand_path('~/dev_gems/expcalc')
12
+ gem "expcalc", github: "seoanezonjic/expcalc", branch: "master" if Dir.exist?(expcalc_dev_path)
data/bin/onto2json.rb CHANGED
@@ -18,14 +18,20 @@ OptionParser.new do |opts|
18
18
  opts.banner = "Usage: #{__FILE__} [options]"
19
19
 
20
20
  options[:input_file] = nil
21
- opts.on("-i", "--input_file PATH", "Input file with ontology in OBO format") do |data|
21
+ opts.on("-i", "--input_file FILE", "Input file with ontology in OBO format") do |data|
22
22
  options[:input_file] = data
23
23
  end
24
24
 
25
25
  options[:output_file] = nil
26
- opts.on("-o", "--output_file PATH", "Output path") do |data|
26
+ opts.on("-o", "--output_file FILE", "Output path") do |data|
27
27
  options[:output_file] = data
28
28
  end
29
+
30
+ options[:build] = false
31
+ opts.on("-b", "--build", "Activate build mode (calculate dictionaries)") do
32
+ options[:build] = true
33
+ end
34
+
29
35
 
30
36
  opts.on_tail("-h", "--help", "Show this message") do
31
37
  puts opts
@@ -39,7 +45,7 @@ end.parse!
39
45
  # MAIN
40
46
  ##########################
41
47
  puts "Loading ontology ..."
42
- onto = Ontology.new(file: options[:input_file], load_file: true)
48
+ onto = Ontology.new(file: options[:input_file], load_file: true, build: options[:build])
43
49
  puts "Exporting ontology to JSON ..."
44
50
  onto.write(options[:output_file])
45
51
  puts "Ontology exported"
data/bin/semtools.rb ADDED
@@ -0,0 +1,446 @@
1
+ #! /usr/bin/env ruby
2
+ ROOT_PATH = File.dirname(__FILE__)
3
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
4
+ EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
5
+
6
+ require 'optparse'
7
+ require 'down'
8
+ require 'semtools'
9
+
10
+ ######################################################################################
11
+ ## METHODS
12
+ ######################################################################################
13
+ def load_tabular_file(file)
14
+ records = []
15
+ File.open(file).each do |line|
16
+ line.chomp!
17
+ fields = line.split("\t")
18
+ records << fields
19
+ end
20
+ return records
21
+ end
22
+
23
+ def store_profiles(file, ontology)
24
+ file.each do |id, terms|
25
+ ontology.add_profile(id, terms)
26
+ end
27
+ end
28
+
29
+ def load_value(hash_to_load, key, value, unique = true)
30
+ query = hash_to_load[key]
31
+ if query.nil?
32
+ value = [value] if value.class != Array
33
+ hash_to_load[key] = value
34
+ else
35
+ if value.class == Array
36
+ query.concat(value)
37
+ else
38
+ query << value
39
+ end
40
+ query.uniq! unless unique == nil
41
+ end
42
+ end
43
+
44
+ def translate(ontology, type, options, profiles = nil)
45
+ not_translated = {}
46
+ if type == 'names'
47
+ ontology.profiles.each do |id, terms|
48
+ translation, untranslated = ontology.translate_ids(terms)
49
+ ontology.profiles[id] = translation
50
+ not_translated[id] = untranslated unless untranslated.empty?
51
+ end
52
+ elsif type == 'codes'
53
+ profiles.each do |id,terms|
54
+ translation, untranslated = ontology.translate_names(terms)
55
+ profiles[id] = translation
56
+ profiles[id] = profiles[id].join("#{options[:separator]}")
57
+ not_translated[id] = untranslated unless untranslated.empty?
58
+ end
59
+ end
60
+ if !not_translated.empty?
61
+ File.open(options[:untranslated_path], 'w') do |file|
62
+ not_translated.each do |id, terms|
63
+ file.puts([id, terms.join(";")].join("\t"))
64
+ end
65
+ end
66
+ end
67
+ end
68
+
69
+ def clean_profile(profile, ontology, options)
70
+ cleaned_profile = ontology.clean_profile_hard(profile)
71
+ unless options[:term_filter].nil?
72
+ cleaned_profile.select! {|term| ontology.get_ancestors(term).include?(options[:term_filter])}
73
+ end
74
+ return cleaned_profile
75
+ end
76
+
77
+ def clean_profiles(profiles, ontology, options)
78
+ removed_profiles = []
79
+ profiles.each do |id, terms|
80
+ cleaned_profile = clean_profile(terms, ontology, options)
81
+ profiles[id] = cleaned_profile
82
+ removed_profiles << id if cleaned_profile.empty?
83
+ end
84
+ removed_profiles.each{|rp| profiles.delete(rp)}
85
+ return removed_profiles
86
+ end
87
+
88
+ def expand_profiles(profiles, ontology, unwanted_terms = [])
89
+ profiles.each do |disease_id, terms|
90
+ terms.each do |term|
91
+ profiles[disease_id] << ontology.get_ancestors(term).difference(unwanted_terms)
92
+ end
93
+ end
94
+ end
95
+
96
+ def write_similarity_profile_list(input, onto_obj, similarity_type)
97
+ similarity_file = File.basename(input, ".*")+'_semantic_similarity_list'
98
+ File.open(similarity_file, 'w') do |file|
99
+ onto_obj.profiles.each do |profile_query_key, profile_query_value|
100
+ onto_obj.profiles.each do |profile_search_key, profile_search_value|
101
+ file.puts([profile_query_key, profile_search_key, onto_obj.compare(profile_query_value, profile_search_value, sim_type: similarity_type)].join("\t"))
102
+ end
103
+ end
104
+ end
105
+ end
106
+
107
+ def download(source, key, output)
108
+ source_list = load_tabular_file(source).to_h
109
+ external_data = File.dirname(source)
110
+ if key == 'list'
111
+ Dir.glob(File.join(external_data,'*.obo')){|f| puts f}
112
+ else
113
+ url = source_list[key]
114
+ if !output.nil?
115
+ output_path = output
116
+ else
117
+ file_name = key + '.obo'
118
+ if File.writable?(external_data)
119
+ output_path = File.join(external_data, file_name)
120
+ else
121
+ output_path = file_name
122
+ end
123
+ end
124
+ if !url.nil?
125
+ Down::NetHttp.download(url, destination: output_path, max_redirects: 5)
126
+ File.chmod(0644, output_path) # Correct file permissions set by down gem
127
+ end
128
+ end
129
+ end
130
+
131
+ def get_ontology_file(path, source)
132
+ if !File.exists?(path)
133
+ ont_index = load_tabular_file(source).to_h
134
+ if !ont_index[path].nil?
135
+ path = File.join(File.dirname(source), path + '.obo')
136
+ else
137
+ abort("Input ontology file not exists")
138
+ end
139
+ end
140
+ return path
141
+ end
142
+
143
+ def get_stats(stats)
144
+ report_stats = []
145
+ report_stats << ['Elements', stats[:count]]
146
+ report_stats << ['Elements Non Zero', stats[:countNonZero]]
147
+ report_stats << ['Non Zero Density', stats[:countNonZero].fdiv(stats[:count])]
148
+ report_stats << ['Max', stats[:max]]
149
+ report_stats << ['Min', stats[:min]]
150
+ report_stats << ['Average', stats[:average]]
151
+ report_stats << ['Variance', stats[:variance]]
152
+ report_stats << ['Standard Deviation', stats[:standardDeviation]]
153
+ report_stats << ['Q1', stats[:q1]]
154
+ report_stats << ['Median', stats[:median]]
155
+ report_stats << ['Q3', stats[:q3]]
156
+ return report_stats
157
+ end
158
+
159
+
160
+
161
+
162
+
163
+
164
+ ####################################################################################
165
+ ## OPTPARSE
166
+ ####################################################################################
167
+ options = {}
168
+ OptionParser.new do |opts|
169
+ opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
170
+
171
+ options[:download] = nil
172
+ opts.on("-d", "--download STRING", "Download obo file from official resource. MONDO, GO and HPO are possible values.") do |item|
173
+ options[:download] = item
174
+ end
175
+
176
+ options[:input_file] = nil
177
+ opts.on("-i", "--input_file PATH", "Filepath of profile data") do |item|
178
+ options[:input_file] = item
179
+ end
180
+
181
+ options[:output_file] = nil
182
+ opts.on("-o", "--output_file PATH", "Output filepath") do |item|
183
+ options[:output_file] = item
184
+ end
185
+
186
+ options[:IC] = false
187
+ opts.on("-I", "--IC", "Get IC") do
188
+ options[:IC] = true
189
+ end
190
+
191
+ options[:ontology_file] = nil
192
+ opts.on("-O PATH", "--ontology_file PATH", "Path to ontology file") do |item|
193
+ options[:ontology_file] = item
194
+ end
195
+
196
+ options[:term_filter] = nil
197
+ opts.on("-T STRING", "--term_filter STRING", "If specified, only terms that are descendants of the specified term will be kept on a profile when cleaned") do |item|
198
+ options[:term_filter] = item.to_sym
199
+ end
200
+
201
+ options[:translate] = nil
202
+ opts.on("-t STRING", "--translate STRING", "Translate to 'names' or to 'codes'") do |item|
203
+ options[:translate] = item
204
+ end
205
+
206
+ opts.on("-s method", "--similarity method", "Calculate similarity between profile IDs computed by 'resnik', 'lin' or 'jiang_conrath' methods. ") do |sim_method|
207
+ options[:similarity] = sim_method.to_sym
208
+ end
209
+
210
+ options[:clean_profiles] = false
211
+ opts.on("-c", "--clean_profiles", "Removes ancestors, descendants and obsolete terms from profiles") do
212
+ options[:clean_profiles] = true
213
+ end
214
+
215
+ options[:removed_path] = 'rejected_profs'
216
+ opts.on("-r PATH", "--removed_path PATH", "Desired path to write removed profiles file") do |item|
217
+ options[:removed_path] = item
218
+ end
219
+
220
+ options[:untranslated_path] = nil
221
+ opts.on("-u PATH", "--untranslated_path PATH", "Desired path to write untranslated terms file") do |item|
222
+ options[:untranslated_path] = item
223
+ end
224
+
225
+ options[:keyword] = nil
226
+ opts.on("-k STRING", "--keyword STRING", "regex used to get xref terms in the ontology file") do |item|
227
+ options[:keyword] = item
228
+ end
229
+
230
+ options[:xref_sense] = :byValue
231
+ opts.on("--xref_sense ", "Ontology-xref or xref-ontology. By default xref-ontology if set, ontology-xref") do
232
+ options[:xref_sense] = :byTerm
233
+ end
234
+
235
+ options[:expand_profiles] = false
236
+ opts.on("-e", "--expand_profiles", "Expand profiles adding ancestors") do
237
+ options[:expand_profiles] = true
238
+ end
239
+
240
+ options[:unwanted_terms] = []
241
+ opts.on("-U", "--unwanted_terms STRING", "Comma separated terms not wanted to be included in profile expansion") do |item|
242
+ options[:unwanted_terms] = item
243
+ end
244
+
245
+ options[:separator] = ";"
246
+ opts.on("-S STRING", "--separator STRING", "Separator used for the terms profile") do |sep|
247
+ options[:separator] = sep
248
+ end
249
+
250
+ options[:childs] = [[], '']
251
+ opts.on("-C STRING", "--childs STRING", "Term code list (comma separated) to generate child list") do |item|
252
+ if item.include?('/')
253
+ modifiers, terms = item.split('/')
254
+ else
255
+ modifiers = ''
256
+ terms = item
257
+ end
258
+ terms = terms.split(',').map{|t| t.to_sym}
259
+ options[:childs] = [terms, modifiers]
260
+ end
261
+
262
+ options[:statistics] = false
263
+ opts.on("-n", "--statistics", "To obtain main statistical descriptors of the profiles file") do
264
+ options[:statistics] = true
265
+ end
266
+
267
+ options[:list_translate] = nil
268
+ opts.on("-l STRING", "--list_translate STRING", "Translate to 'names' or to 'codes' input list") do |sep|
269
+ options[:list_translate] = sep
270
+ end
271
+
272
+ options[:subject_column] = 0
273
+ opts.on("-f NUM", "--subject_column NUM", "The number of the column for the subject id") do |ncol|
274
+ options[:subject_column] = ncol.to_i
275
+ end
276
+
277
+ options[:annotations_column] = 1
278
+ opts.on("-a NUM", "--annotations_column NUM", "The number of the column for the annotation ids") do |ncol|
279
+ options[:annotations_column] = ncol.to_i
280
+ end
281
+
282
+
283
+ options[:list_term_attributes] = false
284
+ opts.on("--list_term_attributes", "The number of the column for the annotation ids") do
285
+ options[:list_term_attributes] = true
286
+ end
287
+
288
+ end.parse!
289
+
290
+ ####################################################################################
291
+ ## MAIN
292
+ ####################################################################################
293
+ ont_index_file = File.join(EXTERNAL_DATA, 'ontologies.txt')
294
+ if !options[:download].nil?
295
+ download(ont_index_file, options[:download], options[:output_file])
296
+ Process.exit
297
+ end
298
+
299
+ if !options[:ontology_file].nil?
300
+ options[:ontology_file] = get_ontology_file(options[:ontology_file], ont_index_file)
301
+ end
302
+ ontology = Ontology.new(file: options[:ontology_file], load_file: true)
303
+
304
+ if !options[:input_file].nil?
305
+ data = load_tabular_file(options[:input_file])
306
+ if options[:list_translate].nil? || !options[:keyword].nil?
307
+ data.map!{|row|
308
+ [row[options[:subject_column]],
309
+ row[options[:annotations_column]].split(options[:separator]).map!{|term| term.to_sym}]
310
+ }
311
+ store_profiles(data, ontology) if options[:translate] != 'codes' && options[:keyword].nil?
312
+ end
313
+ end
314
+
315
+ if !options[:list_translate].nil?
316
+ data.each do |term|
317
+ if options[:list_translate] == 'names'
318
+ translation, untranslated = ontology.translate_ids(term)
319
+ elsif options[:list_translate] == 'codes'
320
+ translation, untranslated = ontology.translate_names(term)
321
+ end
322
+ puts "#{term.first}\t#{translation.empty? ? '-' : translation.first}"
323
+ end
324
+ Process.exit
325
+ end
326
+
327
+ if options[:translate] == 'codes'
328
+ profiles = {}
329
+ data.each do |id, terms|
330
+ load_value(profiles, id, terms)
331
+ profiles[id] = terms.split(options[:separator])
332
+ end
333
+ translate(ontology, 'codes', options, profiles)
334
+ store_profiles(profiles, ontology)
335
+ end
336
+
337
+ if options[:clean_profiles]
338
+ removed_profiles = clean_profiles(ontology.profiles, ontology, options)
339
+ if !removed_profiles.nil? && !removed_profiles.empty?
340
+ File.open(options[:removed_path], 'w') do |f|
341
+ removed_profiles.each do |profile|
342
+ f.puts profile
343
+ end
344
+ end
345
+ end
346
+ end
347
+
348
+ if options[:expand_profiles]
349
+ expanded_profiles = expand_profiles(ontology.profiles, ontology, options[:unwanted_terms])
350
+ end
351
+
352
+ if !options[:similarity].nil?
353
+ write_similarity_profile_list(input = options[:input_file], onto_obj=ontology, similarity_type = options[:similarity])
354
+ end
355
+
356
+
357
+ if options[:IC]
358
+ ontology.add_observed_terms_from_profiles
359
+ by_ontology, by_freq = ontology.get_profiles_resnik_dual_ICs
360
+ ic_file = File.basename(options[:input_file], ".*")+'_IC_onto_freq'
361
+ File.open(ic_file , 'w') do |file|
362
+ ontology.profiles.keys.each do |id|
363
+ file.puts([id, by_ontology[id], by_freq[id]].join("\t"))
364
+ end
365
+ end
366
+ end
367
+
368
+ if options[:translate] == 'names'
369
+ translate(ontology, 'names', options)
370
+ end
371
+
372
+ if !options[:childs].first.empty?
373
+ terms, modifiers = options[:childs]
374
+ all_childs = []
375
+ terms.each do |term|
376
+ childs = ontology.get_descendants(term)
377
+ all_childs = all_childs | childs
378
+ end
379
+ if modifiers.include?('r')
380
+ relations = []
381
+ all_childs = all_childs | terms # Add parents that generated child list
382
+ all_childs.each do |term|
383
+ descendants = ontology.get_direct_descendants(term)
384
+ if !descendants.nil?
385
+ descendants.each do |desc|
386
+ relations << [term, desc]
387
+ end
388
+ end
389
+ end
390
+ relations.each do |rel|
391
+ rel, _ = ontology.translate_ids(rel) if modifiers.include?('n')
392
+ puts rel.join("\t")
393
+ end
394
+ else
395
+ all_childs.each do |c|
396
+ if modifiers.include?('n')
397
+ puts ontology.translate_id(c)
398
+ else
399
+ puts c
400
+ end
401
+ end
402
+ end
403
+ end
404
+
405
+ if !options[:output_file].nil?
406
+ File.open(options[:output_file], 'w') do |file|
407
+ ontology.profiles.each do |id, terms|
408
+ file.puts([id, terms.join("|")].join("\t"))
409
+ end
410
+ end
411
+ end
412
+
413
+ if options[:statistics]
414
+ get_stats(ontology.profile_stats).each do |stat|
415
+ puts stat.join("\t")
416
+ end
417
+ end
418
+
419
+ if options[:list_term_attributes]
420
+ term_attributes = ontology.list_term_attributes
421
+ term_attributes.each do |t_attr|
422
+ t_attr[0] = t_attr[0].to_s
423
+ puts t_attr.join("\t")
424
+ end
425
+ end
426
+
427
+ if !options[:keyword].nil?
428
+ xref_translated = []
429
+ ontology.calc_dictionary(:xref, select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true, substitute_alternatives: false)
430
+ dict = ontology.dicts[:tag][options[:xref_sense]]
431
+ data.each do |id, prof|
432
+ xrefs = []
433
+ prof.each do |t|
434
+ query = dict[t.to_s]
435
+ xrefs.concat(query) if !query.nil?
436
+ end
437
+ xref_translated << [id, xrefs] if !xrefs.empty?
438
+ end
439
+ File.open(options[:output_file], 'w') do |f|
440
+ xref_translated.each do |id, prof|
441
+ prof.each do |t|
442
+ f.puts [id, t].join("\t")
443
+ end
444
+ end
445
+ end
446
+ end
data/bin/strsimnet.rb CHANGED
@@ -111,12 +111,11 @@ texts2compare = load_table_file(input_file = options[:input_file],
111
111
  targetCol = options[:cindex],
112
112
  filterCol = options[:findex],
113
113
  filterValue = options[:filter_value])
114
-
115
114
  # Verbose point
116
115
  puts "Calculating similitude for (" + texts2compare.length.to_s + ") elements"
117
116
 
118
117
  # Obtain all Vs all
119
- similitudes_AllVsAll = similitude_network(texts2compare,options[:rm_char])
118
+ similitudes_AllVsAll = similitude_network(texts2compare, charsToRemove: options[:rm_char])
120
119
 
121
120
  # Verbose point
122
121
  puts "Writing output file ..."
@@ -0,0 +1,4 @@
1
+ GO http://purl.obolibrary.org/obo/go/go-basic.obo
2
+ HPO http://purl.obolibrary.org/obo/hp.obo
3
+ MONDO http://purl.obolibrary.org/obo/mondo.obo
4
+ EFO http://www.ebi.ac.uk/efo/efo.obo