semtools 0.1.6 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 83746b4834f16f9bffc404c578be30e427bf90c7f43ba1f55bd04d94a29186c5
4
- data.tar.gz: 25f0bd67c733d4289f2e790bc857886a184dd95677beaf378da9074957660e69
3
+ metadata.gz: 9f1ccf05983df627128b8144f90199835732a40c1f6889eb859872d49dd60be9
4
+ data.tar.gz: 6596a7056d3a8ed1f8f873ef2485b2a319947826354388b26d3828158f31aab3
5
5
  SHA512:
6
- metadata.gz: 602fd8d61f9e9f34c2de957dd4d311a510413eea2bb0d1794b4d2da6f4e8959d3919dd8e9ea4a0d9f5d4e962443b7f50675e075d385830aeb9ef08ecb38d3fe2
7
- data.tar.gz: 5b9f66a1fef9c3296e5fe203330e3575e4fecf0f363fc8b884abd56e8fbcb1bd5dac3563792a4a5da908bcee409e2d36596751123aa2012d5cc9a8a5ff3c7796
6
+ metadata.gz: 9a233d99c5800170c3fab9f223d84c0bbaf69680517fc9129370602a68f846c59a8062709e7c46d236f0030059653ab11ecabb9f8bf0db022b7cd4d1e185988e
7
+ data.tar.gz: 2a5e0f2e26fad97a938cac362ad5e1beeef466d88a502ae2f13e841ebe2480159f4233c0366642cb096270506d17bd68beaa4057a96ed4c42f85bbe2aa4c7fbf
data/Gemfile CHANGED
@@ -5,5 +5,8 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
5
5
  # Specify your gem's dependencies in semtools.gemspec
6
6
  gemspec
7
7
 
8
- gem "rake", "~> 12.0"
8
+ gem "rake", "~> 13.0"
9
9
  gem "minitest", "~> 5.0"
10
+
11
+ expcalc_dev_path = File.expand_path('~/dev_gems/expcalc')
12
+ gem "expcalc", github: "seoanezonjic/expcalc", branch: "master" if Dir.exist?(expcalc_dev_path)
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Semtools
2
2
 
3
+ DEPRECATED PROJECT. MIGRATED TO [python semtools](https://github.com/seoanezonjic/py_semtools)
4
+
3
5
  Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/semtools`. To experiment with that code, run `bin/console` for an interactive prompt.
4
6
 
5
7
  TODO: Delete this and the text above, and describe your gem
data/bin/semtools.rb ADDED
@@ -0,0 +1,521 @@
1
+ #! /usr/bin/env ruby
2
+ ROOT_PATH = File.dirname(__FILE__)
3
+ $LOAD_PATH.unshift(File.expand_path(File.join(ROOT_PATH, '..', 'lib')))
4
+ EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
5
+
6
+ require 'optparse'
7
+ require 'down'
8
+ require 'semtools'
9
+
10
+ ######################################################################################
11
+ ## METHODS
12
+ ######################################################################################
13
# Reads a tab-separated text file into memory.
#
# file - path of the file to read.
#
# Returns an Array with one entry per line; each entry is the Array of
# fields obtained by splitting the line (without its line terminator) on
# tab characters. As with String#split, trailing empty fields are dropped
# and an empty line yields an empty Array.
def load_tabular_file(file)
  # File.foreach closes the file when iteration finishes, whereas a bare
  # File.open(file).each keeps the descriptor open until garbage collection.
  File.foreach(file).map { |line| line.chomp.split("\t") }
end
22
+
23
# Rewrites every row of +data+ in place as a two element pair
# [id, terms], where id is the field found at +id_col+ and terms is the
# field found at +terms_col+ split on +separator+ and converted to symbols.
#
# data      - Array of rows (each row an Array of String fields).
# separator - delimiter between terms inside the terms field.
# id_col    - index of the field holding the profile id.
# terms_col - index of the field holding the term list.
#
# Returns the same (mutated) +data+ Array.
def format_tabular_data(data, separator, id_col, terms_col)
  data.each_index do |position|
    fields = data[position]
    term_symbols = fields[terms_col].split(separator).map(&:to_sym)
    data[position] = [fields[id_col], term_symbols]
  end
  data
end
29
+
30
# Registers every (id, terms) pair of +file+ in +ontology+ by calling
# ontology.add_profile(id, terms) once per pair.
#
# file     - despite the name, an already loaded collection of
#            [id, terms] pairs (an Array of pairs or a Hash).
# ontology - object that responds to add_profile.
#
# Returns +file+ itself.
def store_profiles(file, ontology)
  file.each do |(profile_id, term_list)|
    ontology.add_profile(profile_id, term_list)
  end
end
35
+
36
# Accumulates +value+ under +key+ in +hash_to_load+, keeping an Array of
# values per key.
#
# hash_to_load - Hash that is updated in place.
# key          - key under which the value(s) are stored.
# value        - a single value or an Array of values to append.
# unique       - when truthy (default) the stored list is de-duplicated
#                after appending to an existing entry; pass false or nil to
#                keep repeated values.
#
# The return value is not meaningful.
def load_value(hash_to_load, key, value, unique = true)
  incoming = value.is_a?(Array) ? value : [value]
  stored = hash_to_load[key]
  if stored.nil?
    # Store a copy: later appends must not mutate the caller's array.
    hash_to_load[key] = incoming.dup
  else
    stored.concat(incoming)
    # The previous `unless unique == nil` check de-duplicated even when the
    # caller explicitly passed false.
    stored.uniq! if unique
  end
end
50
+
51
# Translates profile terms between codes and names and reports the terms
# that could not be translated.
#
# ontology - object providing translate_ids / translate_names (both are
#            used as returning a [translated, untranslated] pair) and, for
#            type 'names', a profiles Hash.
# type     - 'names': every profile stored in ontology.profiles is replaced
#            by the result of translate_ids.
#            'codes': every entry of +profiles+ is replaced by the result
#            of translate_names joined with options[:separator].
# options  - Hash read for :separator and :untranslated_path.
# profiles - Hash of id => names, required only for type 'codes'.
#
# Untranslated terms are written to options[:untranslated_path] as
# "id<TAB>term;term;..." lines. That option defaults to nil in this script,
# which used to end in File.open(nil) raising TypeError; now a warning is
# printed instead and nothing is written.
def translate(ontology, type, options, profiles = nil)
  not_translated = {}
  case type
  when 'names'
    ontology.profiles.each do |id, terms|
      translation, untranslated = ontology.translate_ids(terms)
      ontology.profiles[id] = translation
      not_translated[id] = untranslated unless untranslated.empty?
    end
  when 'codes'
    profiles.each do |id, terms|
      translation, untranslated = ontology.translate_names(terms)
      profiles[id] = translation.join(options[:separator].to_s)
      not_translated[id] = untranslated unless untranslated.empty?
    end
  end
  return if not_translated.empty?

  report_path = options[:untranslated_path]
  if report_path.nil?
    warn "#{not_translated.length} profile(s) contain untranslated terms; set an untranslated terms path to save them"
    return
  end
  File.open(report_path, 'w') do |file|
    not_translated.each do |id, terms|
      file.puts([id, terms.join(";")].join("\t"))
    end
  end
end
75
+
76
# Cleans a single profile.
#
# profile  - list of terms.
# ontology - object providing clean_profile_hard and get_ancestors.
# options  - Hash; when options[:term_filter] is set, only the terms whose
#            ancestors (as given by ontology.get_ancestors) include that
#            term are kept.
#
# Returns the list produced by ontology.clean_profile_hard, filtered in
# place when a term filter is set.
def clean_profile(profile, ontology, options)
  kept_terms = ontology.clean_profile_hard(profile)
  required_ancestor = options[:term_filter]
  return kept_terms if required_ancestor.nil?

  kept_terms.select! do |term|
    ontology.get_ancestors(term).include?(required_ancestor)
  end
  kept_terms
end
83
+
84
# Cleans every profile of +profiles+ in place with clean_profile and drops
# the profiles that end up without terms.
#
# profiles - Hash of id => terms; mutated.
# ontology - forwarded to clean_profile.
# options  - forwarded to clean_profile.
#
# Returns the Array of ids that were removed because their cleaned profile
# was empty.
def clean_profiles(profiles, ontology, options)
  profiles.each_pair do |id, terms|
    # Re-assigning an existing key while iterating is allowed; no key is added here.
    profiles[id] = clean_profile(terms, ontology, options)
  end
  emptied_ids = profiles.select { |_id, terms| terms.empty? }.keys
  emptied_ids.each { |id| profiles.delete(id) }
  emptied_ids
end
94
+
95
# Computes profile similarities and writes them as a three column table.
#
# output          - path of the file to (over)write.
# onto_obj        - object providing compare_profiles(sim_type:, external_profiles:).
# similarity_type - forwarded as sim_type.
# refs            - forwarded as external_profiles.
#
# The result of compare_profiles is walked as a two level collection
# (first id => second id => value) and every pair is written as
# "first<TAB>second<TAB>value".
def write_similarity_profile_list(output, onto_obj, similarity_type, refs)
  similarities = onto_obj.compare_profiles(sim_type: similarity_type, external_profiles: refs)
  File.open(output, 'w') do |out|
    similarities.each do |first_id, partners|
      partners.each do |second_id, score|
        out.puts [first_id, second_id, score].map(&:to_s).join("\t")
      end
    end
  end
end
105
+
106
# Downloads the OBO file registered under +key+ in the ontology index, or
# lists the OBO files already present next to the index.
#
# source - path of the tab-separated index (key<TAB>url per line).
# key    - ontology key to download, or 'list' to print the paths of the
#          *.obo files found in the directory of +source+.
# output - destination path; when nil the file is saved as "<key>.obo" in
#          the directory of +source+ if that directory is writable, or in
#          the current directory otherwise.
#
# A key that is not present in the index used to be ignored silently; a
# warning is now printed to stderr so the user knows nothing was fetched.
def download(source, key, output)
  source_list = load_tabular_file(source).to_h
  external_data = File.dirname(source)
  if key == 'list'
    Dir.glob(File.join(external_data, '*.obo')) { |f| puts f }
    return
  end

  url = source_list[key]
  if url.nil?
    warn "No download URL registered for '#{key}'. Known keys: #{source_list.keys.join(', ')}"
    return
  end

  output_path = output
  if output_path.nil?
    file_name = key + '.obo'
    output_path = File.writable?(external_data) ? File.join(external_data, file_name) : file_name
  end
  Down::NetHttp.download(url, destination: output_path, max_redirects: 5)
  File.chmod(0644, output_path) # Correct file permissions set by down gem
end
129
+
130
# Resolves the ontology file to load.
#
# path   - either the path of an existing file or an ontology key listed
#          in the index.
# source - path of the tab-separated ontology index.
#
# Returns +path+ unchanged when it exists on disk; otherwise, when +path+
# is a key of the index, returns "<directory of source>/<path>.obo".
# Aborts the program when +path+ is neither.
def get_ontology_file(path, source)
  # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
  return path if File.exist?(path)

  ont_index = load_tabular_file(source).to_h
  abort("Input ontology file not exists") if ont_index[path].nil?
  File.join(File.dirname(source), path + '.obo')
end
141
+
142
# Turns a statistics Hash into printable [label, value] rows.
#
# stats - Hash read with the keys :count, :countNonZero, :max, :min,
#         :average, :variance, :standardDeviation, :q1, :median and :q3.
#
# Returns an Array of [label, value] pairs in report order; the
# 'Non Zero Density' row is :countNonZero divided by :count as a float.
def get_stats(stats)
  counts = [
    ['Elements', stats[:count]],
    ['Elements Non Zero', stats[:countNonZero]],
    ['Non Zero Density', stats[:countNonZero].fdiv(stats[:count])]
  ]
  descriptors = [
    ['Max', :max],
    ['Min', :min],
    ['Average', :average],
    ['Variance', :variance],
    ['Standard Deviation', :standardDeviation],
    ['Q1', :q1],
    ['Median', :median],
    ['Q3', :q3]
  ]
  counts + descriptors.map { |label, key| [label, stats[key]] }
end
157
+
158
# Orders terms by the level reported by ontology.get_terms_levels.
#
# terms      - not used by this method; kept for the existing call signature.
# modifiers  - String; when it includes 'a' the order is by descending
#              level, otherwise by ascending level.
# ontology   - object providing get_terms_levels, used here as returning
#              an Array of [term, level] pairs.
# all_childs - terms whose levels are requested.
#
# Returns [sorted terms, sorted [term, level] pairs]. The pair list is the
# one returned by get_terms_levels, sorted in place.
def sort_terms_by_levels(terms, modifiers, ontology, all_childs)
  leveled_terms = ontology.get_terms_levels(all_childs)
  descending = modifiers.include?('a')
  leveled_terms.sort! do |left, right|
    descending ? right[1] <=> left[1] : left[1] <=> right[1]
  end
  [leveled_terms.map(&:first), leveled_terms]
end
168
+
169
# Collects the terms related to +terms+ in the ontology.
#
# modifiers is a String of flags:
#  - a: get ancestors instead of descendants
#  - r: get parent-child relations instead of a list of descendants/ancestors
#  - hN: when listing relations, the walk is limited by levels: the terms
#        are sorted by level and the walk stops at the (N + 2)th distinct level
#  - n: give term names instead of term codes
#
# Without 'r' the result is the union of the descendants (or ancestors) of
# all given terms. With 'r' the result is a list of two element pairs whose
# first element is the ancestor-side term and whose second element is the
# descendant-side term.
def get_childs(ontology, terms, modifiers)
  related_terms = terms.reduce([]) do |collected, term|
    found = modifiers.include?('a') ? ontology.get_ancestors(term) : ontology.get_descendants(term)
    collected | found
  end

  unless modifiers.include?('r')
    related_terms.map! { |code| ontology.translate_id(code) } if modifiers.include?('n')
    return related_terms
  end

  related_terms |= terms # Add the reference terms that generated the list
  hop_limit = nil
  levels = nil
  requested_hops = modifiers[/h([0-9]+)/, 1]
  unless requested_hops.nil?
    hop_limit = requested_hops.to_i + 1 # take into account the reference term (parent/child) addition
    related_terms, levels = sort_terms_by_levels(terms, modifiers, ontology, related_terms)
  end

  pairs = []
  last_level = nil
  level_changes = 0
  related_terms.each_with_index do |term, index|
    unless hop_limit.nil?
      level = levels[index][1]
      if level != last_level
        last_level = level
        level_changes += 1
        # +1: a level change has just been detected, so the previous level was saved entirely
        break if level_changes == hop_limit + 1
      end
    end
    neighbours = if modifiers.include?('a')
                   ontology.get_direct_ancentors(term)
                 else
                   ontology.get_direct_descendants(term)
                 end
    next if neighbours.nil?

    neighbours.each do |neighbour|
      pairs << (modifiers.include?('a') ? [neighbour, term] : [term, neighbour])
    end
  end

  pairs.map do |pair|
    if modifiers.include?('n')
      translated, _untranslated = ontology.translate_ids(pair)
      translated
    else
      pair
    end
  end
end
229
+
230
+
231
+
232
+ ####################################################################################
233
+ ## OPTPARSE
234
+ ####################################################################################
235
# Command line options. Every option gets its default before its switch is
# declared, so `options` is fully populated even when no flag is given
# (except :similarity, which is only set when -s is used).
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename(__FILE__)} [options]"

  options[:download] = nil
  opts.on("-d", "--download STRING", "Download obo file from official resource. MONDO, GO and HPO are possible values.") do |item|
    options[:download] = item
  end

  options[:input_file] = nil
  opts.on("-i", "--input_file PATH", "Filepath of profile data") do |item|
    options[:input_file] = item
  end

  options[:output_file] = nil
  opts.on("-o", "--output_file PATH", "Output filepath") do |item|
    options[:output_file] = item
  end

  options[:IC] = nil
  opts.on("-I", "--IC STRING", "Get IC. 'prof' for stored profiles or 'ont' for terms in ontology") do |item|
    options[:IC] = item
  end

  options[:ontology_file] = nil
  opts.on("-O PATH", "--ontology_file PATH", "Path to ontology file") do |item|
    options[:ontology_file] = item
  end

  options[:term_filter] = nil
  opts.on("-T STRING", "--term_filter STRING", "If specified, only terms that are descendants of the specified term will be kept on a profile when cleaned") do |item|
    options[:term_filter] = item.to_sym
  end

  options[:translate] = nil
  opts.on("-t STRING", "--translate STRING", "Translate to 'names' or to 'codes'") do |item|
    options[:translate] = item
  end

  opts.on("-s method", "--similarity method", "Calculate similarity between profile IDs computed by 'resnik', 'lin' or 'jiang_conrath' methods. ") do |sim_method|
    options[:similarity] = sim_method.to_sym
  end

  options[:reference_profiles] = nil
  opts.on("--reference_profiles PATH", "Path to file tabulated file with first column as id profile and second column with ontology terms separated by separator. ") do |opt|
    options[:reference_profiles] = opt
  end

  options[:clean_profiles] = false
  opts.on("-c", "--clean_profiles", "Removes ancestors, descendants and obsolete terms from profiles") do
    options[:clean_profiles] = true
  end

  options[:removed_path] = 'rejected_profs'
  opts.on("-r PATH", "--removed_path PATH", "Desired path to write removed profiles file") do |item|
    options[:removed_path] = item
  end

  options[:untranslated_path] = nil
  opts.on("-u PATH", "--untranslated_path PATH", "Desired path to write untranslated terms file") do |item|
    options[:untranslated_path] = item
  end

  options[:keyword] = nil
  opts.on("-k STRING", "--keyword STRING", "regex used to get xref terms in the ontology file") do |item|
    options[:keyword] = item
  end

  options[:xref_sense] = :byValue
  opts.on("--xref_sense", "Ontology-xref or xref-ontology. By default xref-ontology if set, ontology-xref") do
    options[:xref_sense] = :byTerm
  end

  options[:expand_profiles] = nil
  opts.on("-e", "--expand_profiles STRING", "Expand profiles adding ancestors if 'parental', adding new profiles if 'propagate'") do |meth|
    options[:expand_profiles] = meth
  end

  options[:unwanted_terms] = []
  opts.on("-U", "--unwanted_terms STRING", "Comma separated terms not wanted to be included in profile expansion") do |item|
    # Keep the same shape as the default (an Array) and the same term
    # representation used by the other term options (symbols); the raw
    # comma separated String used to be stored as is.
    options[:unwanted_terms] = item.split(',').map(&:to_sym)
  end

  options[:separator] = ";"
  opts.on("-S STRING", "--separator STRING", "Separator used for the terms profile") do |sep|
    options[:separator] = sep
  end

  # Stored as [terms, modifiers]; the argument is either "TERM,TERM" or
  # "MODIFIERS/TERM,TERM".
  options[:childs] = [[], '']
  opts.on("-C STRING", "--childs STRING", "Term code list (comma separated) to generate child list") do |item|
    if item.include?('/')
      modifiers, terms = item.split('/')
    else
      modifiers = ''
      terms = item
    end
    terms = terms.split(',').map{|t| t.to_sym}
    options[:childs] = [terms, modifiers]
  end

  options[:statistics] = false
  opts.on("-n", "--statistics", "To obtain main statistical descriptors of the profiles file") do
    options[:statistics] = true
  end

  options[:list_translate] = nil
  opts.on("-l STRING", "--list_translate STRING", "Translate to 'names' or to 'codes' input list") do |direction|
    options[:list_translate] = direction
  end

  options[:subject_column] = 0
  opts.on("-f NUM", "--subject_column INTEGER", "The number of the column for the subject id") do |ncol|
    options[:subject_column] = ncol.to_i
  end

  options[:annotations_column] = 1
  opts.on("-a NUM", "--annotations_column INTEGER", "The number of the column for the annotation ids") do |item|
    options[:annotations_column] = item.to_i
  end

  options[:root] = nil
  opts.on("-R STRING", "--root STRING", "Term id to be considered the new root of the ontology") do |item|
    options[:root] = item.to_sym
  end

  options[:list_term_attributes] = false
  # The description used to be a copy of the --annotations_column one.
  opts.on("--list_term_attributes", "Print the attributes of each ontology term as tab separated rows") do
    options[:list_term_attributes] = true
  end

end
parser.parse!
366
+
367
+ ####################################################################################
368
+ ## MAIN
369
+ ####################################################################################
370
# Index that maps ontology keys to their download URLs.
ont_index_file = File.join(EXTERNAL_DATA, 'ontologies.txt')

# --download is a standalone action: fetch (or list) and stop.
if !options[:download].nil?
  download(ont_index_file, options[:download], options[:output_file])
  Process.exit
end

# The ontology may be given as a file path or as a key of the index.
if !options[:ontology_file].nil?
  options[:ontology_file] = get_ontology_file(options[:ontology_file], ont_index_file)
end

extra_dicts = []
extra_dicts << [:xref, {select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true}] if !options[:keyword].nil?
ontology = Ontology.new(file: options[:ontology_file], load_file: true, extra_dicts: extra_dicts)

Ontology.mutate(options[:root], ontology, clone: false) if !options[:root].nil? # TODO fix method and convert in class method

# Load the input table. Rows are converted to [id, terms] pairs unless the
# input is a plain list to translate (-l without -k).
if !options[:input_file].nil?
  data = load_tabular_file(options[:input_file])
  if options[:list_translate].nil? || !options[:keyword].nil?
    format_tabular_data(data, options[:separator], options[:subject_column], options[:annotations_column])
    store_profiles(data, ontology) if options[:translate] != 'codes' && options[:keyword].nil?
  end
end

# --list_translate is a standalone action: print one translation per row and stop.
if !options[:list_translate].nil?
  data.each do |term|
    if options[:list_translate] == 'names'
      translation, untranslated = ontology.translate_ids(term)
    elsif options[:list_translate] == 'codes'
      translation, untranslated = ontology.translate_names(term)
    end
    puts "#{term.first}\t#{translation.empty? ? '-' : translation.first}"
  end
  Process.exit
end

if options[:translate] == 'codes'
  profiles = {}
  # NOTE(review): at this point `data` has been through format_tabular_data,
  # so `terms` looks like an Array of symbols and `terms.split` would need an
  # Array#split defined elsewhere - confirm this path against the library.
  data.each do |id, terms|
    load_value(profiles, id, terms)
    profiles[id] = terms.split(options[:separator])
  end
  translate(ontology, 'codes', options, profiles)
  store_profiles(profiles, ontology)
end

if options[:clean_profiles]
  removed_profiles = clean_profiles(ontology.profiles, ontology, options)
  if !removed_profiles.nil? && !removed_profiles.empty?
    File.open(options[:removed_path], 'w') do |f|
      removed_profiles.each do |profile|
        f.puts profile
      end
    end
  end
end

if !options[:expand_profiles].nil?
  ontology.expand_profiles(options[:expand_profiles], unwanted_terms: options[:unwanted_terms])
end

if !options[:similarity].nil?
  refs = nil
  if !options[:reference_profiles].nil?
    refs = load_tabular_file(options[:reference_profiles])
    format_tabular_data(refs, options[:separator], 0, 1)
    refs = refs.to_h
    # Clean the reference profiles themselves, in place. The previous code
    # cleaned ontology.profiles again and replaced refs with the list of
    # removed ids, so the run aborted whenever nothing had been removed.
    clean_profiles(refs, ontology, options) if options[:clean_profiles]
    abort('Reference profiles are empty after cleaning ') if refs.nil? || refs.empty?
  end
  write_similarity_profile_list(options[:output_file], ontology, options[:similarity], refs)
end


if options[:IC] == 'prof'
  ontology.add_observed_terms_from_profiles
  by_ontology, by_freq = ontology.get_profiles_resnik_dual_ICs
  # Output name is derived from the input file name, written to the current directory.
  ic_file = File.basename(options[:input_file], ".*")+'_IC_onto_freq'
  File.open(ic_file , 'w') do |file|
    ontology.profiles.keys.each do |id|
      file.puts([id, by_ontology[id], by_freq[id]].join("\t"))
    end
  end
elsif options[:IC] == 'ont'
  File.open('ont_IC' , 'w') do |file|
    ontology.each do |term|
      file.puts "#{term}\t#{ontology.get_IC(term)}"
    end
  end
end

if options[:translate] == 'names'
  translate(ontology, 'names', options)
end

if !options[:childs].first.empty?
  terms, modifiers = options[:childs]
  all_childs = get_childs(ontology, terms, modifiers)
  all_childs.each do |ac|
    if modifiers.include?('r')
      puts ac.join("\t")
    else
      puts ac
    end
  end
end

# Dump the stored profiles, unless the output file was already used for the similarity table.
if !options[:output_file].nil? && options[:similarity].nil?
  File.open(options[:output_file], 'w') do |file|
    ontology.profiles.each do |id, terms|
      file.puts([id, terms.join("|")].join("\t"))
    end
  end
end

if options[:statistics]
  get_stats(ontology.profile_stats).each do |stat|
    puts stat.join("\t")
  end
end

if options[:list_term_attributes]
  term_attributes = ontology.list_term_attributes
  term_attributes.each do |t_attr|
    t_attr[0] = t_attr[0].to_s
    puts t_attr.join("\t")
  end
end

# Cross-reference lookup: map every input term through the :tag dictionary
# built from the --keyword regex and write one "id<TAB>xref" line per hit.
if !options[:keyword].nil?
  xref_translated = []
  dict = ontology.dicts[:tag][options[:xref_sense]]
  data.each do |id, prof|
    xrefs = []
    prof.each do |t|
      if options[:xref_sense] == :byValue
        query = dict[t.to_s]
      else
        query = dict[t]
      end
      xrefs.concat(query) if !query.nil?
    end
    xref_translated << [id, xrefs] if !xrefs.empty?
  end
  File.open(options[:output_file], 'w') do |f|
    xref_translated.each do |id, prof|
      prof.each do |t|
        f.puts [id, t].join("\t")
      end
    end
  end
end
data/bin/strsimnet.rb CHANGED
@@ -111,12 +111,11 @@ texts2compare = load_table_file(input_file = options[:input_file],
111
111
  targetCol = options[:cindex],
112
112
  filterCol = options[:findex],
113
113
  filterValue = options[:filter_value])
114
-
115
114
  # Verbose point
116
115
  puts "Calculating similitude for (" + texts2compare.length.to_s + ") elements"
117
116
 
118
117
  # Obtain all Vs all
119
- similitudes_AllVsAll = similitude_network(texts2compare,options[:rm_char])
118
+ similitudes_AllVsAll = similitude_network(texts2compare, charsToRemove: options[:rm_char])
120
119
 
121
120
  # Verbose point
122
121
  puts "Writing output file ..."
@@ -0,0 +1,4 @@
1
+ GO http://purl.obolibrary.org/obo/go/go-basic.obo
2
+ HPO http://purl.obolibrary.org/obo/hp.obo
3
+ MONDO http://purl.obolibrary.org/obo/mondo.obo
4
+ EFO http://www.ebi.ac.uk/efo/efo.obo