semtools 0.1.8 → 0.1.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a3f63cc6548a9938e31121d2018d1c1c477987007c5d253b5fa814a285bdb576
4
- data.tar.gz: e1911d3157c3046590ca13bc86215d2260b4a8b2b1b25affa5c2673881036795
3
+ metadata.gz: c1748a4acbeed0c35e072aad9433498fe72cc70af5b2e42b27b246c306cc822b
4
+ data.tar.gz: 8c03e4a77c4daeea26691210f84d7c5924ee71e1199a5fe6fe21c67e9d297acb
5
5
  SHA512:
6
- metadata.gz: 30c95df80957a4a35b6fea05b9552352f529d8e45c10f6b128924a3ce2ee5d90e92a1e9d5fe0016d25538147e12d3a9199c81222642c94cdd0eb3c89eea168ef
7
- data.tar.gz: ddc9e600fd984e68d060b7be05adf27b3f20bb67e638d42acc4b9b156eedabfce20d6f588a03d1fbc2948fedbd80d498f1767c0e3f8ea03720fa0ca327b95f3c
6
+ metadata.gz: 86eb4530e233bb8ef0e863e7486f42550d37e927988a6c7a8815e487e3c11c25b8fea199a5362a7be109ee6cafd6f9323f9906185504f0c47746d0768584adbd
7
+ data.tar.gz: 0a11d9a0194325bd08f53342f250e798a3cce9eec1377148128a3b8df9a48efb88be625c08b4f36944ad729239537f4f343ad6ccd00bca8ba73b1fc10e828e77
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Semtools
2
2
 
3
+ DEPRECATED PROJECT. MIGRATED TO [python semtools](https://github.com/seoanezonjic/py_semtools)
4
+
3
5
  Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/semtools`. To experiment with that code, run `bin/console` for an interactive prompt.
4
6
 
5
7
  TODO: Delete this and the text above, and describe your gem
data/bin/semtools.rb CHANGED
@@ -20,6 +20,13 @@ def load_tabular_file(file)
20
20
  return records
21
21
  end
22
22
 
23
+ def format_tabular_data(data, separator, id_col, terms_col)
24
+ data.map!{|row|
25
+ [row[id_col],
26
+ row[terms_col].split(separator).map!{|term| term.to_sym}]
27
+ }
28
+ end
29
+
23
30
  def store_profiles(file, ontology)
24
31
  file.each do |id, terms|
25
32
  ontology.add_profile(id, terms)
@@ -85,22 +92,14 @@ def clean_profiles(profiles, ontology, options)
85
92
  return removed_profiles
86
93
  end
87
94
 
88
- def expand_profiles(profiles, ontology, unwanted_terms = [])
89
- profiles.each do |disease_id, terms|
90
- terms.each do |term|
91
- profiles[disease_id] << ontology.get_ancestors(term).difference(unwanted_terms)
92
- end
93
- end
94
- end
95
-
96
- def write_similarity_profile_list(input, onto_obj, similarity_type)
97
- similarity_file = File.basename(input, ".*")+'_semantic_similarity_list'
98
- File.open(similarity_file, 'w') do |file|
99
- onto_obj.profiles.each do |profile_query_key, profile_query_value|
100
- onto_obj.profiles.each do |profile_search_key, profile_search_value|
101
- file.puts([profile_query_key, profile_search_key, onto_obj.compare(profile_query_value, profile_search_value, sim_type: similarity_type)].join("\t"))
95
+ def write_similarity_profile_list(output, onto_obj, similarity_type, refs)
96
+ profiles_similarity = onto_obj.compare_profiles(sim_type: similarity_type, external_profiles: refs)
97
+ File.open(output, 'w') do |f|
98
+ profiles_similarity.each do |pairsA, pairsB_and_values|
99
+ pairsB_and_values.each do |pairsB, values|
100
+ f.puts "#{pairsA}\t#{pairsB}\t#{values}"
102
101
  end
103
- end
102
+ end
104
103
  end
105
104
  end
106
105
 
@@ -156,8 +155,77 @@ def get_stats(stats)
156
155
  return report_stats
157
156
  end
158
157
 
158
+ def sort_terms_by_levels(terms, modifiers, ontology, all_childs)
159
+ term_levels = ontology.get_terms_levels(all_childs)
160
+ if modifiers.include?('a')
161
+ term_levels.sort!{|t1,t2| t2[1] <=> t1[1]}
162
+ else
163
+ term_levels.sort!{|t1,t2| t1[1] <=> t2[1]}
164
+ end
165
+ all_childs = term_levels.map{|t| t.first}
166
+ return all_childs, term_levels
167
+ end
159
168
 
169
+ def get_childs(ontology, terms, modifiers)
170
+ #modifiers
171
+ # - a: get ancestors instead of descendants
172
+ # - r: get parent-child relations instead of list descendants/ancestors
173
+ # - hN: when list of relations, it is limited to N hops from given term
174
+ # - n: give terms names instead of term codes
175
+ all_childs = []
176
+ terms.each do |term|
177
+ if modifiers.include?('a')
178
+ childs = ontology.get_ancestors(term)
179
+ else
180
+ childs = ontology.get_descendants(term)
181
+ end
182
+ all_childs = all_childs | childs
183
+ end
184
+ if modifiers.include?('r')
185
+ relations = []
186
+ all_childs = all_childs | terms # Add parents that generated child list
187
+ target_hops = nil
188
+ if /h([0-9]+)/ =~ modifiers
189
+ target_hops = $1.to_i + 1 # take into account reference term (parent/child) addition
190
+ all_childs, term_levels = sort_terms_by_levels(terms, modifiers, ontology, all_childs)
191
+ end
160
192
 
193
+ current_level = nil
194
+ hops = 0
195
+ all_childs.each_with_index do |term, i|
196
+ if !target_hops.nil?
197
+ level = term_levels[i][1]
198
+ if level != current_level
199
+ current_level = level
200
+ hops +=1
201
+ break if hops == target_hops + 1 # +1 take into account that we have detected a level change and we saved the last one entirely
202
+ end
203
+ end
204
+ if modifiers.include?('a')
205
+ descendants = ontology.get_direct_ancentors(term)
206
+ else
207
+ descendants = ontology.get_direct_descendants(term)
208
+ end
209
+ if !descendants.nil?
210
+ descendants.each do |desc|
211
+ if modifiers.include?('a')
212
+ relations << [desc, term]
213
+ else
214
+ relations << [term, desc]
215
+ end
216
+ end
217
+ end
218
+ end
219
+ all_childs = []
220
+ relations.each do |rel|
221
+ rel, _ = ontology.translate_ids(rel) if modifiers.include?('n')
222
+ all_childs << rel
223
+ end
224
+ else
225
+ all_childs.map!{|c| ontology.translate_id(c)} if modifiers.include?('n')
226
+ end
227
+ return all_childs
228
+ end
161
229
 
162
230
 
163
231
 
@@ -183,9 +251,9 @@ OptionParser.new do |opts|
183
251
  options[:output_file] = item
184
252
  end
185
253
 
186
- options[:IC] = false
187
- opts.on("-I", "--IC", "Get IC") do
188
- options[:IC] = true
254
+ options[:IC] = nil
255
+ opts.on("-I", "--IC STRING", "Get IC. 'prof' for stored profiles or 'ont' for terms in ontology") do |item|
256
+ options[:IC] = item
189
257
  end
190
258
 
191
259
  options[:ontology_file] = nil
@@ -207,6 +275,11 @@ OptionParser.new do |opts|
207
275
  options[:similarity] = sim_method.to_sym
208
276
  end
209
277
 
278
+ options[:reference_profiles] = nil
279
+ opts.on("--reference_profiles PATH", "Path to file tabulated file with first column as id profile and second column with ontology terms separated by separator. ") do |opt|
280
+ options[:reference_profiles] = opt
281
+ end
282
+
210
283
  options[:clean_profiles] = false
211
284
  opts.on("-c", "--clean_profiles", "Removes ancestors, descendants and obsolete terms from profiles") do
212
285
  options[:clean_profiles] = true
@@ -228,13 +301,13 @@ OptionParser.new do |opts|
228
301
  end
229
302
 
230
303
  options[:xref_sense] = :byValue
231
- opts.on("--xref_sense ", "Ontology-xref or xref-ontology. By default xref-ontology if set, ontology-xref") do
304
+ opts.on("--xref_sense", "Ontology-xref or xref-ontology. By default xref-ontology if set, ontology-xref") do
232
305
  options[:xref_sense] = :byTerm
233
306
  end
234
307
 
235
- options[:expand_profiles] = false
236
- opts.on("-e", "--expand_profiles", "Expand profiles adding ancestors") do
237
- options[:expand_profiles] = true
308
+ options[:expand_profiles] = nil
309
+ opts.on("-e", "--expand_profiles STRING", "Expand profiles adding ancestors if 'parental', adding new profiles if 'propagate'") do |meth|
310
+ options[:expand_profiles] = meth
238
311
  end
239
312
 
240
313
  options[:unwanted_terms] = []
@@ -270,15 +343,19 @@ OptionParser.new do |opts|
270
343
  end
271
344
 
272
345
  options[:subject_column] = 0
273
- opts.on("-f NUM", "--subject_column NUM", "The number of the column for the subject id") do |ncol|
346
+ opts.on("-f NUM", "--subject_column INTEGER", "The number of the column for the subject id") do |ncol|
274
347
  options[:subject_column] = ncol.to_i
275
348
  end
276
349
 
277
350
  options[:annotations_column] = 1
278
- opts.on("-a NUM", "--annotations_column NUM", "The number of the column for the annotation ids") do |ncol|
279
- options[:annotations_column] = ncol.to_i
351
+ opts.on("-a NUM", "--annotations_column INTEGER", "The number of the column for the annotation ids") do |item|
352
+ options[:annotations_column] = item.to_i
280
353
  end
281
354
 
355
+ options[:root] = nil
356
+ opts.on("-R STRING", "--root STRING", "Term id to be considered the new root of the ontology") do |item|
357
+ options[:root] = item.to_sym
358
+ end
282
359
 
283
360
  options[:list_term_attributes] = false
284
361
  opts.on("--list_term_attributes", "The number of the column for the annotation ids") do
@@ -299,15 +376,17 @@ end
299
376
  if !options[:ontology_file].nil?
300
377
  options[:ontology_file] = get_ontology_file(options[:ontology_file], ont_index_file)
301
378
  end
302
- ontology = Ontology.new(file: options[:ontology_file], load_file: true)
379
+
380
+ extra_dicts = []
381
+ extra_dicts << [:xref, {select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true}] if !options[:keyword].nil?
382
+ ontology = Ontology.new(file: options[:ontology_file], load_file: true, extra_dicts: extra_dicts)
383
+
384
+ Ontology.mutate(options[:root], ontology, clone: false) if !options[:root].nil? # TODO fix method and convert in class method
303
385
 
304
386
  if !options[:input_file].nil?
305
387
  data = load_tabular_file(options[:input_file])
306
388
  if options[:list_translate].nil? || !options[:keyword].nil?
307
- data.map!{|row|
308
- [row[options[:subject_column]],
309
- row[options[:annotations_column]].split(options[:separator]).map!{|term| term.to_sym}]
310
- }
389
+ format_tabular_data(data, options[:separator], options[:subject_column], options[:annotations_column])
311
390
  store_profiles(data, ontology) if options[:translate] != 'codes' && options[:keyword].nil?
312
391
  end
313
392
  end
@@ -345,16 +424,24 @@ if options[:clean_profiles]
345
424
  end
346
425
  end
347
426
 
348
- if options[:expand_profiles]
349
- expanded_profiles = expand_profiles(ontology.profiles, ontology, options[:unwanted_terms])
427
+ if !options[:expand_profiles].nil?
428
+ ontology.expand_profiles(options[:expand_profiles], unwanted_terms: options[:unwanted_terms])
350
429
  end
351
430
 
352
431
  if !options[:similarity].nil?
353
- write_similarity_profile_list(input = options[:input_file], onto_obj=ontology, similarity_type = options[:similarity])
432
+ refs = nil
433
+ if !options[:reference_profiles].nil?
434
+ refs = load_tabular_file(options[:reference_profiles])
435
+ format_tabular_data(refs, options[:separator], 0, 1)
436
+ refs = refs.to_h
437
+ refs = clean_profiles(ontology.profiles, ontology, options) if options[:clean_profiles]
438
+ abort('Reference profiles are empty after cleaning ') if refs.nil? || refs.empty?
439
+ end
440
+ write_similarity_profile_list(options[:output_file], ontology, options[:similarity], refs)
354
441
  end
355
442
 
356
443
 
357
- if options[:IC]
444
+ if options[:IC] == 'prof'
358
445
  ontology.add_observed_terms_from_profiles
359
446
  by_ontology, by_freq = ontology.get_profiles_resnik_dual_ICs
360
447
  ic_file = File.basename(options[:input_file], ".*")+'_IC_onto_freq'
@@ -363,6 +450,12 @@ if options[:IC]
363
450
  file.puts([id, by_ontology[id], by_freq[id]].join("\t"))
364
451
  end
365
452
  end
453
+ elsif options[:IC] == 'ont'
454
+ File.open('ont_IC' , 'w') do |file|
455
+ ontology.each do |term|
456
+ file.puts "#{term}\t#{ontology.get_IC(term)}"
457
+ end
458
+ end
366
459
  end
367
460
 
368
461
  if options[:translate] == 'names'
@@ -371,38 +464,17 @@ end
371
464
 
372
465
  if !options[:childs].first.empty?
373
466
  terms, modifiers = options[:childs]
374
- all_childs = []
375
- terms.each do |term|
376
- childs = ontology.get_descendants(term)
377
- all_childs = all_childs | childs
378
- end
379
- if modifiers.include?('r')
380
- relations = []
381
- all_childs = all_childs | terms # Add parents that generated child list
382
- all_childs.each do |term|
383
- descendants = ontology.get_direct_descendants(term)
384
- if !descendants.nil?
385
- descendants.each do |desc|
386
- relations << [term, desc]
387
- end
388
- end
389
- end
390
- relations.each do |rel|
391
- rel, _ = ontology.translate_ids(rel) if modifiers.include?('n')
392
- puts rel.join("\t")
393
- end
394
- else
395
- all_childs.each do |c|
396
- if modifiers.include?('n')
397
- puts ontology.translate_id(c)
398
- else
399
- puts c
400
- end
467
+ all_childs = get_childs(ontology, terms, modifiers)
468
+ all_childs.each do |ac|
469
+ if modifiers.include?('r')
470
+ puts ac.join("\t")
471
+ else
472
+ puts ac
401
473
  end
402
474
  end
403
475
  end
404
476
 
405
- if !options[:output_file].nil?
477
+ if !options[:output_file].nil? && options[:similarity].nil?
406
478
  File.open(options[:output_file], 'w') do |file|
407
479
  ontology.profiles.each do |id, terms|
408
480
  file.puts([id, terms.join("|")].join("\t"))
@@ -426,12 +498,15 @@ end
426
498
 
427
499
  if !options[:keyword].nil?
428
500
  xref_translated = []
429
- ontology.calc_dictionary(:xref, select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true, substitute_alternatives: false)
430
501
  dict = ontology.dicts[:tag][options[:xref_sense]]
431
502
  data.each do |id, prof|
432
503
  xrefs = []
433
504
  prof.each do |t|
434
- query = dict[t.to_s]
505
+ if options[:xref_sense] == :byValue
506
+ query = dict[t.to_s]
507
+ else
508
+ query = dict[t]
509
+ end
435
510
  xrefs.concat(query) if !query.nil?
436
511
  end
437
512
  xref_translated << [id, xrefs] if !xrefs.empty?
@@ -443,4 +518,4 @@ if !options[:keyword].nil?
443
518
  end
444
519
  end
445
520
  end
446
- end
521
+ end