semtools 0.1.8 → 0.1.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/bin/semtools.rb +140 -65
- data/lib/semtools/ontology.rb +1235 -2061
- data/lib/semtools/parsers/file_parser.rb +32 -0
- data/lib/semtools/parsers/json_parser.rb +84 -0
- data/lib/semtools/parsers/oboparser.rb +511 -0
- data/lib/semtools/version.rb +1 -1
- data/lib/semtools.rb +3 -0
- data/semtools.gemspec +1 -1
- metadata +9 -6
- data/lib/semtools/math_methods.rb +0 -148
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c1748a4acbeed0c35e072aad9433498fe72cc70af5b2e42b27b246c306cc822b
|
4
|
+
data.tar.gz: 8c03e4a77c4daeea26691210f84d7c5924ee71e1199a5fe6fe21c67e9d297acb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86eb4530e233bb8ef0e863e7486f42550d37e927988a6c7a8815e487e3c11c25b8fea199a5362a7be109ee6cafd6f9323f9906185504f0c47746d0768584adbd
|
7
|
+
data.tar.gz: 0a11d9a0194325bd08f53342f250e798a3cce9eec1377148128a3b8df9a48efb88be625c08b4f36944ad729239537f4f343ad6ccd00bca8ba73b1fc10e828e77
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Semtools
|
2
2
|
|
3
|
+
DEPRECATED PROJECT. MIGRATED TO [python semtools](https://github.com/seoanezonjic/py_semtools)
|
4
|
+
|
3
5
|
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/semtools`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
6
|
|
5
7
|
TODO: Delete this and the text above, and describe your gem
|
data/bin/semtools.rb
CHANGED
@@ -20,6 +20,13 @@ def load_tabular_file(file)
|
|
20
20
|
return records
|
21
21
|
end
|
22
22
|
|
23
|
+
def format_tabular_data(data, separator, id_col, terms_col)
|
24
|
+
data.map!{|row|
|
25
|
+
[row[id_col],
|
26
|
+
row[terms_col].split(separator).map!{|term| term.to_sym}]
|
27
|
+
}
|
28
|
+
end
|
29
|
+
|
23
30
|
def store_profiles(file, ontology)
|
24
31
|
file.each do |id, terms|
|
25
32
|
ontology.add_profile(id, terms)
|
@@ -85,22 +92,14 @@ def clean_profiles(profiles, ontology, options)
|
|
85
92
|
return removed_profiles
|
86
93
|
end
|
87
94
|
|
88
|
-
def
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
end
|
95
|
-
|
96
|
-
def write_similarity_profile_list(input, onto_obj, similarity_type)
|
97
|
-
similarity_file = File.basename(input, ".*")+'_semantic_similarity_list'
|
98
|
-
File.open(similarity_file, 'w') do |file|
|
99
|
-
onto_obj.profiles.each do |profile_query_key, profile_query_value|
|
100
|
-
onto_obj.profiles.each do |profile_search_key, profile_search_value|
|
101
|
-
file.puts([profile_query_key, profile_search_key, onto_obj.compare(profile_query_value, profile_search_value, sim_type: similarity_type)].join("\t"))
|
95
|
+
def write_similarity_profile_list(output, onto_obj, similarity_type, refs)
|
96
|
+
profiles_similarity = onto_obj.compare_profiles(sim_type: similarity_type, external_profiles: refs)
|
97
|
+
File.open(output, 'w') do |f|
|
98
|
+
profiles_similarity.each do |pairsA, pairsB_and_values|
|
99
|
+
pairsB_and_values.each do |pairsB, values|
|
100
|
+
f.puts "#{pairsA}\t#{pairsB}\t#{values}"
|
102
101
|
end
|
103
|
-
end
|
102
|
+
end
|
104
103
|
end
|
105
104
|
end
|
106
105
|
|
@@ -156,8 +155,77 @@ def get_stats(stats)
|
|
156
155
|
return report_stats
|
157
156
|
end
|
158
157
|
|
158
|
+
def sort_terms_by_levels(terms, modifiers, ontology, all_childs)
|
159
|
+
term_levels = ontology.get_terms_levels(all_childs)
|
160
|
+
if modifiers.include?('a')
|
161
|
+
term_levels.sort!{|t1,t2| t2[1] <=> t1[1]}
|
162
|
+
else
|
163
|
+
term_levels.sort!{|t1,t2| t1[1] <=> t2[1]}
|
164
|
+
end
|
165
|
+
all_childs = term_levels.map{|t| t.first}
|
166
|
+
return all_childs, term_levels
|
167
|
+
end
|
159
168
|
|
169
|
+
def get_childs(ontology, terms, modifiers)
|
170
|
+
#modifiers
|
171
|
+
# - a: get ancestors instead of descendants
|
172
|
+
# - r: get parent-child relations instead of list descendants/ancestors
|
173
|
+
# - hN: when list of relations, it is limited to N hops from given term
|
174
|
+
# - n: give terms names instead of term codes
|
175
|
+
all_childs = []
|
176
|
+
terms.each do |term|
|
177
|
+
if modifiers.include?('a')
|
178
|
+
childs = ontology.get_ancestors(term)
|
179
|
+
else
|
180
|
+
childs = ontology.get_descendants(term)
|
181
|
+
end
|
182
|
+
all_childs = all_childs | childs
|
183
|
+
end
|
184
|
+
if modifiers.include?('r')
|
185
|
+
relations = []
|
186
|
+
all_childs = all_childs | terms # Add parents that generated child list
|
187
|
+
target_hops = nil
|
188
|
+
if /h([0-9]+)/ =~ modifiers
|
189
|
+
target_hops = $1.to_i + 1 # take into account reference term (parent/child) addition
|
190
|
+
all_childs, term_levels = sort_terms_by_levels(terms, modifiers, ontology, all_childs)
|
191
|
+
end
|
160
192
|
|
193
|
+
current_level = nil
|
194
|
+
hops = 0
|
195
|
+
all_childs.each_with_index do |term, i|
|
196
|
+
if !target_hops.nil?
|
197
|
+
level = term_levels[i][1]
|
198
|
+
if level != current_level
|
199
|
+
current_level = level
|
200
|
+
hops +=1
|
201
|
+
break if hops == target_hops + 1 # +1 take into account that we have detected a level change and we saved the last one entirely
|
202
|
+
end
|
203
|
+
end
|
204
|
+
if modifiers.include?('a')
|
205
|
+
descendants = ontology.get_direct_ancentors(term)
|
206
|
+
else
|
207
|
+
descendants = ontology.get_direct_descendants(term)
|
208
|
+
end
|
209
|
+
if !descendants.nil?
|
210
|
+
descendants.each do |desc|
|
211
|
+
if modifiers.include?('a')
|
212
|
+
relations << [desc, term]
|
213
|
+
else
|
214
|
+
relations << [term, desc]
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end
|
219
|
+
all_childs = []
|
220
|
+
relations.each do |rel|
|
221
|
+
rel, _ = ontology.translate_ids(rel) if modifiers.include?('n')
|
222
|
+
all_childs << rel
|
223
|
+
end
|
224
|
+
else
|
225
|
+
all_childs.map!{|c| ontology.translate_id(c)} if modifiers.include?('n')
|
226
|
+
end
|
227
|
+
return all_childs
|
228
|
+
end
|
161
229
|
|
162
230
|
|
163
231
|
|
@@ -183,9 +251,9 @@ OptionParser.new do |opts|
|
|
183
251
|
options[:output_file] = item
|
184
252
|
end
|
185
253
|
|
186
|
-
options[:IC] =
|
187
|
-
opts.on("-I", "--IC", "Get IC") do
|
188
|
-
options[:IC] =
|
254
|
+
options[:IC] = nil
|
255
|
+
opts.on("-I", "--IC STRING", "Get IC. 'prof' for stored profiles or 'ont' for terms in ontology") do |item|
|
256
|
+
options[:IC] = item
|
189
257
|
end
|
190
258
|
|
191
259
|
options[:ontology_file] = nil
|
@@ -207,6 +275,11 @@ OptionParser.new do |opts|
|
|
207
275
|
options[:similarity] = sim_method.to_sym
|
208
276
|
end
|
209
277
|
|
278
|
+
options[:reference_profiles] = nil
|
279
|
+
opts.on("--reference_profiles PATH", "Path to file tabulated file with first column as id profile and second column with ontology terms separated by separator. ") do |opt|
|
280
|
+
options[:reference_profiles] = opt
|
281
|
+
end
|
282
|
+
|
210
283
|
options[:clean_profiles] = false
|
211
284
|
opts.on("-c", "--clean_profiles", "Removes ancestors, descendants and obsolete terms from profiles") do
|
212
285
|
options[:clean_profiles] = true
|
@@ -228,13 +301,13 @@ OptionParser.new do |opts|
|
|
228
301
|
end
|
229
302
|
|
230
303
|
options[:xref_sense] = :byValue
|
231
|
-
opts.on("--xref_sense
|
304
|
+
opts.on("--xref_sense", "Ontology-xref or xref-ontology. By default xref-ontology if set, ontology-xref") do
|
232
305
|
options[:xref_sense] = :byTerm
|
233
306
|
end
|
234
307
|
|
235
|
-
options[:expand_profiles] =
|
236
|
-
opts.on("-e", "--expand_profiles", "Expand profiles adding ancestors") do
|
237
|
-
options[:expand_profiles] =
|
308
|
+
options[:expand_profiles] = nil
|
309
|
+
opts.on("-e", "--expand_profiles STRING", "Expand profiles adding ancestors if 'parental', adding new profiles if 'propagate'") do |meth|
|
310
|
+
options[:expand_profiles] = meth
|
238
311
|
end
|
239
312
|
|
240
313
|
options[:unwanted_terms] = []
|
@@ -270,15 +343,19 @@ OptionParser.new do |opts|
|
|
270
343
|
end
|
271
344
|
|
272
345
|
options[:subject_column] = 0
|
273
|
-
opts.on("-f NUM", "--subject_column
|
346
|
+
opts.on("-f NUM", "--subject_column INTEGER", "The number of the column for the subject id") do |ncol|
|
274
347
|
options[:subject_column] = ncol.to_i
|
275
348
|
end
|
276
349
|
|
277
350
|
options[:annotations_column] = 1
|
278
|
-
opts.on("-a NUM", "--annotations_column
|
279
|
-
options[:annotations_column] =
|
351
|
+
opts.on("-a NUM", "--annotations_column INTEGER", "The number of the column for the annotation ids") do |item|
|
352
|
+
options[:annotations_column] = item.to_i
|
280
353
|
end
|
281
354
|
|
355
|
+
options[:root] = nil
|
356
|
+
opts.on("-R STRING", "--root STRING", "Term id to be considered the new root of the ontology") do |item|
|
357
|
+
options[:root] = item.to_sym
|
358
|
+
end
|
282
359
|
|
283
360
|
options[:list_term_attributes] = false
|
284
361
|
opts.on("--list_term_attributes", "The number of the column for the annotation ids") do
|
@@ -299,15 +376,17 @@ end
|
|
299
376
|
if !options[:ontology_file].nil?
|
300
377
|
options[:ontology_file] = get_ontology_file(options[:ontology_file], ont_index_file)
|
301
378
|
end
|
302
|
-
|
379
|
+
|
380
|
+
extra_dicts = []
|
381
|
+
extra_dicts << [:xref, {select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true}] if !options[:keyword].nil?
|
382
|
+
ontology = Ontology.new(file: options[:ontology_file], load_file: true, extra_dicts: extra_dicts)
|
383
|
+
|
384
|
+
Ontology.mutate(options[:root], ontology, clone: false) if !options[:root].nil? # TODO fix method and convert in class method
|
303
385
|
|
304
386
|
if !options[:input_file].nil?
|
305
387
|
data = load_tabular_file(options[:input_file])
|
306
388
|
if options[:list_translate].nil? || !options[:keyword].nil?
|
307
|
-
data
|
308
|
-
[row[options[:subject_column]],
|
309
|
-
row[options[:annotations_column]].split(options[:separator]).map!{|term| term.to_sym}]
|
310
|
-
}
|
389
|
+
format_tabular_data(data, options[:separator], options[:subject_column], options[:annotations_column])
|
311
390
|
store_profiles(data, ontology) if options[:translate] != 'codes' && options[:keyword].nil?
|
312
391
|
end
|
313
392
|
end
|
@@ -345,16 +424,24 @@ if options[:clean_profiles]
|
|
345
424
|
end
|
346
425
|
end
|
347
426
|
|
348
|
-
if options[:expand_profiles]
|
349
|
-
|
427
|
+
if !options[:expand_profiles].nil?
|
428
|
+
ontology.expand_profiles(options[:expand_profiles], unwanted_terms: options[:unwanted_terms])
|
350
429
|
end
|
351
430
|
|
352
431
|
if !options[:similarity].nil?
|
353
|
-
|
432
|
+
refs = nil
|
433
|
+
if !options[:reference_profiles].nil?
|
434
|
+
refs = load_tabular_file(options[:reference_profiles])
|
435
|
+
format_tabular_data(refs, options[:separator], 0, 1)
|
436
|
+
refs = refs.to_h
|
437
|
+
refs = clean_profiles(ontology.profiles, ontology, options) if options[:clean_profiles]
|
438
|
+
abort('Reference profiles are empty after cleaning ') if refs.nil? || refs.empty?
|
439
|
+
end
|
440
|
+
write_similarity_profile_list(options[:output_file], ontology, options[:similarity], refs)
|
354
441
|
end
|
355
442
|
|
356
443
|
|
357
|
-
if options[:IC]
|
444
|
+
if options[:IC] == 'prof'
|
358
445
|
ontology.add_observed_terms_from_profiles
|
359
446
|
by_ontology, by_freq = ontology.get_profiles_resnik_dual_ICs
|
360
447
|
ic_file = File.basename(options[:input_file], ".*")+'_IC_onto_freq'
|
@@ -363,6 +450,12 @@ if options[:IC]
|
|
363
450
|
file.puts([id, by_ontology[id], by_freq[id]].join("\t"))
|
364
451
|
end
|
365
452
|
end
|
453
|
+
elsif options[:IC] == 'ont'
|
454
|
+
File.open('ont_IC' , 'w') do |file|
|
455
|
+
ontology.each do |term|
|
456
|
+
file.puts "#{term}\t#{ontology.get_IC(term)}"
|
457
|
+
end
|
458
|
+
end
|
366
459
|
end
|
367
460
|
|
368
461
|
if options[:translate] == 'names'
|
@@ -371,38 +464,17 @@ end
|
|
371
464
|
|
372
465
|
if !options[:childs].first.empty?
|
373
466
|
terms, modifiers = options[:childs]
|
374
|
-
all_childs =
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
relations = []
|
381
|
-
all_childs = all_childs | terms # Add parents that generated child list
|
382
|
-
all_childs.each do |term|
|
383
|
-
descendants = ontology.get_direct_descendants(term)
|
384
|
-
if !descendants.nil?
|
385
|
-
descendants.each do |desc|
|
386
|
-
relations << [term, desc]
|
387
|
-
end
|
388
|
-
end
|
389
|
-
end
|
390
|
-
relations.each do |rel|
|
391
|
-
rel, _ = ontology.translate_ids(rel) if modifiers.include?('n')
|
392
|
-
puts rel.join("\t")
|
393
|
-
end
|
394
|
-
else
|
395
|
-
all_childs.each do |c|
|
396
|
-
if modifiers.include?('n')
|
397
|
-
puts ontology.translate_id(c)
|
398
|
-
else
|
399
|
-
puts c
|
400
|
-
end
|
467
|
+
all_childs = get_childs(ontology, terms, modifiers)
|
468
|
+
all_childs.each do |ac|
|
469
|
+
if modifiers.include?('r')
|
470
|
+
puts ac.join("\t")
|
471
|
+
else
|
472
|
+
puts ac
|
401
473
|
end
|
402
474
|
end
|
403
475
|
end
|
404
476
|
|
405
|
-
if !options[:output_file].nil?
|
477
|
+
if !options[:output_file].nil? && options[:similarity].nil?
|
406
478
|
File.open(options[:output_file], 'w') do |file|
|
407
479
|
ontology.profiles.each do |id, terms|
|
408
480
|
file.puts([id, terms.join("|")].join("\t"))
|
@@ -426,12 +498,15 @@ end
|
|
426
498
|
|
427
499
|
if !options[:keyword].nil?
|
428
500
|
xref_translated = []
|
429
|
-
ontology.calc_dictionary(:xref, select_regex: /(#{options[:keyword]})/, store_tag: :tag, multiterm: true, substitute_alternatives: false)
|
430
501
|
dict = ontology.dicts[:tag][options[:xref_sense]]
|
431
502
|
data.each do |id, prof|
|
432
503
|
xrefs = []
|
433
504
|
prof.each do |t|
|
434
|
-
|
505
|
+
if options[:xref_sense] == :byValue
|
506
|
+
query = dict[t.to_s]
|
507
|
+
else
|
508
|
+
query = dict[t]
|
509
|
+
end
|
435
510
|
xrefs.concat(query) if !query.nil?
|
436
511
|
end
|
437
512
|
xref_translated << [id, xrefs] if !xrefs.empty?
|
@@ -443,4 +518,4 @@ if !options[:keyword].nil?
|
|
443
518
|
end
|
444
519
|
end
|
445
520
|
end
|
446
|
-
end
|
521
|
+
end
|