obo_parser 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/utilities.rb +212 -138
- data/obo_parser.gemspec +2 -2
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.
|
1
|
+
0.3.7
|
data/lib/utilities.rb
CHANGED
@@ -18,7 +18,7 @@ module OboParser::Utilities
|
|
18
18
|
#
|
19
19
|
# @param [Integer] cutoff only Term ids with > cutoff labels will be reported
|
20
20
|
# @param [Array] files an Array of read files
|
21
|
-
# @return [String] the
|
21
|
+
# @return [String] the translation in tab delimited format
|
22
22
|
def self.dump_comparison_by_id(cutoff = 0, files = [])
|
23
23
|
return '' if files.size < 1
|
24
24
|
|
@@ -47,7 +47,9 @@ module OboParser::Utilities
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
+
|
50
51
|
# Returns all labels found in all passed ontologies. Does not yet include synonyms.
|
52
|
+
# Caution: strips adult, embryonic, larval from labels (comment to remove)
|
51
53
|
#
|
52
54
|
#== Example use
|
53
55
|
# of1 = File.read('fly_anatomy.obo')
|
@@ -64,8 +66,11 @@ module OboParser::Utilities
|
|
64
66
|
files.each do |f|
|
65
67
|
o = parse_obo_file(f)
|
66
68
|
o.term_hash.keys.each do |k|
|
69
|
+
|
70
|
+
# TODO: make this optional
|
67
71
|
tmp = k.gsub(/adult/, "").strip
|
68
72
|
tmp = k.gsub(/embryonic\/larval/, "").strip
|
73
|
+
|
69
74
|
if comparison[tmp]
|
70
75
|
comparison[tmp] += 1
|
71
76
|
else
|
@@ -86,10 +91,9 @@ module OboParser::Utilities
|
|
86
91
|
|
87
92
|
end
|
88
93
|
|
94
|
+
#== Two column correspondences and translation tools
|
89
95
|
|
90
|
-
|
91
|
-
|
92
|
-
HOMOLONTO_HEADER = %{
|
96
|
+
HOMOLONTO_HEADER = %{
|
93
97
|
format-version: 1.2
|
94
98
|
auto-generated-by: obo_parser
|
95
99
|
default-namespace: fix_me
|
@@ -212,31 +216,129 @@ is_anti_symmetric: true
|
|
212
216
|
true
|
213
217
|
end
|
214
218
|
|
215
|
-
|
219
|
+
|
220
|
+
# Takes a two column input file, references it to two ontologies, and returns a report
|
221
|
+
# that identifies data pairs that have parents who are also a data pair given a
|
222
|
+
# provided property/relation type.
|
216
223
|
#
|
217
224
|
#== Example use
|
218
225
|
# file = File.read('HAO_TGMA_list.txt')
|
219
226
|
# col1_obo = File.read('hao.obo')
|
220
227
|
# col2_obo = File.read('tgma.obo')
|
228
|
+
#
|
229
|
+
# foo = OboParser::Utilities.parents(:data => data, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
|
230
|
+
#
|
231
|
+
# puts "-- NO (#{foo[:no].size})\n"
|
232
|
+
# puts foo[:no].join("\n")
|
233
|
+
# puts "-- YES (#{foo[:yes].size})\n"
|
234
|
+
# puts foo[:yes].join("\n")
|
235
|
+
#
|
236
|
+
# @param [Hash] options options.
|
237
|
+
# @param [Symbol] data the two column data file.
|
238
|
+
# @param [Symbol] col1_obo the OBO file referenced in the first column
|
239
|
+
# @param [Symbol] col2_obo the OBO file referenced in the second column
|
240
|
+
# @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
|
241
|
+
# @return [Hash] a hash of {:yes => [], :no => [], :unplaced => []}
|
242
|
+
def parents(options = {})
|
243
|
+
opt = {
|
244
|
+
:data => nil,
|
245
|
+
:col1_obo => nil,
|
246
|
+
:col2_obo => nil,
|
247
|
+
:property => nil
|
248
|
+
}.merge!(options)
|
249
|
+
|
250
|
+
return false if opt[:property].nil?
|
251
|
+
c1obo = parse_obo_file(opt[:col1_obo])
|
252
|
+
c2obo = parse_obo_file(opt[:col2_obo])
|
253
|
+
|
254
|
+
result = {:yes => [], :no => [], :unplaced => []}
|
255
|
+
# update
|
256
|
+
array = OboParser::Utilities.arrayify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
|
257
|
+
|
258
|
+
# index the pairs
|
259
|
+
index = array.inject({}){|hsh, i| hsh.update({"#{i[0]}#{i[1]}" => i})}
|
260
|
+
|
261
|
+
obo1_hash = c1obo.id_index
|
262
|
+
obo2_hash = c2obo.id_index
|
263
|
+
|
264
|
+
array.each do |k|
|
265
|
+
a = k[0]
|
266
|
+
b = k[1]
|
267
|
+
|
268
|
+
ids_1 = []
|
269
|
+
ids_2 = []
|
270
|
+
|
271
|
+
if !obo1_hash[a]
|
272
|
+
puts "can't find #{k}\n"
|
273
|
+
next
|
274
|
+
end
|
275
|
+
|
276
|
+
if !obo2_hash[b]
|
277
|
+
puts "can't find #{k}\n"
|
278
|
+
next
|
279
|
+
end
|
280
|
+
|
281
|
+
obo1_hash[a].relationships.each do |rel, id|
|
282
|
+
if rel == opt[:property]
|
283
|
+
ids_1.push id
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
obo2_hash[b].relationships.each do |rel, id|
|
288
|
+
if rel == opt[:property]
|
289
|
+
ids_2.push id
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
unplaced = true
|
294
|
+
|
295
|
+
ids_1.each do |c|
|
296
|
+
ids_2.each do |d|
|
297
|
+
t = "#{a} -> #{b}"
|
298
|
+
if index["#{c}#{d}"] # think this is right hash[c] == d
|
299
|
+
result[:yes].push(t)
|
300
|
+
unplaced = false
|
301
|
+
next # don't add again after we find a hit
|
302
|
+
else
|
303
|
+
result[:no].push(t)
|
304
|
+
unplaced = false
|
305
|
+
end
|
306
|
+
end
|
307
|
+
end
|
308
|
+
result[:unplaced]
|
309
|
+
|
310
|
+
end
|
311
|
+
|
312
|
+
result
|
313
|
+
end
|
314
|
+
|
315
|
+
|
316
|
+
# Takes a two column input file, references it to two ontologies, and returns an array of [[id1, id2], [id3,idn] ...].
|
317
|
+
#
|
318
|
+
#== Example use
|
319
|
+
# file = File.read('HAO_TGMA_list.txt')
|
320
|
+
# col1_obo = File.read('hao.obo')
|
321
|
+
# col2_obo = File.read('tgma.obo')
|
221
322
|
#
|
222
|
-
# OboParser::Utilities.
|
323
|
+
# OboParser::Utilities.arrayify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
|
223
324
|
#
|
224
325
|
# @param [Hash] options options.
|
225
326
|
# @param [Symbol] data the two column data file.
|
226
327
|
# @param [Symbol] col1_obo the OBO file referenced in the first column
|
227
328
|
# @param [Symbol] col2_obo the OBO file referenced in the second column
|
228
|
-
# @return [
|
229
|
-
def self.
|
329
|
+
# @return [Array] an array of [[id string, id string], ...]
|
330
|
+
def self.arrayify_pairs(options = {})
|
230
331
|
opt = {
|
231
332
|
:data => nil,
|
232
333
|
:col1_obo => nil,
|
233
334
|
:col2_obo => nil,
|
335
|
+
:index_start => 0
|
234
336
|
}.merge!(options)
|
235
|
-
|
337
|
+
|
236
338
|
c1obo = parse_obo_file(opt[:col1_obo])
|
237
339
|
c2obo = parse_obo_file(opt[:col2_obo])
|
238
340
|
|
239
|
-
|
341
|
+
array = Array.new
|
240
342
|
|
241
343
|
i = opt[:index_start]
|
242
344
|
v1 = nil # a label like 'head'
|
@@ -246,7 +348,7 @@ is_anti_symmetric: true
|
|
246
348
|
|
247
349
|
opt[:data].split(/\n/).each do |row|
|
248
350
|
i += 1
|
249
|
-
c1, c2 =
|
351
|
+
c1, c2 = row.split(/\t/).map(&:strip)
|
250
352
|
|
251
353
|
if c1.nil? || c2.nil?
|
252
354
|
next
|
@@ -263,11 +365,78 @@ is_anti_symmetric: true
|
|
263
365
|
else
|
264
366
|
v2 = c2obo.term_hash[c2]
|
265
367
|
end
|
266
|
-
|
267
|
-
|
268
|
-
|
368
|
+
|
369
|
+
array << [c1,c2]
|
370
|
+
|
371
|
+
end
|
372
|
+
return array
|
373
|
+
end
|
374
|
+
|
375
|
+
|
376
|
+
# Provides a set comparison of correspondences between two sets of correspondences. Finds correspondences that are shared, in left only, or in right only.
|
377
|
+
#
|
378
|
+
#== Example use
|
379
|
+
# data1 = File.read('input/hao_fbbt_vetted_list2.txt')
|
380
|
+
# data2 = File.read('input/hao_fbbt_from_bioportal.txt')
|
381
|
+
# col1_obo = File.read('hao.obo')
|
382
|
+
# col2_obo = File.read('tgma.obo')
|
383
|
+
#
|
384
|
+
# OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :text ) # => a text report
|
385
|
+
# or
|
386
|
+
# foo = OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :array )
|
387
|
+
#
|
388
|
+
#== "Report" types
|
389
|
+
# Use :return to return different values
|
390
|
+
# :array - Returns an Array
|
391
|
+
# :text - Generates 4 column report (id, label, id, label) to STDOUT
|
392
|
+
#
|
393
|
+
# @param [Hash] options options.
|
394
|
+
# @param [Symbol] data1 two columns (tab) with a correspondence between ontologies 1 and 2
|
395
|
+
# @param [Symbol] data2 as data1, a second set of correspondences
|
396
|
+
# @param [Symbol] col1_obo the OBO file corresponding to the first column of data1 and data2
|
397
|
+
# @param [Symbol] col2_obo the OBO file corresponding to the second column of data1 and data2
|
398
|
+
# @param [Symbol] return the value to return
|
399
|
+
# @return [String | Array]
|
400
|
+
def self.set_comparison(options = {})
|
401
|
+
opt = {
|
402
|
+
:data1 => nil,
|
403
|
+
:data2 => nil,
|
404
|
+
:col1_obo => nil,
|
405
|
+
:col2_obo => nil,
|
406
|
+
:return => :array # also :text
|
407
|
+
}.merge!(options)
|
408
|
+
|
409
|
+
c1obo = parse_obo_file(opt[:col1_obo])
|
410
|
+
c2obo = parse_obo_file(opt[:col2_obo])
|
411
|
+
|
412
|
+
th1 = c1obo.id_hash
|
413
|
+
th2 = c2obo.id_hash
|
414
|
+
|
415
|
+
array1 = OboParser::Utilities.arrayify_pairs(:data => opt[:data1], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
|
416
|
+
array2 = OboParser::Utilities.arrayify_pairs(:data => opt[:data2], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
|
417
|
+
|
418
|
+
index1 = array1.inject({}){|hsh, i| hsh.merge!( "#{i[0]}#{i[1]}" => i)}
|
419
|
+
index2 = array2.inject({}){|hsh, i| hsh.merge!( "#{i[0]}#{i[1]}" => i)}
|
420
|
+
|
421
|
+
result = {:in_left => [], :in_right => [], :in_both => []}
|
422
|
+
|
423
|
+
result[:in_both] = (index1.keys & index2.keys).collect{|k| [ index1[k][0], th1[index1[k][0]], index1[k][1], th2[index1[k][1]] , ] }
|
424
|
+
result[:in_left] = ((index1.keys | index2.keys) - index2.keys ).collect{|k| [ index1[k][0], th1[index1[k][0]], index1[k][1], th2[index1[k][1]] , ] }
|
425
|
+
result[:in_right] = ((index1.keys | index2.keys) - index1.keys ).collect{|k| [ index2[k][0], th1[index2[k][0]], index2[k][1], th2[index2[k][1]] , ] }
|
426
|
+
|
427
|
+
case opt[:return]
|
428
|
+
when :array
|
429
|
+
return result
|
430
|
+
when :text
|
431
|
+
[:in_both, :in_left, :in_right].each do |t|
|
432
|
+
puts "--- #{t.to_s} (#{result[t].size}) ---"
|
433
|
+
result[t].each do |r|
|
434
|
+
puts r.join("\t")
|
435
|
+
end
|
436
|
+
end
|
437
|
+
else
|
438
|
+
return nil
|
269
439
|
end
|
270
|
-
return hash
|
271
440
|
end
|
272
441
|
|
273
442
|
|
@@ -290,143 +459,48 @@ is_anti_symmetric: true
|
|
290
459
|
end
|
291
460
|
|
292
461
|
|
293
|
-
# Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
|
294
|
-
#
|
295
|
-
#== Example use
|
296
|
-
# OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
|
297
|
-
#
|
298
|
-
# @param [Symbol] ontologies a Hash of #read files as values, keys as working names
|
299
|
-
# @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
|
300
|
-
# TODO: @return File1, File2, Filen
|
301
|
-
def self.cytoscapify(options = {})
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
return false if opt[:properties].empty?
|
308
|
-
return false if opt[:ontologies].empty?
|
309
|
-
|
310
|
-
nodes = File.new("nodes.tab", "w+")
|
311
|
-
edges = File.new("edges.eda", "w+")
|
312
|
-
|
313
|
-
opt[:ontologies].keys.each do |k|
|
314
|
-
|
315
|
-
obo_file = parse_obo_file(opt[:ontologies][k])
|
316
|
-
|
317
|
-
obo_file.terms.each do |t|
|
318
|
-
nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
|
319
|
-
|
320
|
-
t.relationships.each do |rel, id|
|
321
|
-
edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
|
322
|
-
end
|
323
|
-
end
|
324
|
-
end
|
325
|
-
|
326
|
-
nodes.close
|
327
|
-
edges.close
|
328
|
-
|
329
|
-
true
|
330
|
-
|
331
|
-
end
|
332
|
-
|
462
|
+
# Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
|
463
|
+
#
|
464
|
+
#== Example use
|
465
|
+
# OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
|
466
|
+
#
|
467
|
+
# @param [Symbol] ontologies a Hash of #read files as values, keys as working names
|
468
|
+
# @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
|
469
|
+
# TODO: @return File1, File2, Filen
|
470
|
+
def self.cytoscapify(options = {})
|
471
|
+
opt = {
|
472
|
+
:ontologies => {},
|
473
|
+
:properties => []
|
474
|
+
}.merge!(options)
|
333
475
|
|
334
|
-
|
335
|
-
|
336
|
-
# provided property/relation type.
|
337
|
-
#
|
338
|
-
#== Example use
|
339
|
-
# file = File.read('HAO_TGMA_list.txt')
|
340
|
-
# col1_obo = File.read('hao.obo')
|
341
|
-
# col2_obo = File.read('tgma.obo')
|
342
|
-
#
|
343
|
-
# foo = OboParser::Utilities.parents(:data => data, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
|
344
|
-
#
|
345
|
-
# puts "-- NO (#{foo[:no].size})\n"
|
346
|
-
# puts foo[:no].join("\n")
|
347
|
-
# puts "-- YES (#{foo[:yes].size})\n"
|
348
|
-
# puts foo[:yes].join("\n")
|
349
|
-
#
|
350
|
-
# @param [Hash] options options.
|
351
|
-
# @param [Symbol] data the two column data file.
|
352
|
-
# @param [Symbol] colo1_obo the OBO file referenced in the first column
|
353
|
-
# @param [Symbol] colo2_obo the OBO file referenced in the second column
|
354
|
-
# @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
|
355
|
-
# @return [Hash] a hash of {:yes => {}, :no => {}}
|
356
|
-
def self.parents(options = {})
|
357
|
-
opt = {
|
358
|
-
:data => nil,
|
359
|
-
:col1_obo => nil,
|
360
|
-
:col2_obo => nil,
|
361
|
-
:property => nil
|
362
|
-
}.merge!(options)
|
363
|
-
|
364
|
-
return false if opt[:property].nil?
|
365
|
-
c1obo = parse_obo_file(opt[:col1_obo])
|
366
|
-
c2obo = parse_obo_file(opt[:col2_obo])
|
367
|
-
|
368
|
-
result = {:yes => [], :no => [], :unplaced => []}
|
369
|
-
# update
|
370
|
-
hash = hashify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
|
371
|
-
|
372
|
-
obo1_hash = c1obo.id_index
|
373
|
-
obo2_hash = c2obo.id_index
|
374
|
-
|
375
|
-
hash.keys.each do |k|
|
376
|
-
a = k
|
377
|
-
b = hash[a]
|
378
|
-
|
379
|
-
ids_1 = []
|
380
|
-
ids_2 = []
|
381
|
-
|
382
|
-
if !obo1_hash[a]
|
383
|
-
puts "can't find #{k}\n"
|
384
|
-
next
|
385
|
-
end
|
476
|
+
return false if opt[:properties].empty?
|
477
|
+
return false if opt[:ontologies].empty?
|
386
478
|
|
387
|
-
|
388
|
-
|
389
|
-
next
|
390
|
-
end
|
479
|
+
nodes = File.new("nodes.tab", "w+")
|
480
|
+
edges = File.new("edges.eda", "w+")
|
391
481
|
|
392
|
-
|
393
|
-
if rel == opt[:property]
|
394
|
-
ids_1.push id
|
395
|
-
end
|
396
|
-
end
|
482
|
+
opt[:ontologies].keys.each do |k|
|
397
483
|
|
398
|
-
|
399
|
-
if rel == opt[:property]
|
400
|
-
ids_2.push id
|
401
|
-
end
|
402
|
-
end
|
484
|
+
obo_file = parse_obo_file(opt[:ontologies][k])
|
403
485
|
|
404
|
-
|
486
|
+
obo_file.terms.each do |t|
|
487
|
+
nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
|
405
488
|
|
406
|
-
|
407
|
-
|
408
|
-
t = "#{a} -> #{b}"
|
409
|
-
if hash[c] == d
|
410
|
-
result[:yes].push(t)
|
411
|
-
unplaced = false
|
412
|
-
next # don't add again after we find a hit
|
413
|
-
else
|
414
|
-
result[:no].push(t)
|
415
|
-
unplaced = false
|
489
|
+
t.relationships.each do |rel, id|
|
490
|
+
edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
|
416
491
|
end
|
417
492
|
end
|
418
493
|
end
|
419
|
-
result[:unplaced]
|
420
494
|
|
421
|
-
|
422
|
-
|
423
|
-
result
|
424
|
-
end
|
495
|
+
nodes.close
|
496
|
+
edges.close
|
425
497
|
|
498
|
+
true
|
426
499
|
|
500
|
+
end
|
427
501
|
|
428
502
|
|
429
|
-
#== Helper methods that don't require the obo_parser library
|
503
|
+
#== Helper methods that don't require the obo_parser library
|
430
504
|
|
431
505
|
# Given a Term id and a String representing an OBO file returns that stanza.
|
432
506
|
#
|
data/obo_parser.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{obo_parser}
|
8
|
-
s.version = "0.3.
|
8
|
+
s.version = "0.3.7"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["mjy"]
|
12
|
-
s.date = %q{2011-08-
|
12
|
+
s.date = %q{2011-08-25}
|
13
13
|
s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
|
14
14
|
s.email = %q{diapriid@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: obo_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
-
|
10
|
-
version: 0.3.
|
9
|
+
- 7
|
10
|
+
version: 0.3.7
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- mjy
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-08-
|
18
|
+
date: 2011-08-25 00:00:00 Z
|
19
19
|
dependencies: []
|
20
20
|
|
21
21
|
description: "Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. "
|