obo_parser 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/utilities.rb +212 -138
- data/obo_parser.gemspec +2 -2
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.
|
1
|
+
0.3.7
|
data/lib/utilities.rb
CHANGED
@@ -18,7 +18,7 @@ module OboParser::Utilities
|
|
18
18
|
#
|
19
19
|
# @param [Integer] cutoff only Term ids with > cutoff labels will be reported
|
20
20
|
# @param [Array] files an Array of read files
|
21
|
-
# @return [String] the
|
21
|
+
# @return [String] the translation in tab-delimited format
|
22
22
|
def self.dump_comparison_by_id(cutoff = 0, files = [])
|
23
23
|
return '' if files.size < 1
|
24
24
|
|
@@ -47,7 +47,9 @@ module OboParser::Utilities
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
+
|
50
51
|
# Returns all labels found in all passed ontologies. Does not yet include synonyms.
|
52
|
+
# Caution: strips adult, embryonic, larval from labels (comment to remove)
|
51
53
|
#
|
52
54
|
#== Example use
|
53
55
|
# of1 = File.read('fly_anatomy.obo')
|
@@ -64,8 +66,11 @@ module OboParser::Utilities
|
|
64
66
|
files.each do |f|
|
65
67
|
o = parse_obo_file(f)
|
66
68
|
o.term_hash.keys.each do |k|
|
69
|
+
|
70
|
+
# TODO: make this optional
|
67
71
|
tmp = k.gsub(/adult/, "").strip
|
68
72
|
tmp = k.gsub(/embryonic\/larval/, "").strip
|
73
|
+
|
69
74
|
if comparison[tmp]
|
70
75
|
comparison[tmp] += 1
|
71
76
|
else
|
@@ -86,10 +91,9 @@ module OboParser::Utilities
|
|
86
91
|
|
87
92
|
end
|
88
93
|
|
94
|
+
#== Two column correspondences and translation tools
|
89
95
|
|
90
|
-
|
91
|
-
|
92
|
-
HOMOLONTO_HEADER = %{
|
96
|
+
HOMOLONTO_HEADER = %{
|
93
97
|
format-version: 1.2
|
94
98
|
auto-generated-by: obo_parser
|
95
99
|
default-namespace: fix_me
|
@@ -212,31 +216,129 @@ is_anti_symmetric: true
|
|
212
216
|
true
|
213
217
|
end
|
214
218
|
|
215
|
-
|
219
|
+
|
220
|
+
# Takes a two column input file, references it to two ontologies, and returns a report
|
221
|
+
# that identifies data pairs that have parents who are also a data pair given a
|
222
|
+
# provided property/relation type.
|
216
223
|
#
|
217
224
|
#== Example use
|
218
225
|
# file = File.read('HAO_TGMA_list.txt')
|
219
226
|
# col1_obo = File.read('hao.obo')
|
220
227
|
# col2_obo = File.read('tgma.obo')
|
228
|
+
#
|
229
|
+
# foo = OboParser::Utilities.parents(:data => data, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
|
230
|
+
#
|
231
|
+
# puts "-- NO (#{foo[:no].size})\n"
|
232
|
+
# puts foo[:no].join("\n")
|
233
|
+
# puts "-- YES (#{foo[:yes].size})\n"
|
234
|
+
# puts foo[:yes].join("\n")
|
235
|
+
#
|
236
|
+
# @param [Hash] options options.
|
237
|
+
# @param [Symbol] data the two column data file.
|
238
|
+
# @param [Symbol] col1_obo the OBO file referenced in the first column
|
239
|
+
# @param [Symbol] col2_obo the OBO file referenced in the second column
|
240
|
+
# @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
|
241
|
+
# @return [Hash, false] a hash of {:yes => [], :no => [], :unplaced => []}, or false when no :property is given
|
242
|
+
def parents(options = {})
|
243
|
+
opt = {
|
244
|
+
:data => nil,
|
245
|
+
:col1_obo => nil,
|
246
|
+
:col2_obo => nil,
|
247
|
+
:property => nil
|
248
|
+
}.merge!(options)
|
249
|
+
|
250
|
+
return false if opt[:property].nil?
|
251
|
+
c1obo = parse_obo_file(opt[:col1_obo])
|
252
|
+
c2obo = parse_obo_file(opt[:col2_obo])
|
253
|
+
|
254
|
+
result = {:yes => [], :no => [], :unplaced => []}
|
255
|
+
# update
|
256
|
+
array = OboParser::Utilities.arrayify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
|
257
|
+
|
258
|
+
# index the pairs
|
259
|
+
index = array.inject({}){|hsh, i| hsh.update({"#{i[0]}#{i[1]}" => i})}
|
260
|
+
|
261
|
+
obo1_hash = c1obo.id_index
|
262
|
+
obo2_hash = c2obo.id_index
|
263
|
+
|
264
|
+
array.each do |k|
|
265
|
+
a = k[0]
|
266
|
+
b = k[1]
|
267
|
+
|
268
|
+
ids_1 = []
|
269
|
+
ids_2 = []
|
270
|
+
|
271
|
+
if !obo1_hash[a]
|
272
|
+
puts "can't find #{k}\n"
|
273
|
+
next
|
274
|
+
end
|
275
|
+
|
276
|
+
if !obo2_hash[b]
|
277
|
+
puts "can't find #{k}\n"
|
278
|
+
next
|
279
|
+
end
|
280
|
+
|
281
|
+
obo1_hash[a].relationships.each do |rel, id|
|
282
|
+
if rel == opt[:property]
|
283
|
+
ids_1.push id
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
287
|
+
obo2_hash[b].relationships.each do |rel, id|
|
288
|
+
if rel == opt[:property]
|
289
|
+
ids_2.push id
|
290
|
+
end
|
291
|
+
end
|
292
|
+
|
293
|
+
unplaced = true
|
294
|
+
|
295
|
+
ids_1.each do |c|
|
296
|
+
ids_2.each do |d|
|
297
|
+
t = "#{a} -> #{b}"
|
298
|
+
if index["#{c}#{d}"] # think this is right hash[c] == d
|
299
|
+
result[:yes].push(t)
|
300
|
+
unplaced = false
|
301
|
+
next # don't add again after we find a hit
|
302
|
+
else
|
303
|
+
result[:no].push(t)
|
304
|
+
unplaced = false
|
305
|
+
end
|
306
|
+
end
|
307
|
+
end
|
308
|
+
result[:unplaced]
|
309
|
+
|
310
|
+
end
|
311
|
+
|
312
|
+
result
|
313
|
+
end
|
314
|
+
|
315
|
+
|
316
|
+
# Takes a two column input file, references it to two ontologies, and returns an array of [[id1, id2], [id3, idn] ...].
|
317
|
+
#
|
318
|
+
#== Example use
|
319
|
+
# file = File.read('HAO_TGMA_list.txt')
|
320
|
+
# col1_obo = File.read('hao.obo')
|
321
|
+
# col2_obo = File.read('tgma.obo')
|
221
322
|
#
|
222
|
-
# OboParser::Utilities.
|
323
|
+
# OboParser::Utilities.arrayify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
|
223
324
|
#
|
224
325
|
# @param [Hash] options options.
|
225
326
|
# @param [Symbol] data the two column data file.
|
226
327
|
# @param [Symbol] col1_obo the OBO file referenced in the first column
|
227
328
|
# @param [Symbol] col2_obo the OBO file referenced in the second column
|
228
|
-
# @return [
|
229
|
-
def self.
|
329
|
+
# @return [Array] an array of [[id string, id string], ...]
|
330
|
+
def self.arrayify_pairs(options = {})
|
230
331
|
opt = {
|
231
332
|
:data => nil,
|
232
333
|
:col1_obo => nil,
|
233
334
|
:col2_obo => nil,
|
335
|
+
:index_start => 0
|
234
336
|
}.merge!(options)
|
235
|
-
|
337
|
+
|
236
338
|
c1obo = parse_obo_file(opt[:col1_obo])
|
237
339
|
c2obo = parse_obo_file(opt[:col2_obo])
|
238
340
|
|
239
|
-
|
341
|
+
array = Array.new
|
240
342
|
|
241
343
|
i = opt[:index_start]
|
242
344
|
v1 = nil # a label like 'head'
|
@@ -246,7 +348,7 @@ is_anti_symmetric: true
|
|
246
348
|
|
247
349
|
opt[:data].split(/\n/).each do |row|
|
248
350
|
i += 1
|
249
|
-
c1, c2 =
|
351
|
+
c1, c2 = row.split(/\t/).map(&:strip)
|
250
352
|
|
251
353
|
if c1.nil? || c2.nil?
|
252
354
|
next
|
@@ -263,11 +365,78 @@ is_anti_symmetric: true
|
|
263
365
|
else
|
264
366
|
v2 = c2obo.term_hash[c2]
|
265
367
|
end
|
266
|
-
|
267
|
-
|
268
|
-
|
368
|
+
|
369
|
+
array << [c1,c2]
|
370
|
+
|
371
|
+
end
|
372
|
+
return array
|
373
|
+
end
|
374
|
+
|
375
|
+
|
376
|
+
# Provides a set comparison of correspondences between two sets of correspondences. Finds correspondences that are shared, in left only, or in right only.
|
377
|
+
#
|
378
|
+
#== Example use
|
379
|
+
# data1 = File.read('input/hao_fbbt_vetted_list2.txt')
|
380
|
+
# data2 = File.read('input/hao_fbbt_from_bioportal.txt')
|
381
|
+
# col1_obo = File.read('hao.obo')
|
382
|
+
# col2_obo = File.read('tgma.obo')
|
383
|
+
#
|
384
|
+
# OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :text ) # => a text report
|
385
|
+
# or
|
386
|
+
# foo = OboParser::Utilities.set_comparison(:data1 => data1 , :data2 => data2, :col1_obo => col1_obo, :col2_obo => col2_obo, :return => :array )
|
387
|
+
#
|
388
|
+
#== "Report" types
|
389
|
+
# Use :return to return different values
|
390
|
+
# :array - Returns a Hash of Arrays keyed by :in_left, :in_right, :in_both
|
391
|
+
# :text - Generates 4 column report (id, label, id, label) to STDOUT
|
392
|
+
#
|
393
|
+
# @param [Hash] options options.
|
394
|
+
# @param [Symbol] data1 two columns (tab) with a correspondence between ontologies 1 and 2
|
395
|
+
# @param [Symbol] data2 as data1, a second set of correspondences
|
396
|
+
# @param [Symbol] col1_obo the OBO file corresponding to the first column of data1 and data2
|
397
|
+
# @param [Symbol] col2_obo the OBO file corresponding to the second column of data1 and data2
|
398
|
+
# @param [Symbol] return the value to return
|
399
|
+
# @return [Hash, nil] a Hash for :array; nil for an unrecognized :return value (:text prints its report to STDOUT)
|
400
|
+
def self.set_comparison(options = {})
|
401
|
+
opt = {
|
402
|
+
:data1 => nil,
|
403
|
+
:data2 => nil,
|
404
|
+
:col1_obo => nil,
|
405
|
+
:col2_obo => nil,
|
406
|
+
:return => :array # also :text
|
407
|
+
}.merge!(options)
|
408
|
+
|
409
|
+
c1obo = parse_obo_file(opt[:col1_obo])
|
410
|
+
c2obo = parse_obo_file(opt[:col2_obo])
|
411
|
+
|
412
|
+
th1 = c1obo.id_hash
|
413
|
+
th2 = c2obo.id_hash
|
414
|
+
|
415
|
+
array1 = OboParser::Utilities.arrayify_pairs(:data => opt[:data1], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
|
416
|
+
array2 = OboParser::Utilities.arrayify_pairs(:data => opt[:data2], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
|
417
|
+
|
418
|
+
index1 = array1.inject({}){|hsh, i| hsh.merge!( "#{i[0]}#{i[1]}" => i)}
|
419
|
+
index2 = array2.inject({}){|hsh, i| hsh.merge!( "#{i[0]}#{i[1]}" => i)}
|
420
|
+
|
421
|
+
result = {:in_left => [], :in_right => [], :in_both => []}
|
422
|
+
|
423
|
+
result[:in_both] = (index1.keys & index2.keys).collect{|k| [ index1[k][0], th1[index1[k][0]], index1[k][1], th2[index1[k][1]] , ] }
|
424
|
+
result[:in_left] = ((index1.keys | index2.keys) - index2.keys ).collect{|k| [ index1[k][0], th1[index1[k][0]], index1[k][1], th2[index1[k][1]] , ] }
|
425
|
+
result[:in_right] = ((index1.keys | index2.keys) - index1.keys ).collect{|k| [ index2[k][0], th1[index2[k][0]], index2[k][1], th2[index2[k][1]] , ] }
|
426
|
+
|
427
|
+
case opt[:return]
|
428
|
+
when :array
|
429
|
+
return result
|
430
|
+
when :text
|
431
|
+
[:in_both, :in_left, :in_right].each do |t|
|
432
|
+
puts "--- #{t.to_s} (#{result[t].size}) ---"
|
433
|
+
result[t].each do |r|
|
434
|
+
puts r.join("\t")
|
435
|
+
end
|
436
|
+
end
|
437
|
+
else
|
438
|
+
return nil
|
269
439
|
end
|
270
|
-
return hash
|
271
440
|
end
|
272
441
|
|
273
442
|
|
@@ -290,143 +459,48 @@ is_anti_symmetric: true
|
|
290
459
|
end
|
291
460
|
|
292
461
|
|
293
|
-
# Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
|
294
|
-
#
|
295
|
-
#== Example use
|
296
|
-
# OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
|
297
|
-
#
|
298
|
-
# @param [Symbol] ontologies a Hash of #read files as values, keys as working names
|
299
|
-
# @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
|
300
|
-
# TODO: @return File1, File2, Filen
|
301
|
-
def self.cytoscapify(options = {})
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
return false if opt[:properties].empty?
|
308
|
-
return false if opt[:ontologies].empty?
|
309
|
-
|
310
|
-
nodes = File.new("nodes.tab", "w+")
|
311
|
-
edges = File.new("edges.eda", "w+")
|
312
|
-
|
313
|
-
opt[:ontologies].keys.each do |k|
|
314
|
-
|
315
|
-
obo_file = parse_obo_file(opt[:ontologies][k])
|
316
|
-
|
317
|
-
obo_file.terms.each do |t|
|
318
|
-
nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
|
319
|
-
|
320
|
-
t.relationships.each do |rel, id|
|
321
|
-
edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
|
322
|
-
end
|
323
|
-
end
|
324
|
-
end
|
325
|
-
|
326
|
-
nodes.close
|
327
|
-
edges.close
|
328
|
-
|
329
|
-
true
|
330
|
-
|
331
|
-
end
|
332
|
-
|
462
|
+
# Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
|
463
|
+
#
|
464
|
+
#== Example use
|
465
|
+
# OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
|
466
|
+
#
|
467
|
+
# @param [Symbol] ontologies a Hash of #read files as values, keys as working names
|
468
|
+
# @param [Symbol] properties an Array of properties like ['is_a', 'part_of']
|
469
|
+
# TODO: @return File1, File2, Filen
|
470
|
+
def self.cytoscapify(options = {})
|
471
|
+
opt = {
|
472
|
+
:ontologies => {},
|
473
|
+
:properties => []
|
474
|
+
}.merge!(options)
|
333
475
|
|
334
|
-
|
335
|
-
|
336
|
-
# provided property/relation type.
|
337
|
-
#
|
338
|
-
#== Example use
|
339
|
-
# file = File.read('HAO_TGMA_list.txt')
|
340
|
-
# col1_obo = File.read('hao.obo')
|
341
|
-
# col2_obo = File.read('tgma.obo')
|
342
|
-
#
|
343
|
-
# foo = OboParser::Utilities.parents(:data => data, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
|
344
|
-
#
|
345
|
-
# puts "-- NO (#{foo[:no].size})\n"
|
346
|
-
# puts foo[:no].join("\n")
|
347
|
-
# puts "-- YES (#{foo[:yes].size})\n"
|
348
|
-
# puts foo[:yes].join("\n")
|
349
|
-
#
|
350
|
-
# @param [Hash] options options.
|
351
|
-
# @param [Symbol] data the two column data file.
|
352
|
-
# @param [Symbol] colo1_obo the OBO file referenced in the first column
|
353
|
-
# @param [Symbol] colo2_obo the OBO file referenced in the second column
|
354
|
-
# @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
|
355
|
-
# @return [Hash] a hash of {:yes => {}, :no => {}}
|
356
|
-
def self.parents(options = {})
|
357
|
-
opt = {
|
358
|
-
:data => nil,
|
359
|
-
:col1_obo => nil,
|
360
|
-
:col2_obo => nil,
|
361
|
-
:property => nil
|
362
|
-
}.merge!(options)
|
363
|
-
|
364
|
-
return false if opt[:property].nil?
|
365
|
-
c1obo = parse_obo_file(opt[:col1_obo])
|
366
|
-
c2obo = parse_obo_file(opt[:col2_obo])
|
367
|
-
|
368
|
-
result = {:yes => [], :no => [], :unplaced => []}
|
369
|
-
# update
|
370
|
-
hash = hashify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo])
|
371
|
-
|
372
|
-
obo1_hash = c1obo.id_index
|
373
|
-
obo2_hash = c2obo.id_index
|
374
|
-
|
375
|
-
hash.keys.each do |k|
|
376
|
-
a = k
|
377
|
-
b = hash[a]
|
378
|
-
|
379
|
-
ids_1 = []
|
380
|
-
ids_2 = []
|
381
|
-
|
382
|
-
if !obo1_hash[a]
|
383
|
-
puts "can't find #{k}\n"
|
384
|
-
next
|
385
|
-
end
|
476
|
+
return false if opt[:properties].empty?
|
477
|
+
return false if opt[:ontologies].empty?
|
386
478
|
|
387
|
-
|
388
|
-
|
389
|
-
next
|
390
|
-
end
|
479
|
+
nodes = File.new("nodes.tab", "w+")
|
480
|
+
edges = File.new("edges.eda", "w+")
|
391
481
|
|
392
|
-
|
393
|
-
if rel == opt[:property]
|
394
|
-
ids_1.push id
|
395
|
-
end
|
396
|
-
end
|
482
|
+
opt[:ontologies].keys.each do |k|
|
397
483
|
|
398
|
-
|
399
|
-
if rel == opt[:property]
|
400
|
-
ids_2.push id
|
401
|
-
end
|
402
|
-
end
|
484
|
+
obo_file = parse_obo_file(opt[:ontologies][k])
|
403
485
|
|
404
|
-
|
486
|
+
obo_file.terms.each do |t|
|
487
|
+
nodes.puts [t.id.value, t.name.value, k].join("\t") + "\n"
|
405
488
|
|
406
|
-
|
407
|
-
|
408
|
-
t = "#{a} -> #{b}"
|
409
|
-
if hash[c] == d
|
410
|
-
result[:yes].push(t)
|
411
|
-
unplaced = false
|
412
|
-
next # don't add again after we find a hit
|
413
|
-
else
|
414
|
-
result[:no].push(t)
|
415
|
-
unplaced = false
|
489
|
+
t.relationships.each do |rel, id|
|
490
|
+
edges.puts [t.id.value, "(#{rel})", id].join("\t") + "\n" if opt[:properties].include?(rel)
|
416
491
|
end
|
417
492
|
end
|
418
493
|
end
|
419
|
-
result[:unplaced]
|
420
494
|
|
421
|
-
|
422
|
-
|
423
|
-
result
|
424
|
-
end
|
495
|
+
nodes.close
|
496
|
+
edges.close
|
425
497
|
|
498
|
+
true
|
426
499
|
|
500
|
+
end
|
427
501
|
|
428
502
|
|
429
|
-
#== Helper methods that don't require the obo_parser library
|
503
|
+
#== Helper methods that don't require the obo_parser library
|
430
504
|
|
431
505
|
# Given a Term id and a String representing an OBO file returns that stanza.
|
432
506
|
#
|
data/obo_parser.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{obo_parser}
|
8
|
-
s.version = "0.3.
|
8
|
+
s.version = "0.3.7"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["mjy"]
|
12
|
-
s.date = %q{2011-08-
|
12
|
+
s.date = %q{2011-08-25}
|
13
13
|
s.description = %q{Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. }
|
14
14
|
s.email = %q{diapriid@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: obo_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
-
|
10
|
-
version: 0.3.
|
9
|
+
- 7
|
10
|
+
version: 0.3.7
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- mjy
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-08-
|
18
|
+
date: 2011-08-25 00:00:00 Z
|
19
19
|
dependencies: []
|
20
20
|
|
21
21
|
description: "Provides all-in-one object containing the contents of an OBO formatted file. OBO version 1.2 is targeted, though this should work for 1.0. "
|